From 8f9291883d6a2c86e23aedd16baece91523b8aaa Mon Sep 17 00:00:00 2001 From: null3FF3KT Date: Mon, 18 May 2026 11:00:15 -0500 Subject: [PATCH] feat: initialize YouTube summarizer project with OpenAI integration and map-reduce processing strategy --- .gitignore | 27 ++ AppSettings.cs | 56 +++++ ConsoleRenderer.cs | 197 +++++++++++++++ Program.cs | 227 +++++++++++++++++ README.md | 113 +++++++++ SummarizerService.cs | 342 ++++++++++++++++++++++++++ TranscriptFileService.cs | 212 ++++++++++++++++ VideoModels.cs | 161 ++++++++++++ YouTubeService.cs | 518 +++++++++++++++++++++++++++++++++++++++ YoutubeSummarizer.csproj | 36 +++ appsettings.json | 14 ++ summarize.sln | 24 ++ 12 files changed, 1927 insertions(+) create mode 100644 .gitignore create mode 100644 AppSettings.cs create mode 100644 ConsoleRenderer.cs create mode 100644 Program.cs create mode 100644 README.md create mode 100644 SummarizerService.cs create mode 100644 TranscriptFileService.cs create mode 100644 VideoModels.cs create mode 100644 YouTubeService.cs create mode 100644 YoutubeSummarizer.csproj create mode 100644 appsettings.json create mode 100644 summarize.sln diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e8b7f69 --- /dev/null +++ b/.gitignore @@ -0,0 +1,27 @@ +# Visual Studio / .NET build outputs +[Bb]in/ +[Oo]bj/ +[Pp]ublish/ +*.user +*.userosscache +*.sln.docstates +*.suo +*.cache + +# IDEs / Tools +.idea/ +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +*.swp +*.~* + +# OS metadata +.DS_Store +Thumbs.db + +# Project specific / Temporary files +scratch/ +*.log diff --git a/AppSettings.cs b/AppSettings.cs new file mode 100644 index 0000000..2a2e76f --- /dev/null +++ b/AppSettings.cs @@ -0,0 +1,56 @@ +namespace YoutubeSummarizer.Configuration; + +/// +/// Root configuration object bound from appsettings.json. +/// Only OpenAI and Summarizer sections are required. 
+/// +public sealed class AppSettings +{ + public LlmSettings LLM { get; init; } = new(); + public SummarizerSettings Summarizer { get; init; } = new(); +} + +/// +/// Settings for the LLM API (OpenAI or Ollama). +/// +public sealed class LlmSettings +{ + /// + /// Base URL for the API. + /// For OpenAI: https://api.openai.com/v1 + /// For Ollama: http://localhost:11434/v1 + /// + public string BaseUrl { get; init; } = "https://api.openai.com/v1"; + + /// Your API key. (For Ollama, any value works). + public string ApiKey { get; init; } = string.Empty; + + /// + /// Model to use. + /// OpenAI: gpt-4o-mini, gpt-4o + /// Ollama: qwen3:14b, llama3.1 + /// + public string Model { get; init; } = "gpt-4o-mini"; + + /// Max tokens for the summary response (not the input). + public int MaxTokens { get; init; } = 1500; + + /// Timeout in seconds for API calls. + public int TimeoutSeconds { get; init; } = 100; +} + +/// +/// Controls summarization behavior. +/// +public sealed class SummarizerSettings +{ + /// + /// Approximate word count at which we split a long transcript into chunks + /// before doing a final "summary of summaries" pass. This keeps individual + /// API calls within model context limits. + /// + public int ChunkWordLimit { get; init; } = 3000; + + /// When true, prints the full transcript text before summarizing. + public bool ShowTranscript { get; init; } = false; +} diff --git a/ConsoleRenderer.cs b/ConsoleRenderer.cs new file mode 100644 index 0000000..42fa45d --- /dev/null +++ b/ConsoleRenderer.cs @@ -0,0 +1,197 @@ +using YoutubeSummarizer.Models; + +namespace YoutubeSummarizer.Services; + +/// +/// Handles all console output formatting. +/// Keeping display logic separate from business logic makes it easy to +/// later add output modes (JSON, Markdown file, HTML report) without +/// touching the service layer. +/// +public static class ConsoleRenderer +{ + // ANSI color codes. These render correctly in most Linux terminals. 
+ // If you pipe output to a file, the escape codes will appear as-is — + // run with --no-color if that's a concern (not implemented here, left + // as an exercise). + private const string Reset = "\x1b[0m"; + private const string Bold = "\x1b[1m"; + private const string Cyan = "\x1b[36m"; + private const string Yellow = "\x1b[33m"; + private const string Green = "\x1b[32m"; + private const string Red = "\x1b[31m"; + private const string Dim = "\x1b[2m"; + + /// Prints the application banner on startup. + public static void PrintBanner() + { + Console.WriteLine(); + Console.WriteLine($"{Bold}{Cyan}╔════════════════════════════════════════╗{Reset}"); + Console.WriteLine($"{Bold}{Cyan}║ YouTube Video Summarizer ║{Reset}"); + Console.WriteLine($"{Bold}{Cyan}╚════════════════════════════════════════╝{Reset}"); + Console.WriteLine(); + } + + /// Prompts the user for a URL and reads input. + public static string PromptForUrl() + { + Console.Write($"{Bold}Enter YouTube URL (or 'q' to quit):{Reset} "); + return Console.ReadLine()?.Trim() ?? string.Empty; + } + + /// + /// Asks the user whether they want to save the transcript to a text file. + /// Returns true if the user answers yes. + /// + public static bool PromptSaveTranscript() + { + Console.Write($"{Bold}Save transcript to file? (y/n):{Reset} "); + var answer = Console.ReadLine()?.Trim() ?? string.Empty; + return answer.Equals("y", StringComparison.OrdinalIgnoreCase) + || answer.Equals("yes", StringComparison.OrdinalIgnoreCase); + } + + /// Prints a success message with the saved file path. + public static void PrintFileSaved(string filePath) + { + Console.WriteLine($" {Green}✓ Transcript saved to:{Reset} {filePath}"); + Console.WriteLine(); + } + + /// + /// Prompts the user to choose a summary mode. + /// Returns the selected . 
+ /// + public static SummaryMode PromptSummaryMode() + { + Console.WriteLine($" {Dim}Summary modes:{Reset}"); + Console.WriteLine($" {Bold}1{Reset} – Standard (detailed bullet-point summary)"); + Console.WriteLine($" {Bold}2{Reset} – Personal Filter (relevance verdict: ACT / MONITOR / IGNORE)"); + Console.Write($"{Bold}Choose summary mode [1]:{Reset} "); + var choice = Console.ReadLine()?.Trim() ?? string.Empty; + return choice == "2" ? SummaryMode.PersonalFilter : SummaryMode.Standard; + } + + /// Displays a spinner-style "working" indicator while async work runs. + public static void PrintWorking(string message) + { + Console.WriteLine($" {Dim}→ {message}...{Reset}"); + } + + /// + /// Renders the full summary result to the console in a structured, + /// readable format. Includes metadata header, quality warning, and + /// the summary body. + /// + public static void PrintSummary(VideoSummary summary, bool showTranscriptSource) + { + Console.WriteLine(); + PrintDivider(); + + // ── Metadata header ────────────────────────────────────────────────── + Console.WriteLine($"{Bold}{Green} {summary.Metadata.Title}{Reset}"); + Console.WriteLine($" {Dim}Channel:{Reset} {summary.Metadata.ChannelTitle}"); + Console.WriteLine($" {Dim}Published:{Reset} {summary.Metadata.PublishedAt:MMMM d, yyyy}"); + Console.WriteLine($" {Dim}Duration:{Reset} {summary.Metadata.FormattedDuration}"); + Console.WriteLine($" {Dim}URL:{Reset} https://youtu.be/{summary.Metadata.VideoId}"); + + // ── Transcript source badge ────────────────────────────────────────── + if (showTranscriptSource) + { + var (badge, color) = summary.TranscriptSource switch + { + TranscriptSource.OwnerPublished => ("✓ Owner-published captions", Green), + TranscriptSource.CommunityContributed=> ("✓ Community captions", Green), + TranscriptSource.AutoGenerated => ("~ Auto-generated (ASR)", Yellow), + TranscriptSource.MetadataOnly => ("✗ Metadata only", Red), + _ => ("? 
Unknown", Dim) + }; + Console.WriteLine($" {Dim}Transcript:{Reset} {color}{badge}{Reset}"); + } + + Console.WriteLine($" {Dim}Model:{Reset} {summary.ModelUsed}"); + Console.WriteLine($" {Dim}Generated:{Reset} {summary.GeneratedAt:yyyy-MM-dd HH:mm} UTC"); + + PrintDivider(); + + // ── Quality warning ────────────────────────────────────────────────── + if (summary.QualityWarning is not null) + { + Console.WriteLine(); + Console.WriteLine($" {Yellow}{summary.QualityWarning}{Reset}"); + } + + // ── Summary body ───────────────────────────────────────────────────── + Console.WriteLine(); + Console.WriteLine($"{Bold} SUMMARY{Reset}"); + Console.WriteLine(); + + // Word-wrap the summary body at 80 characters so it's readable in + // standard terminal widths without horizontal scrolling. + foreach (var line in WordWrap(summary.SummaryText, maxWidth: 78)) + { + Console.WriteLine($" {line}"); + } + + Console.WriteLine(); + PrintDivider(); + Console.WriteLine(); + } + + /// Prints a styled error message. + public static void PrintError(string message) + { + Console.WriteLine(); + Console.WriteLine($" {Red}✗ Error: {message}{Reset}"); + Console.WriteLine(); + } + + /// Prints a styled warning (non-fatal). + public static void PrintWarning(string message) + { + Console.WriteLine($" {Yellow}⚠ {message}{Reset}"); + } + + // ───────────────────────────────────────────────────────────────────────── + // Private helpers + // ───────────────────────────────────────────────────────────────────────── + + private static void PrintDivider() + { + Console.WriteLine($" {Dim}{"─".PadRight(74, '─')}{Reset}"); + } + + /// + /// Splits text into lines no wider than characters, + /// breaking only at word boundaries. Respects existing newlines in the input. 
+    ///
+    private static IEnumerable<string> WordWrap(string text, int maxWidth)
+    {
+        foreach (var paragraph in text.Split('\n'))
+        {
+            if (string.IsNullOrWhiteSpace(paragraph))
+            {
+                yield return string.Empty;
+                continue;
+            }
+
+            var words = paragraph.Split(' ', StringSplitOptions.RemoveEmptyEntries);
+            var current = new System.Text.StringBuilder();
+            // A line is flushed only once it holds text, so a word wider than maxWidth never emits a blank line.
+            foreach (var word in words)
+            {
+                if (current.Length > 0 && current.Length + word.Length + 1 > maxWidth)
+                {
+                    yield return current.ToString();
+                    current.Clear();
+                }
+
+                if (current.Length > 0) current.Append(' ');
+                current.Append(word);
+            }
+
+            if (current.Length > 0)
+                yield return current.ToString();
+        }
+    }
+}
diff --git a/Program.cs b/Program.cs
new file mode 100644
index 0000000..84782df
--- /dev/null
+++ b/Program.cs
@@ -0,0 +1,227 @@
+using Microsoft.Extensions.Configuration;
+using Microsoft.Extensions.DependencyInjection;
+using YoutubeSummarizer.Configuration;
+using YoutubeSummarizer.Models;
+using YoutubeSummarizer.Services;
+
+// ═════════════════════════════════════════════════════════════════════════════
+// Bootstrap
+// ═════════════════════════════════════════════════════════════════════════════
+
+// Build configuration from appsettings.json (required) with optional
+// environment variable overrides (useful for CI or containerized deployment).
+// Environment variables follow the pattern: LLM__ApiKey, Summarizer__ChunkWordLimit, etc.
+var config = new ConfigurationBuilder()
+    .SetBasePath(AppContext.BaseDirectory)
+    .AddJsonFile("appsettings.json", optional: false, reloadOnChange: false)
+    .AddEnvironmentVariables() // overrides appsettings values if set
+    .Build();
+
+// Bind configuration sections to strongly-typed objects.
+var appSettings = new AppSettings();
+config.Bind(appSettings);
+
+// Validate required keys up front — fail fast with a clear message rather
+// than letting the first API call blow up with a cryptic 401.
+ValidateSettings(appSettings);
+
+// Wire up DI container.
+// For a console app this is lightweight, but it mirrors the pattern used +// in the LIKA/IKA ASP.NET services so the code is easy to lift into a +// background service or API controller later. +var services = new ServiceCollection(); + +// Register HttpClient for the YouTube timedtext endpoint. +// Using IHttpClientFactory gives us connection pooling and the ability to +// attach Polly retry policies. +services.AddHttpClient(client => +{ + client.DefaultRequestHeaders.Add("User-Agent", + "Mozilla/5.0 (compatible; YoutubeSummarizer/1.0)"); + client.Timeout = TimeSpan.FromSeconds(30); +}); + +// Register services with their config dependencies. +services.AddSingleton(appSettings.LLM); +services.AddSingleton(appSettings.Summarizer); +services.AddTransient(); + +var serviceProvider = services.BuildServiceProvider(); + +// ═════════════════════════════════════════════════════════════════════════════ +// Main loop +// ═════════════════════════════════════════════════════════════════════════════ + +ConsoleRenderer.PrintBanner(); + +// Handle Ctrl+C gracefully so any in-progress API call can finish or cancel. +using var cts = new CancellationTokenSource(); +Console.CancelKeyPress += (_, e) => +{ + e.Cancel = true; // prevent immediate termination + cts.Cancel(); + Console.WriteLine("\n Cancellation requested. 
Finishing current operation..."); +}; + +while (!cts.Token.IsCancellationRequested) +{ + var input = ConsoleRenderer.PromptForUrl(); + + if (string.IsNullOrWhiteSpace(input)) continue; + if (input.Equals("q", StringComparison.OrdinalIgnoreCase)) break; + + // Parse the video ID from the URL + var videoId = YouTubeService.ExtractVideoId(input); + if (videoId is null) + { + ConsoleRenderer.PrintError("Could not extract a valid YouTube video ID from that URL."); + ConsoleRenderer.PrintWarning("Accepted formats: watch?v=..., youtu.be/..., /shorts/..., /embed/..."); + continue; + } + + // Ask whether to save transcript to file before processing + var saveTranscript = ConsoleRenderer.PromptSaveTranscript(); + + // Choose summary mode + var summaryMode = ConsoleRenderer.PromptSummaryMode(); + + await ProcessVideoAsync(videoId, serviceProvider, appSettings.Summarizer, saveTranscript, summaryMode, cts.Token); +} + +Console.WriteLine(" Goodbye!"); + +// ═════════════════════════════════════════════════════════════════════════════ +// Video processing pipeline +// ═════════════════════════════════════════════════════════════════════════════ + +/// +/// Orchestrates the full pipeline for a single video: +/// 1. Fetch metadata (YouTube Data API) +/// 2. Fetch transcript (caption track or timedtext fallback) +/// 3. Summarize (LLM Chat Completions) +/// 4. 
Display (ConsoleRenderer) +/// +static async Task ProcessVideoAsync( + string videoId, + IServiceProvider sp, + SummarizerSettings summarizerSettings, + bool saveTranscript, + SummaryMode summaryMode, + CancellationToken ct) +{ + try + { + // Resolve scoped services + var youtubeService = sp.GetRequiredService(); + var summarizerService = sp.GetRequiredService(); + + // ── Step 1: Metadata ────────────────────────────────────────────── + ConsoleRenderer.PrintWorking("Fetching video metadata"); + var metadata = await youtubeService.GetVideoMetadataAsync(videoId, ct); + + if (metadata is null) + { + ConsoleRenderer.PrintError($"Video not found or is private: {videoId}"); + return; + } + + Console.WriteLine($" {metadata.Title}"); + + // ── Step 2: Transcript ──────────────────────────────────────────── + ConsoleRenderer.PrintWorking("Fetching transcript"); + var transcript = await youtubeService.GetTranscriptAsync(metadata, ct); + + // Optionally show raw transcript for debugging / inspection + if (summarizerSettings.ShowTranscript) + { + Console.WriteLine(); + Console.WriteLine(" ─── RAW TRANSCRIPT ───"); + Console.WriteLine(transcript.Text); + Console.WriteLine(" ─── END TRANSCRIPT ───"); + Console.WriteLine(); + } + + Console.WriteLine( + $" Transcript: {transcript.Source} | {transcript.WordCount:N0} words"); + + // ── Step 2.5: Save transcript to file (if requested) ───────────── + // (moved after summarization so we can include the summary) + + // ── Step 3: Summarize ───────────────────────────────────────────── + // Always run the standard summary (used for file saving). + ConsoleRenderer.PrintWorking("Summarizing with LLM"); + var standardSummary = await summarizerService.SummarizeAsync( + metadata, transcript, SummaryMode.Standard, ct); + + // If the user chose Personal Filter, run a second pass for display. 
+ VideoSummary displaySummary; + if (summaryMode == SummaryMode.PersonalFilter) + { + ConsoleRenderer.PrintWorking("Applying Personal Information Filter"); + displaySummary = await summarizerService.SummarizeAsync( + metadata, transcript, SummaryMode.PersonalFilter, ct); + } + else + { + displaySummary = standardSummary; + } + + // ── Step 3.5: Save transcript + standard summary to file ───────── + if (saveTranscript) + { + var transcriptsDir = Path.Combine( + Environment.GetFolderPath(Environment.SpecialFolder.UserProfile), + "Downloads", "transcripts"); + ConsoleRenderer.PrintWorking("Saving transcript to file"); + var savedPath = await TranscriptFileService.SaveAsync( + metadata, transcript, summaryText: standardSummary.SummaryText, + outputDirectory: transcriptsDir, ct: ct); + ConsoleRenderer.PrintFileSaved(savedPath); + } + + // ── Step 4: Display ─────────────────────────────────────────────── + ConsoleRenderer.PrintSummary(displaySummary, showTranscriptSource: true); + } + catch (OperationCanceledException) + { + // User pressed Ctrl+C — nothing to report, the loop will exit + } + catch (Exception ex) + { + ConsoleRenderer.PrintError(ex.Message); + + // Print the stack trace in dim text for debugging without overwhelming + // normal users who will rarely see this path. + Console.WriteLine($"\x1b[2m{ex}\x1b[0m"); + } +} + +// ═════════════════════════════════════════════════════════════════════════════ +// Configuration validation +// ═════════════════════════════════════════════════════════════════════════════ + +static void ValidateSettings(AppSettings settings) +{ + var errors = new List(); + + if (string.IsNullOrWhiteSpace(settings.LLM.ApiKey) || + settings.LLM.ApiKey == "YOUR_API_KEY_HERE") + { + // For local Ollama, we don't strictly need a real key, but it shouldn't be the placeholder. + // If they are using OpenAI, they definitely need a key. 
+ if (settings.LLM.BaseUrl.Contains("openai.com", StringComparison.OrdinalIgnoreCase)) + { + errors.Add("LLM:ApiKey is not set in appsettings.json (Required for OpenAI)"); + } + } + + if (errors.Count > 0) + { + Console.ForegroundColor = ConsoleColor.Red; + Console.WriteLine("\nConfiguration errors:"); + errors.ForEach(e => Console.WriteLine($" ✗ {e}")); + Console.ResetColor(); + Console.WriteLine("\nCopy appsettings.example.json → appsettings.json and fill in your keys.\n"); + Environment.Exit(1); + } +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..ac3011c --- /dev/null +++ b/README.md @@ -0,0 +1,113 @@ +# YouTube Video Summarizer + +A .NET 8 console application that fetches YouTube video transcripts and produces structured summaries using an LLM (Ollama or OpenAI). + +--- + +## Prerequisites + +- [.NET 8 SDK](https://dotnet.microsoft.com/download) +- A **YouTube Data API v3** key → [Google Cloud Console](https://console.cloud.google.com) +- **Local Ollama** (Recommended) or an **OpenAI API key**. + +--- + +## Setup + +```bash +# 1. Clone / copy the project +cd YoutubeSummarizer + +# 2. Copy the example config and fill in your keys +cp appsettings.example.json appsettings.json +nano appsettings.json # or your editor of choice + +# 3. Restore packages +dotnet restore + +# 4. Run +dotnet run +``` + +--- + +## Google Cloud Setup (YouTube API Key) + +1. Go to [console.cloud.google.com](https://console.cloud.google.com) +2. Create or select a project +3. **APIs & Services → Library** → search "YouTube Data API v3" → Enable +4. **APIs & Services → Credentials → Create Credentials → API key** +5. (Optional but recommended) Restrict the key to only the YouTube Data API v3 + +> Free quota: **10,000 units/day**. Each video lookup costs ~3 units. You can summarize thousands of videos before hitting the limit. 
+
+---
+
+## Configuration Reference
+
+| Key | Description | Default |
+|---|---|---|
+| `YouTube:ApiKey` | Your YouTube Data API v3 key | *(required)* |
+| `LLM:BaseUrl` | API endpoint | `http://localhost:11434/v1` |
+| `LLM:ApiKey` | API key (any for Ollama) | `ollama` |
+| `LLM:Model` | Chat model to use | `qwen3:14b` |
+| `LLM:MaxTokens` | Max tokens in summary response | `1500` |
+| `LLM:TimeoutSeconds` | Max time for LLM generation | `300` |
+| `Summarizer:ChunkWordLimit` | Words per chunk for long videos | `3000` |
+| `Summarizer:ShowTranscript` | Print raw transcript before summary | `false` |
+
+---
+
+## Architecture
+
+```
+Program.cs
+│   Main loop → parses URL → calls pipeline
+│
+├── YouTubeService
+│   ├── ExtractVideoId()           — URL parsing
+│   ├── GetVideoMetadataAsync()    — YouTube Data API v3 (Videos.list)
+│   └── GetTranscriptAsync()       — Caption list + timedtext download
+│
+├── SummarizerService
+│   ├── SummarizeAsync()           — Routes to single-pass or chunked
+│   ├── SinglePassSummarizeAsync() — One LLM call for short videos
+│   └── ChunkedSummarizeAsync()    — Map-reduce for long videos
+│
+└── ConsoleRenderer                — All terminal output / formatting
+```
+
+### Caption Quality Transparency
+
+The app tracks how the transcript was obtained and flags it accordingly:
+
+| Source | Label | Warning shown? |
+|---|---|---|
+| Owner-published captions | `✓ Owner-published captions` | No |
+| Community-contributed | `✓ Community captions` | Minor note |
+| Auto-generated (ASR) | `~ Auto-generated (ASR)` | Yes — accuracy caveat |
+| No captions (metadata only) | `✗ Metadata only` | Yes — limited accuracy |
+
+### Long Video Strategy
+
+Videos with transcripts exceeding `ChunkWordLimit` words use a **map-reduce** approach:
+
+1. **Split** — transcript divided into overlapping chunks (200-word overlap preserves context at boundaries)
+2. **Map** — each chunk summarized independently
+3. 
**Reduce** — chunk summaries combined into a final coherent summary + +This handles hour-long lectures, conference talks, and podcasts without hitting model context limits. + +--- + +## Environment Variable Overrides + +You can override `appsettings.json` values with environment variables, useful for CI or Docker: + +```bash +export YouTube__ApiKey="your-key" +export LLM__ApiKey="ollama" +dotnet run +``` + +Note the double-underscore `__` as the section separator (standard .NET configuration convention). diff --git a/SummarizerService.cs b/SummarizerService.cs new file mode 100644 index 0000000..0177e75 --- /dev/null +++ b/SummarizerService.cs @@ -0,0 +1,342 @@ +using OpenAI; +using OpenAI.Chat; +using YoutubeSummarizer.Configuration; +using YoutubeSummarizer.Models; + +namespace YoutubeSummarizer.Services; + +/// +/// Sends transcript text to OpenAI's Chat Completions API and returns a +/// structured summary. +/// +/// Long transcripts (word count > ChunkWordLimit) are handled with a +/// "map-reduce" strategy: +/// 1. Split the transcript into overlapping chunks. +/// 2. Summarize each chunk independently (map phase). +/// 3. Combine chunk summaries into a final cohesive summary (reduce phase). +/// +/// This keeps individual API calls within model context limits while still +/// producing an accurate summary of long-form content like hour-long lectures. +/// +public sealed class SummarizerService +{ + private readonly LlmSettings _llmSettings; + private readonly SummarizerSettings _summarizerSettings; + private readonly ChatClient _chatClient; + + // System prompt used for single-pass and chunk summarization. + // Keeping it focused on facts and structure produces better summaries + // than open-ended "summarize this" prompts. + private const string ChunkSystemPrompt = """ + You are a precise, factual assistant that summarizes YouTube video transcripts. 
+ When given a transcript segment, produce a concise summary that: + - Captures the key points, arguments, and conclusions + - Preserves any specific facts, names, dates, or statistics mentioned + - Uses bullet points for individual points, then a short paragraph for the overall gist + - Omits filler words, repeated phrases, and off-topic tangents + - Does NOT add information not present in the transcript + Respond with only the summary text, no preamble. + """; + + // Personal Information Filter — concise relevance-based summary. + private const string PersonalFilterSystemPrompt = """ + You are a concise, factual assistant that applies a Personal Information Filter + to YouTube video transcripts. When given a transcript, respond with EXACTLY + three sections and nothing else: + + Summary – A concise, plain-English summary in 1–2 sentences. + + Why it matters – Directly evaluate relevance only against these priorities: + time, finances, health, family, service to others. + If none apply, say so clearly. + + Priority tag – End with a single word verdict: ACT, MONITOR, or IGNORE. + + Constraints: + - Do not timestamp or number entries. + - Do not infer user interest beyond what is explicitly provided. + - Do not expand or add context unless the user requests it. + - The burden of interest is on the user. + - Respond with only the three sections above, no preamble. + """; + + // Personal Filter combine prompt for long transcripts. + private const string PersonalFilterCombinePrompt = """ + You are a concise, factual assistant. You will receive several partial summaries + of consecutive segments of a YouTube video, each formatted with Summary, + Why it matters, and Priority tag sections. Combine them into a single response + using the same three-section format: + + Summary – A concise, plain-English summary of the entire video in 1–2 sentences. 
+ + Why it matters – Directly evaluate relevance only against these priorities: + time, finances, health, family, service to others. + If none apply, say so clearly. + + Priority tag – A single word verdict: ACT, MONITOR, or IGNORE. + + Respond with only these three sections, no preamble. + """; + + // Used in the reduce phase to combine chunk summaries coherently. + private const string CombineSystemPrompt = """ + You are a precise, factual assistant. You will receive several partial summaries + of consecutive segments of a YouTube video. Your task is to combine them into + a single, coherent, well-structured summary that: + - Flows as a unified narrative, not as a list of sub-summaries + - Preserves all key facts, names, dates, and statistics + - Uses bullet points for supporting details beneath each main topic + - Omits redundant information that appears across multiple segments + - Concludes with a 2–3 sentence takeaway paragraph + Respond with only the combined summary, no preamble. + """; + + public SummarizerService(LlmSettings llmSettings, SummarizerSettings summarizerSettings) + { + _llmSettings = llmSettings; + _summarizerSettings = summarizerSettings; + + // Initialize the client with the specified model and endpoint. + // We use the OpenAI SDK's ability to point to any OpenAI-compatible API (like Ollama). + _chatClient = new ChatClient( + model: llmSettings.Model, + credential: new System.ClientModel.ApiKeyCredential(llmSettings.ApiKey), + options: new OpenAIClientOptions + { + Endpoint = new Uri(llmSettings.BaseUrl), + NetworkTimeout = TimeSpan.FromSeconds(llmSettings.TimeoutSeconds) + }); + } + + // ───────────────────────────────────────────────────────────────────────── + // Public API + // ───────────────────────────────────────────────────────────────────────── + + /// + /// Produces a from the video's metadata and transcript. + /// Automatically routes to single-pass or chunked strategy based on word count. 
+ /// + public async Task SummarizeAsync( + VideoMetadata metadata, + VideoTranscript transcript, + SummaryMode mode = SummaryMode.Standard, + CancellationToken ct = default) + { + string summaryText; + + // Select prompt set based on mode + var chunkPrompt = mode == SummaryMode.PersonalFilter + ? PersonalFilterSystemPrompt : ChunkSystemPrompt; + var combinePrompt = mode == SummaryMode.PersonalFilter + ? PersonalFilterCombinePrompt : CombineSystemPrompt; + + if (transcript.WordCount <= _summarizerSettings.ChunkWordLimit) + { + // Short video — single API call is sufficient + summaryText = await SinglePassSummarizeAsync(transcript.Text, metadata, chunkPrompt, ct); + } + else + { + // Long video — chunk-and-combine strategy + summaryText = await ChunkedSummarizeAsync(transcript.Text, metadata, chunkPrompt, combinePrompt, ct); + } + + // Attach a quality warning when the transcript quality is uncertain + var warning = BuildQualityWarning(transcript.Source); + + return new VideoSummary + { + Metadata = metadata, + SummaryText = summaryText, + TranscriptSource = transcript.Source, + QualityWarning = warning, + ModelUsed = _llmSettings.Model + }; + } + + // ───────────────────────────────────────────────────────────────────────── + // Summarization strategies + // ───────────────────────────────────────────────────────────────────────── + + /// + /// Single-pass: sends the entire transcript in one API call. + /// Best for videos under ~30 minutes (roughly 3000–4000 words). + /// + private async Task SinglePassSummarizeAsync( + string transcriptText, + VideoMetadata metadata, + string systemPrompt, + CancellationToken ct) + { + var userMessage = BuildUserPrompt(metadata, transcriptText); + return await CallChatCompletionAsync(systemPrompt, userMessage, ct); + } + + /// + /// Map-reduce: splits long transcripts, summarizes each chunk, then combines. 
+    ///
+    /// Overlap: each chunk after the first begins with a brief overlap window (the last
+    /// ~200 words of the previous chunk) so the model retains context across chunk
+    /// boundaries and avoids abrupt topic changes in the summaries.
+    ///
+    private async Task<string> ChunkedSummarizeAsync(
+        string transcriptText,
+        VideoMetadata metadata,
+        string chunkSystemPrompt,
+        string combineSystemPrompt,
+        CancellationToken ct)
+    {
+        var words = transcriptText.Split(' ', StringSplitOptions.RemoveEmptyEntries);
+        var chunks = SplitIntoChunks(words, _summarizerSettings.ChunkWordLimit, overlapWords: 200);
+
+        Console.WriteLine($"\n [Chunking] Transcript split into {chunks.Count} chunks for processing...");
+
+        // Map phase: summarize each chunk in sequence
+        // (Parallel would be faster but could hit rate limits — sequential is safer)
+        var chunkSummaries = new List<string>(chunks.Count);
+        for (int i = 0; i < chunks.Count; i++)
+        {
+            Console.Write($" [Chunk {i + 1}/{chunks.Count}] Summarizing");
+            var chunkText = string.Join(" ", chunks[i]);
+            var prompt = $"This is segment {i + 1} of {chunks.Count} from the video \"{metadata.Title}\":\n\n{chunkText}";
+            var summary = await CallChatCompletionAsync(chunkSystemPrompt, prompt, ct);
+            chunkSummaries.Add(summary);
+        }
+
+        // Reduce phase: combine all chunk summaries into one coherent summary
+        Console.Write(" [Combine] Merging chunk summaries into final summary");
+        var combinedInput = string.Join("\n\n---\n\n",
+            chunkSummaries.Select((s, i) => $"Segment {i + 1} summary:\n{s}"));
+
+        var combinePrompt = $"Video: \"{metadata.Title}\" by {metadata.ChannelTitle}\n\n" +
+            $"The following are summaries of {chunks.Count} consecutive segments:\n\n{combinedInput}";
+
+        return await CallChatCompletionAsync(combineSystemPrompt, combinePrompt, ct);
+    }
+
+    // ─────────────────────────────────────────────────────────────────────────
+    // Helpers
+    // ─────────────────────────────────────────────────────────────────────────
+
+    ///
+    /// Sends a system + user message pair to the Chat Completions endpoint
+    /// and returns the assistant's reply text.
+    ///
+    private async Task<string> CallChatCompletionAsync(
+        string systemPrompt,
+        string userMessage,
+        CancellationToken ct)
+    {
+        var messages = new List<ChatMessage>
+        {
+            new SystemChatMessage(systemPrompt),
+            new UserChatMessage(userMessage)
+        };
+
+        var options = new ChatCompletionOptions
+        {
+            MaxOutputTokenCount = _llmSettings.MaxTokens
+        };
+
+        var sw = System.Diagnostics.Stopwatch.StartNew();
+        var fullContent = new System.Text.StringBuilder();
+
+        try
+        {
+            var streamingUpdates = _chatClient.CompleteChatStreamingAsync(messages, options, ct);
+
+            await foreach (var update in streamingUpdates)
+            {
+                foreach (var part in update.ContentUpdate)
+                {
+                    if (!string.IsNullOrEmpty(part.Text))
+                    {
+                        if (fullContent.Length == 0)
+                        {
+                            // First token received!
+                            Console.Write(" (working)");
+                        }
+
+                        fullContent.Append(part.Text);
+
+                        // Show progress: print a dot each time the reply crosses another
+                        // 50-character boundary. Comparing the bucket before and after the append
+                        // keeps the dots steady however the server sizes its streaming updates.
+                        if (fullContent.Length / 50 > (fullContent.Length - part.Text.Length) / 50) Console.Write(".");
+                    }
+                }
+            }
+        }
+        finally
+        {
+            sw.Stop();
+            Console.WriteLine($" Done! ({sw.Elapsed.TotalSeconds:F1}s)");
+        }
+
+        return fullContent.ToString();
+    }
+
+    ///
+    /// Builds the user-turn prompt for a single-pass summarization.
+    /// Including the title and channel anchors the model to the subject matter,
+    /// which reduces hallucination on ambiguous ASR transcripts.
+    ///
+    private static string BuildUserPrompt(VideoMetadata metadata, string transcriptText)
+    {
+        return $"""
+            Video title: {metadata.Title}
+            Channel: {metadata.ChannelTitle}
+            Published: {metadata.PublishedAt:MMMM d, yyyy}
+            Duration: {metadata.FormattedDuration}
+
+            Full transcript:
+            {transcriptText}
+            """;
+    }
+
+    ///
+    /// Splits a word array into overlapping chunks of roughly <paramref name="chunkSize"/> words.
+    /// The overlap prevents the model from missing context at chunk boundaries.
/// </summary>
/// <param name="words">Words to partition, in order.</param>
/// <param name="chunkSize">Maximum number of words per chunk; must be positive.</param>
/// <param name="overlapWords">
/// Number of trailing words of one chunk repeated at the start of the next; must not be
/// negative. Values of <paramref name="chunkSize"/> or more are reduced to
/// <c>chunkSize - 1</c> so that every chunk advances by at least one word.
/// </param>
/// <returns>The chunks in order; empty when <paramref name="words"/> is empty.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="chunkSize"/> is not positive or <paramref name="overlapWords"/> is negative.
/// </exception>
private static List<string[]> SplitIntoChunks(string[] words, int chunkSize, int overlapWords)
{
    ArgumentNullException.ThrowIfNull(words);
    ArgumentOutOfRangeException.ThrowIfNegativeOrZero(chunkSize);
    ArgumentOutOfRangeException.ThrowIfNegative(overlapWords);

    // Without this cap, overlapWords >= chunkSize made the window stand still
    // (endless loop) or move backwards (negative range start).
    int overlap = Math.Min(overlapWords, chunkSize - 1);
    int stride = chunkSize - overlap;

    var chunks = new List<string[]>();
    for (int start = 0; start < words.Length; start += stride)
    {
        int end = Math.Min(start + chunkSize, words.Length);
        chunks.Add(words[start..end]);

        // The chunk that reaches the last word is the final one; continuing
        // would only emit a chunk made entirely of overlap.
        if (end == words.Length)
        {
            break;
        }
    }

    return chunks;
}

/// <summary>
/// Returns a human-readable warning when transcript quality may affect summary accuracy.
/// Returns null for high-confidence sources (no warning needed).
/// </summary>
/// <param name="source">How the transcript was obtained.</param>
private static string? BuildQualityWarning(TranscriptSource source) =>
    source switch
    {
        TranscriptSource.AutoGenerated =>
            "⚠ This summary is based on YouTube's auto-generated captions (ASR). " +
            "The transcript may contain errors, especially for technical terms, names, or accented speech.",

        TranscriptSource.MetadataOnly =>
            "⚠ No captions were available. This summary is based on the video's title " +
            "and description only — it may be incomplete or inaccurate.",

        TranscriptSource.CommunityContributed =>
            "ℹ This summary is based on community-contributed captions. " +
            "Quality is generally good but not guaranteed.",

        _ => null // OwnerPublished — no warning needed
    };
}
// diff --git a/TranscriptFileService.cs b/TranscriptFileService.cs
// new file mode 100644
// index 0000000..f9e5ca0
// --- /dev/null
// +++ b/TranscriptFileService.cs
// @@ -0,0 +1,212 @@
using System.Text;
using YoutubeSummarizer.Models;

namespace YoutubeSummarizer.Services;

/// <summary>
/// Saves video metadata and timestamped transcript to a plain text file.
/// The file is formatted with metadata at the top followed by the transcript
/// organized by timestamps.
/// </summary>
public static class TranscriptFileService
{
    /// <summary>
    /// Saves the transcript and metadata to a text file in the specified directory.
/// Returns the full path to the saved file.
/// </summary>
/// <param name="metadata">Video metadata written to the header section and used to build the file name.</param>
/// <param name="transcript">Transcript to write; timestamped segments are used when present, otherwise the plain text.</param>
/// <param name="summaryText">Optional summary; when non-blank it is word-wrapped into the header section.</param>
/// <param name="outputDirectory">Target directory (created if missing). Null or blank means the current directory.</param>
/// <param name="ct">Cancellation token passed to the file write.</param>
/// <returns>The path of the written file.</returns>
public static async Task<string> SaveAsync(
    VideoMetadata metadata,
    VideoTranscript transcript,
    string? summaryText = null,
    string? outputDirectory = null,
    CancellationToken ct = default)
{
    ArgumentNullException.ThrowIfNull(metadata);
    ArgumentNullException.ThrowIfNull(transcript);

    const string Rule = "════════════════════════════════════════════════════════════════";
    const int WrapWidth = 72;

    // A blank directory used to reach Directory.CreateDirectory(""), which throws.
    if (string.IsNullOrWhiteSpace(outputDirectory))
    {
        outputDirectory = Environment.CurrentDirectory;
    }
    Directory.CreateDirectory(outputDirectory);

    // Build a safe filename from the video title. The id is filtered as well so
    // an unexpected path separator in it cannot redirect the output file.
    var invalidChars = Path.GetInvalidFileNameChars();
    var safeTitle = SanitizeFileName(metadata.Title);
    var safeId = string.Concat(metadata.VideoId.Select(c => Array.IndexOf(invalidChars, c) < 0 ? c : '_'));
    var fileName = $"{safeTitle}_{safeId}.txt";
    var filePath = Path.Combine(outputDirectory, fileName);

    var sb = new StringBuilder();

    // Writes text word-wrapped, one output line per wrapped line.
    void AppendWrapped(string text)
    {
        foreach (var wrappedLine in WordWrap(text, maxWidth: WrapWidth))
        {
            sb.AppendLine($" {wrappedLine}");
        }
    }

    // ── Metadata section ─────────────────────────────────────────────────
    sb.AppendLine(Rule);
    sb.AppendLine(" VIDEO METADATA");
    sb.AppendLine(Rule);
    sb.AppendLine();
    sb.AppendLine($" Title: {metadata.Title}");
    sb.AppendLine($" Channel: {metadata.ChannelTitle}");
    sb.AppendLine($" Published: {metadata.PublishedAt:MMMM d, yyyy}");
    sb.AppendLine($" Duration: {metadata.FormattedDuration}");
    sb.AppendLine($" Video ID: {metadata.VideoId}");
    sb.AppendLine($" URL: https://youtu.be/{metadata.VideoId}");

    if (!string.IsNullOrWhiteSpace(summaryText))
    {
        sb.AppendLine();
        sb.AppendLine(" ── SUMMARY ──────────────────────────────────────────────");
        sb.AppendLine();
        AppendWrapped(summaryText);
    }

    sb.AppendLine();

    // ── Transcript source ────────────────────────────────────────────────
    var sourceLabel = transcript.Source switch
    {
        TranscriptSource.OwnerPublished => "Owner-published captions",
        TranscriptSource.CommunityContributed => "Community-contributed captions",
        TranscriptSource.AutoGenerated => "Auto-generated (ASR)",
        TranscriptSource.MetadataOnly => "Metadata only (no captions)",
        _ => "Unknown"
    };
    sb.AppendLine($" Transcript Source: {sourceLabel}");
    sb.AppendLine($" Word Count: {transcript.WordCount:N0}");
    sb.AppendLine($" Saved: {DateTimeOffset.UtcNow:yyyy-MM-dd HH:mm} UTC");
    sb.AppendLine();

    // ── Transcript section ───────────────────────────────────────────────
    sb.AppendLine(Rule);
    sb.AppendLine(" TRANSCRIPT");
    sb.AppendLine(Rule);
    sb.AppendLine();

    if (transcript.Segments.Count > 0)
    {
        // Group segments into ~30 second blocks for readability; each block is
        // headed by the timestamp of its first segment.
        foreach (var block in GroupSegmentsByInterval(transcript.Segments, intervalSeconds: 30))
        {
            sb.AppendLine($" [{block[0].FormattedTimestamp}]");
            AppendWrapped(string.Join(" ", block.Select(s => s.Text)));
            sb.AppendLine();
        }
    }
    else
    {
        // No timestamps available — write plain text.
        sb.AppendLine(" (No timestamp data available)");
        sb.AppendLine();
        AppendWrapped(transcript.Text);
        sb.AppendLine();
    }

    sb.AppendLine(Rule);
    sb.AppendLine(" END OF TRANSCRIPT");
    sb.AppendLine(Rule);

    await File.WriteAllTextAsync(filePath, sb.ToString(), ct);
    return filePath;
}

// ─────────────────────────────────────────────────────────────────────────
// Helpers
// ─────────────────────────────────────────────────────────────────────────

/// <summary>
/// Groups timestamped segments into blocks based on a time interval.
/// This produces readable chunks (e.g.
/// every 30 seconds) instead of
/// one line per subtitle cue.
/// </summary>
/// <param name="segments">Segments in playback order.</param>
/// <param name="intervalSeconds">
/// A new block starts once a segment begins at least this many seconds after
/// the first segment of the current block.
/// </param>
/// <returns>Non-empty blocks in order; empty when there are no segments.</returns>
private static List<List<TimestampedSegment>> GroupSegmentsByInterval(
    IReadOnlyList<TimestampedSegment> segments,
    int intervalSeconds)
{
    var blocks = new List<List<TimestampedSegment>>();
    if (segments.Count == 0)
    {
        return blocks;
    }

    var currentBlock = new List<TimestampedSegment> { segments[0] };
    var blockStartTime = segments[0].Start;

    foreach (var segment in segments.Skip(1))
    {
        if ((segment.Start - blockStartTime).TotalSeconds >= intervalSeconds)
        {
            blocks.Add(currentBlock);
            currentBlock = new List<TimestampedSegment>();
            blockStartTime = segment.Start;
        }

        currentBlock.Add(segment);
    }

    blocks.Add(currentBlock);
    return blocks;
}

/// <summary>
/// Replaces characters that are invalid in file names with underscores and
/// collapses runs of whitespace/underscores into a single underscore.
/// Truncates to a reasonable length to avoid path-length issues.
/// </summary>
/// <param name="title">Raw title text.</param>
/// <returns>
/// A name of at most 80 characters, or <c>"untitled"</c> when nothing usable
/// remains (blank input, or only separators/invalid characters).
/// </returns>
private static string SanitizeFileName(string title)
{
    const int MaxLength = 80;
    const string Fallback = "untitled";

    if (string.IsNullOrWhiteSpace(title))
    {
        return Fallback;
    }

    var invalid = Path.GetInvalidFileNameChars();
    var sb = new StringBuilder(title.Length);
    foreach (var ch in title)
    {
        sb.Append(Array.IndexOf(invalid, ch) < 0 ? ch : '_');
    }

    // Replace runs of spaces/underscores with a single underscore.
    var result = System.Text.RegularExpressions.Regex.Replace(
        sb.ToString().Trim(), @"[\s_]+", "_");

    // Truncate to keep file paths manageable, without cutting a surrogate
    // pair in half (that would leave an unpaired surrogate in the name).
    if (result.Length > MaxLength)
    {
        int cut = char.IsHighSurrogate(result[MaxLength - 1]) ? MaxLength - 1 : MaxLength;
        result = result[..cut];
    }

    // A name made only of underscores carries no information.
    return result.Trim('_').Length == 0 ? Fallback : result;
}

/// <summary>
/// Word-wraps text at the specified width, breaking at word boundaries.
/// </summary>
/// <param name="text">Text to wrap; each '\n'-separated paragraph is wrapped independently.</param>
/// <param name="maxWidth">Maximum line length. A single word longer than this is emitted on its own line, unbroken.</param>
/// <returns>Wrapped lines; a blank paragraph yields one empty line.</returns>
private static IEnumerable<string> WordWrap(string text, int maxWidth)
{
    foreach (var rawParagraph in text.Split('\n'))
    {
        // Tolerate Windows line endings: drop the '\r' left behind by the split.
        var paragraph = rawParagraph.TrimEnd('\r');

        if (string.IsNullOrWhiteSpace(paragraph))
        {
            yield return string.Empty;
            continue;
        }

        var current = new StringBuilder();

        foreach (var word in paragraph.Split(' ', StringSplitOptions.RemoveEmptyEntries))
        {
            // Only break when there is something to flush. Without the
            // Length > 0 test a word of maxWidth characters or more produced a
            // spurious empty line in front of itself.
            if (current.Length > 0 && current.Length + 1 + word.Length > maxWidth)
            {
                yield return current.ToString();
                current.Clear();
            }

            if (current.Length > 0)
            {
                current.Append(' ');
            }
            current.Append(word);
        }

        if (current.Length > 0)
        {
            yield return current.ToString();
        }
    }
}
}
// diff --git a/VideoModels.cs b/VideoModels.cs
// new file mode 100644
// index 0000000..e510328
// --- /dev/null
// +++ b/VideoModels.cs
// @@ -0,0 +1,161 @@
namespace YoutubeSummarizer.Models;

/// <summary>
/// Metadata for a single video.
/// This is a slim projection — only the fields needed for the summarization
/// workflow are kept.
/// </summary>
public sealed class VideoMetadata
{
    /// <summary>The 11-character YouTube video ID parsed from the URL.</summary>
    public required string VideoId { get; init; }

    /// <summary>Full video title as shown on YouTube.</summary>
    public required string Title { get; init; }

    /// <summary>Channel that published the video.</summary>
    public required string ChannelTitle { get; init; }

    /// <summary>UTC publish date of the video.</summary>
    public DateTimeOffset PublishedAt { get; init; }

    /// <summary>
    /// Video duration in ISO 8601 format (e.g. "PT1H4M32S").
    /// We store it raw and parse it for display purposes.
    /// </summary>
    public string? Duration { get; init; }

    /// <summary>Video description text, when available.</summary>
    public string? Description { get; init; }

    /// <summary>
    /// Human-readable duration parsed from <see cref="Duration"/>:
    /// <c>h:mm:ss</c> for an hour or more, otherwise <c>m:ss</c>.
    /// Returns "Unknown" when the duration is missing or cannot be parsed.
    /// </summary>
    public string FormattedDuration
    {
        get
        {
            if (string.IsNullOrWhiteSpace(Duration))
            {
                return "Unknown";
            }

            TimeSpan span;
            try
            {
                span = System.Xml.XmlConvert.ToTimeSpan(Duration);
            }
            catch (FormatException)
            {
                return "Unknown";
            }

            // Formatting from the components (instead of trimming '0' and ':'
            // off "hh:mm:ss") keeps "0:05" for five seconds, "0:00" for zero,
            // and whole days for durations of 24 hours or more.
            return span.TotalHours >= 1
                ? $"{(int)span.TotalHours}:{span.Minutes:00}:{span.Seconds:00}"
                : $"{span.Minutes}:{span.Seconds:00}";
        }
    }
}

/// <summary>
/// Represents a single caption track available for a video.
/// YouTube can provide multiple tracks (languages, auto-generated vs. manual).
/// </summary>
public sealed class CaptionTrack
{
    public required string TrackId { get; init; }
    public required string Language { get; init; }  // BCP-47, e.g. "en"
    public required string TrackKind { get; init; } // "standard", "asr" (auto), "forced"
    public required string Name { get; init; }      // Display name from YouTube

    /// <summary>
    /// True when the track was automatically generated by YouTube's ASR system.
    /// ASR captions are less reliable — typos, missing punctuation, run-on sentences.
    /// </summary>
    public bool IsAutoGenerated => TrackKind.Equals("asr", StringComparison.OrdinalIgnoreCase);
}

/// <summary>
/// The full textual transcript assembled from caption data,
/// along with provenance information about how it was obtained.
/// </summary>
public sealed class VideoTranscript
{
    public required string VideoId { get; init; }

    /// <summary>The concatenated, cleaned transcript text.</summary>
    public required string Text { get; init; }

    /// <summary>The caption track this text came from, if available.</summary>
    public CaptionTrack? SourceTrack { get; init; }

    /// <summary>
    /// How the transcript was obtained. This is important context for
    /// interpreting the quality of the summary.
    /// </summary>
    public TranscriptSource Source { get; init; }

    /// <summary>
    /// Individual timestamped segments from the caption track.
    /// Empty when timestamps are not available (e.g. metadata-only transcripts).
    /// </summary>
    public IReadOnlyList<TimestampedSegment> Segments { get; init; } = Array.Empty<TimestampedSegment>();

    /// <summary>Approximate word count of the raw transcript (space-separated tokens).</summary>
    public int WordCount => Text.Split(' ', StringSplitOptions.RemoveEmptyEntries).Length;
}

/// <summary>
/// A single timestamped segment from a caption track.
/// Used when saving the transcript to a file with timestamp formatting.
/// </summary>
public sealed class TimestampedSegment
{
    /// <summary>Start time offset from the beginning of the video.</summary>
    public TimeSpan Start { get; init; }

    /// <summary>Duration of this caption segment.</summary>
    public TimeSpan Duration { get; init; }

    /// <summary>The caption text for this segment.</summary>
    public required string Text { get; init; }

    /// <summary>Formats the start time as HH:MM:SS (one hour or more) or MM:SS for display.</summary>
    public string FormattedTimestamp =>
        Start.TotalHours >= 1
            ? Start.ToString(@"hh\:mm\:ss")
            : Start.ToString(@"mm\:ss");
}

/// <summary>
/// Describes how a transcript was obtained, ordered from most to least reliable.
/// This maps directly to the caption quality transparency layer discussed in LIKA.
/// </summary>
public enum TranscriptSource
{
    /// <summary>Human-reviewed caption track provided by the video owner.</summary>
    OwnerPublished,

    /// <summary>Community-contributed captions (YouTube retired this but tracks may exist).</summary>
    CommunityContributed,

    /// <summary>YouTube's automatic speech recognition — less reliable.</summary>
    AutoGenerated,

    /// <summary>No captions available; summary based on metadata/description only.</summary>
    MetadataOnly
}

/// <summary>
/// Controls which summarization prompt style is used.
/// </summary>
public enum SummaryMode
{
    /// <summary>Default detailed summary with bullet points and takeaways.</summary>
    Standard,

    /// <summary>
    /// Personal Information Filter — brief 1–2 sentence summary, relevance
    /// evaluation against personal priorities (time, finances, health, family,
    /// service to others), and a single-word verdict: ACT, MONITOR, or IGNORE.
    /// </summary>
    PersonalFilter
}

/// <summary>
/// The final deliverable: a structured summary of a YouTube video.
/// </summary>
public sealed class VideoSummary
{
    public required VideoMetadata Metadata { get; init; }
    public required string SummaryText { get; init; }
    public required TranscriptSource TranscriptSource { get; init; }

    /// <summary>
    /// Warning shown when the summary is based on low-quality or missing transcript data.
    /// Null when the source is reliable.
    /// </summary>
    public string? QualityWarning { get; init; }

    /// <summary>Model used to generate this summary.</summary>
    public required string ModelUsed { get; init; }

    public DateTimeOffset GeneratedAt { get; init; } = DateTimeOffset.UtcNow;
}
// diff --git a/YouTubeService.cs b/YouTubeService.cs
// new file mode 100644
// index 0000000..2a3c389
// --- /dev/null
// +++ b/YouTubeService.cs
// @@ -0,0 +1,518 @@
using System.Diagnostics;
using System.Text.Json;
using YoutubeSummarizer.Models;

namespace YoutubeSummarizer.Services;

/// <summary>
/// Uses yt-dlp (https://github.com/yt-dlp/yt-dlp) to retrieve video metadata
/// and download caption tracks. No YouTube API key required.
///
/// yt-dlp is the de-facto standard tool for reliably extracting video
/// information and subtitles from YouTube. It must be installed and
/// available on PATH (e.g. pip install yt-dlp).
/// </summary>
public sealed class YouTubeService
{
    private readonly HttpClient _httpClient;

    public YouTubeService(HttpClient httpClient)
    {
        _httpClient = httpClient;
    }

    // ─────────────────────────────────────────────────────────────────────────
    // Public API
    // ─────────────────────────────────────────────────────────────────────────

    /// <summary>
    /// Parses a YouTube video ID from any common URL format.
    /// Handles: watch?v=, youtu.be/, /embed/, /shorts/, /v/, /live/, and a bare ID.
    /// </summary>
    /// <param name="url">URL or bare video ID; surrounding whitespace is ignored.</param>
    /// <returns>
    /// The 11-character ID, or null when none is found. Every candidate is
    /// validated, so an empty or malformed path segment is never returned as an ID.
    /// </returns>
    public static string? ExtractVideoId(string url)
    {
        if (string.IsNullOrWhiteSpace(url))
        {
            return null;
        }

        // Normalize — strip whitespace the user may have pasted.
        url = url.Trim();

        if (Uri.TryCreate(url, UriKind.Absolute, out var uri))
        {
            var segments = uri.AbsolutePath.Split('/', StringSplitOptions.RemoveEmptyEntries);

            // youtu.be short links: https://youtu.be/VIDEO_ID
            if (uri.Host.Equals("youtu.be", StringComparison.OrdinalIgnoreCase) ||
                uri.Host.EndsWith(".youtu.be", StringComparison.OrdinalIgnoreCase))
            {
                return segments.Length > 0 && IsVideoId(segments[0]) ? segments[0] : null;
            }

            // Standard watch URLs: ?v=VIDEO_ID
            var query = System.Web.HttpUtility.ParseQueryString(uri.Query);
            if (query["v"] is { } vParam && IsVideoId(vParam))
            {
                return vParam;
            }

            // Path-style URLs: /embed/VIDEO_ID, /shorts/VIDEO_ID, /v/VIDEO_ID, /live/VIDEO_ID
            for (int i = 0; i < segments.Length - 1; i++)
            {
                if (segments[i] is "embed" or "shorts" or "v" or "live" && IsVideoId(segments[i + 1]))
                {
                    return segments[i + 1];
                }
            }
        }

        // Raw ID passed directly.
        return IsVideoId(url) ? url : null;
    }

    /// <summary>True when the text is exactly 11 characters of letters, digits, '-' or '_'.</summary>
    private static bool IsVideoId(string candidate) =>
        System.Text.RegularExpressions.Regex.IsMatch(candidate, @"^[A-Za-z0-9_-]{11}\z");

    /// <summary>
    /// Fetches metadata for a video using yt-dlp --dump-json.
    /// No API key required — yt-dlp scrapes the public video page.
/// </summary>
/// <param name="videoId">Video ID appended to the watch URL handed to yt-dlp.</param>
/// <param name="ct">Cancels the wait; the yt-dlp process is terminated when cancellation is observed.</param>
/// <returns>
/// The parsed metadata, or null when yt-dlp exits with a non-zero code,
/// prints nothing, or prints output that cannot be read as a JSON object.
/// </returns>
public async Task<VideoMetadata?> GetVideoMetadataAsync(string videoId, CancellationToken ct = default)
{
    var psi = new ProcessStartInfo
    {
        FileName = "yt-dlp",
        RedirectStandardOutput = true,
        RedirectStandardError = true,
        UseShellExecute = false,
        CreateNoWindow = true,
    };
    psi.ArgumentList.Add("--dump-json");
    psi.ArgumentList.Add("--no-download");
    psi.ArgumentList.Add($"https://www.youtube.com/watch?v={videoId}");

    using var process = new Process { StartInfo = psi };
    process.Start();

    // Both pipes are drained concurrently so the child can never block on a full buffer.
    var stdoutTask = process.StandardOutput.ReadToEndAsync(ct);
    var stderrTask = process.StandardError.ReadToEndAsync(ct);

    try
    {
        await process.WaitForExitAsync(ct);
    }
    catch (OperationCanceledException)
    {
        // Disposing a Process does not stop it; without this, a cancelled call
        // would leave yt-dlp running in the background.
        try
        {
            if (!process.HasExited)
            {
                process.Kill(entireProcessTree: true);
            }
        }
        catch (Exception ex) when (ex is InvalidOperationException or System.ComponentModel.Win32Exception)
        {
            // The process exited on its own or could not be signalled — nothing more to do.
        }

        throw;
    }

    var json = await stdoutTask;
    await stderrTask; // observed so the read cannot fault unnoticed; the text itself is not used

    if (process.ExitCode != 0 || string.IsNullOrWhiteSpace(json))
    {
        return null;
    }

    // Reads a property only when it is present and a JSON string.
    static string? ReadString(JsonElement obj, string name) =>
        obj.TryGetProperty(name, out var value) && value.ValueKind == JsonValueKind.String
            ? value.GetString()
            : null;

    try
    {
        using var doc = JsonDocument.Parse(json);
        var root = doc.RootElement;
        if (root.ValueKind != JsonValueKind.Object)
        {
            return null;
        }

        var title = ReadString(root, "title") ?? "(no title)";
        var channel = ReadString(root, "channel") ?? "(unknown channel)";
        var description = ReadString(root, "description");

        // yt-dlp returns duration in seconds.
        TimeSpan? duration = null;
        if (root.TryGetProperty("duration", out var dur) && dur.ValueKind == JsonValueKind.Number)
        {
            duration = TimeSpan.FromSeconds(dur.GetDouble());
        }

        // Upload date comes as "YYYYMMDD"; parsed culture-independently.
        var publishedAt = DateTimeOffset.MinValue;
        if (ReadString(root, "upload_date") is { } dateStr &&
            DateTime.TryParseExact(dateStr, "yyyyMMdd",
                System.Globalization.CultureInfo.InvariantCulture,
                System.Globalization.DateTimeStyles.None, out var parsed))
        {
            publishedAt = new DateTimeOffset(parsed, TimeSpan.Zero);
        }

        // Build ISO 8601 duration string for FormattedDuration compatibility.
        string? isoDuration = null;
        if (duration is { } ts)
        {
            isoDuration = $"PT{(int)ts.TotalHours}H{ts.Minutes}M{ts.Seconds}S";
        }

        return new VideoMetadata
        {
            VideoId = videoId,
            Title = title,
            ChannelTitle = channel,
            PublishedAt = publishedAt,
            Duration = isoDuration,
            Description = description
        };
    }
    catch (Exception ex) when (ex is JsonException or OverflowException or ArgumentException)
    {
        // Unreadable JSON or an out-of-range duration: treat as "no metadata".
        return null;
    }
}

/// <summary>
/// Retrieves the best available transcript for the video using yt-dlp.
///
/// yt-dlp is invoked to download subtitle files (preferring manual English
/// captions, falling back to auto-generated). The downloaded file is parsed
/// into clean plain text for summarization.
///
/// If no caption text is obtained, returns a metadata-only transcript
/// built from the video description.
/// </summary>
/// <param name="metadata">Metadata of the video; supplies the ID and the fallback text.</param>
/// <param name="ct">Cancellation token forwarded to the subtitle download.</param>
public async Task<VideoTranscript> GetTranscriptAsync(
    VideoMetadata metadata,
    CancellationToken ct = default)
{
    // Try manual (human-written) subtitles first, then auto-generated.
    var (text, segments, isAuto) = await DownloadSubtitlesWithTimestampsAsync(metadata.VideoId, ct);

    if (string.IsNullOrWhiteSpace(text))
    {
        // No captions at all — fall back to the description text.
        return BuildMetadataOnlyTranscript(metadata);
    }

    return new VideoTranscript
    {
        VideoId = metadata.VideoId,
        Text = text,
        Segments = segments,
        SourceTrack = new CaptionTrack
        {
            TrackId = "yt-dlp",
            Language = "en",
            TrackKind = isAuto ? "asr" : "standard",
            Name = isAuto ? "Auto-generated (en)" : "English"
        },
        Source = isAuto
            ? TranscriptSource.AutoGenerated
            : TranscriptSource.OwnerPublished
    };
}

// ─────────────────────────────────────────────────────────────────────────
// Private helpers
// ─────────────────────────────────────────────────────────────────────────

/// <summary>
/// Shells out to yt-dlp to download subtitles for the given video.
/// First attempts manual subs, then auto-generated if none found.
/// Returns the cleaned transcript text and whether it was auto-generated.
/// </summary>
private static async Task<(string? Text, bool IsAuto)> DownloadSubtitlesWithYtDlpAsync(
    string videoId,
    CancellationToken ct)
{
    var (text, _, isAuto) = await DownloadSubtitlesWithTimestampsAsync(videoId, ct);
    return (text, isAuto);
}

/// <summary>
/// Downloads subtitles and returns both the plain text and timestamped segments.
/// Manual subtitles are tried first; auto-generated ones only if that yields no text.
/// </summary>
/// <returns>
/// The transcript text (null when neither attempt produced any), its segments,
/// and whether the text came from auto-generated subtitles.
/// </returns>
private static async Task<(string? Text, List<TimestampedSegment> Segments, bool IsAuto)> DownloadSubtitlesWithTimestampsAsync(
    string videoId,
    CancellationToken ct)
{
    var tempDir = Path.Combine(Path.GetTempPath(), $"ytsumm_{videoId}_{Guid.NewGuid():N}");
    Directory.CreateDirectory(tempDir);

    try
    {
        // Attempt 1: manual (human-written) subtitles only.
        var (manualText, manualSegments) = await RunYtDlpSubtitleWithTimestampsAsync(
            videoId, tempDir, writeSub: true, writeAutoSub: false, ct);

        if (!string.IsNullOrWhiteSpace(manualText))
        {
            return (manualText, manualSegments, false);
        }

        // Attempt 2: auto-generated subtitles.
        var (autoText, autoSegments) = await RunYtDlpSubtitleWithTimestampsAsync(
            videoId, tempDir, writeSub: false, writeAutoSub: true, ct);

        if (!string.IsNullOrWhiteSpace(autoText))
        {
            return (autoText, autoSegments, true);
        }

        return (null, new List<TimestampedSegment>(), false);
    }
    finally
    {
        // Best effort: a leftover temp directory must not fail the download.
        try
        {
            Directory.Delete(tempDir, recursive: true);
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
        }
    }
}

/// <summary>
/// Runs a single yt-dlp invocation to download subtitles with timestamps.
/// Returns the parsed plain-text transcript and timestamped segments.
/// </summary>
/// <param name="videoId">Video ID appended to the watch URL handed to yt-dlp.</param>
/// <param name="tempDir">Existing scratch directory; its files are deleted before the run.</param>
/// <param name="writeSub">Adds <c>--write-sub</c>.</param>
/// <param name="writeAutoSub">Adds <c>--write-auto-sub</c>.</param>
/// <param name="ct">Cancels the wait; the yt-dlp process is terminated when cancellation is observed.</param>
/// <returns>
/// <c>(null, empty)</c> when yt-dlp fails, writes no subtitle file, or writes a blank one.
/// </returns>
private static async Task<(string? Text, List<TimestampedSegment> Segments)> RunYtDlpSubtitleWithTimestampsAsync(
    string videoId,
    string tempDir,
    bool writeSub,
    bool writeAutoSub,
    CancellationToken ct)
{
    static bool HasExtension(string path, string extension) =>
        Path.GetExtension(path).Equals(extension, StringComparison.OrdinalIgnoreCase);

    // Start from an empty scratch directory. Previously only *.srv1 and *.vtt
    // were removed, so a stale *.srt from an earlier attempt could be picked
    // up below and attributed to the wrong subtitle kind.
    foreach (var stale in Directory.GetFiles(tempDir))
    {
        File.Delete(stale);
    }

    var args = new List<string>
    {
        "--skip-download",
        "--sub-lang", "en,en-US,en-GB,en.*",
        "--sub-format", "srv1/vtt/best",
        "-o", Path.Combine(tempDir, "%(id)s"),
    };

    if (writeSub)
    {
        args.Add("--write-sub");
    }
    if (writeAutoSub)
    {
        args.Add("--write-auto-sub");
    }

    args.Add($"https://www.youtube.com/watch?v={videoId}");

    var psi = new ProcessStartInfo
    {
        FileName = "yt-dlp",
        RedirectStandardOutput = true,
        RedirectStandardError = true,
        UseShellExecute = false,
        CreateNoWindow = true,
    };

    foreach (var arg in args)
    {
        psi.ArgumentList.Add(arg);
    }

    using var process = new Process { StartInfo = psi };
    process.Start();

    // Both pipes are drained concurrently so the child can never block on a full buffer.
    var stdoutTask = process.StandardOutput.ReadToEndAsync(ct);
    var stderrTask = process.StandardError.ReadToEndAsync(ct);

    try
    {
        await process.WaitForExitAsync(ct);
    }
    catch (OperationCanceledException)
    {
        // Disposing a Process does not stop it; terminate yt-dlp explicitly.
        try
        {
            if (!process.HasExited)
            {
                process.Kill(entireProcessTree: true);
            }
        }
        catch (Exception ex) when (ex is InvalidOperationException or System.ComponentModel.Win32Exception)
        {
            // The process exited on its own or could not be signalled — nothing more to do.
        }

        throw;
    }

    await Task.WhenAll(stdoutTask, stderrTask); // observed; the text itself is not used

    if (process.ExitCode != 0)
    {
        return (null, new List<TimestampedSegment>());
    }

    // Prefer srv1; order the rest by name so the choice is deterministic when
    // several language variants were written.
    var subFiles = Directory.GetFiles(tempDir)
        .Where(f => HasExtension(f, ".srv1") || HasExtension(f, ".vtt") || HasExtension(f, ".srt"))
        .OrderBy(f => HasExtension(f, ".srv1") ? 0 : 1)
        .ThenBy(f => f, StringComparer.Ordinal)
        .ToList();

    if (subFiles.Count == 0)
    {
        return (null, new List<TimestampedSegment>());
    }

    var chosen = subFiles[0];
    var content = await File.ReadAllTextAsync(chosen, ct);

    if (string.IsNullOrWhiteSpace(content))
    {
        return (null, new List<TimestampedSegment>());
    }

    return HasExtension(chosen, ".srv1")
        ? ParseTimedTextXmlWithTimestamps(content)
        : ParseVttOrSrtWithTimestamps(content);
}

/// <summary>
/// Parses YouTube's srv1 timed-text XML into plain text and timestamped segments.
/// </summary>
/// <param name="xml">Timed-text document whose <c>text</c> elements carry <c>start</c> and <c>dur</c> attributes in seconds.</param>
/// <returns>
/// The caption texts joined with single spaces, plus one segment per non-empty
/// caption. If the input cannot be parsed, the input itself is returned as the
/// text with no segments.
/// </returns>
private static (string Text, List<TimestampedSegment> Segments) ParseTimedTextXmlWithTimestamps(string xml)
{
    // Reads a seconds-valued attribute; a missing or unparsable value counts as zero.
    static TimeSpan ReadSeconds(System.Xml.Linq.XElement element, string attributeName) =>
        double.TryParse(element.Attribute(attributeName)?.Value,
            System.Globalization.NumberStyles.Float,
            System.Globalization.CultureInfo.InvariantCulture, out var seconds)
            ? TimeSpan.FromSeconds(seconds)
            : TimeSpan.Zero;

    try
    {
        var doc = System.Xml.Linq.XDocument.Parse(xml);
        var segments = new List<TimestampedSegment>();

        foreach (var el in doc.Descendants("text"))
        {
            // The XML parser already resolved one level of entities; HtmlDecode
            // handles entities that are still encoded inside the text itself.
            var decoded = System.Web.HttpUtility.HtmlDecode(el.Value);
            var cleaned = System.Text.RegularExpressions.Regex.Replace(decoded, @"\s+", " ").Trim();

            if (cleaned.Length == 0)
            {
                continue;
            }

            segments.Add(new TimestampedSegment
            {
                Start = ReadSeconds(el, "start"),
                Duration = ReadSeconds(el, "dur"),
                Text = cleaned
            });
        }

        return (string.Join(" ", segments.Select(s => s.Text)), segments);
    }
    catch (Exception ex) when (ex is System.Xml.XmlException or OverflowException or ArgumentException)
    {
        // Malformed XML or an out-of-range time value: keep the previous
        // best-effort behaviour of handing back the raw input.
        return (xml, new List<TimestampedSegment>());
    }
}

/// <summary>
/// Parses YouTube's srv1 timed-text XML into clean plain text.
///
/// The XML structure looks like:
///   &lt;transcript&gt;
///     &lt;text start="0.5" dur="2.1"&gt;Hello world&lt;/text&gt;
///     ...
///   &lt;/transcript&gt;
/// </summary>
private static string ParseTimedTextXml(string xml)
{
    var (text, _) = ParseTimedTextXmlWithTimestamps(xml);
    return text;
}

/// <summary>
/// Parses VTT or SRT subtitle formats into plain text and timestamped segments.
/// Strips cue identifiers and formatting tags while preserving timestamp associations.
/// </summary>
/// <remarks>
/// Only lines that follow a timing line, up to the next blank line, count as
/// caption text. File headers, NOTE/STYLE blocks, SRT counters and cue
/// identifiers all sit outside that range and are ignored, while a caption
/// that happens to be all digits or to start with "NOTE" is kept.
/// Consecutive cues with identical text are collapsed into the first one.
/// </remarks>
/// <param name="content">Full text of a .vtt or .srt file.</param>
/// <returns>The cue texts joined with single spaces, plus the de-duplicated segments.</returns>
private static (string Text, List<TimestampedSegment> Segments) ParseVttOrSrtWithTimestamps(string content)
{
    var segments = new List<TimestampedSegment>();
    var cueLines = new List<string>();
    var cueStart = TimeSpan.Zero;
    var cueEnd = TimeSpan.Zero;
    bool inCue = false;

    // Emits the buffered cue text (if any) as one segment.
    void FlushCue()
    {
        if (cueLines.Count == 0)
        {
            return;
        }

        segments.Add(new TimestampedSegment
        {
            Start = cueStart,
            Duration = cueEnd - cueStart,
            Text = string.Join(" ", cueLines)
        });
        cueLines.Clear();
    }

    foreach (var rawLine in content.Split('\n'))
    {
        var line = rawLine.Trim();

        // A blank line ends the current cue.
        if (line.Length == 0)
        {
            FlushCue();
            inCue = false;
            continue;
        }

        // Timing line: "00:01:23.456 --> 00:01:27.890" (VTT, hours optional)
        // or "00:01:23,456 --> 00:01:27,890" (SRT). The comma form was not
        // matched before, so SRT timing lines ended up in the transcript text.
        var tsMatch = System.Text.RegularExpressions.Regex.Match(line,
            @"^((?:\d+:)?\d{1,2}:\d{2}(?:[\.,]\d+)?)\s*-->\s*((?:\d+:)?\d{1,2}:\d{2}(?:[\.,]\d+)?)");
        if (tsMatch.Success)
        {
            FlushCue();
            cueStart = ParseVttTimestamp(tsMatch.Groups[1].Value);
            cueEnd = ParseVttTimestamp(tsMatch.Groups[2].Value);
            inCue = true;
            continue;
        }

        // Anything outside a cue is header or metadata, not caption text.
        if (!inCue)
        {
            continue;
        }

        // Content line — strip inline tags and decode entities.
        var stripped = System.Text.RegularExpressions.Regex.Replace(line, @"<[^>]+>", "");
        var decoded = System.Web.HttpUtility.HtmlDecode(stripped).Trim();
        if (decoded.Length > 0)
        {
            cueLines.Add(decoded);
        }
    }

    FlushCue();

    // Deduplicate consecutive identical text segments (common in VTT).
    var deduped = new List<TimestampedSegment>(segments.Count);
    foreach (var segment in segments)
    {
        if (deduped.Count == 0 || deduped[^1].Text != segment.Text)
        {
            deduped.Add(segment);
        }
    }

    var plainText = string.Join(" ", deduped.Select(s => s.Text));
    return (plainText, deduped);
}

/// <summary>Parses a VTT/SRT timestamp string into a TimeSpan.</summary>
/// <param name="ts">"HH:MM:SS.mmm" or "MM:SS.mmm"; a comma is accepted as the decimal separator.</param>
/// <returns>
/// The parsed time rounded to the nearest millisecond. Components that cannot
/// be parsed count as zero; any other shape yields <see cref="TimeSpan.Zero"/>.
/// </returns>
private static TimeSpan ParseVttTimestamp(string ts)
{
    // Normalize: VTT uses "." for ms, SRT uses "," — handle both.
    var parts = ts.Replace(',', '.').Split(':');
    if (parts.Length is not (2 or 3))
    {
        return TimeSpan.Zero;
    }

    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    int hours = 0;
    if (parts.Length == 3)
    {
        int.TryParse(parts[0], System.Globalization.NumberStyles.Integer, invariant, out hours);
    }
    int.TryParse(parts[^2], System.Globalization.NumberStyles.Integer, invariant, out var minutes);
    double.TryParse(parts[^1], System.Globalization.NumberStyles.Float, invariant, out var seconds);

    // Round instead of truncating: 23.456 is stored slightly below its decimal
    // value, so the old "(int)((s - (int)s) * 1000)" produced 455 ms.
    long milliseconds = (long)Math.Round(seconds * 1000.0, MidpointRounding.AwayFromZero);

    return new TimeSpan(hours, minutes, 0) + TimeSpan.FromTicks(milliseconds * TimeSpan.TicksPerMillisecond);
}

/// <summary>
/// Parses VTT or SRT subtitle formats into clean plain text.
/// Strips timestamps, cue identifiers, and formatting tags.
/// </summary>
private static string ParseVttOrSrt(string content)
{
    var (text, _) = ParseVttOrSrtWithTimestamps(content);
    return text;
}

/// <summary>
/// When no captions exist, builds a minimal "transcript" from the video description.
/// The summary will be based on much less information and will be flagged accordingly.
/// </summary>
private static VideoTranscript BuildMetadataOnlyTranscript(VideoMetadata metadata)
{
    var text = string.IsNullOrWhiteSpace(metadata.Description)
        ?
$"No transcript or description available for: {metadata.Title}" + : $"Video title: {metadata.Title}\n\nChannel: {metadata.ChannelTitle}\n\nDescription:\n{metadata.Description}"; + + return new VideoTranscript + { + VideoId = metadata.VideoId, + Text = text, + SourceTrack = null, + Source = TranscriptSource.MetadataOnly + }; + } +} diff --git a/YoutubeSummarizer.csproj b/YoutubeSummarizer.csproj new file mode 100644 index 0000000..1a34344 --- /dev/null +++ b/YoutubeSummarizer.csproj @@ -0,0 +1,36 @@ + + + + Exe + net10.0 + enable + enable + YoutubeSummarizer + YoutubeSummarizer + + + + + PreserveNewest + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/appsettings.json b/appsettings.json new file mode 100644 index 0000000..51a1e65 --- /dev/null +++ b/appsettings.json @@ -0,0 +1,14 @@ +{ + + "LLM": { + "BaseUrl": "http://localhost:11434/v1", + "ApiKey": "ollama", + "Model": "qwen3:14b", + "MaxTokens": 1500, + "TimeoutSeconds": 600 + }, + "Summarizer": { + "ChunkWordLimit": 1500, + "ShowTranscript": false + } +} diff --git a/summarize.sln b/summarize.sln new file mode 100644 index 0000000..a5a1a51 --- /dev/null +++ b/summarize.sln @@ -0,0 +1,24 @@ +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.5.2.0 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "YoutubeSummarizer", "YoutubeSummarizer.csproj", "{2364E226-41E1-8549-7D9A-3C959F71FD8A}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {2364E226-41E1-8549-7D9A-3C959F71FD8A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {2364E226-41E1-8549-7D9A-3C959F71FD8A}.Debug|Any CPU.Build.0 = Debug|Any CPU + {2364E226-41E1-8549-7D9A-3C959F71FD8A}.Release|Any CPU.ActiveCfg = Release|Any CPU + 
{2364E226-41E1-8549-7D9A-3C959F71FD8A}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {58A79D7B-0ADD-4677-A65B-B4E6E38D9AFE} + EndGlobalSection +EndGlobal