commit 8f9291883d6a2c86e23aedd16baece91523b8aaa Author: null3FF3KT Date: Mon May 18 11:00:15 2026 -0500 feat: initialize YouTube summarizer project with OpenAI integration and map-reduce processing strategy diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e8b7f69 --- /dev/null +++ b/.gitignore @@ -0,0 +1,27 @@ +# Visual Studio / .NET build outputs +[Bb]in/ +[Oo]bj/ +[Pp]ublish/ +*.user +*.userosscache +*.sln.docstates +*.suo +*.cache + +# IDEs / Tools +.idea/ +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +*.swp +*.~* + +# OS metadata +.DS_Store +Thumbs.db + +# Project specific / Temporary files +scratch/ +*.log diff --git a/AppSettings.cs b/AppSettings.cs new file mode 100644 index 0000000..2a2e76f --- /dev/null +++ b/AppSettings.cs @@ -0,0 +1,56 @@ +namespace YoutubeSummarizer.Configuration; + +/// +/// Root configuration object bound from appsettings.json. +/// Only OpenAI and Summarizer sections are required. +/// +public sealed class AppSettings +{ + public LlmSettings LLM { get; init; } = new(); + public SummarizerSettings Summarizer { get; init; } = new(); +} + +/// +/// Settings for the LLM API (OpenAI or Ollama). +/// +public sealed class LlmSettings +{ + /// + /// Base URL for the API. + /// For OpenAI: https://api.openai.com/v1 + /// For Ollama: http://localhost:11434/v1 + /// + public string BaseUrl { get; init; } = "https://api.openai.com/v1"; + + /// Your API key. (For Ollama, any value works). + public string ApiKey { get; init; } = string.Empty; + + /// + /// Model to use. + /// OpenAI: gpt-4o-mini, gpt-4o + /// Ollama: qwen3:14b, llama3.1 + /// + public string Model { get; init; } = "gpt-4o-mini"; + + /// Max tokens for the summary response (not the input). + public int MaxTokens { get; init; } = 1500; + + /// Timeout in seconds for API calls. + public int TimeoutSeconds { get; init; } = 100; +} + +/// +/// Controls summarization behavior. 
+/// +public sealed class SummarizerSettings +{ + /// + /// Approximate word count at which we split a long transcript into chunks + /// before doing a final "summary of summaries" pass. This keeps individual + /// API calls within model context limits. + /// + public int ChunkWordLimit { get; init; } = 3000; + + /// When true, prints the full transcript text before summarizing. + public bool ShowTranscript { get; init; } = false; +} diff --git a/ConsoleRenderer.cs b/ConsoleRenderer.cs new file mode 100644 index 0000000..42fa45d --- /dev/null +++ b/ConsoleRenderer.cs @@ -0,0 +1,197 @@ +using YoutubeSummarizer.Models; + +namespace YoutubeSummarizer.Services; + +/// +/// Handles all console output formatting. +/// Keeping display logic separate from business logic makes it easy to +/// later add output modes (JSON, Markdown file, HTML report) without +/// touching the service layer. +/// +public static class ConsoleRenderer +{ + // ANSI color codes. These render correctly in most Linux terminals. + // If you pipe output to a file, the escape codes will appear as-is — + // run with --no-color if that's a concern (not implemented here, left + // as an exercise). + private const string Reset = "\x1b[0m"; + private const string Bold = "\x1b[1m"; + private const string Cyan = "\x1b[36m"; + private const string Yellow = "\x1b[33m"; + private const string Green = "\x1b[32m"; + private const string Red = "\x1b[31m"; + private const string Dim = "\x1b[2m"; + + /// Prints the application banner on startup. + public static void PrintBanner() + { + Console.WriteLine(); + Console.WriteLine($"{Bold}{Cyan}╔════════════════════════════════════════╗{Reset}"); + Console.WriteLine($"{Bold}{Cyan}║ YouTube Video Summarizer ║{Reset}"); + Console.WriteLine($"{Bold}{Cyan}╚════════════════════════════════════════╝{Reset}"); + Console.WriteLine(); + } + + /// Prompts the user for a URL and reads input. 
+ public static string PromptForUrl() + { + Console.Write($"{Bold}Enter YouTube URL (or 'q' to quit):{Reset} "); + return Console.ReadLine()?.Trim() ?? string.Empty; + } + + /// + /// Asks the user whether they want to save the transcript to a text file. + /// Returns true if the user answers yes. + /// + public static bool PromptSaveTranscript() + { + Console.Write($"{Bold}Save transcript to file? (y/n):{Reset} "); + var answer = Console.ReadLine()?.Trim() ?? string.Empty; + return answer.Equals("y", StringComparison.OrdinalIgnoreCase) + || answer.Equals("yes", StringComparison.OrdinalIgnoreCase); + } + + /// Prints a success message with the saved file path. + public static void PrintFileSaved(string filePath) + { + Console.WriteLine($" {Green}✓ Transcript saved to:{Reset} {filePath}"); + Console.WriteLine(); + } + + /// + /// Prompts the user to choose a summary mode. + /// Returns the selected . + /// + public static SummaryMode PromptSummaryMode() + { + Console.WriteLine($" {Dim}Summary modes:{Reset}"); + Console.WriteLine($" {Bold}1{Reset} – Standard (detailed bullet-point summary)"); + Console.WriteLine($" {Bold}2{Reset} – Personal Filter (relevance verdict: ACT / MONITOR / IGNORE)"); + Console.Write($"{Bold}Choose summary mode [1]:{Reset} "); + var choice = Console.ReadLine()?.Trim() ?? string.Empty; + return choice == "2" ? SummaryMode.PersonalFilter : SummaryMode.Standard; + } + + /// Displays a spinner-style "working" indicator while async work runs. + public static void PrintWorking(string message) + { + Console.WriteLine($" {Dim}→ {message}...{Reset}"); + } + + /// + /// Renders the full summary result to the console in a structured, + /// readable format. Includes metadata header, quality warning, and + /// the summary body. 
+ /// + public static void PrintSummary(VideoSummary summary, bool showTranscriptSource) + { + Console.WriteLine(); + PrintDivider(); + + // ── Metadata header ────────────────────────────────────────────────── + Console.WriteLine($"{Bold}{Green} {summary.Metadata.Title}{Reset}"); + Console.WriteLine($" {Dim}Channel:{Reset} {summary.Metadata.ChannelTitle}"); + Console.WriteLine($" {Dim}Published:{Reset} {summary.Metadata.PublishedAt:MMMM d, yyyy}"); + Console.WriteLine($" {Dim}Duration:{Reset} {summary.Metadata.FormattedDuration}"); + Console.WriteLine($" {Dim}URL:{Reset} https://youtu.be/{summary.Metadata.VideoId}"); + + // ── Transcript source badge ────────────────────────────────────────── + if (showTranscriptSource) + { + var (badge, color) = summary.TranscriptSource switch + { + TranscriptSource.OwnerPublished => ("✓ Owner-published captions", Green), + TranscriptSource.CommunityContributed=> ("✓ Community captions", Green), + TranscriptSource.AutoGenerated => ("~ Auto-generated (ASR)", Yellow), + TranscriptSource.MetadataOnly => ("✗ Metadata only", Red), + _ => ("? Unknown", Dim) + }; + Console.WriteLine($" {Dim}Transcript:{Reset} {color}{badge}{Reset}"); + } + + Console.WriteLine($" {Dim}Model:{Reset} {summary.ModelUsed}"); + Console.WriteLine($" {Dim}Generated:{Reset} {summary.GeneratedAt:yyyy-MM-dd HH:mm} UTC"); + + PrintDivider(); + + // ── Quality warning ────────────────────────────────────────────────── + if (summary.QualityWarning is not null) + { + Console.WriteLine(); + Console.WriteLine($" {Yellow}{summary.QualityWarning}{Reset}"); + } + + // ── Summary body ───────────────────────────────────────────────────── + Console.WriteLine(); + Console.WriteLine($"{Bold} SUMMARY{Reset}"); + Console.WriteLine(); + + // Word-wrap the summary body at 80 characters so it's readable in + // standard terminal widths without horizontal scrolling. 
+        foreach (var line in WordWrap(summary.SummaryText, maxWidth: 78))
+        {
+            Console.WriteLine($" {line}");
+        }
+
+        Console.WriteLine();
+        PrintDivider();
+        Console.WriteLine();
+    }
+
+    /// Prints a styled error message.
+    public static void PrintError(string message)
+    {
+        Console.WriteLine();
+        Console.WriteLine($" {Red}✗ Error: {message}{Reset}");
+        Console.WriteLine();
+    }
+
+    /// Prints a styled warning (non-fatal).
+    public static void PrintWarning(string message)
+    {
+        Console.WriteLine($" {Yellow}⚠ {message}{Reset}");
+    }
+
+    // ─────────────────────────────────────────────────────────────────────────
+    // Private helpers
+    // ─────────────────────────────────────────────────────────────────────────
+
+    private static void PrintDivider()
+    {
+        Console.WriteLine($" {Dim}{"─".PadRight(74, '─')}{Reset}");
+    }
+
+    /// <summary>
+    /// Splits text into lines no wider than <paramref name="maxWidth"/> characters,
+    /// breaking only at word boundaries. Respects existing newlines in the input:
+    /// each '\n'-separated paragraph is wrapped on its own, and a blank paragraph
+    /// is emitted as a single empty line.
+    /// </summary>
+    /// <remarks>
+    /// A word longer than <paramref name="maxWidth"/> is never split; it is emitted
+    /// unbroken on a line of its own.
+    /// </remarks>
+    private static IEnumerable<string> WordWrap(string text, int maxWidth)
+    {
+        foreach (var rawParagraph in text.Split('\n'))
+        {
+            // Tolerate CRLF input: drop the '\r' that Split('\n') leaves behind.
+            var paragraph = rawParagraph.TrimEnd('\r');
+
+            if (string.IsNullOrWhiteSpace(paragraph))
+            {
+                yield return string.Empty;
+                continue;
+            }
+
+            var words = paragraph.Split(' ', StringSplitOptions.RemoveEmptyEntries);
+            var current = new System.Text.StringBuilder();
+
+            foreach (var word in words)
+            {
+                // Flush only when the line already holds text. Without the
+                // Length > 0 check, a first word of maxWidth or more characters
+                // would emit a spurious empty line before it.
+                if (current.Length > 0 && current.Length + word.Length + 1 > maxWidth)
+                {
+                    yield return current.ToString();
+                    current.Clear();
+                }
+
+                if (current.Length > 0) current.Append(' ');
+                current.Append(word);
+            }
+
+            if (current.Length > 0)
+                yield return current.ToString();
+        }
+    }
+}
diff --git a/Program.cs b/Program.cs
new file mode 100644
index 0000000..84782df
--- /dev/null
+++ b/Program.cs
@@ -0,0 +1,227 @@
+using Microsoft.Extensions.Configuration;
+using Microsoft.Extensions.DependencyInjection;
+using YoutubeSummarizer.Configuration;
+using YoutubeSummarizer.Models;
+using YoutubeSummarizer.Services;
+
+// 
═════════════════════════════════════════════════════════════════════════════ +// Bootstrap +// ═════════════════════════════════════════════════════════════════════════════ + +// Build configuration from appsettings.json (required) with optional +// environment variable overrides (useful for CI or containerized deployment). +// Environment variables follow the pattern: YouTube__ApiKey, LLM__ApiKey, etc. +var config = new ConfigurationBuilder() + .SetBasePath(AppContext.BaseDirectory) + .AddJsonFile("appsettings.json", optional: false, reloadOnChange: false) + .AddEnvironmentVariables() // overrides appsettings values if set + .Build(); + +// Bind configuration sections to strongly-typed objects. +var appSettings = new AppSettings(); +config.Bind(appSettings); + +// Validate required keys up front — fail fast with a clear message rather +// than letting the first API call blow up with a cryptic 401. +ValidateSettings(appSettings); + +// Wire up DI container. +// For a console app this is lightweight, but it mirrors the pattern used +// in the LIKA/IKA ASP.NET services so the code is easy to lift into a +// background service or API controller later. +var services = new ServiceCollection(); + +// Register HttpClient for the YouTube timedtext endpoint. +// Using IHttpClientFactory gives us connection pooling and the ability to +// attach Polly retry policies. +services.AddHttpClient(client => +{ + client.DefaultRequestHeaders.Add("User-Agent", + "Mozilla/5.0 (compatible; YoutubeSummarizer/1.0)"); + client.Timeout = TimeSpan.FromSeconds(30); +}); + +// Register services with their config dependencies. 
+services.AddSingleton(appSettings.LLM); +services.AddSingleton(appSettings.Summarizer); +services.AddTransient(); + +var serviceProvider = services.BuildServiceProvider(); + +// ═════════════════════════════════════════════════════════════════════════════ +// Main loop +// ═════════════════════════════════════════════════════════════════════════════ + +ConsoleRenderer.PrintBanner(); + +// Handle Ctrl+C gracefully so any in-progress API call can finish or cancel. +using var cts = new CancellationTokenSource(); +Console.CancelKeyPress += (_, e) => +{ + e.Cancel = true; // prevent immediate termination + cts.Cancel(); + Console.WriteLine("\n Cancellation requested. Finishing current operation..."); +}; + +while (!cts.Token.IsCancellationRequested) +{ + var input = ConsoleRenderer.PromptForUrl(); + + if (string.IsNullOrWhiteSpace(input)) continue; + if (input.Equals("q", StringComparison.OrdinalIgnoreCase)) break; + + // Parse the video ID from the URL + var videoId = YouTubeService.ExtractVideoId(input); + if (videoId is null) + { + ConsoleRenderer.PrintError("Could not extract a valid YouTube video ID from that URL."); + ConsoleRenderer.PrintWarning("Accepted formats: watch?v=..., youtu.be/..., /shorts/..., /embed/..."); + continue; + } + + // Ask whether to save transcript to file before processing + var saveTranscript = ConsoleRenderer.PromptSaveTranscript(); + + // Choose summary mode + var summaryMode = ConsoleRenderer.PromptSummaryMode(); + + await ProcessVideoAsync(videoId, serviceProvider, appSettings.Summarizer, saveTranscript, summaryMode, cts.Token); +} + +Console.WriteLine(" Goodbye!"); + +// ═════════════════════════════════════════════════════════════════════════════ +// Video processing pipeline +// ═════════════════════════════════════════════════════════════════════════════ + +/// +/// Orchestrates the full pipeline for a single video: +/// 1. Fetch metadata (YouTube Data API) +/// 2. Fetch transcript (caption track or timedtext fallback) +/// 3. 
Summarize (LLM Chat Completions) +/// 4. Display (ConsoleRenderer) +/// +static async Task ProcessVideoAsync( + string videoId, + IServiceProvider sp, + SummarizerSettings summarizerSettings, + bool saveTranscript, + SummaryMode summaryMode, + CancellationToken ct) +{ + try + { + // Resolve scoped services + var youtubeService = sp.GetRequiredService(); + var summarizerService = sp.GetRequiredService(); + + // ── Step 1: Metadata ────────────────────────────────────────────── + ConsoleRenderer.PrintWorking("Fetching video metadata"); + var metadata = await youtubeService.GetVideoMetadataAsync(videoId, ct); + + if (metadata is null) + { + ConsoleRenderer.PrintError($"Video not found or is private: {videoId}"); + return; + } + + Console.WriteLine($" {metadata.Title}"); + + // ── Step 2: Transcript ──────────────────────────────────────────── + ConsoleRenderer.PrintWorking("Fetching transcript"); + var transcript = await youtubeService.GetTranscriptAsync(metadata, ct); + + // Optionally show raw transcript for debugging / inspection + if (summarizerSettings.ShowTranscript) + { + Console.WriteLine(); + Console.WriteLine(" ─── RAW TRANSCRIPT ───"); + Console.WriteLine(transcript.Text); + Console.WriteLine(" ─── END TRANSCRIPT ───"); + Console.WriteLine(); + } + + Console.WriteLine( + $" Transcript: {transcript.Source} | {transcript.WordCount:N0} words"); + + // ── Step 2.5: Save transcript to file (if requested) ───────────── + // (moved after summarization so we can include the summary) + + // ── Step 3: Summarize ───────────────────────────────────────────── + // Always run the standard summary (used for file saving). + ConsoleRenderer.PrintWorking("Summarizing with LLM"); + var standardSummary = await summarizerService.SummarizeAsync( + metadata, transcript, SummaryMode.Standard, ct); + + // If the user chose Personal Filter, run a second pass for display. 
+ VideoSummary displaySummary; + if (summaryMode == SummaryMode.PersonalFilter) + { + ConsoleRenderer.PrintWorking("Applying Personal Information Filter"); + displaySummary = await summarizerService.SummarizeAsync( + metadata, transcript, SummaryMode.PersonalFilter, ct); + } + else + { + displaySummary = standardSummary; + } + + // ── Step 3.5: Save transcript + standard summary to file ───────── + if (saveTranscript) + { + var transcriptsDir = Path.Combine( + Environment.GetFolderPath(Environment.SpecialFolder.UserProfile), + "Downloads", "transcripts"); + ConsoleRenderer.PrintWorking("Saving transcript to file"); + var savedPath = await TranscriptFileService.SaveAsync( + metadata, transcript, summaryText: standardSummary.SummaryText, + outputDirectory: transcriptsDir, ct: ct); + ConsoleRenderer.PrintFileSaved(savedPath); + } + + // ── Step 4: Display ─────────────────────────────────────────────── + ConsoleRenderer.PrintSummary(displaySummary, showTranscriptSource: true); + } + catch (OperationCanceledException) + { + // User pressed Ctrl+C — nothing to report, the loop will exit + } + catch (Exception ex) + { + ConsoleRenderer.PrintError(ex.Message); + + // Print the stack trace in dim text for debugging without overwhelming + // normal users who will rarely see this path. + Console.WriteLine($"\x1b[2m{ex}\x1b[0m"); + } +} + +// ═════════════════════════════════════════════════════════════════════════════ +// Configuration validation +// ═════════════════════════════════════════════════════════════════════════════ + +static void ValidateSettings(AppSettings settings) +{ + var errors = new List(); + + if (string.IsNullOrWhiteSpace(settings.LLM.ApiKey) || + settings.LLM.ApiKey == "YOUR_API_KEY_HERE") + { + // For local Ollama, we don't strictly need a real key, but it shouldn't be the placeholder. + // If they are using OpenAI, they definitely need a key. 
+ if (settings.LLM.BaseUrl.Contains("openai.com", StringComparison.OrdinalIgnoreCase)) + { + errors.Add("LLM:ApiKey is not set in appsettings.json (Required for OpenAI)"); + } + } + + if (errors.Count > 0) + { + Console.ForegroundColor = ConsoleColor.Red; + Console.WriteLine("\nConfiguration errors:"); + errors.ForEach(e => Console.WriteLine($" ✗ {e}")); + Console.ResetColor(); + Console.WriteLine("\nCopy appsettings.example.json → appsettings.json and fill in your keys.\n"); + Environment.Exit(1); + } +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..ac3011c --- /dev/null +++ b/README.md @@ -0,0 +1,113 @@ +# YouTube Video Summarizer + +A .NET 8 console application that fetches YouTube video transcripts and produces structured summaries using an LLM (Ollama or OpenAI). + +--- + +## Prerequisites + +- [.NET 8 SDK](https://dotnet.microsoft.com/download) +- A **YouTube Data API v3** key → [Google Cloud Console](https://console.cloud.google.com) +- **Local Ollama** (Recommended) or an **OpenAI API key**. + +--- + +## Setup + +```bash +# 1. Clone / copy the project +cd YoutubeSummarizer + +# 2. Copy the example config and fill in your keys +cp appsettings.example.json appsettings.json +nano appsettings.json # or your editor of choice + +# 3. Restore packages +dotnet restore + +# 4. Run +dotnet run +``` + +--- + +## Google Cloud Setup (YouTube API Key) + +1. Go to [console.cloud.google.com](https://console.cloud.google.com) +2. Create or select a project +3. **APIs & Services → Library** → search "YouTube Data API v3" → Enable +4. **APIs & Services → Credentials → Create Credentials → API key** +5. (Optional but recommended) Restrict the key to only the YouTube Data API v3 + +> Free quota: **10,000 units/day**. Each video lookup costs ~3 units. You can summarize thousands of videos before hitting the limit. 
+ +--- + +## Configuration Reference + +| Key | Description | Default | +|---|---|---| +| `YouTube:ApiKey` | Your YouTube Data API v3 key | *(required)* | +| `LLM:BaseUrl` | API endpoint | `http://localhost:11434/v1` | +| `LLM:ApiKey` | API key (any for Ollama) | `ollama` | +| `LLM:Model` | Chat model to use | `qwen3:14b` | +| `LLM:MaxTokens` | Max tokens in summary response | `1500` | +| `LLM:TimeoutSeconds` | Max time for LLM generation | `300` | +| `Summarizer:ChunkWordLimit` | Words per chunk for long videos | `3000` | +| `Summarizer:ShowTranscript` | Print raw transcript before summary | `false` | + +--- + +## Architecture + +``` +Program.cs +│ Main loop → parses URL → calls pipeline +│ +├── YouTubeService +│ ├── ExtractVideoId() — URL parsing +│ ├── GetVideoMetadataAsync() — YouTube Data API v3 (Videos.list) +│ └── GetTranscriptAsync() — Caption list + timedtext download +│ +├── SummarizerService +│ ├── SummarizeAsync() — Routes to single-pass or chunked +│ ├── SinglePassSummarize() — One OpenAI call for short videos +│ └── ChunkedSummarize() — Map-reduce for long videos +│ +└── ConsoleRenderer — All terminal output / formatting +``` + +### Caption Quality Transparency + +The app tracks how the transcript was obtained and flags it accordingly: + +| Source | Label | Warning shown? | +|---|---|---| +| Owner-published captions | `✓ Owner-published` | No | +| Community-contributed | `✓ Community captions` | Minor note | +| Auto-generated (ASR) | `~ Auto-generated` | Yes — accuracy caveat | +| No captions (metadata only) | `✗ Metadata only` | Yes — limited accuracy | + +### Long Video Strategy + +Videos with transcripts exceeding `ChunkWordLimit` words use a **map-reduce** approach: + +1. **Split** — transcript divided into overlapping chunks (200-word overlap preserves context at boundaries) +2. **Map** — each chunk summarized independently +3. 
**Reduce** — chunk summaries combined into a final coherent summary + +This handles hour-long lectures, conference talks, and podcasts without hitting model context limits. + +--- + +## Environment Variable Overrides + +You can override `appsettings.json` values with environment variables, useful for CI or Docker: + +```bash +export YouTube__ApiKey="your-key" +export LLM__ApiKey="ollama" +dotnet run +``` + +Note the double-underscore `__` as the section separator (standard .NET configuration convention). diff --git a/SummarizerService.cs b/SummarizerService.cs new file mode 100644 index 0000000..0177e75 --- /dev/null +++ b/SummarizerService.cs @@ -0,0 +1,342 @@ +using OpenAI; +using OpenAI.Chat; +using YoutubeSummarizer.Configuration; +using YoutubeSummarizer.Models; + +namespace YoutubeSummarizer.Services; + +/// +/// Sends transcript text to OpenAI's Chat Completions API and returns a +/// structured summary. +/// +/// Long transcripts (word count > ChunkWordLimit) are handled with a +/// "map-reduce" strategy: +/// 1. Split the transcript into overlapping chunks. +/// 2. Summarize each chunk independently (map phase). +/// 3. Combine chunk summaries into a final cohesive summary (reduce phase). +/// +/// This keeps individual API calls within model context limits while still +/// producing an accurate summary of long-form content like hour-long lectures. +/// +public sealed class SummarizerService +{ + private readonly LlmSettings _llmSettings; + private readonly SummarizerSettings _summarizerSettings; + private readonly ChatClient _chatClient; + + // System prompt used for single-pass and chunk summarization. + // Keeping it focused on facts and structure produces better summaries + // than open-ended "summarize this" prompts. + private const string ChunkSystemPrompt = """ + You are a precise, factual assistant that summarizes YouTube video transcripts. 
+ When given a transcript segment, produce a concise summary that: + - Captures the key points, arguments, and conclusions + - Preserves any specific facts, names, dates, or statistics mentioned + - Uses bullet points for individual points, then a short paragraph for the overall gist + - Omits filler words, repeated phrases, and off-topic tangents + - Does NOT add information not present in the transcript + Respond with only the summary text, no preamble. + """; + + // Personal Information Filter — concise relevance-based summary. + private const string PersonalFilterSystemPrompt = """ + You are a concise, factual assistant that applies a Personal Information Filter + to YouTube video transcripts. When given a transcript, respond with EXACTLY + three sections and nothing else: + + Summary – A concise, plain-English summary in 1–2 sentences. + + Why it matters – Directly evaluate relevance only against these priorities: + time, finances, health, family, service to others. + If none apply, say so clearly. + + Priority tag – End with a single word verdict: ACT, MONITOR, or IGNORE. + + Constraints: + - Do not timestamp or number entries. + - Do not infer user interest beyond what is explicitly provided. + - Do not expand or add context unless the user requests it. + - The burden of interest is on the user. + - Respond with only the three sections above, no preamble. + """; + + // Personal Filter combine prompt for long transcripts. + private const string PersonalFilterCombinePrompt = """ + You are a concise, factual assistant. You will receive several partial summaries + of consecutive segments of a YouTube video, each formatted with Summary, + Why it matters, and Priority tag sections. Combine them into a single response + using the same three-section format: + + Summary – A concise, plain-English summary of the entire video in 1–2 sentences. 
+ + Why it matters – Directly evaluate relevance only against these priorities: + time, finances, health, family, service to others. + If none apply, say so clearly. + + Priority tag – A single word verdict: ACT, MONITOR, or IGNORE. + + Respond with only these three sections, no preamble. + """; + + // Used in the reduce phase to combine chunk summaries coherently. + private const string CombineSystemPrompt = """ + You are a precise, factual assistant. You will receive several partial summaries + of consecutive segments of a YouTube video. Your task is to combine them into + a single, coherent, well-structured summary that: + - Flows as a unified narrative, not as a list of sub-summaries + - Preserves all key facts, names, dates, and statistics + - Uses bullet points for supporting details beneath each main topic + - Omits redundant information that appears across multiple segments + - Concludes with a 2–3 sentence takeaway paragraph + Respond with only the combined summary, no preamble. + """; + + public SummarizerService(LlmSettings llmSettings, SummarizerSettings summarizerSettings) + { + _llmSettings = llmSettings; + _summarizerSettings = summarizerSettings; + + // Initialize the client with the specified model and endpoint. + // We use the OpenAI SDK's ability to point to any OpenAI-compatible API (like Ollama). + _chatClient = new ChatClient( + model: llmSettings.Model, + credential: new System.ClientModel.ApiKeyCredential(llmSettings.ApiKey), + options: new OpenAIClientOptions + { + Endpoint = new Uri(llmSettings.BaseUrl), + NetworkTimeout = TimeSpan.FromSeconds(llmSettings.TimeoutSeconds) + }); + } + + // ───────────────────────────────────────────────────────────────────────── + // Public API + // ───────────────────────────────────────────────────────────────────────── + + /// + /// Produces a from the video's metadata and transcript. + /// Automatically routes to single-pass or chunked strategy based on word count. 
+ /// + public async Task SummarizeAsync( + VideoMetadata metadata, + VideoTranscript transcript, + SummaryMode mode = SummaryMode.Standard, + CancellationToken ct = default) + { + string summaryText; + + // Select prompt set based on mode + var chunkPrompt = mode == SummaryMode.PersonalFilter + ? PersonalFilterSystemPrompt : ChunkSystemPrompt; + var combinePrompt = mode == SummaryMode.PersonalFilter + ? PersonalFilterCombinePrompt : CombineSystemPrompt; + + if (transcript.WordCount <= _summarizerSettings.ChunkWordLimit) + { + // Short video — single API call is sufficient + summaryText = await SinglePassSummarizeAsync(transcript.Text, metadata, chunkPrompt, ct); + } + else + { + // Long video — chunk-and-combine strategy + summaryText = await ChunkedSummarizeAsync(transcript.Text, metadata, chunkPrompt, combinePrompt, ct); + } + + // Attach a quality warning when the transcript quality is uncertain + var warning = BuildQualityWarning(transcript.Source); + + return new VideoSummary + { + Metadata = metadata, + SummaryText = summaryText, + TranscriptSource = transcript.Source, + QualityWarning = warning, + ModelUsed = _llmSettings.Model + }; + } + + // ───────────────────────────────────────────────────────────────────────── + // Summarization strategies + // ───────────────────────────────────────────────────────────────────────── + + /// + /// Single-pass: sends the entire transcript in one API call. + /// Best for videos under ~30 minutes (roughly 3000–4000 words). + /// + private async Task SinglePassSummarizeAsync( + string transcriptText, + VideoMetadata metadata, + string systemPrompt, + CancellationToken ct) + { + var userMessage = BuildUserPrompt(metadata, transcriptText); + return await CallChatCompletionAsync(systemPrompt, userMessage, ct); + } + + /// + /// Map-reduce: splits long transcripts, summarizes each chunk, then combines. 
+    ///
+    /// Overlap: every chunk after the first begins with a brief overlap window (the
+    /// last ~200 words of the previous chunk) so the model retains context across
+    /// chunk boundaries and avoids abrupt topic changes in the summaries.
+    ///
+    private async Task<string> ChunkedSummarizeAsync(
+        string transcriptText,
+        VideoMetadata metadata,
+        string chunkSystemPrompt,
+        string combineSystemPrompt,
+        CancellationToken ct)
+    {
+        // NOTE(review): this splits on the space character only, so words separated
+        // by newlines or tabs count as one "word" here — confirm this matches how
+        // VideoTranscript.WordCount is computed.
+        var words = transcriptText.Split(' ', StringSplitOptions.RemoveEmptyEntries);
+        var chunks = SplitIntoChunks(words, _summarizerSettings.ChunkWordLimit, overlapWords: 200);
+
+        Console.WriteLine($"\n [Chunking] Transcript split into {chunks.Count} chunks for processing...");
+
+        // Map phase: summarize each chunk in sequence
+        // (Parallel would be faster but could hit rate limits — sequential is safer)
+        var chunkSummaries = new List<string>(chunks.Count);
+        for (int i = 0; i < chunks.Count; i++)
+        {
+            Console.Write($" [Chunk {i + 1}/{chunks.Count}] Summarizing");
+            var chunkText = string.Join(" ", chunks[i]);
+            var prompt = $"This is segment {i + 1} of {chunks.Count} from the video \"{metadata.Title}\":\n\n{chunkText}";
+            var summary = await CallChatCompletionAsync(chunkSystemPrompt, prompt, ct);
+            chunkSummaries.Add(summary);
+        }
+
+        // Reduce phase: combine all chunk summaries into one coherent summary
+        Console.Write(" [Combine] Merging chunk summaries into final summary");
+        var combinedInput = string.Join("\n\n---\n\n",
+            chunkSummaries.Select((s, i) => $"Segment {i + 1} summary:\n{s}"));
+
+        var combinePrompt = $"Video: \"{metadata.Title}\" by {metadata.ChannelTitle}\n\n" +
+            $"The following are summaries of {chunks.Count} consecutive segments:\n\n{combinedInput}";
+
+        return await CallChatCompletionAsync(combineSystemPrompt, combinePrompt, ct);
+    }
+
+    // ─────────────────────────────────────────────────────────────────────────
+    // Helpers
+    // ─────────────────────────────────────────────────────────────────────────
+
+    ///
+    /// Sends a system + user message 
pair to the Chat Completions endpoint + /// and returns the assistant's reply text. + /// + private async Task CallChatCompletionAsync( + string systemPrompt, + string userMessage, + CancellationToken ct) + { + var messages = new List + { + new SystemChatMessage(systemPrompt), + new UserChatMessage(userMessage) + }; + + var options = new ChatCompletionOptions + { + MaxOutputTokenCount = _llmSettings.MaxTokens + }; + + var sw = System.Diagnostics.Stopwatch.StartNew(); + var fullContent = new System.Text.StringBuilder(); + + try + { + var streamingUpdates = _chatClient.CompleteChatStreamingAsync(messages, options, ct); + + await foreach (var update in streamingUpdates) + { + foreach (var part in update.ContentUpdate) + { + if (!string.IsNullOrEmpty(part.Text)) + { + if (fullContent.Length == 0) + { + // First token received! + Console.Write(" (working)"); + } + + fullContent.Append(part.Text); + + // Show progress: print a dot every ~50 characters of output + // or just periodically. For now, let's just do a dot every update + // to show it's alive. + if (fullContent.Length % 20 == 0) Console.Write("."); + } + } + } + } + finally + { + sw.Stop(); + Console.WriteLine($" Done! ({sw.Elapsed.TotalSeconds:F1}s)"); + } + + return fullContent.ToString(); + } + + /// + /// Builds the user-turn prompt for a single-pass summarization. + /// Including the title and channel anchors the model to the subject matter, + /// which reduces hallucination on ambiguous ASR transcripts. + /// + private static string BuildUserPrompt(VideoMetadata metadata, string transcriptText) + { + return $""" + Video title: {metadata.Title} + Channel: {metadata.ChannelTitle} + Published: {metadata.PublishedAt:MMMM d, yyyy} + Duration: {metadata.FormattedDuration} + + Full transcript: + {transcriptText} + """; + } + + /// + /// Splits a word array into overlapping chunks of roughly words. + /// The overlap prevents the model from missing context at chunk boundaries. 
/// </summary>
    /// <param name="words">Words to partition; an empty array yields an empty list.</param>
    /// <param name="chunkSize">Maximum number of words per chunk; must be positive.</param>
    /// <param name="overlapWords">
    /// Number of trailing words of a chunk repeated at the start of the next one.
    /// Values outside 0..chunkSize-1 are clamped into that range.
    /// </param>
    /// <returns>The chunks in order; the last chunk ends at the final word.</returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="chunkSize"/> is zero or negative.</exception>
    private static List<string[]> SplitIntoChunks(string[] words, int chunkSize, int overlapWords)
    {
        ArgumentNullException.ThrowIfNull(words);
        ArgumentOutOfRangeException.ThrowIfNegativeOrZero(chunkSize);

        // An overlap >= chunkSize would make the window stall (endless loop) or
        // step backwards past index 0, so cap it: every step advances >= 1 word.
        int overlap = Math.Clamp(overlapWords, 0, chunkSize - 1);
        int stride = chunkSize - overlap;

        var chunks = new List<string[]>();
        for (int start = 0; start < words.Length; start += stride)
        {
            int end = Math.Min(start + chunkSize, words.Length);
            chunks.Add(words[start..end]);

            // The chunk that reaches the final word is the last one; continuing
            // would only produce a chunk made entirely of overlap.
            if (end == words.Length)
            {
                break;
            }
        }

        return chunks;
    }

    /// <summary>
    /// Returns a human-readable warning when transcript quality may affect summary accuracy.
    /// Returns null for high-confidence sources (no warning needed).
    /// </summary>
    private static string? BuildQualityWarning(TranscriptSource source) =>
        source switch
        {
            TranscriptSource.AutoGenerated =>
                "⚠ This summary is based on YouTube's auto-generated captions (ASR). " +
                "The transcript may contain errors, especially for technical terms, names, or accented speech.",

            TranscriptSource.MetadataOnly =>
                "⚠ No captions were available. This summary is based on the video's title " +
                "and description only — it may be incomplete or inaccurate.",

            TranscriptSource.CommunityContributed =>
                "ℹ This summary is based on community-contributed captions. " +
                "Quality is generally good but not guaranteed.",

            _ => null // OwnerPublished — no warning needed
        };
}
diff --git a/TranscriptFileService.cs b/TranscriptFileService.cs
new file mode 100644
index 0000000..f9e5ca0
--- /dev/null
+++ b/TranscriptFileService.cs
@@ -0,0 +1,212 @@
using System.Text;
using YoutubeSummarizer.Models;

namespace YoutubeSummarizer.Services;

/// <summary>
/// Saves video metadata and timestamped transcript to a plain text file.
/// The file is formatted with metadata at the top followed by the transcript
/// organized by timestamps.
/// </summary>
public static class TranscriptFileService
{
    /// <summary>
    /// Saves the transcript and metadata to a text file in the specified directory.
/// Returns the full path to the saved file.
    /// </summary>
    /// <param name="metadata">Video details written to the header; title and ID also form the file name.</param>
    /// <param name="transcript">Transcript to write; timestamped segments are used when present, plain text otherwise.</param>
    /// <param name="summaryText">Optional summary placed under the metadata; skipped when null or whitespace.</param>
    /// <param name="outputDirectory">Target directory (created if needed); defaults to the current directory.</param>
    /// <param name="ct">Cancels the file write.</param>
    public static async Task<string> SaveAsync(
        VideoMetadata metadata,
        VideoTranscript transcript,
        string? summaryText = null,
        string? outputDirectory = null,
        CancellationToken ct = default)
    {
        const string Rule = "════════════════════════════════════════════════════════════════";
        const int WrapWidth = 72;

        var targetDirectory = outputDirectory ?? Environment.CurrentDirectory;
        Directory.CreateDirectory(targetDirectory);

        // File name = sanitized title + video ID
        var filePath = Path.Combine(
            targetDirectory,
            $"{SanitizeFileName(metadata.Title)}_{metadata.VideoId}.txt");

        var output = new StringBuilder();

        // Writes a three-line banner: rule, caption, rule.
        void Banner(string caption)
        {
            output.AppendLine(Rule);
            output.AppendLine(caption);
            output.AppendLine(Rule);
        }

        // Writes the text word-wrapped, one output line per wrapped line.
        void Wrapped(string text)
        {
            foreach (var line in WordWrap(text, maxWidth: WrapWidth))
            {
                output.AppendLine($" {line}");
            }
        }

        // ── Metadata section ─────────────────────────────────────────────────
        Banner(" VIDEO METADATA");
        output.AppendLine();
        output.AppendLine($" Title: {metadata.Title}");
        output.AppendLine($" Channel: {metadata.ChannelTitle}");
        output.AppendLine($" Published: {metadata.PublishedAt:MMMM d, yyyy}");
        output.AppendLine($" Duration: {metadata.FormattedDuration}");
        output.AppendLine($" Video ID: {metadata.VideoId}");
        output.AppendLine($" URL: https://youtu.be/{metadata.VideoId}");

        if (!string.IsNullOrWhiteSpace(summaryText))
        {
            output.AppendLine();
            output.AppendLine(" ── SUMMARY ──────────────────────────────────────────────");
            output.AppendLine();
            Wrapped(summaryText);
        }

        output.AppendLine();

        // ── Transcript source ────────────────────────────────────────────────
        string sourceLabel;
        switch (transcript.Source)
        {
            case TranscriptSource.OwnerPublished:
                sourceLabel = "Owner-published captions";
                break;
            case TranscriptSource.CommunityContributed:
                sourceLabel = "Community-contributed captions";
                break;
            case TranscriptSource.AutoGenerated:
                sourceLabel = "Auto-generated (ASR)";
                break;
            case TranscriptSource.MetadataOnly:
                sourceLabel = "Metadata only (no captions)";
                break;
            default:
                sourceLabel = "Unknown";
                break;
        }

        output.AppendLine($" Transcript Source: {sourceLabel}");
        output.AppendLine($" Word Count: {transcript.WordCount:N0}");
        output.AppendLine($" Saved: {DateTimeOffset.UtcNow:yyyy-MM-dd HH:mm} UTC");
        output.AppendLine();

        // ── Transcript section ───────────────────────────────────────────────
        Banner(" TRANSCRIPT");
        output.AppendLine();

        if (transcript.Segments.Count == 0)
        {
            // No timestamps available — write plain text
            output.AppendLine(" (No timestamp data available)");
            output.AppendLine();
            Wrapped(transcript.Text);
            output.AppendLine();
        }
        else
        {
            // Consecutive segments are grouped into ~30-second blocks; each block
            // is headed by the timestamp of its first segment.
            foreach (var block in GroupSegmentsByInterval(transcript.Segments, intervalSeconds: 30))
            {
                output.AppendLine($" [{block[0].FormattedTimestamp}]");
                Wrapped(string.Join(" ", block.Select(s => s.Text)));
                output.AppendLine();
            }
        }

        Banner(" END OF TRANSCRIPT");

        await File.WriteAllTextAsync(filePath, output.ToString(), ct);
        return filePath;
    }

    // ─────────────────────────────────────────────────────────────────────────
    // Helpers
    // ─────────────────────────────────────────────────────────────────────────

    /// <summary>
    /// Groups timestamped segments into blocks based on a time interval.
    /// This produces readable chunks (e.g.
/// every 30 seconds) instead of
    /// one line per subtitle cue.
    /// </summary>
    /// <param name="segments">Segments in the order they should appear.</param>
    /// <param name="intervalSeconds">
    /// A new block starts once a segment begins at least this many seconds
    /// after the first segment of the current block.
    /// </param>
    /// <returns>Non-empty blocks in input order; empty when there are no segments.</returns>
    private static List<List<TimestampedSegment>> GroupSegmentsByInterval(
        IReadOnlyList<TimestampedSegment> segments,
        int intervalSeconds)
    {
        var blocks = new List<List<TimestampedSegment>>();
        List<TimestampedSegment>? current = null;
        var blockStart = TimeSpan.Zero;

        foreach (var segment in segments)
        {
            // Open a block for the very first segment, and again whenever the
            // interval measured from the block's first segment has elapsed.
            if (current is null || (segment.Start - blockStart).TotalSeconds >= intervalSeconds)
            {
                current = new List<TimestampedSegment>();
                blocks.Add(current);
                blockStart = segment.Start;
            }

            current.Add(segment);
        }

        return blocks;
    }

    /// <summary>
    /// Replaces characters that are invalid in file names with underscores and
    /// collapses runs of whitespace/underscores into a single underscore.
    /// Truncates to a reasonable length to avoid path-length issues.
    /// </summary>
    /// <param name="title">Raw title; surrounding whitespace is trimmed.</param>
    /// <returns>At most 80 UTF-16 characters; may be empty for an empty title.</returns>
    private static string SanitizeFileName(string title)
    {
        const int MaxLength = 80;

        var invalid = Path.GetInvalidFileNameChars();
        var sb = new StringBuilder(title.Length);

        foreach (var ch in title)
        {
            sb.Append(Array.IndexOf(invalid, ch) < 0 ? ch : '_');
        }

        // Replace runs of spaces/underscores with a single underscore
        var result = System.Text.RegularExpressions.Regex.Replace(
            sb.ToString().Trim(), @"[\s_]+", "_");

        if (result.Length <= MaxLength)
        {
            return result;
        }

        // Titles often contain emoji (surrogate pairs). Cutting between the two
        // halves would leave a lone high surrogate, i.e. an invalid string, so
        // back off by one character in that case.
        int cut = char.IsHighSurrogate(result[MaxLength - 1]) ? MaxLength - 1 : MaxLength;
        return result[..cut];
    }

    /// <summary>
    /// Word-wraps text at the specified width, breaking at word boundaries.
/// </summary>
    /// <remarks>
    /// Each '\n'-separated paragraph is wrapped independently; a blank paragraph
    /// yields one empty line. A single word longer than <paramref name="maxWidth"/>
    /// is emitted on a line of its own rather than being split.
    /// </remarks>
    /// <param name="text">Text to wrap.</param>
    /// <param name="maxWidth">Maximum line length in characters.</param>
    private static IEnumerable<string> WordWrap(string text, int maxWidth)
    {
        foreach (var rawParagraph in text.Split('\n'))
        {
            // Splitting on '\n' leaves the '\r' of CRLF line endings behind;
            // drop it so it does not end up glued to the last word.
            var paragraph = rawParagraph.TrimEnd('\r');

            if (string.IsNullOrWhiteSpace(paragraph))
            {
                yield return string.Empty;
                continue;
            }

            var line = new StringBuilder();

            foreach (var word in paragraph.Split(' ', StringSplitOptions.RemoveEmptyEntries))
            {
                // Only break when there is something to flush: an over-long
                // first word must not produce an empty line in front of it.
                if (line.Length > 0 && line.Length + 1 + word.Length > maxWidth)
                {
                    yield return line.ToString();
                    line.Clear();
                }

                if (line.Length > 0)
                {
                    line.Append(' ');
                }

                line.Append(word);
            }

            if (line.Length > 0)
            {
                yield return line.ToString();
            }
        }
    }
}
diff --git a/VideoModels.cs b/VideoModels.cs
new file mode 100644
index 0000000..e510328
--- /dev/null
+++ b/VideoModels.cs
@@ -0,0 +1,161 @@
namespace YoutubeSummarizer.Models;

/// <summary>
/// Metadata returned from the YouTube Data API for a single video.
/// This is a slim projection — the API returns far more fields, but we
/// only bind what we actually need for the summarization workflow.
/// </summary>
public sealed class VideoMetadata
{
    /// <summary>The 11-character YouTube video ID parsed from the URL.</summary>
    public required string VideoId { get; init; }

    /// <summary>Full video title as shown on YouTube.</summary>
    public required string Title { get; init; }

    /// <summary>Channel that published the video.</summary>
    public required string ChannelTitle { get; init; }

    /// <summary>UTC publish date of the video.</summary>
    public DateTimeOffset PublishedAt { get; init; }

    /// <summary>
    /// Video duration in ISO 8601 format (e.g. "PT1H4M32S").
    /// We store it raw and parse it for display purposes.
    /// </summary>
    public string? Duration { get; init; }

    /// <summary>First 5000 characters of the video description (API cap).</summary>
    public string? Description { get; init; }

    /// <summary>
    /// Human-readable duration parsed from <see cref="Duration"/>:
    /// "h:mm:ss" from one hour upwards, "m:ss" below that, and "Unknown" when
    /// the duration is missing or cannot be parsed.
    /// </summary>
    public string FormattedDuration
    {
        get
        {
            if (Duration is null)
            {
                return "Unknown";
            }

            TimeSpan span;
            try
            {
                span = System.Xml.XmlConvert.ToTimeSpan(Duration);
            }
            catch (FormatException)
            {
                // A display property should not throw on a malformed value.
                return "Unknown";
            }

            // Use total hours so durations of 24 h or more do not wrap, and keep
            // at least "m:ss" so short videos do not collapse to "5" or "".
            return span.TotalHours >= 1
                ? $"{(int)span.TotalHours}:{span.Minutes:00}:{span.Seconds:00}"
                : $"{span.Minutes}:{span.Seconds:00}";
        }
    }
}

/// <summary>
/// Represents a single caption track available for a video.
/// YouTube can provide multiple tracks (languages, auto-generated vs. manual).
/// </summary>
public sealed class CaptionTrack
{
    public required string TrackId { get; init; }
    public required string Language { get; init; }  // BCP-47, e.g. "en"
    public required string TrackKind { get; init; } // "standard", "asr" (auto), "forced"
    public required string Name { get; init; }      // Display name from YouTube

    /// <summary>
    /// True when the track was automatically generated by YouTube's ASR system.
    /// ASR captions are less reliable — typos, missing punctuation, run-on sentences.
    /// </summary>
    public bool IsAutoGenerated => TrackKind.Equals("asr", StringComparison.OrdinalIgnoreCase);
}

/// <summary>
/// The full textual transcript assembled from caption data,
/// along with provenance information about how it was obtained.
/// </summary>
public sealed class VideoTranscript
{
    public required string VideoId { get; init; }

    /// <summary>The concatenated, cleaned transcript text.</summary>
    public required string Text { get; init; }

    /// <summary>The caption track this text came from, if available.</summary>
    public CaptionTrack? SourceTrack { get; init; }

    /// <summary>
    /// How the transcript was obtained. This is important context for
    /// interpreting the quality of the summary.
    /// </summary>
    public TranscriptSource Source { get; init; }

    /// <summary>
    /// Individual timestamped segments from the caption track.
    /// Empty when timestamps are not available (e.g. metadata-only transcripts).
    /// </summary>
    public IReadOnlyList<TimestampedSegment> Segments { get; init; } = Array.Empty<TimestampedSegment>();

    /// <summary>Approximate word count of the raw transcript (space-separated tokens).</summary>
    public int WordCount => Text.Split(' ', StringSplitOptions.RemoveEmptyEntries).Length;
}

/// <summary>
/// A single timestamped segment from a caption track.
/// Used when saving the transcript to a file with timestamp formatting.
/// </summary>
public sealed class TimestampedSegment
{
    /// <summary>Start time offset from the beginning of the video.</summary>
    public TimeSpan Start { get; init; }

    /// <summary>Duration of this caption segment.</summary>
    public TimeSpan Duration { get; init; }

    /// <summary>The caption text for this segment.</summary>
    public required string Text { get; init; }

    /// <summary>Formats the start time as HH:MM:SS (one hour or more) or MM:SS for display.</summary>
    public string FormattedTimestamp =>
        Start.TotalHours >= 1
            ? Start.ToString(@"hh\:mm\:ss")
            : Start.ToString(@"mm\:ss");
}

/// <summary>
/// Describes how a transcript was obtained, ordered from most to least reliable.
/// This maps directly to the caption quality transparency layer discussed in LIKA.
/// </summary>
public enum TranscriptSource
{
    /// <summary>Human-reviewed caption track provided by the video owner.</summary>
    OwnerPublished,

    /// <summary>Community-contributed captions (YouTube retired this but tracks may exist).</summary>
    CommunityContributed,

    /// <summary>YouTube's automatic speech recognition — less reliable.</summary>
    AutoGenerated,

    /// <summary>No captions available; summary based on metadata/description only.</summary>
    MetadataOnly
}

/// <summary>
/// Controls which summarization prompt style is used.
/// </summary>
public enum SummaryMode
{
    /// <summary>Default detailed summary with bullet points and takeaways.</summary>
    Standard,

    /// <summary>
    /// Personal Information Filter — brief 1–2 sentence summary, relevance
    /// evaluation against personal priorities (time, finances, health, family,
    /// service to others), and a single-word verdict: ACT, MONITOR, or IGNORE.
    /// </summary>
    PersonalFilter
}

/// <summary>
/// The final deliverable: a structured summary of a YouTube video.
/// </summary>
public sealed class VideoSummary
{
    public required VideoMetadata Metadata { get; init; }
    public required string SummaryText { get; init; }
    public required TranscriptSource TranscriptSource { get; init; }

    /// <summary>
    /// Warning shown when the summary is based on low-quality or missing transcript data.
    /// Null when the source is reliable.
    /// </summary>
    public string? QualityWarning { get; init; }

    /// <summary>Model used to generate this summary.</summary>
    public required string ModelUsed { get; init; }

    public DateTimeOffset GeneratedAt { get; init; } = DateTimeOffset.UtcNow;
}
diff --git a/YouTubeService.cs b/YouTubeService.cs
new file mode 100644
index 0000000..2a3c389
--- /dev/null
+++ b/YouTubeService.cs
@@ -0,0 +1,518 @@
using System.Diagnostics;
using System.Text.Json;
using YoutubeSummarizer.Models;

namespace YoutubeSummarizer.Services;

/// <summary>
/// Uses yt-dlp (https://github.com/yt-dlp/yt-dlp) to retrieve video metadata
/// and download caption tracks. No YouTube API key required.
///
/// yt-dlp is the de-facto standard tool for reliably extracting video
/// information and subtitles from YouTube. It must be installed and
/// available on PATH (e.g. pip install yt-dlp).
/// </summary>
public sealed class YouTubeService
{
    private readonly HttpClient _httpClient;

    public YouTubeService(HttpClient httpClient)
    {
        _httpClient = httpClient;
    }

    // ─────────────────────────────────────────────────────────────────────────
    // Public API
    // ─────────────────────────────────────────────────────────────────────────

    /// <summary>
    /// Parses a YouTube video ID from any common URL format.
    /// Handles: watch?v=, youtu.be/, /embed/, /shorts/, /v/ and a bare ID.
    /// </summary>
    /// <param name="url">URL or raw ID; surrounding whitespace is ignored.</param>
    /// <returns>
    /// The ID, or null when no candidate made of exactly 11 characters from
    /// A–Z, a–z, 0–9, '-' and '_' is found.
    /// </returns>
    public static string? ExtractVideoId(string url)
    {
        // Normalize — strip whitespace the user may have pasted
        url = url.Trim();

        if (Uri.TryCreate(url, UriKind.Absolute, out var uri))
        {
            var segments = uri.AbsolutePath.Split('/', StringSplitOptions.RemoveEmptyEntries);

            // youtu.be short links: https://youtu.be/VIDEO_ID
            // Match the host exactly (or as a subdomain); a substring test would
            // also accept hosts such as "notyoutu.be.example.com".
            bool isShortLink =
                uri.Host.Equals("youtu.be", StringComparison.OrdinalIgnoreCase) ||
                uri.Host.EndsWith(".youtu.be", StringComparison.OrdinalIgnoreCase);
            if (isShortLink && segments.Length > 0 && IsVideoId(segments[0]))
            {
                return segments[0];
            }

            // Standard URLs: ?v=VIDEO_ID
            var query = System.Web.HttpUtility.ParseQueryString(uri.Query);
            if (query["v"] is { } vParam && IsVideoId(vParam))
            {
                return vParam;
            }

            // Path-based URLs: /embed/VIDEO_ID, /shorts/VIDEO_ID, /v/VIDEO_ID
            for (int i = 0; i < segments.Length - 1; i++)
            {
                if ((segments[i] is "embed" or "shorts" or "v") && IsVideoId(segments[i + 1]))
                {
                    return segments[i + 1];
                }
            }
        }

        // Raw ID passed directly
        return IsVideoId(url) ? url : null;

        // Every candidate goes through the same check so callers never receive
        // an empty or wrong-length string. \z (not $) rejects a trailing newline.
        static bool IsVideoId(string candidate) =>
            System.Text.RegularExpressions.Regex.IsMatch(candidate, @"^[A-Za-z0-9_-]{11}\z");
    }

    /// <summary>
    /// Fetches metadata for a video using yt-dlp --dump-json.
    /// No API key required — yt-dlp scrapes the public video page.
/// </summary>
    /// <param name="videoId">Video ID appended to the watch URL passed to yt-dlp.</param>
    /// <param name="ct">Cancels the wait; the yt-dlp process is killed on cancellation.</param>
    /// <returns>
    /// The parsed metadata, or null when yt-dlp exits with a non-zero code,
    /// prints nothing, or prints output that cannot be read as a JSON object.
    /// </returns>
    public async Task<VideoMetadata?> GetVideoMetadataAsync(string videoId, CancellationToken ct = default)
    {
        var psi = new ProcessStartInfo
        {
            FileName = "yt-dlp",
            RedirectStandardOutput = true,
            RedirectStandardError = true,
            UseShellExecute = false,
            CreateNoWindow = true,
        };
        psi.ArgumentList.Add("--dump-json");
        psi.ArgumentList.Add("--no-download");
        psi.ArgumentList.Add($"https://www.youtube.com/watch?v={videoId}");

        using var process = new Process { StartInfo = psi };
        process.Start();

        // Both pipes are drained concurrently so a full stderr buffer cannot
        // stall yt-dlp while we are waiting on stdout.
        var stdoutTask = process.StandardOutput.ReadToEndAsync(ct);
        var stderrTask = process.StandardError.ReadToEndAsync(ct);

        string json;
        try
        {
            await process.WaitForExitAsync(ct);
            json = await stdoutTask;
            await stderrTask; // observe the task; the text itself is not used
        }
        catch (OperationCanceledException)
        {
            // Disposing a Process object does not terminate the OS process.
            KillQuietly(process);
            throw;
        }

        if (process.ExitCode != 0 || string.IsNullOrWhiteSpace(json))
        {
            return null;
        }

        try
        {
            using var doc = JsonDocument.Parse(json);
            var root = doc.RootElement;
            if (root.ValueKind != JsonValueKind.Object)
            {
                return null;
            }

            var title = ReadString(root, "title") ?? "(no title)";
            var channel = ReadString(root, "channel") ?? "(unknown channel)";
            var description = ReadString(root, "description");

            // yt-dlp returns duration in seconds
            TimeSpan? duration = null;
            if (root.TryGetProperty("duration", out var dur) && dur.ValueKind == JsonValueKind.Number)
            {
                duration = TimeSpan.FromSeconds(dur.GetDouble());
            }

            // Upload date comes as "YYYYMMDD"; parse it culture-independently.
            DateTimeOffset publishedAt = DateTimeOffset.MinValue;
            if (ReadString(root, "upload_date") is { } dateStr
                && DateTime.TryParseExact(dateStr, "yyyyMMdd",
                    System.Globalization.CultureInfo.InvariantCulture,
                    System.Globalization.DateTimeStyles.None, out var parsed))
            {
                publishedAt = new DateTimeOffset(parsed, TimeSpan.Zero);
            }

            // Build ISO 8601 duration string for FormattedDuration compatibility
            string? isoDuration = null;
            if (duration is { } ts)
            {
                isoDuration = $"PT{(int)ts.TotalHours}H{ts.Minutes}M{ts.Seconds}S";
            }

            return new VideoMetadata
            {
                VideoId = videoId,
                Title = title,
                ChannelTitle = channel,
                PublishedAt = publishedAt,
                Duration = isoDuration,
                Description = description
            };
        }
        catch (Exception ex) when (ex is JsonException or OverflowException or ArgumentException)
        {
            // Unparseable output or an out-of-range duration: treat as "no metadata".
            return null;
        }

        // Returns the property's value only when it is present and a JSON string.
        static string? ReadString(JsonElement obj, string name) =>
            obj.TryGetProperty(name, out var value) && value.ValueKind == JsonValueKind.String
                ? value.GetString()
                : null;

        // Best-effort termination; the process may already have exited.
        static void KillQuietly(Process p)
        {
            try
            {
                if (!p.HasExited)
                {
                    p.Kill(entireProcessTree: true);
                }
            }
            catch (InvalidOperationException)
            {
                // Process already gone — nothing to clean up.
            }
            catch (System.ComponentModel.Win32Exception)
            {
                // Could not be terminated; the cancellation is rethrown by the caller.
            }
        }
    }

    /// <summary>
    /// Retrieves the best available transcript for the video using yt-dlp.
    ///
    /// yt-dlp is invoked to download subtitle files (preferring manual English
    /// captions, falling back to auto-generated). The downloaded subtitle file is
    /// parsed into clean plain text for summarization.
    ///
    /// If yt-dlp fails or no captions exist, returns a metadata-only transcript
    /// from the video description.
    /// </summary>
    /// <param name="metadata">Supplies the video ID and the fallback title/description.</param>
    /// <param name="ct">Forwarded to the subtitle download.</param>
    public async Task<VideoTranscript> GetTranscriptAsync(
        VideoMetadata metadata,
        CancellationToken ct = default)
    {
        // Manual (human-written) subtitles are tried first, then auto-generated
        var (text, segments, isAuto) = await DownloadSubtitlesWithTimestampsAsync(metadata.VideoId, ct);

        if (string.IsNullOrWhiteSpace(text))
        {
            // No captions at all — fall back to the description text
            return BuildMetadataOnlyTranscript(metadata);
        }

        return new VideoTranscript
        {
            VideoId = metadata.VideoId,
            Text = text,
            Segments = segments,
            SourceTrack = new CaptionTrack
            {
                TrackId = "yt-dlp",
                Language = "en",
                TrackKind = isAuto ? "asr" : "standard",
                Name = isAuto ? "Auto-generated (en)" : "English"
            },
            Source = isAuto
                ? TranscriptSource.AutoGenerated
                : TranscriptSource.OwnerPublished
        };
    }

    // ─────────────────────────────────────────────────────────────────────────
    // Private helpers
    // ─────────────────────────────────────────────────────────────────────────

    /// <summary>
    /// Shells out to yt-dlp to download subtitles for the given video.
    /// First attempts manual subs, then auto-generated if none found.
/// Returns the cleaned transcript text and whether it was auto-generated.
    /// </summary>
    private static async Task<(string? Text, bool IsAuto)> DownloadSubtitlesWithYtDlpAsync(
        string videoId,
        CancellationToken ct)
    {
        // The timestamped segments are not needed by this overload.
        var (text, _, isAuto) = await DownloadSubtitlesWithTimestampsAsync(videoId, ct);
        return (text, isAuto);
    }

    /// <summary>
    /// Downloads subtitles and returns both the plain text and timestamped segments.
    /// </summary>
    /// <remarks>
    /// Works in a unique temporary directory that is removed afterwards
    /// (best effort). Text is null when neither attempt produced a transcript.
    /// </remarks>
    private static async Task<(string? Text, List<TimestampedSegment> Segments, bool IsAuto)> DownloadSubtitlesWithTimestampsAsync(
        string videoId,
        CancellationToken ct)
    {
        var tempDir = Path.Combine(Path.GetTempPath(), $"ytsumm_{videoId}_{Guid.NewGuid():N}");
        Directory.CreateDirectory(tempDir);

        try
        {
            // Attempt 1: manual (human-written) subtitles only
            var (manualText, manualSegments) = await RunYtDlpSubtitleWithTimestampsAsync(
                videoId, tempDir, writeSub: true, writeAutoSub: false, ct);

            if (!string.IsNullOrWhiteSpace(manualText))
            {
                return (manualText, manualSegments, false);
            }

            // Attempt 2: auto-generated subtitles
            var (autoText, autoSegments) = await RunYtDlpSubtitleWithTimestampsAsync(
                videoId, tempDir, writeSub: false, writeAutoSub: true, ct);

            if (!string.IsNullOrWhiteSpace(autoText))
            {
                return (autoText, autoSegments, true);
            }

            return (null, new List<TimestampedSegment>(), false);
        }
        finally
        {
            try
            {
                Directory.Delete(tempDir, recursive: true);
            }
            catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
            {
                // Best effort: a leftover temp directory must not fail the download.
            }
        }
    }

    /// <summary>
    /// Runs a single yt-dlp invocation to download subtitles with timestamps.
    /// Returns the parsed plain-text transcript and timestamped segments.
    /// </summary>
    /// <param name="videoId">Video ID appended to the watch URL passed to yt-dlp.</param>
    /// <param name="tempDir">Existing scratch directory; every file in it is deleted first.</param>
    /// <param name="writeSub">Adds <c>--write-sub</c> (manual subtitles).</param>
    /// <param name="writeAutoSub">Adds <c>--write-auto-sub</c> (auto-generated subtitles).</param>
    /// <param name="ct">Cancels the wait; the yt-dlp process is killed on cancellation.</param>
    /// <returns>
    /// Text is null when yt-dlp fails, writes no .srv1/.vtt/.srt file, or the file is blank.
    /// </returns>
    private static async Task<(string? Text, List<TimestampedSegment> Segments)> RunYtDlpSubtitleWithTimestampsAsync(
        string videoId,
        string tempDir,
        bool writeSub,
        bool writeAutoSub,
        CancellationToken ct)
    {
        // Remove everything left by a previous attempt — including .srt files,
        // which are picked up below — so a stale file is never mistaken for the
        // result of this run.
        foreach (var stale in Directory.GetFiles(tempDir))
        {
            File.Delete(stale);
        }

        var args = new List<string>
        {
            "--skip-download",
            "--sub-lang", "en,en-US,en-GB,en.*",
            "--sub-format", "srv1/vtt/best",
            "-o", Path.Combine(tempDir, "%(id)s"),
        };

        if (writeSub)
        {
            args.Add("--write-sub");
        }

        if (writeAutoSub)
        {
            args.Add("--write-auto-sub");
        }

        args.Add($"https://www.youtube.com/watch?v={videoId}");

        var psi = new ProcessStartInfo
        {
            FileName = "yt-dlp",
            RedirectStandardOutput = true,
            RedirectStandardError = true,
            UseShellExecute = false,
            CreateNoWindow = true,
        };

        foreach (var arg in args)
        {
            psi.ArgumentList.Add(arg);
        }

        using var process = new Process { StartInfo = psi };
        process.Start();

        // Drain both pipes so yt-dlp cannot block on a full output buffer.
        var stdoutTask = process.StandardOutput.ReadToEndAsync(ct);
        var stderrTask = process.StandardError.ReadToEndAsync(ct);

        try
        {
            await process.WaitForExitAsync(ct);
            await Task.WhenAll(stdoutTask, stderrTask); // observe both; output is not used
        }
        catch (OperationCanceledException)
        {
            // Disposing a Process object does not terminate the OS process.
            try
            {
                if (!process.HasExited)
                {
                    process.Kill(entireProcessTree: true);
                }
            }
            catch (Exception ex) when (ex is InvalidOperationException or System.ComponentModel.Win32Exception)
            {
                // Already exited or not killable — the cancellation is rethrown below.
            }

            throw;
        }

        if (process.ExitCode != 0)
        {
            return (null, new List<TimestampedSegment>());
        }

        // Prefer srv1 (XML); order the rest by name so the choice is
        // deterministic when several language variants were written.
        var subFiles = Directory.GetFiles(tempDir)
            .Where(f => HasExtension(f, ".srv1") || HasExtension(f, ".vtt") || HasExtension(f, ".srt"))
            .OrderBy(f => HasExtension(f, ".srv1") ? 0 : 1)
            .ThenBy(f => f, StringComparer.Ordinal)
            .ToList();

        if (subFiles.Count == 0)
        {
            return (null, new List<TimestampedSegment>());
        }

        var chosen = subFiles[0];
        var content = await File.ReadAllTextAsync(chosen, ct);

        if (string.IsNullOrWhiteSpace(content))
        {
            return (null, new List<TimestampedSegment>());
        }

        return HasExtension(chosen, ".srv1")
            ? ParseTimedTextXmlWithTimestamps(content)
            : ParseVttOrSrtWithTimestamps(content);

        // File extensions are identifiers, not linguistic text: compare ordinally.
        static bool HasExtension(string path, string extension) =>
            path.EndsWith(extension, StringComparison.Ordinal);
    }

    /// <summary>
    /// Parses YouTube's srv1 timed-text XML into plain text and timestamped segments.
/// </summary>
    /// <remarks>
    /// Every <c>text</c> element becomes one segment; elements whose cleaned text
    /// is empty are skipped. Missing or unparseable <c>start</c>/<c>dur</c>
    /// attributes are read as zero. If the input is not well-formed XML, the
    /// markup is stripped and the remaining text is returned without segments.
    /// </remarks>
    /// <param name="xml">Content of the downloaded .srv1 file.</param>
    private static (string Text, List<TimestampedSegment> Segments) ParseTimedTextXmlWithTimestamps(string xml)
    {
        System.Xml.Linq.XDocument doc;
        try
        {
            doc = System.Xml.Linq.XDocument.Parse(xml);
        }
        catch (System.Xml.XmlException)
        {
            // Salvage the readable text rather than passing raw markup on to the
            // summarizer as if it were the transcript.
            var withoutTags = System.Text.RegularExpressions.Regex.Replace(xml, @"<[^>]+>", " ");
            var salvaged = System.Text.RegularExpressions.Regex
                .Replace(System.Web.HttpUtility.HtmlDecode(withoutTags), @"\s+", " ")
                .Trim();
            return (salvaged, new List<TimestampedSegment>());
        }

        var segments = new List<TimestampedSegment>();

        foreach (var el in doc.Descendants("text"))
        {
            // The XML parser has already decoded one level of entities; decode
            // once more for captions that arrive double-encoded.
            var decoded = System.Web.HttpUtility.HtmlDecode(el.Value);
            var cleaned = System.Text.RegularExpressions.Regex.Replace(decoded, @"\s+", " ").Trim();

            if (string.IsNullOrEmpty(cleaned))
            {
                continue;
            }

            segments.Add(new TimestampedSegment
            {
                Start = ReadSeconds(el.Attribute("start")?.Value),
                Duration = ReadSeconds(el.Attribute("dur")?.Value),
                Text = cleaned
            });
        }

        return (string.Join(" ", segments.Select(s => s.Text)), segments);

        // Attribute values are decimal seconds with '.' as the separator.
        static TimeSpan ReadSeconds(string? raw) =>
            double.TryParse(raw, System.Globalization.NumberStyles.Float,
                System.Globalization.CultureInfo.InvariantCulture, out var seconds)
                ? TimeSpan.FromSeconds(seconds)
                : TimeSpan.Zero;
    }

    /// <summary>
    /// Parses YouTube's srv1 timed-text XML into clean plain text.
    ///
    /// The XML structure looks like:
    ///   &lt;transcript&gt;
    ///     &lt;text start="0.5" dur="2.1"&gt;Hello world&lt;/text&gt;
    ///     ...
    ///   &lt;/transcript&gt;
    /// </summary>
    private static string ParseTimedTextXml(string xml)
    {
        var (text, _) = ParseTimedTextXmlWithTimestamps(xml);
        return text;
    }

    /// <summary>
    /// Parses VTT or SRT subtitle formats into plain text and timestamped segments.
    /// Strips cue identifiers and formatting tags while preserving timestamp associations.
+ /// + private static (string Text, List Segments) ParseVttOrSrtWithTimestamps(string content) + { + var segments = new List(); + var allLines = content.Split('\n').Select(l => l.Trim()).ToArray(); + + TimeSpan currentStart = TimeSpan.Zero; + TimeSpan currentEnd = TimeSpan.Zero; + var currentText = new List(); + + foreach (var line in allLines) + { + // Skip headers and metadata + if (string.IsNullOrEmpty(line) || + line.StartsWith("WEBVTT") || + line.StartsWith("NOTE") || + line.StartsWith("Kind:") || + line.StartsWith("Language:") || + System.Text.RegularExpressions.Regex.IsMatch(line, @"^\d+$")) + { + // Flush current segment on blank line + if (string.IsNullOrEmpty(line) && currentText.Count > 0) + { + var text = string.Join(" ", currentText); + segments.Add(new TimestampedSegment + { + Start = currentStart, + Duration = currentEnd - currentStart, + Text = text + }); + currentText.Clear(); + } + continue; + } + + // Timestamp line: "00:01:23.456 --> 00:01:27.890" + var tsMatch = System.Text.RegularExpressions.Regex.Match(line, + @"^(\d{2}:\d{2}[:\.][\d\.]+)\s*-->\s*(\d{2}:\d{2}[:\.][\d\.]+)"); + if (tsMatch.Success) + { + // Flush previous segment if any text buffered + if (currentText.Count > 0) + { + var text = string.Join(" ", currentText); + segments.Add(new TimestampedSegment + { + Start = currentStart, + Duration = currentEnd - currentStart, + Text = text + }); + currentText.Clear(); + } + + currentStart = ParseVttTimestamp(tsMatch.Groups[1].Value); + currentEnd = ParseVttTimestamp(tsMatch.Groups[2].Value); + continue; + } + + // Content line — strip HTML tags and decode + var stripped = System.Text.RegularExpressions.Regex.Replace(line, @"<[^>]+>", ""); + var decoded = System.Web.HttpUtility.HtmlDecode(stripped).Trim(); + if (!string.IsNullOrEmpty(decoded)) + currentText.Add(decoded); + } + + // Flush last segment + if (currentText.Count > 0) + { + segments.Add(new TimestampedSegment + { + Start = currentStart, + Duration = currentEnd - currentStart, + 
Text = string.Join(" ", currentText) + }); + } + + // Deduplicate consecutive identical text segments (common in VTT) + var deduped = new List(); + string? prevText = null; + foreach (var seg in segments) + { + if (seg.Text != prevText) + deduped.Add(seg); + prevText = seg.Text; + } + + var plainText = string.Join(" ", deduped.Select(s => s.Text)); + return (plainText, deduped); + } + + /// Parses a VTT/SRT timestamp string into a TimeSpan. + private static TimeSpan ParseVttTimestamp(string ts) + { + // Normalize: VTT uses "." for ms, SRT uses "," — handle both + ts = ts.Replace(',', '.'); + + // Handle both HH:MM:SS.mmm and MM:SS.mmm + var parts = ts.Split(':'); + if (parts.Length == 3) + { + int.TryParse(parts[0], out var h); + int.TryParse(parts[1], out var m); + double.TryParse(parts[2], System.Globalization.NumberStyles.Float, + System.Globalization.CultureInfo.InvariantCulture, out var s); + return new TimeSpan(0, h, m, (int)s, (int)((s - (int)s) * 1000)); + } + else if (parts.Length == 2) + { + int.TryParse(parts[0], out var m); + double.TryParse(parts[1], System.Globalization.NumberStyles.Float, + System.Globalization.CultureInfo.InvariantCulture, out var s); + return new TimeSpan(0, 0, m, (int)s, (int)((s - (int)s) * 1000)); + } + + return TimeSpan.Zero; + } + + /// + /// Parses VTT or SRT subtitle formats into clean plain text. + /// Strips timestamps, cue identifiers, and formatting tags. + /// + private static string ParseVttOrSrt(string content) + { + var (text, _) = ParseVttOrSrtWithTimestamps(content); + return text; + } + + /// + /// When no captions exist, builds a minimal "transcript" from the video description. + /// The summary will be based on much less information and will be flagged accordingly. + /// + private static VideoTranscript BuildMetadataOnlyTranscript(VideoMetadata metadata) + { + var text = string.IsNullOrWhiteSpace(metadata.Description) + ? 
$"No transcript or description available for: {metadata.Title}" + : $"Video title: {metadata.Title}\n\nChannel: {metadata.ChannelTitle}\n\nDescription:\n{metadata.Description}"; + + return new VideoTranscript + { + VideoId = metadata.VideoId, + Text = text, + SourceTrack = null, + Source = TranscriptSource.MetadataOnly + }; + } +} diff --git a/YoutubeSummarizer.csproj b/YoutubeSummarizer.csproj new file mode 100644 index 0000000..1a34344 --- /dev/null +++ b/YoutubeSummarizer.csproj @@ -0,0 +1,36 @@ + + + + Exe + net10.0 + enable + enable + YoutubeSummarizer + YoutubeSummarizer + + + + + PreserveNewest + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/appsettings.json b/appsettings.json new file mode 100644 index 0000000..51a1e65 --- /dev/null +++ b/appsettings.json @@ -0,0 +1,14 @@ +{ + + "LLM": { + "BaseUrl": "http://localhost:11434/v1", + "ApiKey": "ollama", + "Model": "qwen3:14b", + "MaxTokens": 1500, + "TimeoutSeconds": 600 + }, + "Summarizer": { + "ChunkWordLimit": 1500, + "ShowTranscript": false + } +} diff --git a/summarize.sln b/summarize.sln new file mode 100644 index 0000000..a5a1a51 --- /dev/null +++ b/summarize.sln @@ -0,0 +1,24 @@ +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.5.2.0 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "YoutubeSummarizer", "YoutubeSummarizer.csproj", "{2364E226-41E1-8549-7D9A-3C959F71FD8A}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {2364E226-41E1-8549-7D9A-3C959F71FD8A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {2364E226-41E1-8549-7D9A-3C959F71FD8A}.Debug|Any CPU.Build.0 = Debug|Any CPU + {2364E226-41E1-8549-7D9A-3C959F71FD8A}.Release|Any CPU.ActiveCfg = Release|Any CPU + 
{2364E226-41E1-8549-7D9A-3C959F71FD8A}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {58A79D7B-0ADD-4677-A65B-B4E6E38D9AFE} + EndGlobalSection +EndGlobal