feat: initialize YouTube summarizer project with OpenAI integration and map-reduce processing strategy
This commit is contained in:
commit
8f9291883d
27
.gitignore
vendored
Normal file
27
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
# Visual Studio / .NET build outputs
|
||||
[Bb]in/
|
||||
[Oo]bj/
|
||||
[Pp]ublish/
|
||||
*.user
|
||||
*.userosscache
|
||||
*.sln.docstates
|
||||
*.suo
|
||||
*.cache
|
||||
|
||||
# IDEs / Tools
|
||||
.idea/
|
||||
.vscode/*
|
||||
!.vscode/settings.json
|
||||
!.vscode/tasks.json
|
||||
!.vscode/launch.json
|
||||
!.vscode/extensions.json
|
||||
*.swp
|
||||
*.~*
|
||||
|
||||
# OS metadata
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
|
||||
# Project specific / Temporary files
|
||||
scratch/
|
||||
*.log
|
||||
56
AppSettings.cs
Normal file
56
AppSettings.cs
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
namespace YoutubeSummarizer.Configuration;
|
||||
|
||||
/// <summary>
/// Strongly-typed root of the application configuration (appsettings.json,
/// optionally overridden by environment variables).
/// It exposes two sections: <c>LLM</c> and <c>Summarizer</c>. Both fall back
/// to built-in defaults when a section is absent from the configuration.
/// </summary>
public sealed class AppSettings
{
    /// <summary>Settings for the chat-completions endpoint (the <c>LLM</c> section).</summary>
    public LlmSettings LLM { get; init; } = new LlmSettings();

    /// <summary>Settings that control summarization (the <c>Summarizer</c> section).</summary>
    public SummarizerSettings Summarizer { get; init; } = new SummarizerSettings();
}
|
||||
|
||||
/// <summary>
/// Connection and generation settings for the LLM API.
/// The defaults target OpenAI; an OpenAI-compatible server such as Ollama
/// can be used by changing <see cref="BaseUrl"/> and <see cref="Model"/>.
/// </summary>
public sealed class LlmSettings
{
    /// <summary>
    /// Root URL of the API.
    /// OpenAI: https://api.openai.com/v1
    /// Ollama: http://localhost:11434/v1
    /// </summary>
    public string BaseUrl { get; init; } = "https://api.openai.com/v1";

    /// <summary>API key sent with each request. (For Ollama, any value works.)</summary>
    public string ApiKey { get; init; } = "";

    /// <summary>
    /// Name of the chat model to request.
    /// OpenAI examples: gpt-4o-mini, gpt-4o
    /// Ollama examples: qwen3:14b, llama3.1
    /// </summary>
    public string Model { get; init; } = "gpt-4o-mini";

    /// <summary>Upper bound on tokens in the generated summary (output only, not the input).</summary>
    public int MaxTokens { get; init; } = 1500;

    /// <summary>How long, in seconds, an API call may run before it times out.</summary>
    public int TimeoutSeconds { get; init; } = 100;
}
|
||||
|
||||
/// <summary>
/// Options that tune how a transcript is summarized and displayed.
/// </summary>
public sealed class SummarizerSettings
{
    /// <summary>
    /// Approximate transcript word count above which the transcript is split
    /// into chunks that are summarized separately and then merged in a final
    /// "summary of summaries" pass, keeping each API call within the model's
    /// context limit.
    /// </summary>
    public int ChunkWordLimit { get; init; } = 3000;

    /// <summary>Set to true to print the full transcript text before summarizing.</summary>
    public bool ShowTranscript { get; init; }
}
|
||||
197
ConsoleRenderer.cs
Normal file
197
ConsoleRenderer.cs
Normal file
|
|
@ -0,0 +1,197 @@
|
|||
using YoutubeSummarizer.Models;
|
||||
|
||||
namespace YoutubeSummarizer.Services;
|
||||
|
||||
/// <summary>
/// Handles all console output formatting.
/// Keeping display logic separate from business logic makes it easy to
/// later add output modes (JSON, Markdown file, HTML report) without
/// touching the service layer.
/// </summary>
public static class ConsoleRenderer
{
    // ANSI color codes. These render correctly in most Linux terminals.
    // If output is piped to a file, the escape codes appear as-is; there is
    // currently no switch to disable them.
    private const string Reset = "\x1b[0m";
    private const string Bold = "\x1b[1m";
    private const string Cyan = "\x1b[36m";
    private const string Yellow = "\x1b[33m";
    private const string Green = "\x1b[32m";
    private const string Red = "\x1b[31m";
    private const string Dim = "\x1b[2m";

    /// <summary>Prints the application banner on startup.</summary>
    public static void PrintBanner()
    {
        Console.WriteLine();
        Console.WriteLine($"{Bold}{Cyan}╔════════════════════════════════════════╗{Reset}");
        Console.WriteLine($"{Bold}{Cyan}║ YouTube Video Summarizer ║{Reset}");
        Console.WriteLine($"{Bold}{Cyan}╚════════════════════════════════════════╝{Reset}");
        Console.WriteLine();
    }

    /// <summary>
    /// Prompts the user for a URL and reads one line of input.
    /// </summary>
    /// <returns>
    /// The trimmed input, or an empty string when no line could be read
    /// (<see cref="Console.ReadLine"/> returned null, e.g. at end of input).
    /// </returns>
    public static string PromptForUrl()
    {
        Console.Write($"{Bold}Enter YouTube URL (or 'q' to quit):{Reset} ");
        return Console.ReadLine()?.Trim() ?? string.Empty;
    }

    /// <summary>
    /// Asks the user whether they want to save the transcript to a text file.
    /// </summary>
    /// <returns>
    /// True when the answer is "y" or "yes" (case-insensitive); false for
    /// anything else, including no input.
    /// </returns>
    public static bool PromptSaveTranscript()
    {
        Console.Write($"{Bold}Save transcript to file? (y/n):{Reset} ");
        var answer = Console.ReadLine()?.Trim() ?? string.Empty;
        return answer.Equals("y", StringComparison.OrdinalIgnoreCase)
            || answer.Equals("yes", StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>Prints a success message with the saved file path.</summary>
    /// <param name="filePath">Path of the file that was written.</param>
    public static void PrintFileSaved(string filePath)
    {
        Console.WriteLine($" {Green}✓ Transcript saved to:{Reset} {filePath}");
        Console.WriteLine();
    }

    /// <summary>
    /// Prompts the user to choose a summary mode.
    /// </summary>
    /// <returns>
    /// <see cref="SummaryMode.PersonalFilter"/> when the user enters "2";
    /// <see cref="SummaryMode.Standard"/> for any other input (the default).
    /// </returns>
    public static SummaryMode PromptSummaryMode()
    {
        Console.WriteLine($" {Dim}Summary modes:{Reset}");
        Console.WriteLine($" {Bold}1{Reset} – Standard (detailed bullet-point summary)");
        Console.WriteLine($" {Bold}2{Reset} – Personal Filter (relevance verdict: ACT / MONITOR / IGNORE)");
        Console.Write($"{Bold}Choose summary mode [1]:{Reset} ");
        var choice = Console.ReadLine()?.Trim() ?? string.Empty;
        return choice == "2" ? SummaryMode.PersonalFilter : SummaryMode.Standard;
    }

    /// <summary>Prints a single dimmed progress line announcing the step that is about to run.</summary>
    /// <param name="message">Short description of the step; "..." is appended.</param>
    public static void PrintWorking(string message)
    {
        Console.WriteLine($" {Dim}→ {message}...{Reset}");
    }

    /// <summary>
    /// Renders the full summary result to the console in a structured,
    /// readable format. Includes metadata header, quality warning, and
    /// the summary body.
    /// </summary>
    /// <param name="summary">The summary to display.</param>
    /// <param name="showTranscriptSource">When true, a badge describing where the transcript came from is shown.</param>
    public static void PrintSummary(VideoSummary summary, bool showTranscriptSource)
    {
        Console.WriteLine();
        PrintDivider();

        // ── Metadata header ──────────────────────────────────────────────────
        Console.WriteLine($"{Bold}{Green} {summary.Metadata.Title}{Reset}");
        Console.WriteLine($" {Dim}Channel:{Reset} {summary.Metadata.ChannelTitle}");
        Console.WriteLine($" {Dim}Published:{Reset} {summary.Metadata.PublishedAt:MMMM d, yyyy}");
        Console.WriteLine($" {Dim}Duration:{Reset} {summary.Metadata.FormattedDuration}");
        Console.WriteLine($" {Dim}URL:{Reset} https://youtu.be/{summary.Metadata.VideoId}");

        // ── Transcript source badge ──────────────────────────────────────────
        if (showTranscriptSource)
        {
            var (badge, color) = summary.TranscriptSource switch
            {
                TranscriptSource.OwnerPublished => ("✓ Owner-published captions", Green),
                TranscriptSource.CommunityContributed => ("✓ Community captions", Green),
                TranscriptSource.AutoGenerated => ("~ Auto-generated (ASR)", Yellow),
                TranscriptSource.MetadataOnly => ("✗ Metadata only", Red),
                _ => ("? Unknown", Dim)
            };
            Console.WriteLine($" {Dim}Transcript:{Reset} {color}{badge}{Reset}");
        }

        Console.WriteLine($" {Dim}Model:{Reset} {summary.ModelUsed}");
        Console.WriteLine($" {Dim}Generated:{Reset} {summary.GeneratedAt:yyyy-MM-dd HH:mm} UTC");

        PrintDivider();

        // ── Quality warning ──────────────────────────────────────────────────
        if (summary.QualityWarning is not null)
        {
            Console.WriteLine();
            Console.WriteLine($" {Yellow}{summary.QualityWarning}{Reset}");
        }

        // ── Summary body ─────────────────────────────────────────────────────
        Console.WriteLine();
        Console.WriteLine($"{Bold} SUMMARY{Reset}");
        Console.WriteLine();

        // Wrap the body at 78 characters; together with the leading indent
        // added below, lines stay readable in standard terminal widths
        // without horizontal scrolling.
        foreach (var line in WordWrap(summary.SummaryText, maxWidth: 78))
        {
            Console.WriteLine($" {line}");
        }

        Console.WriteLine();
        PrintDivider();
        Console.WriteLine();
    }

    /// <summary>Prints a styled error message surrounded by blank lines.</summary>
    /// <param name="message">The error text to show.</param>
    public static void PrintError(string message)
    {
        Console.WriteLine();
        Console.WriteLine($" {Red}✗ Error: {message}{Reset}");
        Console.WriteLine();
    }

    /// <summary>Prints a styled warning (non-fatal).</summary>
    /// <param name="message">The warning text to show.</param>
    public static void PrintWarning(string message)
    {
        Console.WriteLine($" {Yellow}⚠ {message}{Reset}");
    }

    // ─────────────────────────────────────────────────────────────────────────
    // Private helpers
    // ─────────────────────────────────────────────────────────────────────────

    /// <summary>Prints a dimmed horizontal rule, 74 characters wide.</summary>
    private static void PrintDivider()
    {
        Console.WriteLine($" {Dim}{new string('─', 74)}{Reset}");
    }

    /// <summary>
    /// Splits text into lines no wider than <paramref name="maxWidth"/> characters,
    /// breaking only at spaces. Existing newlines in the input are kept as
    /// paragraph breaks, and blank paragraphs become empty lines. A single word
    /// longer than <paramref name="maxWidth"/> is emitted on its own line
    /// without being split.
    /// </summary>
    /// <param name="text">The text to wrap.</param>
    /// <param name="maxWidth">Maximum line width in characters.</param>
    /// <returns>The wrapped lines, in order.</returns>
    private static IEnumerable<string> WordWrap(string text, int maxWidth)
    {
        foreach (var paragraph in text.Split('\n'))
        {
            if (string.IsNullOrWhiteSpace(paragraph))
            {
                yield return string.Empty;
                continue;
            }

            var words = paragraph.Split(' ', StringSplitOptions.RemoveEmptyEntries);
            var current = new System.Text.StringBuilder();

            foreach (var word in words)
            {
                // Flush only when the line already holds text. Without the
                // Length > 0 guard, a word of maxWidth or more characters
                // arriving on an empty line would emit a spurious blank line.
                if (current.Length > 0 && current.Length + 1 + word.Length > maxWidth)
                {
                    yield return current.ToString();
                    current.Clear();
                }

                if (current.Length > 0)
                {
                    current.Append(' ');
                }

                current.Append(word);
            }

            if (current.Length > 0)
            {
                yield return current.ToString();
            }
        }
    }
}
|
||||
227
Program.cs
Normal file
227
Program.cs
Normal file
|
|
@ -0,0 +1,227 @@
|
|||
using Microsoft.Extensions.Configuration;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using YoutubeSummarizer.Configuration;
|
||||
using YoutubeSummarizer.Models;
|
||||
using YoutubeSummarizer.Services;
|
||||
|
||||
// ═════════════════════════════════════════════════════════════════════════════
|
||||
// Bootstrap
|
||||
// ═════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
// Build configuration from appsettings.json (required) with optional
// environment variable overrides (useful for CI or containerized deployment).
// Environment variables use a double underscore as the section separator,
// e.g. LLM__ApiKey or Summarizer__ChunkWordLimit.
var config = new ConfigurationBuilder()
    .SetBasePath(AppContext.BaseDirectory)
    .AddJsonFile("appsettings.json", optional: false, reloadOnChange: false)
    .AddEnvironmentVariables() // overrides appsettings values if set
    .Build();

// Bind configuration sections to strongly-typed objects.
var appSettings = new AppSettings();
config.Bind(appSettings);

// Validate required keys up front — fail fast with a clear message rather
// than letting the first API call blow up with a cryptic 401.
ValidateSettings(appSettings);

// Wire up DI container.
// For a console app this is lightweight, but it mirrors the pattern used
// in the LIKA/IKA ASP.NET services so the code is easy to lift into a
// background service or API controller later.
var services = new ServiceCollection();

// Register HttpClient for the YouTube timedtext endpoint.
// Using IHttpClientFactory gives us connection pooling and the ability to
// attach Polly retry policies.
services.AddHttpClient<YouTubeService>(client =>
{
    client.DefaultRequestHeaders.Add("User-Agent",
        "Mozilla/5.0 (compatible; YoutubeSummarizer/1.0)");
    client.Timeout = TimeSpan.FromSeconds(30);
});

// Register services with their config dependencies.
services.AddSingleton(appSettings.LLM);
services.AddSingleton(appSettings.Summarizer);
services.AddTransient<SummarizerService>();

// The provider owns disposable services (e.g. the HTTP handler pool), so
// dispose it when the program ends instead of leaking it.
await using var serviceProvider = services.BuildServiceProvider();

// ═════════════════════════════════════════════════════════════════════════════
// Main loop
// ═════════════════════════════════════════════════════════════════════════════

ConsoleRenderer.PrintBanner();

// Handle Ctrl+C gracefully so any in-progress API call can finish or cancel.
using var cts = new CancellationTokenSource();
Console.CancelKeyPress += (_, e) =>
{
    e.Cancel = true; // prevent immediate termination
    cts.Cancel();
    Console.WriteLine("\n Cancellation requested. Finishing current operation...");
};

while (!cts.Token.IsCancellationRequested)
{
    var input = ConsoleRenderer.PromptForUrl();

    // NOTE(review): PromptForUrl returns an empty string both for a blank
    // line and when no line could be read (end of input), so with a closed
    // or exhausted stdin this loop re-prompts indefinitely. Distinguishing
    // the two cases needs a change to PromptForUrl's contract.
    if (string.IsNullOrWhiteSpace(input)) continue;
    if (input.Equals("q", StringComparison.OrdinalIgnoreCase)) break;

    // Parse the video ID from the URL
    var videoId = YouTubeService.ExtractVideoId(input);
    if (videoId is null)
    {
        ConsoleRenderer.PrintError("Could not extract a valid YouTube video ID from that URL.");
        ConsoleRenderer.PrintWarning("Accepted formats: watch?v=..., youtu.be/..., /shorts/..., /embed/...");
        continue;
    }

    // Ask whether to save transcript to file before processing
    var saveTranscript = ConsoleRenderer.PromptSaveTranscript();

    // Choose summary mode
    var summaryMode = ConsoleRenderer.PromptSummaryMode();

    await ProcessVideoAsync(videoId, serviceProvider, appSettings.Summarizer, saveTranscript, summaryMode, cts.Token);
}

Console.WriteLine(" Goodbye!");
|
||||
|
||||
// ═════════════════════════════════════════════════════════════════════════════
|
||||
// Video processing pipeline
|
||||
// ═════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/// <summary>
/// Orchestrates the full pipeline for a single video:
/// 1. Fetch metadata
/// 2. Fetch transcript
/// 3. Summarize (LLM Chat Completions)
/// 3.5 Save transcript + Standard summary to a file (if requested)
/// 4. Display (ConsoleRenderer)
/// Errors are reported on the console and never propagate to the caller;
/// a user-requested cancellation ends the pipeline silently.
/// </summary>
/// <param name="videoId">The YouTube video ID to process.</param>
/// <param name="sp">Service provider used to resolve the pipeline services.</param>
/// <param name="summarizerSettings">Summarizer options (controls raw-transcript printing here).</param>
/// <param name="saveTranscript">When true, the transcript and the Standard summary are written to a file.</param>
/// <param name="summaryMode">Which summary is shown to the user.</param>
/// <param name="ct">Token signalled when the user presses Ctrl+C.</param>
static async Task ProcessVideoAsync(
    string videoId,
    IServiceProvider sp,
    SummarizerSettings summarizerSettings,
    bool saveTranscript,
    SummaryMode summaryMode,
    CancellationToken ct)
{
    try
    {
        // Resolve the pipeline services from the container.
        var youtubeService = sp.GetRequiredService<YouTubeService>();
        var summarizerService = sp.GetRequiredService<SummarizerService>();

        // ── Step 1: Metadata ──────────────────────────────────────────────
        ConsoleRenderer.PrintWorking("Fetching video metadata");
        var metadata = await youtubeService.GetVideoMetadataAsync(videoId, ct);

        if (metadata is null)
        {
            ConsoleRenderer.PrintError($"Video not found or is private: {videoId}");
            return;
        }

        Console.WriteLine($" {metadata.Title}");

        // ── Step 2: Transcript ────────────────────────────────────────────
        ConsoleRenderer.PrintWorking("Fetching transcript");
        var transcript = await youtubeService.GetTranscriptAsync(metadata, ct);

        // Optionally show raw transcript for debugging / inspection
        if (summarizerSettings.ShowTranscript)
        {
            Console.WriteLine();
            Console.WriteLine(" ─── RAW TRANSCRIPT ───");
            Console.WriteLine(transcript.Text);
            Console.WriteLine(" ─── END TRANSCRIPT ───");
            Console.WriteLine();
        }

        Console.WriteLine(
            $" Transcript: {transcript.Source} | {transcript.WordCount:N0} words");

        // ── Step 3: Summarize ─────────────────────────────────────────────
        // The Standard summary is what gets written to the file and what is
        // displayed in Standard mode. In Personal Filter mode it is only
        // needed when a file is saved, so skip that LLM call otherwise.
        VideoSummary displaySummary;
        var fileSummaryText = string.Empty; // only read when saveTranscript is true

        if (summaryMode == SummaryMode.PersonalFilter)
        {
            if (saveTranscript)
            {
                ConsoleRenderer.PrintWorking("Summarizing with LLM");
                var standardSummary = await summarizerService.SummarizeAsync(
                    metadata, transcript, SummaryMode.Standard, ct);
                fileSummaryText = standardSummary.SummaryText;
            }

            ConsoleRenderer.PrintWorking("Applying Personal Information Filter");
            displaySummary = await summarizerService.SummarizeAsync(
                metadata, transcript, SummaryMode.PersonalFilter, ct);
        }
        else
        {
            ConsoleRenderer.PrintWorking("Summarizing with LLM");
            displaySummary = await summarizerService.SummarizeAsync(
                metadata, transcript, SummaryMode.Standard, ct);
            fileSummaryText = displaySummary.SummaryText;
        }

        // ── Step 3.5: Save transcript + standard summary to file ─────────
        // Done after summarization so the file can include the summary.
        if (saveTranscript)
        {
            var transcriptsDir = Path.Combine(
                Environment.GetFolderPath(Environment.SpecialFolder.UserProfile),
                "Downloads", "transcripts");
            ConsoleRenderer.PrintWorking("Saving transcript to file");
            var savedPath = await TranscriptFileService.SaveAsync(
                metadata, transcript, summaryText: fileSummaryText,
                outputDirectory: transcriptsDir, ct: ct);
            ConsoleRenderer.PrintFileSaved(savedPath);
        }

        // ── Step 4: Display ───────────────────────────────────────────────
        ConsoleRenderer.PrintSummary(displaySummary, showTranscriptSource: true);
    }
    catch (OperationCanceledException) when (ct.IsCancellationRequested)
    {
        // User pressed Ctrl+C — nothing to report, the loop will exit.
        // The filter matters: request timeouts also surface as
        // OperationCanceledException (TaskCanceledException) and must be
        // reported below rather than silently swallowed here.
    }
    catch (Exception ex)
    {
        ConsoleRenderer.PrintError(ex.Message);

        // Print the stack trace in dim text for debugging without overwhelming
        // normal users who will rarely see this path.
        Console.WriteLine($"\x1b[2m{ex}\x1b[0m");
    }
}
|
||||
|
||||
// ═════════════════════════════════════════════════════════════════════════════
|
||||
// Configuration validation
|
||||
// ═════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/// <summary>
/// Checks the bound settings for values that would make the pipeline fail
/// later. If any problem is found, all problems are printed and the process
/// exits with code 1; otherwise the method returns normally.
/// </summary>
/// <param name="settings">The settings bound from configuration.</param>
static void ValidateSettings(AppSettings settings)
{
    var errors = new List<string>();

    // For local Ollama we don't strictly need a real key, so a missing or
    // placeholder key is only an error when the endpoint is OpenAI.
    var apiKeyMissing = string.IsNullOrWhiteSpace(settings.LLM.ApiKey)
        || settings.LLM.ApiKey == "YOUR_API_KEY_HERE";
    if (apiKeyMissing &&
        settings.LLM.BaseUrl.Contains("openai.com", StringComparison.OrdinalIgnoreCase))
    {
        errors.Add("LLM:ApiKey is not set in appsettings.json (Required for OpenAI)");
    }

    // The base URL is turned into a Uri when the LLM client is created, so
    // reject anything that is not an absolute http(s) URL here. The scheme
    // check is needed because rooted paths such as "/v1" can parse as
    // absolute file URIs on Unix.
    var baseUrlIsValid = Uri.TryCreate(settings.LLM.BaseUrl, UriKind.Absolute, out var baseUri)
        && (baseUri.Scheme == Uri.UriSchemeHttp || baseUri.Scheme == Uri.UriSchemeHttps);
    if (!baseUrlIsValid)
    {
        errors.Add($"LLM:BaseUrl must be an absolute http(s) URL (got '{settings.LLM.BaseUrl}')");
    }

    if (settings.LLM.TimeoutSeconds <= 0)
    {
        errors.Add($"LLM:TimeoutSeconds must be greater than 0 (got {settings.LLM.TimeoutSeconds})");
    }

    if (settings.Summarizer.ChunkWordLimit <= 0)
    {
        errors.Add($"Summarizer:ChunkWordLimit must be greater than 0 (got {settings.Summarizer.ChunkWordLimit})");
    }

    if (errors.Count > 0)
    {
        Console.ForegroundColor = ConsoleColor.Red;
        Console.WriteLine("\nConfiguration errors:");
        errors.ForEach(e => Console.WriteLine($" ✗ {e}"));
        Console.ResetColor();
        Console.WriteLine("\nCopy appsettings.example.json → appsettings.json and fill in your keys.\n");
        Environment.Exit(1);
    }
}
|
||||
113
README.md
Normal file
113
README.md
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
# YouTube Video Summarizer
|
||||
|
||||
A .NET 8 console application that fetches YouTube video transcripts and produces structured summaries using an LLM (Ollama or OpenAI).
|
||||
|
||||
---
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- [.NET 8 SDK](https://dotnet.microsoft.com/download)
|
||||
- A **YouTube Data API v3** key → [Google Cloud Console](https://console.cloud.google.com)
|
||||
- **Local Ollama** (Recommended) or an **OpenAI API key**.
|
||||
|
||||
---
|
||||
|
||||
## Setup
|
||||
|
||||
```bash
|
||||
# 1. Clone / copy the project
|
||||
cd YoutubeSummarizer
|
||||
|
||||
# 2. Copy the example config and fill in your keys
|
||||
cp appsettings.example.json appsettings.json
|
||||
nano appsettings.json # or your editor of choice
|
||||
|
||||
# 3. Restore packages
|
||||
dotnet restore
|
||||
|
||||
# 4. Run
|
||||
dotnet run
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Google Cloud Setup (YouTube API Key)
|
||||
|
||||
1. Go to [console.cloud.google.com](https://console.cloud.google.com)
|
||||
2. Create or select a project
|
||||
3. **APIs & Services → Library** → search "YouTube Data API v3" → Enable
|
||||
4. **APIs & Services → Credentials → Create Credentials → API key**
|
||||
5. (Optional but recommended) Restrict the key to only the YouTube Data API v3
|
||||
|
||||
> Free quota: **10,000 units/day**. Each video lookup costs ~3 units. You can summarize thousands of videos before hitting the limit.
|
||||
|
||||
---
|
||||
|
||||
## Configuration Reference
|
||||
|
||||
| Key | Description | Default |
|
||||
|---|---|---|
|
||||
| `YouTube:ApiKey` | Your YouTube Data API v3 key | *(required)* |
|
||||
| `LLM:BaseUrl` | API endpoint | `http://localhost:11434/v1` |
|
||||
| `LLM:ApiKey` | API key (any for Ollama) | `ollama` |
|
||||
| `LLM:Model` | Chat model to use | `qwen3:14b` |
|
||||
| `LLM:MaxTokens` | Max tokens in summary response | `1500` |
|
||||
| `LLM:TimeoutSeconds` | Max time for LLM generation | `300` |
|
||||
| `Summarizer:ChunkWordLimit` | Words per chunk for long videos | `3000` |
|
||||
| `Summarizer:ShowTranscript` | Print raw transcript before summary | `false` |
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
Program.cs
|
||||
│ Main loop → parses URL → calls pipeline
|
||||
│
|
||||
├── YouTubeService
|
||||
│ ├── ExtractVideoId() — URL parsing
|
||||
│ ├── GetVideoMetadataAsync() — YouTube Data API v3 (Videos.list)
|
||||
│ └── GetTranscriptAsync() — Caption list + timedtext download
|
||||
│
|
||||
├── SummarizerService
|
||||
│ ├── SummarizeAsync() — Routes to single-pass or chunked
|
||||
│ ├── SinglePassSummarizeAsync() — One LLM call for short videos
|
||||
│ └── ChunkedSummarizeAsync() — Map-reduce for long videos
|
||||
│
|
||||
└── ConsoleRenderer — All terminal output / formatting
|
||||
```
|
||||
|
||||
### Caption Quality Transparency
|
||||
|
||||
The app tracks how the transcript was obtained and flags it accordingly:
|
||||
|
||||
| Source | Label | Warning shown? |
|
||||
|---|---|---|
|
||||
| Owner-published captions | `✓ Owner-published` | No |
|
||||
| Community-contributed | `✓ Community captions` | Minor note |
|
||||
| Auto-generated (ASR) | `~ Auto-generated` | Yes — accuracy caveat |
|
||||
| No captions (metadata only) | `✗ Metadata only` | Yes — limited accuracy |
|
||||
|
||||
### Long Video Strategy
|
||||
|
||||
Videos with transcripts exceeding `ChunkWordLimit` words use a **map-reduce** approach:
|
||||
|
||||
1. **Split** — transcript divided into overlapping chunks (200-word overlap preserves context at boundaries)
|
||||
2. **Map** — each chunk summarized independently
|
||||
3. **Reduce** — chunk summaries combined into a final coherent summary
|
||||
|
||||
This handles hour-long lectures, conference talks, and podcasts without hitting model context limits.
|
||||
|
||||
---
|
||||
|
||||
## Environment Variable Overrides
|
||||
|
||||
You can override `appsettings.json` values with environment variables, useful for CI or Docker:
|
||||
|
||||
```bash
|
||||
export YouTube__ApiKey="your-key"
|
||||
export LLM__ApiKey="ollama"
|
||||
dotnet run
|
||||
```
|
||||
|
||||
Note the double-underscore `__` as the section separator (standard .NET configuration convention).
|
||||
342
SummarizerService.cs
Normal file
342
SummarizerService.cs
Normal file
|
|
@ -0,0 +1,342 @@
|
|||
using OpenAI;
|
||||
using OpenAI.Chat;
|
||||
using YoutubeSummarizer.Configuration;
|
||||
using YoutubeSummarizer.Models;
|
||||
|
||||
namespace YoutubeSummarizer.Services;
|
||||
|
||||
/// <summary>
|
||||
/// Sends transcript text to an OpenAI-compatible Chat Completions API (OpenAI or Ollama) and returns a
|
||||
/// structured summary.
|
||||
///
|
||||
/// Long transcripts (word count > ChunkWordLimit) are handled with a
|
||||
/// "map-reduce" strategy:
|
||||
/// 1. Split the transcript into overlapping chunks.
|
||||
/// 2. Summarize each chunk independently (map phase).
|
||||
/// 3. Combine chunk summaries into a final cohesive summary (reduce phase).
|
||||
///
|
||||
/// This keeps individual API calls within model context limits while still
|
||||
/// producing an accurate summary of long-form content like hour-long lectures.
|
||||
/// </summary>
|
||||
public sealed class SummarizerService
|
||||
{
|
||||
private readonly LlmSettings _llmSettings;
|
||||
private readonly SummarizerSettings _summarizerSettings;
|
||||
private readonly ChatClient _chatClient;
|
||||
|
||||
// System prompt used for single-pass and chunk summarization.
|
||||
// Keeping it focused on facts and structure produces better summaries
|
||||
// than open-ended "summarize this" prompts.
|
||||
private const string ChunkSystemPrompt = """
|
||||
You are a precise, factual assistant that summarizes YouTube video transcripts.
|
||||
When given a transcript segment, produce a concise summary that:
|
||||
- Captures the key points, arguments, and conclusions
|
||||
- Preserves any specific facts, names, dates, or statistics mentioned
|
||||
- Uses bullet points for individual points, then a short paragraph for the overall gist
|
||||
- Omits filler words, repeated phrases, and off-topic tangents
|
||||
- Does NOT add information not present in the transcript
|
||||
Respond with only the summary text, no preamble.
|
||||
""";
|
||||
|
||||
// Personal Information Filter — concise relevance-based summary.
|
||||
private const string PersonalFilterSystemPrompt = """
|
||||
You are a concise, factual assistant that applies a Personal Information Filter
|
||||
to YouTube video transcripts. When given a transcript, respond with EXACTLY
|
||||
three sections and nothing else:
|
||||
|
||||
Summary – A concise, plain-English summary in 1–2 sentences.
|
||||
|
||||
Why it matters – Directly evaluate relevance only against these priorities:
|
||||
time, finances, health, family, service to others.
|
||||
If none apply, say so clearly.
|
||||
|
||||
Priority tag – End with a single word verdict: ACT, MONITOR, or IGNORE.
|
||||
|
||||
Constraints:
|
||||
- Do not timestamp or number entries.
|
||||
- Do not infer user interest beyond what is explicitly provided.
|
||||
- Do not expand or add context unless the user requests it.
|
||||
- The burden of interest is on the user.
|
||||
- Respond with only the three sections above, no preamble.
|
||||
""";
|
||||
|
||||
// Personal Filter combine prompt for long transcripts.
|
||||
private const string PersonalFilterCombinePrompt = """
|
||||
You are a concise, factual assistant. You will receive several partial summaries
|
||||
of consecutive segments of a YouTube video, each formatted with Summary,
|
||||
Why it matters, and Priority tag sections. Combine them into a single response
|
||||
using the same three-section format:
|
||||
|
||||
Summary – A concise, plain-English summary of the entire video in 1–2 sentences.
|
||||
|
||||
Why it matters – Directly evaluate relevance only against these priorities:
|
||||
time, finances, health, family, service to others.
|
||||
If none apply, say so clearly.
|
||||
|
||||
Priority tag – A single word verdict: ACT, MONITOR, or IGNORE.
|
||||
|
||||
Respond with only these three sections, no preamble.
|
||||
""";
|
||||
|
||||
// Used in the reduce phase to combine chunk summaries coherently.
|
||||
private const string CombineSystemPrompt = """
|
||||
You are a precise, factual assistant. You will receive several partial summaries
|
||||
of consecutive segments of a YouTube video. Your task is to combine them into
|
||||
a single, coherent, well-structured summary that:
|
||||
- Flows as a unified narrative, not as a list of sub-summaries
|
||||
- Preserves all key facts, names, dates, and statistics
|
||||
- Uses bullet points for supporting details beneath each main topic
|
||||
- Omits redundant information that appears across multiple segments
|
||||
- Concludes with a 2–3 sentence takeaway paragraph
|
||||
Respond with only the combined summary, no preamble.
|
||||
""";
|
||||
|
||||
/// <summary>
/// Creates the service and its chat client from the supplied settings.
/// </summary>
/// <param name="llmSettings">Endpoint, credentials, model and timeout for the LLM API.</param>
/// <param name="summarizerSettings">Options that control chunking.</param>
public SummarizerService(LlmSettings llmSettings, SummarizerSettings summarizerSettings)
{
    _llmSettings = llmSettings;
    _summarizerSettings = summarizerSettings;

    // ApiKeyCredential rejects null/empty keys, but a keyless local server
    // (e.g. Ollama) is a supported setup, so substitute a placeholder when
    // no key is configured.
    const string placeholderApiKey = "not-needed";
    var apiKey = string.IsNullOrWhiteSpace(llmSettings.ApiKey)
        ? placeholderApiKey
        : llmSettings.ApiKey;

    // Initialize the client with the specified model and endpoint.
    // We use the OpenAI SDK's ability to point to any OpenAI-compatible API (like Ollama).
    _chatClient = new ChatClient(
        model: llmSettings.Model,
        credential: new System.ClientModel.ApiKeyCredential(apiKey),
        options: new OpenAIClientOptions
        {
            Endpoint = new Uri(llmSettings.BaseUrl),
            NetworkTimeout = TimeSpan.FromSeconds(llmSettings.TimeoutSeconds)
        });
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// Public API
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// <summary>
/// Builds a <see cref="VideoSummary"/> for a video from its metadata and transcript.
/// Transcripts at or below the configured chunk word limit are summarized in
/// a single call; longer ones go through the chunk-and-combine strategy.
/// </summary>
/// <param name="metadata">Metadata of the video being summarized.</param>
/// <param name="transcript">The transcript to summarize.</param>
/// <param name="mode">Selects which set of system prompts is used.</param>
/// <param name="ct">Token used to cancel the underlying API calls.</param>
/// <returns>The summary together with its source, quality warning and model name.</returns>
public async Task<VideoSummary> SummarizeAsync(
    VideoMetadata metadata,
    VideoTranscript transcript,
    SummaryMode mode = SummaryMode.Standard,
    CancellationToken ct = default)
{
    // Pick the per-chunk and merge prompts that belong to the requested mode.
    var (perChunkPrompt, mergePrompt) = mode == SummaryMode.PersonalFilter
        ? (PersonalFilterSystemPrompt, PersonalFilterCombinePrompt)
        : (ChunkSystemPrompt, CombineSystemPrompt);

    // Short transcripts fit in one API call; long ones are chunked and combined.
    var fitsInSingleCall = transcript.WordCount <= _summarizerSettings.ChunkWordLimit;
    var text = fitsInSingleCall
        ? await SinglePassSummarizeAsync(transcript.Text, metadata, perChunkPrompt, ct)
        : await ChunkedSummarizeAsync(transcript.Text, metadata, perChunkPrompt, mergePrompt, ct);

    return new VideoSummary
    {
        Metadata = metadata,
        SummaryText = text,
        TranscriptSource = transcript.Source,
        // Warning text (if any) derived from how the transcript was obtained.
        QualityWarning = BuildQualityWarning(transcript.Source),
        ModelUsed = _llmSettings.Model
    };
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// Summarization strategies
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// <summary>
/// Single-pass strategy: wraps the whole transcript in one user prompt and
/// issues exactly one chat-completion call.
/// </summary>
/// <param name="transcriptText">Full transcript text.</param>
/// <param name="metadata">Video metadata embedded in the user prompt.</param>
/// <param name="systemPrompt">System prompt to send with the request.</param>
/// <param name="ct">Cancellation token forwarded to the chat call.</param>
/// <returns>The model's reply text.</returns>
private async Task<string> SinglePassSummarizeAsync(
    string transcriptText,
    VideoMetadata metadata,
    string systemPrompt,
    CancellationToken ct) =>
    await CallChatCompletionAsync(systemPrompt, BuildUserPrompt(metadata, transcriptText), ct);
|
||||
|
||||
/// <summary>
/// Map-reduce strategy for long transcripts: the text is split into word chunks,
/// each chunk is summarized on its own, and the partial summaries are then merged
/// by one final chat call.
///
/// Consecutive chunks share a 200-word overlap window: every chunk after the first
/// starts with the last words of the chunk before it, so the model keeps some
/// context across chunk boundaries.
/// </summary>
/// <param name="transcriptText">Full transcript text; split on single spaces.</param>
/// <param name="metadata">Video metadata; title and channel are quoted in the prompts.</param>
/// <param name="chunkSystemPrompt">System prompt used for every per-chunk call.</param>
/// <param name="combineSystemPrompt">System prompt used for the final merge call.</param>
/// <param name="ct">Cancellation token forwarded to every chat call.</param>
/// <returns>The merged summary text returned by the final call.</returns>
private async Task<string> ChunkedSummarizeAsync(
    string transcriptText,
    VideoMetadata metadata,
    string chunkSystemPrompt,
    string combineSystemPrompt,
    CancellationToken ct)
{
    string[] allWords = transcriptText.Split(' ', StringSplitOptions.RemoveEmptyEntries);
    List<string[]> pieces = SplitIntoChunks(allWords, _summarizerSettings.ChunkWordLimit, overlapWords: 200);
    int total = pieces.Count;

    Console.WriteLine($"\n [Chunking] Transcript split into {total} chunks for processing...");

    // Map: one request per chunk, strictly one after another.
    var partials = new List<string>(total);
    int ordinal = 0;
    foreach (string[] pieceWords in pieces)
    {
        ordinal++;
        Console.Write($" [Chunk {ordinal}/{total}] Summarizing");

        string pieceText = string.Join(" ", pieceWords);
        string request = $"This is segment {ordinal} of {total} from the video \"{metadata.Title}\":\n\n{pieceText}";
        partials.Add(await CallChatCompletionAsync(chunkSystemPrompt, request, ct));
    }

    // Reduce: label each partial summary and hand them all to the combine prompt.
    Console.Write(" [Combine] Merging chunk summaries into final summary");

    var labelled = new List<string>(partials.Count);
    for (int k = 0; k < partials.Count; k++)
    {
        labelled.Add($"Segment {k + 1} summary:\n{partials[k]}");
    }
    string mergedInput = string.Join("\n\n---\n\n", labelled);

    string finalRequest =
        $"Video: \"{metadata.Title}\" by {metadata.ChannelTitle}\n\n" +
        $"The following are summaries of {total} consecutive segments:\n\n{mergedInput}";

    return await CallChatCompletionAsync(combineSystemPrompt, finalRequest, ct);
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// Helpers
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// <summary>
/// Sends a system + user message pair to the streaming Chat Completions endpoint,
/// accumulates the streamed text parts, and returns the complete reply.
/// Progress is written to the console while the reply streams in: a " (working)"
/// marker on the first text part, then roughly one dot per 20 received characters.
/// </summary>
/// <param name="systemPrompt">Text of the system message.</param>
/// <param name="userMessage">Text of the user message.</param>
/// <param name="ct">Cancellation token passed to the streaming call.</param>
/// <returns>The concatenated text of all streamed content parts.</returns>
/// <remarks>
/// Exceptions from the streaming call (including cancellation) are rethrown
/// unchanged after a " Failed!" line has been written, so the console never
/// reports "Done!" for a request that did not complete.
/// </remarks>
private async Task<string> CallChatCompletionAsync(
    string systemPrompt,
    string userMessage,
    CancellationToken ct)
{
    const int CharsPerProgressDot = 20;

    var messages = new List<ChatMessage>
    {
        new SystemChatMessage(systemPrompt),
        new UserChatMessage(userMessage)
    };

    var options = new ChatCompletionOptions
    {
        MaxOutputTokenCount = _llmSettings.MaxTokens
    };

    var sw = System.Diagnostics.Stopwatch.StartNew();
    var fullContent = new System.Text.StringBuilder();

    // Length the reply must reach before the next progress dot is printed.
    // A threshold is used instead of "Length % 20 == 0" because text arrives in
    // multi-character parts and seldom lands exactly on a multiple of 20.
    int nextDotAt = CharsPerProgressDot;

    try
    {
        await foreach (var update in _chatClient.CompleteChatStreamingAsync(messages, options, ct))
        {
            foreach (var part in update.ContentUpdate)
            {
                if (string.IsNullOrEmpty(part.Text))
                {
                    continue;
                }

                if (fullContent.Length == 0)
                {
                    // First text received from the model.
                    Console.Write(" (working)");
                }

                fullContent.Append(part.Text);

                // At most one dot per streamed part, about every 20 characters.
                if (fullContent.Length >= nextDotAt)
                {
                    Console.Write(".");
                    nextDotAt = fullContent.Length + CharsPerProgressDot;
                }
            }
        }
    }
    catch
    {
        // Terminate the progress line honestly, then let the caller see the error.
        sw.Stop();
        Console.WriteLine($" Failed! ({sw.Elapsed.TotalSeconds:F1}s)");
        throw;
    }

    sw.Stop();
    Console.WriteLine($" Done! ({sw.Elapsed.TotalSeconds:F1}s)");

    return fullContent.ToString();
}
|
||||
|
||||
/// <summary>
/// Composes the user-turn prompt for single-pass summarization: a short header
/// with title, channel, publish date and duration, followed by the transcript.
/// </summary>
/// <param name="metadata">Source of the header fields.</param>
/// <param name="transcriptText">Transcript text placed after the header.</param>
/// <returns>The formatted prompt text.</returns>
private static string BuildUserPrompt(VideoMetadata metadata, string transcriptText) =>
    $"""
    Video title: {metadata.Title}
    Channel: {metadata.ChannelTitle}
    Published: {metadata.PublishedAt:MMMM d, yyyy}
    Duration: {metadata.FormattedDuration}

    Full transcript:
    {transcriptText}
    """;
|
||||
|
||||
/// <summary>
/// Splits a word array into consecutive chunks of at most <paramref name="chunkSize"/>
/// words, where each chunk after the first begins with the last
/// <paramref name="overlapWords"/> words of the previous chunk.
/// </summary>
/// <param name="words">Words to split. An empty array yields an empty list.</param>
/// <param name="chunkSize">Maximum number of words per chunk; must be positive.</param>
/// <param name="overlapWords">
/// Number of words shared between neighbouring chunks. Values outside
/// <c>0 .. chunkSize - 1</c> are clamped into that range so the chunk start always
/// moves forward by at least one word.
/// </param>
/// <returns>The chunks in order; the final chunk always ends at the last word.</returns>
/// <exception cref="ArgumentNullException"><paramref name="words"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="chunkSize"/> is zero or negative.</exception>
private static List<string[]> SplitIntoChunks(string[] words, int chunkSize, int overlapWords)
{
    ArgumentNullException.ThrowIfNull(words);
    if (chunkSize <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(chunkSize), chunkSize, "Chunk size must be positive.");
    }

    // An overlap >= chunkSize would leave the start index stuck (or push it
    // negative), so the stride is forced to be at least one word.
    int overlap = Math.Clamp(overlapWords, 0, chunkSize - 1);
    int stride = chunkSize - overlap;

    var chunks = new List<string[]>();

    for (int start = 0; start < words.Length; start += stride)
    {
        int end = Math.Min(start + chunkSize, words.Length);
        chunks.Add(words[start..end]);

        // Once a chunk reaches the last word, a further chunk would only repeat
        // the overlap window.
        if (end == words.Length)
        {
            break;
        }
    }

    return chunks;
}
|
||||
|
||||
/// <summary>
/// Maps a transcript source to the caveat shown alongside the summary.
/// </summary>
/// <param name="source">How the transcript was obtained.</param>
/// <returns>
/// A warning for auto-generated, metadata-only and community-contributed sources;
/// <c>null</c> for every other value.
/// </returns>
private static string? BuildQualityWarning(TranscriptSource source)
{
    switch (source)
    {
        case TranscriptSource.AutoGenerated:
            return "⚠ This summary is based on YouTube's auto-generated captions (ASR). " +
                   "The transcript may contain errors, especially for technical terms, names, or accented speech.";

        case TranscriptSource.MetadataOnly:
            return "⚠ No captions were available. This summary is based on the video's title " +
                   "and description only — it may be incomplete or inaccurate.";

        case TranscriptSource.CommunityContributed:
            return "ℹ This summary is based on community-contributed captions. " +
                   "Quality is generally good but not guaranteed.";

        default:
            // Remaining sources (owner-published captions) need no caveat.
            return null;
    }
}
|
||||
}
|
||||
212
TranscriptFileService.cs
Normal file
212
TranscriptFileService.cs
Normal file
|
|
@ -0,0 +1,212 @@
|
|||
using System.Text;
|
||||
using YoutubeSummarizer.Models;
|
||||
|
||||
namespace YoutubeSummarizer.Services;
|
||||
|
||||
/// <summary>
|
||||
/// Saves video metadata and timestamped transcript to a plain text file.
|
||||
/// The file is formatted with metadata at the top followed by the transcript
|
||||
/// organized by timestamps.
|
||||
/// </summary>
|
||||
public static class TranscriptFileService
|
||||
{
|
||||
/// <summary>
/// Writes a plain-text report for a video: a metadata header, an optional
/// word-wrapped summary, transcript provenance, and the transcript itself.
/// </summary>
/// <param name="metadata">Video metadata printed in the header and used to build the file name.</param>
/// <param name="transcript">Transcript to write; timestamped segments are used when present.</param>
/// <param name="summaryText">Optional summary; the summary section is omitted when null or blank.</param>
/// <param name="outputDirectory">Target directory; defaults to the current directory and is created if needed.</param>
/// <param name="ct">Cancellation token for the file write.</param>
/// <returns>The full path of the written file.</returns>
public static async Task<string> SaveAsync(
    VideoMetadata metadata,
    VideoTranscript transcript,
    string? summaryText = null,
    string? outputDirectory = null,
    CancellationToken ct = default)
{
    const string Rule = "════════════════════════════════════════════════════════════════";

    string targetDirectory = outputDirectory ?? Environment.CurrentDirectory;
    Directory.CreateDirectory(targetDirectory);

    // File name: sanitized title plus the video ID.
    string filePath = Path.Combine(
        targetDirectory,
        $"{SanitizeFileName(metadata.Title)}_{metadata.VideoId}.txt");

    var report = new StringBuilder();

    // Appends text wrapped at 72 columns, one prefixed line per wrapped line.
    void AppendWrapped(string text)
    {
        foreach (var line in WordWrap(text, maxWidth: 72))
        {
            report.AppendLine($" {line}");
        }
    }

    // Metadata header
    report.AppendLine(Rule);
    report.AppendLine(" VIDEO METADATA");
    report.AppendLine(Rule);
    report.AppendLine();
    report.AppendLine($" Title: {metadata.Title}");
    report.AppendLine($" Channel: {metadata.ChannelTitle}");
    report.AppendLine($" Published: {metadata.PublishedAt:MMMM d, yyyy}");
    report.AppendLine($" Duration: {metadata.FormattedDuration}");
    report.AppendLine($" Video ID: {metadata.VideoId}");
    report.AppendLine($" URL: https://youtu.be/{metadata.VideoId}");

    // Optional summary
    if (!string.IsNullOrWhiteSpace(summaryText))
    {
        report.AppendLine();
        report.AppendLine(" ── SUMMARY ──────────────────────────────────────────────");
        report.AppendLine();
        AppendWrapped(summaryText);
    }

    report.AppendLine();

    // Transcript provenance
    string sourceLabel;
    switch (transcript.Source)
    {
        case TranscriptSource.OwnerPublished:
            sourceLabel = "Owner-published captions";
            break;
        case TranscriptSource.CommunityContributed:
            sourceLabel = "Community-contributed captions";
            break;
        case TranscriptSource.AutoGenerated:
            sourceLabel = "Auto-generated (ASR)";
            break;
        case TranscriptSource.MetadataOnly:
            sourceLabel = "Metadata only (no captions)";
            break;
        default:
            sourceLabel = "Unknown";
            break;
    }

    report.AppendLine($" Transcript Source: {sourceLabel}");
    report.AppendLine($" Word Count: {transcript.WordCount:N0}");
    report.AppendLine($" Saved: {DateTimeOffset.UtcNow:yyyy-MM-dd HH:mm} UTC");
    report.AppendLine();

    // Transcript body
    report.AppendLine(Rule);
    report.AppendLine(" TRANSCRIPT");
    report.AppendLine(Rule);
    report.AppendLine();

    if (transcript.Segments.Count > 0)
    {
        // Consecutive cues are merged into blocks of about 30 seconds so the file
        // does not carry one line per subtitle cue.
        foreach (var block in GroupSegmentsByInterval(transcript.Segments, intervalSeconds: 30))
        {
            report.AppendLine($" [{block[0].FormattedTimestamp}]");
            AppendWrapped(string.Join(" ", block.Select(segment => segment.Text)));
            report.AppendLine();
        }
    }
    else
    {
        // No timestamped segments: fall back to the plain transcript text.
        report.AppendLine(" (No timestamp data available)");
        report.AppendLine();
        AppendWrapped(transcript.Text);
        report.AppendLine();
    }

    report.AppendLine(Rule);
    report.AppendLine(" END OF TRANSCRIPT");
    report.AppendLine(Rule);

    await File.WriteAllTextAsync(filePath, report.ToString(), ct);
    return filePath;
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// Helpers
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// <summary>
/// Buckets timestamped segments into consecutive blocks. A new block is opened
/// whenever a segment starts at least <paramref name="intervalSeconds"/> seconds
/// after the first segment of the current block.
/// </summary>
/// <param name="segments">Segments in the order they should appear.</param>
/// <param name="intervalSeconds">Minimum start-time distance that opens a new block.</param>
/// <returns>The blocks in order; empty when there are no segments.</returns>
private static List<List<TimestampedSegment>> GroupSegmentsByInterval(
    IReadOnlyList<TimestampedSegment> segments,
    int intervalSeconds)
{
    var blocks = new List<List<TimestampedSegment>>();

    List<TimestampedSegment>? open = null;
    TimeSpan anchor = default;

    foreach (var segment in segments)
    {
        bool startsNewBlock =
            open is null || (segment.Start - anchor).TotalSeconds >= intervalSeconds;

        if (startsNewBlock)
        {
            if (open is not null)
            {
                blocks.Add(open);
            }

            open = new List<TimestampedSegment>();
            anchor = segment.Start;
        }

        open!.Add(segment);
    }

    // Flush the block that was still being filled when the input ended.
    if (open is not null)
    {
        blocks.Add(open);
    }

    return blocks;
}
|
||||
|
||||
/// <summary>
/// Turns a video title into a file-name fragment: characters the platform rejects
/// in file names become underscores, runs of whitespace/underscores collapse to a
/// single underscore, and the result is capped at 80 characters.
/// </summary>
/// <param name="title">Raw video title.</param>
/// <returns>The sanitized, possibly truncated, title.</returns>
private static string SanitizeFileName(string title)
{
    char[] forbidden = Path.GetInvalidFileNameChars();

    // Substitute every forbidden character with '_'.
    char[] mapped = title
        .Select(c => Array.IndexOf(forbidden, c) >= 0 ? '_' : c)
        .ToArray();
    string substituted = new string(mapped).Trim();

    // Collapse runs of whitespace and underscores into one underscore.
    string collapsed = System.Text.RegularExpressions.Regex.Replace(substituted, @"[\s_]+", "_");

    // Cap the length so the final path stays manageable.
    if (collapsed.Length <= 80)
    {
        return collapsed;
    }

    return collapsed.Substring(0, 80);
}
|
||||
|
||||
/// <summary>
/// Word-wraps text to lines of at most <paramref name="maxWidth"/> characters,
/// breaking only at spaces. Line breaks in the input are kept as paragraph
/// boundaries, and a blank paragraph produces one empty line.
/// </summary>
/// <param name="text">Text to wrap; paragraphs are separated by '\n' (a preceding '\r' is dropped).</param>
/// <param name="maxWidth">Maximum line length in characters.</param>
/// <returns>
/// The wrapped lines in order. A single word longer than <paramref name="maxWidth"/>
/// is emitted on a line of its own rather than split.
/// </returns>
private static IEnumerable<string> WordWrap(string text, int maxWidth)
{
    foreach (var rawParagraph in text.Split('\n'))
    {
        // Text with Windows line endings would otherwise keep '\r' glued to the
        // last word of every paragraph.
        var paragraph = rawParagraph.TrimEnd('\r');

        if (string.IsNullOrWhiteSpace(paragraph))
        {
            yield return string.Empty;
            continue;
        }

        var line = new StringBuilder();

        foreach (var word in paragraph.Split(' ', StringSplitOptions.RemoveEmptyEntries))
        {
            // Only flush a line that actually has content. Without this guard a
            // word that does not fit on an empty line emitted a spurious blank line.
            if (line.Length > 0 && line.Length + 1 + word.Length > maxWidth)
            {
                yield return line.ToString();
                line.Clear();
            }

            if (line.Length > 0)
            {
                line.Append(' ');
            }

            line.Append(word);
        }

        if (line.Length > 0)
        {
            yield return line.ToString();
        }
    }
}
|
||||
}
|
||||
161
VideoModels.cs
Normal file
161
VideoModels.cs
Normal file
|
|
@ -0,0 +1,161 @@
|
|||
namespace YoutubeSummarizer.Models;
|
||||
|
||||
/// <summary>
/// Metadata describing a single video. This is a slim projection: only the
/// fields the summarization workflow uses are kept.
/// </summary>
public sealed class VideoMetadata
{
    /// <summary>The 11-character YouTube video ID parsed from the URL.</summary>
    public required string VideoId { get; init; }

    /// <summary>Full video title as shown on YouTube.</summary>
    public required string Title { get; init; }

    /// <summary>Channel that published the video.</summary>
    public required string ChannelTitle { get; init; }

    /// <summary>UTC publish date of the video.</summary>
    public DateTimeOffset PublishedAt { get; init; }

    /// <summary>
    /// Video duration in ISO 8601 format (e.g. "PT1H4M32S"), stored raw and
    /// parsed on demand by <see cref="FormattedDuration"/>.
    /// </summary>
    public string? Duration { get; init; }

    /// <summary>Video description text, when available.</summary>
    public string? Description { get; init; }

    /// <summary>
    /// Human-readable duration parsed from <see cref="Duration"/>:
    /// <c>H:MM:SS</c> for one hour or more, otherwise <c>M:SS</c>.
    /// Returns "Unknown" when <see cref="Duration"/> is null, blank, or not a
    /// valid ISO 8601 duration.
    /// </summary>
    public string FormattedDuration
    {
        get
        {
            if (string.IsNullOrWhiteSpace(Duration))
            {
                return "Unknown";
            }

            TimeSpan span;
            try
            {
                span = System.Xml.XmlConvert.ToTimeSpan(Duration);
            }
            catch (FormatException)
            {
                // A display-only property should not throw on a malformed value.
                return "Unknown";
            }

            // TotalHours (not the 0-23 Hours component) keeps durations of a day
            // or more intact. Always printing the minutes field means very short
            // videos show "0:32" and a zero duration shows "0:00" instead of an
            // empty string.
            int totalHours = (int)span.TotalHours;
            return totalHours > 0
                ? $"{totalHours}:{span.Minutes:00}:{span.Seconds:00}"
                : $"{span.Minutes}:{span.Seconds:00}";
        }
    }
}
|
||||
|
||||
/// <summary>
/// Describes one caption track of a video. A video can expose several tracks
/// (different languages, manual versus auto-generated).
/// </summary>
public sealed class CaptionTrack
{
    /// <summary>Identifier of the track.</summary>
    public required string TrackId { get; init; }

    /// <summary>Language of the track as a BCP-47 tag, e.g. "en".</summary>
    public required string Language { get; init; }

    /// <summary>Kind of track: "standard", "asr" (auto-generated) or "forced".</summary>
    public required string TrackKind { get; init; }

    /// <summary>Display name of the track.</summary>
    public required string Name { get; init; }

    /// <summary>
    /// Whether <see cref="TrackKind"/> equals "asr" (compared case-insensitively),
    /// i.e. the track came from automatic speech recognition.
    /// </summary>
    public bool IsAutoGenerated
    {
        get { return TrackKind.Equals("asr", StringComparison.OrdinalIgnoreCase); }
    }
}
|
||||
|
||||
/// <summary>
/// A video's transcript text plus provenance: which track it came from, how it
/// was obtained, and (when available) the individual timestamped segments.
/// </summary>
public sealed class VideoTranscript
{
    /// <summary>ID of the video this transcript belongs to.</summary>
    public required string VideoId { get; init; }

    /// <summary>The concatenated, cleaned transcript text.</summary>
    public required string Text { get; init; }

    /// <summary>Caption track the text was taken from, or null when there is none.</summary>
    public CaptionTrack? SourceTrack { get; init; }

    /// <summary>
    /// How the transcript was obtained; context for judging how far the
    /// resulting summary can be trusted.
    /// </summary>
    public TranscriptSource Source { get; init; }

    /// <summary>
    /// Timestamped segments of the caption track. Defaults to an empty list,
    /// which is what transcripts without timing information carry.
    /// </summary>
    public IReadOnlyList<TimestampedSegment> Segments { get; init; } = Array.Empty<TimestampedSegment>();

    /// <summary>
    /// Approximate word count: the number of non-empty pieces obtained by
    /// splitting <see cref="Text"/> on single spaces. Recomputed on every access.
    /// </summary>
    public int WordCount
    {
        get
        {
            string[] pieces = Text.Split(' ', StringSplitOptions.RemoveEmptyEntries);
            return pieces.Length;
        }
    }
}
|
||||
|
||||
/// <summary>
/// One caption cue: its start offset, its duration, and its text.
/// </summary>
public sealed class TimestampedSegment
{
    /// <summary>Offset of the cue from the beginning of the video.</summary>
    public TimeSpan Start { get; init; }

    /// <summary>How long the cue lasts.</summary>
    public TimeSpan Duration { get; init; }

    /// <summary>Caption text of the cue.</summary>
    public required string Text { get; init; }

    /// <summary>
    /// <see cref="Start"/> formatted as <c>hh:mm:ss</c> when it is one hour or
    /// more, otherwise as <c>mm:ss</c>. No brackets are added here.
    /// </summary>
    public string FormattedTimestamp
    {
        get
        {
            string format = Start.TotalHours >= 1 ? @"hh\:mm\:ss" : @"mm\:ss";
            return Start.ToString(format);
        }
    }
}
|
||||
|
||||
/// <summary>
/// How a transcript was obtained. Members are listed from most to least reliable.
/// </summary>
public enum TranscriptSource
{
    /// <summary>Caption track provided by the video owner.</summary>
    OwnerPublished = 0,

    /// <summary>Community-contributed captions (retired by YouTube, but such tracks may still exist).</summary>
    CommunityContributed = 1,

    /// <summary>Captions produced by YouTube's automatic speech recognition; less reliable.</summary>
    AutoGenerated = 2,

    /// <summary>No captions at all; only the video's metadata/description is available.</summary>
    MetadataOnly = 3
}
|
||||
|
||||
/// <summary>
/// Selects the prompt style used for summarization.
/// </summary>
public enum SummaryMode
{
    /// <summary>Default: a detailed summary with bullet points and takeaways.</summary>
    Standard = 0,

    /// <summary>
    /// Personal Information Filter: a brief 1–2 sentence summary, a relevance
    /// evaluation against personal priorities (time, finances, health, family,
    /// service to others), and a one-word verdict of ACT, MONITOR, or IGNORE.
    /// </summary>
    PersonalFilter = 1
}
|
||||
|
||||
/// <summary>
/// Result of summarizing one video: the summary text plus the context needed
/// to interpret it.
/// </summary>
public sealed class VideoSummary
{
    /// <summary>Metadata of the summarized video.</summary>
    public required VideoMetadata Metadata { get; init; }

    /// <summary>The generated summary.</summary>
    public required string SummaryText { get; init; }

    /// <summary>How the underlying transcript was obtained.</summary>
    public required TranscriptSource TranscriptSource { get; init; }

    /// <summary>Name of the model that produced the summary.</summary>
    public required string ModelUsed { get; init; }

    /// <summary>
    /// Caveat to display when the transcript data is low-quality or missing;
    /// null when no caveat applies.
    /// </summary>
    public string? QualityWarning { get; init; }

    /// <summary>Creation time of the summary; defaults to the UTC time at construction.</summary>
    public DateTimeOffset GeneratedAt { get; init; } = DateTimeOffset.UtcNow;
}
|
||||
518
YouTubeService.cs
Normal file
518
YouTubeService.cs
Normal file
|
|
@ -0,0 +1,518 @@
|
|||
using System.Diagnostics;
|
||||
using System.Text.Json;
|
||||
using YoutubeSummarizer.Models;
|
||||
|
||||
namespace YoutubeSummarizer.Services;
|
||||
|
||||
/// <summary>
|
||||
/// Uses yt-dlp (https://github.com/yt-dlp/yt-dlp) to retrieve video metadata
|
||||
/// and download caption tracks. No YouTube API key required.
|
||||
///
|
||||
/// yt-dlp is the de-facto standard tool for reliably extracting video
|
||||
/// information and subtitles from YouTube. It must be installed and
|
||||
/// available on PATH (e.g. pip install yt-dlp).
|
||||
/// </summary>
|
||||
public sealed class YouTubeService
|
||||
{
|
||||
/// <summary>HTTP client supplied by the caller and kept for the lifetime of the service.</summary>
private readonly HttpClient _httpClient;

/// <summary>
/// Creates the service around an existing <see cref="HttpClient"/>.
/// </summary>
/// <param name="httpClient">Client instance to store; its lifetime is managed by the caller.</param>
public YouTubeService(HttpClient httpClient) => _httpClient = httpClient;
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// Public API
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// <summary>
/// Parses a YouTube video ID from any common URL format, or accepts a bare ID.
/// Handles: <c>watch?v=</c>, <c>youtu.be/</c>, <c>/embed/</c>, <c>/shorts/</c>, <c>/v/</c>.
/// </summary>
/// <param name="url">URL or raw video ID; surrounding whitespace is ignored.</param>
/// <returns>
/// The 11-character video ID, or <c>null</c> when the input is null/blank or no
/// well-formed ID can be found. Every returned value consists solely of
/// <c>A-Z a-z 0-9 _ -</c>, so it is safe to embed in URLs and file names.
/// </returns>
public static string? ExtractVideoId(string url)
{
    if (string.IsNullOrWhiteSpace(url))
    {
        return null;
    }

    // Normalize: strip whitespace the user may have pasted.
    url = url.Trim();

    if (Uri.TryCreate(url, UriKind.Absolute, out var uri))
    {
        var segments = uri.AbsolutePath.Split('/', StringSplitOptions.RemoveEmptyEntries);

        // Short links: https://youtu.be/VIDEO_ID. The host is matched exactly
        // (or as a sub-domain) instead of by substring.
        bool isShortLinkHost =
            uri.Host.Equals("youtu.be", StringComparison.OrdinalIgnoreCase) ||
            uri.Host.EndsWith(".youtu.be", StringComparison.OrdinalIgnoreCase);

        if (isShortLinkHost)
        {
            return segments.Length > 0 && IsVideoId(segments[0]) ? segments[0] : null;
        }

        // Standard watch URLs: ?v=VIDEO_ID
        var query = System.Web.HttpUtility.ParseQueryString(uri.Query);
        if (query["v"] is { } vParam && IsVideoId(vParam))
        {
            return vParam;
        }

        // Path-based URLs: /embed/VIDEO_ID, /shorts/VIDEO_ID, /v/VIDEO_ID
        for (int i = 0; i < segments.Length - 1; i++)
        {
            if ((segments[i] is "embed" or "shorts" or "v") && IsVideoId(segments[i + 1]))
            {
                return segments[i + 1];
            }
        }
    }

    // Raw ID passed directly.
    return IsVideoId(url) ? url : null;

    // Every candidate passes through the same check, so path fragments such as
    // "" or "abc/def" are never handed back as an ID.
    static bool IsVideoId(string candidate) =>
        System.Text.RegularExpressions.Regex.IsMatch(candidate, @"\A[A-Za-z0-9_-]{11}\z");
}
|
||||
|
||||
/// <summary>
/// Fetches metadata for a video by running <c>yt-dlp --dump-json --no-download</c>
/// and reading the JSON document it prints to standard output.
/// </summary>
/// <param name="videoId">Video ID appended to the watch URL passed to yt-dlp.</param>
/// <param name="ct">
/// Cancels the wait for yt-dlp. On cancellation the yt-dlp process tree is
/// killed before the <see cref="OperationCanceledException"/> is rethrown.
/// </param>
/// <returns>
/// The parsed metadata, or <c>null</c> when yt-dlp exits with a non-zero code,
/// prints nothing, or prints JSON that cannot be read.
/// </returns>
/// <remarks>
/// Failure to start the process (for example yt-dlp missing from PATH) is not
/// caught here and propagates from <see cref="Process.Start()"/>.
/// </remarks>
public async Task<VideoMetadata?> GetVideoMetadataAsync(string videoId, CancellationToken ct = default)
{
    var psi = new ProcessStartInfo
    {
        FileName = "yt-dlp",
        RedirectStandardOutput = true,
        RedirectStandardError = true,
        UseShellExecute = false,
        CreateNoWindow = true,
    };
    psi.ArgumentList.Add("--dump-json");
    psi.ArgumentList.Add("--no-download");
    psi.ArgumentList.Add($"https://www.youtube.com/watch?v={videoId}");

    using var process = new Process { StartInfo = psi };
    process.Start();

    // Both pipes are drained concurrently so yt-dlp can never block on a full
    // stderr buffer while stdout is being read.
    var stdoutTask = process.StandardOutput.ReadToEndAsync(ct);
    var stderrTask = process.StandardError.ReadToEndAsync(ct);

    string json;
    try
    {
        await process.WaitForExitAsync(ct);
        json = await stdoutTask;
        await stderrTask; // observed so a read failure is not left unobserved
    }
    catch (OperationCanceledException)
    {
        // Disposing the Process object does not stop the child process, so it
        // has to be terminated explicitly.
        try
        {
            if (!process.HasExited)
            {
                process.Kill(entireProcessTree: true);
            }
        }
        catch (InvalidOperationException)
        {
            // The process exited between the check and the kill.
        }
        catch (System.ComponentModel.Win32Exception)
        {
            // The process could not be terminated; nothing more can be done here.
        }

        throw;
    }

    if (process.ExitCode != 0 || string.IsNullOrWhiteSpace(json))
    {
        return null;
    }

    try
    {
        using var doc = JsonDocument.Parse(json);
        var root = doc.RootElement;

        var title = ReadString(root, "title") ?? "(no title)";
        var channel = ReadString(root, "channel") ?? "(unknown channel)";
        var description = ReadString(root, "description");

        // yt-dlp reports the duration in seconds. It is re-encoded as an ISO 8601
        // string because VideoMetadata.Duration is stored in that form.
        string? isoDuration = null;
        if (root.TryGetProperty("duration", out var dur) && dur.ValueKind == JsonValueKind.Number)
        {
            var ts = TimeSpan.FromSeconds(dur.GetDouble());
            isoDuration = $"PT{(int)ts.TotalHours}H{ts.Minutes}M{ts.Seconds}S";
        }

        // The upload date arrives as "YYYYMMDD". It is machine data, so it is
        // parsed with the invariant culture; the current culture could apply a
        // non-Gregorian calendar and misread the year.
        DateTimeOffset publishedAt = DateTimeOffset.MinValue;
        if (ReadString(root, "upload_date") is { } dateStr
            && DateTime.TryParseExact(
                dateStr,
                "yyyyMMdd",
                System.Globalization.CultureInfo.InvariantCulture,
                System.Globalization.DateTimeStyles.None,
                out var parsed))
        {
            publishedAt = new DateTimeOffset(parsed, TimeSpan.Zero);
        }

        return new VideoMetadata
        {
            VideoId = videoId,
            Title = title,
            ChannelTitle = channel,
            PublishedAt = publishedAt,
            Duration = isoDuration,
            Description = description
        };
    }
    catch (Exception ex) when (ex is JsonException
                                  or InvalidOperationException
                                  or FormatException
                                  or OverflowException
                                  or ArgumentException)
    {
        // Unreadable or unexpected JSON is reported as "no metadata".
        return null;
    }

    // Returns the property's string value, or null when it is absent or not a JSON string.
    static string? ReadString(JsonElement element, string name) =>
        element.TryGetProperty(name, out var value) && value.ValueKind == JsonValueKind.String
            ? value.GetString()
            : null;
}
|
||||
|
||||
/// <summary>
/// Retrieves the best available transcript for a video.
///
/// Subtitles are downloaded through yt-dlp (manual captions are tried before
/// auto-generated ones). When a non-blank transcript comes back it is returned
/// with a synthetic English caption-track description; otherwise a
/// metadata-only transcript is built from the video metadata.
/// </summary>
/// <param name="metadata">Metadata of the video whose transcript is wanted.</param>
/// <param name="ct">Cancellation token forwarded to the subtitle download.</param>
/// <returns>A caption-based transcript, or the metadata-only fallback.</returns>
public async Task<VideoTranscript> GetTranscriptAsync(
    VideoMetadata metadata,
    CancellationToken ct = default)
{
    var (captionText, captionSegments, autoGenerated) =
        await DownloadSubtitlesWithTimestampsAsync(metadata.VideoId, ct);

    // Nothing usable was downloaded: fall back to metadata only.
    if (string.IsNullOrWhiteSpace(captionText))
    {
        return BuildMetadataOnlyTranscript(metadata);
    }

    var track = new CaptionTrack
    {
        TrackId = "yt-dlp",
        Language = "en",
        TrackKind = autoGenerated ? "asr" : "standard",
        Name = autoGenerated ? "Auto-generated (en)" : "English"
    };

    var origin = autoGenerated
        ? TranscriptSource.AutoGenerated
        : TranscriptSource.OwnerPublished;

    return new VideoTranscript
    {
        VideoId = metadata.VideoId,
        Text = captionText,
        Segments = captionSegments,
        SourceTrack = track,
        Source = origin
    };
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// Private helpers
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// <summary>
/// Text-only wrapper around <see cref="DownloadSubtitlesWithTimestampsAsync"/>:
/// runs the same download and drops the timestamped segments.
/// </summary>
/// <param name="videoId">ID of the video whose subtitles are downloaded.</param>
/// <param name="ct">Cancellation token forwarded to the download.</param>
/// <returns>The transcript text (possibly null) and whether it was auto-generated.</returns>
private static async Task<(string? Text, bool IsAuto)> DownloadSubtitlesWithYtDlpAsync(
    string videoId,
    CancellationToken ct)
{
    var download = await DownloadSubtitlesWithTimestampsAsync(videoId, ct);
    return (download.Text, download.IsAuto);
}
|
||||
|
||||
/// <summary>
/// Downloads subtitles into a fresh temporary directory and returns the plain
/// text together with the timestamped segments. Manual subtitles are tried
/// first; auto-generated ones are tried only if the first attempt yields no text.
/// The temporary directory is removed afterwards on a best-effort basis.
/// </summary>
/// <param name="videoId">ID of the video whose subtitles are downloaded.</param>
/// <param name="ct">Cancellation token forwarded to each yt-dlp run.</param>
/// <returns>
/// Text, segments and an auto-generated flag; <c>(null, empty list, false)</c>
/// when neither attempt produced text.
/// </returns>
private static async Task<(string? Text, List<TimestampedSegment> Segments, bool IsAuto)> DownloadSubtitlesWithTimestampsAsync(
    string videoId,
    CancellationToken ct)
{
    string workDir = Path.Combine(Path.GetTempPath(), $"ytsumm_{videoId}_{Guid.NewGuid():N}");
    Directory.CreateDirectory(workDir);

    try
    {
        // Pass 1 (false): manual subtitles only. Pass 2 (true): auto-generated.
        foreach (bool autoGenerated in new[] { false, true })
        {
            var (captionText, captionSegments) = await RunYtDlpSubtitleWithTimestampsAsync(
                videoId, workDir, writeSub: !autoGenerated, writeAutoSub: autoGenerated, ct);

            if (!string.IsNullOrWhiteSpace(captionText))
            {
                return (captionText, captionSegments, autoGenerated);
            }
        }

        return (null, new List<TimestampedSegment>(), false);
    }
    finally
    {
        try
        {
            Directory.Delete(workDir, recursive: true);
        }
        catch
        {
            /* best effort */
        }
    }
}
|
||||
|
||||
/// <summary>
/// Runs one yt-dlp invocation that downloads English subtitles (no video) into
/// <paramref name="tempDir"/>, then parses the first subtitle file found.
/// </summary>
/// <param name="videoId">ID of the video whose subtitles are requested.</param>
/// <param name="tempDir">Existing directory that receives the subtitle files.</param>
/// <param name="writeSub">Adds <c>--write-sub</c> (manual subtitles) to the command line.</param>
/// <param name="writeAutoSub">Adds <c>--write-auto-sub</c> (auto-generated subtitles) to the command line.</param>
/// <param name="ct">
/// Cancels the wait for yt-dlp and the file read. On cancellation the yt-dlp
/// process tree is killed before the exception is rethrown.
/// </param>
/// <returns>
/// Parsed text and segments; <c>(null, empty list)</c> when yt-dlp fails, no
/// subtitle file was written, or the file is blank.
/// </returns>
private static async Task<(string? Text, List<TimestampedSegment> Segments)> RunYtDlpSubtitleWithTimestampsAsync(
    string videoId,
    string tempDir,
    bool writeSub,
    bool writeAutoSub,
    CancellationToken ct)
{
    // Remove subtitle files left by an earlier attempt in the same directory.
    // Every extension accepted further down is cleared (including .srt), so a
    // stale file can never be attributed to this run.
    foreach (var stale in Directory.GetFiles(tempDir).Where(IsSubtitleFile))
    {
        File.Delete(stale);
    }

    var psi = new ProcessStartInfo
    {
        FileName = "yt-dlp",
        RedirectStandardOutput = true,
        RedirectStandardError = true,
        UseShellExecute = false,
        CreateNoWindow = true,
    };

    psi.ArgumentList.Add("--skip-download");
    psi.ArgumentList.Add("--sub-lang");
    psi.ArgumentList.Add("en,en-US,en-GB,en.*");
    psi.ArgumentList.Add("--sub-format");
    psi.ArgumentList.Add("srv1/vtt/best");
    psi.ArgumentList.Add("-o");
    psi.ArgumentList.Add(Path.Combine(tempDir, "%(id)s"));

    if (writeSub)
    {
        psi.ArgumentList.Add("--write-sub");
    }

    if (writeAutoSub)
    {
        psi.ArgumentList.Add("--write-auto-sub");
    }

    psi.ArgumentList.Add($"https://www.youtube.com/watch?v={videoId}");

    using var process = new Process { StartInfo = psi };
    process.Start();

    // Both pipes are drained concurrently so yt-dlp can never block on a full buffer.
    var stdoutTask = process.StandardOutput.ReadToEndAsync(ct);
    var stderrTask = process.StandardError.ReadToEndAsync(ct);

    try
    {
        await process.WaitForExitAsync(ct);
        await Task.WhenAll(stdoutTask, stderrTask);
    }
    catch (OperationCanceledException)
    {
        // Disposing the Process object does not stop the child process.
        try
        {
            if (!process.HasExited)
            {
                process.Kill(entireProcessTree: true);
            }
        }
        catch (InvalidOperationException)
        {
            // The process exited between the check and the kill.
        }
        catch (System.ComponentModel.Win32Exception)
        {
            // The process could not be terminated; nothing more can be done here.
        }

        throw;
    }

    if (process.ExitCode != 0)
    {
        return (null, new List<TimestampedSegment>());
    }

    // Prefer srv1 (timed-text XML) over the other formats when several files exist.
    var subtitlePath = Directory.GetFiles(tempDir)
        .Where(IsSubtitleFile)
        .OrderBy(path => HasExtension(path, ".srv1") ? 0 : 1)
        .FirstOrDefault();

    if (subtitlePath is null)
    {
        return (null, new List<TimestampedSegment>());
    }

    var content = await File.ReadAllTextAsync(subtitlePath, ct);

    if (string.IsNullOrWhiteSpace(content))
    {
        return (null, new List<TimestampedSegment>());
    }

    return HasExtension(subtitlePath, ".srv1")
        ? ParseTimedTextXmlWithTimestamps(content)
        : ParseVttOrSrtWithTimestamps(content);

    // File extensions are identifiers, not linguistic text, so they are compared ordinally.
    static bool HasExtension(string path, string extension) =>
        path.EndsWith(extension, StringComparison.OrdinalIgnoreCase);

    static bool IsSubtitleFile(string path) =>
        HasExtension(path, ".srv1") || HasExtension(path, ".vtt") || HasExtension(path, ".srt");
}
|
||||
|
||||
/// <summary>
/// Converts YouTube srv1 timed-text XML into a flat transcript plus one
/// timestamped segment per non-empty <c>text</c> element.
/// </summary>
/// <param name="xml">Raw srv1 document content.</param>
/// <returns>
/// The segment texts joined with single spaces, together with the segments.
/// If anything throws while parsing, the input is returned unchanged as the
/// text with an empty segment list.
/// </returns>
private static (string Text, List<TimestampedSegment> Segments) ParseTimedTextXmlWithTimestamps(string xml)
{
    // Reads a seconds-valued attribute; missing or unparsable values become zero.
    static TimeSpan ReadSeconds(System.Xml.Linq.XElement node, string attributeName)
    {
        var raw = node.Attribute(attributeName)?.Value;
        var ok = double.TryParse(
            raw,
            System.Globalization.NumberStyles.Float,
            System.Globalization.CultureInfo.InvariantCulture,
            out var seconds);

        if (!ok)
        {
            return TimeSpan.Zero;
        }

        return TimeSpan.FromSeconds(seconds);
    }

    try
    {
        var transcriptWords = new List<string>();
        var cues = new List<TimestampedSegment>();
        var document = System.Xml.Linq.XDocument.Parse(xml);

        foreach (var node in document.Descendants("text"))
        {
            // Decode HTML entities left in the element value, then collapse whitespace runs.
            var body = System.Text.RegularExpressions.Regex
                .Replace(System.Web.HttpUtility.HtmlDecode(node.Value), @"\s+", " ")
                .Trim();

            if (body.Length == 0)
            {
                continue;
            }

            transcriptWords.Add(body);
            cues.Add(new TimestampedSegment
            {
                Start = ReadSeconds(node, "start"),
                Duration = ReadSeconds(node, "dur"),
                Text = body
            });
        }

        return (string.Join(" ", transcriptWords), cues);
    }
    catch
    {
        // Fallback: hand the raw input back as the text, with no segments.
        return (xml, new List<TimestampedSegment>());
    }
}
|
||||
|
||||
/// <summary>
/// Plain-text view of YouTube's srv1 timed-text XML: delegates to
/// <see cref="ParseTimedTextXmlWithTimestamps"/> and discards the segments.
/// </summary>
/// <remarks>
/// Expected document shape:
/// <code>
/// &lt;transcript&gt;
///   &lt;text start="0.5" dur="2.1"&gt;Hello world&lt;/text&gt;
///   ...
/// &lt;/transcript&gt;
/// </code>
/// </remarks>
private static string ParseTimedTextXml(string xml) =>
    ParseTimedTextXmlWithTimestamps(xml).Text;
|
||||
|
||||
/// <summary>
/// Parses VTT or SRT subtitle formats into plain text and timestamped segments.
/// Strips cue identifiers and formatting tags while preserving timestamp associations.
/// </summary>
/// <param name="content">Raw subtitle file content.</param>
/// <returns>
/// The de-duplicated segment texts joined with single spaces, together with the
/// de-duplicated segments (consecutive segments with identical text collapse to the first).
/// </returns>
private static (string Text, List<TimestampedSegment> Segments) ParseVttOrSrtWithTimestamps(string content)
{
    // Cue timing line: "00:01:23.456 --> 00:01:27.890" (VTT) or
    // "00:01:23,456 --> 00:01:27,890" (SRT). The comma has to be accepted here,
    // otherwise SRT timing lines are not recognised and end up in the transcript text.
    var timingPattern = new System.Text.RegularExpressions.Regex(
        @"^(\d{2}:\d{2}[:\.][\d\.,]+)\s*-->\s*(\d{2}:\d{2}[:\.][\d\.,]+)");
    var digitsOnlyPattern = new System.Text.RegularExpressions.Regex(@"^\d+$");
    var tagPattern = new System.Text.RegularExpressions.Regex(@"<[^>]+>");

    var segments = new List<TimestampedSegment>();
    var allLines = content.Split('\n').Select(l => l.Trim()).ToArray();

    var currentStart = TimeSpan.Zero;
    var currentEnd = TimeSpan.Zero;
    var currentText = new List<string>();

    // True between a timing line and the next blank line.
    var insideCue = false;

    // Emits the buffered caption lines (if any) as one segment for the current cue.
    void FlushCue()
    {
        if (currentText.Count == 0)
            return;

        segments.Add(new TimestampedSegment
        {
            Start = currentStart,
            Duration = currentEnd - currentStart,
            Text = string.Join(" ", currentText)
        });
        currentText.Clear();
    }

    for (var i = 0; i < allLines.Length; i++)
    {
        var line = allLines[i];

        // A blank line terminates the current cue.
        if (line.Length == 0)
        {
            FlushCue();
            insideCue = false;
            continue;
        }

        // Skip headers and metadata.
        if (line.StartsWith("WEBVTT", StringComparison.Ordinal) ||
            line.StartsWith("NOTE", StringComparison.Ordinal) ||
            line.StartsWith("Kind:", StringComparison.Ordinal) ||
            line.StartsWith("Language:", StringComparison.Ordinal))
        {
            continue;
        }

        var tsMatch = timingPattern.Match(line);
        if (tsMatch.Success)
        {
            // A new cue starts: flush whatever the previous cue buffered.
            FlushCue();
            currentStart = ParseVttTimestamp(tsMatch.Groups[1].Value);
            currentEnd = ParseVttTimestamp(tsMatch.Groups[2].Value);
            insideCue = true;
            continue;
        }

        if (digitsOnlyPattern.IsMatch(line))
        {
            // A digits-only line is a cue identifier when it directly precedes a timing
            // line, and is ignored outside any cue. Inside a cue it is real caption text
            // (e.g. "2020") and must not be dropped.
            var nextIsTiming = i + 1 < allLines.Length && timingPattern.IsMatch(allLines[i + 1]);
            if (nextIsTiming || !insideCue)
                continue;
        }

        // Content line: strip inline tags and decode HTML entities.
        var stripped = tagPattern.Replace(line, "");
        var decoded = System.Web.HttpUtility.HtmlDecode(stripped).Trim();
        if (!string.IsNullOrEmpty(decoded))
            currentText.Add(decoded);
    }

    // Flush the last cue (files do not always end with a blank line).
    FlushCue();

    // Deduplicate consecutive identical text segments (common in VTT).
    var deduped = new List<TimestampedSegment>();
    string? prevText = null;
    foreach (var seg in segments)
    {
        if (seg.Text != prevText)
            deduped.Add(seg);
        prevText = seg.Text;
    }

    var plainText = string.Join(" ", deduped.Select(s => s.Text));
    return (plainText, deduped);
}
|
||||
|
||||
/// <summary>Parses a VTT/SRT timestamp string into a TimeSpan.</summary>
/// <param name="ts">
/// Timestamp in <c>HH:MM:SS.mmm</c> or <c>MM:SS.mmm</c> form; the fractional
/// separator may be "." (VTT) or "," (SRT).
/// </param>
/// <returns>
/// The parsed time, rounded to the nearest millisecond. Components that fail to
/// parse count as zero; input without one or two ':' separators yields
/// <see cref="TimeSpan.Zero"/>.
/// </returns>
private static TimeSpan ParseVttTimestamp(string ts)
{
    // Normalize: VTT uses "." for ms, SRT uses "," — handle both.
    var parts = ts.Replace(',', '.').Split(':');

    // Handle both HH:MM:SS.mmm (3 parts) and MM:SS.mmm (2 parts).
    if (parts.Length is not (2 or 3))
        return TimeSpan.Zero;

    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    var hours = 0;
    if (parts.Length == 3)
        int.TryParse(parts[0], System.Globalization.NumberStyles.Integer, invariant, out hours);

    int.TryParse(parts[^2], System.Globalization.NumberStyles.Integer, invariant, out var minutes);
    double.TryParse(parts[^1], System.Globalization.NumberStyles.Float, invariant, out var seconds);

    // Round rather than truncate: values such as 1.001 are stored slightly below their
    // decimal value, so truncating the fractional part would lose a millisecond.
    var milliseconds = (long)Math.Round(seconds * 1000.0, MidpointRounding.AwayFromZero);

    return new TimeSpan(
        hours * TimeSpan.TicksPerHour +
        minutes * TimeSpan.TicksPerMinute +
        milliseconds * TimeSpan.TicksPerMillisecond);
}
|
||||
|
||||
/// <summary>
/// Plain-text view of a VTT or SRT subtitle file: delegates to
/// <see cref="ParseVttOrSrtWithTimestamps"/> and keeps only the transcript text,
/// discarding the timestamped segments.
/// </summary>
private static string ParseVttOrSrt(string content) =>
    ParseVttOrSrtWithTimestamps(content).Text;
|
||||
|
||||
/// <summary>
/// Fallback used when no captions exist: synthesizes a transcript from the video's
/// metadata and marks it as <see cref="TranscriptSource.MetadataOnly"/>.
/// </summary>
/// <param name="metadata">Metadata supplying the video id, title, channel title and description.</param>
/// <returns>
/// A transcript whose text is the title, channel and description, or a
/// "nothing available" notice when the description is blank.
/// </returns>
private static VideoTranscript BuildMetadataOnlyTranscript(VideoMetadata metadata)
{
    string body;
    if (string.IsNullOrWhiteSpace(metadata.Description))
    {
        body = $"No transcript or description available for: {metadata.Title}";
    }
    else
    {
        body = $"Video title: {metadata.Title}\n\nChannel: {metadata.ChannelTitle}\n\nDescription:\n{metadata.Description}";
    }

    var transcript = new VideoTranscript
    {
        VideoId = metadata.VideoId,
        Text = body,
        SourceTrack = null,
        Source = TranscriptSource.MetadataOnly
    };

    return transcript;
}
|
||||
}
|
||||
36
YoutubeSummarizer.csproj
Normal file
36
YoutubeSummarizer.csproj
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<Nullable>enable</Nullable>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<RootNamespace>YoutubeSummarizer</RootNamespace>
|
||||
<AssemblyName>YoutubeSummarizer</AssemblyName>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<None Update="appsettings.json">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<!-- OpenAI .NET SDK (official) -->
|
||||
<PackageReference Include="OpenAI" Version="2.1.0" />
|
||||
|
||||
<!-- Microsoft.Extensions for config/DI patterns without full host overhead -->
|
||||
<PackageReference Include="Microsoft.Extensions.Configuration" Version="10.0.0" />
|
||||
<PackageReference Include="Microsoft.Extensions.Configuration.Json" Version="10.0.0" />
|
||||
<PackageReference Include="Microsoft.Extensions.Configuration.EnvironmentVariables" Version="10.0.0" />
|
||||
<!-- Binder provides the .Bind(object) extension method on IConfiguration -->
|
||||
<PackageReference Include="Microsoft.Extensions.Configuration.Binder" Version="10.0.0" />
|
||||
<PackageReference Include="Microsoft.Extensions.DependencyInjection" Version="10.0.0" />
|
||||
<!-- Http provides AddHttpClient() / IHttpClientFactory -->
|
||||
<PackageReference Include="Microsoft.Extensions.Http" Version="10.0.0" />
|
||||
|
||||
<!-- Polly for resilient HTTP retry logic -->
|
||||
<PackageReference Include="Polly" Version="8.4.1" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
14
appsettings.json
Normal file
14
appsettings.json
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
{
|
||||
|
||||
"LLM": {
|
||||
"BaseUrl": "http://localhost:11434/v1",
|
||||
"ApiKey": "ollama",
|
||||
"Model": "qwen3:14b",
|
||||
"MaxTokens": 1500,
|
||||
"TimeoutSeconds": 600
|
||||
},
|
||||
"Summarizer": {
|
||||
"ChunkWordLimit": 1500,
|
||||
"ShowTranscript": false
|
||||
}
|
||||
}
|
||||
24
summarize.sln
Normal file
24
summarize.sln
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio Version 17
|
||||
VisualStudioVersion = 17.5.2.0
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "YoutubeSummarizer", "YoutubeSummarizer.csproj", "{2364E226-41E1-8549-7D9A-3C959F71FD8A}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
Release|Any CPU = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{2364E226-41E1-8549-7D9A-3C959F71FD8A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{2364E226-41E1-8549-7D9A-3C959F71FD8A}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{2364E226-41E1-8549-7D9A-3C959F71FD8A}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{2364E226-41E1-8549-7D9A-3C959F71FD8A}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
GlobalSection(ExtensibilityGlobals) = postSolution
|
||||
SolutionGuid = {58A79D7B-0ADD-4677-A65B-B4E6E38D9AFE}
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
Loading…
Reference in a new issue