using System.Text;
using YoutubeSummarizer.Models;

namespace YoutubeSummarizer.Services;

/// <summary>
/// Saves video metadata and a timestamped transcript to a plain text file.
/// The file starts with a metadata section (optionally including a summary),
/// followed by the transcript grouped into timestamped blocks.
/// </summary>
public static class TranscriptFileService
{
    /// <summary>Horizontal rule printed above and below each section banner.</summary>
    private const string SectionRule =
        "════════════════════════════════════════════════════════════════";

    /// <summary>Maximum width, in characters, of a wrapped line of body text.</summary>
    private const int WrapWidth = 72;

    /// <summary>Time span, in seconds, covered by one transcript block.</summary>
    private const int BlockIntervalSeconds = 30;

    /// <summary>Maximum number of characters of the sanitized title kept in the file name.</summary>
    private const int MaxTitleLength = 80;

    /// <summary>
    /// Saves the transcript and metadata to a text file in the specified directory.
    /// </summary>
    /// <param name="metadata">Video metadata written to the header and used to build the file name.</param>
    /// <param name="transcript">Transcript whose segments (or plain text, when there are no segments) are written.</param>
    /// <param name="summaryText">Optional summary; when not blank it is word-wrapped into the metadata section.</param>
    /// <param name="outputDirectory">
    /// Target directory, created if missing. <c>null</c> or empty means the current directory.
    /// </param>
    /// <param name="ct">Token passed to the file write.</param>
    /// <returns>The full path to the saved file.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="metadata"/> or <paramref name="transcript"/> is <c>null</c>.
    /// </exception>
    public static async Task<string> SaveAsync(
        VideoMetadata metadata,
        VideoTranscript transcript,
        string? summaryText = null,
        string? outputDirectory = null,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(metadata);
        ArgumentNullException.ThrowIfNull(transcript);

        // An empty path would make Directory.CreateDirectory throw, so treat it like "not specified".
        if (string.IsNullOrEmpty(outputDirectory))
        {
            outputDirectory = Environment.CurrentDirectory;
        }

        Directory.CreateDirectory(outputDirectory);

        // Build a safe file name from the video title; the video id keeps it unique per video.
        var safeTitle = SanitizeFileName(metadata.Title);
        var fileName = $"{safeTitle}_{metadata.VideoId}.txt";
        var filePath = Path.Combine(outputDirectory, fileName);

        var sb = new StringBuilder();

        // ── Metadata section ──
        AppendBanner(sb, " VIDEO METADATA");
        sb.AppendLine();
        sb.AppendLine($" Title: {metadata.Title}");
        sb.AppendLine($" Channel: {metadata.ChannelTitle}");
        sb.AppendLine($" Published: {metadata.PublishedAt:MMMM d, yyyy}");
        sb.AppendLine($" Duration: {metadata.FormattedDuration}");
        sb.AppendLine($" Video ID: {metadata.VideoId}");
        sb.AppendLine($" URL: https://youtu.be/{metadata.VideoId}");

        if (!string.IsNullOrWhiteSpace(summaryText))
        {
            sb.AppendLine();
            sb.AppendLine(" ── SUMMARY ──────────────────────────────────────────────");
            sb.AppendLine();
            AppendWrapped(sb, summaryText);
        }

        sb.AppendLine();

        // ── Transcript source ──
        var sourceLabel = transcript.Source switch
        {
            TranscriptSource.OwnerPublished => "Owner-published captions",
            TranscriptSource.CommunityContributed => "Community-contributed captions",
            TranscriptSource.AutoGenerated => "Auto-generated (ASR)",
            TranscriptSource.MetadataOnly => "Metadata only (no captions)",
            _ => "Unknown"
        };

        sb.AppendLine($" Transcript Source: {sourceLabel}");
        sb.AppendLine($" Word Count: {transcript.WordCount:N0}");
        sb.AppendLine($" Saved: {DateTimeOffset.UtcNow:yyyy-MM-dd HH:mm} UTC");
        sb.AppendLine();

        // ── Transcript section ──
        AppendBanner(sb, " TRANSCRIPT");
        sb.AppendLine();

        if (transcript.Segments.Count > 0)
        {
            // Group consecutive segments into ~30-second blocks so the output reads as
            // paragraphs instead of one line per subtitle cue.
            // NOTE(review): assumes segment.Start is a TimeSpan (the grouping subtracts two
            // Start values and reads TotalSeconds) — confirm against the model type.
            var blocks = GroupSegmentsByInterval(
                transcript.Segments,
                segment => segment.Start,
                BlockIntervalSeconds);

            foreach (var block in blocks)
            {
                // Each block is labelled with the timestamp of its first segment.
                sb.AppendLine($" [{block[0].FormattedTimestamp}]");

                var blockText = string.Join(" ", block.Select(s => s.Text));
                AppendWrapped(sb, blockText);
                sb.AppendLine();
            }
        }
        else
        {
            // No timestamps available — write the plain transcript text.
            sb.AppendLine(" (No timestamp data available)");
            sb.AppendLine();
            AppendWrapped(sb, transcript.Text);
            sb.AppendLine();
        }

        AppendBanner(sb, " END OF TRANSCRIPT");

        await File.WriteAllTextAsync(filePath, sb.ToString(), ct);
        return filePath;
    }

    // ─────────────────────────────────────────────────────────────────────────
    // Helpers
    // ─────────────────────────────────────────────────────────────────────────

    /// <summary>Appends a section banner: rule line, title line, rule line.</summary>
    private static void AppendBanner(StringBuilder sb, string title)
    {
        sb.AppendLine(SectionRule);
        sb.AppendLine(title);
        sb.AppendLine(SectionRule);
    }

    /// <summary>
    /// Word-wraps <paramref name="text"/> to <see cref="WrapWidth"/> and appends each
    /// resulting line prefixed with a single space.
    /// </summary>
    private static void AppendWrapped(StringBuilder sb, string? text)
    {
        foreach (var wrappedLine in WordWrap(text, WrapWidth))
        {
            sb.AppendLine($" {wrappedLine}");
        }
    }

    /// <summary>
    /// Groups timestamped segments into blocks based on a time interval.
    /// A new block starts at the first segment whose start is at least
    /// <paramref name="intervalSeconds"/> after the start of the current block.
    /// </summary>
    /// <typeparam name="T">Segment type; generic so this helper does not depend on the model's type name.</typeparam>
    /// <param name="segments">Segments in the order they should appear in the output.</param>
    /// <param name="getStart">Returns the start time of a segment.</param>
    /// <param name="intervalSeconds">Minimum span, in seconds, before a new block begins.</param>
    /// <returns>The blocks in order; empty when <paramref name="segments"/> is empty.</returns>
    private static List<List<T>> GroupSegmentsByInterval<T>(
        IReadOnlyList<T> segments,
        Func<T, TimeSpan> getStart,
        int intervalSeconds)
    {
        var blocks = new List<List<T>>();
        if (segments.Count == 0)
        {
            return blocks;
        }

        var currentBlock = new List<T> { segments[0] };
        var blockStart = getStart(segments[0]);

        for (var i = 1; i < segments.Count; i++)
        {
            var segment = segments[i];
            var start = getStart(segment);

            if ((start - blockStart).TotalSeconds >= intervalSeconds)
            {
                blocks.Add(currentBlock);
                currentBlock = new List<T>();
                blockStart = start;
            }

            currentBlock.Add(segment);
        }

        // The block in progress always holds at least one segment at this point.
        blocks.Add(currentBlock);
        return blocks;
    }

    /// <summary>
    /// Replaces characters that are invalid in file names with underscores, collapses
    /// runs of whitespace/underscores into a single underscore, and truncates the
    /// result to <see cref="MaxTitleLength"/> characters to avoid path-length issues.
    /// </summary>
    /// <param name="title">Raw video title.</param>
    /// <returns>A string usable as part of a file name.</returns>
    private static string SanitizeFileName(string title)
    {
        var invalid = Path.GetInvalidFileNameChars();
        var sb = new StringBuilder(title.Length);

        foreach (var ch in title)
        {
            sb.Append(Array.IndexOf(invalid, ch) < 0 ? ch : '_');
        }

        // Replace runs of spaces/underscores with a single underscore.
        var result = System.Text.RegularExpressions.Regex.Replace(
            sb.ToString().Trim(), @"[\s_]+", "_");

        if (result.Length <= MaxTitleLength)
        {
            return result;
        }

        // Do not cut a surrogate pair in half (emoji and other astral characters are
        // common in titles); a lone high surrogate would make the name invalid UTF-16.
        var cut = MaxTitleLength;
        if (char.IsHighSurrogate(result[cut - 1]))
        {
            cut--;
        }

        return result[..cut];
    }

    /// <summary>
    /// Word-wraps text at the specified width, breaking at spaces. Each input line
    /// (split on '\n', with a trailing '\r' removed) is wrapped independently; blank
    /// input lines produce one empty output line. A single word longer than
    /// <paramref name="maxWidth"/> is emitted on its own line, unbroken.
    /// </summary>
    /// <param name="text">Text to wrap; <c>null</c> yields no lines.</param>
    /// <param name="maxWidth">Maximum line width in characters.</param>
    /// <returns>The wrapped lines, lazily produced.</returns>
    private static IEnumerable<string> WordWrap(string? text, int maxWidth)
    {
        if (text is null)
        {
            yield break;
        }

        var current = new StringBuilder();

        foreach (var rawParagraph in text.Split('\n'))
        {
            // Input with CRLF line endings would otherwise leave '\r' glued to the last word.
            var paragraph = rawParagraph.TrimEnd('\r');

            if (string.IsNullOrWhiteSpace(paragraph))
            {
                yield return string.Empty;
                continue;
            }

            current.Clear();

            foreach (var word in paragraph.Split(' ', StringSplitOptions.RemoveEmptyEntries))
            {
                // Only flush when there is something to flush; otherwise an over-long
                // first word would produce a spurious empty line before it.
                if (current.Length > 0 && current.Length + 1 + word.Length > maxWidth)
                {
                    yield return current.ToString();
                    current.Clear();
                }

                if (current.Length > 0)
                {
                    current.Append(' ');
                }

                current.Append(word);
            }

            if (current.Length > 0)
            {
                yield return current.ToString();
            }
        }
    }
}