summarizer/VideoModels.cs

162 lines
5.9 KiB
C#
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

namespace YoutubeSummarizer.Models;
/// <summary>
/// Metadata returned from the YouTube Data API for a single video.
/// This is a slim projection — the API returns far more fields, but we
/// only bind what we actually need for the summarization workflow.
/// </summary>
public sealed class VideoMetadata
{
    /// <summary>Text shown when the duration is missing or cannot be parsed.</summary>
    private const string UnknownDuration = "Unknown";

    /// <summary>The 11-character YouTube video ID parsed from the URL.</summary>
    public required string VideoId { get; init; }

    /// <summary>Full video title as shown on YouTube.</summary>
    public required string Title { get; init; }

    /// <summary>Channel that published the video.</summary>
    public required string ChannelTitle { get; init; }

    /// <summary>UTC publish date of the video.</summary>
    public DateTimeOffset PublishedAt { get; init; }

    /// <summary>
    /// Video duration in ISO 8601 format (e.g. "PT1H4M32S").
    /// We store it raw and parse it for display purposes.
    /// </summary>
    public string? Duration { get; init; }

    /// <summary>First 5000 characters of the video description (API cap).</summary>
    public string? Description { get; init; }

    /// <summary>
    /// Human-readable duration parsed from <see cref="Duration"/>.
    /// </summary>
    /// <remarks>
    /// Returns <c>H:mm:ss</c> when the duration is one hour or longer (hours are the
    /// total hour count, so a 25-hour value prints as "25:00:00") and <c>m:ss</c>
    /// otherwise (e.g. "4:32", "0:05", "0:00"). Returns "Unknown" when
    /// <see cref="Duration"/> is null, blank, negative, or not a parsable duration;
    /// this property never throws.
    /// </remarks>
    public string FormattedDuration
    {
        get
        {
            if (string.IsNullOrWhiteSpace(Duration))
            {
                return UnknownDuration;
            }

            TimeSpan parsed;
            try
            {
                parsed = System.Xml.XmlConvert.ToTimeSpan(Duration);
            }
            catch (Exception ex) when (ex is FormatException or OverflowException)
            {
                // A display-only property should degrade gracefully on bad API data.
                return UnknownDuration;
            }

            if (parsed < TimeSpan.Zero)
            {
                return UnknownDuration;
            }

            var culture = System.Globalization.CultureInfo.InvariantCulture;

            // TotalHours (not the Hours component) so values of a day or more do not wrap.
            var totalHours = (long)parsed.TotalHours;

            return totalHours >= 1
                ? string.Format(culture, "{0}:{1:00}:{2:00}", totalHours, parsed.Minutes, parsed.Seconds)
                : string.Format(culture, "{0}:{1:00}", parsed.Minutes, parsed.Seconds);
        }
    }
}
/// <summary>
/// One caption track that YouTube offers for a video.
/// A video may expose several tracks (different languages, auto-generated vs. manual).
/// </summary>
public sealed class CaptionTrack
{
    /// <summary>The <see cref="TrackKind"/> value that marks an automatically generated (ASR) track.</summary>
    private const string AutoGeneratedKind = "asr";

    /// <summary>Identifier of the caption track.</summary>
    public required string TrackId { get; init; }

    /// <summary>Track language as a BCP-47 tag, e.g. "en".</summary>
    public required string Language { get; init; }

    /// <summary>Kind of track: "standard", "asr" (auto), or "forced".</summary>
    public required string TrackKind { get; init; }

    /// <summary>Display name from YouTube.</summary>
    public required string Name { get; init; }

    /// <summary>
    /// True when <see cref="TrackKind"/> is "asr" (compared case-insensitively), i.e. the
    /// track was produced by YouTube's ASR system. ASR captions are less reliable —
    /// typos, missing punctuation, run-on sentences.
    /// </summary>
    public bool IsAutoGenerated
    {
        get { return TrackKind.Equals(AutoGeneratedKind, StringComparison.OrdinalIgnoreCase); }
    }
}
/// <summary>
/// The full textual transcript assembled from caption data,
/// along with provenance information about how it was obtained.
/// </summary>
public sealed class VideoTranscript
{
    /// <summary>ID of the video this transcript belongs to.</summary>
    public required string VideoId { get; init; }

    /// <summary>The concatenated, cleaned transcript text.</summary>
    public required string Text { get; init; }

    /// <summary>The caption track this text came from, if available.</summary>
    public CaptionTrack? SourceTrack { get; init; }

    /// <summary>
    /// How the transcript was obtained. This is important context for
    /// interpreting the quality of the summary.
    /// </summary>
    public TranscriptSource Source { get; init; }

    /// <summary>
    /// Individual timestamped segments from the caption track.
    /// Empty when timestamps are not available (e.g. metadata-only transcripts).
    /// </summary>
    public IReadOnlyList<TimestampedSegment> Segments { get; init; } = Array.Empty<TimestampedSegment>();

    /// <summary>
    /// Approximate word count of <see cref="Text"/>: the number of maximal runs of
    /// non-whitespace characters. Any Unicode whitespace (spaces, tabs, line breaks)
    /// separates words, so multi-line transcripts are counted correctly.
    /// </summary>
    public int WordCount
    {
        get
        {
            var words = 0;
            var insideWord = false;

            // Single pass, no allocation: count each transition from whitespace into a word.
            foreach (var ch in Text)
            {
                if (char.IsWhiteSpace(ch))
                {
                    insideWord = false;
                }
                else if (!insideWord)
                {
                    insideWord = true;
                    words++;
                }
            }

            return words;
        }
    }
}
/// <summary>
/// A single timestamped segment from a caption track.
/// Used when saving the transcript to a file with timestamp formatting.
/// </summary>
public sealed class TimestampedSegment
{
    /// <summary>Start time offset from the beginning of the video.</summary>
    public TimeSpan Start { get; init; }

    /// <summary>Duration of this caption segment.</summary>
    public TimeSpan Duration { get; init; }

    /// <summary>The caption text for this segment.</summary>
    public required string Text { get; init; }

    /// <summary>
    /// Formats <see cref="Start"/> as <c>HH:MM:SS</c> when it is one hour or more,
    /// otherwise as <c>MM:SS</c>. No surrounding brackets are added here.
    /// </summary>
    /// <remarks>
    /// The hour field is the total hour count, so offsets of 24 hours or more
    /// (e.g. long live-stream recordings) print as "25:02:03" rather than wrapping.
    /// </remarks>
    public string FormattedTimestamp
    {
        get
        {
            var culture = System.Globalization.CultureInfo.InvariantCulture;

            if (Start.TotalHours >= 1)
            {
                // "hh" in a TimeSpan format string is the 0-23 hours component,
                // so build the hour field from TotalHours instead.
                var totalHours = (long)Start.TotalHours;
                return string.Format(culture, "{0:00}:{1:00}:{2:00}", totalHours, Start.Minutes, Start.Seconds);
            }

            return Start.ToString(@"mm\:ss", culture);
        }
    }
}
/// <summary>
/// Identifies where a transcript came from. Members are declared from most to least
/// reliable, so a lower numeric value means a more trustworthy source.
/// This maps directly to the caption quality transparency layer discussed in LIKA.
/// </summary>
public enum TranscriptSource
{
    /// <summary>Caption track provided by the video owner and reviewed by a human.</summary>
    OwnerPublished = 0,

    /// <summary>Captions contributed by the community (YouTube retired this but tracks may exist).</summary>
    CommunityContributed = 1,

    /// <summary>Produced by YouTube's automatic speech recognition — less reliable.</summary>
    AutoGenerated = 2,

    /// <summary>No captions were available; the summary relies on metadata/description only.</summary>
    MetadataOnly = 3
}
/// <summary>
/// Selects the prompt style used when summarizing.
/// </summary>
public enum SummaryMode
{
    /// <summary>The default: a detailed summary with bullet points and takeaways.</summary>
    Standard = 0,

    /// <summary>
    /// Personal Information Filter — a brief summary, a relevance evaluation against
    /// personal priorities (time, finances, health, family, service to others), and
    /// a single-word verdict: ACT, MONITOR, or IGNORE.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the earlier comment read "brief 12 sentence summary"; this may be a
    /// garbled "1-2 sentence" — confirm against the prompt template.
    /// </remarks>
    PersonalFilter = 1
}
/// <summary>
/// The final deliverable: a structured summary of a YouTube video.
/// All members are init-only, so an instance is fixed once constructed.
/// </summary>
public sealed class VideoSummary
{
/// <summary>Metadata of the video this summary describes.</summary>
public required VideoMetadata Metadata { get; init; }
/// <summary>The generated summary text.</summary>
public required string SummaryText { get; init; }
/// <summary>How the transcript behind this summary was obtained.</summary>
public required TranscriptSource TranscriptSource { get; init; }
/// <summary>
/// Warning shown when the summary is based on low-quality or missing transcript data.
/// Null when the source is reliable.
/// </summary>
public string? QualityWarning { get; init; }
/// <summary>Model used to generate this summary.</summary>
public required string ModelUsed { get; init; }
/// <summary>
/// When the summary was generated. Defaults to the current UTC time at
/// construction unless an explicit value is supplied in the object initializer.
/// </summary>
public DateTimeOffset GeneratedAt { get; init; } = DateTimeOffset.UtcNow;
}