namespace YoutubeSummarizer.Models;
/// <summary>
/// Metadata returned from the YouTube Data API for a single video.
/// This is a slim projection — the API returns far more fields, but we
/// only bind what we actually need for the summarization workflow.
/// </summary>
public sealed class VideoMetadata
{
    /// <summary>The 11-character YouTube video ID parsed from the URL.</summary>
    public required string VideoId { get; init; }

    /// <summary>Full video title as shown on YouTube.</summary>
    public required string Title { get; init; }

    /// <summary>Channel that published the video.</summary>
    public required string ChannelTitle { get; init; }

    /// <summary>UTC publish date of the video.</summary>
    public DateTimeOffset PublishedAt { get; init; }

    /// <summary>
    /// Video duration in ISO 8601 format (e.g. "PT1H4M32S").
    /// Stored raw; <see cref="FormattedDuration"/> parses it for display.
    /// </summary>
    public string? Duration { get; init; }

    /// <summary>First 5000 characters of the video description (API cap).</summary>
    public string? Description { get; init; }

    /// <summary>
    /// Human-readable duration parsed from <see cref="Duration"/>:
    /// <c>m:ss</c> below one hour (e.g. "4:32", "0:05") and <c>h:mm:ss</c>
    /// from one hour up (e.g. "1:04:32"). Hours are not wrapped at 24, so a
    /// 26-hour duration renders as "26:03:04".
    /// Returns "Unknown" when <see cref="Duration"/> is null, blank, negative,
    /// or not a valid ISO 8601 duration — this getter never throws.
    /// </summary>
    public string FormattedDuration => FormatDuration(Duration);

    /// <summary>Converts a raw ISO 8601 duration into its display form.</summary>
    private static string FormatDuration(string? isoDuration)
    {
        if (string.IsNullOrWhiteSpace(isoDuration))
        {
            return "Unknown";
        }

        TimeSpan span;
        try
        {
            span = System.Xml.XmlConvert.ToTimeSpan(isoDuration);
        }
        catch (Exception ex) when (ex is FormatException or OverflowException)
        {
            // A display-only property should degrade gracefully on bad API data.
            return "Unknown";
        }

        if (span < TimeSpan.Zero)
        {
            return "Unknown";
        }

        // Use the total hour count: the "hh" format specifier only yields the
        // 0-23 hours component and would silently drop whole days.
        var wholeHours = (long)span.TotalHours;
        return wholeHours > 0
            ? wholeHours.ToString(System.Globalization.CultureInfo.InvariantCulture) + span.ToString(@"\:mm\:ss")
            : span.ToString(@"m\:ss");
    }
}
/// <summary>
/// Represents a single caption track available for a video.
/// YouTube can provide multiple tracks (languages, auto-generated vs. manual).
/// </summary>
public sealed class CaptionTrack
{
    /// <summary>Identifier of the caption track.</summary>
    public required string TrackId { get; init; }

    /// <summary>Track language as a BCP-47 tag, e.g. "en".</summary>
    public required string Language { get; init; }

    /// <summary>Kind of track: "standard", "asr" (auto), or "forced".</summary>
    public required string TrackKind { get; init; }

    /// <summary>Display name from YouTube.</summary>
    public required string Name { get; init; }

    /// <summary>
    /// True when the track was automatically generated by YouTube's ASR system,
    /// i.e. <see cref="TrackKind"/> equals "asr" (case-insensitive).
    /// ASR captions are less reliable — typos, missing punctuation, run-on sentences.
    /// </summary>
    public bool IsAutoGenerated =>
        // Static Equals tolerates a null TrackKind (returns false) instead of throwing.
        string.Equals(TrackKind, "asr", StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// The full textual transcript assembled from caption data,
/// along with provenance information about how it was obtained.
/// </summary>
public sealed class VideoTranscript
{
    /// <summary>ID of the video this transcript belongs to.</summary>
    public required string VideoId { get; init; }

    /// <summary>The concatenated, cleaned transcript text.</summary>
    public required string Text { get; init; }

    /// <summary>The caption track this text came from, if available.</summary>
    public CaptionTrack? SourceTrack { get; init; }

    /// <summary>
    /// How the transcript was obtained. This is important context for
    /// interpreting the quality of the summary.
    /// </summary>
    public TranscriptSource Source { get; init; }

    /// <summary>
    /// Individual timestamped segments from the caption track.
    /// Empty when timestamps are not available (e.g. metadata-only transcripts).
    /// </summary>
    public IReadOnlyList<TimestampedSegment> Segments { get; init; } = Array.Empty<TimestampedSegment>();

    /// <summary>
    /// Approximate word count of the raw transcript: the number of
    /// whitespace-delimited tokens in <see cref="Text"/>.
    /// </summary>
    public int WordCount =>
        // A null separator makes Split break on every Unicode whitespace character,
        // so words separated by newlines or tabs are counted individually rather
        // than being merged into one token as a space-only split would do.
        Text.Split((char[]?)null, StringSplitOptions.RemoveEmptyEntries).Length;
}
/// <summary>
/// A single timestamped segment from a caption track.
/// Used when saving the transcript to a file with timestamp formatting.
/// </summary>
public sealed class TimestampedSegment
{
    /// <summary>Start time offset from the beginning of the video.</summary>
    public TimeSpan Start { get; init; }

    /// <summary>Duration of this caption segment.</summary>
    public TimeSpan Duration { get; init; }

    /// <summary>The caption text for this segment.</summary>
    public required string Text { get; init; }

    /// <summary>
    /// Formats <see cref="Start"/> for display as <c>HH:MM:SS</c> when it is at
    /// least one hour, otherwise as <c>MM:SS</c>. No surrounding brackets are
    /// added here. Hours are not wrapped at 24 (a 25-hour offset renders as
    /// "25:00:00").
    /// </summary>
    public string FormattedTimestamp
    {
        get
        {
            if (Start.TotalHours < 1)
            {
                return Start.ToString(@"mm\:ss");
            }

            // "hh" would only emit the 0-23 hours component and drop whole days,
            // so the hour field is built from the total hour count instead.
            var wholeHours = (long)Start.TotalHours;
            return wholeHours.ToString("00", System.Globalization.CultureInfo.InvariantCulture)
                + Start.ToString(@"\:mm\:ss");
        }
    }
}
/// <summary>
/// Describes how a transcript was obtained, ordered from most to least reliable.
/// This maps directly to the caption quality transparency layer discussed in LIKA.
/// </summary>
/// <remarks>
/// Numeric values are pinned explicitly because the declaration order carries
/// meaning (lower value = more reliable source).
/// </remarks>
public enum TranscriptSource
{
    /// <summary>Human-reviewed caption track provided by the video owner.</summary>
    OwnerPublished = 0,

    /// <summary>Community-contributed captions (YouTube retired this but tracks may exist).</summary>
    CommunityContributed = 1,

    /// <summary>YouTube's automatic speech recognition — less reliable.</summary>
    AutoGenerated = 2,

    /// <summary>No captions available; summary based on metadata/description only.</summary>
    MetadataOnly = 3
}
/// <summary>
/// Controls which summarization prompt style is used.
/// </summary>
public enum SummaryMode
{
    /// <summary>
    /// Default detailed summary with bullet points and takeaways.
    /// Pinned to 0 so that <c>default(SummaryMode)</c> is this mode.
    /// </summary>
    Standard = 0,

    /// <summary>
    /// Personal Information Filter — brief 1–2 sentence summary, relevance
    /// evaluation against personal priorities (time, finances, health, family,
    /// service to others), and a single-word verdict: ACT, MONITOR, or IGNORE.
    /// </summary>
    PersonalFilter = 1
}
/// <summary>
/// The final deliverable: a structured summary of a YouTube video.
/// </summary>
public sealed class VideoSummary
{
    /// <summary>Metadata of the video this summary describes.</summary>
    public required VideoMetadata Metadata { get; init; }

    /// <summary>The generated summary text.</summary>
    public required string SummaryText { get; init; }

    /// <summary>How the transcript behind this summary was obtained.</summary>
    public required TranscriptSource TranscriptSource { get; init; }

    /// <summary>
    /// Warning shown when the summary is based on low-quality or missing transcript data.
    /// Null when the source is reliable.
    /// </summary>
    public string? QualityWarning { get; init; }

    /// <summary>Model used to generate this summary.</summary>
    public required string ModelUsed { get; init; }

    /// <summary>
    /// When the summary was generated. Defaults to the current UTC time at
    /// object construction unless set explicitly in the initializer.
    /// </summary>
    public DateTimeOffset GeneratedAt { get; init; } = DateTimeOffset.UtcNow;
}