Skip to main content

omni_dev/transcript/
source.rs

1//! The [`TranscriptSource`] trait and its supporting value types.
2//!
3//! A *source* is a media platform (YouTube, Vimeo, podcast feed, generic
4//! captions URL, …) capable of resolving a locator to a transcript. Concrete
5//! implementations live under [`crate::transcript::sources`].
6
7use async_trait::async_trait;
8use serde::{Deserialize, Serialize};
9
10use crate::transcript::cue::Cue;
11use crate::transcript::error::Result;
12
/// Track-selection preferences a caller hands to
/// [`TranscriptSource::fetch`].
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct FetchOpts {
    /// Caption language the caller would like, as a language code such as
    /// `"en"` or `"en-US"`. Sources are expected to fall back by prefix, so
    /// a request for `"en"` is satisfied by an `"en-US"` track.
    pub language: String,
    /// Opt-in for auto-generated (ASR) tracks. When `true`, a source may
    /// fall through to an ASR track once no manual track matches; when
    /// `false`, running out of manual tracks should be reported as
    /// [`crate::transcript::TranscriptError::AutoCaptionsRequireOptIn`].
    pub allow_auto: bool,
    /// Target language for a translated track, requested only when this is
    /// set and no native track matches `language`. Sources without
    /// translation support should ignore it.
    pub translate_to: Option<String>,
}
29
30impl FetchOpts {
31    /// Construct options requesting `language` with no auto-captions and no
32    /// translation. Callers building richer requests should mutate the
33    /// returned struct.
34    pub fn new(language: impl Into<String>) -> Self {
35        Self {
36            language: language.into(),
37            allow_auto: false,
38            translate_to: None,
39        }
40    }
41}
42
43/// Whether a track was authored by humans, generated by ASR, or synthesised
44/// by the platform's machine translation.
45#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
46#[serde(rename_all = "lowercase")]
47pub enum TrackKind {
48    /// Human-authored captions.
49    Manual,
50    /// Auto-generated (ASR) captions.
51    Auto,
52    /// Machine-translated from another track.
53    Translated,
54}
55
56/// A fetched transcript: the cues plus the metadata needed to interpret them.
57#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
58pub struct Transcript {
59    /// Name of the source that produced this transcript (e.g. `"youtube"`).
60    pub source: String,
61    /// Source-specific identifier for the media item (e.g. YouTube video ID).
62    pub locator_id: String,
63    /// Language code of the returned cues.
64    pub language: String,
65    /// Whether the track is manual, auto-generated, or translated.
66    pub kind: TrackKind,
67    /// The timed cues, in chronological order.
68    pub cues: Vec<Cue>,
69}
70
71/// Metadata describing a single available caption track on a media item.
72#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
73pub struct LanguageInfo {
74    /// Language code (e.g. `"en"`, `"en-US"`).
75    pub code: String,
76    /// Human-readable language name (e.g. `"English"`).
77    pub name: String,
78    /// Whether the track is manual, auto-generated, or translated.
79    pub kind: TrackKind,
80}
81
82/// Top-level metadata about a media item.
83#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
84pub struct MediaInfo {
85    /// Source name that produced this metadata.
86    pub source: String,
87    /// Source-specific identifier.
88    pub locator_id: String,
89    /// Title of the media item.
90    pub title: String,
91    /// Author / channel / uploader.
92    pub author: Option<String>,
93    /// Duration of the media item in milliseconds, if known.
94    pub duration_ms: Option<u64>,
95    /// All caption tracks available on this media item.
96    pub languages: Vec<LanguageInfo>,
97}
98
99/// A source capable of fetching transcripts from a media platform.
100///
101/// Implementations are expected to be cheap to construct; per-request state
102/// (HTTP client, auth, etc.) lives inside the implementor.
103///
104/// The trait is `Send + Sync` so implementations can be used behind
105/// `Box<dyn TranscriptSource>` for runtime dispatch (e.g. once
106/// `omni-dev transcript fetch <url>` auto-detection lands in a follow-up).
107#[async_trait]
108pub trait TranscriptSource: Send + Sync {
109    /// Stable, lowercase identifier for the source (e.g. `"youtube"`). Used
110    /// in error messages, the [`Transcript::source`] field, and as the
111    /// CLI subcommand name.
112    fn name(&self) -> &'static str;
113
114    /// Whether this source recognises `url` as one of its locators. Used by
115    /// future auto-detection (`omni-dev transcript fetch <url>`).
116    ///
117    /// `where Self: Sized` keeps this method out of the dyn vtable so
118    /// `dyn TranscriptSource` remains object-safe.
119    fn matches(url: &str) -> bool
120    where
121        Self: Sized;
122
123    /// Resolve `locator` to a transcript matching `opts`.
124    async fn fetch(&self, locator: &str, opts: &FetchOpts) -> Result<Transcript>;
125
126    /// List the caption tracks available on `locator`.
127    async fn list_languages(&self, locator: &str) -> Result<Vec<LanguageInfo>>;
128
129    /// Fetch top-level metadata about `locator`.
130    async fn info(&self, locator: &str) -> Result<MediaInfo>;
131}
132
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;
    use crate::transcript::error::TranscriptError;

    /// `FetchOpts::new` keeps the requested language and leaves both
    /// auto-captions and translation switched off.
    #[test]
    fn fetch_opts_new_defaults() {
        let defaults = FetchOpts::new("en");
        assert_eq!(defaults.language, "en");
        assert!(!defaults.allow_auto);
        assert_eq!(defaults.translate_to, None);
    }

    /// A clone of fully-populated options compares equal to the original.
    #[test]
    fn fetch_opts_clone_eq() {
        let original = FetchOpts {
            language: "fr".into(),
            allow_auto: true,
            translate_to: Some("en".into()),
        };
        let copy = original.clone();
        assert_eq!(original, copy);
    }

    /// Every `TrackKind` variant serialises to its lowercase name, and the
    /// lowercase form parses back.
    #[test]
    fn track_kind_serde_lowercase() {
        let expected_wire_forms = [
            (TrackKind::Manual, "\"manual\""),
            (TrackKind::Auto, "\"auto\""),
            (TrackKind::Translated, "\"translated\""),
        ];
        for (kind, wire) in expected_wire_forms.iter() {
            let encoded = serde_json::to_string(kind).unwrap();
            assert_eq!(encoded, *wire);
        }

        let parsed: TrackKind = serde_json::from_str("\"auto\"").unwrap();
        assert_eq!(parsed, TrackKind::Auto);
    }

    /// A `Transcript` survives a JSON encode/decode cycle unchanged.
    #[test]
    fn transcript_serde_round_trip() {
        let original = Transcript {
            source: "youtube".into(),
            locator_id: "dQw4w9WgXcQ".into(),
            language: "en".into(),
            kind: TrackKind::Manual,
            cues: vec![Cue::new(0, 1000, "hi")],
        };
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: Transcript = serde_json::from_str(&encoded).unwrap();
        assert_eq!(original, decoded);
    }

    /// A `LanguageInfo` survives a JSON encode/decode cycle unchanged.
    #[test]
    fn language_info_serde_round_trip() {
        let original = LanguageInfo {
            code: "en-US".into(),
            name: "English (United States)".into(),
            kind: TrackKind::Auto,
        };
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: LanguageInfo = serde_json::from_str(&encoded).unwrap();
        assert_eq!(original, decoded);
    }

    /// A `MediaInfo` whose optional fields are `None` round-trips through
    /// JSON unchanged.
    #[test]
    fn media_info_serde_with_optional_fields() {
        let original = MediaInfo {
            source: "youtube".into(),
            locator_id: "abc".into(),
            title: "T".into(),
            author: None,
            duration_ms: None,
            languages: vec![],
        };
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: MediaInfo = serde_json::from_str(&encoded).unwrap();
        assert_eq!(original, decoded);
    }

    /// Minimal in-memory source used to exercise the trait's shape and to
    /// demonstrate that it is object-safe.
    struct MockSource;

    #[async_trait]
    impl TranscriptSource for MockSource {
        fn name(&self) -> &'static str {
            "mock"
        }

        fn matches(url: &str) -> bool {
            url.strip_prefix("mock://").is_some()
        }

        /// Rejects an empty locator; otherwise returns a single-cue manual
        /// transcript echoing the locator and the requested language.
        async fn fetch(&self, locator: &str, opts: &FetchOpts) -> Result<Transcript> {
            if locator.is_empty() {
                Err(TranscriptError::InvalidLocator("empty".into()))
            } else {
                Ok(Transcript {
                    source: self.name().to_owned(),
                    locator_id: locator.to_owned(),
                    language: opts.language.clone(),
                    kind: TrackKind::Manual,
                    cues: vec![Cue::new(0, 1000, "hello")],
                })
            }
        }

        /// Always reports a single manual English track.
        async fn list_languages(&self, _locator: &str) -> Result<Vec<LanguageInfo>> {
            let english = LanguageInfo {
                code: "en".into(),
                name: "English".into(),
                kind: TrackKind::Manual,
            };
            Ok(vec![english])
        }

        /// Returns fixed metadata, echoing the locator as the identifier.
        async fn info(&self, locator: &str) -> Result<MediaInfo> {
            let metadata = MediaInfo {
                source: self.name().to_owned(),
                locator_id: locator.to_owned(),
                title: "Mock".into(),
                author: Some("Tester".into()),
                duration_ms: Some(5000),
                languages: vec![],
            };
            Ok(metadata)
        }
    }

    /// `matches` is callable on the concrete type without an instance.
    #[test]
    fn matches_static_dispatch() {
        assert!(MockSource::matches("mock://foo"));
        assert!(!MockSource::matches("https://youtube.com/watch?v=x"));
    }

    /// A non-empty locator yields a transcript carrying the source name,
    /// the locator, the requested language, and one cue.
    #[tokio::test]
    async fn mock_source_fetch_succeeds() {
        let source = MockSource;
        let request = FetchOpts::new("en");
        let transcript = source.fetch("vid", &request).await.unwrap();
        assert_eq!(transcript.source, "mock");
        assert_eq!(transcript.locator_id, "vid");
        assert_eq!(transcript.language, "en");
        assert_eq!(transcript.cues.len(), 1);
    }

    /// An empty locator surfaces `TranscriptError::InvalidLocator`.
    #[tokio::test]
    async fn mock_source_fetch_propagates_error() {
        let source = MockSource;
        let request = FetchOpts::new("en");
        let failure = source.fetch("", &request).await.unwrap_err();
        assert!(matches!(failure, TranscriptError::InvalidLocator(_)));
    }

    /// The trait's `&self` methods are usable through
    /// `Box<dyn TranscriptSource>`.
    #[tokio::test]
    async fn mock_source_via_dyn_box() {
        let boxed: Box<dyn TranscriptSource> = Box::new(MockSource);
        assert_eq!(boxed.name(), "mock");
        let tracks = boxed.list_languages("anything").await.unwrap();
        assert_eq!(tracks.len(), 1);
        let metadata = boxed.info("anything").await.unwrap();
        assert_eq!(metadata.title, "Mock");
    }
}