1use std::collections::HashMap;
23
24use crate::CaptionGenError;
25
26#[derive(Debug, Clone, PartialEq, Eq, Hash)]
33pub struct LanguageCode(String);
34
35impl LanguageCode {
36 pub fn new(code: &str) -> Result<Self, CaptionGenError> {
45 let code = code.trim();
46 if code.len() != 2 || !code.chars().all(|c| c.is_ascii_lowercase()) {
47 return Err(CaptionGenError::InvalidParameter(format!(
48 "ISO 639-1 language code must be exactly two lowercase ASCII letters, got {:?}",
49 code
50 )));
51 }
52 Ok(Self(code.to_string()))
53 }
54
55 pub fn as_str(&self) -> &str {
57 &self.0
58 }
59}
60
61impl std::fmt::Display for LanguageCode {
62 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
63 f.write_str(&self.0)
64 }
65}
66
67impl TryFrom<&str> for LanguageCode {
68 type Error = CaptionGenError;
69
70 fn try_from(value: &str) -> Result<Self, Self::Error> {
71 Self::new(value)
72 }
73}
74
75impl TryFrom<String> for LanguageCode {
76 type Error = CaptionGenError;
77
78 fn try_from(value: String) -> Result<Self, Self::Error> {
79 Self::new(&value)
80 }
81}
82
/// A single caption cue: an identifier, a time span in milliseconds and the
/// text to show during that span.
#[derive(Debug, Clone, PartialEq)]
pub struct CaptionEntry {
    /// Identifier of this cue.
    pub id: u32,
    /// Start of the cue, in milliseconds.
    pub start_ms: u64,
    /// End of the cue, in milliseconds.
    pub end_ms: u64,
    /// Caption text.
    pub text: String,
}

impl CaptionEntry {
    /// Creates an entry from its parts.
    ///
    /// No validation is performed; in particular `end_ms` may be smaller than
    /// `start_ms`.
    pub fn new(id: u32, start_ms: u64, end_ms: u64, text: impl Into<String>) -> Self {
        let text = text.into();
        Self {
            id,
            start_ms,
            end_ms,
            text,
        }
    }

    /// Length of the cue in milliseconds, or `0` when `end_ms` is not after
    /// `start_ms`.
    pub fn duration_ms(&self) -> u64 {
        let Self {
            start_ms, end_ms, ..
        } = self;
        // Saturating so an inverted span yields zero instead of underflowing.
        end_ms.saturating_sub(*start_ms)
    }
}
114
115#[derive(Debug, Clone)]
121pub struct MultiLangCaption {
122 pub entries: HashMap<LanguageCode, Vec<CaptionEntry>>,
123}
124
125impl MultiLangCaption {
126 pub fn languages(&self) -> impl Iterator<Item = &LanguageCode> {
128 self.entries.keys()
129 }
130
131 pub fn track(&self, lang: &LanguageCode) -> Option<&[CaptionEntry]> {
133 self.entries.get(lang).map(|v| v.as_slice())
134 }
135
136 pub fn to_srt(&self, lang: &LanguageCode) -> Result<String, CaptionGenError> {
140 let track = self.entries.get(lang).ok_or_else(|| {
141 CaptionGenError::InvalidParameter(format!(
142 "language {:?} not found in MultiLangCaption",
143 lang.as_str()
144 ))
145 })?;
146
147 if track.is_empty() {
148 return Ok(String::new());
149 }
150
151 let mut out = String::with_capacity(track.len() * 80);
152 for (idx, entry) in track.iter().enumerate() {
153 let seq = idx as u32 + 1;
154 out.push_str(&format!(
155 "{}\n{} --> {}\n{}\n\n",
156 seq,
157 ms_to_srt_timestamp(entry.start_ms),
158 ms_to_srt_timestamp(entry.end_ms),
159 entry.text
160 ));
161 }
162 Ok(out)
163 }
164
165 pub fn merge_timing(
173 &self,
174 primary: &LanguageCode,
175 secondary: &LanguageCode,
176 ) -> Result<Vec<CaptionEntry>, CaptionGenError> {
177 let primary_track = self.entries.get(primary).ok_or_else(|| {
178 CaptionGenError::InvalidParameter(format!(
179 "primary language {:?} not found",
180 primary.as_str()
181 ))
182 })?;
183 let secondary_track = self.entries.get(secondary).ok_or_else(|| {
184 CaptionGenError::InvalidParameter(format!(
185 "secondary language {:?} not found",
186 secondary.as_str()
187 ))
188 })?;
189
190 let mut merged: Vec<CaptionEntry> = Vec::with_capacity(primary_track.len());
191
192 for (idx, pentry) in primary_track.iter().enumerate() {
193 let best = secondary_track
195 .iter()
196 .filter_map(|sentry| {
197 let overlap_start = pentry.start_ms.max(sentry.start_ms);
198 let overlap_end = pentry.end_ms.min(sentry.end_ms);
199 if overlap_end > overlap_start {
200 Some((sentry, overlap_end - overlap_start))
201 } else {
202 None
203 }
204 })
205 .max_by_key(|(_, overlap)| *overlap)
206 .map(|(sentry, _)| sentry);
207
208 if let Some(sentry) = best {
209 merged.push(CaptionEntry {
210 id: idx as u32 + 1,
211 start_ms: pentry.start_ms,
212 end_ms: pentry.end_ms,
213 text: sentry.text.clone(),
214 });
215 }
216 }
217
218 Ok(merged)
219 }
220}
221
222#[derive(Debug, Default)]
235pub struct MultiLangCaptionBuilder {
236 entries: HashMap<LanguageCode, Vec<CaptionEntry>>,
237}
238
239impl MultiLangCaptionBuilder {
240 pub fn new() -> Self {
242 Self::default()
243 }
244
245 pub fn add_track(mut self, lang: LanguageCode, entries: Vec<CaptionEntry>) -> Self {
250 self.entries.insert(lang, entries);
251 self
252 }
253
254 pub fn build(self) -> MultiLangCaption {
256 MultiLangCaption {
257 entries: self.entries,
258 }
259 }
260}
261
/// Formats a millisecond offset as an SRT timestamp, `HH:MM:SS,mmm`.
///
/// Hours are zero-padded to at least two digits and simply grow wider for
/// offsets of 100 hours or more.
fn ms_to_srt_timestamp(ms: u64) -> String {
    const MS_PER_SECOND: u64 = 1_000;
    const MS_PER_MINUTE: u64 = 60 * MS_PER_SECOND;
    const MS_PER_HOUR: u64 = 60 * MS_PER_MINUTE;

    let hours = ms / MS_PER_HOUR;
    let minutes = ms % MS_PER_HOUR / MS_PER_MINUTE;
    let seconds = ms % MS_PER_MINUTE / MS_PER_SECOND;
    let millis = ms % MS_PER_SECOND;

    format!("{:02}:{:02}:{:02},{:03}", hours, minutes, seconds, millis)
}
274
/// Unit tests for language codes, caption entries, the builder, SRT rendering
/// and timing merges.
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses a language code the test expects to be valid.
    fn lang(code: &str) -> LanguageCode {
        LanguageCode::new(code).expect("new should succeed")
    }

    /// Reports whether `LanguageCode::new` refuses `code`.
    fn rejected(code: &str) -> bool {
        LanguageCode::new(code).is_err()
    }

    /// Feeds each `(language, entries)` pair to the builder, in order, and
    /// builds the result.
    fn caption_of(tracks: Vec<(LanguageCode, Vec<CaptionEntry>)>) -> MultiLangCaption {
        let mut builder = MultiLangCaptionBuilder::new();
        for (code, entries) in tracks {
            builder = builder.add_track(code, entries);
        }
        builder.build()
    }

    // ---- LanguageCode ----

    #[test]
    fn lang_code_valid_en() {
        let parsed = LanguageCode::new("en");
        assert_eq!(parsed.expect("en should be valid").as_str(), "en");
    }

    #[test]
    fn lang_code_valid_ja() {
        let parsed = LanguageCode::new("ja");
        assert_eq!(parsed.expect("ja should be valid").as_str(), "ja");
    }

    #[test]
    fn lang_code_valid_zh() {
        assert!(!rejected("zh"));
    }

    #[test]
    fn lang_code_invalid_empty() {
        assert!(rejected(""));
    }

    #[test]
    fn lang_code_invalid_one_letter() {
        assert!(rejected("e"));
    }

    #[test]
    fn lang_code_invalid_three_letters() {
        assert!(rejected("eng"));
    }

    #[test]
    fn lang_code_invalid_uppercase() {
        assert!(rejected("EN"));
    }

    #[test]
    fn lang_code_invalid_digit() {
        assert!(rejected("e1"));
    }

    #[test]
    fn lang_code_try_from_str() {
        assert!(LanguageCode::try_from("fr").is_ok());
    }

    #[test]
    fn lang_code_display() {
        assert_eq!(lang("de").to_string(), "de");
    }

    // ---- CaptionEntry ----

    #[test]
    fn caption_entry_duration() {
        assert_eq!(CaptionEntry::new(1, 1000, 4000, "Hello").duration_ms(), 3000);
    }

    #[test]
    fn caption_entry_duration_zero_on_equal_timestamps() {
        assert_eq!(CaptionEntry::new(1, 2000, 2000, "X").duration_ms(), 0);
    }

    // ---- MultiLangCaptionBuilder ----

    #[test]
    fn builder_creates_empty_multilang() {
        let caption = caption_of(Vec::new());
        assert!(caption.entries.is_empty());
    }

    #[test]
    fn builder_add_track() {
        let en = lang("en");
        let caption = caption_of(vec![(
            en.clone(),
            vec![CaptionEntry::new(1, 0, 2000, "Hello")],
        )]);
        let track = caption.track(&en);
        assert!(track.is_some());
        assert_eq!(track.expect("track should succeed").len(), 1);
    }

    #[test]
    fn builder_add_two_tracks() {
        let en = lang("en");
        let es = lang("es");
        let caption = caption_of(vec![
            (en.clone(), vec![CaptionEntry::new(1, 0, 2000, "Hello")]),
            (es.clone(), vec![CaptionEntry::new(1, 0, 2000, "Hola")]),
        ]);
        for code in [&en, &es] {
            assert!(caption.track(code).is_some());
        }
    }

    #[test]
    fn builder_add_track_replaces_existing() {
        let en = lang("en");
        let caption = caption_of(vec![
            (en.clone(), vec![CaptionEntry::new(1, 0, 1000, "First")]),
            (en.clone(), vec![CaptionEntry::new(1, 0, 1000, "Second")]),
        ]);
        let track = caption.track(&en).expect("track should succeed");
        assert_eq!(track[0].text, "Second");
    }

    // ---- to_srt ----

    #[test]
    fn to_srt_basic() {
        let en = lang("en");
        let caption = caption_of(vec![(
            en.clone(),
            vec![
                CaptionEntry::new(1, 0, 2000, "Hello"),
                CaptionEntry::new(2, 3000, 5000, "World"),
            ],
        )]);
        let srt = caption.to_srt(&en).expect("to srt should succeed");
        let expected_fragments = [
            "1\n",
            "2\n",
            "00:00:00,000 --> 00:00:02,000",
            "00:00:03,000 --> 00:00:05,000",
            "Hello",
            "World",
        ];
        for fragment in expected_fragments {
            assert!(srt.contains(fragment));
        }
    }

    #[test]
    fn to_srt_empty_track_returns_empty_string() {
        let en = lang("en");
        let caption = caption_of(vec![(en.clone(), vec![])]);
        let srt = caption.to_srt(&en).expect("to srt should succeed");
        assert!(srt.is_empty());
    }

    #[test]
    fn to_srt_missing_language_returns_error() {
        let caption = caption_of(vec![(lang("en"), vec![])]);
        assert!(caption.to_srt(&lang("fr")).is_err());
    }

    #[test]
    fn to_srt_timestamp_format() {
        // 1 hour, 2 minutes, 3 seconds and 456 milliseconds.
        let ms = 3_600_000 + 2 * 60_000 + 3 * 1_000 + 456;
        assert_eq!(ms_to_srt_timestamp(ms), "01:02:03,456");
    }

    // ---- merge_timing ----

    #[test]
    fn merge_timing_basic_overlap() {
        let en = lang("en");
        let ja = lang("ja");
        let caption = caption_of(vec![
            (en.clone(), vec![CaptionEntry::new(1, 0, 3000, "Hello")]),
            (ja.clone(), vec![CaptionEntry::new(1, 500, 3500, "こんにちは")]),
        ]);
        let merged = caption
            .merge_timing(&en, &ja)
            .expect("merge timing should succeed");
        assert_eq!(merged.len(), 1);
        // Timing comes from the primary track, text from the secondary one.
        let only = &merged[0];
        assert_eq!(only.start_ms, 0);
        assert_eq!(only.end_ms, 3000);
        assert_eq!(only.text, "こんにちは");
    }

    #[test]
    fn merge_timing_no_overlap_excluded() {
        let en = lang("en");
        let ja = lang("ja");
        let caption = caption_of(vec![
            (en.clone(), vec![CaptionEntry::new(1, 0, 1000, "Hello")]),
            (ja.clone(), vec![CaptionEntry::new(1, 5000, 7000, "こんにちは")]),
        ]);
        let merged = caption
            .merge_timing(&en, &ja)
            .expect("merge timing should succeed");
        assert!(merged.is_empty());
    }

    #[test]
    fn merge_timing_picks_best_overlap() {
        let en = lang("en");
        let es = lang("es");
        let caption = caption_of(vec![
            (
                en.clone(),
                vec![CaptionEntry::new(1, 0, 5000, "Long sentence")],
            ),
            (
                es.clone(),
                vec![
                    CaptionEntry::new(1, 0, 500, "Short"),
                    CaptionEntry::new(2, 0, 4000, "Better"),
                ],
            ),
        ]);
        let merged = caption
            .merge_timing(&en, &es)
            .expect("merge timing should succeed");
        assert_eq!(merged.len(), 1);
        assert_eq!(merged[0].text, "Better");
    }

    #[test]
    fn merge_timing_missing_primary_returns_error() {
        let caption = caption_of(vec![(lang("en"), vec![])]);
        let outcome = caption.merge_timing(&lang("fr"), &lang("es"));
        assert!(outcome.is_err());
    }

    #[test]
    fn merge_timing_missing_secondary_returns_error() {
        let en = lang("en");
        let caption = caption_of(vec![(
            en.clone(),
            vec![CaptionEntry::new(1, 0, 1000, "X")],
        )]);
        let outcome = caption.merge_timing(&en, &lang("fr"));
        assert!(outcome.is_err());
    }

    #[test]
    fn merge_timing_ids_renumbered() {
        let en = lang("en");
        let de = lang("de");
        let caption = caption_of(vec![
            (
                en.clone(),
                vec![
                    CaptionEntry::new(1, 0, 1000, "Hello"),
                    CaptionEntry::new(2, 2000, 3000, "World"),
                ],
            ),
            (
                de.clone(),
                vec![
                    CaptionEntry::new(5, 200, 1200, "Hallo"),
                    CaptionEntry::new(6, 2100, 3100, "Welt"),
                ],
            ),
        ]);
        let merged = caption
            .merge_timing(&en, &de)
            .expect("merge timing should succeed");
        assert_eq!(merged.len(), 2);
        assert_eq!(merged[0].id, 1);
        assert_eq!(merged[1].id, 2);
    }
}