use num::integer::div_floor;
use serde::{Deserialize, Serialize};
use std::time::Duration;
pub use crate::Model;
/// A transcription result that can be rendered as plain text, WebVTT or SRT
/// through the `as_text`, `as_vtt` and `as_srt` methods.
#[derive(Debug, Serialize, Deserialize)]
pub struct Transcript {
// NOTE(review): presumably the time spent producing this transcript — confirm
// against the code that constructs it.
pub processing_time: Duration,
// The utterances rendered, in order, by `as_text`, `as_vtt` and `as_srt`.
pub utterances: Vec<Utterance>,
// Optional second list of utterances; none of the rendering methods read it.
// NOTE(review): the name suggests word-level timing — confirm with the producer.
pub word_utterances: Option<Vec<Utterance>>,
}
/// A single timed piece of transcribed text.
#[derive(Debug, Serialize, Deserialize)]
pub struct Utterance {
// Start time. `format_timestamp` multiplies this by 10 to obtain milliseconds,
// so the value is in units of 10 ms; it must be non-negative to be rendered.
pub start: i64,
// End time, in the same 10 ms units as `start`.
pub stop: i64,
// The utterance text; the rendering methods trim surrounding whitespace from it.
pub text: String,
}
impl Transcript {
    /// Returns the transcript as plain text.
    ///
    /// Each utterance contributes its whitespace-trimmed text followed by a
    /// newline; an empty transcript yields an empty string.
    #[must_use]
    pub fn as_text(&self) -> String {
        self.utterances
            .iter()
            .map(|utterance| format!("{}\n", utterance.text.trim()))
            .collect()
    }

    /// Returns the transcript as a WebVTT document.
    ///
    /// The output starts with the `WEBVTT` signature line and a blank line,
    /// followed by one cue per utterance. Each cue is a
    /// `HH:MM:SS.mmm --> HH:MM:SS.mmm` timing line, the trimmed text, and a
    /// blank line.
    ///
    /// # Panics
    ///
    /// Panics if any utterance has a negative `start` or `stop`, because
    /// `format_timestamp` rejects negative values.
    #[must_use]
    pub fn as_vtt(&self) -> String {
        let cues: String = self
            .utterances
            .iter()
            .map(|utterance| {
                format!(
                    "{} --> {}\n{}\n\n",
                    format_timestamp(utterance.start, true, "."),
                    format_timestamp(utterance.stop, true, "."),
                    // "-->" is the cue timing separator, so it is rewritten inside cue text.
                    utterance.text.trim().replace("-->", "->")
                )
            })
            .collect();

        // The signature line must be separated from the first cue by a blank line.
        format!("WEBVTT\n\n{cues}")
    }

    /// Returns the transcript in SubRip (SRT) format.
    ///
    /// Every utterance becomes one entry: a 1-based counter line, a
    /// `HH:MM:SS,mmm --> HH:MM:SS,mmm` timing line, the trimmed text, and a
    /// blank line that separates it from the next entry. An empty transcript
    /// yields an empty string.
    ///
    /// # Panics
    ///
    /// Panics if any utterance has a negative `start` or `stop`, because
    /// `format_timestamp` rejects negative values.
    #[must_use]
    pub fn as_srt(&self) -> String {
        self.utterances
            .iter()
            .enumerate()
            .map(|(index, utterance)| {
                format!(
                    "{}\n{} --> {}\n{}\n\n",
                    // SRT counters start at 1.
                    index + 1,
                    format_timestamp(utterance.start, true, ","),
                    format_timestamp(utterance.stop, true, ","),
                    utterance.text.trim().replace("-->", "->")
                )
            })
            .collect()
    }
}
/// Renders a timestamp as `[HH:]MM:SS<marker>mmm`.
///
/// `num` is multiplied by 10 to obtain milliseconds, so it is interpreted as a
/// count of 10 ms units. The hours field is written when `always_include_hours`
/// is set or when the hour count is non-zero. `decimal_marker` is placed
/// between the seconds and the three-digit millisecond field.
///
/// # Panics
///
/// Panics if `num` is negative.
fn format_timestamp(num: i64, always_include_hours: bool, decimal_marker: &str) -> String {
    const MS_PER_SECOND: i64 = 1_000;
    const MS_PER_MINUTE: i64 = 60_000;
    const MS_PER_HOUR: i64 = 3_600_000;

    assert!(num >= 0, "non-negative timestamp expected");

    let total_ms: i64 = num * 10;

    // Peel off each unit in turn, carrying the remainder down to the next one.
    let hours = div_floor(total_ms, MS_PER_HOUR);
    let below_hour = total_ms - hours * MS_PER_HOUR;
    let minutes = div_floor(below_hour, MS_PER_MINUTE);
    let below_minute = below_hour - minutes * MS_PER_MINUTE;
    let seconds = div_floor(below_minute, MS_PER_SECOND);
    let millis = below_minute - seconds * MS_PER_SECOND;

    let clock = format!("{minutes:02}:{seconds:02}{decimal_marker}{millis:03}");
    if always_include_hours || hours != 0 {
        format!("{hours:02}:{clock}")
    } else {
        clock
    }
}
/// With hours always included and a "." marker, an input of 100 renders as one second.
#[test]
fn test_format_timestamp() {
    assert_eq!(format_timestamp(100, true, "."), "00:00:01.000");
}
/// Checks the hours field: zero-padded when forced on, and still written for a
/// non-zero hour count even when `always_include_hours` is false.
#[test]
fn test_format_timestamp_hours() {
    // Below one hour, the hours field only appears because it is forced on.
    assert_eq!(format_timestamp(100, true, ","), "00:00:01,000");

    // 360_000 * 10 ms is exactly one hour.
    assert_eq!(format_timestamp(360_000, true, ","), "01:00:00,000");
    assert_eq!(format_timestamp(360_000, false, ","), "01:00:00,000");

    // One hour, one minute, one second and 10 ms exercises every field at once.
    assert_eq!(format_timestamp(366_101, false, ","), "01:01:01,010");
}
/// With `always_include_hours` off and zero hours, the hours field is omitted.
#[test]
fn test_format_timestamp_seconds() {
    let expected = "00:01.000";
    assert_eq!(format_timestamp(100, false, "."), expected);
}