use super::*;
/// A zero offset renders as the all-zero, comma-separated timestamp.
#[test]
fn format_timestamp_zero() {
    let rendered = format_timestamp(0.0);
    assert_eq!(rendered, "00:00:00,000");
}
/// The fractional part of the input shows up as the millisecond field.
#[test]
fn format_timestamp_subsecond() {
    let rendered = format_timestamp(1.234);
    assert_eq!(rendered, "00:00:01,234");
}
/// One millisecond short of a minute must not carry into the minutes field.
#[test]
fn format_timestamp_pre_minute_rollover() {
    let rendered = format_timestamp(59.999);
    assert_eq!(rendered, "00:00:59,999");
}
/// Exactly sixty seconds carries into the minutes field.
#[test]
fn format_timestamp_minute_rollover() {
    let rendered = format_timestamp(60.0);
    assert_eq!(rendered, "00:01:00,000");
}
/// One millisecond short of an hour must not carry into the hours field.
#[test]
fn format_timestamp_pre_hour_rollover() {
    let rendered = format_timestamp(3599.999);
    assert_eq!(rendered, "00:59:59,999");
}
/// Exactly 3600 seconds carries into the hours field.
#[test]
fn format_timestamp_hour_rollover() {
    let rendered = format_timestamp(3600.0);
    assert_eq!(rendered, "01:00:00,000");
}
/// An input exercising hours, minutes, seconds and milliseconds together.
#[test]
fn format_timestamp_compound() {
    let rendered = format_timestamp(3661.123);
    assert_eq!(rendered, "01:01:01,123");
}
/// The VTT formatter separates milliseconds with `.` rather than `,`.
#[test]
fn format_vtt_timestamp_uses_dot() {
    // (input seconds, expected rendering), checked in this order.
    let cases = [
        (0.0, "00:00:00.000"),
        (1.234, "00:00:01.234"),
        (3661.123, "01:01:01.123"),
    ];
    for (seconds, expected) in cases {
        assert_eq!(format_vtt_timestamp(seconds), expected);
    }
}
/// Two segments without word lists yield two cues carrying the segment
/// texts, in input order.
#[test]
fn get_cues_segments_no_words() {
    let segments = vec![
        Segment::new(0.0, 1.0, "hello", vec![], ""),
        Segment::new(1.0, 2.0, "world", vec![], ""),
    ];
    let transcript = Transcript::Segments(SegmentsPayload::new("hello world", segments));

    let cues = get_cues(&transcript);

    assert_eq!(cues.len(), 2);
    for (idx, expected) in ["hello", "world"].iter().enumerate() {
        assert_eq!(cues[idx].text(), *expected);
    }
}
/// A single segment carrying two words yields three cues: the segment text
/// first, then one cue per word.
#[test]
fn get_cues_segments_with_words() {
    let words = vec![
        Word::new(0.0, 0.5, "hi", BTreeMap::new()),
        Word::new(0.5, 1.0, "there", BTreeMap::new()),
    ];
    let segment = Segment::new(0.0, 1.0, "hi there", words, "");
    let transcript = Transcript::Segments(SegmentsPayload::new("hi there", vec![segment]));

    let cues = get_cues(&transcript);

    assert_eq!(cues.len(), 3);
    for (idx, expected) in ["hi there", "hi", "there"].iter().enumerate() {
        assert_eq!(cues[idx].text(), *expected);
    }
}
/// A sentences transcript yields one cue per sentence; the token attached to
/// the first sentence does not add a cue of its own.
#[test]
fn get_cues_sentences_one_per_sentence() {
    let with_token = Sentence::new(
        "hi",
        0.0,
        1.0,
        1.0,
        vec![SentenceToken::new("h", 0.0, 0.5, 0.5)],
        "",
    );
    let without_tokens = Sentence::new("world", 1.0, 2.0, 1.0, vec![], "");
    let transcript = Transcript::Sentences(SentencesPayload::new(
        "hi world",
        vec![with_token, without_tokens],
    ));

    let cues = get_cues(&transcript);

    assert_eq!(cues.len(), 2);
    for (idx, expected) in ["hi", "world"].iter().enumerate() {
        assert_eq!(cues[idx].text(), *expected);
    }
}
/// `Transcript::text` returns the payload's top-level text for both variants.
#[test]
fn transcript_text_accessor() {
    let segments_variant = Transcript::Segments(SegmentsPayload::new("alpha", Vec::new()));
    assert_eq!(segments_variant.text(), "alpha");

    let sentences_variant = Transcript::Sentences(SentencesPayload::new("beta", Vec::new()));
    assert_eq!(sentences_variant.text(), "beta");
}
/// `with_extension` appends the suffix to the path and keeps any extension
/// that is already there (`out.draft` becomes `out.draft.txt`).
#[test]
fn with_extension_appends_not_replaces() {
    use std::path::PathBuf;

    // (input path, expected path after appending "txt"), checked in order.
    let cases = [("out", "out.txt"), ("out.draft", "out.draft.txt")];
    for (input, expected) in cases {
        assert_eq!(
            super::with_extension(Path::new(input), "txt"),
            PathBuf::from(expected),
        );
    }
}
/// Builds the shared fixture: a `Transcript::Segments` with text
/// `"hello world foo"` and three back-to-back segments that carry no words.
fn three_segments_fixture() -> Transcript {
    let segments = vec![
        Segment::new(0.0, 1.234, "hello", vec![], ""),
        Segment::new(1.234, 2.500, "world", vec![], ""),
        Segment::new(2.500, 4.000, "foo", vec![], ""),
    ];
    let payload = SegmentsPayload::new("hello world foo", segments);
    Transcript::Segments(payload)
}
/// The txt writer helper emits exactly the transcript text into the writer.
#[test]
fn save_as_txt_to_writer_matches_python_body() {
    let transcript = three_segments_fixture();
    let mut sink = Vec::<u8>::new();

    super::save_as_txt_to_writer(&transcript, &mut sink).unwrap();

    let written = std::str::from_utf8(&sink).unwrap();
    assert_eq!(
        written, "hello world foo",
        "writer helper writes the same body the on-disk `.txt` test asserts"
    );
}
/// The srt writer helper emits numbered cues with comma-separated
/// timestamps, each cue followed by a blank line.
#[test]
fn save_as_srt_to_writer_matches_python_body() {
    let transcript = three_segments_fixture();
    let mut sink = Vec::<u8>::new();

    super::save_as_srt_to_writer(&transcript, &mut sink).unwrap();

    // One literal per cue; `concat!` joins them into a single `&'static str`.
    let expected = concat!(
        "1\n00:00:00,000 --> 00:00:01,234\nhello\n\n",
        "2\n00:00:01,234 --> 00:00:02,500\nworld\n\n",
        "3\n00:00:02,500 --> 00:00:04,000\nfoo\n\n",
    );
    let written = std::str::from_utf8(&sink).unwrap();
    assert_eq!(
        written, expected,
        "writer helper writes the same body the on-disk `.srt` test asserts"
    );
}
/// The vtt writer helper emits the `WEBVTT` header, a blank line, then
/// numbered cues with dot-separated timestamps.
#[test]
fn save_as_vtt_to_writer_matches_python_body() {
    let transcript = three_segments_fixture();
    let mut sink = Vec::<u8>::new();

    super::save_as_vtt_to_writer(&transcript, &mut sink).unwrap();

    // Header first, then one literal per cue.
    let expected = concat!(
        "WEBVTT\n\n",
        "1\n00:00:00.000 --> 00:00:01.234\nhello\n\n",
        "2\n00:00:01.234 --> 00:00:02.500\nworld\n\n",
        "3\n00:00:02.500 --> 00:00:04.000\nfoo\n\n",
    );
    let written = std::str::from_utf8(&sink).unwrap();
    assert_eq!(
        written, expected,
        "writer helper writes the same body the on-disk `.vtt` test asserts"
    );
}
/// The json writer helper emits JSON that deserializes back into the same
/// `Transcript`, and the output is pretty-printed with a 2-space indent.
#[test]
fn save_as_json_to_writer_matches_python_body() {
    let t = three_segments_fixture();
    let mut buf: Vec<u8> = Vec::new();
    super::save_as_json_to_writer(&t, &mut buf).unwrap();

    let parsed: Transcript = serde_json::from_slice(&buf).unwrap();
    assert_eq!(
        parsed, t,
        "writer helper writes the same python-shape JSON the on-disk `.json` test asserts"
    );

    // The needle is newline + exactly two spaces + the top-level `"text"`
    // key. A one-space needle can never match 2-space-indented output,
    // because the character after the first space is another space rather
    // than the opening quote.
    let body = std::str::from_utf8(&buf).unwrap();
    assert!(
        body.contains("\n  \"text\":"),
        "writer JSON output uses 2-space indent"
    );
}
/// In-memory `Write` test double: records every byte written, counts
/// `flush` calls, and can be configured to fail each `flush`.
struct FailingFlushWriter {
    /// Every byte accepted by `write`, in order.
    buf: Vec<u8>,
    /// Number of `flush` invocations, successful or not.
    flush_calls: usize,
    /// When `true`, every `flush` returns an error.
    fail_flush: bool,
}

impl FailingFlushWriter {
    /// Shared constructor: empty buffer, zero flushes, chosen flush outcome.
    fn with_flush_outcome(fail_flush: bool) -> Self {
        FailingFlushWriter {
            buf: Vec::new(),
            flush_calls: 0,
            fail_flush,
        }
    }

    /// A writer whose `flush` always succeeds.
    fn ok() -> Self {
        Self::with_flush_outcome(false)
    }

    /// A writer whose `flush` always fails.
    fn failing() -> Self {
        Self::with_flush_outcome(true)
    }
}

impl Write for FailingFlushWriter {
    /// Accepts the whole slice and appends it to `buf`; never fails.
    fn write(&mut self, chunk: &[u8]) -> std::io::Result<usize> {
        let accepted = chunk.len();
        self.buf.extend(chunk.iter().copied());
        Ok(accepted)
    }

    /// Counts the call first, then fails iff `fail_flush` is set.
    fn flush(&mut self) -> std::io::Result<()> {
        self.flush_calls += 1;
        if !self.fail_flush {
            return Ok(());
        }
        Err(std::io::Error::other("induced flush failure"))
    }
}
/// Despite the `txt` in its name, this test drives all four `save_as_*_stdout`
/// helpers against a non-failing writer and checks that each one writes some
/// bytes and calls `flush` exactly once.
#[test]
fn save_as_txt_writer_helper_flushes_via_explicit_call() {
    // Uniform callable type so the four generic helpers can share one table.
    type Runner = Box<dyn Fn(&Transcript, &mut FailingFlushWriter) -> Result<()>>;

    let t = three_segments_fixture();
    let cases: [(&str, Runner); 4] = [
        (
            "save_as_txt_stdout",
            Box::new(|tr: &Transcript, sink: &mut FailingFlushWriter| {
                super::save_as_txt_stdout(tr, sink)
            }),
        ),
        (
            "save_as_srt_stdout",
            Box::new(|tr: &Transcript, sink: &mut FailingFlushWriter| {
                super::save_as_srt_stdout(tr, sink)
            }),
        ),
        (
            "save_as_vtt_stdout",
            Box::new(|tr: &Transcript, sink: &mut FailingFlushWriter| {
                super::save_as_vtt_stdout(tr, sink)
            }),
        ),
        (
            "save_as_json_stdout",
            Box::new(|tr: &Transcript, sink: &mut FailingFlushWriter| {
                super::save_as_json_stdout(tr, sink)
            }),
        ),
    ];

    for (name, run) in cases {
        // Fresh writer per helper so the counters start from zero.
        let mut w = FailingFlushWriter::ok();
        if let Err(e) = run(&t, &mut w) {
            panic!("{name} on ok-writer must succeed: {e}");
        }
        assert_eq!(
            w.flush_calls, 1,
            "{name} must call .flush() exactly once on the writer (saw {})",
            w.flush_calls
        );
        assert!(
            !w.buf.is_empty(),
            "{name} must have written body bytes before flushing"
        );
    }
}
/// A failing `flush` must make `save_as_txt_stdout` return `Error::FileIo`
/// with op `FileOp::Flush` and a context naming the function and the stdout
/// flush failure. (The test name says "backend error"; the variant asserted
/// is `FileIo`.)
#[test]
fn save_as_txt_stdout_flush_failure_surfaces_as_backend_error() {
    let transcript = three_segments_fixture();
    let mut writer = FailingFlushWriter::failing();

    let outcome = super::save_as_txt_stdout(&transcript, &mut writer);
    let payload = match outcome.expect_err("flush-failing writer must produce an Err") {
        Error::FileIo(p) => p,
        other => panic!("expected Error::FileIo, got {other:?}"),
    };

    let names_fn = payload.context().contains("save_as_txt");
    let names_flush = payload.context().contains("stdout flush failed");
    assert!(
        names_fn && names_flush,
        "FileIo context must mention save_as_txt + stdout flush failure (got: {})",
        payload.context()
    );
    assert_eq!(payload.op(), FileOp::Flush, "op kind must be Flush");
    assert_eq!(
        writer.flush_calls, 1,
        "flush must have been attempted exactly once"
    );
}
/// A failing `flush` must make `save_as_srt_stdout` return `Error::FileIo`
/// with op `FileOp::Flush` and a context naming the function and the stdout
/// flush failure. (The test name says "backend error"; the variant asserted
/// is `FileIo`.)
#[test]
fn save_as_srt_stdout_flush_failure_surfaces_as_backend_error() {
    let transcript = three_segments_fixture();
    let mut writer = FailingFlushWriter::failing();

    let outcome = super::save_as_srt_stdout(&transcript, &mut writer);
    let payload = match outcome.expect_err("flush-failing writer must produce an Err") {
        Error::FileIo(p) => p,
        other => panic!("expected Error::FileIo, got {other:?}"),
    };

    let names_fn = payload.context().contains("save_as_srt");
    let names_flush = payload.context().contains("stdout flush failed");
    assert!(
        names_fn && names_flush,
        "FileIo context must mention save_as_srt + stdout flush failure (got: {})",
        payload.context()
    );
    assert_eq!(payload.op(), FileOp::Flush, "op kind must be Flush");
    assert_eq!(
        writer.flush_calls, 1,
        "flush must have been attempted exactly once"
    );
}
/// A failing `flush` must make `save_as_vtt_stdout` return `Error::FileIo`
/// with op `FileOp::Flush` and a context naming the function and the stdout
/// flush failure. (The test name says "backend error"; the variant asserted
/// is `FileIo`.)
#[test]
fn save_as_vtt_stdout_flush_failure_surfaces_as_backend_error() {
    let transcript = three_segments_fixture();
    let mut writer = FailingFlushWriter::failing();

    let outcome = super::save_as_vtt_stdout(&transcript, &mut writer);
    let payload = match outcome.expect_err("flush-failing writer must produce an Err") {
        Error::FileIo(p) => p,
        other => panic!("expected Error::FileIo, got {other:?}"),
    };

    let names_fn = payload.context().contains("save_as_vtt");
    let names_flush = payload.context().contains("stdout flush failed");
    assert!(
        names_fn && names_flush,
        "FileIo context must mention save_as_vtt + stdout flush failure (got: {})",
        payload.context()
    );
    assert_eq!(payload.op(), FileOp::Flush, "op kind must be Flush");
    assert_eq!(
        writer.flush_calls, 1,
        "flush must have been attempted exactly once"
    );
}
/// A failing `flush` must make `save_as_json_stdout` return `Error::FileIo`
/// with op `FileOp::Flush` and a context naming the function and the stdout
/// flush failure. (The test name says "backend error"; the variant asserted
/// is `FileIo`.)
#[test]
fn save_as_json_stdout_flush_failure_surfaces_as_backend_error() {
    let transcript = three_segments_fixture();
    let mut writer = FailingFlushWriter::failing();

    let outcome = super::save_as_json_stdout(&transcript, &mut writer);
    let payload = match outcome.expect_err("flush-failing writer must produce an Err") {
        Error::FileIo(p) => p,
        other => panic!("expected Error::FileIo, got {other:?}"),
    };

    let names_fn = payload.context().contains("save_as_json");
    let names_flush = payload.context().contains("stdout flush failed");
    assert!(
        names_fn && names_flush,
        "FileIo context must mention save_as_json + stdout flush failure (got: {})",
        payload.context()
    );
    assert_eq!(payload.op(), FileOp::Flush, "op kind must be Flush");
    assert_eq!(
        writer.flush_calls, 1,
        "flush must have been attempted exactly once"
    );
}
/// `Write` test double whose `write` always fails; it counts `write` and
/// `flush` calls so tests can check how far a helper got.
struct FailingWriteWriter {
    /// Number of `write` attempts (each one fails).
    write_calls: usize,
    /// Number of `flush` invocations (each one succeeds).
    flush_calls: usize,
}

impl FailingWriteWriter {
    /// Starts with both counters at zero.
    fn new() -> Self {
        FailingWriteWriter {
            write_calls: 0,
            flush_calls: 0,
        }
    }
}

impl Write for FailingWriteWriter {
    /// Counts the attempt, discards the bytes, and reports an I/O error.
    fn write(&mut self, _chunk: &[u8]) -> std::io::Result<usize> {
        self.write_calls += 1;
        let failure = std::io::Error::other("induced write failure");
        Err(failure)
    }

    /// Counts the call and succeeds.
    fn flush(&mut self) -> std::io::Result<()> {
        self.flush_calls += 1;
        Ok(())
    }
}
/// A failing body write must make `save_as_txt_stdout` return
/// `Error::FileIo` with the txt write context, op `FileOp::Write`, and the
/// induced I/O error's message unchanged; `flush` must not be reached.
#[test]
fn save_as_txt_stdout_write_failure_surfaces_as_file_io() {
    let transcript = three_segments_fixture();
    let mut writer = FailingWriteWriter::new();

    let outcome = super::save_as_txt_stdout(&transcript, &mut writer);
    let payload = match outcome.expect_err("write-failing writer must produce an Err") {
        Error::FileIo(p) => p,
        other => panic!("expected Error::FileIo, got {other:?}"),
    };

    assert_eq!(
        payload.context(),
        "save_as_txt: write to stdout failed",
        "FileIo context must be the txt stdout WRITE marker (got: {})",
        payload.context()
    );
    assert_eq!(payload.op(), FileOp::Write, "op kind must be Write");
    assert_eq!(
        payload.inner().to_string(),
        "induced write failure",
        "the underlying induced io::Error must be threaded through verbatim"
    );

    assert_eq!(writer.write_calls, 1, "exactly one write must be attempted");
    assert_eq!(
        writer.flush_calls, 0,
        "the failed body write must short-circuit before flush"
    );
}
/// A failing body write must make `save_as_srt_stdout` return
/// `Error::FileIo` with the srt write context and op `FileOp::Write`;
/// `flush` must not be reached.
#[test]
fn save_as_srt_stdout_write_failure_surfaces_as_file_io() {
    let transcript = three_segments_fixture();
    let mut writer = FailingWriteWriter::new();

    let outcome = super::save_as_srt_stdout(&transcript, &mut writer);
    let payload = match outcome.expect_err("write-failing writer must produce an Err") {
        Error::FileIo(p) => p,
        other => panic!("expected Error::FileIo, got {other:?}"),
    };

    assert_eq!(
        payload.context(),
        "save_as_srt: write to stdout failed",
        "FileIo context must be the srt stdout WRITE marker (got: {})",
        payload.context()
    );
    assert_eq!(payload.op(), FileOp::Write, "op kind must be Write");

    assert_eq!(writer.write_calls, 1, "exactly one write must be attempted");
    assert_eq!(
        writer.flush_calls, 0,
        "the failed body write must short-circuit before flush"
    );
}
/// A failing first write must make `save_as_vtt_stdout` return
/// `Error::FileIo` with the vtt write context and op `FileOp::Write`;
/// `flush` must not be reached.
#[test]
fn save_as_vtt_stdout_write_failure_surfaces_as_file_io() {
    let transcript = three_segments_fixture();
    let mut writer = FailingWriteWriter::new();

    let outcome = super::save_as_vtt_stdout(&transcript, &mut writer);
    let payload = match outcome.expect_err("write-failing writer must produce an Err") {
        Error::FileIo(p) => p,
        other => panic!("expected Error::FileIo, got {other:?}"),
    };

    assert_eq!(
        payload.context(),
        "save_as_vtt: write to stdout failed",
        "FileIo context must be the vtt stdout WRITE marker (got: {})",
        payload.context()
    );
    assert_eq!(payload.op(), FileOp::Write, "op kind must be Write");

    assert_eq!(writer.write_calls, 1, "exactly one write must be attempted");
    assert_eq!(
        writer.flush_calls, 0,
        "the failed header write must short-circuit before flush"
    );
}
/// A failing write during serialization must make `save_as_json_stdout`
/// return `Error::FileIo` with the json serialize context and op
/// `FileOp::Write`; at least one write is attempted and `flush` is not
/// reached.
#[test]
fn save_as_json_stdout_write_failure_surfaces_as_file_io() {
    let transcript = three_segments_fixture();
    let mut writer = FailingWriteWriter::new();

    let outcome = super::save_as_json_stdout(&transcript, &mut writer);
    let payload = match outcome.expect_err("write-failing writer must produce an Err") {
        Error::FileIo(p) => p,
        other => panic!("expected Error::FileIo, got {other:?}"),
    };

    assert_eq!(
        payload.context(),
        "save_as_json: serialize to stdout failed",
        "FileIo context must be the json stdout SERIALIZE marker (got: {})",
        payload.context()
    );
    assert_eq!(payload.op(), FileOp::Write, "op kind must be Write");

    // Only a lower bound is asserted on the number of write attempts here.
    assert!(
        writer.write_calls > 0,
        "serde must have attempted at least one write before failing"
    );
    assert_eq!(
        writer.flush_calls, 0,
        "the failed serialize write must short-circuit before flush"
    );
}
/// Returns `<temp_dir>/mlxrs_serializers_unit_<pid>_<name>_nonexistent_dir/out`.
///
/// This function only builds the path and never creates the intermediate
/// directory; the create-failure tests rely on that directory being absent.
fn nonexistent_parent_base(name: &str) -> std::path::PathBuf {
    let missing_dir = format!(
        "mlxrs_serializers_unit_{}_{}_nonexistent_dir",
        std::process::id(),
        name
    );
    std::env::temp_dir().join(missing_dir).join("out")
}
/// Saving under a missing parent directory must make `save_as_txt` return
/// `Error::FileIo` with op `FileOp::Create`, a `NotFound` inner error, the
/// bare function name as context, and a path ending in `out.txt`.
#[test]
fn save_as_txt_create_failure_surfaces_as_file_io() {
    let transcript = three_segments_fixture();
    let base = nonexistent_parent_base("txt");

    let outcome = super::save_as_txt(&transcript, &base);
    let payload = match outcome
        .expect_err("File::create under a missing parent dir must produce an Err")
    {
        Error::FileIo(p) => p,
        other => panic!("expected Error::FileIo, got {other:?}"),
    };

    assert_eq!(
        payload.context(),
        "save_as_txt",
        "FileIo context must be the bare fn label for the file-branch create arm"
    );
    assert_eq!(payload.op(), FileOp::Create, "op kind must be Create");
    assert_eq!(
        payload.inner().kind(),
        std::io::ErrorKind::NotFound,
        "missing parent dir yields NotFound"
    );
    let has_txt_suffix = payload.path().to_string_lossy().ends_with("out.txt");
    assert!(
        has_txt_suffix,
        "payload path must be the `.txt`-appended final path (got {})",
        payload.path().display()
    );
}
/// Saving under a missing parent directory must make `save_as_srt` return
/// `Error::FileIo` with op `FileOp::Create`, a `NotFound` inner error, the
/// bare function name as context, and a path ending in `out.srt`.
#[test]
fn save_as_srt_create_failure_surfaces_as_file_io() {
    let transcript = three_segments_fixture();
    let base = nonexistent_parent_base("srt");

    let outcome = super::save_as_srt(&transcript, &base);
    let payload = match outcome
        .expect_err("File::create under a missing parent dir must produce an Err")
    {
        Error::FileIo(p) => p,
        other => panic!("expected Error::FileIo, got {other:?}"),
    };

    assert_eq!(payload.context(), "save_as_srt");
    assert_eq!(payload.op(), FileOp::Create, "op kind must be Create");
    assert_eq!(payload.inner().kind(), std::io::ErrorKind::NotFound);
    let has_srt_suffix = payload.path().to_string_lossy().ends_with("out.srt");
    assert!(
        has_srt_suffix,
        "payload path must be the `.srt`-appended final path (got {})",
        payload.path().display()
    );
}
/// Saving under a missing parent directory must make `save_as_vtt` return
/// `Error::FileIo` with op `FileOp::Create`, a `NotFound` inner error, the
/// bare function name as context, and a path ending in `out.vtt`.
#[test]
fn save_as_vtt_create_failure_surfaces_as_file_io() {
    let transcript = three_segments_fixture();
    let base = nonexistent_parent_base("vtt");

    let outcome = super::save_as_vtt(&transcript, &base);
    let payload = match outcome
        .expect_err("File::create under a missing parent dir must produce an Err")
    {
        Error::FileIo(p) => p,
        other => panic!("expected Error::FileIo, got {other:?}"),
    };

    assert_eq!(payload.context(), "save_as_vtt");
    assert_eq!(payload.op(), FileOp::Create, "op kind must be Create");
    assert_eq!(payload.inner().kind(), std::io::ErrorKind::NotFound);
    let has_vtt_suffix = payload.path().to_string_lossy().ends_with("out.vtt");
    assert!(
        has_vtt_suffix,
        "payload path must be the `.vtt`-appended final path (got {})",
        payload.path().display()
    );
}
/// Saving under a missing parent directory must make `save_as_json` return
/// `Error::FileIo` with op `FileOp::Create`, a `NotFound` inner error, the
/// bare function name as context, and a path ending in `out.json`.
#[test]
fn save_as_json_create_failure_surfaces_as_file_io() {
    let transcript = three_segments_fixture();
    let base = nonexistent_parent_base("json");

    let outcome = super::save_as_json(&transcript, &base);
    let payload = match outcome
        .expect_err("File::create under a missing parent dir must produce an Err")
    {
        Error::FileIo(p) => p,
        other => panic!("expected Error::FileIo, got {other:?}"),
    };

    assert_eq!(payload.context(), "save_as_json");
    assert_eq!(payload.op(), FileOp::Create, "op kind must be Create");
    assert_eq!(payload.inner().kind(), std::io::ErrorKind::NotFound);
    let has_json_suffix = payload.path().to_string_lossy().ends_with("out.json");
    assert!(
        has_json_suffix,
        "payload path must be the `.json`-appended final path (got {})",
        payload.path().display()
    );
}
/// Runs the three-segment fixture through every on-disk `save_as_*` entry
/// point and checks each file: exact contents for txt/srt/vtt, and a
/// round-trip back into an equal `Transcript` for json.
///
/// Each generated file is owned by a drop guard, so it is removed when its
/// scope ends. That includes unwinding from a failed assertion or `expect`,
/// which previously left the file behind in the temp directory.
#[test]
fn save_as_all_formats_write_byte_exact_files() {
    /// Best-effort removal of the wrapped path when the guard is dropped.
    struct RemoveOnDrop<P: AsRef<std::path::Path>>(P);

    impl<P: AsRef<std::path::Path>> Drop for RemoveOnDrop<P> {
        fn drop(&mut self) {
            // Cleanup only: the file may never have been created.
            let _ = std::fs::remove_file(&self.0);
        }
    }

    let t = three_segments_fixture();
    // The process id keeps concurrent test binaries from sharing a base path.
    let mut base = std::env::temp_dir();
    base.push(format!(
        "mlxrs_serializers_unit_{}_roundtrip",
        std::process::id()
    ));

    {
        let txt_file = RemoveOnDrop(super::with_extension(&base, "txt"));
        super::save_as_txt(&t, &base).expect("save_as_txt must succeed on a writable temp path");
        let txt = std::fs::read_to_string(&txt_file.0).expect("txt file must exist");
        assert_eq!(txt, "hello world foo");
    }

    {
        let srt_file = RemoveOnDrop(super::with_extension(&base, "srt"));
        super::save_as_srt(&t, &base).expect("save_as_srt must succeed");
        let srt = std::fs::read_to_string(&srt_file.0).expect("srt file must exist");
        let srt_expected = "1\n00:00:00,000 --> 00:00:01,234\nhello\n\n\
                            2\n00:00:01,234 --> 00:00:02,500\nworld\n\n\
                            3\n00:00:02,500 --> 00:00:04,000\nfoo\n\n";
        assert_eq!(srt, srt_expected);
    }

    {
        let vtt_file = RemoveOnDrop(super::with_extension(&base, "vtt"));
        super::save_as_vtt(&t, &base).expect("save_as_vtt must succeed");
        let vtt = std::fs::read_to_string(&vtt_file.0).expect("vtt file must exist");
        let vtt_expected = "WEBVTT\n\n\
                            1\n00:00:00.000 --> 00:00:01.234\nhello\n\n\
                            2\n00:00:01.234 --> 00:00:02.500\nworld\n\n\
                            3\n00:00:02.500 --> 00:00:04.000\nfoo\n\n";
        assert_eq!(vtt, vtt_expected);
    }

    {
        let json_file = RemoveOnDrop(super::with_extension(&base, "json"));
        super::save_as_json(&t, &base).expect("save_as_json must succeed");
        let raw = std::fs::read_to_string(&json_file.0).expect("json file must exist");
        let parsed: Transcript =
            serde_json::from_str(&raw).expect("json parses back into Transcript");
        assert_eq!(
            parsed, t,
            "on-disk JSON round-trips to the original Transcript"
        );
    }
}
/// For a segments transcript, `transcript_to_python_shape` must inline each
/// word's extra fields after `start`/`end`/`word`, emit the segment's
/// `speaker_id`, and keep the asserted key order for words and segments.
#[test]
fn transcript_to_python_shape_segments_emits_extra_word_fields_and_speaker_id() {
    use serde_json::json;

    let extra = BTreeMap::from([
        ("probability".to_owned(), json!(0.875)),
        ("scored".to_owned(), json!(true)),
    ]);
    let word = Word::new(0.0, 0.5, "hi", extra);
    let segment = Segment::new(0.0, 1.5, "hi", vec![word], "spk_7");
    let transcript = Transcript::Segments(SegmentsPayload::new("hi", vec![segment]));

    let expected = json!({
        "text": "hi",
        "segments": [
            {
                "text": "hi",
                "start": 0.0,
                "end": 1.5,
                "duration": 1.5,
                "words": [
                    {
                        "start": 0.0,
                        "end": 0.5,
                        "word": "hi",
                        "probability": 0.875,
                        "scored": true
                    }
                ],
                "speaker_id": "spk_7"
            }
        ]
    });

    let got = super::transcript_to_python_shape(&transcript);
    assert_eq!(
        got, expected,
        "Segments python-shape must inline extra per-word fields + the segment speaker_id"
    );

    // The key-order checks below are separate from the value comparison above.
    let first_segment = &got["segments"][0];

    let word_obj = first_segment["words"][0]
        .as_object()
        .expect("word is an object");
    let word_keys: Vec<&str> = word_obj.keys().map(|key| key.as_str()).collect();
    assert_eq!(
        word_keys,
        ["start", "end", "word", "probability", "scored"],
        "per-word key order: fixed {{start,end,word}} then BTreeMap-sorted extra keys"
    );

    let seg_obj = first_segment.as_object().expect("segment is an object");
    let seg_keys: Vec<&str> = seg_obj.keys().map(|key| key.as_str()).collect();
    assert_eq!(
        seg_keys,
        ["text", "start", "end", "duration", "words", "speaker_id"],
        "per-segment key order matches the python dict-insertion order"
    );
}
/// For a sentences transcript, `transcript_to_python_shape` must emit each
/// sentence's tokens and `speaker_id`, and keep the asserted sentence key
/// order.
#[test]
fn transcript_to_python_shape_sentences_emits_speaker_id_and_token_order() {
    use serde_json::json;

    let token = SentenceToken::new("h", 0.0, 0.25, 0.25);
    let sentence = Sentence::new("hi", 0.0, 0.5, 0.5, vec![token], "spk_3");
    let transcript = Transcript::Sentences(SentencesPayload::new("hi", vec![sentence]));

    let expected = json!({
        "text": "hi",
        "sentences": [
            {
                "text": "hi",
                "start": 0.0,
                "end": 0.5,
                "duration": 0.5,
                "tokens": [
                    { "text": "h", "start": 0.0, "end": 0.25, "duration": 0.25 }
                ],
                "speaker_id": "spk_3"
            }
        ]
    });

    let got = super::transcript_to_python_shape(&transcript);
    assert_eq!(got, expected);

    // The key-order check below is separate from the value comparison above.
    let first_sentence = &got["sentences"][0];
    let sent_obj = first_sentence.as_object().expect("sentence is an object");
    let sent_keys: Vec<&str> = sent_obj.keys().map(|key| key.as_str()).collect();
    assert_eq!(
        sent_keys,
        ["text", "start", "end", "duration", "tokens", "speaker_id"],
        "per-sentence key order matches the python dict-insertion order"
    );
}