use super::reader::ElanFile;
use crate::srt::format_srt_time;
/// A single subtitle cue awaiting serialization: a time span plus the text
/// shown during it.
struct SrtEntry {
    /// Cue start in milliseconds, copied from the annotation's start time.
    start_ms: i64,
    /// Cue end in milliseconds, copied from the annotation's end time.
    end_ms: i64,
    /// Text of the cue; the exporter prefixes it with the tier ID when several
    /// tiers are written into the same file.
    text: String,
}
/// Serializes the time-aligned annotations of an ELAN file as SRT subtitle text.
///
/// # Tier selection
///
/// * `participants = Some(codes)` — every tier whose ID equals one of `codes`.
/// * `participants = None` — every tier that has no parent and whose ID is
///   exactly three bytes long.
///
/// # Output
///
/// Annotations missing a start or an end time are skipped. The remaining cues
/// are ordered by start time, then end time; the sort is stable, so ties keep
/// tier order and then annotation order. Cues are numbered from 1 and separated
/// by one blank line. When more than one tier is selected, each cue's text is
/// prefixed with `"<tier id>: "`.
///
/// Returns an empty string when no annotation qualifies.
pub(crate) fn elan_file_to_srt_str(file: &ElanFile, participants: Option<&[String]>) -> String {
    // Scoped trait import: lets `writeln!` append directly to a `String`.
    use std::fmt::Write as _;

    // Resolve the selected tiers once; the count decides whether cue text is
    // prefixed with the tier ID, and the references are iterated directly below.
    let main_tiers: Vec<_> = file
        .tiers
        .iter()
        .filter(|t| match participants {
            Some(codes) => codes.iter().any(|c| c == &t.id),
            None => t.parent_id.is_none() && t.id.len() == 3,
        })
        .collect();
    let multi = main_tiers.len() > 1;

    let mut entries: Vec<SrtEntry> = Vec::new();
    for tier in &main_tiers {
        for ann in &tier.annotations {
            // Only annotations with both time marks can become a cue.
            if let (Some(start_ms), Some(end_ms)) = (ann.start_time, ann.end_time) {
                let text = if multi {
                    format!("{}: {}", tier.id, ann.value)
                } else {
                    ann.value.clone()
                };
                entries.push(SrtEntry {
                    text,
                    start_ms,
                    end_ms,
                });
            }
        }
    }

    // Stable sort: cues with identical spans keep their insertion order.
    entries.sort_by_key(|entry| (entry.start_ms, entry.end_ms));

    let mut output = String::with_capacity(4096);
    for (i, entry) in entries.iter().enumerate() {
        if i > 0 {
            // Blank line between consecutive cues.
            output.push('\n');
        }
        // Writing into a `String` cannot fail, so the `fmt::Result` is discarded.
        let _ = writeln!(
            output,
            "{}\n{} --> {}\n{}",
            i + 1,
            format_srt_time(entry.start_ms),
            format_srt_time(entry.end_ms),
            entry.text,
        );
    }
    output
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::elan::reader::{Annotation, ElanFile, Tier};

    /// Builds a parentless annotation that carries both a start and an end time.
    fn make_alignable_ann(id: &str, start: i64, end: i64, value: &str) -> Annotation {
        Annotation {
            id: String::from(id),
            start_time: Some(start),
            end_time: Some(end),
            value: String::from(value),
            parent_id: None,
        }
    }

    /// Builds a tier without parent or children that owns `annotations`.
    fn make_main_tier(id: &str, participant: &str, annotations: Vec<Annotation>) -> Tier {
        Tier {
            id: id.to_owned(),
            participant: participant.to_owned(),
            annotator: String::new(),
            linguistic_type_ref: String::from("default-lt"),
            parent_id: None,
            child_ids: None,
            annotations,
        }
    }

    /// Wraps `tiers` in an `ElanFile` with a placeholder path and empty raw XML.
    fn make_elan_file(tiers: Vec<Tier>) -> ElanFile {
        ElanFile {
            file_path: String::from("test.eaf"),
            tiers,
            raw_xml: String::new(),
        }
    }

    /// Fixture shared by the participant-selection tests: a "Speaker1" tier
    /// saying "hello" followed by a "CHI" tier saying "hi".
    fn make_speaker1_and_chi_file() -> ElanFile {
        let speaker = make_main_tier(
            "Speaker1",
            "Alice",
            vec![make_alignable_ann("a1", 0, 1000, "hello")],
        );
        let child = make_main_tier(
            "CHI",
            "Target_Child",
            vec![make_alignable_ann("a2", 1000, 2000, "hi")],
        );
        make_elan_file(vec![speaker, child])
    }

    // A single selected tier yields numbered cues without a tier-ID prefix.
    #[test]
    fn test_single_tier() {
        let annotations = vec![
            make_alignable_ann("a1", 0, 1500, "hello world ."),
            make_alignable_ann("a2", 2000, 3500, "goodbye ."),
        ];
        let file = make_elan_file(vec![make_main_tier("CHI", "Target_Child", annotations)]);

        let srt = elan_file_to_srt_str(&file, None);

        let expected_cues = [
            "1\n00:00:00,000 --> 00:00:01,500\nhello world .",
            "2\n00:00:02,000 --> 00:00:03,500\ngoodbye .",
        ];
        assert!(expected_cues.iter().all(|cue| srt.contains(*cue)));
        assert!(!srt.contains("CHI:"));
    }

    // With two selected tiers every cue is prefixed with its tier ID.
    #[test]
    fn test_multi_tier() {
        let child = make_main_tier(
            "CHI",
            "Target_Child",
            vec![make_alignable_ann("a1", 0, 2000, "more cookie .")],
        );
        let mother = make_main_tier(
            "MOT",
            "Mother",
            vec![make_alignable_ann("a2", 2500, 4000, "want more ?")],
        );
        let file = make_elan_file(vec![child, mother]);

        let srt = elan_file_to_srt_str(&file, None);

        assert!(srt.contains("CHI: more cookie ."));
        assert!(srt.contains("MOT: want more ?"));
    }

    // An explicit participant list overrides the automatic tier selection.
    #[test]
    fn test_explicit_participants() {
        let file = make_speaker1_and_chi_file();
        let participants = [String::from("Speaker1")];

        let srt = elan_file_to_srt_str(&file, Some(&participants[..]));

        assert!(srt.contains("hello"));
        assert!(!srt.contains("hi"));
        assert!(!srt.contains("Speaker1:"));
    }

    // Annotations lacking time marks produce no cue at all.
    #[test]
    fn test_skip_no_time_marks() {
        let untimed = Annotation {
            start_time: None,
            end_time: None,
            ..make_alignable_ann("a1", 0, 0, "no times")
        };
        let file = make_elan_file(vec![make_main_tier("CHI", "Target_Child", vec![untimed])]);

        let srt = elan_file_to_srt_str(&file, None);

        assert!(srt.is_empty());
    }

    // Cues are emitted in chronological order, not in annotation order.
    #[test]
    fn test_sorted_by_time() {
        let out_of_order = vec![
            make_alignable_ann("a1", 3000, 4000, "second ."),
            make_alignable_ann("a2", 0, 1000, "first ."),
        ];
        let file = make_elan_file(vec![make_main_tier("CHI", "Target_Child", out_of_order)]);

        let srt = elan_file_to_srt_str(&file, None);

        let position_of = |needle: &str| srt.find(needle).unwrap();
        assert!(position_of("first .") < position_of("second ."));
    }

    // A file without tiers serializes to an empty string.
    #[test]
    fn test_empty_file() {
        let file = make_elan_file(Vec::new());

        let srt = elan_file_to_srt_str(&file, None);

        assert!(srt.is_empty());
    }

    // Automatic selection ignores tiers whose ID is not three characters long.
    #[test]
    fn test_auto_detect_skips_non_3char() {
        let file = make_speaker1_and_chi_file();

        let srt = elan_file_to_srt_str(&file, None);

        assert!(srt.contains("hi"));
        assert!(!srt.contains("hello"));
    }
}