use super::reader::ChatFile;
use crate::srt::format_srt_time;
/// One subtitle cue waiting to be rendered: its display window and its text.
struct SrtEntry {
    /// Time at which the cue appears, in milliseconds.
    start_ms: i64,
    /// Time at which the cue disappears, in milliseconds.
    end_ms: i64,
    /// Final cue text, already cleaned and (when applicable) speaker-prefixed.
    text: String,
}
/// Renders the time-aligned utterances of a CHAT file as SRT subtitle text.
///
/// Utterances that have no participant code or no time marks are skipped.
/// When `participants` is `Some`, only utterances whose participant code
/// appears in the slice are kept; `None` keeps every participant.
///
/// Cues are ordered by start time, then end time, and numbered from 1, with a
/// blank line between consecutive cues. If more than one distinct participant
/// contributes a cue, every cue's text is prefixed with `"<code>: "`;
/// otherwise the text is emitted without a prefix.
///
/// Returns an empty string when no utterance qualifies.
pub(crate) fn chat_file_to_srt_str(file: &ChatFile, participants: Option<&[String]>) -> String {
    // `generate_srt_from_chat_file` first counts the speakers that survive the
    // filter and carry time marks, then formats every cue accordingly, so it
    // yields the right output for zero, one, or many speakers. Doing a separate
    // pass here only to throw it away in the multi-speaker case was redundant.
    generate_srt_from_chat_file(file, participants)
}
/// Builds the SRT document for `file` in two passes over its real utterances.
///
/// The first pass counts the distinct participants that pass the optional
/// `participants` filter and have time marks; the second pass turns each such
/// utterance into a cue, prefixing the speaker code only when more than one
/// participant was counted. Cues are then ordered by `(start, end)` and
/// rendered as numbered blocks separated by a blank line.
fn generate_srt_from_chat_file(file: &ChatFile, participants: Option<&[String]>) -> String {
    // With no filter every code is accepted; otherwise it must be listed.
    let is_selected = |code: &String| match participants {
        Some(codes) => codes.contains(code),
        None => true,
    };

    // Pass 1: which speakers will actually produce at least one cue?
    let mut speakers: Vec<String> = Vec::new();
    for utt in file.real_utterances() {
        let Some(code) = &utt.participant else {
            continue;
        };
        if is_selected(code) && utt.time_marks.is_some() && !speakers.contains(code) {
            speakers.push(code.clone());
        }
    }
    let label_speakers = speakers.len() > 1;

    // Pass 2: one cue per selected, time-marked utterance.
    let mut cues: Vec<SrtEntry> = Vec::new();
    for utt in file.real_utterances() {
        let Some(code) = &utt.participant else {
            continue;
        };
        if !is_selected(code) {
            continue;
        }
        let Some((start_ms, end_ms)) = utt.time_marks else {
            continue;
        };
        // The main line is looked up in the tier map under the speaker's code;
        // a missing map or missing tier yields empty text.
        let cleaned = utt
            .tiers
            .as_ref()
            .and_then(|tiers| tiers.get(code))
            .map(|raw| strip_bullet_markers(raw))
            .unwrap_or_default();
        cues.push(SrtEntry {
            text: format_subtitle_text(&cleaned, code, label_speakers),
            start_ms,
            end_ms,
        });
    }

    // Stable sort: cues with identical timing keep their transcript order.
    cues.sort_by_key(|cue| (cue.start_ms, cue.end_ms));

    let mut srt = String::with_capacity(4096);
    for (index, cue) in cues.iter().enumerate() {
        // A blank line separates consecutive cue blocks.
        if !srt.is_empty() {
            srt.push('\n');
        }
        srt.push_str(&format!(
            "{}\n{} --> {}\n{}\n",
            index + 1,
            format_srt_time(cue.start_ms),
            format_srt_time(cue.end_ms),
            cue.text,
        ));
    }
    srt
}
/// Produces the text shown for one cue.
///
/// When `multi_participant` is `true` the result is `"<participant>: <text>"`;
/// otherwise it is an owned copy of `text` with no prefix.
fn format_subtitle_text(text: &str, participant: &str, multi_participant: bool) -> String {
    if !multi_participant {
        return text.to_owned();
    }
    format!("{participant}: {text}")
}
/// Removes bullet spans delimited by the `\x15` control character, then trims
/// surrounding whitespace.
///
/// Each `\x15` toggles between "outside" and "inside" a bullet; the delimiters
/// and everything inside are dropped. A bullet that is opened but never closed
/// therefore swallows the rest of the input.
fn strip_bullet_markers(text: &str) -> String {
    // Splitting on the delimiter alternates outside/inside pieces, starting
    // outside, so the pieces to keep are the ones at even positions.
    let kept: String = text.split('\x15').step_by(2).collect();
    kept.trim().to_string()
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::chat::header::Headers;
    use crate::chat::reader::ChatFile;
    use crate::chat::utterance::Utterance;
    use std::collections::HashMap;

    /// Builds an utterance whose tier map holds a single entry: `main_text`
    /// stored under the participant code.
    fn make_utterance(
        participant: &str,
        main_text: &str,
        time_marks: Option<(i64, i64)>,
    ) -> Utterance {
        let mut tiers = HashMap::new();
        tiers.insert(participant.to_string(), main_text.to_string());
        Utterance {
            participant: Some(participant.to_string()),
            tokens: None,
            time_marks,
            tiers: Some(tiers),
            changeable_header: None,
            mor_tier_name: Some("%mor".to_string()),
            gra_tier_name: Some("%gra".to_string()),
        }
    }

    /// Wraps the given utterances in a `ChatFile` with default headers.
    fn make_chat_file(events: Vec<Utterance>) -> ChatFile {
        ChatFile::new("test.cha".to_string(), Headers::default(), events, vec![])
    }

    /// Counts cues by their timing line; each rendered cue has exactly one
    /// `" --> "` separator and none of the test texts contain it.
    fn cue_count(srt: &str) -> usize {
        srt.matches(" --> ").count()
    }

    #[test]
    fn test_single_participant() {
        let file = make_chat_file(vec![
            make_utterance("CHI", "hello world .", Some((0, 1500))),
            make_utterance("CHI", "goodbye .", Some((2000, 3500))),
        ]);
        let srt = chat_file_to_srt_str(&file, None);
        assert!(srt.contains("1\n00:00:00,000 --> 00:00:01,500\nhello world ."));
        assert!(srt.contains("2\n00:00:02,000 --> 00:00:03,500\ngoodbye ."));
        assert!(!srt.contains("CHI:"));
        // Pin the whole document, including the blank line between cues.
        assert_eq!(
            srt,
            "1\n00:00:00,000 --> 00:00:01,500\nhello world .\n\n\
             2\n00:00:02,000 --> 00:00:03,500\ngoodbye .\n"
        );
    }

    #[test]
    fn test_multi_participant() {
        let file = make_chat_file(vec![
            make_utterance("CHI", "more cookie .", Some((0, 2000))),
            make_utterance("MOT", "want more ?", Some((2500, 4000))),
        ]);
        let srt = chat_file_to_srt_str(&file, None);
        assert!(srt.contains("CHI: more cookie ."));
        assert!(srt.contains("MOT: want more ?"));
        assert_eq!(cue_count(&srt), 2);
    }

    #[test]
    fn test_skip_no_time_marks() {
        let file = make_chat_file(vec![
            make_utterance("CHI", "hello .", Some((0, 1500))),
            make_utterance("CHI", "no time marks .", None),
            make_utterance("CHI", "goodbye .", Some((2000, 3000))),
        ]);
        let srt = chat_file_to_srt_str(&file, None);
        assert!(!srt.contains("no time marks ."));
        // Check the numbered cue blocks themselves rather than bare "1\n" /
        // "3\n" substrings, which can also match the tail of a timestamp line.
        assert!(srt.starts_with("1\n00:00:00,000 --> 00:00:01,500\nhello ."));
        assert!(srt.contains("\n\n2\n00:00:02,000 --> "));
        assert!(srt.ends_with("\ngoodbye .\n"));
        assert_eq!(cue_count(&srt), 2);
    }

    #[test]
    fn test_filter_participants() {
        let file = make_chat_file(vec![
            make_utterance("CHI", "more cookie .", Some((0, 2000))),
            make_utterance("MOT", "want more ?", Some((2500, 4000))),
        ]);
        let participants = vec!["CHI".to_string()];
        let srt = chat_file_to_srt_str(&file, Some(&participants));
        assert!(srt.contains("more cookie ."));
        assert!(!srt.contains("want more ?"));
        assert!(!srt.contains("CHI:"));
        assert_eq!(cue_count(&srt), 1);
    }

    #[test]
    fn test_filter_with_two_selected_participants_keeps_prefix() {
        let file = make_chat_file(vec![
            make_utterance("CHI", "more cookie .", Some((0, 2000))),
            make_utterance("MOT", "want more ?", Some((2500, 4000))),
            make_utterance("FAT", "dinner time .", Some((4500, 6000))),
        ]);
        let participants = vec!["CHI".to_string(), "MOT".to_string()];
        let srt = chat_file_to_srt_str(&file, Some(&participants));
        assert!(srt.contains("CHI: more cookie ."));
        assert!(srt.contains("MOT: want more ?"));
        assert!(!srt.contains("dinner time ."));
        assert_eq!(cue_count(&srt), 2);
    }

    #[test]
    fn test_untimed_speaker_does_not_trigger_prefix() {
        // Only speakers that actually produce a cue count towards the
        // multi-participant decision.
        let file = make_chat_file(vec![
            make_utterance("CHI", "hello .", Some((0, 1500))),
            make_utterance("MOT", "untimed reply .", None),
        ]);
        let srt = chat_file_to_srt_str(&file, None);
        assert!(srt.contains("hello ."));
        assert!(!srt.contains("untimed reply ."));
        assert!(!srt.contains("CHI:"));
        assert_eq!(cue_count(&srt), 1);
    }

    #[test]
    fn test_strip_bullet_markers() {
        let text = "hello world . \x150_1500\x15";
        let result = strip_bullet_markers(text);
        assert_eq!(result, "hello world .");
    }

    #[test]
    fn test_empty_file() {
        let file = make_chat_file(vec![]);
        let srt = chat_file_to_srt_str(&file, None);
        assert!(srt.is_empty());
    }

    #[test]
    fn test_sorted_by_time() {
        let file = make_chat_file(vec![
            make_utterance("CHI", "second .", Some((2000, 3000))),
            make_utterance("CHI", "first .", Some((0, 1000))),
        ]);
        let srt = chat_file_to_srt_str(&file, None);
        let first_pos = srt.find("first .").unwrap();
        let second_pos = srt.find("second .").unwrap();
        assert!(first_pos < second_pos);
    }
}