use super::reader::{TextGridFile, TextGridTier};
/// One time-aligned utterance gathered from a TextGrid interval before it is
/// written out as a CHAT main-tier line.
struct ChatUtterance {
    /// Name of the tier the interval was taken from; written as the speaker code.
    participant: String,
    /// Label text of the interval.
    text: String,
    /// Interval start, in whole milliseconds.
    start_ms: i64,
    /// Interval end, in whole milliseconds.
    end_ms: i64,
}
/// Renders a parsed TextGrid as the text of a CHAT transcript.
///
/// Only interval tiers are considered. With `participants == Some(codes)` a
/// tier is used when its name equals one of `codes`; with `None` a tier is
/// used when its name is exactly three bytes long (i.e. three ASCII
/// characters).
///
/// Every interval of a used tier whose label contains something other than
/// whitespace becomes one line of the form
/// `*NAME:\t<text> \x15<start>_<end>\x15`, where the times are the interval
/// bounds multiplied by 1000 and rounded to whole milliseconds. Runs of
/// whitespace in a label (including line breaks) are collapsed to a single
/// space so that each utterance occupies exactly one output line. Lines are
/// ordered by start time, then end time; ties keep their file order.
///
/// The result always starts with `@UTF8` and `@Begin` and finishes with
/// `@End`. An `@Participants` header is written only when at least one tier
/// was used, and lists each tier name once as `NAME NAME`.
pub(crate) fn textgrid_file_to_chat_str(
    file: &TextGridFile,
    participants: Option<&[String]>,
) -> String {
    // Names of the interval tiers that act as speakers, in file order. Each
    // name is recorded once so that two tiers sharing a name do not produce a
    // repeated entry in the @Participants header.
    let mut main_tier_names: Vec<&str> = Vec::new();
    for tier in &file.tiers {
        if !matches!(tier, TextGridTier::IntervalTier { .. }) {
            continue;
        }
        let tier_name = tier.name();
        let selected = match participants {
            Some(codes) => codes.iter().any(|code| code == tier_name),
            None => tier_name.len() == 3,
        };
        if selected && !main_tier_names.contains(&tier_name) {
            main_tier_names.push(tier_name);
        }
    }

    let mut utterances: Vec<ChatUtterance> = Vec::new();
    for tier in &file.tiers {
        if let TextGridTier::IntervalTier {
            name, intervals, ..
        } = tier
        {
            if !main_tier_names.contains(&name.as_str()) {
                continue;
            }
            for interval in intervals {
                // Collapse all whitespace runs: a raw line break inside a label
                // would split the utterance across output lines, and a label
                // made only of whitespace carries no utterance at all.
                let text = interval
                    .text
                    .split_whitespace()
                    .collect::<Vec<&str>>()
                    .join(" ");
                if text.is_empty() {
                    continue;
                }
                utterances.push(ChatUtterance {
                    participant: name.clone(),
                    text,
                    start_ms: (interval.xmin * 1000.0).round() as i64,
                    end_ms: (interval.xmax * 1000.0).round() as i64,
                });
            }
        }
    }

    // Stable sort: utterances with identical bounds keep their file order.
    utterances.sort_by(|a, b| {
        a.start_ms
            .cmp(&b.start_ms)
            .then_with(|| a.end_ms.cmp(&b.end_ms))
    });

    let mut output = String::with_capacity(4096);
    output.push_str("@UTF8\n");
    output.push_str("@Begin\n");
    if !main_tier_names.is_empty() {
        let entries: Vec<String> = main_tier_names
            .iter()
            .map(|code| format!("{code} {code}"))
            .collect();
        output.push_str("@Participants:\t");
        output.push_str(&entries.join(", "));
        output.push('\n');
    }
    for utt in &utterances {
        output.push_str(&format!(
            "*{}:\t{} \x15{}_{}\x15\n",
            utt.participant, utt.text, utt.start_ms, utt.end_ms,
        ));
    }
    output.push_str("@End\n");
    output
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::textgrid::reader::{Interval, TextGridFile};

    /// Builds one labelled interval covering `xmin..xmax` (seconds).
    fn span(xmin: f64, xmax: f64, text: &str) -> Interval {
        Interval {
            xmin,
            xmax,
            text: text.to_string(),
        }
    }

    /// Wraps `intervals` in an interval tier called `name`. The tier starts at
    /// 0.0 and ends where its last interval ends (0.0 when there are none).
    fn interval_tier(name: &str, intervals: Vec<Interval>) -> TextGridTier {
        let tier_end = match intervals.last() {
            Some(last) => last.xmax,
            None => 0.0,
        };
        TextGridTier::IntervalTier {
            name: name.to_string(),
            xmin: 0.0,
            xmax: tier_end,
            intervals,
        }
    }

    /// Builds a five-second TextGrid fixture holding the given tiers.
    fn grid_with(tiers: Vec<TextGridTier>) -> TextGridFile {
        TextGridFile {
            file_path: "test.TextGrid".to_string(),
            xmin: 0.0,
            xmax: 5.0,
            tiers,
            raw_text: String::new(),
        }
    }

    // A lone three-letter tier yields the headers, a participant entry and one
    // time-stamped utterance line.
    #[test]
    fn test_single_tier() {
        let tier = interval_tier("CHI", vec![span(0.0, 1.5, "hello world .")]);
        let file = grid_with(vec![tier]);

        let chat = textgrid_file_to_chat_str(&file, None);

        assert!(chat.contains("@Begin"));
        assert!(chat.contains("@End"));
        assert!(chat.contains("@Participants:\tCHI CHI"));
        assert!(chat.contains("*CHI:\thello world . \x150_1500\x15"));
    }

    // Without an explicit participant list, tiers whose names are not three
    // characters long are left out entirely.
    #[test]
    fn test_auto_detect_skips_non_3char() {
        let ignored = interval_tier("Speaker1", vec![span(0.0, 1.0, "hello")]);
        let kept = interval_tier("CHI", vec![span(1.0, 2.0, "hi")]);
        let file = grid_with(vec![ignored, kept]);

        let chat = textgrid_file_to_chat_str(&file, None);

        assert!(chat.contains("*CHI:\thi"));
        assert!(!chat.contains("Speaker1"));
        assert!(!chat.contains("hello"));
    }

    // An explicit participant list selects tiers by name regardless of length.
    #[test]
    fn test_explicit_participants() {
        let tier = interval_tier("Speaker1", vec![span(0.0, 1.0, "hello")]);
        let file = grid_with(vec![tier]);
        let participants = vec!["Speaker1".to_string()];

        let chat = textgrid_file_to_chat_str(&file, Some(&participants));

        assert!(chat.contains("*Speaker1:\thello"));
    }

    // Intervals with an empty label produce no utterance line.
    #[test]
    fn test_skips_empty_intervals() {
        let tier = interval_tier(
            "CHI",
            vec![span(0.0, 0.5, ""), span(0.5, 1.0, "hello .")],
        );
        let file = grid_with(vec![tier]);

        let chat = textgrid_file_to_chat_str(&file, None);

        assert!(chat.contains("hello ."));
        assert_eq!(chat.matches("*CHI:").count(), 1);
    }

    // A file without tiers still produces the begin/end markers, but no
    // participants header.
    #[test]
    fn test_empty_file() {
        let file = grid_with(Vec::new());

        let chat = textgrid_file_to_chat_str(&file, None);

        assert!(chat.contains("@Begin"));
        assert!(chat.contains("@End"));
        assert!(!chat.contains("@Participants"));
    }
}