use super::reader::{TextGridFile, TextGridTier};
fn xml_escape(s: &str) -> String {
s.replace('&', "&")
.replace('<', "<")
.replace('>', ">")
.replace('"', """)
.replace('\'', "'")
}
pub(crate) fn textgrid_file_to_eaf_xml(file: &TextGridFile) -> String {
let mut time_slots: Vec<(String, i64)> = Vec::new();
let mut ts_counter = 1usize;
struct TierData {
name: String,
annotations: Vec<(String, String, String, String)>, }
let mut tier_data_list: Vec<TierData> = Vec::new();
let mut ann_counter = 1usize;
for tier in &file.tiers {
if let TextGridTier::IntervalTier {
name, intervals, ..
} = tier
{
let mut annotations = Vec::new();
for interval in intervals {
if interval.text.is_empty() {
continue;
}
let ts_id1 = format!("ts{ts_counter}");
ts_counter += 1;
let ts_id2 = format!("ts{ts_counter}");
ts_counter += 1;
let ann_id = format!("a{ann_counter}");
ann_counter += 1;
let start_ms = (interval.xmin * 1000.0).round() as i64;
let end_ms = (interval.xmax * 1000.0).round() as i64;
time_slots.push((ts_id1.clone(), start_ms));
time_slots.push((ts_id2.clone(), end_ms));
annotations.push((ann_id, ts_id1, ts_id2, interval.text.clone()));
}
tier_data_list.push(TierData {
name: name.clone(),
annotations,
});
}
}
let mut xml = String::with_capacity(4096);
xml.push_str("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
xml.push_str("<ANNOTATION_DOCUMENT>\n");
xml.push_str(" <HEADER MEDIA_FILE=\"\" TIME_UNITS=\"milliseconds\"/>\n");
xml.push_str(" <TIME_ORDER>\n");
for (ts_id, ms) in &time_slots {
xml.push_str(&format!(
" <TIME_SLOT TIME_SLOT_ID=\"{ts_id}\" TIME_VALUE=\"{ms}\"/>\n"
));
}
xml.push_str(" </TIME_ORDER>\n");
for td in &tier_data_list {
xml.push_str(&format!(
" <TIER TIER_ID=\"{}\" PARTICIPANT=\"\" ANNOTATOR=\"\" LINGUISTIC_TYPE_REF=\"default-lt\">\n",
xml_escape(&td.name)
));
for (ann_id, ts1, ts2, value) in &td.annotations {
xml.push_str(" <ANNOTATION>\n");
xml.push_str(&format!(
" <ALIGNABLE_ANNOTATION ANNOTATION_ID=\"{ann_id}\" TIME_SLOT_REF1=\"{ts1}\" TIME_SLOT_REF2=\"{ts2}\">\n"
));
xml.push_str(&format!(
" <ANNOTATION_VALUE>{}</ANNOTATION_VALUE>\n",
xml_escape(value)
));
xml.push_str(" </ALIGNABLE_ANNOTATION>\n");
xml.push_str(" </ANNOTATION>\n");
}
xml.push_str(" </TIER>\n");
}
xml.push_str("</ANNOTATION_DOCUMENT>\n");
xml
}
#[cfg(test)]
mod tests {
use super::*;
use crate::elan::BaseElan;
use crate::textgrid::reader::{Interval, TextGridFile};
fn make_interval_tier(name: &str, intervals: Vec<Interval>) -> TextGridTier {
let xmax = intervals.last().map(|i| i.xmax).unwrap_or(0.0);
TextGridTier::IntervalTier {
name: name.to_string(),
xmin: 0.0,
xmax,
intervals,
}
}
fn make_textgrid_file(tiers: Vec<TextGridTier>) -> TextGridFile {
let xmax = tiers
.iter()
.map(|t| match t {
TextGridTier::IntervalTier { xmax, .. } => *xmax,
TextGridTier::TextTier { xmax, .. } => *xmax,
})
.fold(0.0f64, f64::max);
TextGridFile {
file_path: "test.TextGrid".to_string(),
xmin: 0.0,
xmax,
tiers,
raw_text: String::new(),
}
}
#[test]
fn test_single_tier() {
let file = make_textgrid_file(vec![make_interval_tier(
"words",
vec![
Interval {
xmin: 0.0,
xmax: 1.5,
text: "hello".to_string(),
},
Interval {
xmin: 1.5,
xmax: 2.3,
text: "world".to_string(),
},
],
)]);
let xml = textgrid_file_to_eaf_xml(&file);
assert!(xml.contains("TIER_ID=\"words\""));
assert!(xml.contains("TIME_VALUE=\"0\""));
assert!(xml.contains("TIME_VALUE=\"1500\""));
assert!(xml.contains("TIME_VALUE=\"2300\""));
assert!(xml.contains("<ANNOTATION_VALUE>hello</ANNOTATION_VALUE>"));
assert!(xml.contains("<ANNOTATION_VALUE>world</ANNOTATION_VALUE>"));
}
#[test]
fn test_skips_texttier() {
let file = make_textgrid_file(vec![TextGridTier::TextTier {
name: "events".to_string(),
xmin: 0.0,
xmax: 1.0,
points: vec![crate::textgrid::reader::Point {
number: 0.5,
mark: "click".to_string(),
}],
}]);
let xml = textgrid_file_to_eaf_xml(&file);
assert!(!xml.contains("TIER_ID"));
}
#[test]
fn test_skips_empty_text() {
let file = make_textgrid_file(vec![make_interval_tier(
"words",
vec![
Interval {
xmin: 0.0,
xmax: 0.5,
text: String::new(),
},
Interval {
xmin: 0.5,
xmax: 1.0,
text: "hello".to_string(),
},
],
)]);
let xml = textgrid_file_to_eaf_xml(&file);
assert!(xml.contains("hello"));
assert_eq!(xml.matches("ALIGNABLE_ANNOTATION").count(), 2); }
#[test]
fn test_xml_escape() {
let file = make_textgrid_file(vec![make_interval_tier(
"tier",
vec![Interval {
xmin: 0.0,
xmax: 1.0,
text: "a < b & c > d".to_string(),
}],
)]);
let xml = textgrid_file_to_eaf_xml(&file);
assert!(xml.contains("a < b & c > d"));
}
#[test]
fn test_round_trip_through_elan() {
let file = make_textgrid_file(vec![make_interval_tier(
"CHI",
vec![Interval {
xmin: 0.0,
xmax: 1.5,
text: "hello world".to_string(),
}],
)]);
let xml = textgrid_file_to_eaf_xml(&file);
let elan =
crate::elan::Elan::from_strs(vec![xml], Some(vec!["test.eaf".to_string()]), false)
.unwrap();
let tiers = elan.tiers_flat();
assert_eq!(tiers.len(), 1);
assert_eq!(tiers[0].id, "CHI");
assert_eq!(tiers[0].annotations.len(), 1);
assert_eq!(tiers[0].annotations[0].value, "hello world");
assert_eq!(tiers[0].annotations[0].start_time, Some(0));
assert_eq!(tiers[0].annotations[0].end_time, Some(1500));
}
}