1#![deny(clippy::all)]
2#![deny(clippy::pedantic)]
3#![deny(clippy::nursery)]
4#![deny(clippy::cargo)]
5
6use std::{
7 collections::VecDeque,
8 io::{Error, ErrorKind, Result},
9};
10
11mod input;
12pub mod interval;
13pub mod point;
14pub mod textgrid;
15mod utilities;
16
17use input::{get_file_content, Source};
18use interval::{Interval, Tier as IntervalTier};
19use point::{Point, Tier as PointTier};
20use textgrid::{TextGrid, Tier};
21
22pub fn parse_textgrid<I, W>(input: I, print_warnings: W) -> Result<TextGrid>
45where
46 I: Into<Source>,
47 W: Into<Option<bool>> + Copy,
48{
49 let input_source: Source = input.into();
50
51 let (mut content, name) = get_file_content(input_source, None)?;
52
53 content.retain(|s| !s.trim().is_empty());
55
56 for line in &mut content {
58 let mut quote_count = 0;
59 let mut quote_indices = Vec::<usize>::new();
60 for (i, c) in line.chars().enumerate() {
61 if c == '"' {
62 quote_count += 1;
63 quote_indices.push(i);
64 }
65 if c == '!' && quote_count % 2 != 0 {
66 *line = line[..quote_indices[quote_indices.len() - 2]].to_string();
67 break;
68 }
69 }
70 }
71
72 content = utilities::process_lines(&content);
74
75 let mut textgrid_data: VecDeque<String> = VecDeque::from(content);
77
78 let textgrid_data = verify_start_of_textgrid(&mut textgrid_data)?;
80
81 let tg_xmin = textgrid_data
82 .pop_front()
83 .ok_or_else(|| {
84 Error::new(
85 ErrorKind::InvalidData,
86 "TextGrid malformed; early EOF expecting `xmin`",
87 )
88 })?
89 .chars()
90 .filter(|c| c.is_numeric() || *c == '.')
91 .collect::<String>()
92 .parse::<f64>()
93 .map_err(|_| {
94 Error::new(
95 ErrorKind::InvalidData,
96 "TextGrid malformed; could not parse `xmin` as a float",
97 )
98 })?;
99
100 let tg_xmax = textgrid_data
101 .pop_front()
102 .ok_or_else(|| {
103 Error::new(
104 ErrorKind::InvalidData,
105 "TextGrid malformed; early EOF expecting `xmax`",
106 )
107 })?
108 .chars()
109 .filter(|c| c.is_numeric() || *c == '.')
110 .collect::<String>()
111 .parse::<f64>()
112 .map_err(|_| {
113 Error::new(
114 ErrorKind::InvalidData,
115 "TextGrid malformed; could not parse `xmax` as a float",
116 )
117 })?;
118
119 let parsed_textgrid = parse_tiers(textgrid_data, tg_xmin, tg_xmax, print_warnings)?;
120
121 Ok(TextGrid::new(tg_xmin, tg_xmax, parsed_textgrid, name))
122}
123
/// Consumes and checks the two leading header tokens of a TextGrid.
///
/// The first token must be `ooTextFile` and the second `TextGrid`. A missing
/// token is treated as an empty string and therefore fails the comparison.
/// On success the same queue, now positioned after the header, is returned.
///
/// # Errors
///
/// Returns an `InvalidData` error naming the offending token as soon as one
/// of the two checks fails; the second token is not consumed when the first
/// check fails.
fn verify_start_of_textgrid(textgrid_data: &mut VecDeque<String>) -> Result<&mut VecDeque<String>> {
    let invalid = |message: String| Error::new(ErrorKind::InvalidData, message);

    match textgrid_data.pop_front().unwrap_or_default() {
        file_type if file_type == "ooTextFile" => {}
        file_type => {
            return Err(invalid(format!(
                "TextGrid malformed; `File type` incorrect: expected `ooTextFile`, got {file_type}"
            )));
        }
    }

    match textgrid_data.pop_front().unwrap_or_default() {
        object_class if object_class == "TextGrid" => {}
        object_class => {
            return Err(invalid(format!(
                "TextGrid malformed; `Object class` incorrect: expected `TextGrid`, got {object_class}"
            )));
        }
    }

    Ok(textgrid_data)
}
145
146fn parse_tiers<W: Into<Option<bool>> + Copy>(
159 data: &mut VecDeque<String>,
160 tg_xmin: f64,
161 tg_xmax: f64,
162 warn: W,
163) -> Result<Vec<Tier>> {
164 let mut tiers = Vec::<Tier>::new();
165
166 let num_tiers = utilities::pull_next_number::<i64>(data)?;
167 let mut num_tier_counter = 0;
168
169 while !data.is_empty() {
170 num_tier_counter += 1;
171
172 let tier_type = data.pop_front().ok_or_else(|| {
173 Error::new(
174 ErrorKind::InvalidData,
175 "TextGrid malformed; early EOF expecting tier type",
176 )
177 })?;
178 let tier_name = data.pop_front().ok_or_else(|| {
179 Error::new(
180 ErrorKind::InvalidData,
181 "TextGrid malformed; early EOF expecting tier name",
182 )
183 })?;
184
185 let xmin = utilities::pull_next_number::<f64>(data)?;
186 let xmax = utilities::pull_next_number::<f64>(data)?;
187
188 if warn.into().unwrap_or_default() {
189 if xmin < tg_xmin {
190 return Err(Error::new(
191 ErrorKind::InvalidData,
192 "TextGrid malformed; tier {tier_name} `xmin` less than TextGrid `xmin`",
193 ));
194 }
195 if xmax > tg_xmax {
196 return Err(Error::new(
197 ErrorKind::InvalidData,
198 "TextGrid malformed; tier {tier_name} `xmax` greater than TextGrid `xmax`",
199 ));
200 }
201 }
202
203 let tier_size = utilities::pull_next_number::<i64>(data)?;
204 let mut tier_size_counter = 0;
205
206 match tier_type.as_str() {
207 "IntervalTier" => {
208 let mut new_tier: IntervalTier =
209 IntervalTier::new(tier_name.clone(), xmin, xmax, Vec::<Interval>::new());
210
211 while data.front().is_some()
212 && !["IntervalTier".to_string(), "TextTier".to_string()]
213 .contains(data.front().unwrap())
214 {
215 new_tier.push_interval(parse_interval(data)?, warn);
216 tier_size_counter += 1;
217 }
218 if warn.into().unwrap_or_default() && tier_size != tier_size_counter {
219 eprintln!(
220 "Warning: Tier `{tier_name}` has a size of {tier_size} but {tier_size_counter} intervals were found",
221 );
222 }
223 tiers.push(Tier::IntervalTier(new_tier));
224 }
225 "TextTier" => {
226 let mut new_tier =
227 PointTier::new(tier_name.clone(), xmin, xmax, Vec::<Point>::new());
228
229 while data.front().is_some()
230 && !["\"IntervalTier\"".to_string(), "\"TextTier\"".to_string()]
231 .contains(data.front().unwrap())
232 {
233 new_tier.push_point(parse_point(data)?, warn);
234 tier_size_counter += 1;
235 }
236 if warn.into().unwrap_or_default() && tier_size != tier_size_counter {
237 eprintln!(
238 "Warning: Tier `{tier_name}` has a size of {tier_size} but {tier_size_counter} points were found",
239 );
240 }
241 tiers.push(Tier::PointTier(new_tier));
242 }
243 _ => {
244 return Err(Error::new(
245 ErrorKind::InvalidData,
246 format!("TextGrid malformed; Invalid tier type: {tier_type}"),
247 ));
248 }
249 }
250 }
251
252 if num_tiers != num_tier_counter && warn.into().unwrap_or_default() {
253 eprintln!(
254 "Warning: TextGrid has a size of {num_tiers} but {num_tier_counter} tiers were found",
255 );
256 }
257
258 Ok(tiers)
259}
260
261fn parse_interval(data: &mut VecDeque<String>) -> Result<Interval> {
271 let xmin = utilities::pull_next_number::<f64>(data)?;
272 let xmax = utilities::pull_next_number::<f64>(data)?;
273 let text = data.pop_front().unwrap_or_default();
274
275 Ok(Interval::new(xmin, xmax, text))
276}
277
278fn parse_point(data: &mut VecDeque<String>) -> Result<Point> {
288 let number = utilities::pull_next_number::<f64>(data)?;
289 let mark = data.pop_front().unwrap_or_default();
290
291 Ok(Point::new(number, mark))
292}
293
#[cfg(test)]
mod test {
    use std::collections::VecDeque;

    use crate::input::Source;

    use super::parse_textgrid;

    // Long-format TextGrid with two interval tiers ("John", "Kelly") and one
    // point tier ("Bell").
    const TEXTGRID: &str = "File type = \"ooTextFile\"\nObject class = \"TextGrid\"\n\nxmin = 0\nxmax = 2.3\ntiers? <exists>\nsize = 3\nitem []:\n\titem [1]:\n\t\tclass = \"IntervalTier\"\n\t\tname = \"John\"\n\t\txmin = 0\n\t\txmax = 2.3\n\t\tintervals: size = 1\n\t\tintervals [1]:\n\t\t\txmin = 0\n\t\t\txmax = 2.3\n\t\t\ttext = \"daisy bell\"\n\titem [2]:\n\t\tclass = \"IntervalTier\"\n\t\tname = \"Kelly\"\n\t\txmin = 0\n\t\txmax = 2.3\n\t\tintervals: size = 1\n\t\tintervals [1]:\n\t\t\txmin = 0\n\t\t\txmax = 2.3\n\t\t\ttext = \"\"\n\titem [3]:\n\t\tclass = \"TextTier\"\n\t\tname = \"Bell\"\n\t\txmin = 0\n\t\txmax = 2.3\n\t\tpoints: size = 1\n\t\tpoints [1]:\n\t\t\tnumber = 1\n\t\t\tmark = \"give me your answer do\"\"\n";

    /// Builds a queue of owned tokens from string slices.
    fn queue_of(tokens: &[&str]) -> VecDeque<String> {
        tokens.iter().map(|token| (*token).to_string()).collect()
    }

    /// Asserts that `tier` is an interval tier named "Kelly".
    fn assert_kelly_interval_tier(tier: &crate::textgrid::Tier) {
        match tier {
            crate::textgrid::Tier::IntervalTier(tier) => assert_eq!(tier.name(), "Kelly"),
            crate::textgrid::Tier::PointTier(_) => panic!("Expected IntervalTier, got PointTier"),
        }
    }

    #[test]
    fn parse_textgrid_from_string() {
        let parsed_textgrid = parse_textgrid(TEXTGRID, false).unwrap();

        assert_kelly_interval_tier(&parsed_textgrid.tiers()[1]);
    }

    #[test]
    fn parse_textgrid_from_path() {
        let parsed_textgrid = parse_textgrid("example/long.TextGrid", false).unwrap();

        assert_kelly_interval_tier(&parsed_textgrid.tiers()[1]);
    }

    #[test]
    fn parse_textgrid_from_vector() {
        let textgrid_vector: Vec<String> = TEXTGRID.split('\n').map(String::from).collect();

        let parsed_textgrid = parse_textgrid(textgrid_vector, false).unwrap();

        assert_kelly_interval_tier(&parsed_textgrid.tiers()[1]);
    }

    #[test]
    fn parse_textgrid_from_stream() {
        let parsed_textgrid =
            parse_textgrid(Source::Stream(Box::new(TEXTGRID.as_bytes())), false).unwrap();

        assert_kelly_interval_tier(&parsed_textgrid.tiers()[1]);
    }

    #[test]
    fn parse_textgrid_from_file() {
        let textgrid_file = std::fs::File::open("example/long.TextGrid").unwrap();

        let parsed_textgrid = parse_textgrid(textgrid_file, false).unwrap();

        assert_kelly_interval_tier(&parsed_textgrid.tiers()[1]);
    }

    #[test]
    fn parse_textgrid_from_invalid_string() {
        assert!(parse_textgrid("invalid", false).is_err());
    }

    #[test]
    fn verify_start_of_textgrid() {
        let mut textgrid_data = queue_of(&["ooTextFile", "TextGrid"]);

        assert!(super::verify_start_of_textgrid(&mut textgrid_data).is_ok());
    }

    #[test]
    fn parse_tiers() {
        // Tier count, then per tier: type, name, xmin, xmax, size, entries.
        let mut tier_data = queue_of(&[
            "3",
            "IntervalTier",
            "John",
            "0",
            "2.3",
            "1",
            "0",
            "2.3",
            "daisy bell",
            "IntervalTier",
            "Kelly",
            "0",
            "2.3",
            "1",
            "0",
            "2.3",
            "",
            "TextTier",
            "Bell",
            "0",
            "2.3",
            "1",
            "1",
            "give me your answer do\"",
        ]);

        let parsed_tiers = super::parse_tiers(&mut tier_data, 0.0, 2.3, false).unwrap();

        assert_kelly_interval_tier(&parsed_tiers[1]);
    }

    #[test]
    fn parse_interval() {
        let mut interval_data = queue_of(&["0", "2.3", "daisy bell"]);

        let parsed_interval = super::parse_interval(&mut interval_data).unwrap();

        assert_eq!(parsed_interval.text(), "daisy bell");
    }

    #[test]
    fn parse_point() {
        let mut point_data = queue_of(&["1", "give me your answer do\""]);

        let parsed_point = super::parse_point(&mut point_data).unwrap();

        assert_eq!(parsed_point.mark(), "give me your answer do\"");
    }
}