textgridde_rs/
lib.rs

1#![deny(clippy::all)]
2#![deny(clippy::pedantic)]
3#![deny(clippy::nursery)]
4#![deny(clippy::cargo)]
5
6use std::{
7    collections::VecDeque,
8    io::{Error, ErrorKind, Result},
9};
10
11mod input;
12pub mod interval;
13pub mod point;
14pub mod textgrid;
15mod utilities;
16
17use input::{get_file_content, Source};
18use interval::{Interval, Tier as IntervalTier};
19use point::{Point, Tier as PointTier};
20use textgrid::{TextGrid, Tier};
21
22/// Parses a Praat `.TextGrid` file into a `textgridde::Textgrid` struct.
23///
24/// # Arguments
25///
26/// * `input` - One of the following:
27///                 * A path to a `.TextGrid` file.
28///                 * A string containing the entire `TextGrid` file.
29///                 * A vector of strings containing the lines of a `.TextGrid` file.
30///                 * A stream containing the contents of a `.TextGrid` file.
31/// * `print_warnings?` - An optional boolean indicating whether to print warnings.
32///
33/// # Returns
34///
35/// A `Result` containing a `textgridde::TextGrid` struct if successful, or a `std::io::Error` if parsing failed.
36///
37/// # Errors
38///
39/// If a `TextGrid` is malformed irrecoverably, an `std::io::Error` is returned. This can be for one of the following reasons:
40///     * The file does not start with the correct `File type` and `Object class` (`"ooTextFile"` and `"TextGrid"` respectively).
41///     * The `xmin` and `xmax` values are not present or cannot be parsed as floats.
42///     * The `exists` value is not present or is not equal to "exists".
43///     * A tier type is not recognized.
44pub fn parse_textgrid<I, W>(input: I, print_warnings: W) -> Result<TextGrid>
45where
46    I: Into<Source>,
47    W: Into<Option<bool>> + Copy,
48{
49    let input_source: Source = input.into();
50
51    let (mut content, name) = get_file_content(input_source, None)?;
52
53    // Clean up the content by removing empty or whitespace-only lines
54    content.retain(|s| !s.trim().is_empty());
55
56    // Iterate over lines, removing comments (a "!" after an odd number of quotation marks and everything after it)
57    for line in &mut content {
58        let mut quote_count = 0;
59        let mut quote_indices = Vec::<usize>::new();
60        for (i, c) in line.chars().enumerate() {
61            if c == '"' {
62                quote_count += 1;
63                quote_indices.push(i);
64            }
65            if c == '!' && quote_count % 2 != 0 {
66                *line = line[..quote_indices[quote_indices.len() - 2]].to_string();
67                break;
68            }
69        }
70    }
71
72    // Split lines with spaces not inside quotation marks into their own elements
73    content = utilities::process_lines(&content);
74
75    // Convert into a VecDeque for efficient popping from the front
76    let mut textgrid_data: VecDeque<String> = VecDeque::from(content);
77
78    // Verify the start of the TextGrid file, ensuring "File type" and "Object class" exist
79    let textgrid_data = verify_start_of_textgrid(&mut textgrid_data)?;
80
81    let tg_xmin = textgrid_data
82        .pop_front()
83        .ok_or_else(|| {
84            Error::new(
85                ErrorKind::InvalidData,
86                "TextGrid malformed; early EOF expecting `xmin`",
87            )
88        })?
89        .chars()
90        .filter(|c| c.is_numeric() || *c == '.')
91        .collect::<String>()
92        .parse::<f64>()
93        .map_err(|_| {
94            Error::new(
95                ErrorKind::InvalidData,
96                "TextGrid malformed; could not parse `xmin` as a float",
97            )
98        })?;
99
100    let tg_xmax = textgrid_data
101        .pop_front()
102        .ok_or_else(|| {
103            Error::new(
104                ErrorKind::InvalidData,
105                "TextGrid malformed; early EOF expecting `xmax`",
106            )
107        })?
108        .chars()
109        .filter(|c| c.is_numeric() || *c == '.')
110        .collect::<String>()
111        .parse::<f64>()
112        .map_err(|_| {
113            Error::new(
114                ErrorKind::InvalidData,
115                "TextGrid malformed; could not parse `xmax` as a float",
116            )
117        })?;
118
119    let parsed_textgrid = parse_tiers(textgrid_data, tg_xmin, tg_xmax, print_warnings)?;
120
121    Ok(TextGrid::new(tg_xmin, tg_xmax, parsed_textgrid, name))
122}
123
/// Checks that the token queue begins with the expected `File type` and `Object class` values.
///
/// The first two elements are popped from `textgrid_data` and compared, in order, against
/// `ooTextFile` and `TextGrid`. A missing element is treated as an empty string.
///
/// # Returns
///
/// The same queue, with the two header elements removed, if both values match.
///
/// # Errors
///
/// An `std::io::Error` of kind `InvalidData` naming the first header field that did not match.
fn verify_start_of_textgrid(textgrid_data: &mut VecDeque<String>) -> Result<&mut VecDeque<String>> {
    let expected_header = [("File type", "ooTextFile"), ("Object class", "TextGrid")];

    for (field, expected) in expected_header {
        let found = textgrid_data.pop_front().unwrap_or_default();
        if found == expected {
            continue;
        }

        let message = format!(
            "TextGrid malformed; `{field}` incorrect: expected `{expected}`, got {found}"
        );
        return Err(Error::new(ErrorKind::InvalidData, message));
    }

    Ok(textgrid_data)
}
145
146/// Parses tiers from a `TextGrid` file.
147///
148/// # Arguments
149///
150/// * `data` - A mutable reference to a `VecDeque<String>` containing the lines of a `TextGrid` file.
151/// * `tg_xmin` - The `xmin` value of the `TextGrid`.
152/// * `tg_xmax` - The `xmax` value of the `TextGrid`.
153/// * `warn` - An optional boolean indicating whether to print warnings.
154///
155/// # Returns
156///
157/// A `Result` containing a vector of `textgridde::Tier` structs if successful, or an `std::io::Error` if parsing failed.
158fn parse_tiers<W: Into<Option<bool>> + Copy>(
159    data: &mut VecDeque<String>,
160    tg_xmin: f64,
161    tg_xmax: f64,
162    warn: W,
163) -> Result<Vec<Tier>> {
164    let mut tiers = Vec::<Tier>::new();
165
166    let num_tiers = utilities::pull_next_number::<i64>(data)?;
167    let mut num_tier_counter = 0;
168
169    while !data.is_empty() {
170        num_tier_counter += 1;
171
172        let tier_type = data.pop_front().ok_or_else(|| {
173            Error::new(
174                ErrorKind::InvalidData,
175                "TextGrid malformed; early EOF expecting tier type",
176            )
177        })?;
178        let tier_name = data.pop_front().ok_or_else(|| {
179            Error::new(
180                ErrorKind::InvalidData,
181                "TextGrid malformed; early EOF expecting tier name",
182            )
183        })?;
184
185        let xmin = utilities::pull_next_number::<f64>(data)?;
186        let xmax = utilities::pull_next_number::<f64>(data)?;
187
188        if warn.into().unwrap_or_default() {
189            if xmin < tg_xmin {
190                return Err(Error::new(
191                    ErrorKind::InvalidData,
192                    "TextGrid malformed; tier {tier_name} `xmin` less than TextGrid `xmin`",
193                ));
194            }
195            if xmax > tg_xmax {
196                return Err(Error::new(
197                    ErrorKind::InvalidData,
198                    "TextGrid malformed; tier {tier_name} `xmax` greater than TextGrid `xmax`",
199                ));
200            }
201        }
202
203        let tier_size = utilities::pull_next_number::<i64>(data)?;
204        let mut tier_size_counter = 0;
205
206        match tier_type.as_str() {
207            "IntervalTier" => {
208                let mut new_tier: IntervalTier =
209                    IntervalTier::new(tier_name.clone(), xmin, xmax, Vec::<Interval>::new());
210
211                while data.front().is_some()
212                    && !["IntervalTier".to_string(), "TextTier".to_string()]
213                        .contains(data.front().unwrap())
214                {
215                    new_tier.push_interval(parse_interval(data)?, warn);
216                    tier_size_counter += 1;
217                }
218                if warn.into().unwrap_or_default() && tier_size != tier_size_counter {
219                    eprintln!(
220                        "Warning: Tier `{tier_name}` has a size of {tier_size} but {tier_size_counter} intervals were found",
221                    );
222                }
223                tiers.push(Tier::IntervalTier(new_tier));
224            }
225            "TextTier" => {
226                let mut new_tier =
227                    PointTier::new(tier_name.clone(), xmin, xmax, Vec::<Point>::new());
228
229                while data.front().is_some()
230                    && !["\"IntervalTier\"".to_string(), "\"TextTier\"".to_string()]
231                        .contains(data.front().unwrap())
232                {
233                    new_tier.push_point(parse_point(data)?, warn);
234                    tier_size_counter += 1;
235                }
236                if warn.into().unwrap_or_default() && tier_size != tier_size_counter {
237                    eprintln!(
238                        "Warning: Tier `{tier_name}` has a size of {tier_size} but {tier_size_counter} points were found",
239                    );
240                }
241                tiers.push(Tier::PointTier(new_tier));
242            }
243            _ => {
244                return Err(Error::new(
245                    ErrorKind::InvalidData,
246                    format!("TextGrid malformed; Invalid tier type: {tier_type}"),
247                ));
248            }
249        }
250    }
251
252    if num_tiers != num_tier_counter && warn.into().unwrap_or_default() {
253        eprintln!(
254            "Warning: TextGrid has a size of {num_tiers} but {num_tier_counter} tiers were found",
255        );
256    }
257
258    Ok(tiers)
259}
260
261/// Parses an `Interval` from a `TextGrid` file.
262///
263/// # Arguments
264///
265/// * `data` - A mutable reference to a `VecDeque<String>` containing the lines of a `TextGrid` file.
266///
267/// # Returns
268///
269/// A `Result` containing an `Interval` struct if successful, or an `std::io::Error` if parsing failed.
270fn parse_interval(data: &mut VecDeque<String>) -> Result<Interval> {
271    let xmin = utilities::pull_next_number::<f64>(data)?;
272    let xmax = utilities::pull_next_number::<f64>(data)?;
273    let text = data.pop_front().unwrap_or_default();
274
275    Ok(Interval::new(xmin, xmax, text))
276}
277
278/// Parses a `Point` from a `TextGrid` file.
279///
280/// # Arguments
281///
282/// * `data` - A mutable reference to a `VecDeque<String>` containing the lines of a `TextGrid` file.
283///
284/// # Returns
285///
286/// A `Result` containing a `Point` struct if successful, or an `std::io::Error` if parsing failed.
287fn parse_point(data: &mut VecDeque<String>) -> Result<Point> {
288    let number = utilities::pull_next_number::<f64>(data)?;
289    let mark = data.pop_front().unwrap_or_default();
290
291    Ok(Point::new(number, mark))
292}
293
#[cfg(test)]
mod test {
    use std::collections::VecDeque;

    use crate::input::Source;
    use crate::textgrid::Tier;

    use super::parse_textgrid;

    /// A small long-format `TextGrid` with two interval tiers and one point tier.
    const TEXTGRID: &str = "File type = \"ooTextFile\"\nObject class = \"TextGrid\"\n\nxmin = 0\nxmax = 2.3\ntiers? <exists>\nsize = 3\nitem []:\n\titem [1]:\n\t\tclass = \"IntervalTier\"\n\t\tname = \"John\"\n\t\txmin = 0\n\t\txmax = 2.3\n\t\tintervals: size = 1\n\t\tintervals [1]:\n\t\t\txmin = 0\n\t\t\txmax = 2.3\n\t\t\ttext = \"daisy bell\"\n\titem [2]:\n\t\tclass = \"IntervalTier\"\n\t\tname = \"Kelly\"\n\t\txmin = 0\n\t\txmax = 2.3\n\t\tintervals: size = 1\n\t\tintervals [1]:\n\t\t\txmin = 0\n\t\t\txmax = 2.3\n\t\t\ttext = \"\"\n\titem [3]:\n\t\tclass = \"TextTier\"\n\t\tname = \"Bell\"\n\t\txmin = 0\n\t\txmax = 2.3\n\t\tpoints: size = 1\n\t\tpoints [1]:\n\t\t\tnumber = 1\n\t\t\tmark = \"give me your answer do\"\"\n";

    /// Asserts that `tier` is an interval tier named `Kelly`.
    fn assert_is_kelly(tier: &Tier) {
        match tier {
            Tier::IntervalTier(interval_tier) => assert_eq!(interval_tier.name(), "Kelly"),
            Tier::PointTier(_) => panic!("Expected IntervalTier, got PointTier"),
        }
    }

    /// Builds an element queue from string slices, preserving their order.
    fn queue_of(elements: &[&str]) -> VecDeque<String> {
        elements.iter().map(|&element| element.to_owned()).collect()
    }

    #[test]
    fn parse_textgrid_from_string() {
        let grid = parse_textgrid(TEXTGRID, false).unwrap();

        assert_is_kelly(&grid.tiers()[1]);
    }

    #[test]
    fn parse_textgrid_from_path() {
        let grid = parse_textgrid("example/long.TextGrid", false).unwrap();

        assert_is_kelly(&grid.tiers()[1]);
    }

    #[test]
    fn parse_textgrid_from_vector() {
        let lines: Vec<String> = TEXTGRID.split('\n').map(String::from).collect();

        let grid = parse_textgrid(lines, false).unwrap();

        assert_is_kelly(&grid.tiers()[1]);
    }

    #[test]
    fn parse_textgrid_from_stream() {
        let bytes = TEXTGRID.as_bytes();
        let source = Source::Stream(Box::new(bytes));

        let grid = parse_textgrid(source, false).unwrap();

        assert_is_kelly(&grid.tiers()[1]);
    }

    #[test]
    fn parse_textgrid_from_file() {
        let file = std::fs::File::open("example/long.TextGrid").unwrap();

        let grid = parse_textgrid(file, false).unwrap();

        assert_is_kelly(&grid.tiers()[1]);
    }

    #[test]
    fn parse_textgrid_from_invalid_string() {
        let outcome = parse_textgrid("invalid", false);

        assert!(outcome.is_err());
    }

    #[test]
    fn verify_start_of_textgrid() {
        let mut header = queue_of(&["ooTextFile", "TextGrid"]);

        let outcome = super::verify_start_of_textgrid(&mut header);

        assert!(outcome.is_ok());
    }

    #[test]
    fn parse_tiers() {
        let mut elements = queue_of(&[
            // Number of tiers
            "3",
            // First interval tier
            "IntervalTier",
            "John",
            "0",
            "2.3",
            "1",
            "0",
            "2.3",
            "daisy bell",
            // Second interval tier
            "IntervalTier",
            "Kelly",
            "0",
            "2.3",
            "1",
            "0",
            "2.3",
            "",
            // Point tier
            "TextTier",
            "Bell",
            "0",
            "2.3",
            "1",
            "1",
            "give me your answer do\"",
        ]);

        let tiers = super::parse_tiers(&mut elements, 0.0, 2.3, false).unwrap();

        assert_is_kelly(&tiers[1]);
    }

    #[test]
    fn parse_interval() {
        let mut elements = queue_of(&["0", "2.3", "daisy bell"]);

        let interval = super::parse_interval(&mut elements).unwrap();

        assert_eq!(interval.text(), "daisy bell");
    }

    #[test]
    fn parse_point() {
        let mut elements = queue_of(&["1", "give me your answer do\""]);

        let point = super::parse_point(&mut elements).unwrap();

        assert_eq!(point.mark(), "give me your answer do\"");
    }
}