Skip to main content

bibtex_parser/parser/
utils.rs

//! Parser utilities: whitespace skipping, case-insensitive tags, and balanced delimiters.
2
3use winnow::prelude::*;
4
5/// Fast inline whitespace skipping
6#[inline]
7fn skip_whitespace(input: &mut &str) {
8    let bytes = input.as_bytes();
9    let len = super::simd::scan_whitespace(bytes);
10    *input = &input[len..];
11}
12
13/// Make a parser whitespace-insensitive
14#[inline]
15pub fn ws<'a, F, O>(mut parser: F) -> impl Parser<&'a str, O, winnow::error::ContextError>
16where
17    F: Parser<&'a str, O, winnow::error::ContextError>,
18{
19    move |input: &mut &'a str| {
20        skip_whitespace(input);
21        let output = parser.parse_next(input)?;
22        skip_whitespace(input);
23        Ok(output)
24    }
25}
26
27/// Case-insensitive tag parser
28#[inline]
29#[must_use]
30pub fn tag_no_case<'a>(
31    tag: &'static str,
32) -> impl Parser<&'a str, &'a str, winnow::error::ContextError> {
33    move |input: &mut &'a str| {
34        let tag_len = tag.len();
35        if input.len() < tag_len {
36            return super::backtrack();
37        }
38
39        let input_start = &input[..tag_len];
40        if input_start.eq_ignore_ascii_case(tag) {
41            let result = input_start;
42            *input = &input[tag_len..];
43            Ok(result)
44        } else {
45            super::backtrack()
46        }
47    }
48}
49
50/// Parse a delimited value with balanced delimiters
51#[inline]
52#[must_use]
53pub fn balanced_delimited<'a>(
54    open: char,
55    close: char,
56) -> impl Parser<&'a str, &'a str, winnow::error::ContextError> {
57    move |input: &mut &'a str| {
58        if !input.starts_with(open) {
59            return super::backtrack();
60        }
61
62        let mut depth = 0;
63        let mut pos = 0;
64        let bytes = input.as_bytes();
65        let mut i = 0;
66        while i < bytes.len() {
67            let byte = bytes[i];
68            if byte == b'\\' && i + 1 < bytes.len() {
69                // Skip escaped character and the following byte
70                i += 2;
71                continue;
72            } else if byte == open as u8 {
73                depth += 1;
74            } else if byte == close as u8 {
75                depth -= 1;
76                if depth == 0 {
77                    pos = i + 1;
78                    break;
79                }
80            }
81            i += 1;
82        }
83
84        if depth == 0 {
85            let result = &input[1..pos - 1];
86            *input = &input[pos..];
87            Ok(result)
88        } else {
89            super::backtrack()
90        }
91    }
92}
93
#[cfg(test)]
mod tests {
    use super::*;

    /// `ws` strips whitespace on both sides of the wrapped parser's match.
    #[test]
    fn test_ws() {
        let mut input = "  hello  world  ";
        let mut parser = ws("hello");
        let result = parser.parse_next(&mut input).unwrap();
        assert_eq!(result, "hello");
        assert_eq!(input, "world  ");
    }

    /// The matched text keeps the casing found in the input.
    #[test]
    fn test_tag_no_case() {
        let mut input = "ARTICLE{...}";
        let result = tag_no_case("article").parse_next(&mut input).unwrap();
        assert_eq!(result, "ARTICLE");
        assert_eq!(input, "{...}");

        let mut input = "Article{...}";
        let result = tag_no_case("article").parse_next(&mut input).unwrap();
        assert_eq!(result, "Article");
        assert_eq!(input, "{...}");
    }

    /// A mismatch or a too-short input fails without consuming anything.
    #[test]
    fn test_tag_no_case_rejects_mismatch() {
        let mut input = "book{...}";
        assert!(tag_no_case("article").parse_next(&mut input).is_err());
        assert_eq!(input, "book{...}");

        let mut input = "art";
        assert!(tag_no_case("article").parse_next(&mut input).is_err());
        assert_eq!(input, "art");
    }

    /// Nested pairs are kept inside the returned value.
    #[test]
    fn test_balanced_delimited_nested() {
        let mut input = "{a {b} c} rest";
        let result = balanced_delimited('{', '}')
            .parse_next(&mut input)
            .unwrap();
        assert_eq!(result, "a {b} c");
        assert_eq!(input, " rest");
    }

    /// An escaped closing delimiter does not end the value.
    #[test]
    fn test_balanced_delimited_escaped() {
        let mut input = "{a \\} b} rest";
        let result = balanced_delimited('{', '}')
            .parse_next(&mut input)
            .unwrap();
        assert_eq!(result, "a \\} b");
        assert_eq!(input, " rest");
    }

    /// Unbalanced input, or input without the opener, fails and is not consumed.
    #[test]
    fn test_balanced_delimited_rejects_unbalanced() {
        let mut input = "{a {b}";
        assert!(balanced_delimited('{', '}').parse_next(&mut input).is_err());
        assert_eq!(input, "{a {b}");

        let mut input = "x{a}";
        assert!(balanced_delimited('{', '}').parse_next(&mut input).is_err());
        assert_eq!(input, "x{a}");
    }
}