oxvg_parse/
lib.rs

1//! Primitives for parsing XML values
2
3use error::Error;
4mod types;
5
6pub mod error;
7
/// A parser containing state for the active parsing of an SVG value
///
/// The parser borrows its input and tracks a byte offset into it, so cloning
/// a parser is cheap and yields an independent cursor over the same text.
#[derive(Debug, Clone)]
pub struct Parser<'input> {
    /// The complete text being parsed
    input: &'input str,
    /// Byte offset into `input` of the next character to read
    cursor: usize,
}
13
14impl<'input> Parser<'input> {
15    /// Create a new parser with the input
16    pub fn new(input: &'input str) -> Self {
17        Self { input, cursor: 0 }
18    }
19
20    /// Returns the current position in the input being read
21    pub fn cursor(&self) -> usize {
22        self.cursor
23    }
24
25    /// Try reading the next value
26    ///
27    /// # Errors
28    ///
29    /// If the input has ended
30    pub fn read(&mut self) -> Result<char, Error<'input>> {
31        let current = self.current()?;
32        self.advance();
33        Ok(current)
34    }
35
36    /// Go to the next value without reading
37    pub fn advance(&mut self) {
38        self.cursor += 1;
39    }
40
41    /// Move backwards without reading
42    pub fn rewind(&mut self, n: usize) {
43        self.cursor -= n;
44    }
45
46    /// Skip remaining input
47    pub fn done(&mut self) {
48        self.cursor = self.input.len();
49    }
50
51    /// Try parsing a portion of the input, reverting to the original state if failed
52    ///
53    /// # Errors
54    ///
55    /// If the attempted parsing fails
56    pub fn try_parse<T, E, F: FnOnce(&mut Self) -> Result<T, E>>(&mut self, f: F) -> Result<T, E> {
57        let cursor = self.cursor;
58        let result = f(self);
59        if result.is_err() {
60            self.cursor = cursor;
61        }
62        result
63    }
64
65    /// Get remaining slice of input
66    pub fn slice(&self) -> &'input str {
67        &self.input[self.cursor..]
68    }
69
70    /// Get remaining slice of input and advance to the end of the input
71    pub fn take_slice(&mut self) -> &'input str {
72        let slice = &self.input[self.cursor..];
73        self.done();
74        slice
75    }
76
77    /// Get slice from start position to current position
78    pub fn slice_from(&self, start: usize) -> &'input str {
79        let end = self.cursor.min(self.input.len());
80        &self.input[start..end]
81    }
82
83    /// Get the length of the remaining input
84    pub fn len(&self) -> usize {
85        self.input.len() - self.cursor
86    }
87
88    /// Returns whether the remaining input is empty
89    pub fn is_empty(&self) -> bool {
90        self.len() == 0
91    }
92
93    /// Gets the current character of the input
94    ///
95    /// # Errors
96    ///
97    /// If reached the end of input
98    pub fn current(&self) -> Result<char, Error<'input>> {
99        self.slice().chars().next().ok_or(Error::EndOfInput)
100    }
101
102    /// Move the cursor forward while the characters match the given predicate
103    ///
104    /// Returns the skipped content as a slice
105    pub fn take_matches<F: FnMut(char) -> bool>(&mut self, f: F) -> &'input str {
106        let cursor = self.cursor();
107        self.skip_matches(f);
108        let result = self.slice_from(cursor);
109        result
110    }
111
112    /// Moves the cursor forward the number of matching characters
113    pub fn skip_matches<F: FnMut(char) -> bool>(&mut self, pat: F) {
114        self.skip_internal(self.slice().trim_start_matches(pat).len());
115    }
116
117    /// Moves the cursor forward the number of matching characters
118    pub fn skip_char(&mut self, char: char) {
119        self.skip_internal(self.slice().trim_matches(char).len());
120    }
121
122    /// Moves the cursor forward the number of whitespace characters
123    pub fn skip_whitespace(&mut self) {
124        self.skip_matches(char::is_whitespace);
125    }
126
127    fn skip_internal(&mut self, trim: usize) {
128        let offset = self.len() - trim;
129        self.cursor += offset;
130    }
131
132    /// Asserts the end of the input was reached
133    ///
134    /// # Errors
135    ///
136    /// When the cursor is prior to the end of the string
137    pub fn expect_done(&self) -> Result<(), Error<'input>> {
138        if self.cursor < self.input.len() {
139            Err(Error::ExpectedDone)
140        } else {
141            Ok(())
142        }
143    }
144
145    /// Read and assert the next character matches the expected character
146    ///
147    /// # Errors
148    ///
149    /// If the end of the input is reached, or the character does not match
150    pub fn expect_char(&mut self, expected: char) -> Result<(), Error<'input>> {
151        let received = self.read()?;
152        if received == expected {
153            Ok(())
154        } else {
155            Err(Error::ExpectedChar { expected, received })
156        }
157    }
158
159    /// Read and assert a set of characters matches the expected pattern.
160    ///
161    /// # Errors
162    ///
163    /// - If the end of the input is reached
164    /// - If none of the characters match the expected pattern
165    /// - If the patterns matcher asserts an error
166    pub fn expect_matches<F: Fn(char) -> Result<bool, &'static str>>(
167        &mut self,
168        expected: &'static str,
169        f: F,
170    ) -> Result<&'input str, Error<'input>> {
171        let cursor = self.cursor;
172        let mut result = Ok(());
173        self.skip_matches(|char| match f(char) {
174            Ok(bool) => bool,
175            Err(expected) => {
176                result = Err(expected);
177                false
178            }
179        });
180        match result {
181            Ok(()) => match self.slice_from(cursor) {
182                "" => Err(Error::ExpectedMatch {
183                    expected,
184                    received: "nothing",
185                }),
186                result => Ok(result),
187            },
188            Err(expected) => Err(Error::ExpectedMatch {
189                expected,
190                received: self.slice_from(cursor),
191            }),
192        }
193    }
194
195    /// Read and assert a set of characters is whitespace
196    ///
197    /// # Errors
198    ///
199    /// If none of the next characters are whitespace
200    pub fn expect_whitespace(&mut self) -> Result<(), Error<'input>> {
201        self.expect_matches("a whitespace character", |char| Ok(char.is_whitespace()))?;
202        Ok(())
203    }
204
205    /// Read and assert a set of characters matches the given string
206    ///
207    /// # Errors
208    ///
209    /// If the next set of characters does not match the given string
210    pub fn expect_str(&mut self, expected: &'static str) -> Result<(), Error<'input>> {
211        let cursor = self.cursor;
212        self.cursor += expected.len();
213        let received = self.slice_from(cursor);
214        if received == expected {
215            Ok(())
216        } else {
217            Err(Error::ExpectedString { expected, received })
218        }
219    }
220
221    /// Read and assert a set of characters matches some ident
222    ///
223    /// # Errors
224    ///
225    /// When an invalid ident is received
226    pub fn expect_ident(&mut self) -> Result<&'input str, Error<'input>> {
227        let cursor = self.cursor;
228        let name_start_char = self.read()?;
229        if !is_name_start_char(name_start_char) {
230            return Err(Error::ExpectedIdent {
231                expected: "valid ident starting character",
232                received: self.slice_from(cursor),
233            });
234        }
235        self.skip_matches(is_name_char);
236        Ok(self.slice_from(cursor))
237    }
238
239    /// Read and assert a set of characters matches the given identifier
240    ///
241    /// # Errors
242    ///
243    /// If the next set of characters does not match the given identifier
244    pub fn expect_ident_matching(&mut self, expected: &'static str) -> Result<(), Error<'input>> {
245        let received = self.expect_ident()?;
246        if received == expected {
247            Ok(())
248        } else {
249            Err(Error::ExpectedIdent { expected, received })
250        }
251    }
252}
/// Returns whether `char` may begin a name, following the XML `NameStartChar`
/// production
///
/// All ranges are inclusive; in particular `U+00C0..=U+00D6` includes `Ö`
/// (`U+00D6`) while the multiplication sign `U+00D7` stays excluded.
fn is_name_start_char(char: char) -> bool {
    char.is_ascii_alphabetic()
        || matches!(
            char,
            ':' | '_'
                | '\u{C0}'..='\u{D6}'
                | '\u{D8}'..='\u{F6}'
                | '\u{F8}'..='\u{2FF}'
                | '\u{370}'..='\u{37D}'
                | '\u{37F}'..='\u{1FFF}'
                | '\u{200C}'..='\u{200D}'
                | '\u{2070}'..='\u{218F}'
                | '\u{2C00}'..='\u{2FEF}'
                | '\u{3001}'..='\u{D7FF}'
                | '\u{F900}'..='\u{FDCF}'
                | '\u{FDF0}'..='\u{FFFD}'
                | '\u{10000}'..='\u{EFFFF}'
        )
}
257fn is_name_char(char: char) -> bool {
258    is_name_start_char(char)
259        || char.is_ascii_digit()
260        || matches!(char, '-' | '.' | '\u{B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}')
261}
262
263/// A trait for things that can be parsed from CSS or attribute values.
264pub trait Parse<'input>: Sized {
265    /// Parse this value using an existing parser.
266    ///
267    /// # Errors
268    /// If parsing fails
269    fn parse(input: &mut Parser<'input>) -> Result<Self, Error<'input>>;
270
271    /// Parse a value from a string
272    ///
273    /// # Errors
274    /// If parsing fails
275    fn parse_string(input: &'input str) -> Result<Self, Error<'input>> {
276        let parser = &mut Parser::new(input);
277        parser.skip_whitespace();
278        let result = Self::parse(parser)?;
279        parser.skip_whitespace();
280        parser.expect_done()?;
281        Ok(result)
282    }
283}