Skip to main content

svgrtypes/
stream.rs

1use std::str::FromStr;
2
3use crate::Error;
4
/// Byte classification helpers for the XML-subset grammar handled by [`Stream`].
///
/// Every predicate looks at exactly one byte and only ever accepts ASCII values.
pub(crate) trait ByteExt {
    /// Returns `true` when the byte is a numeric sign.
    ///
    /// `[+-]`
    fn is_sign(&self) -> bool;

    /// Returns `true` when the byte is a decimal digit.
    ///
    /// `[0-9]`
    fn is_digit(&self) -> bool;

    /// Returns `true` when the byte is a hexadecimal digit.
    ///
    /// `[0-9A-Fa-f]`
    fn is_hex_digit(&self) -> bool;

    /// Returns `true` when the byte is a whitespace.
    ///
    /// `[ \r\n\t]`
    fn is_space(&self) -> bool;

    /// Returns `true` when the byte may appear in an ASCII ident.
    ///
    /// `[0-9A-Za-z_-]`
    fn is_ascii_ident(&self) -> bool;
}

impl ByteExt for u8 {
    #[inline]
    fn is_sign(&self) -> bool {
        *self == b'+' || *self == b'-'
    }

    #[inline]
    fn is_digit(&self) -> bool {
        self.is_ascii_digit()
    }

    #[inline]
    fn is_hex_digit(&self) -> bool {
        self.is_ascii_hexdigit()
    }

    #[inline]
    fn is_space(&self) -> bool {
        // Not `is_ascii_whitespace()`: that one also accepts form feed.
        [b' ', b'\t', b'\n', b'\r'].contains(self)
    }

    #[inline]
    fn is_ascii_ident(&self) -> bool {
        self.is_ascii_alphanumeric() || *self == b'-' || *self == b'_'
    }
}
54
/// A cursor over a borrowed string that provides a streaming text parsing interface.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct Stream<'a> {
    /// The whole input text.
    text: &'a str,
    /// Byte offset of the cursor inside `text`.
    pos: usize,
}
61
62impl<'a> From<&'a str> for Stream<'a> {
63    #[inline]
64    fn from(text: &'a str) -> Self {
65        Stream { text, pos: 0 }
66    }
67}
68
69impl<'a> Stream<'a> {
70    /// Returns the current position in bytes.
71    #[inline]
72    pub fn pos(&self) -> usize {
73        self.pos
74    }
75
76    /// Calculates the current position in chars.
77    pub fn calc_char_pos(&self) -> usize {
78        self.calc_char_pos_at(self.pos)
79    }
80
81    /// Calculates the current position in chars.
82    pub fn calc_char_pos_at(&self, byte_pos: usize) -> usize {
83        let mut pos = 1;
84        for (idx, _) in self.text.char_indices() {
85            if idx >= byte_pos {
86                break;
87            }
88
89            pos += 1;
90        }
91
92        pos
93    }
94
95    /// Sets current position equal to the end.
96    ///
97    /// Used to indicate end of parsing on error.
98    #[inline]
99    pub fn jump_to_end(&mut self) {
100        self.pos = self.text.len();
101    }
102
103    /// Checks if the stream is reached the end.
104    ///
105    /// Any [`pos()`] value larger than original text length indicates stream end.
106    ///
107    /// Accessing stream after reaching end via safe methods will produce
108    /// an `UnexpectedEndOfStream` error.
109    ///
110    /// Accessing stream after reaching end via *_unchecked methods will produce
111    /// a Rust's bound checking error.
112    ///
113    /// [`pos()`]: #method.pos
114    #[inline]
115    pub fn at_end(&self) -> bool {
116        self.pos >= self.text.len()
117    }
118
119    /// Returns a byte from a current stream position.
120    ///
121    /// # Errors
122    ///
123    /// - `UnexpectedEndOfStream`
124    #[inline]
125    pub fn curr_byte(&self) -> Result<u8, Error> {
126        if self.at_end() {
127            return Err(Error::UnexpectedEndOfStream);
128        }
129
130        Ok(self.curr_byte_unchecked())
131    }
132
133    #[inline]
134    pub fn chars(&self) -> std::str::Chars<'a> {
135        self.text[self.pos..].chars()
136    }
137
138    /// Returns a byte from a current stream position.
139    ///
140    /// # Panics
141    ///
142    /// - if the current position is after the end of the data
143    #[inline]
144    pub fn curr_byte_unchecked(&self) -> u8 {
145        self.text.as_bytes()[self.pos]
146    }
147
148    /// Checks that current byte is equal to provided.
149    ///
150    /// Returns `false` if no bytes left.
151    #[inline]
152    pub fn is_curr_byte_eq(&self, c: u8) -> bool {
153        if !self.at_end() {
154            self.curr_byte_unchecked() == c
155        } else {
156            false
157        }
158    }
159
160    /// Returns a next byte from a current stream position.
161    ///
162    /// # Errors
163    ///
164    /// - `UnexpectedEndOfStream`
165    #[inline]
166    pub fn next_byte(&self) -> Result<u8, Error> {
167        if self.pos + 1 >= self.text.len() {
168            return Err(Error::UnexpectedEndOfStream);
169        }
170
171        Ok(self.text.as_bytes()[self.pos + 1])
172    }
173
174    /// Advances by `n` bytes.
175    #[inline]
176    pub fn advance(&mut self, n: usize) {
177        debug_assert!(self.pos + n <= self.text.len());
178        self.pos += n;
179    }
180
181    /// Skips whitespaces.
182    ///
183    /// Accepted values: `' ' \n \r \t`.
184    pub fn skip_spaces(&mut self) {
185        while !self.at_end() && self.curr_byte_unchecked().is_space() {
186            self.advance(1);
187        }
188    }
189
190    /// Checks that the stream starts with a selected text.
191    ///
192    /// We are using `&[u8]` instead of `&str` for performance reasons.
193    #[inline]
194    pub fn starts_with(&self, text: &[u8]) -> bool {
195        self.text.as_bytes()[self.pos..].starts_with(text)
196    }
197
198    /// Consumes current byte if it's equal to the provided byte.
199    ///
200    /// # Errors
201    ///
202    /// - `InvalidChar`
203    /// - `UnexpectedEndOfStream`
204    pub fn consume_byte(&mut self, c: u8) -> Result<(), Error> {
205        if self.curr_byte()? != c {
206            return Err(Error::InvalidChar(
207                vec![self.curr_byte_unchecked(), c],
208                self.calc_char_pos(),
209            ));
210        }
211
212        self.advance(1);
213        Ok(())
214    }
215
216    /// Parses a single [ident](https://drafts.csswg.org/css-syntax-3/#typedef-ident-token).
217    /// Consumes a single ident consisting of ASCII characters, if available.
218    pub fn consume_ascii_ident(&mut self) -> &'a str {
219        let start = self.pos;
220        self.skip_bytes(|_, c| c.is_ascii_ident());
221        self.slice_back(start)
222    }
223
224    /// Parses a single [quoted string](https://drafts.csswg.org/css-syntax-3/#typedef-string-token)
225    ///
226    /// # Errors
227    ///
228    /// - `UnexpectedEndOfStream`
229    /// - `InvalidValue`
230    pub fn parse_quoted_string(&mut self) -> Result<&'a str, Error> {
231        // Check for opening quote.
232        let quote = self.curr_byte()?;
233
234        if quote != b'\'' && quote != b'"' {
235            return Err(Error::InvalidValue);
236        }
237
238        let mut prev = quote;
239        self.advance(1);
240
241        let start = self.pos();
242
243        while !self.at_end() {
244            let curr = self.curr_byte_unchecked();
245
246            // Advance until the closing quote.
247            if curr == quote {
248                // Check for escaped quote.
249                if prev != b'\\' {
250                    break;
251                }
252            }
253
254            prev = curr;
255            self.advance(1);
256        }
257
258        let value = self.slice_back(start);
259
260        // Check for closing quote.
261        self.consume_byte(quote)?;
262
263        Ok(value)
264    }
265
266    /// Consumes selected string.
267    ///
268    /// # Errors
269    ///
270    /// - `InvalidChar`
271    /// - `UnexpectedEndOfStream`
272    pub fn consume_string(&mut self, text: &[u8]) -> Result<(), Error> {
273        if self.at_end() {
274            return Err(Error::UnexpectedEndOfStream);
275        }
276
277        if !self.starts_with(text) {
278            let len = std::cmp::min(text.len(), self.text.len() - self.pos);
279            // Collect chars and do not slice a string,
280            // because the `len` can be on the char boundary.
281            // Which lead to a panic.
282            let actual = self.text[self.pos..].chars().take(len).collect();
283
284            // Assume that all input `text` are valid UTF-8 strings, so unwrap is safe.
285            let expected = std::str::from_utf8(text).unwrap().to_owned();
286
287            return Err(Error::InvalidString(
288                vec![actual, expected],
289                self.calc_char_pos(),
290            ));
291        }
292
293        self.advance(text.len());
294        Ok(())
295    }
296
297    /// Consumes bytes by the predicate and returns them.
298    ///
299    /// The result can be empty.
300    pub fn consume_bytes<F>(&mut self, f: F) -> &'a str
301    where
302        F: Fn(&Stream, u8) -> bool,
303    {
304        let start = self.pos();
305        self.skip_bytes(f);
306        self.slice_back(start)
307    }
308
309    /// Consumes bytes by the predicate.
310    pub fn skip_bytes<F>(&mut self, f: F)
311    where
312        F: Fn(&Stream, u8) -> bool,
313    {
314        while !self.at_end() {
315            let c = self.curr_byte_unchecked();
316            if f(self, c) {
317                self.advance(1);
318            } else {
319                break;
320            }
321        }
322    }
323
324    /// Slices data from `pos` to the current position.
325    #[inline]
326    pub fn slice_back(&self, pos: usize) -> &'a str {
327        &self.text[pos..self.pos]
328    }
329
330    /// Slices data from the current position to the end.
331    #[inline]
332    pub fn slice_tail(&self) -> &'a str {
333        &self.text[self.pos..]
334    }
335
336    /// Parses integer number from the stream.
337    ///
338    /// Same as [`parse_number()`], but only for integer. Does not refer to any SVG type.
339    ///
340    /// [`parse_number()`]: #method.parse_number
341    pub fn parse_integer(&mut self) -> Result<i32, Error> {
342        self.skip_spaces();
343
344        if self.at_end() {
345            return Err(Error::InvalidNumber(self.calc_char_pos()));
346        }
347
348        let start = self.pos();
349
350        // Consume sign.
351        if self.curr_byte()?.is_sign() {
352            self.advance(1);
353        }
354
355        // The current char must be a digit.
356        if !self.curr_byte()?.is_digit() {
357            return Err(Error::InvalidNumber(self.calc_char_pos_at(start)));
358        }
359
360        self.skip_digits();
361
362        // Use the default i32 parser now.
363        let s = self.slice_back(start);
364        match i32::from_str(s) {
365            Ok(n) => Ok(n),
366            Err(_) => Err(Error::InvalidNumber(self.calc_char_pos_at(start))),
367        }
368    }
369
370    /// Parses integer from a list of numbers.
371    pub fn parse_list_integer(&mut self) -> Result<i32, Error> {
372        if self.at_end() {
373            return Err(Error::UnexpectedEndOfStream);
374        }
375
376        let n = self.parse_integer()?;
377        self.skip_spaces();
378        self.parse_list_separator();
379        Ok(n)
380    }
381
382    /// Parses number or percent from the stream.
383    ///
384    /// Percent value will be normalized.
385    pub fn parse_number_or_percent(&mut self) -> Result<f64, Error> {
386        self.skip_spaces();
387
388        let n = self.parse_number()?;
389        if self.starts_with(b"%") {
390            self.advance(1);
391            Ok(n / 100.0)
392        } else {
393            Ok(n)
394        }
395    }
396
397    /// Parses number or percent from a list of numbers and/or percents.
398    pub fn parse_list_number_or_percent(&mut self) -> Result<f64, Error> {
399        if self.at_end() {
400            return Err(Error::UnexpectedEndOfStream);
401        }
402
403        let l = self.parse_number_or_percent()?;
404        self.skip_spaces();
405        self.parse_list_separator();
406        Ok(l)
407    }
408
409    /// Skips digits.
410    pub fn skip_digits(&mut self) {
411        self.skip_bytes(|_, c| c.is_digit());
412    }
413
414    #[inline]
415    pub(crate) fn parse_list_separator(&mut self) {
416        if self.is_curr_byte_eq(b',') {
417            self.advance(1);
418        }
419    }
420}
421
#[rustfmt::skip]
#[cfg(test)]
mod tests {
    use super::*;

    /// A plain decimal literal is parsed into its numeric value.
    #[test]
    fn parse_integer_1() {
        let parsed = Stream::from("10").parse_integer();
        assert_eq!(parsed.unwrap(), 10);
    }

    /// A literal that does not fit into `i32` is reported as an invalid number
    /// at the position where the number starts.
    #[test]
    fn parse_err_integer_1() {
        // error because of overflow
        let mut stream = Stream::from("10000000000000");
        let err = stream.parse_integer().unwrap_err();
        assert_eq!(err.to_string(), "invalid number at position 1");
    }
}