animate/path/
stream.rs

1use std::cmp;
2use std::str::{self, FromStr};
3
4use super::{Angle, AngleUnit, Error, Length, LengthUnit, Result};
5
/// Byte-classification helpers used by the text stream parser.
pub(crate) trait ByteExt {
    /// Checks if a byte is a numeric sign.
    ///
    /// `[+-]`
    fn is_sign(&self) -> bool;

    /// Checks if a byte is a decimal digit.
    ///
    /// `[0-9]`
    fn is_digit(&self) -> bool;

    /// Checks if a byte is a hexadecimal digit.
    ///
    /// `[0-9A-Fa-f]`
    fn is_hex_digit(&self) -> bool;

    /// Checks if a byte is a whitespace character.
    ///
    /// `[ \r\n\t]`
    fn is_space(&self) -> bool;

    /// Checks if a byte is an ASCII letter.
    ///
    /// `[A-Za-z]`
    fn is_letter(&self) -> bool;

    /// Checks if a byte is an identifier character.
    ///
    /// `[0-9A-Za-z_-]`
    fn is_ident(&self) -> bool;
}

impl ByteExt for u8 {
    #[inline]
    fn is_sign(&self) -> bool {
        *self == b'+' || *self == b'-'
    }

    #[inline]
    fn is_digit(&self) -> bool {
        self.is_ascii_digit()
    }

    #[inline]
    fn is_hex_digit(&self) -> bool {
        self.is_ascii_hexdigit()
    }

    #[inline]
    fn is_space(&self) -> bool {
        // Deliberately not `is_ascii_whitespace()`: that one also accepts form feed.
        [b' ', b'\t', b'\n', b'\r'].contains(self)
    }

    #[inline]
    fn is_letter(&self) -> bool {
        self.is_ascii_alphabetic()
    }

    #[inline]
    fn is_ident(&self) -> bool {
        self.is_ascii_alphanumeric() || *self == b'-' || *self == b'_'
    }
}
66
/// A streaming text parsing interface.
///
/// Wraps a borrowed string together with a byte offset that marks how much
/// of the text has been consumed so far.
#[derive(PartialEq, Clone, Copy, Debug)]
pub struct Stream<'a> {
    // The complete input text being parsed.
    text: &'a str,
    // Current read offset into `text`, in bytes.
    pos: usize,
}

impl<'a> From<&'a str> for Stream<'a> {
    /// Creates a stream positioned at the start of `text`.
    #[inline]
    fn from(text: &'a str) -> Self {
        Self { pos: 0, text }
    }
}
80
81impl<'a> Stream<'a> {
    /// Returns the current position in bytes, counted from the start of the text.
    #[inline]
    pub fn pos(&self) -> usize {
        self.pos
    }
87
    /// Calculates the current position in chars.
    ///
    /// The result is 1-based; this is `calc_char_pos_at` applied to the
    /// current byte position.
    pub fn calc_char_pos(&self) -> usize {
        self.calc_char_pos_at(self.pos)
    }
92
93    /// Calculates the current position in chars.
94    pub fn calc_char_pos_at(&self, byte_pos: usize) -> usize {
95        let mut pos = 1;
96        for (idx, _) in self.text.char_indices() {
97            if idx >= byte_pos {
98                break;
99            }
100
101            pos += 1;
102        }
103
104        pos
105    }
106
    /// Sets the current position equal to the text length.
    ///
    /// Used to indicate end of parsing on error: afterwards `at_end()`
    /// returns `true`.
    #[inline]
    pub fn jump_to_end(&mut self) {
        self.pos = self.text.len();
    }
114
115    /// Checks if the stream is reached the end.
116    ///
117    /// Any [`pos()`] value larger than original text length indicates stream end.
118    ///
119    /// Accessing stream after reaching end via safe methods will produce
120    /// an `UnexpectedEndOfStream` error.
121    ///
122    /// Accessing stream after reaching end via *_unchecked methods will produce
123    /// a Rust's bound checking error.
124    ///
125    /// [`pos()`]: #method.pos
126    #[inline]
127    pub fn at_end(&self) -> bool {
128        self.pos >= self.text.len()
129    }
130
131    /// Returns a byte from a current stream position.
132    ///
133    /// # Errors
134    ///
135    /// - `UnexpectedEndOfStream`
136    #[inline]
137    pub fn curr_byte(&self) -> Result<u8> {
138        if self.at_end() {
139            return Err(Error::UnexpectedEndOfStream);
140        }
141
142        Ok(self.curr_byte_unchecked())
143    }
144
    /// Returns the byte at the current stream position without an end check.
    ///
    /// # Panics
    ///
    /// - if the current position is at or after the end of the data
    ///   (the slice index is out of bounds)
    #[inline]
    pub fn curr_byte_unchecked(&self) -> u8 {
        self.text.as_bytes()[self.pos]
    }
154
155    /// Checks that current byte is equal to provided.
156    ///
157    /// Returns `false` if no bytes left.
158    #[inline]
159    pub fn is_curr_byte_eq(&self, c: u8) -> bool {
160        if !self.at_end() {
161            self.curr_byte_unchecked() == c
162        } else {
163            false
164        }
165    }
166
167    /// Returns a byte from a current stream position if there is one.
168    #[inline]
169    pub fn get_curr_byte(&self) -> Option<u8> {
170        if !self.at_end() {
171            Some(self.curr_byte_unchecked())
172        } else {
173            None
174        }
175    }
176
177    /// Returns a next byte from a current stream position.
178    ///
179    /// # Errors
180    ///
181    /// - `UnexpectedEndOfStream`
182    #[inline]
183    pub fn next_byte(&self) -> Result<u8> {
184        if self.pos + 1 >= self.text.len() {
185            return Err(Error::UnexpectedEndOfStream);
186        }
187
188        Ok(self.text.as_bytes()[self.pos + 1])
189    }
190
    /// Advances the current position by `n` bytes.
    ///
    /// The bounds check is a `debug_assert!`, so it only runs in builds with
    /// debug assertions enabled. Without them the position may end up past
    /// the end of the text, which `at_end()` still reports as the end.
    ///
    /// # Panics
    ///
    /// - with debug assertions enabled, if the new position would be past
    ///   the end of the text
    ///
    /// # Examples
    ///
    /// ```ignore
    /// let mut s = Stream::from("text");
    /// s.advance(2); // ok
    /// s.advance(20); // will cause a panic via debug_assert!().
    /// ```
    #[inline]
    pub fn advance(&mut self, n: usize) {
        debug_assert!(self.pos + n <= self.text.len());
        self.pos += n;
    }
207
208    /// Skips whitespaces.
209    ///
210    /// Accepted values: `' ' \n \r \t`.
211    pub fn skip_spaces(&mut self) {
212        while !self.at_end() && self.curr_byte_unchecked().is_space() {
213            self.advance(1);
214        }
215    }
216
217    /// Checks that the stream starts with a selected text.
218    ///
219    /// We are using `&[u8]` instead of `&str` for performance reasons.
220    ///
221    // /// # Examples
222    // ///
223    // /// ```
224    // /// use svgtypes::Stream;
225    // ///
226    // /// let mut s = Stream::from("Some text.");
227    // /// s.advance(5);
228    // /// assert_eq!(s.starts_with(b"text"), true);
229    // /// assert_eq!(s.starts_with(b"long"), false);
230    // /// ```
231    #[inline]
232    pub fn starts_with(&self, text: &[u8]) -> bool {
233        self.text.as_bytes()[self.pos..].starts_with(text)
234    }
235
236    /// Checks if the stream is starts with a space.
237    ///
238    /// Uses [`skip_spaces()`](#method.curr_byte) internally.
239    pub fn starts_with_space(&self) -> bool {
240        if self.at_end() {
241            return false;
242        }
243
244        let mut is_space = false;
245
246        let c = self.curr_byte_unchecked();
247
248        if c.is_space() {
249            is_space = true;
250        }
251
252        is_space
253    }
254
255    /// Consumes current byte if it's equal to the provided byte.
256    ///
257    /// # Errors
258    ///
259    /// - `InvalidChar`
260    /// - `UnexpectedEndOfStream`
261    ///
262    /// # Examples
263    ///
264    // /// ```
265    // /// use svgtypes::Stream;
266    // ///
267    // /// let mut s = Stream::from("Some text.");
268    // /// s.consume_byte(b'S').unwrap();
269    // /// s.consume_byte(b'o').unwrap();
270    // /// s.consume_byte(b'm').unwrap();
271    // /// // s.consume_byte(b'q').unwrap(); // will produce an error
272    // /// ```
273    pub fn consume_byte(&mut self, c: u8) -> Result<()> {
274        if self.curr_byte()? != c {
275            return Err(Error::InvalidChar(
276                vec![self.curr_byte_unchecked(), c],
277                self.calc_char_pos(),
278            ));
279        }
280
281        self.advance(1);
282        Ok(())
283    }
284
285    /// Consumes selected string.
286    ///
287    /// # Errors
288    ///
289    /// - `InvalidChar`
290    /// - `UnexpectedEndOfStream`
291    pub fn skip_string(&mut self, text: &[u8]) -> Result<()> {
292        if self.at_end() {
293            return Err(Error::UnexpectedEndOfStream);
294        }
295
296        if !self.starts_with(text) {
297            let len = cmp::min(text.len(), self.text.len() - self.pos);
298            // Collect chars and do not slice a string,
299            // because the `len` can be on the char boundary.
300            // Which lead to a panic.
301            let actual = self.text[self.pos..].chars().take(len).collect();
302
303            // Assume that all input `text` are valid UTF-8 strings, so unwrap is safe.
304            let expected = str::from_utf8(text).unwrap().to_owned();
305
306            return Err(Error::InvalidString(
307                vec![actual, expected],
308                self.calc_char_pos(),
309            ));
310        }
311
312        self.advance(text.len());
313        Ok(())
314    }
315
316    /// Consumes bytes by the predicate and returns them.
317    ///
318    /// The result can be empty.
319    pub fn consume_bytes<F>(&mut self, f: F) -> &'a str
320    where
321        F: Fn(&Stream, u8) -> bool,
322    {
323        let start = self.pos();
324        self.skip_bytes(f);
325        self.slice_back(start)
326    }
327
328    /// Consumes bytes by the predicate.
329    pub fn skip_bytes<F>(&mut self, f: F)
330    where
331        F: Fn(&Stream, u8) -> bool,
332    {
333        while !self.at_end() {
334            let c = self.curr_byte_unchecked();
335            if f(self, c) {
336                self.advance(1);
337            } else {
338                break;
339            }
340        }
341    }
342
343    /// Consumes bytes by the predicate and returns them.
344    pub fn consume_ident(&mut self) -> &'a str {
345        let start = self.pos;
346        self.skip_bytes(|_, c| c.is_ident());
347        self.slice_back(start)
348    }
349
    /// Slices data from `pos` to the current position.
    ///
    /// # Panics
    ///
    /// - if `pos..self.pos()` is not a valid range into the text (the string
    ///   indexing fails), e.g. when `pos` is larger than the current position
    #[inline]
    pub fn slice_back(&self, pos: usize) -> &'a str {
        &self.text[pos..self.pos]
    }
355
    /// Slices data from the current position to the end.
    ///
    /// # Panics
    ///
    /// - if the current position is not a valid start index into the text
    ///   (the string indexing fails), e.g. when it is past the end
    #[inline]
    pub fn slice_tail(&self) -> &'a str {
        &self.text[self.pos..]
    }
361
362    /// Parses number from the stream.
363    ///
364    /// This method will detect a number length and then
365    /// will pass a substring to the `f64::from_str` method.
366    ///
367    /// <https://www.w3.org/TR/SVG11/types.html#DataTypeNumber>
368    ///
369    /// # Errors
370    ///
371    /// Returns only `InvalidNumber`.
372    ///
373    // /// # Examples
374    // ///
375    // /// ```
376    // /// use svgtypes::Stream;
377    // ///
378    // /// let mut s = Stream::from("3.14");
379    // /// assert_eq!(s.parse_number().unwrap(), 3.14);
380    // /// assert_eq!(s.at_end(), true);
381    // /// ```
382    pub fn parse_number(&mut self) -> Result<f64> {
383        // Strip off leading whitespaces.
384        self.skip_spaces();
385
386        let start = self.pos();
387
388        if self.at_end() {
389            return Err(Error::InvalidNumber(self.calc_char_pos_at(start)));
390        }
391
392        self.parse_number_impl()
393            .map_err(|_| Error::InvalidNumber(self.calc_char_pos_at(start)))
394    }
395
    /// Scans a number starting at the current position and parses it as `f64`.
    ///
    /// Accepted shape: optional sign, integer digits and/or a `.` followed by
    /// fraction digits, then an optional exponent. The scanned substring is
    /// handed to `f64::from_str`; non-finite results are rejected.
    ///
    /// The position stored in the returned `InvalidNumber` errors is a
    /// placeholder (`0`); `parse_number` replaces every error from this
    /// method with one that carries the real position.
    fn parse_number_impl(&mut self) -> Result<f64> {
        let start = self.pos();

        let mut c = self.curr_byte()?;

        // Consume sign.
        if c.is_sign() {
            self.advance(1);
            c = self.curr_byte()?;
        }

        // Consume integer.
        // A leading `.` is allowed here; the fraction step below consumes it.
        match c {
            b'0'..=b'9' => self.skip_digits(),
            b'.' => {}
            _ => return Err(Error::InvalidNumber(0)),
        }

        // Consume fraction.
        if let Ok(b'.') = self.curr_byte() {
            self.advance(1);
            self.skip_digits();
        }

        // Consume exponent.
        if let Ok(c) = self.curr_byte() {
            if matches!(c, b'e' | b'E') {
                // An `e`/`E` that is the last byte of the stream makes
                // `next_byte` fail, which rejects the whole number.
                let c2 = self.next_byte()?;
                // Check for `em`/`ex`.
                // Those are unit suffixes, not exponents, so the `e` is left
                // in the stream.
                if c2 != b'm' && c2 != b'x' {
                    self.advance(1);

                    match self.curr_byte()? {
                        b'+' | b'-' => {
                            self.advance(1);
                            self.skip_digits();
                        }
                        b'0'..=b'9' => self.skip_digits(),
                        _ => {
                            return Err(Error::InvalidNumber(0));
                        }
                    }
                }
            }
        }

        let s = self.slice_back(start);

        // Use the default f64 parser now.
        if let Ok(n) = f64::from_str(s) {
            // inf, nan, etc. are an error.
            if n.is_finite() {
                return Ok(n);
            }
        }

        Err(Error::InvalidNumber(0))
    }
453
454    /// Parses number from the list of numbers.
455    ///
456    /// # Examples
457    ///
458    // /// ```
459    // /// use svgtypes::Stream;
460    // ///
461    // /// let mut s = Stream::from("3.14, 12,5 , 20-4");
462    // /// assert_eq!(s.parse_list_number().unwrap(), 3.14);
463    // /// assert_eq!(s.parse_list_number().unwrap(), 12.0);
464    // /// assert_eq!(s.parse_list_number().unwrap(), 5.0);
465    // /// assert_eq!(s.parse_list_number().unwrap(), 20.0);
466    // /// assert_eq!(s.parse_list_number().unwrap(), -4.0);
467    // /// ```
468    pub fn parse_list_number(&mut self) -> Result<f64> {
469        if self.at_end() {
470            return Err(Error::UnexpectedEndOfStream);
471        }
472
473        let n = self.parse_number()?;
474        self.skip_spaces();
475        parse_list_separator(self);
476        Ok(n)
477    }
478
479    /// Parses integer number from the stream.
480    ///
481    /// Same as [`parse_number()`], but only for integer. Does not refer to any SVG type.
482    ///
483    /// [`parse_number()`]: #method.parse_number
484    pub fn parse_integer(&mut self) -> Result<i32> {
485        self.skip_spaces();
486
487        if self.at_end() {
488            return Err(Error::InvalidNumber(self.calc_char_pos()));
489        }
490
491        let start = self.pos();
492
493        // Consume sign.
494        if self.curr_byte()?.is_sign() {
495            self.advance(1);
496        }
497
498        // The current char must be a digit.
499        if !self.curr_byte()?.is_digit() {
500            return Err(Error::InvalidNumber(self.calc_char_pos_at(start)));
501        }
502
503        self.skip_digits();
504
505        // Use the default i32 parser now.
506        let s = self.slice_back(start);
507        match i32::from_str(s) {
508            Ok(n) => Ok(n),
509            Err(_) => Err(Error::InvalidNumber(self.calc_char_pos_at(start))),
510        }
511    }
512
513    /// Parses integer from the list of numbers.
514    pub fn parse_list_integer(&mut self) -> Result<i32> {
515        if self.at_end() {
516            return Err(Error::UnexpectedEndOfStream);
517        }
518
519        let n = self.parse_integer()?;
520        self.skip_spaces();
521        parse_list_separator(self);
522        Ok(n)
523    }
524
525    /// Parses length from the stream.
526    ///
527    /// <https://www.w3.org/TR/SVG11/types.html#DataTypeLength>
528    ///
529    /// # Examples
530    ///
531    // /// ```
532    // /// use svgtypes::{Stream, Length, LengthUnit};
533    // ///
534    // /// let mut s = Stream::from("30%");
535    // /// assert_eq!(s.parse_length().unwrap(), Length::new(30.0, LengthUnit::Percent));
536    // /// ```
537    ///
538    /// # Notes
539    ///
540    /// - Suffix must be lowercase, otherwise it will be an error.
541    pub fn parse_length(&mut self) -> Result<Length> {
542        self.skip_spaces();
543
544        let n = self.parse_number()?;
545
546        if self.at_end() {
547            return Ok(Length::new(n, LengthUnit::None));
548        }
549
550        let u = if self.starts_with(b"%") {
551            LengthUnit::Percent
552        } else if self.starts_with(b"em") {
553            LengthUnit::Em
554        } else if self.starts_with(b"ex") {
555            LengthUnit::Ex
556        } else if self.starts_with(b"px") {
557            LengthUnit::Px
558        } else if self.starts_with(b"in") {
559            LengthUnit::In
560        } else if self.starts_with(b"cm") {
561            LengthUnit::Cm
562        } else if self.starts_with(b"mm") {
563            LengthUnit::Mm
564        } else if self.starts_with(b"pt") {
565            LengthUnit::Pt
566        } else if self.starts_with(b"pc") {
567            LengthUnit::Pc
568        } else {
569            LengthUnit::None
570        };
571
572        match u {
573            LengthUnit::Percent => self.advance(1),
574            LengthUnit::None => {}
575            _ => self.advance(2),
576        }
577
578        Ok(Length::new(n, u))
579    }
580
581    /// Parses length from the list of lengths.
582    pub fn parse_list_length(&mut self) -> Result<Length> {
583        if self.at_end() {
584            return Err(Error::UnexpectedEndOfStream);
585        }
586
587        let l = self.parse_length()?;
588        self.skip_spaces();
589        parse_list_separator(self);
590        Ok(l)
591    }
592
593    /// Parses angle from the stream.
594    ///
595    /// <https://www.w3.org/TR/SVG11/types.html#DataTypeAngle>
596    ///
597    /// # Notes
598    ///
599    /// - Suffix must be lowercase, otherwise it will be an error.
600    pub fn parse_angle(&mut self) -> Result<Angle> {
601        self.skip_spaces();
602
603        let n = self.parse_number()?;
604
605        if self.at_end() {
606            return Ok(Angle::new(n, AngleUnit::Degrees));
607        }
608
609        let u = if self.starts_with(b"deg") {
610            self.advance(3);
611            AngleUnit::Degrees
612        } else if self.starts_with(b"grad") {
613            self.advance(4);
614            AngleUnit::Gradians
615        } else if self.starts_with(b"rad") {
616            self.advance(3);
617            AngleUnit::Radians
618        } else {
619            AngleUnit::Degrees
620        };
621
622        Ok(Angle::new(n, u))
623    }
624
625    /// Skips digits.
626    pub fn skip_digits(&mut self) {
627        self.skip_bytes(|_, c| c.is_digit());
628    }
629
630    /// Parses a [IRI].
631    ///
632    /// By the SVG spec, the ID must contain only [Name] characters,
633    /// but since no one fallows this it will parse any characters.
634    ///
635    /// [IRI]: https://www.w3.org/TR/SVG11/types.html#DataTypeIRI
636    /// [Name]: https://www.w3.org/TR/xml/#NT-Name
637    pub fn parse_iri(&mut self) -> Result<&'a str> {
638        let mut _impl = || -> Result<&'a str> {
639            self.skip_spaces();
640            self.consume_byte(b'#')?;
641            let link = self.consume_bytes(|_, c| c != b' ');
642            if !link.is_empty() {
643                Ok(link)
644            } else {
645                Err(Error::InvalidValue)
646            }
647        };
648
649        _impl().map_err(|_| Error::InvalidValue)
650    }
651
652    /// Parses a [FuncIRI].
653    ///
654    /// By the SVG spec, the ID must contain only [Name] characters,
655    /// but since no one fallows this it will parse any characters.
656    ///
657    /// [FuncIRI]: https://www.w3.org/TR/SVG11/types.html#DataTypeFuncIRI
658    /// [Name]: https://www.w3.org/TR/xml/#NT-Name
659    pub fn parse_func_iri(&mut self) -> Result<&'a str> {
660        let mut _impl = || -> Result<&'a str> {
661            self.skip_spaces();
662            self.skip_string(b"url(")?;
663            self.skip_spaces();
664            self.consume_byte(b'#')?;
665            let link = self.consume_bytes(|_, c| c != b' ' && c != b')');
666            self.skip_spaces();
667            self.consume_byte(b')')?;
668
669            if !link.is_empty() {
670                Ok(link)
671            } else {
672                Err(Error::InvalidValue)
673            }
674        };
675
676        _impl().map_err(|_| Error::InvalidValue)
677    }
678}
679
680#[inline]
681fn parse_list_separator(s: &mut Stream) {
682    if s.is_curr_byte_eq(b',') {
683        s.advance(1);
684    }
685}
686
// Unit tests for the integer, length, IRI and FuncIRI parsers.
#[cfg(test)]
mod tests {
    use super::*;

    // A plain integer is parsed as-is.
    #[test]
    fn parse_integer_1() {
        let mut s = Stream::from("10");
        assert_eq!(s.parse_integer().unwrap(), 10);
    }

    #[test]
    fn parse_err_integer_1() {
        // error because of overflow
        let mut s = Stream::from("10000000000000");
        assert_eq!(
            s.parse_integer().unwrap_err().to_string(),
            "invalid number at position 1"
        );
    }

    // A trailing list separator is not part of the length.
    #[test]
    fn parse_length_1() {
        let mut s = Stream::from("1,");
        assert_eq!(
            s.parse_length().unwrap(),
            Length::new(1.0, LengthUnit::None)
        );
    }

    // Whitespace before the separator does not affect the parsed length.
    #[test]
    fn parse_length_2() {
        let mut s = Stream::from("1 ,");
        assert_eq!(
            s.parse_length().unwrap(),
            Length::new(1.0, LengthUnit::None)
        );
    }

    // Only the first number of a space-separated pair is parsed.
    #[test]
    fn parse_length_3() {
        let mut s = Stream::from("1 1");
        assert_eq!(
            s.parse_length().unwrap(),
            Length::new(1.0, LengthUnit::None)
        );
    }

    #[test]
    fn parse_iri_1() {
        assert_eq!(Stream::from("#id").parse_iri().unwrap(), "id");
    }

    // Surrounding whitespace is not part of the link.
    #[test]
    fn parse_iri_2() {
        assert_eq!(Stream::from("   #id   ").parse_iri().unwrap(), "id");
    }

    // The link ends at the first space; trailing text is ignored.
    #[test]
    fn parse_iri_3() {
        assert_eq!(Stream::from("   #id   text").parse_iri().unwrap(), "id");
    }

    // Links may start with a digit.
    #[test]
    fn parse_iri_4() {
        assert_eq!(Stream::from("#1").parse_iri().unwrap(), "1");
    }

    // A space directly after `#` yields an empty link, which is an error.
    #[test]
    fn parse_err_iri_1() {
        assert_eq!(
            Stream::from("# id").parse_iri().unwrap_err().to_string(),
            "invalid value"
        );
    }

    #[test]
    fn parse_func_iri_1() {
        assert_eq!(Stream::from("url(#id)").parse_func_iri().unwrap(), "id");
    }

    #[test]
    fn parse_func_iri_2() {
        assert_eq!(Stream::from("url(#1)").parse_func_iri().unwrap(), "1");
    }

    // Whitespace is accepted around the whole value and inside the parentheses.
    #[test]
    fn parse_func_iri_3() {
        assert_eq!(
            Stream::from("    url(    #id    )   ")
                .parse_func_iri()
                .unwrap(),
            "id"
        );
    }

    // Whitespace between `url` and `(` is rejected.
    #[test]
    fn parse_err_func_iri_1() {
        assert_eq!(
            Stream::from("url ( #1 )")
                .parse_func_iri()
                .unwrap_err()
                .to_string(),
            "invalid value"
        );
    }

    // An empty link is rejected.
    #[test]
    fn parse_err_func_iri_2() {
        assert_eq!(
            Stream::from("url(#)")
                .parse_func_iri()
                .unwrap_err()
                .to_string(),
            "invalid value"
        );
    }

    // A space directly after `#` is rejected.
    #[test]
    fn parse_err_func_iri_3() {
        assert_eq!(
            Stream::from("url(# id)")
                .parse_func_iri()
                .unwrap_err()
                .to_string(),
            "invalid value"
        );
    }
}