ziyy_core/splitter/
mod.rs

1use std::borrow::Cow;
2use std::mem::take;
3
4use fragment::Fragment;
5use fragment::FragmentType::{self, *};
6
7use crate::common::Span;
8use crate::{Error, ErrorType, Result};
9
10pub mod fragment;
11
12pub struct Splitter<'a> {
13    source: &'a Cow<'a, str>,
14    fragments: Vec<Fragment<'a>>,
15    start: usize,
16    current: usize,
17    span: Span,
18}
19
20impl<'a> Default for Splitter<'a> {
21    fn default() -> Self {
22        Self::new()
23    }
24}
25
/// Quoting state tracked while scanning the inside of a tag.
enum Quote {
    /// Inside a string opened with `'`.
    Single,

    /// Inside a string opened with `"`.
    Double,

    /// Not inside any quoted string.
    None,
}
31
/// Empty placeholder input that a freshly created splitter points at until
/// `split` is handed the real source.
static SOURCE: Cow<'static, str> = Cow::Borrowed("");
33
34impl<'a> Splitter<'a> {
35    pub fn new() -> Self {
36        Self {
37            source: &SOURCE,
38            fragments: vec![],
39            start: 0,
40            current: 0,
41            span: Span::default(),
42        }
43    }
44
45    pub fn split(&mut self, source: &'a Cow<'a, str>) -> Result<Vec<Fragment<'a>>> {
46        self.source = source;
47
48        macro_rules! consume_word {
49            ($c:ident) => {
50                loop {
51                    if self.is_at_end() {
52                        break;
53                    }
54
55                    if is_whitespace(self.peek()) {
56                        break;
57                    }
58
59                    if matches!(self.peek(), b'<') {
60                        break;
61                    }
62
63                    if matches!($c, b'\\') {
64                        self.advance();
65                    }
66
67                    self.advance();
68                }
69            };
70        }
71
72        while !self.is_at_end() {
73            self.start = self.current;
74
75            let mut c = self.advance();
76
77            match c {
78                b' ' | b'\r' | b'\t' | b'\n' => self.whitespace(),
79                b'\\' => {
80                    c = self.advance();
81                    consume_word!(c);
82                    self.add_fragment(Word);
83                }
84                b'<' => self.tag()?,
85                _ => {
86                    consume_word!(c);
87                    self.add_fragment(Word);
88                }
89            }
90        }
91
92        Ok(take(&mut self.fragments))
93    }
94
95    fn tag(&mut self) -> Result<()> {
96        if self.peek() == b'>' {
97            self.advance();
98            self.add_fragment(Tag);
99            return Ok(());
100        }
101        let mut quote = Quote::None;
102
103        loop {
104            let c = self.advance();
105            if self.is_at_end() {
106                match quote {
107                    Quote::Single | Quote::Double => {
108                        return Err(Error::new(
109                            ErrorType::UnterminatedString,
110                            "Untermitated string literal".into(),
111                            self.span,
112                        ));
113                    }
114                    Quote::None => {
115                        return Err(Error::new(
116                            ErrorType::UnexpectedEof,
117                            "Untermitated string literal".into(),
118                            self.span,
119                        ));
120                    }
121                }
122            }
123
124            let close = matches!(self.peek(), b'>');
125            let single = matches!(self.peek(), b'\'');
126            let double = matches!(self.peek(), b'"');
127            let esc = matches!(c, b'\\');
128            match quote {
129                Quote::Single => {
130                    if single && !esc {
131                        quote = Quote::None;
132                    }
133                }
134                Quote::Double => {
135                    if double && !esc {
136                        quote = Quote::None;
137                    }
138                }
139                Quote::None => {
140                    if close {
141                        break;
142                    } else if single {
143                        quote = Quote::Single;
144                    } else if double {
145                        quote = Quote::Double;
146                    }
147                }
148            }
149        }
150
151        self.advance();
152        self.add_fragment(Tag);
153        Ok(())
154    }
155
156    fn whitespace(&mut self) {
157        while is_whitespace(self.peek()) {
158            self.advance();
159        }
160        self.add_fragment(Whitespace);
161    }
162
163    fn peek(&self) -> u8 {
164        if self.is_at_end() {
165            b'\0'
166        } else {
167            self.source.as_bytes()[self.current]
168        }
169    }
170
171    fn is_at_end(&self) -> bool {
172        self.current >= self.source.len()
173    }
174
175    fn advance(&mut self) -> u8 {
176        self.current += 1;
177        self.span += (0, 1);
178        let ch = self.source.as_bytes()[self.current - 1];
179        if ch == b'\n' {
180            self.span += (1, 0);
181        }
182        ch
183    }
184
185    fn add_fragment(&mut self, r#type: FragmentType) {
186        let text = &self.source[self.start..self.current];
187        self.fragments
188            .push(Fragment::new(r#type, Cow::Borrowed(text), self.span));
189        self.span.tie_end();
190    }
191}
192
/// Reports whether `c` is a whitespace byte for the splitter: space,
/// horizontal tab, line feed, form feed or carriage return.
pub fn is_whitespace(c: u8) -> bool {
    // `u8::is_ascii_whitespace` accepts exactly these five bytes.
    c.is_ascii_whitespace()
}