ziyy_core/splitter/
mod.rs1use std::borrow::Cow;
2use std::mem::take;
3
4use fragment::Fragment;
5use fragment::FragmentType::{self, *};
6
7use crate::common::Span;
8use crate::{Error, ErrorType, Result};
9
10pub mod fragment;
11
12pub struct Splitter<'a> {
13 source: &'a Cow<'a, str>,
14 fragments: Vec<Fragment<'a>>,
15 start: usize,
16 current: usize,
17 span: Span,
18}
19
20impl<'a> Default for Splitter<'a> {
21 fn default() -> Self {
22 Self::new()
23 }
24}
25
/// Quoting state tracked while scanning the inside of a tag.
enum Quote {
    /// Inside a `'...'` string.
    Single,
    /// Inside a `"..."` string.
    Double,
    /// Not inside any quoted string.
    None,
}
31
/// Empty placeholder text that a `Splitter` points at before `split` is given a real source.
static SOURCE: Cow<'static, str> = Cow::Borrowed("");
33
34impl<'a> Splitter<'a> {
35 pub fn new() -> Self {
36 Self {
37 source: &SOURCE,
38 fragments: vec![],
39 start: 0,
40 current: 0,
41 span: Span::default(),
42 }
43 }
44
45 pub fn split(&mut self, source: &'a Cow<'a, str>) -> Result<Vec<Fragment<'a>>> {
46 self.source = source;
47
48 macro_rules! consume_word {
49 ($c:ident) => {
50 loop {
51 if self.is_at_end() {
52 break;
53 }
54
55 if is_whitespace(self.peek()) {
56 break;
57 }
58
59 if matches!(self.peek(), b'<') {
60 break;
61 }
62
63 if matches!($c, b'\\') {
64 self.advance();
65 }
66
67 self.advance();
68 }
69 };
70 }
71
72 while !self.is_at_end() {
73 self.start = self.current;
74
75 let mut c = self.advance();
76
77 match c {
78 b' ' | b'\r' | b'\t' | b'\n' => self.whitespace(),
79 b'\\' => {
80 c = self.advance();
81 consume_word!(c);
82 self.add_fragment(Word);
83 }
84 b'<' => self.tag()?,
85 _ => {
86 consume_word!(c);
87 self.add_fragment(Word);
88 }
89 }
90 }
91
92 Ok(take(&mut self.fragments))
93 }
94
95 fn tag(&mut self) -> Result<()> {
96 if self.peek() == b'>' {
97 self.advance();
98 self.add_fragment(Tag);
99 return Ok(());
100 }
101 let mut quote = Quote::None;
102
103 loop {
104 let c = self.advance();
105 if self.is_at_end() {
106 match quote {
107 Quote::Single | Quote::Double => {
108 return Err(Error::new(
109 ErrorType::UnterminatedString,
110 "Untermitated string literal".into(),
111 self.span,
112 ));
113 }
114 Quote::None => {
115 return Err(Error::new(
116 ErrorType::UnexpectedEof,
117 "Untermitated string literal".into(),
118 self.span,
119 ));
120 }
121 }
122 }
123
124 let close = matches!(self.peek(), b'>');
125 let single = matches!(self.peek(), b'\'');
126 let double = matches!(self.peek(), b'"');
127 let esc = matches!(c, b'\\');
128 match quote {
129 Quote::Single => {
130 if single && !esc {
131 quote = Quote::None;
132 }
133 }
134 Quote::Double => {
135 if double && !esc {
136 quote = Quote::None;
137 }
138 }
139 Quote::None => {
140 if close {
141 break;
142 } else if single {
143 quote = Quote::Single;
144 } else if double {
145 quote = Quote::Double;
146 }
147 }
148 }
149 }
150
151 self.advance();
152 self.add_fragment(Tag);
153 Ok(())
154 }
155
156 fn whitespace(&mut self) {
157 while is_whitespace(self.peek()) {
158 self.advance();
159 }
160 self.add_fragment(Whitespace);
161 }
162
163 fn peek(&self) -> u8 {
164 if self.is_at_end() {
165 b'\0'
166 } else {
167 self.source.as_bytes()[self.current]
168 }
169 }
170
171 fn is_at_end(&self) -> bool {
172 self.current >= self.source.len()
173 }
174
175 fn advance(&mut self) -> u8 {
176 self.current += 1;
177 self.span += (0, 1);
178 let ch = self.source.as_bytes()[self.current - 1];
179 if ch == b'\n' {
180 self.span += (1, 0);
181 }
182 ch
183 }
184
185 fn add_fragment(&mut self, r#type: FragmentType) {
186 let text = &self.source[self.start..self.current];
187 self.fragments
188 .push(Fragment::new(r#type, Cow::Borrowed(text), self.span));
189 self.span.tie_end();
190 }
191}
192
/// Reports whether `c` is an ASCII whitespace byte: space, tab, line feed,
/// form feed or carriage return.
pub fn is_whitespace(c: u8) -> bool {
    c.is_ascii_whitespace()
}