// azul_simplecss/tokenizer.rs
use stream;
use stream::Stream;
use error::Error;
8
/// Relationship marker emitted between two selectors.
///
/// The type is a plain fieldless enum, so it is cheap to copy and compare.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Combinator {
    /// Emitted when only whitespace separates two selectors (`a b`).
    Space,
    /// Emitted for a `>` that follows a selector (`a > b`).
    GreaterThan,
    /// Emitted for a `+` that follows a selector (`a + b`).
    Plus,
}
19
20#[derive(PartialEq,Debug)]
22pub enum Token<'a> {
23 UniversalSelector,
27 TypeSelector(&'a str),
31 IdSelector(&'a str),
37 ClassSelector(&'a str),
43 AttributeSelector(&'a str),
49 PseudoClass { selector: &'a str, value: Option<&'a str> },
56 Combinator(Combinator),
58 Comma,
62 BlockStart,
68 BlockEnd,
74 Declaration(&'a str, &'a str),
80 AtRule(&'a str),
83 DeclarationStr(&'a str),
85 AtStr(&'a str),
87 DoublePseudoClass { selector: &'a str, value: Option<&'a str> },
89 EndOfStream,
93}
94
/// Grammar context the tokenizer is currently reading in.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum State {
    /// Outside any block: selectors and at-rules are expected. Initial state.
    Rule,
    /// Inside a `{ ... }` block that was opened from `Rule`.
    Declaration,
    /// Inside a block that was opened while already in `Declaration`.
    DeclarationRule,
}
101
102pub struct Tokenizer<'a> {
104 stream: Stream<'a>,
105 state: State,
106 after_selector: bool,
107 has_at_rule: bool,
108 at_start: bool,
109}
110
111impl<'a> Tokenizer<'a> {
112 pub fn new(text: &str) -> Tokenizer {
114 Tokenizer {
115 stream: Stream::new(text.as_bytes()),
116 state: State::Rule,
117 after_selector: false,
118 has_at_rule: false,
119 at_start: true,
120 }
121 }
122
123 pub fn new_bound(text: &str, start: usize, end: usize) -> Tokenizer {
131 Tokenizer {
132 stream: Stream::new_bound(text.as_bytes(), start, end),
133 state: State::Rule,
134 after_selector: false,
135 has_at_rule: false,
136 at_start: true,
137 }
138 }
139
140 pub fn pos(&self) -> usize {
142 self.stream.pos()
143 }
144
145 pub fn parse_next(&mut self) -> Result<Token<'a>, Error> {
147 if self.at_start {
148 self.stream.skip_spaces();
149 self.at_start = false;
150 }
151
152 if self.stream.at_end() {
153 return Ok(Token::EndOfStream);
154 }
155
156 match self.state {
157 State::Rule => self.consume_rule(),
158 State::Declaration => self.consume_declaration(),
159 State::DeclarationRule => self.consume_declaration(),
160 }
161 }
162
163 fn consume_rule(&mut self) -> Result<Token<'a>, Error> {
164 match self.stream.curr_char_raw() {
165 b'@' => {
166 self.after_selector = true;
167 self.has_at_rule = true;
168 self.stream.advance_raw(1);
169 let s = self.consume_ident()?;
170 return Ok(Token::AtRule(s));
171 }
172 b'#' => {
173 self.after_selector = true;
174 self.has_at_rule = false;
175 self.stream.advance_raw(1);
176 let s = try!(self.consume_ident());
177 return Ok(Token::IdSelector(s));
178 }
179 b'.' => {
180 self.after_selector = true;
181 self.has_at_rule = false;
182 self.stream.advance_raw(1);
183 let s = try!(self.consume_ident());
184 return Ok(Token::ClassSelector(s));
185 }
186 b'*' => {
187 self.after_selector = true;
188 self.has_at_rule = false;
189 self.stream.advance_raw(1);
190 self.stream.skip_spaces();
191 return Ok(Token::UniversalSelector);
192 }
193 b':' => {
194 self.after_selector = true;
195 self.has_at_rule = false;
196 self.stream.advance_raw(1);
197
198 let is_double_colon = self.stream.is_char_eq(b':')?;
200 if is_double_colon {
201 self.stream.advance_raw(1); }
203
204 let s = try!(self.consume_ident());
205
206 if self.stream.curr_char() == Ok(b'(') {
207 self.stream.advance_raw(1); let inner_len = self.stream.length_to(b')')?;
210 let inner = self.stream.read_raw_str(inner_len);
211 self.stream.advance_raw(1); return Ok(if is_double_colon {
213 Token::DoublePseudoClass { selector: s, value: Some(inner) }
214 } else {
215 Token::PseudoClass { selector: s, value: Some(inner) }
216 });
217 } else {
218 return Ok(if is_double_colon {
219 Token::DoublePseudoClass { selector: s, value: None }
220 } else {
221 Token::PseudoClass { selector: s, value: None }
222 });
223 }
224 }
225 b'[' => {
226 self.after_selector = true;
227 self.has_at_rule = false;
228 self.stream.advance_raw(1);
229 let len = try!(self.stream.length_to(b']'));
230 let s = self.stream.read_raw_str(len);
231 self.stream.advance_raw(1); self.stream.skip_spaces();
233 return Ok(Token::AttributeSelector(s));
234 }
235 b',' => {
236 self.after_selector = false;
237 self.has_at_rule = false;
238 self.stream.advance_raw(1);
239 self.stream.skip_spaces();
240 return Ok(Token::Comma);
241 }
242 b'{' => {
243 self.after_selector = false;
244 self.has_at_rule = false;
245 self.state = State::Declaration;
246 self.stream.advance_raw(1);
247 return Ok(Token::BlockStart);
248 }
249 b'>' => {
250 if self.after_selector {
251 self.after_selector = false;
252 self.has_at_rule = false;
253 self.stream.advance_raw(1);
254 self.stream.skip_spaces();
255 return Ok(Token::Combinator(Combinator::GreaterThan));
256 } else {
257 return Err(Error::UnknownToken(self.stream.gen_error_pos()));
258 }
259 }
260 b'+' => {
261 if self.after_selector {
262 self.after_selector = false;
263 self.has_at_rule = false;
264 self.stream.advance_raw(1);
265 self.stream.skip_spaces();
266 return Ok(Token::Combinator(Combinator::Plus));
267 } else {
268 return Err(Error::UnknownToken(self.stream.gen_error_pos()));
269 }
270 }
271 b'/' => {
272 if try!(self.consume_comment()) {
273 return self.parse_next();
274 } else {
275 return Err(Error::UnknownToken(self.stream.gen_error_pos()));
276 }
277 }
278 _ => {
279 if self.stream.is_space_raw() {
280 self.stream.skip_spaces();
281
282 if !self.after_selector {
283 return self.parse_next();
284 }
285
286 match self.stream.curr_char()? {
287 b'{' | b'/' | b'>' | b'+' | b'*' => { return self.parse_next(); },
288 _ => {
289 self.after_selector = false;
290 if !self.has_at_rule {
291 return Ok(Token::Combinator(Combinator::Space));
292 }
293 }
294 }
295 }
296
297 let s = try!(self.consume_ident());
298 let token_type = if self.has_at_rule {
299 self.has_at_rule = true;
300 Token::AtStr(s)
301 } else {
302 self.has_at_rule = false;
303 Token::TypeSelector(s)
304 };
305
306 self.after_selector = true;
307 return Ok(token_type);
308 }
309 }
310 }
311
312 fn consume_declaration(&mut self) -> Result<Token<'a>, Error> {
313 self.stream.skip_spaces();
314 self.has_at_rule = false;
315
316 match self.stream.curr_char_raw() {
317 b'}' => {
318 if self.state == State::DeclarationRule {
319 self.state = State::Declaration;
320 } else if self.state == State::Declaration {
321 self.state = State::Rule;
322 }
323 self.stream.advance_raw(1);
324 self.stream.skip_spaces();
325 return Ok(Token::BlockEnd);
326 },
327 b'{' => {
328 if self.state == State::Rule {
329 self.state = State::Declaration;
330 } else if self.state == State::Declaration {
331 self.state = State::DeclarationRule;
332 }
333 self.stream.advance_raw(1);
334 self.stream.skip_spaces();
335 return Ok(Token::BlockStart);
336 },
337 b'/' => {
338 if try!(self.consume_comment()) {
339 return self.parse_next();
340 } else {
341 return Err(Error::UnknownToken(self.stream.gen_error_pos()));
342 }
343 }
344 _ => {
345 let name = self.consume_ident()?;
346
347 self.stream.skip_spaces();
348
349 if self.stream.is_char_eq(b'/')? {
350 if !try!(self.consume_comment()) {
351 return Err(Error::UnknownToken(self.stream.gen_error_pos()));
352 }
353 }
354
355 if self.stream.is_char_eq(b'{')? {
356 if name.is_empty() {
357 return Err(Error::UnknownToken(self.stream.gen_error_pos()));
358 } else {
359 return Ok(Token::DeclarationStr(name));
360 }
361 }
362
363 self.stream.advance_raw(1); self.stream.skip_spaces();
365
366 if self.stream.is_char_eq(b'/')? {
367 if !try!(self.consume_comment()) {
368 return Err(Error::UnknownToken(self.stream.gen_error_pos()));
369 }
370 }
371
372 let len = self.stream.length_to_either(&[b';', b'}'])?;
373
374 if len == 0 {
375 return Err(Error::UnknownToken(self.stream.gen_error_pos()));
376 }
377
378 let mut value = self.stream.read_raw_str(len);
379 if let Some(p) = value.as_bytes().iter().rposition(|c| !stream::is_space(*c)) {
381 value = &value[0..(p + 1)];
382 }
383
384 self.stream.skip_spaces();
385 while try!(self.stream.is_char_eq(b';')) {
386 self.stream.advance_raw(1);
387 self.stream.skip_spaces();
388 }
389
390 Ok(Token::Declaration(name, value))
391 }
392 }
393 }
394
395 fn consume_ident(&mut self) -> Result<&'a str, Error> {
396 let start = self.stream.pos();
397
398 while !self.stream.at_end() {
399 if self.stream.is_ident_raw() {
400 try!(self.stream.advance(1));
401 } else {
402 break;
403 }
404 }
405
406 if start == self.stream.pos() {
407 return Err(Error::UnknownToken(self.stream.gen_error_pos()));
408 }
409
410 let s = self.stream.slice_region_raw_str(start, self.stream.pos());
411 Ok(s)
412 }
413
414 fn consume_comment(&mut self) -> Result<bool, Error> {
415 self.stream.advance_raw(1);
416
417 if try!(self.stream.is_char_eq(b'*')) {
418 self.stream.advance_raw(1); while !self.stream.at_end() {
421 let len = try!(self.stream.length_to(b'*'));
422 try!(self.stream.advance(len + 1));
423 if try!(self.stream.is_char_eq(b'/')) {
424 self.stream.advance_raw(1);
425 break;
426 }
427 }
428
429 return Ok(true);
430 } else {
431 return Ok(false);
432 }
433 }
434}