1use crate::stream;
6use crate::stream::Stream;
7use crate::error::Error;
8
/// How two neighbouring selectors are joined.
///
/// The type is a plain fieldless enum, so it is `Copy` and fully comparable;
/// callers can keep a combinator around after storing it in a [`Token`].
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum Combinator {
    /// Emitted when only whitespace separates two selectors.
    Space,
    /// Emitted for a `>` between selectors.
    GreaterThan,
    /// Emitted for a `+` between selectors.
    Plus,
    /// Emitted for a `~` between selectors.
    Tilde,
}
21
22#[derive(PartialEq,Debug)]
24pub enum Token<'a> {
25 UniversalSelector,
29 TypeSelector(&'a str),
33 IdSelector(&'a str),
39 ClassSelector(&'a str),
45 AttributeSelector(&'a str),
51 PseudoClass {
58 selector: &'a str,
60 value: Option<&'a str>,
62 },
63 Combinator(Combinator),
65 Comma,
69 BlockStart,
75 BlockEnd,
81 Declaration(&'a str, &'a str),
87 AtRule(&'a str),
90 DeclarationStr(&'a str),
92 AtStr(&'a str),
94 DoublePseudoClass {
96 selector: &'a str,
98 value: Option<&'a str>,
100 },
101 EndOfStream,
105}
106
/// Selects which consumer `Tokenizer::parse_next` dispatches to.
#[derive(PartialEq)]
enum State {
    /// Outside any block: selectors and at-rule preludes are expected.
    Rule,
    /// Inside a `{ ... }` block.
    Declaration,
    /// Inside a block that was opened while already in `Declaration`.
    DeclarationRule,
}
113
114pub struct Tokenizer<'a> {
116 stream: Stream<'a>,
117 state: State,
118 after_selector: bool,
119 has_at_rule: bool,
120 at_start: bool,
121 nesting_stack: Vec<bool>,
124}
125
126impl<'a> Tokenizer<'a> {
127 pub fn new(text: &str) -> Tokenizer<'_> {
129 Tokenizer {
130 stream: Stream::new(text.as_bytes()),
131 state: State::Rule,
132 after_selector: false,
133 has_at_rule: false,
134 at_start: true,
135 nesting_stack: Vec::new(),
136 }
137 }
138
139 pub fn new_bound(text: &str, start: usize, end: usize) -> Tokenizer<'_> {
147 Tokenizer {
148 stream: Stream::new_bound(text.as_bytes(), start, end),
149 state: State::Rule,
150 after_selector: false,
151 has_at_rule: false,
152 at_start: true,
153 nesting_stack: Vec::new(),
154 }
155 }
156
157 pub fn pos(&self) -> usize {
159 self.stream.pos()
160 }
161
162 pub fn parse_next(&mut self) -> Result<Token<'a>, Error> {
164 if self.at_start {
165 self.stream.skip_spaces();
166 self.at_start = false;
167 }
168
169 if self.stream.at_end() {
170 return Ok(Token::EndOfStream);
171 }
172
173 match self.state {
174 State::Rule => self.consume_rule(),
175 State::Declaration => self.consume_declaration(),
176 State::DeclarationRule => self.consume_declaration(),
177 }
178 }
179
180 fn consume_rule(&mut self) -> Result<Token<'a>, Error> {
181 match self.stream.curr_char_raw() {
182 b'@' => {
183 self.after_selector = true;
184 self.has_at_rule = true;
185 self.stream.advance_raw(1);
186 let s = self.consume_ident()?;
187
188 return Ok(Token::AtRule(s));
191 }
192 b'#' => {
193 self.after_selector = true;
194 self.has_at_rule = false;
195 self.stream.advance_raw(1);
196 let s = self.consume_ident()?;
197 return Ok(Token::IdSelector(s));
198 }
199 b'.' => {
200 self.after_selector = true;
201 self.has_at_rule = false;
202 self.stream.advance_raw(1);
203 let s = self.consume_ident()?;
204 return Ok(Token::ClassSelector(s));
205 }
206 b'*' => {
207 self.after_selector = true;
208 self.has_at_rule = false;
209 self.stream.advance_raw(1);
210 self.stream.skip_spaces();
211 return Ok(Token::UniversalSelector);
212 }
213 b':' => {
214 self.after_selector = true;
215 self.has_at_rule = false;
216 self.stream.advance_raw(1);
217
218 let is_double_colon = self.stream.is_char_eq(b':')?;
220 if is_double_colon {
221 self.stream.advance_raw(1); }
223
224 let s = self.consume_ident()?;
225
226 if self.stream.curr_char() == Ok(b'(') {
227 self.stream.advance_raw(1); let inner_len = self.stream.length_to(b')')?;
230 let inner = self.stream.read_raw_str(inner_len);
231 self.stream.advance_raw(1); return Ok(if is_double_colon {
233 Token::DoublePseudoClass { selector: s, value: Some(inner) }
234 } else {
235 Token::PseudoClass { selector: s, value: Some(inner) }
236 });
237 } else {
238 return Ok(if is_double_colon {
239 Token::DoublePseudoClass { selector: s, value: None }
240 } else {
241 Token::PseudoClass { selector: s, value: None }
242 });
243 }
244 }
245 b'[' => {
246 self.after_selector = true;
247 self.has_at_rule = false;
248 self.stream.advance_raw(1);
249 let len = self.stream.length_to(b']')?;
250 let s = self.stream.read_raw_str(len);
251 self.stream.advance_raw(1); self.stream.skip_spaces();
253 return Ok(Token::AttributeSelector(s));
254 }
255 b',' => {
256 self.after_selector = false;
257 self.has_at_rule = false;
258 self.stream.advance_raw(1);
259 self.stream.skip_spaces();
260 return Ok(Token::Comma);
261 }
262 b'{' => {
263 self.nesting_stack.push(self.has_at_rule);
265 self.after_selector = false;
266 self.has_at_rule = false;
267 self.state = State::Declaration;
268 self.stream.advance_raw(1);
269 return Ok(Token::BlockStart);
270 }
271 b'>' => {
272 if self.after_selector {
273 self.after_selector = false;
274 self.has_at_rule = false;
275 self.stream.advance_raw(1);
276 self.stream.skip_spaces();
277 return Ok(Token::Combinator(Combinator::GreaterThan));
278 } else {
279 return Err(Error::UnknownToken(self.stream.gen_error_pos()));
280 }
281 }
282 b'+' => {
283 if self.after_selector {
284 self.after_selector = false;
285 self.has_at_rule = false;
286 self.stream.advance_raw(1);
287 self.stream.skip_spaces();
288 return Ok(Token::Combinator(Combinator::Plus));
289 } else {
290 return Err(Error::UnknownToken(self.stream.gen_error_pos()));
291 }
292 }
293 b'~' => {
294 if self.after_selector {
295 self.after_selector = false;
296 self.has_at_rule = false;
297 self.stream.advance_raw(1);
298 self.stream.skip_spaces();
299 return Ok(Token::Combinator(Combinator::Tilde));
300 } else {
301 return Err(Error::UnknownToken(self.stream.gen_error_pos()));
302 }
303 }
304 b'/' => {
305 if self.consume_comment()? {
306 return self.parse_next();
307 } else {
308 return Err(Error::UnknownToken(self.stream.gen_error_pos()));
309 }
310 }
311 b'(' if self.has_at_rule => {
312 let s = self.consume_parenthesized_content()?;
314 self.after_selector = true;
315 return Ok(Token::AtStr(s));
316 }
317 _ => {
318 if self.stream.is_space_raw() {
319 self.stream.skip_spaces();
320
321 if !self.after_selector {
322 return self.parse_next();
323 }
324
325 match self.stream.curr_char()? {
326 b'{' | b'/' | b'>' | b'+' | b'~' | b'*' | b'(' => { return self.parse_next(); },
327 _ => {
328 self.after_selector = false;
329 if !self.has_at_rule {
330 return Ok(Token::Combinator(Combinator::Space));
331 }
332 }
333 }
334 }
335
336 let s = self.consume_ident()?;
337 let token_type = if self.has_at_rule {
338 self.has_at_rule = true;
339 Token::AtStr(s)
340 } else {
341 self.has_at_rule = false;
342 Token::TypeSelector(s)
343 };
344
345 self.after_selector = true;
346 return Ok(token_type);
347 }
348 }
349 }
350
351 fn consume_declaration(&mut self) -> Result<Token<'a>, Error> {
352 self.stream.skip_spaces();
353
354 match self.stream.curr_char_raw() {
355 b'}' => {
356 self.nesting_stack.pop();
358
359 if self.state == State::DeclarationRule {
360 self.state = State::Declaration;
361 } else if self.state == State::Declaration {
362 if self.nesting_stack.is_empty() {
364 self.state = State::Rule;
365 } else {
366 self.state = State::Declaration;
368 }
369 }
370 self.has_at_rule = false;
371 self.stream.advance_raw(1);
372 self.stream.skip_spaces();
373 return Ok(Token::BlockEnd);
374 },
375 b'{' => {
376 self.nesting_stack.push(self.has_at_rule);
378 self.has_at_rule = false;
379
380 if self.state == State::Rule {
381 self.state = State::Declaration;
382 } else if self.state == State::Declaration {
383 self.state = State::DeclarationRule;
384 }
385 self.stream.advance_raw(1);
386 self.stream.skip_spaces();
387 return Ok(Token::BlockStart);
388 },
389 b'@' => {
390 self.after_selector = true;
392 self.has_at_rule = true;
393 self.stream.advance_raw(1);
394 let s = self.consume_ident()?;
395 self.stream.skip_spaces();
396 return Ok(Token::AtRule(s));
397 },
398 b':' => {
399 self.after_selector = true;
401 self.has_at_rule = false;
402 self.stream.advance_raw(1);
403
404 let is_double_colon = self.stream.is_char_eq(b':')?;
406 if is_double_colon {
407 self.stream.advance_raw(1);
408 }
409
410 let s = self.consume_ident()?;
411
412 if self.stream.curr_char() == Ok(b'(') {
413 self.stream.advance_raw(1); let inner_len = self.stream.length_to(b')')?;
415 let inner = self.stream.read_raw_str(inner_len);
416 self.stream.advance_raw(1); return Ok(if is_double_colon {
418 Token::DoublePseudoClass { selector: s, value: Some(inner) }
419 } else {
420 Token::PseudoClass { selector: s, value: Some(inner) }
421 });
422 } else {
423 return Ok(if is_double_colon {
424 Token::DoublePseudoClass { selector: s, value: None }
425 } else {
426 Token::PseudoClass { selector: s, value: None }
427 });
428 }
429 },
430 b'.' => {
431 self.after_selector = true;
433 self.has_at_rule = false;
434 self.stream.advance_raw(1);
435 let s = self.consume_ident()?;
436 return Ok(Token::ClassSelector(s));
437 },
438 b'#' => {
439 self.after_selector = true;
441 self.has_at_rule = false;
442 self.stream.advance_raw(1);
443 let s = self.consume_ident()?;
444 return Ok(Token::IdSelector(s));
445 },
446 b'*' => {
447 self.after_selector = true;
449 self.has_at_rule = false;
450 self.stream.advance_raw(1);
451 self.stream.skip_spaces();
452 return Ok(Token::UniversalSelector);
453 },
454 b'[' => {
455 self.after_selector = true;
457 self.has_at_rule = false;
458 self.stream.advance_raw(1);
459 let len = self.stream.length_to(b']')?;
460 let s = self.stream.read_raw_str(len);
461 self.stream.advance_raw(1); self.stream.skip_spaces();
463 return Ok(Token::AttributeSelector(s));
464 },
465 b'>' => {
466 self.after_selector = false;
468 self.has_at_rule = false;
469 self.stream.advance_raw(1);
470 self.stream.skip_spaces();
471 return Ok(Token::Combinator(Combinator::GreaterThan));
472 },
473 b'+' => {
474 self.after_selector = false;
476 self.has_at_rule = false;
477 self.stream.advance_raw(1);
478 self.stream.skip_spaces();
479 return Ok(Token::Combinator(Combinator::Plus));
480 },
481 b'~' => {
482 self.after_selector = false;
484 self.has_at_rule = false;
485 self.stream.advance_raw(1);
486 self.stream.skip_spaces();
487 return Ok(Token::Combinator(Combinator::Tilde));
488 },
489 b',' => {
490 self.after_selector = false;
492 self.has_at_rule = false;
493 self.stream.advance_raw(1);
494 self.stream.skip_spaces();
495 return Ok(Token::Comma);
496 },
497 b'(' if self.has_at_rule => {
498 let s = self.consume_parenthesized_content()?;
500 self.after_selector = true;
501 return Ok(Token::AtStr(s));
502 },
503 b'/' => {
504 if self.consume_comment()? {
505 return self.parse_next();
506 } else {
507 return Err(Error::UnknownToken(self.stream.gen_error_pos()));
508 }
509 }
510 _ => {
511 if self.has_at_rule {
513 let s = self.consume_ident()?;
514 self.stream.skip_spaces();
515 self.after_selector = true;
516 return Ok(Token::AtStr(s));
517 }
518
519 let name = self.consume_ident()?;
520
521 self.stream.skip_spaces();
522
523 if self.stream.is_char_eq(b'/')? {
524 if !self.consume_comment()? {
525 return Err(Error::UnknownToken(self.stream.gen_error_pos()));
526 }
527 }
528
529 if self.stream.is_char_eq(b'{')? {
530 if name.is_empty() {
532 return Err(Error::UnknownToken(self.stream.gen_error_pos()));
533 } else {
534 self.after_selector = true;
535 return Ok(Token::TypeSelector(name));
536 }
537 }
538
539 if !self.stream.is_char_eq(b':')? {
541 self.after_selector = true;
543 return Ok(Token::TypeSelector(name));
544 }
545
546 self.stream.advance_raw(1); self.stream.skip_spaces();
548
549 if self.stream.is_char_eq(b'/')? {
550 if !self.consume_comment()? {
551 return Err(Error::UnknownToken(self.stream.gen_error_pos()));
552 }
553 }
554
555 let len = self.stream.length_to_either(&[b';', b'}'])?;
556
557 if len == 0 {
558 return Err(Error::UnknownToken(self.stream.gen_error_pos()));
559 }
560
561 let mut value = self.stream.read_raw_str(len);
562 if let Some(p) = value.as_bytes().iter().rposition(|c| !stream::is_space(*c)) {
564 value = &value[0..(p + 1)];
565 }
566
567 self.stream.skip_spaces();
568 while self.stream.is_char_eq(b';')? {
569 self.stream.advance_raw(1);
570 self.stream.skip_spaces();
571 }
572
573 Ok(Token::Declaration(name, value))
574 }
575 }
576 }
577
578 fn consume_ident(&mut self) -> Result<&'a str, Error> {
579 let start = self.stream.pos();
580
581 while !self.stream.at_end() {
582 if self.stream.is_ident_raw() {
583 self.stream.advance(1)?;
584 } else {
585 break;
586 }
587 }
588
589 if start == self.stream.pos() {
590 return Err(Error::UnknownToken(self.stream.gen_error_pos()));
591 }
592
593 let s = self.stream.slice_region_raw_str(start, self.stream.pos());
594 Ok(s)
595 }
596
597 fn consume_comment(&mut self) -> Result<bool, Error> {
598 self.stream.advance_raw(1);
599
600 if self.stream.is_char_eq(b'*')? {
601 self.stream.advance_raw(1); while !self.stream.at_end() {
604 let len = self.stream.length_to(b'*')?;
605 self.stream.advance(len + 1)?;
606 if self.stream.is_char_eq(b'/')? {
607 self.stream.advance_raw(1);
608 break;
609 }
610 }
611
612 return Ok(true);
613 } else {
614 return Ok(false);
615 }
616 }
617
618 fn consume_parenthesized_content(&mut self) -> Result<&'a str, Error> {
621 if !self.stream.is_char_eq(b'(')? {
622 return Err(Error::UnknownToken(self.stream.gen_error_pos()));
623 }
624
625 let start = self.stream.pos();
626 self.stream.advance_raw(1); let mut depth = 1;
629
630 while !self.stream.at_end() && depth > 0 {
631 match self.stream.curr_char_raw() {
632 b'(' => {
633 depth += 1;
634 self.stream.advance_raw(1);
635 }
636 b')' => {
637 depth -= 1;
638 self.stream.advance_raw(1);
639 }
640 b'"' | b'\'' => {
641 let quote = self.stream.curr_char_raw();
643 self.stream.advance_raw(1);
644 while !self.stream.at_end() {
645 let c = self.stream.curr_char_raw();
646 self.stream.advance_raw(1);
647 if c == quote {
648 break;
649 }
650 if c == b'\\' && !self.stream.at_end() {
651 self.stream.advance_raw(1); }
653 }
654 }
655 _ => {
656 self.stream.advance_raw(1);
657 }
658 }
659 }
660
661 if depth != 0 {
662 return Err(Error::UnknownToken(self.stream.gen_error_pos()));
663 }
664
665 let end = self.stream.pos();
667 let s = self.stream.slice_region_raw_str(start, end);
668 self.stream.skip_spaces();
669 Ok(s)
670 }
671}