1use ecow::{eco_format, EcoString};
2use unicode_ident::{is_xid_continue, is_xid_start};
3use unicode_script::{Script, UnicodeScript};
4use unicode_segmentation::UnicodeSegmentation;
5use unscanny::Scanner;
6
7use crate::{SyntaxError, SyntaxKind, SyntaxNode};
8
/// Splits source text into syntax nodes, one token per call to `next`.
#[derive(Clone)]
pub(super) struct Lexer<'s> {
    /// Scanner over the source text; its cursor is the current lexing position.
    s: Scanner<'s>,
    /// The mode the lexer is currently in; selects which tokenizer `next` uses.
    mode: LexMode,
    /// Whether the token most recently produced by `next` was whitespace
    /// containing at least one newline.
    newline: bool,
    /// Error recorded while lexing the current token; taken by `next` when it
    /// builds the node.
    error: Option<SyntaxError>,
}
22
/// The mode the lexer operates in; decides how characters are tokenized.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub(super) enum LexMode {
    /// Tokens are produced by `Lexer::markup`.
    Markup,
    /// Tokens are produced by `Lexer::math`.
    Math,
    /// Tokens are produced by `Lexer::code`.
    Code,
}
33
34impl<'s> Lexer<'s> {
35 pub fn new(text: &'s str, mode: LexMode) -> Self {
38 Self {
39 s: Scanner::new(text),
40 mode,
41 newline: false,
42 error: None,
43 }
44 }
45
46 pub fn mode(&self) -> LexMode {
48 self.mode
49 }
50
51 pub fn set_mode(&mut self, mode: LexMode) {
53 self.mode = mode;
54 }
55
56 pub fn cursor(&self) -> usize {
59 self.s.cursor()
60 }
61
62 pub fn jump(&mut self, index: usize) {
64 self.s.jump(index);
65 }
66
67 pub fn newline(&self) -> bool {
69 self.newline
70 }
71
72 pub fn column(&self, index: usize) -> usize {
74 let mut s = self.s; s.jump(index);
76 s.before().chars().rev().take_while(|&c| !is_newline(c)).count()
77 }
78}
79
80impl Lexer<'_> {
81 fn error(&mut self, message: impl Into<EcoString>) -> SyntaxKind {
83 self.error = Some(SyntaxError::new(message));
84 SyntaxKind::Error
85 }
86
87 fn hint(&mut self, message: impl Into<EcoString>) {
89 if let Some(error) = &mut self.error {
90 error.hints.push(message.into());
91 }
92 }
93}
94
95impl Lexer<'_> {
97 pub fn next(&mut self) -> (SyntaxKind, SyntaxNode) {
100 debug_assert!(self.error.is_none());
101 let start = self.s.cursor();
102
103 self.newline = false;
104 let kind = match self.s.eat() {
105 Some(c) if is_space(c, self.mode) => self.whitespace(start, c),
106 Some('#') if start == 0 && self.s.eat_if('!') => self.shebang(),
107 Some('/') if self.s.eat_if('/') => self.line_comment(),
108 Some('/') if self.s.eat_if('*') => self.block_comment(),
109 Some('*') if self.s.eat_if('/') => {
110 let kind = self.error("unexpected end of block comment");
111 self.hint(
112 "consider escaping the `*` with a backslash or \
113 opening the block comment with `/*`",
114 );
115 kind
116 }
117 Some('`') if self.mode != LexMode::Math => return self.raw(),
118 Some(c) => match self.mode {
119 LexMode::Markup => self.markup(start, c),
120 LexMode::Math => match self.math(start, c) {
121 (kind, None) => kind,
122 (kind, Some(node)) => return (kind, node),
123 },
124 LexMode::Code => self.code(start, c),
125 },
126
127 None => SyntaxKind::End,
128 };
129
130 let text = self.s.from(start);
131 let node = match self.error.take() {
132 Some(error) => SyntaxNode::error(error, text),
133 None => SyntaxNode::leaf(kind, text),
134 };
135 (kind, node)
136 }
137
138 fn whitespace(&mut self, start: usize, c: char) -> SyntaxKind {
140 let more = self.s.eat_while(|c| is_space(c, self.mode));
141 let newlines = match c {
142 ' ' if more.is_empty() => 0,
144 _ => count_newlines(self.s.from(start)),
145 };
146
147 self.newline = newlines > 0;
148 if self.mode == LexMode::Markup && newlines >= 2 {
149 SyntaxKind::Parbreak
150 } else {
151 SyntaxKind::Space
152 }
153 }
154
155 fn shebang(&mut self) -> SyntaxKind {
156 self.s.eat_until(is_newline);
157 SyntaxKind::Shebang
158 }
159
160 fn line_comment(&mut self) -> SyntaxKind {
161 self.s.eat_until(is_newline);
162 SyntaxKind::LineComment
163 }
164
165 fn block_comment(&mut self) -> SyntaxKind {
166 let mut state = '_';
167 let mut depth = 1;
168
169 while let Some(c) = self.s.eat() {
171 state = match (state, c) {
172 ('*', '/') => {
173 depth -= 1;
174 if depth == 0 {
175 break;
176 }
177 '_'
178 }
179 ('/', '*') => {
180 depth += 1;
181 '_'
182 }
183 _ => c,
184 }
185 }
186
187 SyntaxKind::BlockComment
188 }
189}
190
191impl Lexer<'_> {
    /// Lexes one markup-mode token whose first character `c` (located at
    /// `start`) has already been eaten.
    ///
    /// Arms are tried top to bottom and several guards consume input via
    /// `eat_if`, so the order of the arms is significant.
    fn markup(&mut self, start: usize, c: char) -> SyntaxKind {
        match c {
            '\\' => self.backslash(),
            // Automatic links: the leading `h` is already eaten.
            'h' if self.s.eat_if("ttp://") => self.link(),
            'h' if self.s.eat_if("ttps://") => self.link(),
            '<' if self.s.at(is_id_continue) => self.label(),
            '@' => self.ref_marker(),

            // Shorthands; `--` is tried before `-` so the longer one wins.
            '.' if self.s.eat_if("..") => SyntaxKind::Shorthand,
            '-' if self.s.eat_if("--") => SyntaxKind::Shorthand,
            '-' if self.s.eat_if('-') => SyntaxKind::Shorthand,
            '-' if self.s.eat_if('?') => SyntaxKind::Shorthand,
            // A hyphen directly followed by a numeric character.
            '-' if self.s.at(char::is_numeric) => SyntaxKind::Shorthand,
            // `*` and `_` only count as markers when not inside a word.
            '*' if !self.in_word() => SyntaxKind::Star,
            '_' if !self.in_word() => SyntaxKind::Underscore,

            '#' => SyntaxKind::Hash,
            '[' => SyntaxKind::LeftBracket,
            ']' => SyntaxKind::RightBracket,
            '\'' => SyntaxKind::SmartQuote,
            '"' => SyntaxKind::SmartQuote,
            '$' => SyntaxKind::Dollar,
            '~' => SyntaxKind::Shorthand,
            ':' => SyntaxKind::Colon,
            // A run of `=` is a heading marker only when followed by
            // whitespace, a comment, or the end; otherwise it is plain text.
            '=' => {
                self.s.eat_while('=');
                if self.space_or_end() {
                    SyntaxKind::HeadingMarker
                } else {
                    self.text()
                }
            }
            // List-like markers need whitespace, a comment, or the end after them.
            '-' if self.space_or_end() => SyntaxKind::ListMarker,
            '+' if self.space_or_end() => SyntaxKind::EnumMarker,
            '/' if self.space_or_end() => SyntaxKind::TermMarker,
            '0'..='9' => self.numbering(start),

            _ => self.text(),
        }
    }
233
234 fn backslash(&mut self) -> SyntaxKind {
235 if self.s.eat_if("u{") {
236 let hex = self.s.eat_while(char::is_ascii_alphanumeric);
237 if !self.s.eat_if('}') {
238 return self.error("unclosed Unicode escape sequence");
239 }
240
241 if u32::from_str_radix(hex, 16)
242 .ok()
243 .and_then(std::char::from_u32)
244 .is_none()
245 {
246 return self.error(eco_format!("invalid Unicode codepoint: {}", hex));
247 }
248
249 return SyntaxKind::Escape;
250 }
251
252 if self.s.done() || self.s.at(char::is_whitespace) {
253 SyntaxKind::Linebreak
254 } else {
255 self.s.eat();
256 SyntaxKind::Escape
257 }
258 }
259
260 fn raw(&mut self) -> (SyntaxKind, SyntaxNode) {
264 let start = self.s.cursor() - 1;
265
266 let mut backticks = 1;
268 while self.s.eat_if('`') {
269 backticks += 1;
270 }
271
272 if backticks == 2 {
274 let nodes = vec![
275 SyntaxNode::leaf(SyntaxKind::RawDelim, "`"),
276 SyntaxNode::leaf(SyntaxKind::RawDelim, "`"),
277 ];
278 return (SyntaxKind::Raw, SyntaxNode::inner(SyntaxKind::Raw, nodes));
279 }
280
281 let mut found = 0;
283 while found < backticks {
284 match self.s.eat() {
285 Some('`') => found += 1,
286 Some(_) => found = 0,
287 None => {
288 let msg = SyntaxError::new("unclosed raw text");
289 let error = SyntaxNode::error(msg, self.s.from(start));
290 return (SyntaxKind::Error, error);
291 }
292 }
293 }
294 let end = self.s.cursor();
295
296 let mut nodes = Vec::with_capacity(3); let mut prev_start = start;
301 let mut push_raw = |kind, s: &Scanner| {
302 nodes.push(SyntaxNode::leaf(kind, s.from(prev_start)));
303 prev_start = s.cursor();
304 };
305
306 self.s.jump(start + backticks);
308 push_raw(SyntaxKind::RawDelim, &self.s);
309
310 if backticks >= 3 {
311 self.blocky_raw(end - backticks, &mut push_raw);
312 } else {
313 self.inline_raw(end - backticks, &mut push_raw);
314 }
315
316 self.s.jump(end);
318 push_raw(SyntaxKind::RawDelim, &self.s);
319
320 (SyntaxKind::Raw, SyntaxNode::inner(SyntaxKind::Raw, nodes))
321 }
322
    /// Splits the inside of a raw element opened with three or more
    /// backticks into language tag, text and trimmed-whitespace leaves.
    ///
    /// `inner_end` is the position where the closing backticks start.
    /// `push_raw` is called after each scanner move and receives the kind for
    /// the segment that ends at the scanner's current cursor.
    fn blocky_raw<F>(&mut self, inner_end: usize, mut push_raw: F)
    where
        F: FnMut(SyntaxKind, &Scanner),
    {
        // Optional language tag directly after the opening backticks.
        if self.s.eat_if(is_id_start) {
            self.s.eat_while(is_id_continue);
            push_raw(SyntaxKind::RawLang, &self.s);
        }

        // The remaining content, split into lines without their newlines.
        let mut lines = split_newlines(self.s.to(inner_end));

        // Dedent level: the smallest number of leading whitespace characters
        // among the non-blank lines after the first one, with the last line
        // always taken into account (blank or not).
        let dedent = lines
            .iter()
            .skip(1)
            .filter(|line| !line.chars().all(char::is_whitespace))
            .chain(lines.last())
            .map(|line| line.chars().take_while(|c| c.is_whitespace()).count())
            .min()
            .unwrap_or(0);

        // Drop the last line if it is whitespace only. Otherwise, if its
        // trimmed content ends with a backtick, strip one trailing space.
        if lines.last().is_some_and(|last| last.chars().all(char::is_whitespace)) {
            lines.pop();
        } else if let Some(last) = lines.last_mut() {
            if last.trim_end().ends_with('`') {
                *last = last.strip_suffix(' ').unwrap_or(last);
            }
        }

        let mut lines = lines.into_iter();

        // First line: either skipped entirely (whitespace only) or emitted
        // as text after trimming at most one leading space.
        if let Some(first_line) = lines.next() {
            if first_line.chars().all(char::is_whitespace) {
                // The scanner advances here without an immediate `push_raw`;
                // the skipped span becomes part of the next pushed segment.
                self.s.advance(first_line.len());
                debug_assert!(self.s.cursor() != inner_end);
            } else {
                let line_end = self.s.cursor() + first_line.len();
                if self.s.eat_if(' ') {
                    push_raw(SyntaxKind::RawTrimmed, &self.s);
                }
                self.s.jump(line_end);
                push_raw(SyntaxKind::Text, &self.s);
            }
        }

        // Remaining lines: the newline plus up to `dedent` leading characters
        // are trimmed, the rest of the line is text.
        for line in lines {
            // Byte length of the first `dedent` characters of the line.
            let offset: usize = line.chars().take(dedent).map(char::len_utf8).sum();
            self.s.eat_newline();
            self.s.advance(offset);
            push_raw(SyntaxKind::RawTrimmed, &self.s);
            self.s.advance(line.len() - offset);
            push_raw(SyntaxKind::Text, &self.s);
        }

        // Whatever remains before the closing delimiter is trimmed.
        if self.s.cursor() < inner_end {
            self.s.jump(inner_end);
            push_raw(SyntaxKind::RawTrimmed, &self.s);
        }
    }
445
446 fn inline_raw<F>(&mut self, inner_end: usize, mut push_raw: F)
450 where
451 F: FnMut(SyntaxKind, &Scanner),
452 {
453 while self.s.cursor() < inner_end {
454 if self.s.at(is_newline) {
455 push_raw(SyntaxKind::Text, &self.s);
456 self.s.eat_newline();
457 push_raw(SyntaxKind::RawTrimmed, &self.s);
458 continue;
459 }
460 self.s.eat();
461 }
462 push_raw(SyntaxKind::Text, &self.s);
463 }
464
465 fn link(&mut self) -> SyntaxKind {
466 let (link, balanced) = link_prefix(self.s.after());
467 self.s.advance(link.len());
468
469 if !balanced {
470 return self.error(
471 "automatic links cannot contain unbalanced brackets, \
472 use the `link` function instead",
473 );
474 }
475
476 SyntaxKind::Link
477 }
478
479 fn numbering(&mut self, start: usize) -> SyntaxKind {
480 self.s.eat_while(char::is_ascii_digit);
481
482 let read = self.s.from(start);
483 if self.s.eat_if('.') && self.space_or_end() && read.parse::<usize>().is_ok() {
484 return SyntaxKind::EnumMarker;
485 }
486
487 self.text()
488 }
489
490 fn ref_marker(&mut self) -> SyntaxKind {
491 self.s.eat_while(is_valid_in_label_literal);
492
493 while matches!(self.s.scout(-1), Some('.' | ':')) {
495 self.s.uneat();
496 }
497
498 SyntaxKind::RefMarker
499 }
500
501 fn label(&mut self) -> SyntaxKind {
502 let label = self.s.eat_while(is_valid_in_label_literal);
503 if label.is_empty() {
504 return self.error("label cannot be empty");
505 }
506
507 if !self.s.eat_if('>') {
508 return self.error("unclosed label");
509 }
510
511 SyntaxKind::Label
512 }
513
    /// Consumes a run of plain markup text.
    ///
    /// The run stops at a character that could start another token, unless a
    /// one-token lookahead shows that this character would end up as text
    /// anyway, in which case it is absorbed into the same run.
    fn text(&mut self) -> SyntaxKind {
        // Builds a 128-entry lookup table that is `true` for the listed
        // ASCII characters.
        macro_rules! table {
            ($(|$c:literal)*) => {
                static TABLE: [bool; 128] = {
                    let mut t = [false; 128];
                    $(t[$c as usize] = true;)*
                    t
                };
            };
        }

        // Characters at which a text run is (at least tentatively) interrupted.
        table! {
            | ' ' | '\t' | '\n' | '\x0b' | '\x0c' | '\r' | '\\' | '/'
            | '[' | ']' | '~' | '-' | '.' | '\'' | '"' | '*' | '_'
            | ':' | 'h' | '`' | '$' | '<' | '>' | '@' | '#'
        };

        loop {
            // Characters outside the table's range (non-ASCII) only stop the
            // run when they are whitespace.
            self.s.eat_until(|c: char| {
                TABLE.get(c as usize).copied().unwrap_or_else(|| c.is_whitespace())
            });

            // Look ahead on a copy of the scanner; the copy is committed
            // below only if the stop character remains part of this text.
            let mut s = self.s;
            match s.eat() {
                Some(' ') if s.at(char::is_alphanumeric) => {}
                Some('/') if !s.at(['/', '*']) => {}
                Some('-') if !s.at(['-', '?']) => {}
                Some('.') if !s.at("..") => {}
                Some('h') if !s.at("ttp://") && !s.at("ttps://") => {}
                Some('@') if !s.at(is_valid_in_label_literal) => {}
                _ => break,
            }

            self.s = s;
        }

        SyntaxKind::Text
    }
554
555 fn in_word(&self) -> bool {
556 let wordy = |c: Option<char>| {
557 c.is_some_and(|c| {
558 c.is_alphanumeric()
559 && !matches!(
560 c.script(),
561 Script::Han
562 | Script::Hiragana
563 | Script::Katakana
564 | Script::Hangul
565 )
566 })
567 };
568 let prev = self.s.scout(-2);
569 let next = self.s.peek();
570 wordy(prev) && wordy(next)
571 }
572
573 fn space_or_end(&self) -> bool {
574 self.s.done()
575 || self.s.at(char::is_whitespace)
576 || self.s.at("//")
577 || self.s.at("/*")
578 }
579}
580
581impl Lexer<'_> {
    /// Lexes one math-mode token whose first character `c` (located at
    /// `start`) has already been eaten.
    ///
    /// Returns the token kind and, for identifiers and field accesses, a
    /// prebuilt node; for all other tokens the node is `None`.
    fn math(&mut self, start: usize, c: char) -> (SyntaxKind, Option<SyntaxNode>) {
        let kind = match c {
            '\\' => self.backslash(),
            '"' => self.string(),

            // Multi-character shorthands. Guards are tried top to bottom and
            // consume input on success, so a longer sequence is listed before
            // a shorter one that it starts with.
            '-' if self.s.eat_if(">>") => SyntaxKind::MathShorthand,
            '-' if self.s.eat_if('>') => SyntaxKind::MathShorthand,
            '-' if self.s.eat_if("->") => SyntaxKind::MathShorthand,
            ':' if self.s.eat_if('=') => SyntaxKind::MathShorthand,
            ':' if self.s.eat_if(":=") => SyntaxKind::MathShorthand,
            '!' if self.s.eat_if('=') => SyntaxKind::MathShorthand,
            '.' if self.s.eat_if("..") => SyntaxKind::MathShorthand,
            '[' if self.s.eat_if('|') => SyntaxKind::MathShorthand,
            '<' if self.s.eat_if("==>") => SyntaxKind::MathShorthand,
            '<' if self.s.eat_if("-->") => SyntaxKind::MathShorthand,
            '<' if self.s.eat_if("--") => SyntaxKind::MathShorthand,
            '<' if self.s.eat_if("-<") => SyntaxKind::MathShorthand,
            '<' if self.s.eat_if("->") => SyntaxKind::MathShorthand,
            '<' if self.s.eat_if("<-") => SyntaxKind::MathShorthand,
            '<' if self.s.eat_if("<<") => SyntaxKind::MathShorthand,
            '<' if self.s.eat_if("=>") => SyntaxKind::MathShorthand,
            '<' if self.s.eat_if("==") => SyntaxKind::MathShorthand,
            '<' if self.s.eat_if("~~") => SyntaxKind::MathShorthand,
            '<' if self.s.eat_if('=') => SyntaxKind::MathShorthand,
            '<' if self.s.eat_if('<') => SyntaxKind::MathShorthand,
            '<' if self.s.eat_if('-') => SyntaxKind::MathShorthand,
            '<' if self.s.eat_if('~') => SyntaxKind::MathShorthand,
            '>' if self.s.eat_if("->") => SyntaxKind::MathShorthand,
            '>' if self.s.eat_if(">>") => SyntaxKind::MathShorthand,
            '=' if self.s.eat_if("=>") => SyntaxKind::MathShorthand,
            '=' if self.s.eat_if('>') => SyntaxKind::MathShorthand,
            '=' if self.s.eat_if(':') => SyntaxKind::MathShorthand,
            '>' if self.s.eat_if('=') => SyntaxKind::MathShorthand,
            '>' if self.s.eat_if('>') => SyntaxKind::MathShorthand,
            '|' if self.s.eat_if("->") => SyntaxKind::MathShorthand,
            '|' if self.s.eat_if("=>") => SyntaxKind::MathShorthand,
            '|' if self.s.eat_if(']') => SyntaxKind::MathShorthand,
            '|' if self.s.eat_if('|') => SyntaxKind::MathShorthand,
            '~' if self.s.eat_if("~>") => SyntaxKind::MathShorthand,
            '~' if self.s.eat_if('>') => SyntaxKind::MathShorthand,
            // Single-character shorthands.
            '*' | '-' | '~' => SyntaxKind::MathShorthand,

            '.' => SyntaxKind::Dot,
            ',' => SyntaxKind::Comma,
            ';' => SyntaxKind::Semicolon,
            ')' => SyntaxKind::RightParen,

            '#' => SyntaxKind::Hash,
            '_' => SyntaxKind::Underscore,
            '$' => SyntaxKind::Dollar,
            '/' => SyntaxKind::Slash,
            '^' => SyntaxKind::Hat,
            '\'' => SyntaxKind::Prime,
            '&' => SyntaxKind::MathAlignPoint,
            '√' | '∛' | '∜' => SyntaxKind::Root,

            // Identifiers need at least two characters; a single identifier
            // character falls through to `math_text` below.
            c if is_math_id_start(c) && self.s.at(is_math_id_continue) => {
                self.s.eat_while(is_math_id_continue);
                let (kind, node) = self.math_ident_or_field(start);
                return (kind, Some(node));
            }

            _ => self.math_text(start, c),
        };
        (kind, None)
    }
651
652 fn math_ident_or_field(&mut self, start: usize) -> (SyntaxKind, SyntaxNode) {
654 let mut kind = SyntaxKind::MathIdent;
655 let mut node = SyntaxNode::leaf(kind, self.s.from(start));
656 while let Some(ident) = self.maybe_dot_ident() {
657 kind = SyntaxKind::FieldAccess;
658 let field_children = vec![
659 node,
660 SyntaxNode::leaf(SyntaxKind::Dot, '.'),
661 SyntaxNode::leaf(SyntaxKind::Ident, ident),
662 ];
663 node = SyntaxNode::inner(kind, field_children);
664 }
665 (kind, node)
666 }
667
668 fn maybe_dot_ident(&mut self) -> Option<&str> {
670 if self.s.scout(1).is_some_and(is_math_id_start) && self.s.eat_if('.') {
671 let ident_start = self.s.cursor();
672 self.s.eat();
673 self.s.eat_while(is_math_id_continue);
674 Some(self.s.from(ident_start))
675 } else {
676 None
677 }
678 }
679
680 fn math_text(&mut self, start: usize, c: char) -> SyntaxKind {
681 if c.is_numeric() {
683 self.s.eat_while(char::is_numeric);
684 let mut s = self.s;
685 if s.eat_if('.') && !s.eat_while(char::is_numeric).is_empty() {
686 self.s = s;
687 }
688 SyntaxKind::MathText
689 } else {
690 let len = self
691 .s
692 .get(start..self.s.string().len())
693 .graphemes(true)
694 .next()
695 .map_or(0, str::len);
696 self.s.jump(start + len);
697 if len > c.len_utf8() {
698 SyntaxKind::Text
701 } else {
702 SyntaxKind::MathText
703 }
704 }
705 }
706
707 pub fn maybe_math_named_arg(&mut self, start: usize) -> Option<SyntaxNode> {
709 let cursor = self.s.cursor();
710 self.s.jump(start);
711 if self.s.eat_if(is_id_start) {
712 self.s.eat_while(is_id_continue);
713 if self.s.at(':') && !self.s.at(":=") && !self.s.at("::=") {
716 let node = if self.s.from(start) != "_" {
718 SyntaxNode::leaf(SyntaxKind::Ident, self.s.from(start))
719 } else {
720 let msg = SyntaxError::new("expected identifier, found underscore");
721 SyntaxNode::error(msg, self.s.from(start))
722 };
723 return Some(node);
724 }
725 }
726 self.s.jump(cursor);
727 None
728 }
729
730 pub fn maybe_math_spread_arg(&mut self, start: usize) -> Option<SyntaxNode> {
732 let cursor = self.s.cursor();
733 self.s.jump(start);
734 if self.s.eat_if("..") {
735 if !self.space_or_end() && !self.s.at('.') {
738 let node = SyntaxNode::leaf(SyntaxKind::Dots, self.s.from(start));
739 return Some(node);
740 }
741 }
742 self.s.jump(cursor);
743 None
744 }
745}
746
747impl Lexer<'_> {
    /// Lexes one code-mode token whose first character `c` (located at
    /// `start`) has already been eaten.
    ///
    /// Arms are tried top to bottom and several guards consume input via
    /// `eat_if`, so two-character operators are listed before the
    /// single-character tokens they start with.
    fn code(&mut self, start: usize, c: char) -> SyntaxKind {
        match c {
            '<' if self.s.at(is_id_continue) => self.label(),
            '0'..='9' => self.number(start, c),
            // A dot directly followed by a digit starts a number.
            '.' if self.s.at(char::is_ascii_digit) => self.number(start, c),
            '"' => self.string(),

            // Two-character operators. U+2212 is accepted wherever `-` is.
            '=' if self.s.eat_if('=') => SyntaxKind::EqEq,
            '!' if self.s.eat_if('=') => SyntaxKind::ExclEq,
            '<' if self.s.eat_if('=') => SyntaxKind::LtEq,
            '>' if self.s.eat_if('=') => SyntaxKind::GtEq,
            '+' if self.s.eat_if('=') => SyntaxKind::PlusEq,
            '-' | '\u{2212}' if self.s.eat_if('=') => SyntaxKind::HyphEq,
            '*' if self.s.eat_if('=') => SyntaxKind::StarEq,
            '/' if self.s.eat_if('=') => SyntaxKind::SlashEq,
            '.' if self.s.eat_if('.') => SyntaxKind::Dots,
            '=' if self.s.eat_if('>') => SyntaxKind::Arrow,

            // Single-character tokens.
            '{' => SyntaxKind::LeftBrace,
            '}' => SyntaxKind::RightBrace,
            '[' => SyntaxKind::LeftBracket,
            ']' => SyntaxKind::RightBracket,
            '(' => SyntaxKind::LeftParen,
            ')' => SyntaxKind::RightParen,
            '$' => SyntaxKind::Dollar,
            ',' => SyntaxKind::Comma,
            ';' => SyntaxKind::Semicolon,
            ':' => SyntaxKind::Colon,
            '.' => SyntaxKind::Dot,
            '+' => SyntaxKind::Plus,
            '-' | '\u{2212}' => SyntaxKind::Minus,
            '*' => SyntaxKind::Star,
            '/' => SyntaxKind::Slash,
            '=' => SyntaxKind::Eq,
            '<' => SyntaxKind::Lt,
            '>' => SyntaxKind::Gt,

            c if is_id_start(c) => self.ident(start),

            // Anything else is reported as an error token.
            c => self.error(eco_format!("the character `{c}` is not valid in code")),
        }
    }
791
792 fn ident(&mut self, start: usize) -> SyntaxKind {
793 self.s.eat_while(is_id_continue);
794 let ident = self.s.from(start);
795
796 let prev = self.s.get(0..start);
797 if !prev.ends_with(['.', '@']) || prev.ends_with("..") {
798 if let Some(keyword) = keyword(ident) {
799 return keyword;
800 }
801 }
802
803 if ident == "_" {
804 SyntaxKind::Underscore
805 } else {
806 SyntaxKind::Ident
807 }
808 }
809
    /// Lexes a numeric literal whose first character `c` (at `start`) was
    /// already eaten: an integer (optionally with a `0b`, `0o` or `0x`
    /// prefix), a float, or a number with a unit suffix.
    ///
    /// Produces an error token for unparsable digits, unknown suffixes, and
    /// suffixed numbers that carry a base prefix.
    fn number(&mut self, mut start: usize, c: char) -> SyntaxKind {
        // Detect a base prefix; `start` is moved past it so that `number`
        // below holds only the digits.
        let mut base = 10;
        if c == '0' {
            if self.s.eat_if('b') {
                base = 2;
            } else if self.s.eat_if('o') {
                base = 8;
            } else if self.s.eat_if('x') {
                base = 16;
            }
            if base != 10 {
                start = self.s.cursor();
            }
        }

        // Read the first run of digits. For base 16 every ASCII alphanumeric
        // is consumed; invalid ones are caught when parsing below.
        self.s.eat_while(if base == 16 {
            char::is_ascii_alphanumeric
        } else {
            char::is_ascii_digit
        });

        // Read a fractional part, unless the literal started with the dot,
        // a `..` follows, or the character after the dot could start an
        // identifier. Note that the dot is eaten before `base` is checked.
        if c != '.'
            && !self.s.at("..")
            && !self.s.scout(1).is_some_and(is_id_start)
            && self.s.eat_if('.')
            && base == 10
        {
            self.s.eat_while(char::is_ascii_digit);
        }

        // Read an exponent, taking care not to treat the `em` suffix as one.
        // Note that the `e`/`E` is eaten before `base` is checked.
        if !self.s.at("em") && self.s.eat_if(['e', 'E']) && base == 10 {
            self.s.eat_if(['+', '-']);
            self.s.eat_while(char::is_ascii_digit);
        }

        // Read the suffix: either `%` or a run of ASCII alphanumerics.
        let suffix_start = self.s.cursor();
        if !self.s.eat_if('%') {
            self.s.eat_while(char::is_ascii_alphanumeric);
        }

        let number = self.s.get(start..suffix_start);
        let suffix = self.s.from(suffix_start);

        // Try an integer in the detected base first, then a base-10 float.
        let kind = if i64::from_str_radix(number, base).is_ok() {
            SyntaxKind::Int
        } else if base == 10 && number.parse::<f64>().is_ok() {
            SyntaxKind::Float
        } else {
            return self.error(match base {
                2 => eco_format!("invalid binary number: 0b{}", number),
                8 => eco_format!("invalid octal number: 0o{}", number),
                16 => eco_format!("invalid hexadecimal number: 0x{}", number),
                _ => eco_format!("invalid number: {}", number),
            });
        };

        if suffix.is_empty() {
            return kind;
        }

        if !matches!(
            suffix,
            "pt" | "mm" | "cm" | "in" | "deg" | "rad" | "em" | "fr" | "%"
        ) {
            return self.error(eco_format!("invalid number suffix: {}", suffix));
        }

        // A valid suffix combined with a base prefix is rejected.
        if base != 10 {
            let kind = self.error(eco_format!("invalid base-{base} prefix"));
            self.hint("numbers with a unit cannot have a base prefix");
            return kind;
        }

        SyntaxKind::Numeric
    }
891
892 fn string(&mut self) -> SyntaxKind {
893 let mut escaped = false;
894 self.s.eat_until(|c| {
895 let stop = c == '"' && !escaped;
896 escaped = c == '\\' && !escaped;
897 stop
898 });
899
900 if !self.s.eat_if('"') {
901 return self.error("unclosed string");
902 }
903
904 SyntaxKind::Str
905 }
906}
907
908fn keyword(ident: &str) -> Option<SyntaxKind> {
910 Some(match ident {
911 "none" => SyntaxKind::None,
912 "auto" => SyntaxKind::Auto,
913 "true" => SyntaxKind::Bool,
914 "false" => SyntaxKind::Bool,
915 "not" => SyntaxKind::Not,
916 "and" => SyntaxKind::And,
917 "or" => SyntaxKind::Or,
918 "let" => SyntaxKind::Let,
919 "set" => SyntaxKind::Set,
920 "show" => SyntaxKind::Show,
921 "context" => SyntaxKind::Context,
922 "if" => SyntaxKind::If,
923 "else" => SyntaxKind::Else,
924 "for" => SyntaxKind::For,
925 "in" => SyntaxKind::In,
926 "while" => SyntaxKind::While,
927 "break" => SyntaxKind::Break,
928 "continue" => SyntaxKind::Continue,
929 "return" => SyntaxKind::Return,
930 "import" => SyntaxKind::Import,
931 "include" => SyntaxKind::Include,
932 "as" => SyntaxKind::As,
933 _ => return None,
934 })
935}
936
/// Convenience methods added to the scanner.
trait ScannerExt {
    /// Moves the cursor forward by `by`.
    fn advance(&mut self, by: usize);
    /// Consumes one newline if the scanner is at one, treating `\r\n` as a
    /// single newline. Returns whether a newline was consumed.
    fn eat_newline(&mut self) -> bool;
}
941
942impl ScannerExt for Scanner<'_> {
943 fn advance(&mut self, by: usize) {
944 self.jump(self.cursor() + by);
945 }
946
947 fn eat_newline(&mut self) -> bool {
948 let ate = self.eat_if(is_newline);
949 if ate && self.before().ends_with('\r') {
950 self.eat_if('\n');
951 }
952 ate
953 }
954}
955
956#[inline]
958fn is_space(character: char, mode: LexMode) -> bool {
959 match mode {
960 LexMode::Markup => matches!(character, ' ' | '\t') || is_newline(character),
961 _ => character.is_whitespace(),
962 }
963}
964
/// Whether `character` is one of the newline characters: line feed, vertical
/// tab, form feed, carriage return, next line (U+0085), line separator
/// (U+2028) or paragraph separator (U+2029).
#[inline]
pub fn is_newline(character: char) -> bool {
    const NEWLINES: [char; 7] =
        ['\n', '\x0B', '\x0C', '\r', '\u{0085}', '\u{2028}', '\u{2029}'];
    NEWLINES.contains(&character)
}
976
/// Extracts the longest prefix of `text` that can be part of a link.
///
/// Returns the prefix and whether the `[]` / `()` brackets inside it are
/// balanced. The prefix ends before the first character that is not allowed
/// in a link or before a closing bracket that does not match the innermost
/// open bracket. Trailing `!`, `,`, `.`, `:`, `;`, `?` and `'` are excluded.
///
/// A closing bracket that does not match the innermost open bracket leaves
/// that open bracket on the stack, so `"(a]"` yields `("(a", false)`.
/// Popping the unrelated opener would wrongly report such a prefix as
/// balanced.
pub fn link_prefix(text: &str) -> (&str, bool) {
    // Stack of the opening brackets that are still unclosed.
    let mut brackets: Vec<char> = Vec::new();
    // Byte offset just past the last accepted character.
    let mut end = 0;

    for (idx, c) in text.char_indices() {
        let accepted = match c {
            '0'..='9' | 'a'..='z' | 'A'..='Z' => true,
            '!' | '#' | '$' | '%' | '&' | '*' | '+' => true,
            ',' | '-' | '.' | '/' | ':' | ';' | '=' => true,
            '?' | '@' | '_' | '~' | '\'' => true,
            '[' | '(' => {
                brackets.push(c);
                true
            }
            ']' | ')' => {
                let opener = if c == ']' { '[' } else { '(' };
                // Only close the innermost bracket if it actually matches.
                if brackets.last() == Some(&opener) {
                    brackets.pop();
                    true
                } else {
                    false
                }
            }
            _ => false,
        };

        if !accepted {
            break;
        }
        end = idx + c.len_utf8();
    }

    // Trailing punctuation most likely belongs to the surrounding text.
    let link = text[..end]
        .trim_end_matches(|c: char| matches!(c, '!' | ',' | '.' | ':' | ';' | '?' | '\''));

    (link, brackets.is_empty())
}
1013
1014pub fn split_newlines(text: &str) -> Vec<&str> {
1016 let mut s = Scanner::new(text);
1017 let mut lines = Vec::new();
1018 let mut start = 0;
1019 let mut end = 0;
1020
1021 while let Some(c) = s.eat() {
1022 if is_newline(c) {
1023 if c == '\r' {
1024 s.eat_if('\n');
1025 }
1026
1027 lines.push(&text[start..end]);
1028 start = s.cursor();
1029 }
1030 end = s.cursor();
1031 }
1032
1033 lines.push(&text[start..]);
1034 lines
1035}
1036
1037fn count_newlines(text: &str) -> usize {
1039 let mut newlines = 0;
1040 let mut s = Scanner::new(text);
1041 while let Some(c) = s.eat() {
1042 if is_newline(c) {
1043 if c == '\r' {
1044 s.eat_if('\n');
1045 }
1046 newlines += 1;
1047 }
1048 }
1049 newlines
1050}
1051
1052#[inline]
1060pub fn is_ident(string: &str) -> bool {
1061 let mut chars = string.chars();
1062 chars
1063 .next()
1064 .is_some_and(|c| is_id_start(c) && chars.all(is_id_continue))
1065}
1066
1067#[inline]
1069pub fn is_id_start(c: char) -> bool {
1070 is_xid_start(c) || c == '_'
1071}
1072
1073#[inline]
1075pub fn is_id_continue(c: char) -> bool {
1076 is_xid_continue(c) || c == '_' || c == '-'
1077}
1078
/// Whether `c` can start an identifier in math: an XID start character.
/// Unlike `is_id_start`, an underscore is not accepted.
#[inline]
fn is_math_id_start(c: char) -> bool {
    is_xid_start(c)
}
1084
1085#[inline]
1087fn is_math_id_continue(c: char) -> bool {
1088 is_xid_continue(c) && c != '_'
1089}
1090
1091#[inline]
1093fn is_valid_in_label_literal(c: char) -> bool {
1094 is_id_continue(c) || matches!(c, ':' | '.')
1095}
1096
1097pub fn is_valid_label_literal_id(id: &str) -> bool {
1099 !id.is_empty() && id.chars().all(is_valid_in_label_literal)
1100}