1use ecow::{EcoString, eco_format};
2use unicode_ident::{is_xid_continue, is_xid_start};
3use unicode_script::{Script, UnicodeScript};
4use unicode_segmentation::UnicodeSegmentation;
5use unscanny::Scanner;
6
7use crate::{SyntaxError, SyntaxKind, SyntaxMode, SyntaxNode};
8
/// Splits source text into tokens, lexing each token according to the
/// currently active syntax mode.
#[derive(Clone)]
pub(super) struct Lexer<'s> {
    /// The scanner over the source text; its cursor is the lexer's position.
    s: Scanner<'s>,
    /// The syntax mode that decides how the next token is lexed.
    mode: SyntaxMode,
    /// Whether the most recently lexed token was whitespace that contained a
    /// newline. Reset at the start of every call to `next`.
    newline: bool,
    /// An error recorded while lexing the current token, if any. It is taken
    /// out again when the token's node is built.
    error: Option<SyntaxError>,
}
22
23impl<'s> Lexer<'s> {
24 pub fn new(text: &'s str, mode: SyntaxMode) -> Self {
27 Self {
28 s: Scanner::new(text),
29 mode,
30 newline: false,
31 error: None,
32 }
33 }
34
35 pub fn mode(&self) -> SyntaxMode {
37 self.mode
38 }
39
40 pub fn set_mode(&mut self, mode: SyntaxMode) {
42 self.mode = mode;
43 }
44
45 pub fn cursor(&self) -> usize {
48 self.s.cursor()
49 }
50
51 pub fn jump(&mut self, index: usize) {
53 self.s.jump(index);
54 }
55
56 pub fn newline(&self) -> bool {
58 self.newline
59 }
60
61 pub fn column(&self, index: usize) -> usize {
63 let mut s = self.s; s.jump(index);
65 s.before().chars().rev().take_while(|&c| !is_newline(c)).count()
66 }
67}
68
69impl Lexer<'_> {
70 fn error(&mut self, message: impl Into<EcoString>) -> SyntaxKind {
72 self.error = Some(SyntaxError::new(message));
73 SyntaxKind::Error
74 }
75
76 fn hint(&mut self, message: impl Into<EcoString>) {
78 if let Some(error) = &mut self.error {
79 error.hints.push(message.into());
80 }
81 }
82}
83
84impl Lexer<'_> {
    /// Lexes the token at the cursor and returns its kind together with the
    /// syntax node built for it.
    ///
    /// Returns [`SyntaxKind::End`] once the input is exhausted. If an error
    /// was recorded while lexing the token, the returned node is an error
    /// node carrying that error.
    pub fn next(&mut self) -> (SyntaxKind, SyntaxNode) {
        // The error of the previous token must already have been consumed.
        debug_assert!(self.error.is_none());
        let start = self.s.cursor();

        self.newline = false;
        let kind = match self.s.eat() {
            // Whitespace, shebang and comments are handled the same way in
            // every mode, so they are checked before dispatching on the mode.
            Some(c) if is_space(c, self.mode) => self.whitespace(start, c),
            // A shebang is only recognized at the very start of the text.
            Some('#') if start == 0 && self.s.eat_if('!') => self.shebang(),
            Some('/') if self.s.eat_if('/') => self.line_comment(),
            Some('/') if self.s.eat_if('*') => self.block_comment(),
            // A `*/` without a matching opener.
            Some('*') if self.s.eat_if('/') => {
                let kind = self.error("unexpected end of block comment");
                self.hint(
                    "consider escaping the `*` with a backslash or \
                     opening the block comment with `/*`",
                );
                kind
            }
            // Raw text builds its own inner node, hence the early return.
            Some('`') if self.mode != SyntaxMode::Math => return self.raw(),
            Some(c) => match self.mode {
                SyntaxMode::Markup => self.markup(start, c),
                // Math lexing may hand back a finished node of its own.
                SyntaxMode::Math => match self.math(start, c) {
                    (kind, None) => kind,
                    (kind, Some(node)) => return (kind, node),
                },
                SyntaxMode::Code => self.code(start, c),
            },

            None => SyntaxKind::End,
        };

        // Build a leaf over everything eaten since `start`, or an error node
        // if lexing recorded an error.
        let text = self.s.from(start);
        let node = match self.error.take() {
            Some(error) => SyntaxNode::error(error, text),
            None => SyntaxNode::leaf(kind, text),
        };
        (kind, node)
    }
126
127 fn whitespace(&mut self, start: usize, c: char) -> SyntaxKind {
129 let more = self.s.eat_while(|c| is_space(c, self.mode));
130 let newlines = match c {
131 ' ' if more.is_empty() => 0,
133 _ => count_newlines(self.s.from(start)),
134 };
135
136 self.newline = newlines > 0;
137 if self.mode == SyntaxMode::Markup && newlines >= 2 {
138 SyntaxKind::Parbreak
139 } else {
140 SyntaxKind::Space
141 }
142 }
143
    /// Eats the remainder of a shebang line, stopping before the newline.
    fn shebang(&mut self) -> SyntaxKind {
        self.s.eat_until(is_newline);
        SyntaxKind::Shebang
    }
148
    /// Eats the remainder of a line comment, stopping before the newline.
    fn line_comment(&mut self) -> SyntaxKind {
        self.s.eat_until(is_newline);
        SyntaxKind::LineComment
    }
153
154 fn block_comment(&mut self) -> SyntaxKind {
155 let mut state = '_';
156 let mut depth = 1;
157
158 while let Some(c) = self.s.eat() {
160 state = match (state, c) {
161 ('*', '/') => {
162 depth -= 1;
163 if depth == 0 {
164 break;
165 }
166 '_'
167 }
168 ('/', '*') => {
169 depth += 1;
170 '_'
171 }
172 _ => c,
173 }
174 }
175
176 SyntaxKind::BlockComment
177 }
178}
179
180impl Lexer<'_> {
    /// Lexes a markup token whose first character `c` was already eaten at
    /// `start`.
    ///
    /// The arms are tried in order, so the guarded multi-character forms must
    /// stay ahead of the single-character fallbacks for the same character.
    fn markup(&mut self, start: usize, c: char) -> SyntaxKind {
        match c {
            '\\' => self.backslash(),
            // Automatic links: `http://` and `https://`.
            'h' if self.s.eat_if("ttp://") => self.link(),
            'h' if self.s.eat_if("ttps://") => self.link(),
            '<' if self.s.at(is_id_continue) => self.label(),
            '@' if self.s.at(is_id_continue) => self.ref_marker(),

            // Shorthands: `...`, `---`, `--`, `-?`, and a `-` directly
            // followed by a numeric character.
            '.' if self.s.eat_if("..") => SyntaxKind::Shorthand,
            '-' if self.s.eat_if("--") => SyntaxKind::Shorthand,
            '-' if self.s.eat_if('-') => SyntaxKind::Shorthand,
            '-' if self.s.eat_if('?') => SyntaxKind::Shorthand,
            '-' if self.s.at(char::is_numeric) => SyntaxKind::Shorthand,
            // Inside a word, `*` and `_` fall through to plain text below.
            '*' if !self.in_word() => SyntaxKind::Star,
            '_' if !self.in_word() => SyntaxKind::Underscore,

            '#' => SyntaxKind::Hash,
            '[' => SyntaxKind::LeftBracket,
            ']' => SyntaxKind::RightBracket,
            '\'' => SyntaxKind::SmartQuote,
            '"' => SyntaxKind::SmartQuote,
            '$' => SyntaxKind::Dollar,
            '~' => SyntaxKind::Shorthand,
            ':' => SyntaxKind::Colon,
            // A run of `=` is a heading marker only when followed by a space,
            // a comment or the end of input; otherwise it is ordinary text.
            '=' => {
                self.s.eat_while('=');
                if self.space_or_end() { SyntaxKind::HeadingMarker } else { self.text() }
            }
            // List-like markers must likewise be followed by space or end.
            '-' if self.space_or_end() => SyntaxKind::ListMarker,
            '+' if self.space_or_end() => SyntaxKind::EnumMarker,
            '/' if self.space_or_end() => SyntaxKind::TermMarker,
            '0'..='9' => self.numbering(start),

            _ => self.text(),
        }
    }
218
219 fn backslash(&mut self) -> SyntaxKind {
220 if self.s.eat_if("u{") {
221 let hex = self.s.eat_while(char::is_ascii_alphanumeric);
222 if !self.s.eat_if('}') {
223 return self.error("unclosed Unicode escape sequence");
224 }
225
226 if u32::from_str_radix(hex, 16)
227 .ok()
228 .and_then(std::char::from_u32)
229 .is_none()
230 {
231 return self.error(eco_format!("invalid Unicode codepoint: {}", hex));
232 }
233
234 return SyntaxKind::Escape;
235 }
236
237 if self.s.done() || self.s.at(char::is_whitespace) {
238 SyntaxKind::Linebreak
239 } else {
240 self.s.eat();
241 SyntaxKind::Escape
242 }
243 }
244
    /// Lexes raw text and returns it as an inner `Raw` node made of delimiter,
    /// language tag, text and trimmed-whitespace children.
    ///
    /// Returns an error node spanning the rest of the input when the closing
    /// backticks are missing.
    fn raw(&mut self) -> (SyntaxKind, SyntaxNode) {
        // The first backtick was eaten before this method was called.
        let start = self.s.cursor() - 1;

        // Count the opening backticks.
        let mut backticks = 1;
        while self.s.eat_if('`') {
            backticks += 1;
        }

        // Exactly two backticks form an empty raw element.
        if backticks == 2 {
            let nodes = vec![
                SyntaxNode::leaf(SyntaxKind::RawDelim, "`"),
                SyntaxNode::leaf(SyntaxKind::RawDelim, "`"),
            ];
            return (SyntaxKind::Raw, SyntaxNode::inner(SyntaxKind::Raw, nodes));
        }

        // Search for a run of as many consecutive backticks as were opened.
        let mut found = 0;
        while found < backticks {
            match self.s.eat() {
                Some('`') => found += 1,
                Some(_) => found = 0,
                None => {
                    let msg = SyntaxError::new("unclosed raw text");
                    let error = SyntaxNode::error(msg, self.s.from(start));
                    return (SyntaxKind::Error, error);
                }
            }
        }
        let end = self.s.cursor();

        let mut nodes = Vec::with_capacity(3);
        // Pushes a leaf covering everything between the previous push and the
        // scanner's cursor; callers move the scanner before each push.
        let mut prev_start = start;
        let mut push_raw = |kind, s: &Scanner| {
            nodes.push(SyntaxNode::leaf(kind, s.from(prev_start)));
            prev_start = s.cursor();
        };

        // Opening delimiter.
        self.s.jump(start + backticks);
        push_raw(SyntaxKind::RawDelim, &self.s);

        // Three or more backticks get the block treatment; `end - backticks`
        // is where the inner content stops.
        if backticks >= 3 {
            self.blocky_raw(end - backticks, &mut push_raw);
        } else {
            self.inline_raw(end - backticks, &mut push_raw);
        }

        // Closing delimiter.
        self.s.jump(end);
        push_raw(SyntaxKind::RawDelim, &self.s);

        (SyntaxKind::Raw, SyntaxNode::inner(SyntaxKind::Raw, nodes))
    }
307
    /// Splits the inner content of a raw block (three or more backticks) into
    /// language tag, text and trimmed-whitespace pieces.
    ///
    /// The scanner must be positioned right after the opening delimiter and
    /// `inner_end` is the position where the closing delimiter starts. Each
    /// piece is reported through `push_raw` after the scanner was moved to the
    /// end of that piece.
    fn blocky_raw<F>(&mut self, inner_end: usize, mut push_raw: F)
    where
        F: FnMut(SyntaxKind, &Scanner),
    {
        // An optional language tag directly after the backticks.
        if self.s.eat_if(is_id_start) {
            self.s.eat_while(is_id_continue);
            push_raw(SyntaxKind::RawLang, &self.s);
        }

        // Everything up to the closing delimiter, split into lines.
        let mut lines = split_newlines(self.s.to(inner_end));

        // The dedent is the smallest count of leading whitespace characters
        // over all non-blank lines after the first. The last line always
        // takes part, even when it is blank.
        let dedent = lines
            .iter()
            .skip(1)
            .filter(|line| !line.chars().all(char::is_whitespace))
            .chain(lines.last())
            .map(|line| line.chars().take_while(|c| c.is_whitespace()).count())
            .min()
            .unwrap_or(0);

        // Drop a blank last line entirely. Otherwise, if the last line ends
        // in a backtick (ignoring trailing whitespace), trim one trailing
        // space from it.
        if lines.last().is_some_and(|last| last.chars().all(char::is_whitespace)) {
            lines.pop();
        } else if let Some(last) = lines.last_mut() {
            if last.trim_end().ends_with('`') {
                *last = last.strip_suffix(' ').unwrap_or(last);
            }
        }

        let mut lines = lines.into_iter();

        // The first line is skipped when blank; otherwise only a single
        // leading space is trimmed from it.
        if let Some(first_line) = lines.next() {
            if first_line.chars().all(char::is_whitespace) {
                // The scanner advances here without a `push_raw`, so the
                // skipped text becomes part of the next pushed piece.
                self.s.advance(first_line.len());
                debug_assert!(self.s.cursor() != inner_end);
            } else {
                let line_end = self.s.cursor() + first_line.len();
                if self.s.eat_if(' ') {
                    push_raw(SyntaxKind::RawTrimmed, &self.s);
                }
                self.s.jump(line_end);
                push_raw(SyntaxKind::Text, &self.s);
            }
        }

        // For every further line: the newline plus up to `dedent` leading
        // characters are trimmed, the rest is text.
        for line in lines {
            // Byte length of the dedented prefix (characters may be multi-byte).
            let offset: usize = line.chars().take(dedent).map(char::len_utf8).sum();
            self.s.eat_newline();
            self.s.advance(offset);
            push_raw(SyntaxKind::RawTrimmed, &self.s);
            self.s.advance(line.len() - offset);
            push_raw(SyntaxKind::Text, &self.s);
        }

        // Whatever is left before the closing delimiter is trimmed as well.
        if self.s.cursor() < inner_end {
            self.s.jump(inner_end);
            push_raw(SyntaxKind::RawTrimmed, &self.s);
        }
    }
430
431 fn inline_raw<F>(&mut self, inner_end: usize, mut push_raw: F)
435 where
436 F: FnMut(SyntaxKind, &Scanner),
437 {
438 while self.s.cursor() < inner_end {
439 if self.s.at(is_newline) {
440 push_raw(SyntaxKind::Text, &self.s);
441 self.s.eat_newline();
442 push_raw(SyntaxKind::RawTrimmed, &self.s);
443 continue;
444 }
445 self.s.eat();
446 }
447 push_raw(SyntaxKind::Text, &self.s);
448 }
449
450 fn link(&mut self) -> SyntaxKind {
451 let (link, balanced) = link_prefix(self.s.after());
452 self.s.advance(link.len());
453
454 if !balanced {
455 return self.error(
456 "automatic links cannot contain unbalanced brackets, \
457 use the `link` function instead",
458 );
459 }
460
461 SyntaxKind::Link
462 }
463
464 fn numbering(&mut self, start: usize) -> SyntaxKind {
465 self.s.eat_while(char::is_ascii_digit);
466
467 let read = self.s.from(start);
468 if self.s.eat_if('.') && self.space_or_end() && read.parse::<u64>().is_ok() {
469 return SyntaxKind::EnumMarker;
470 }
471
472 self.text()
473 }
474
475 fn ref_marker(&mut self) -> SyntaxKind {
476 self.s.eat_while(is_valid_in_label_literal);
477
478 while matches!(self.s.scout(-1), Some('.' | ':')) {
480 self.s.uneat();
481 }
482
483 SyntaxKind::RefMarker
484 }
485
486 fn label(&mut self) -> SyntaxKind {
487 let label = self.s.eat_while(is_valid_in_label_literal);
488 if label.is_empty() {
489 return self.error("label cannot be empty");
490 }
491
492 if !self.s.eat_if('>') {
493 return self.error("unclosed label");
494 }
495
496 SyntaxKind::Label
497 }
498
    /// Eats plain markup text up to the next character that could start a
    /// different kind of token.
    fn text(&mut self) -> SyntaxKind {
        // Builds `TABLE`, a lookup from ASCII code to "this character may end
        // a text run".
        macro_rules! table {
            ($(|$c:literal)*) => {
                static TABLE: [bool; 128] = {
                    let mut t = [false; 128];
                    $(t[$c as usize] = true;)*
                    t
                };
            };
        }

        table! {
            | ' ' | '\t' | '\n' | '\x0b' | '\x0c' | '\r' | '\\' | '/'
            | '[' | ']' | '~' | '-' | '.' | '\'' | '"' | '*' | '_'
            | ':' | 'h' | '`' | '$' | '<' | '>' | '@' | '#'
        };

        loop {
            // ASCII characters are looked up in the table; anything outside
            // of it only stops the run when it is whitespace.
            self.s.eat_until(|c: char| {
                TABLE.get(c as usize).copied().unwrap_or_else(|| c.is_whitespace())
            });

            // Look ahead on a copy of the scanner: if the stop character
            // does not begin a special token here, it stays part of this
            // text run and scanning continues.
            let mut s = self.s;
            match s.eat() {
                Some(' ') if s.at(char::is_alphanumeric) => {}
                Some('/') if !s.at(['/', '*']) => {}
                Some('-') if !s.at(['-', '?']) => {}
                Some('.') if !s.at("..") => {}
                Some('h') if !s.at("ttp://") && !s.at("ttps://") => {}
                Some('@') if !s.at(is_valid_in_label_literal) => {}
                _ => break,
            }

            // Commit the lookahead.
            self.s = s;
        }

        SyntaxKind::Text
    }
539
540 fn in_word(&self) -> bool {
541 let wordy = |c: Option<char>| {
542 c.is_some_and(|c| {
543 c.is_alphanumeric()
544 && !matches!(
545 c.script(),
546 Script::Han
547 | Script::Hiragana
548 | Script::Katakana
549 | Script::Hangul
550 )
551 })
552 };
553 let prev = self.s.scout(-2);
554 let next = self.s.peek();
555 wordy(prev) && wordy(next)
556 }
557
558 fn space_or_end(&self) -> bool {
559 self.s.done()
560 || self.s.at(char::is_whitespace)
561 || self.s.at("//")
562 || self.s.at("/*")
563 }
564}
565
566impl Lexer<'_> {
    /// Lexes a math token whose first character `c` was already eaten at
    /// `start`.
    ///
    /// Returns the token kind and, for identifiers (possibly with field
    /// accesses), an already built node; otherwise the node is `None`.
    ///
    /// The arms are tried in order, so longer shorthands must stay ahead of
    /// their shorter prefixes.
    fn math(&mut self, start: usize, c: char) -> (SyntaxKind, Option<SyntaxNode>) {
        let kind = match c {
            '\\' => self.backslash(),
            '"' => self.string(),

            // Multi-character shorthands.
            '-' if self.s.eat_if(">>") => SyntaxKind::MathShorthand,
            '-' if self.s.eat_if('>') => SyntaxKind::MathShorthand,
            '-' if self.s.eat_if("->") => SyntaxKind::MathShorthand,
            ':' if self.s.eat_if('=') => SyntaxKind::MathShorthand,
            ':' if self.s.eat_if(":=") => SyntaxKind::MathShorthand,
            '!' if self.s.eat_if('=') => SyntaxKind::MathShorthand,
            '.' if self.s.eat_if("..") => SyntaxKind::MathShorthand,
            '[' if self.s.eat_if('|') => SyntaxKind::MathShorthand,
            '<' if self.s.eat_if("==>") => SyntaxKind::MathShorthand,
            '<' if self.s.eat_if("-->") => SyntaxKind::MathShorthand,
            '<' if self.s.eat_if("--") => SyntaxKind::MathShorthand,
            '<' if self.s.eat_if("-<") => SyntaxKind::MathShorthand,
            '<' if self.s.eat_if("->") => SyntaxKind::MathShorthand,
            '<' if self.s.eat_if("<-") => SyntaxKind::MathShorthand,
            '<' if self.s.eat_if("<<") => SyntaxKind::MathShorthand,
            '<' if self.s.eat_if("=>") => SyntaxKind::MathShorthand,
            '<' if self.s.eat_if("==") => SyntaxKind::MathShorthand,
            '<' if self.s.eat_if("~~") => SyntaxKind::MathShorthand,
            '<' if self.s.eat_if('=') => SyntaxKind::MathShorthand,
            '<' if self.s.eat_if('<') => SyntaxKind::MathShorthand,
            '<' if self.s.eat_if('-') => SyntaxKind::MathShorthand,
            '<' if self.s.eat_if('~') => SyntaxKind::MathShorthand,
            '>' if self.s.eat_if("->") => SyntaxKind::MathShorthand,
            '>' if self.s.eat_if(">>") => SyntaxKind::MathShorthand,
            '=' if self.s.eat_if("=>") => SyntaxKind::MathShorthand,
            '=' if self.s.eat_if('>') => SyntaxKind::MathShorthand,
            '=' if self.s.eat_if(':') => SyntaxKind::MathShorthand,
            '>' if self.s.eat_if('=') => SyntaxKind::MathShorthand,
            '>' if self.s.eat_if('>') => SyntaxKind::MathShorthand,
            '|' if self.s.eat_if("->") => SyntaxKind::MathShorthand,
            '|' if self.s.eat_if("=>") => SyntaxKind::MathShorthand,
            '|' if self.s.eat_if(']') => SyntaxKind::MathShorthand,
            '|' if self.s.eat_if('|') => SyntaxKind::MathShorthand,
            '~' if self.s.eat_if("~>") => SyntaxKind::MathShorthand,
            '~' if self.s.eat_if('>') => SyntaxKind::MathShorthand,
            // Single-character shorthands.
            '*' | '-' | '~' => SyntaxKind::MathShorthand,

            '.' => SyntaxKind::Dot,
            ',' => SyntaxKind::Comma,
            ';' => SyntaxKind::Semicolon,
            ')' => SyntaxKind::RightParen,

            '#' => SyntaxKind::Hash,
            '_' => SyntaxKind::Underscore,
            '$' => SyntaxKind::Dollar,
            '/' => SyntaxKind::Slash,
            '^' => SyntaxKind::Hat,
            '\'' => SyntaxKind::Prime,
            '&' => SyntaxKind::MathAlignPoint,
            '√' | '∛' | '∜' => SyntaxKind::Root,

            // An identifier needs at least two characters; a single letter
            // falls through to `math_text` below.
            c if is_math_id_start(c) && self.s.at(is_math_id_continue) => {
                self.s.eat_while(is_math_id_continue);
                let (kind, node) = self.math_ident_or_field(start);
                return (kind, Some(node));
            }

            _ => self.math_text(start, c),
        };
        (kind, None)
    }
636
637 fn math_ident_or_field(&mut self, start: usize) -> (SyntaxKind, SyntaxNode) {
639 let mut kind = SyntaxKind::MathIdent;
640 let mut node = SyntaxNode::leaf(kind, self.s.from(start));
641 while let Some(ident) = self.maybe_dot_ident() {
642 kind = SyntaxKind::FieldAccess;
643 let field_children = vec![
644 node,
645 SyntaxNode::leaf(SyntaxKind::Dot, '.'),
646 SyntaxNode::leaf(SyntaxKind::Ident, ident),
647 ];
648 node = SyntaxNode::inner(kind, field_children);
649 }
650 (kind, node)
651 }
652
653 fn maybe_dot_ident(&mut self) -> Option<&str> {
655 if self.s.scout(1).is_some_and(is_math_id_start) && self.s.eat_if('.') {
656 let ident_start = self.s.cursor();
657 self.s.eat();
658 self.s.eat_while(is_math_id_continue);
659 Some(self.s.from(ident_start))
660 } else {
661 None
662 }
663 }
664
665 fn math_text(&mut self, start: usize, c: char) -> SyntaxKind {
666 if c.is_numeric() {
668 self.s.eat_while(char::is_numeric);
669 let mut s = self.s;
670 if s.eat_if('.') && !s.eat_while(char::is_numeric).is_empty() {
671 self.s = s;
672 }
673 SyntaxKind::MathText
674 } else {
675 let len = self
676 .s
677 .get(start..self.s.string().len())
678 .graphemes(true)
679 .next()
680 .map_or(0, str::len);
681 self.s.jump(start + len);
682 if len > c.len_utf8() {
683 SyntaxKind::Text
686 } else {
687 SyntaxKind::MathText
688 }
689 }
690 }
691
692 pub fn maybe_math_named_arg(&mut self, start: usize) -> Option<SyntaxNode> {
694 let cursor = self.s.cursor();
695 self.s.jump(start);
696 if self.s.eat_if(is_id_start) {
697 self.s.eat_while(is_id_continue);
698 if self.s.at(':') && !self.s.at(":=") && !self.s.at("::=") {
701 let node = if self.s.from(start) != "_" {
703 SyntaxNode::leaf(SyntaxKind::Ident, self.s.from(start))
704 } else {
705 let msg = SyntaxError::new("expected identifier, found underscore");
706 SyntaxNode::error(msg, self.s.from(start))
707 };
708 return Some(node);
709 }
710 }
711 self.s.jump(cursor);
712 None
713 }
714
715 pub fn maybe_math_spread_arg(&mut self, start: usize) -> Option<SyntaxNode> {
717 let cursor = self.s.cursor();
718 self.s.jump(start);
719 if self.s.eat_if("..") {
720 if !self.space_or_end() && !self.s.at('.') {
723 let node = SyntaxNode::leaf(SyntaxKind::Dots, self.s.from(start));
724 return Some(node);
725 }
726 }
727 self.s.jump(cursor);
728 None
729 }
730}
731
732impl Lexer<'_> {
    /// Lexes a code token whose first character `c` was already eaten at
    /// `start`. Records an error for characters that are not valid in code.
    ///
    /// The arms are tried in order, so the guarded two-character operators
    /// must stay ahead of the single-character fallbacks.
    fn code(&mut self, start: usize, c: char) -> SyntaxKind {
        match c {
            '<' if self.s.at(is_id_continue) => self.label(),
            '0'..='9' => self.number(start, c),
            // A number may also start with its decimal point.
            '.' if self.s.at(char::is_ascii_digit) => self.number(start, c),
            '"' => self.string(),

            // Two-character operators.
            '=' if self.s.eat_if('=') => SyntaxKind::EqEq,
            '!' if self.s.eat_if('=') => SyntaxKind::ExclEq,
            '<' if self.s.eat_if('=') => SyntaxKind::LtEq,
            '>' if self.s.eat_if('=') => SyntaxKind::GtEq,
            '+' if self.s.eat_if('=') => SyntaxKind::PlusEq,
            // U+2212 (minus sign) is accepted wherever the hyphen is.
            '-' | '\u{2212}' if self.s.eat_if('=') => SyntaxKind::HyphEq,
            '*' if self.s.eat_if('=') => SyntaxKind::StarEq,
            '/' if self.s.eat_if('=') => SyntaxKind::SlashEq,
            '.' if self.s.eat_if('.') => SyntaxKind::Dots,
            '=' if self.s.eat_if('>') => SyntaxKind::Arrow,

            // Single-character tokens.
            '{' => SyntaxKind::LeftBrace,
            '}' => SyntaxKind::RightBrace,
            '[' => SyntaxKind::LeftBracket,
            ']' => SyntaxKind::RightBracket,
            '(' => SyntaxKind::LeftParen,
            ')' => SyntaxKind::RightParen,
            '$' => SyntaxKind::Dollar,
            ',' => SyntaxKind::Comma,
            ';' => SyntaxKind::Semicolon,
            ':' => SyntaxKind::Colon,
            '.' => SyntaxKind::Dot,
            '+' => SyntaxKind::Plus,
            '-' | '\u{2212}' => SyntaxKind::Minus,
            '*' => SyntaxKind::Star,
            '/' => SyntaxKind::Slash,
            '=' => SyntaxKind::Eq,
            '<' => SyntaxKind::Lt,
            '>' => SyntaxKind::Gt,

            c if is_id_start(c) => self.ident(start),

            c => self.error(eco_format!("the character `{c}` is not valid in code")),
        }
    }
776
777 fn ident(&mut self, start: usize) -> SyntaxKind {
778 self.s.eat_while(is_id_continue);
779 let ident = self.s.from(start);
780
781 let prev = self.s.get(0..start);
782 if (!prev.ends_with(['.', '@']) || prev.ends_with(".."))
783 && let Some(keyword) = keyword(ident)
784 {
785 return keyword;
786 }
787
788 if ident == "_" { SyntaxKind::Underscore } else { SyntaxKind::Ident }
789 }
790
    /// Lexes a numeric literal that begins at `start` with the already eaten
    /// character `first_c` (a digit or a leading `.`).
    ///
    /// Produces `Int`, `Float` or, with a known unit suffix, `Numeric`.
    /// Records an error (plus a hint where both parts are wrong) when the
    /// number or its suffix is invalid.
    fn number(&mut self, start: usize, first_c: char) -> SyntaxKind {
        // A `0b`, `0o` or `0x` prefix selects an alternative integer base.
        let base = match first_c {
            '0' if self.s.eat_if('b') => 2,
            '0' if self.s.eat_if('o') => 8,
            '0' if self.s.eat_if('x') => 16,
            _ => 10,
        };

        // Read the initial digits. For hexadecimal all ASCII alphanumerics
        // are eaten here; validity is checked when parsing below.
        if base == 16 {
            self.s.eat_while(char::is_ascii_alphanumeric);
        } else {
            self.s.eat_while(char::is_ascii_digit);
        }

        // Fractional part and exponent exist for decimal numbers only.
        let mut is_float = false;
        if base == 10 {
            if first_c == '.' {
                // The digits after the leading dot were already eaten above.
                is_float = true;
            } else if !self.s.at("..")
                && !self.s.scout(1).is_some_and(is_id_start)
                && self.s.eat_if('.')
            {
                // The dot is a decimal point only if it is not part of `..`
                // and is not followed by the start of an identifier.
                is_float = true;
                self.s.eat_while(char::is_ascii_digit);
            }

            // An `e`/`E` starts an exponent, unless it begins the `em` suffix.
            if !self.s.at("em") && self.s.eat_if(['e', 'E']) {
                is_float = true;
                self.s.eat_if(['+', '-']);
                self.s.eat_while(char::is_ascii_digit);
            }
        }

        let number = self.s.from(start);
        let suffix = self.s.eat_while(|c: char| c.is_ascii_alphanumeric() || c == '%');

        // `Ok(None)`: no suffix, `Ok(Some(()))`: known unit, `Err`: unknown.
        let mut suffix_result = match suffix {
            "" => Ok(None),
            "pt" | "mm" | "cm" | "in" | "deg" | "rad" | "em" | "fr" | "%" => Ok(Some(())),
            _ => Err(eco_format!("invalid number suffix: {suffix}")),
        };

        let number_result = if is_float && number.parse::<f64>().is_err() {
            // E.g. an exponent without digits, such as `1.2e`.
            Err(eco_format!("invalid floating point number: {number}"))
        } else if base == 10 {
            Ok(())
        } else {
            let name = match base {
                2 => "binary",
                8 => "octal",
                16 => "hexadecimal",
                _ => unreachable!(),
            };
            // `[2..]` skips the two-character base prefix.
            match i64::from_str_radix(&number[2..], base) {
                Ok(_) if suffix.is_empty() => Ok(()),
                Ok(value) => {
                    // The suffix itself is fine, so replace its result with a
                    // suggestion that is later attached as a hint.
                    if suffix_result.is_ok() {
                        suffix_result = Err(eco_format!(
                            "try using a decimal number: {value}{suffix}"
                        ));
                    }
                    Err(eco_format!("{name} numbers cannot have a suffix"))
                }
                Err(_) => Err(eco_format!("invalid {name} number: {number}")),
            }
        };

        // Combine both results into a token kind or an error.
        match (number_result, suffix_result) {
            (Ok(()), Ok(None)) if is_float => SyntaxKind::Float,
            (Ok(()), Ok(None)) => SyntaxKind::Int,
            (Ok(()), Ok(Some(()))) => SyntaxKind::Numeric,
            // Both are wrong: report the number, hint at the suffix.
            (Err(number_err), Err(suffix_err)) => {
                let err = self.error(number_err);
                self.hint(suffix_err);
                err
            }
            (Ok(()), Err(msg)) | (Err(msg), Ok(_)) => self.error(msg),
        }
    }
882
883 fn string(&mut self) -> SyntaxKind {
884 let mut escaped = false;
885 self.s.eat_until(|c| {
886 let stop = c == '"' && !escaped;
887 escaped = c == '\\' && !escaped;
888 stop
889 });
890
891 if !self.s.eat_if('"') {
892 return self.error("unclosed string");
893 }
894
895 SyntaxKind::Str
896 }
897}
898
899fn keyword(ident: &str) -> Option<SyntaxKind> {
901 Some(match ident {
902 "none" => SyntaxKind::None,
903 "auto" => SyntaxKind::Auto,
904 "true" => SyntaxKind::Bool,
905 "false" => SyntaxKind::Bool,
906 "not" => SyntaxKind::Not,
907 "and" => SyntaxKind::And,
908 "or" => SyntaxKind::Or,
909 "let" => SyntaxKind::Let,
910 "set" => SyntaxKind::Set,
911 "show" => SyntaxKind::Show,
912 "context" => SyntaxKind::Context,
913 "if" => SyntaxKind::If,
914 "else" => SyntaxKind::Else,
915 "for" => SyntaxKind::For,
916 "in" => SyntaxKind::In,
917 "while" => SyntaxKind::While,
918 "break" => SyntaxKind::Break,
919 "continue" => SyntaxKind::Continue,
920 "return" => SyntaxKind::Return,
921 "import" => SyntaxKind::Import,
922 "include" => SyntaxKind::Include,
923 "as" => SyntaxKind::As,
924 _ => return None,
925 })
926}
927
/// Convenience methods added to [`Scanner`] for use by the lexer.
trait ScannerExt {
    /// Moves the cursor forward by `by` bytes.
    fn advance(&mut self, by: usize);
    /// Eats one newline (with `\r\n` counting as a single newline) and
    /// returns whether a newline was eaten.
    fn eat_newline(&mut self) -> bool;
}
932
933impl ScannerExt for Scanner<'_> {
934 fn advance(&mut self, by: usize) {
935 self.jump(self.cursor() + by);
936 }
937
938 fn eat_newline(&mut self) -> bool {
939 let ate = self.eat_if(is_newline);
940 if ate && self.before().ends_with('\r') {
941 self.eat_if('\n');
942 }
943 ate
944 }
945}
946
947#[inline]
949fn is_space(character: char, mode: SyntaxMode) -> bool {
950 match mode {
951 SyntaxMode::Markup => matches!(character, ' ' | '\t') || is_newline(character),
952 _ => character.is_whitespace(),
953 }
954}
955
/// Returns whether `character` is a newline character.
#[inline]
pub fn is_newline(character: char) -> bool {
    match character {
        // Line Feed, Vertical Tab, Form Feed, Carriage Return (U+000A..=U+000D).
        '\n'..='\r' => true,
        // Next Line.
        '\u{0085}' => true,
        // Line Separator, Paragraph Separator.
        '\u{2028}'..='\u{2029}' => true,
        _ => false,
    }
}
967
/// Extracts the longest prefix of `text` that reads as a link and reports
/// whether all brackets inside that prefix are balanced.
///
/// The prefix ends at the first character that cannot be part of a link or at
/// a closing bracket that does not match the innermost open bracket. Trailing
/// punctuation (`!`, `,`, `.`, `:`, `;`, `?`, `'`) is then dropped from the
/// prefix.
///
/// The returned flag is `false` when the prefix contains an opening bracket
/// that is never closed. This includes the case where scanning stopped at a
/// mismatched closing bracket (e.g. `(]`): the pending opener stays recorded
/// instead of being discarded.
pub fn link_prefix(text: &str) -> (&str, bool) {
    /// Closes the innermost open bracket if it is `opener`. Leaves the stack
    /// untouched and returns `false` otherwise.
    fn close(open: &mut Vec<char>, opener: char) -> bool {
        let matched = open.last() == Some(&opener);
        if matched {
            open.pop();
        }
        matched
    }

    // Currently open brackets, innermost last.
    let mut open = Vec::new();
    let mut end = text.len();

    for (index, c) in text.char_indices() {
        let accepted = match c {
            '0'..='9' | 'a'..='z' | 'A'..='Z' => true,
            '!' | '#' | '$' | '%' | '&' | '*' | '+' | ',' | '-' | '.' | '/' | ':' | ';'
            | '=' | '?' | '@' | '_' | '~' | '\'' => true,
            '[' | '(' => {
                open.push(c);
                true
            }
            ']' => close(&mut open, '['),
            ')' => close(&mut open, '('),
            _ => false,
        };

        if !accepted {
            end = index;
            break;
        }
    }

    // Don't include trailing characters that are likely part of the
    // surrounding text.
    let link = text[..end].trim_end_matches(['!', ',', '.', ':', ';', '?', '\'']);

    (link, open.is_empty())
}
1004
1005pub fn split_newlines(text: &str) -> Vec<&str> {
1007 let mut s = Scanner::new(text);
1008 let mut lines = Vec::new();
1009 let mut start = 0;
1010 let mut end = 0;
1011
1012 while let Some(c) = s.eat() {
1013 if is_newline(c) {
1014 if c == '\r' {
1015 s.eat_if('\n');
1016 }
1017
1018 lines.push(&text[start..end]);
1019 start = s.cursor();
1020 }
1021 end = s.cursor();
1022 }
1023
1024 lines.push(&text[start..]);
1025 lines
1026}
1027
1028fn count_newlines(text: &str) -> usize {
1030 let mut newlines = 0;
1031 let mut s = Scanner::new(text);
1032 while let Some(c) = s.eat() {
1033 if is_newline(c) {
1034 if c == '\r' {
1035 s.eat_if('\n');
1036 }
1037 newlines += 1;
1038 }
1039 }
1040 newlines
1041}
1042
1043#[inline]
1051pub fn is_ident(string: &str) -> bool {
1052 let mut chars = string.chars();
1053 chars
1054 .next()
1055 .is_some_and(|c| is_id_start(c) && chars.all(is_id_continue))
1056}
1057
1058#[inline]
1060pub fn is_id_start(c: char) -> bool {
1061 is_xid_start(c) || c == '_'
1062}
1063
1064#[inline]
1066pub fn is_id_continue(c: char) -> bool {
1067 is_xid_continue(c) || c == '_' || c == '-'
1068}
1069
/// Returns whether `c` can start an identifier in math: any character
/// accepted by `is_xid_start`.
#[inline]
fn is_math_id_start(c: char) -> bool {
    is_xid_start(c)
}
1075
1076#[inline]
1078fn is_math_id_continue(c: char) -> bool {
1079 is_xid_continue(c) && c != '_'
1080}
1081
1082#[inline]
1084fn is_valid_in_label_literal(c: char) -> bool {
1085 is_id_continue(c) || matches!(c, ':' | '.')
1086}
1087
1088pub fn is_valid_label_literal_id(id: &str) -> bool {
1090 !id.is_empty() && id.chars().all(is_valid_in_label_literal)
1091}