1use std::num::IntErrorKind;
2
3use ecow::{EcoString, eco_format};
4use unicode_ident::{is_xid_continue, is_xid_start};
5use unicode_script::{Script, UnicodeScript};
6use unicode_segmentation::UnicodeSegmentation;
7use unscanny::Scanner;
8
9use crate::{SyntaxError, SyntaxKind, SyntaxMode, SyntaxNode};
10
/// Splits source text into tokens, one per call to `next`.
#[derive(Clone)]
pub(super) struct Lexer<'s> {
    /// The scanner over the source text; its cursor marks the lexing position.
    s: Scanner<'s>,
    /// The syntax mode that decides how the next token is lexed.
    mode: SyntaxMode,
    /// Whether the most recently lexed token was whitespace containing at
    /// least one newline. Reset at the start of every `next` call.
    newline: bool,
    /// An error recorded while lexing the current token. `next` takes it and
    /// turns the token into an error node.
    error: Option<SyntaxError>,
}
24
25impl<'s> Lexer<'s> {
26 pub fn new(text: &'s str, mode: SyntaxMode) -> Self {
29 Self {
30 s: Scanner::new(text),
31 mode,
32 newline: false,
33 error: None,
34 }
35 }
36
37 pub fn mode(&self) -> SyntaxMode {
39 self.mode
40 }
41
42 pub fn set_mode(&mut self, mode: SyntaxMode) {
44 self.mode = mode;
45 }
46
47 pub fn cursor(&self) -> usize {
50 self.s.cursor()
51 }
52
53 pub fn jump(&mut self, index: usize) {
55 self.s.jump(index);
56 }
57
58 pub fn newline(&self) -> bool {
60 self.newline
61 }
62
63 pub fn column(&self, index: usize) -> usize {
65 let mut s = self.s; s.jump(index);
67 s.before().chars().rev().take_while(|&c| !is_newline(c)).count()
68 }
69}
70
71impl Lexer<'_> {
72 fn error(&mut self, message: impl Into<EcoString>) -> SyntaxKind {
74 self.error = Some(SyntaxError::new(message));
75 SyntaxKind::Error
76 }
77
78 fn hint(&mut self, message: impl Into<EcoString>) {
80 if let Some(error) = &mut self.error {
81 error.hints.push(message.into());
82 }
83 }
84}
85
86impl Lexer<'_> {
88 pub fn next(&mut self) -> (SyntaxKind, SyntaxNode) {
91 debug_assert!(self.error.is_none());
92 let start = self.s.cursor();
93
94 self.newline = false;
95 let kind = match self.s.eat() {
96 Some(c) if is_space(c, self.mode) => self.whitespace(start, c),
97 Some('#') if start == 0 && self.s.eat_if('!') => self.shebang(),
98 Some('/') if self.s.eat_if('/') => self.line_comment(),
99 Some('/') if self.s.eat_if('*') => self.block_comment(),
100 Some('*') if self.s.eat_if('/') => {
101 let kind = self.error("unexpected end of block comment");
102 self.hint(
103 "consider escaping the `*` with a backslash or \
104 opening the block comment with `/*`",
105 );
106 kind
107 }
108 Some('`') if self.mode != SyntaxMode::Math => return self.raw(),
109 Some(c) => match self.mode {
110 SyntaxMode::Markup => self.markup(start, c),
111 SyntaxMode::Math => match self.math(start, c) {
112 (kind, None) => kind,
113 (kind, Some(node)) => return (kind, node),
114 },
115 SyntaxMode::Code => self.code(start, c),
116 },
117
118 None => SyntaxKind::End,
119 };
120
121 let text = self.s.from(start);
122 let node = match self.error.take() {
123 Some(error) => SyntaxNode::error(error, text),
124 None => SyntaxNode::leaf(kind, text),
125 };
126 (kind, node)
127 }
128
129 fn whitespace(&mut self, start: usize, c: char) -> SyntaxKind {
131 let more = self.s.eat_while(|c| is_space(c, self.mode));
132 let newlines = match c {
133 ' ' if more.is_empty() => 0,
135 _ => count_newlines(self.s.from(start)),
136 };
137
138 self.newline = newlines > 0;
139 if self.mode == SyntaxMode::Markup && newlines >= 2 {
140 SyntaxKind::Parbreak
141 } else {
142 SyntaxKind::Space
143 }
144 }
145
146 fn shebang(&mut self) -> SyntaxKind {
147 self.s.eat_until(is_newline);
148 SyntaxKind::Shebang
149 }
150
151 fn line_comment(&mut self) -> SyntaxKind {
152 self.s.eat_until(is_newline);
153 SyntaxKind::LineComment
154 }
155
156 fn block_comment(&mut self) -> SyntaxKind {
157 let mut state = '_';
158 let mut depth = 1;
159
160 while let Some(c) = self.s.eat() {
162 state = match (state, c) {
163 ('*', '/') => {
164 depth -= 1;
165 if depth == 0 {
166 break;
167 }
168 '_'
169 }
170 ('/', '*') => {
171 depth += 1;
172 '_'
173 }
174 _ => c,
175 }
176 }
177
178 SyntaxKind::BlockComment
179 }
180}
181
/// Markup-mode lexing.
impl Lexer<'_> {
    /// Lexes one markup token whose first character `c` was already eaten.
    ///
    /// `start` is the cursor position before `c`. The order of the arms is
    /// significant: when a guard fails, later arms for the same character are
    /// tried.
    fn markup(&mut self, start: usize, c: char) -> SyntaxKind {
        match c {
            '\\' => self.backslash(),
            // Automatic links; the leading `h` is already consumed.
            'h' if self.s.eat_if("ttp://") => self.link(),
            'h' if self.s.eat_if("ttps://") => self.link(),
            '<' if self.s.at(is_id_continue) => self.label(),
            '@' if self.s.at(is_id_continue) => self.ref_marker(),

            // Multi-character shorthands; longer sequences are tried first.
            '.' if self.s.eat_if("..") => SyntaxKind::Shorthand,
            '-' if self.s.eat_if("--") => SyntaxKind::Shorthand,
            '-' if self.s.eat_if('-') => SyntaxKind::Shorthand,
            '-' if self.s.eat_if('?') => SyntaxKind::Shorthand,
            // A hyphen directly in front of a numeric character (not eaten).
            '-' if self.s.at(char::is_numeric) => SyntaxKind::Shorthand,
            // `*` and `_` are only markers when not surrounded by word
            // characters; otherwise they end up in `text` below.
            '*' if !self.in_word() => SyntaxKind::Star,
            '_' if !self.in_word() => SyntaxKind::Underscore,

            '#' => SyntaxKind::Hash,
            '[' => SyntaxKind::LeftBracket,
            ']' => SyntaxKind::RightBracket,
            '\'' => SyntaxKind::SmartQuote,
            '"' => SyntaxKind::SmartQuote,
            '$' => SyntaxKind::Dollar,
            '~' => SyntaxKind::Shorthand,
            ':' => SyntaxKind::Colon,
            '=' => {
                // A run of `=` is a heading marker only if followed by space,
                // a comment, or the end of the text.
                self.s.eat_while('=');
                if self.space_or_end() { SyntaxKind::HeadingMarker } else { self.text() }
            }
            '-' if self.space_or_end() => SyntaxKind::ListMarker,
            '+' if self.space_or_end() => SyntaxKind::EnumMarker,
            '/' if self.space_or_end() => SyntaxKind::TermMarker,
            '0'..='9' => self.numbering(start),

            _ => self.text(),
        }
    }

    /// Lexes what follows an already eaten backslash: a `\u{...}` Unicode
    /// escape, a line break (backslash before whitespace or the end of the
    /// text), or an escape of the single next character.
    fn backslash(&mut self) -> SyntaxKind {
        if self.s.eat_if("u{") {
            let hex = self.s.eat_while(char::is_ascii_alphanumeric);
            if !self.s.eat_if('}') {
                return self.error("unclosed Unicode escape sequence");
            }

            // The digits must parse as hexadecimal and name a valid `char`.
            if u32::from_str_radix(hex, 16)
                .ok()
                .and_then(std::char::from_u32)
                .is_none()
            {
                return self.error(eco_format!("invalid Unicode codepoint: {}", hex));
            }

            return SyntaxKind::Escape;
        }

        if self.s.done() || self.s.at(char::is_whitespace) {
            SyntaxKind::Linebreak
        } else {
            self.s.eat();
            SyntaxKind::Escape
        }
    }

    /// Lexes raw text after its first backtick was already eaten and returns
    /// a `Raw` inner node made of delimiter, language tag, text and trimmed
    /// children, or an error node if the raw text is never closed.
    fn raw(&mut self) -> (SyntaxKind, SyntaxNode) {
        // The first backtick (one byte) was eaten by the caller.
        let start = self.s.cursor() - 1;

        // Count the opening backticks.
        let mut backticks = 1;
        while self.s.eat_if('`') {
            backticks += 1;
        }

        // Exactly two backticks form an empty raw element.
        if backticks == 2 {
            let nodes = vec![
                SyntaxNode::leaf(SyntaxKind::RawDelim, "`"),
                SyntaxNode::leaf(SyntaxKind::RawDelim, "`"),
            ];
            return (SyntaxKind::Raw, SyntaxNode::inner(SyntaxKind::Raw, nodes));
        }

        // Search for a run of as many backticks as the opening delimiter has.
        let mut found = 0;
        while found < backticks {
            match self.s.eat() {
                Some('`') => found += 1,
                Some(_) => found = 0,
                None => {
                    let msg = SyntaxError::new("unclosed raw text");
                    let error = SyntaxNode::error(msg, self.s.from(start));
                    return (SyntaxKind::Error, error);
                }
            }
        }
        let end = self.s.cursor();

        let mut nodes = Vec::with_capacity(3);

        // Pushes the text between the previous push and the scanner's cursor
        // as a leaf of the given kind.
        let mut prev_start = start;
        let mut push_raw = |kind, s: &Scanner| {
            nodes.push(SyntaxNode::leaf(kind, s.from(prev_start)));
            prev_start = s.cursor();
        };

        // Opening delimiter.
        self.s.jump(start + backticks);
        push_raw(SyntaxKind::RawDelim, &self.s);

        // Three or more backticks use the block rules, otherwise inline rules.
        if backticks >= 3 {
            self.blocky_raw(end - backticks, &mut push_raw);
        } else {
            self.inline_raw(end - backticks, &mut push_raw);
        }

        // Closing delimiter.
        self.s.jump(end);
        push_raw(SyntaxKind::RawDelim, &self.s);

        (SyntaxKind::Raw, SyntaxNode::inner(SyntaxKind::Raw, nodes))
    }

    /// Splits the inside of a raw block (three or more backticks) into an
    /// optional language tag, `Text` lines and `RawTrimmed` whitespace.
    ///
    /// `inner_end` is the position where the closing delimiter starts and
    /// `push_raw` emits a node ending at the scanner's current cursor.
    fn blocky_raw<F>(&mut self, inner_end: usize, mut push_raw: F)
    where
        F: FnMut(SyntaxKind, &Scanner),
    {
        // Optional language tag directly after the opening delimiter.
        if self.s.eat_if(is_id_start) {
            self.s.eat_while(is_id_continue);
            push_raw(SyntaxKind::RawLang, &self.s);
        }

        let mut lines = split_newlines(self.s.to(inner_end));

        // The dedent is the smallest number of leading whitespace characters
        // over all non-blank lines after the first.
        let dedent = lines
            .iter()
            .skip(1)
            .filter(|line| !line.chars().all(char::is_whitespace))
            // The last line (the one in front of the closing delimiter) always
            // takes part, even if it is blank.
            .chain(lines.last())
            .map(|line| line.chars().take_while(|c| c.is_whitespace()).count())
            .min()
            .unwrap_or(0);

        // Drop a whitespace-only last line; its text is covered by the final
        // `RawTrimmed` push at the end of this function.
        if lines.last().is_some_and(|last| last.chars().all(char::is_whitespace)) {
            lines.pop();
        } else if let Some(last) = lines.last_mut() {
            // If the last line ends with a backtick (ignoring trailing
            // whitespace), trim a single trailing space. This happens before
            // the first line is handled because both may be the same line.
            if last.trim_end().ends_with('`') {
                *last = last.strip_suffix(' ').unwrap_or(last);
            }
        }

        let mut lines = lines.into_iter();

        // First line: skipped entirely if whitespace-only, otherwise at most
        // one leading space is trimmed.
        if let Some(first_line) = lines.next() {
            if first_line.chars().all(char::is_whitespace) {
                // Advanced without pushing a node; the skipped text becomes
                // part of the next `RawTrimmed` push.
                self.s.advance(first_line.len());
                debug_assert!(self.s.cursor() != inner_end);
            } else {
                let line_end = self.s.cursor() + first_line.len();
                if self.s.eat_if(' ') {
                    push_raw(SyntaxKind::RawTrimmed, &self.s);
                }
                self.s.jump(line_end);
                push_raw(SyntaxKind::Text, &self.s);
            }
        }

        // Remaining lines: the newline plus up to `dedent` leading characters
        // are trimmed, the rest of the line is text.
        for line in lines {
            // Byte length of the first `dedent` characters of the line.
            let offset: usize = line.chars().take(dedent).map(char::len_utf8).sum();
            self.s.eat_newline();
            self.s.advance(offset);
            push_raw(SyntaxKind::RawTrimmed, &self.s);
            self.s.advance(line.len() - offset);
            push_raw(SyntaxKind::Text, &self.s);
        }

        // Whatever is left in front of the closing delimiter is trimmed.
        if self.s.cursor() < inner_end {
            self.s.jump(inner_end);
            push_raw(SyntaxKind::RawTrimmed, &self.s);
        }
    }

    /// Splits the inside of inline raw text into `Text` nodes separated by
    /// `RawTrimmed` nodes for each newline.
    ///
    /// `inner_end` is the position where the closing delimiter starts and
    /// `push_raw` emits a node ending at the scanner's current cursor.
    fn inline_raw<F>(&mut self, inner_end: usize, mut push_raw: F)
    where
        F: FnMut(SyntaxKind, &Scanner),
    {
        while self.s.cursor() < inner_end {
            if self.s.at(is_newline) {
                // Finish the text before the newline, then emit the newline
                // itself as trimmed.
                push_raw(SyntaxKind::Text, &self.s);
                self.s.eat_newline();
                push_raw(SyntaxKind::RawTrimmed, &self.s);
                continue;
            }
            self.s.eat();
        }
        push_raw(SyntaxKind::Text, &self.s);
    }

    /// Lexes the rest of an automatic link after `http://` or `https://` was
    /// already eaten. Records an error if `link_prefix` reports unbalanced
    /// brackets.
    fn link(&mut self) -> SyntaxKind {
        let (link, balanced) = link_prefix(self.s.after());
        self.s.advance(link.len());

        if !balanced {
            return self.error(
                "automatic links cannot contain unbalanced brackets, \
                 use the `link` function instead",
            );
        }

        SyntaxKind::Link
    }

    /// Lexes a token that starts with an ASCII digit: an enum marker such as
    /// `1.` if the digits are followed by a dot and then space, a comment, or
    /// the end of the text, and plain text otherwise.
    fn numbering(&mut self, start: usize) -> SyntaxKind {
        self.s.eat_while(char::is_ascii_digit);

        let read = self.s.from(start);
        // The number must also fit into a `u64` to count as a marker.
        if self.s.eat_if('.') && self.space_or_end() && read.parse::<u64>().is_ok() {
            return SyntaxKind::EnumMarker;
        }

        self.text()
    }

    /// Lexes a reference marker after its `@` was already eaten.
    fn ref_marker(&mut self) -> SyntaxKind {
        self.s.eat_while(is_valid_in_label_literal);

        // Give back trailing `.` and `:` so they are not part of the marker.
        while matches!(self.s.scout(-1), Some('.' | ':')) {
            self.s.uneat();
        }

        SyntaxKind::RefMarker
    }

    /// Lexes a label after its `<` was already eaten. Records an error if the
    /// label is empty or not closed with `>`.
    fn label(&mut self) -> SyntaxKind {
        let label = self.s.eat_while(is_valid_in_label_literal);
        if label.is_empty() {
            return self.error("label cannot be empty");
        }

        if !self.s.eat_if('>') {
            return self.error("unclosed label");
        }

        SyntaxKind::Label
    }

    /// Eats plain text up to the next character sequence that could start a
    /// different markup token.
    fn text(&mut self) -> SyntaxKind {
        // Builds a 128-entry lookup table that is `true` for every listed
        // ASCII character.
        macro_rules! table {
            ($(|$c:literal)*) => {
                static TABLE: [bool; 128] = {
                    let mut t = [false; 128];
                    $(t[$c as usize] = true;)*
                    t
                };
            };
        }

        // ASCII characters at which the text may have to stop.
        table! {
            | ' ' | '\t' | '\n' | '\x0b' | '\x0c' | '\r' | '\\' | '/'
            | '[' | ']' | '~' | '-' | '.' | '\'' | '"' | '*' | '_'
            | ':' | 'h' | '`' | '$' | '<' | '>' | '@' | '#'
        };

        loop {
            // Characters outside the table (non-ASCII) stop the text only if
            // they are whitespace.
            self.s.eat_until(|c: char| {
                TABLE.get(c as usize).copied().unwrap_or_else(|| c.is_whitespace())
            });

            // Look ahead on a copy of the scanner: if the stop character
            // cannot start another token here, keep it in this text node and
            // continue; otherwise stop in front of it.
            let mut s = self.s;
            match s.eat() {
                Some(' ') if s.at(char::is_alphanumeric) => {}
                Some('/') if !s.at(['/', '*']) => {}
                Some('-') if !s.at(['-', '?']) => {}
                Some('.') if !s.at("..") => {}
                Some('h') if !s.at("ttp://") && !s.at("ttps://") => {}
                Some('@') if !s.at(is_valid_in_label_literal) => {}
                _ => break,
            }

            self.s = s;
        }

        SyntaxKind::Text
    }

    /// Returns whether the character that was just eaten sits between two
    /// word characters: alphanumeric characters whose script is not Han,
    /// Hiragana, Katakana, or Hangul.
    fn in_word(&self) -> bool {
        let wordy = |c: Option<char>| {
            c.is_some_and(|c| {
                c.is_alphanumeric()
                    && !matches!(
                        c.script(),
                        Script::Han
                            | Script::Hiragana
                            | Script::Katakana
                            | Script::Hangul
                    )
            })
        };
        // `scout(-2)` skips over the character that was just eaten.
        let prev = self.s.scout(-2);
        let next = self.s.peek();
        wordy(prev) && wordy(next)
    }

    /// Returns whether the scanner is at the end of the text, at whitespace,
    /// or at the start of a line or block comment.
    fn space_or_end(&self) -> bool {
        self.s.done()
            || self.s.at(char::is_whitespace)
            || self.s.at("//")
            || self.s.at("/*")
    }
}
567
568impl Lexer<'_> {
570 fn math(&mut self, start: usize, c: char) -> (SyntaxKind, Option<SyntaxNode>) {
571 let kind = match c {
572 '\\' => self.backslash(),
573 '"' => self.string(),
574
575 '-' if self.s.eat_if(">>") => SyntaxKind::MathShorthand,
576 '-' if self.s.eat_if('>') => SyntaxKind::MathShorthand,
577 '-' if self.s.eat_if("->") => SyntaxKind::MathShorthand,
578 ':' if self.s.eat_if('=') => SyntaxKind::MathShorthand,
579 ':' if self.s.eat_if(":=") => SyntaxKind::MathShorthand,
580 '!' if self.s.eat_if('=') => SyntaxKind::MathShorthand,
581 '.' if self.s.eat_if("..") => SyntaxKind::MathShorthand,
582 '[' if self.s.eat_if('|') => SyntaxKind::MathShorthand,
583 '<' if self.s.eat_if("==>") => SyntaxKind::MathShorthand,
584 '<' if self.s.eat_if("-->") => SyntaxKind::MathShorthand,
585 '<' if self.s.eat_if("--") => SyntaxKind::MathShorthand,
586 '<' if self.s.eat_if("-<") => SyntaxKind::MathShorthand,
587 '<' if self.s.eat_if("->") => SyntaxKind::MathShorthand,
588 '<' if self.s.eat_if("<-") => SyntaxKind::MathShorthand,
589 '<' if self.s.eat_if("<<") => SyntaxKind::MathShorthand,
590 '<' if self.s.eat_if("=>") => SyntaxKind::MathShorthand,
591 '<' if self.s.eat_if("==") => SyntaxKind::MathShorthand,
592 '<' if self.s.eat_if("~~") => SyntaxKind::MathShorthand,
593 '<' if self.s.eat_if('=') => SyntaxKind::MathShorthand,
594 '<' if self.s.eat_if('<') => SyntaxKind::MathShorthand,
595 '<' if self.s.eat_if('-') => SyntaxKind::MathShorthand,
596 '<' if self.s.eat_if('~') => SyntaxKind::MathShorthand,
597 '>' if self.s.eat_if("->") => SyntaxKind::MathShorthand,
598 '>' if self.s.eat_if(">>") => SyntaxKind::MathShorthand,
599 '=' if self.s.eat_if("=>") => SyntaxKind::MathShorthand,
600 '=' if self.s.eat_if('>') => SyntaxKind::MathShorthand,
601 '=' if self.s.eat_if(':') => SyntaxKind::MathShorthand,
602 '>' if self.s.eat_if('=') => SyntaxKind::MathShorthand,
603 '>' if self.s.eat_if('>') => SyntaxKind::MathShorthand,
604 '|' if self.s.eat_if("->") => SyntaxKind::MathShorthand,
605 '|' if self.s.eat_if("=>") => SyntaxKind::MathShorthand,
606 '|' if self.s.eat_if(']') => SyntaxKind::MathShorthand,
607 '|' if self.s.eat_if('|') => SyntaxKind::MathShorthand,
608 '~' if self.s.eat_if("~>") => SyntaxKind::MathShorthand,
609 '~' if self.s.eat_if('>') => SyntaxKind::MathShorthand,
610 '*' | '-' | '~' => SyntaxKind::MathShorthand,
611
612 '.' => SyntaxKind::Dot,
613 ',' => SyntaxKind::Comma,
614 ';' => SyntaxKind::Semicolon,
615 ')' => SyntaxKind::RightParen,
616
617 '#' => SyntaxKind::Hash,
618 '_' => SyntaxKind::Underscore,
619 '$' => SyntaxKind::Dollar,
620 '/' => SyntaxKind::Slash,
621 '^' => SyntaxKind::Hat,
622 '\'' => SyntaxKind::Prime,
623 '&' => SyntaxKind::MathAlignPoint,
624 '√' | '∛' | '∜' => SyntaxKind::Root,
625
626 c if is_math_id_start(c) && self.s.at(is_math_id_continue) => {
628 self.s.eat_while(is_math_id_continue);
629 let (kind, node) = self.math_ident_or_field(start);
630 return (kind, Some(node));
631 }
632
633 _ => self.math_text(start, c),
635 };
636 (kind, None)
637 }
638
639 fn math_ident_or_field(&mut self, start: usize) -> (SyntaxKind, SyntaxNode) {
641 let mut kind = SyntaxKind::MathIdent;
642 let mut node = SyntaxNode::leaf(kind, self.s.from(start));
643 while let Some(ident) = self.maybe_dot_ident() {
644 kind = SyntaxKind::FieldAccess;
645 let field_children = vec![
646 node,
647 SyntaxNode::leaf(SyntaxKind::Dot, '.'),
648 SyntaxNode::leaf(SyntaxKind::Ident, ident),
649 ];
650 node = SyntaxNode::inner(kind, field_children);
651 }
652 (kind, node)
653 }
654
655 fn maybe_dot_ident(&mut self) -> Option<&str> {
657 if self.s.scout(1).is_some_and(is_math_id_start) && self.s.eat_if('.') {
658 let ident_start = self.s.cursor();
659 self.s.eat();
660 self.s.eat_while(is_math_id_continue);
661 Some(self.s.from(ident_start))
662 } else {
663 None
664 }
665 }
666
667 fn math_text(&mut self, start: usize, c: char) -> SyntaxKind {
668 if c.is_numeric() {
670 self.s.eat_while(char::is_numeric);
671 let mut s = self.s;
672 if s.eat_if('.') && !s.eat_while(char::is_numeric).is_empty() {
673 self.s = s;
674 }
675 SyntaxKind::MathText
676 } else {
677 let len = self
678 .s
679 .get(start..self.s.string().len())
680 .graphemes(true)
681 .next()
682 .map_or(0, str::len);
683 self.s.jump(start + len);
684 if len > c.len_utf8() {
685 SyntaxKind::Text
688 } else {
689 SyntaxKind::MathText
690 }
691 }
692 }
693
694 pub fn maybe_math_named_arg(&mut self, start: usize) -> Option<SyntaxNode> {
696 let cursor = self.s.cursor();
697 self.s.jump(start);
698 if self.s.eat_if(is_id_start) {
699 self.s.eat_while(is_id_continue);
700 if self.s.at(':') && !self.s.at(":=") && !self.s.at("::=") {
703 let node = if self.s.from(start) != "_" {
705 SyntaxNode::leaf(SyntaxKind::Ident, self.s.from(start))
706 } else {
707 let msg = SyntaxError::new("expected identifier, found underscore");
708 SyntaxNode::error(msg, self.s.from(start))
709 };
710 return Some(node);
711 }
712 }
713 self.s.jump(cursor);
714 None
715 }
716
717 pub fn maybe_math_spread_arg(&mut self, start: usize) -> Option<SyntaxNode> {
719 let cursor = self.s.cursor();
720 self.s.jump(start);
721 if self.s.eat_if("..") {
722 if !self.space_or_end() && !self.s.at('.') {
725 let node = SyntaxNode::leaf(SyntaxKind::Dots, self.s.from(start));
726 return Some(node);
727 }
728 }
729 self.s.jump(cursor);
730 None
731 }
732}
733
/// Code-mode lexing.
impl Lexer<'_> {
    /// Lexes one code token whose first character `c` was already eaten.
    ///
    /// `start` is the cursor position before `c`. The order of the arms is
    /// significant: two-character operators are tried before their
    /// single-character fallbacks further down.
    fn code(&mut self, start: usize, c: char) -> SyntaxKind {
        match c {
            '<' if self.s.at(is_id_continue) => self.label(),
            '0'..='9' => self.number(start, c),
            // A dot directly followed by a digit starts a number like `.5`.
            '.' if self.s.at(char::is_ascii_digit) => self.number(start, c),
            '"' => self.string(),

            // Two-character operators.
            '=' if self.s.eat_if('=') => SyntaxKind::EqEq,
            '!' if self.s.eat_if('=') => SyntaxKind::ExclEq,
            '<' if self.s.eat_if('=') => SyntaxKind::LtEq,
            '>' if self.s.eat_if('=') => SyntaxKind::GtEq,
            '+' if self.s.eat_if('=') => SyntaxKind::PlusEq,
            // U+2212 is accepted wherever the ASCII hyphen is.
            '-' | '\u{2212}' if self.s.eat_if('=') => SyntaxKind::HyphEq,
            '*' if self.s.eat_if('=') => SyntaxKind::StarEq,
            '/' if self.s.eat_if('=') => SyntaxKind::SlashEq,
            '.' if self.s.eat_if('.') => SyntaxKind::Dots,
            '=' if self.s.eat_if('>') => SyntaxKind::Arrow,

            // Single-character tokens.
            '{' => SyntaxKind::LeftBrace,
            '}' => SyntaxKind::RightBrace,
            '[' => SyntaxKind::LeftBracket,
            ']' => SyntaxKind::RightBracket,
            '(' => SyntaxKind::LeftParen,
            ')' => SyntaxKind::RightParen,
            '$' => SyntaxKind::Dollar,
            ',' => SyntaxKind::Comma,
            ';' => SyntaxKind::Semicolon,
            ':' => SyntaxKind::Colon,
            '.' => SyntaxKind::Dot,
            '+' => SyntaxKind::Plus,
            '-' | '\u{2212}' => SyntaxKind::Minus,
            '*' => SyntaxKind::Star,
            '/' => SyntaxKind::Slash,
            '=' => SyntaxKind::Eq,
            '<' => SyntaxKind::Lt,
            '>' => SyntaxKind::Gt,

            c if is_id_start(c) => self.ident(start),

            c => self.error(eco_format!("the character `{c}` is not valid in code")),
        }
    }

    /// Lexes an identifier whose first character was already eaten and
    /// classifies it as a keyword, an underscore, or a plain identifier.
    fn ident(&mut self, start: usize) -> SyntaxKind {
        self.s.eat_while(is_id_continue);
        let ident = self.s.from(start);

        // A keyword name directly after `.` or `@` stays a plain identifier,
        // unless the dot belongs to `..`.
        let prev = self.s.get(0..start);
        if (!prev.ends_with(['.', '@']) || prev.ends_with(".."))
            && let Some(keyword) = keyword(ident)
        {
            return keyword;
        }

        if ident == "_" { SyntaxKind::Underscore } else { SyntaxKind::Ident }
    }

    /// Lexes a number whose first character `first_c` (a digit or a dot) was
    /// already eaten, including an optional unit suffix.
    ///
    /// Returns `Int`, `Float`, or `Numeric` (number with a valid suffix), or
    /// records an error for a malformed number or suffix.
    fn number(&mut self, start: usize, first_c: char) -> SyntaxKind {
        // A `0b`, `0o`, or `0x` prefix selects a non-decimal base.
        let base = match first_c {
            '0' if self.s.eat_if('b') => 2,
            '0' if self.s.eat_if('o') => 8,
            '0' if self.s.eat_if('x') => 16,
            _ => 10,
        };

        // Eat the leading digits. For hexadecimal, every ASCII alphanumeric
        // character is eaten here; validity is checked when parsing below.
        if base == 16 {
            self.s.eat_while(char::is_ascii_alphanumeric);
        } else {
            self.s.eat_while(char::is_ascii_digit);
        }

        // Fractional part and exponent (decimal numbers only).
        let mut is_float = false;
        if base == 10 {
            if first_c == '.' {
                // The digits after the dot were already eaten above.
                is_float = true;
            } else if !self.s.at("..")
                && !self.s.scout(1).is_some_and(is_id_start)
                && self.s.eat_if('.')
            {
                // The dot is a decimal separator only if it is not part of
                // `..` and not followed by the start of an identifier.
                is_float = true;
                self.s.eat_while(char::is_ascii_digit);
            }

            // An `e` that starts the `em` suffix is not an exponent.
            if !self.s.at("em") && self.s.eat_if(['e', 'E']) {
                is_float = true;
                self.s.eat_if(['+', '-']);
                self.s.eat_while(char::is_ascii_digit);
            }
        }

        let number = self.s.from(start);
        let suffix = self.s.eat_while(|c: char| c.is_ascii_alphanumeric() || c == '%');

        // Decimal integers that overflow `i64` are treated as floats.
        if base == 10
            && !is_float
            && let Err(e) = i64::from_str_radix(number, base)
            && matches!(e.kind(), IntErrorKind::PosOverflow | IntErrorKind::NegOverflow)
            && number.parse::<f64>().is_ok()
        {
            is_float = true;
        }

        // `Ok(None)`: no suffix, `Ok(Some(()))`: a known unit suffix.
        let mut suffix_result = match suffix {
            "" => Ok(None),
            "pt" | "mm" | "cm" | "in" | "deg" | "rad" | "em" | "fr" | "%" => Ok(Some(())),
            _ => Err(eco_format!("invalid number suffix: {suffix}")),
        };

        let number_result = if is_float && number.parse::<f64>().is_err() {
            Err(eco_format!("invalid floating point number: {number}"))
        } else if base == 10 {
            Ok(())
        } else {
            let name = match base {
                2 => "binary",
                8 => "octal",
                16 => "hexadecimal",
                _ => unreachable!(),
            };
            // `[2..]` skips the two-character base prefix.
            match i64::from_str_radix(&number[2..], base) {
                Ok(_) if suffix.is_empty() => Ok(()),
                Ok(value) => {
                    // Non-decimal numbers cannot carry a suffix. If the suffix
                    // itself is valid, suggest the decimal spelling.
                    if suffix_result.is_ok() {
                        suffix_result = Err(eco_format!(
                            "try using a decimal number: {value}{suffix}"
                        ));
                    }
                    Err(eco_format!("{name} numbers cannot have a suffix"))
                }
                Err(_) => Err(eco_format!("invalid {name} number: {number}")),
            }
        };

        // Combine both results into a token kind or an error.
        match (number_result, suffix_result) {
            (Ok(()), Ok(None)) if is_float => SyntaxKind::Float,
            (Ok(()), Ok(None)) => SyntaxKind::Int,
            (Ok(()), Ok(Some(()))) => SyntaxKind::Numeric,
            (Err(number_err), Err(suffix_err)) => {
                // The number error is the main message, the suffix message
                // becomes a hint.
                let err = self.error(number_err);
                self.hint(suffix_err);
                err
            }
            (Ok(()), Err(msg)) | (Err(msg), Ok(_)) => self.error(msg),
        }
    }

    /// Lexes a string literal after its opening quote was already eaten.
    /// Records an error if no closing quote is found.
    fn string(&mut self) -> SyntaxKind {
        // `escaped` is true while the previous character was an unescaped
        // backslash, so that `\"` does not end the string but `\\"` does.
        let mut escaped = false;
        self.s.eat_until(|c| {
            let stop = c == '"' && !escaped;
            escaped = c == '\\' && !escaped;
            stop
        });

        if !self.s.eat_if('"') {
            return self.error("unclosed string");
        }

        SyntaxKind::Str
    }
}
910
911fn keyword(ident: &str) -> Option<SyntaxKind> {
913 Some(match ident {
914 "none" => SyntaxKind::None,
915 "auto" => SyntaxKind::Auto,
916 "true" => SyntaxKind::Bool,
917 "false" => SyntaxKind::Bool,
918 "not" => SyntaxKind::Not,
919 "and" => SyntaxKind::And,
920 "or" => SyntaxKind::Or,
921 "let" => SyntaxKind::Let,
922 "set" => SyntaxKind::Set,
923 "show" => SyntaxKind::Show,
924 "context" => SyntaxKind::Context,
925 "if" => SyntaxKind::If,
926 "else" => SyntaxKind::Else,
927 "for" => SyntaxKind::For,
928 "in" => SyntaxKind::In,
929 "while" => SyntaxKind::While,
930 "break" => SyntaxKind::Break,
931 "continue" => SyntaxKind::Continue,
932 "return" => SyntaxKind::Return,
933 "import" => SyntaxKind::Import,
934 "include" => SyntaxKind::Include,
935 "as" => SyntaxKind::As,
936 _ => return None,
937 })
938}
939
/// Additional scanner operations used by the lexer.
trait ScannerExt {
    /// Moves the cursor forward by `by`, relative to its current position.
    fn advance(&mut self, by: usize);
    /// Eats a single newline, treating `\r\n` as one newline. Returns whether
    /// a newline was eaten.
    fn eat_newline(&mut self) -> bool;
}
944
945impl ScannerExt for Scanner<'_> {
946 fn advance(&mut self, by: usize) {
947 self.jump(self.cursor() + by);
948 }
949
950 fn eat_newline(&mut self) -> bool {
951 let ate = self.eat_if(is_newline);
952 if ate && self.before().ends_with('\r') {
953 self.eat_if('\n');
954 }
955 ate
956 }
957}
958
959#[inline]
961fn is_space(character: char, mode: SyntaxMode) -> bool {
962 match mode {
963 SyntaxMode::Markup => matches!(character, ' ' | '\t') || is_newline(character),
964 _ => character.is_whitespace(),
965 }
966}
967
/// Whether `character` is treated as a newline by the lexer.
#[inline]
pub fn is_newline(character: char) -> bool {
    matches!(
        character,
        // U+000A..=U+000D: line feed, vertical tab, form feed, carriage
        // return.
        '\n'..='\r'
        // Next line, line separator, paragraph separator.
        | '\u{0085}' | '\u{2028}' | '\u{2029}'
    )
}
979
/// Extracts the longest prefix of `text` that can be part of an automatic
/// link.
///
/// Returns the prefix together with a flag that tells whether every `(` and
/// `[` inside the scanned portion was closed by its matching bracket.
///
/// Scanning stops at the first character that is not allowed in a link and at
/// a closing bracket that does not match the innermost open bracket. Trailing
/// punctuation (`!`, `,`, `.`, `:`, `;`, `?`, `'`) is excluded from the
/// prefix.
pub fn link_prefix(text: &str) -> (&str, bool) {
    // Opening brackets that still wait for their closing counterpart.
    let mut open: Vec<char> = Vec::new();
    // Byte offset just past the last accepted character.
    let mut end = 0;

    for (idx, c) in text.char_indices() {
        let accepted = match c {
            '0'..='9' | 'a'..='z' | 'A'..='Z' => true,
            '!' | '#' | '$' | '%' | '&' | '*' | '+' | ',' | '-' | '.' | '/' | ':'
            | ';' | '=' | '?' | '@' | '_' | '~' | '\'' => true,
            '[' | '(' => {
                open.push(c);
                true
            }
            ']' | ')' => {
                let opener = if c == ']' { '[' } else { '(' };
                // Only a matching closer consumes its opener. A mismatched
                // closer ends the link and leaves the opener pending, so that
                // a prefix like `a(` in `a(]` is reported as unbalanced.
                if open.last() == Some(&opener) {
                    open.pop();
                    true
                } else {
                    false
                }
            }
            _ => false,
        };

        if !accepted {
            break;
        }
        end = idx + c.len_utf8();
    }

    // Trailing punctuation more likely belongs to the surrounding text than
    // to the link itself.
    let link = text[..end].trim_end_matches(['!', ',', '.', ':', ';', '?', '\'']);

    (link, open.is_empty())
}
1016
1017pub fn split_newlines(text: &str) -> Vec<&str> {
1019 let mut s = Scanner::new(text);
1020 let mut lines = Vec::new();
1021 let mut start = 0;
1022 let mut end = 0;
1023
1024 while let Some(c) = s.eat() {
1025 if is_newline(c) {
1026 if c == '\r' {
1027 s.eat_if('\n');
1028 }
1029
1030 lines.push(&text[start..end]);
1031 start = s.cursor();
1032 }
1033 end = s.cursor();
1034 }
1035
1036 lines.push(&text[start..]);
1037 lines
1038}
1039
1040fn count_newlines(text: &str) -> usize {
1042 let mut newlines = 0;
1043 let mut s = Scanner::new(text);
1044 while let Some(c) = s.eat() {
1045 if is_newline(c) {
1046 if c == '\r' {
1047 s.eat_if('\n');
1048 }
1049 newlines += 1;
1050 }
1051 }
1052 newlines
1053}
1054
1055#[inline]
1063pub fn is_ident(string: &str) -> bool {
1064 let mut chars = string.chars();
1065 chars
1066 .next()
1067 .is_some_and(|c| is_id_start(c) && chars.all(is_id_continue))
1068}
1069
1070#[inline]
1072pub fn is_id_start(c: char) -> bool {
1073 is_xid_start(c) || c == '_'
1074}
1075
1076#[inline]
1078pub fn is_id_continue(c: char) -> bool {
1079 is_xid_continue(c) || c == '_' || c == '-'
1080}
1081
/// Whether `c` can start an identifier in math: any `XID_Start` character.
/// Unlike `is_id_start`, an underscore is not accepted.
#[inline]
fn is_math_id_start(c: char) -> bool {
    is_xid_start(c)
}
1087
1088#[inline]
1090fn is_math_id_continue(c: char) -> bool {
1091 is_xid_continue(c) && c != '_'
1092}
1093
1094#[inline]
1096fn is_valid_in_label_literal(c: char) -> bool {
1097 is_id_continue(c) || matches!(c, ':' | '.')
1098}
1099
1100pub fn is_valid_label_literal_id(id: &str) -> bool {
1102 !id.is_empty() && id.chars().all(is_valid_in_label_literal)
1103}