1#![doc = include_str!("readme.md")]
2pub mod token_type;
3
4use crate::{language::MarkdownLanguage, lexer::token_type::MarkdownTokenType};
5use oak_core::{Lexer, LexerCache, LexerState, TextEdit, errors::OakError, lexer::LexOutput, source::Source};
6
7type State<'a, S> = LexerState<'a, S, MarkdownLanguage>;
8
/// Tokenizer for Markdown text.
///
/// Borrows a [`MarkdownLanguage`] configuration whose feature flags
/// (headings, tables, math, footnotes, …) enable or disable individual
/// Markdown extensions during lexing.
#[derive(Clone, Debug)]
pub struct MarkdownLexer<'config> {
    // Feature-flag configuration consulted by the dispatch loop in `run`.
    _config: &'config MarkdownLanguage,
}
14
15impl<'config> MarkdownLexer<'config> {
    /// Creates a lexer that borrows the given language configuration.
    pub fn new(config: &'config MarkdownLanguage) -> Self {
        Self { _config: config }
    }
20
21 fn run<S: Source + ?Sized>(&self, state: &mut State<S>) -> Result<(), OakError> {
22 while state.not_at_end() {
23 let safe_point = state.get_position();
24
25 if let Some(ch) = state.peek() {
26 match ch {
27 ' ' | '\t' => {
28 if self._config.allow_indented_code_blocks && self.lex_indented_code_block(state) {
29 continue;
30 }
31 self.skip_whitespace(state);
32 }
33 '\n' | '\r' => {
34 self.lex_newline(state);
35 }
36 '$' if self._config.allow_math => {
37 if self.lex_math(state) {
38 continue;
39 }
40 self.lex_special_char(state);
41 }
42 '^' if self._config.allow_sub_superscript || self._config.allow_footnotes => {
43 if self._config.allow_footnotes && self.lex_footnote(state) {
44 continue;
45 }
46 if self._config.allow_sub_superscript && self.lex_sub_superscript(state) {
47 continue;
48 }
49 self.lex_special_char(state);
50 }
51 '#' => {
52 if self._config.allow_headings && self.lex_heading(state) {
53 continue;
54 }
55 self.lex_special_char(state);
56 }
57 '`' => {
58 if self._config.allow_fenced_code_blocks && self.lex_code_block(state) {
59 continue;
60 }
61 if self.lex_inline_code(state) {
62 continue;
63 }
64 self.lex_special_char(state);
65 }
66 '~' => {
67 if self.lex_code_block(state) {
68 continue;
69 }
70 if self._config.allow_strikethrough && self.lex_strikethrough(state) {
71 continue;
72 }
73 if self._config.allow_sub_superscript && self.lex_sub_superscript(state) {
74 continue;
75 }
76 self.lex_special_char(state);
77 }
78 '*' | '_' => {
79 if self._config.allow_horizontal_rules && self.lex_horizontal_rule(state) {
80 continue;
81 }
82 if self._config.allow_lists && self.lex_list_marker(state) {
83 continue;
84 }
85 if self.lex_emphasis(state) {
86 continue;
87 }
88 self.lex_special_char(state);
89 }
90 '-' => {
91 if self._config.allow_front_matter && self.lex_front_matter(state) {
92 continue;
93 }
94 if self._config.allow_horizontal_rules && self.lex_horizontal_rule(state) {
95 continue;
96 }
97 if self._config.allow_lists && self.lex_list_marker(state) {
98 continue;
99 }
100 self.lex_special_char(state);
101 }
102 '+' => {
103 if self._config.allow_lists && self.lex_list_marker(state) {
104 continue;
105 }
106 self.lex_special_char(state);
107 }
108 '!' => {
109 if self.lex_link_or_image(state) {
110 continue;
111 }
112 self.lex_special_char(state);
113 }
114 '[' => {
115 if self._config.allow_task_lists && self.lex_task_marker(state) {
116 continue;
117 }
118 if self.lex_link_or_image(state) {
119 continue;
120 }
121 self.lex_special_char(state);
122 }
123 '>' => {
124 if self._config.allow_blockquotes && self.lex_blockquote(state) {
125 continue;
126 }
127 self.lex_special_char(state);
128 }
129 '|' if self._config.allow_tables => {
130 self.lex_special_char(state);
131 }
132 '0'..='9' => {
133 if self.lex_list_marker(state) {
134 continue;
135 }
136 self.lex_text(state);
137 }
138 '<' => {
139 if self._config.allow_html && self.lex_html_tag(state) {
140 continue;
141 }
142 if self._config.allow_xml && self.lex_xml_tag(state) {
143 continue;
144 }
145 self.lex_special_char(state);
146 }
147 ']' | '(' | ')' | '|' | '.' | ':' | '\\' => {
148 self.lex_special_char(state);
149 }
150 _ => {
151 if self.lex_text(state) {
152 continue;
153 }
154 let start_pos = state.get_position();
156 state.advance(ch.len_utf8());
157 state.add_token(MarkdownTokenType::Error, start_pos, state.get_position());
158 }
159 }
160 }
161
162 state.advance_if_dead_lock(safe_point)
163 }
164 Ok(())
165 }
166
167 fn skip_whitespace<S: Source + ?Sized>(&self, state: &mut State<S>) -> bool {
169 let start_pos = state.get_position();
170
171 while let Some(ch) = state.peek() {
172 if ch == ' ' || ch == '\t' {
173 state.advance(ch.len_utf8());
174 }
175 else {
176 break;
177 }
178 }
179
180 if state.get_position() > start_pos {
181 state.add_token(MarkdownTokenType::Whitespace, start_pos, state.get_position());
182 true
183 }
184 else {
185 false
186 }
187 }
188
189 fn lex_newline<S: Source + ?Sized>(&self, state: &mut State<S>) -> bool {
191 let start_pos = state.get_position();
192
193 if let Some('\n') = state.peek() {
194 state.advance(1);
195 state.add_token(MarkdownTokenType::Newline, start_pos, state.get_position());
196 true
197 }
198 else if let Some('\r') = state.peek() {
199 state.advance(1);
200 if let Some('\n') = state.peek() {
201 state.advance(1);
202 }
203 state.add_token(MarkdownTokenType::Newline, start_pos, state.get_position());
204 true
205 }
206 else {
207 false
208 }
209 }
210
211 fn lex_heading<S: Source + ?Sized>(&self, state: &mut State<S>) -> bool {
213 let start_pos = state.get_position();
214
215 if start_pos > 0 {
217 if let Some(prev_char) = state.source().get_char_at(start_pos - 1) {
218 if prev_char != '\n' && prev_char != '\r' {
219 return false;
220 }
221 }
222 }
223
224 if let Some('#') = state.peek() {
225 let mut level = 0;
226 let mut pos = start_pos;
227
228 while let Some('#') = state.source().get_char_at(pos) {
230 level += 1;
231 pos += 1;
232 if level > 6 {
233 return false; }
235 }
236
237 if let Some(ch) = state.source().get_char_at(pos) {
239 if ch != ' ' && ch != '\t' && ch != '\n' && ch != '\r' {
240 return false;
241 }
242 }
243
244 state.advance(level);
245
246 let heading_kind = match level {
247 1 => MarkdownTokenType::Heading1,
248 2 => MarkdownTokenType::Heading2,
249 3 => MarkdownTokenType::Heading3,
250 4 => MarkdownTokenType::Heading4,
251 5 => MarkdownTokenType::Heading5,
252 6 => MarkdownTokenType::Heading6,
253 _ => return false,
254 };
255
256 state.add_token(heading_kind, start_pos, state.get_position());
257 true
258 }
259 else {
260 false
261 }
262 }
263
264 fn lex_inline_code<S: Source + ?Sized>(&self, state: &mut State<S>) -> bool {
266 let start_pos = state.get_position();
267
268 if let Some('`') = state.peek() {
269 state.advance(1);
270 let mut found_end = false;
271
272 while let Some(ch) = state.peek() {
273 if ch == '`' {
274 state.advance(1);
275 found_end = true;
276 break;
277 }
278 else if ch == '\n' || ch == '\r' {
279 break; }
281 else {
282 state.advance(ch.len_utf8());
283 }
284 }
285
286 if found_end {
287 state.add_token(MarkdownTokenType::InlineCode, start_pos, state.get_position());
288 true
289 }
290 else {
291 state.set_position(start_pos);
293 false
294 }
295 }
296 else {
297 false
298 }
299 }
300
301 fn lex_code_block<S: Source + ?Sized>(&self, state: &mut State<S>) -> bool {
303 let start_pos = state.get_position();
304
305 if start_pos > 0 {
307 if let Some(prev_char) = state.source().get_char_at(start_pos - 1) {
308 if prev_char != '\n' && prev_char != '\r' {
309 return false;
310 }
311 }
312 }
313
314 let fence_char = if let Some('`') = state.peek() {
316 '`'
317 }
318 else if let Some('~') = state.peek() {
319 '~'
320 }
321 else {
322 return false;
323 };
324
325 let mut fence_count = 0;
326 let mut pos = start_pos;
327
328 while let Some(ch) = state.source().get_char_at(pos) {
330 if ch == fence_char {
331 fence_count += 1;
332 pos += 1;
333 }
334 else {
335 break;
336 }
337 }
338
339 if fence_count < 3 {
340 return false; }
342
343 state.advance(fence_count);
344 state.add_token(MarkdownTokenType::CodeFence, start_pos, state.get_position());
345
346 let lang_start = state.get_position();
348 while let Some(ch) = state.peek() {
349 if ch == '\n' || ch == '\r' {
350 break;
351 }
352 else if ch != ' ' && ch != '\t' {
353 state.advance(ch.len_utf8());
354 }
355 else {
356 break;
357 }
358 }
359
360 if state.get_position() > lang_start {
361 state.add_token(MarkdownTokenType::CodeLanguage, lang_start, state.get_position());
362 }
363
364 true
365 }
366
367 fn lex_emphasis<S: Source + ?Sized>(&self, state: &mut State<S>) -> bool {
369 let start_pos = state.get_position();
370
371 let marker_char = if let Some('*') = state.peek() {
372 '*'
373 }
374 else if let Some('_') = state.peek() {
375 '_'
376 }
377 else {
378 return false;
379 };
380
381 let mut marker_count = 0;
382 let mut pos = start_pos;
383
384 while let Some(ch) = state.source().get_char_at(pos) {
386 if ch == marker_char {
387 marker_count += 1;
388 pos += 1;
389 }
390 else {
391 break;
392 }
393 }
394
395 if marker_count == 0 {
396 return false;
397 }
398
399 state.advance(marker_count);
400
401 let token_kind = if marker_count >= 2 { MarkdownTokenType::Strong } else { MarkdownTokenType::Emphasis };
402
403 state.add_token(token_kind, start_pos, state.get_position());
404 true
405 }
406
407 fn lex_strikethrough<S: Source + ?Sized>(&self, state: &mut State<S>) -> bool {
409 let start_pos = state.get_position();
410
411 if let Some('~') = state.peek() {
412 if let Some('~') = state.source().get_char_at(start_pos + 1) {
413 state.advance(2);
414 state.add_token(MarkdownTokenType::Strikethrough, start_pos, state.get_position());
415 true
416 }
417 else {
418 false
419 }
420 }
421 else {
422 false
423 }
424 }
425
426 fn lex_link_or_image<S: Source + ?Sized>(&self, state: &mut State<S>) -> bool {
428 let start_pos = state.get_position();
429
430 let is_image = if let Some('!') = state.peek() {
432 state.advance(1);
433 true
434 }
435 else {
436 false
437 };
438
439 if let Some('[') = state.peek() {
440 state.advance(1);
441
442 let token_kind = if is_image { MarkdownTokenType::Image } else { MarkdownTokenType::Link };
443
444 state.add_token(token_kind, start_pos, state.get_position());
445 true
446 }
447 else {
448 if is_image {
449 state.set_position(start_pos);
451 }
452 false
453 }
454 }
455
456 fn lex_list_marker<S: Source + ?Sized>(&self, state: &mut State<S>) -> bool {
458 let start_pos = state.get_position();
459
460 let mut check_pos = start_pos;
462 while check_pos > 0 {
463 check_pos -= 1;
464 if let Some(ch) = state.source().get_char_at(check_pos) {
465 if ch == '\n' || ch == '\r' {
466 break;
467 }
468 else if ch != ' ' && ch != '\t' {
469 return false; }
471 }
472 }
473
474 if let Some(ch) = state.peek() {
475 match ch {
476 '-' | '*' | '+' => {
477 state.advance(1);
479 if let Some(next_ch) = state.peek() {
480 if next_ch == ' ' || next_ch == '\t' {
481 state.add_token(MarkdownTokenType::ListMarker, start_pos, state.get_position());
482 return true;
483 }
484 }
485 state.set_position(start_pos);
486 false
487 }
488 '0'..='9' => {
489 while let Some(digit) = state.peek() {
491 if digit.is_ascii_digit() { state.advance(1) } else { break }
492 }
493
494 if let Some('.') = state.peek() {
495 state.advance(1);
496 if let Some(next_ch) = state.peek() {
497 if next_ch == ' ' || next_ch == '\t' {
498 state.add_token(MarkdownTokenType::ListMarker, start_pos, state.get_position());
499 return true;
500 }
501 }
502 }
503
504 state.set_position(start_pos);
505 false
506 }
507 _ => false,
508 }
509 }
510 else {
511 false
512 }
513 }
514
515 fn lex_task_marker<S: Source + ?Sized>(&self, state: &mut State<S>) -> bool {
517 let start_pos = state.get_position();
518
519 if let Some('[') = state.peek() {
520 state.advance(1);
521 if let Some(ch) = state.peek() {
522 if ch == ' ' || ch == 'x' || ch == 'X' {
523 state.advance(1);
524 if let Some(']') = state.peek() {
525 state.advance(1);
526 state.add_token(MarkdownTokenType::TaskMarker, start_pos, state.get_position());
527 return true;
528 }
529 }
530 }
531 state.set_position(start_pos);
532 }
533 false
534 }
535
536 fn lex_html_tag<S: Source + ?Sized>(&self, state: &mut State<S>) -> bool {
538 self.lex_any_tag(state, MarkdownTokenType::HtmlTag, MarkdownTokenType::HtmlComment)
539 }
540
541 fn lex_xml_tag<S: Source + ?Sized>(&self, state: &mut State<S>) -> bool {
543 self.lex_any_tag(state, MarkdownTokenType::XmlTag, MarkdownTokenType::XmlComment)
544 }
545
546 fn lex_any_tag<S: Source + ?Sized>(&self, state: &mut State<S>, tag_kind: MarkdownTokenType, comment_kind: MarkdownTokenType) -> bool {
548 let start_pos = state.get_position();
549
550 if let Some('<') = state.peek() {
551 state.advance(1);
552
553 if let Some('!') = state.peek() {
555 if state.source().get_char_at(state.get_position() + 1) == Some('-') && state.source().get_char_at(state.get_position() + 2) == Some('-') {
556 state.advance(3);
557 let mut found_end = false;
558 while let Some(ch) = state.peek() {
559 if ch == '-' && state.source().get_char_at(state.get_position() + 1) == Some('-') && state.source().get_char_at(state.get_position() + 2) == Some('>') {
560 state.advance(3);
561 found_end = true;
562 break;
563 }
564 state.advance(ch.len_utf8());
565 }
566 if found_end {
567 state.add_token(comment_kind, start_pos, state.get_position());
568 return true;
569 }
570 }
571 }
572
573 let mut found_end = false;
575 let mut in_string = None; while let Some(ch) = state.peek() {
578 if let Some(quote) = in_string {
579 if ch == quote {
580 in_string = None;
581 }
582 }
583 else {
584 if ch == '>' {
585 state.advance(1);
586 found_end = true;
587 break;
588 }
589 else if ch == '"' || ch == '\'' {
590 in_string = Some(ch);
591 }
592 }
593 state.advance(ch.len_utf8());
594 }
595
596 if found_end {
597 state.add_token(tag_kind, start_pos, state.get_position());
598 true
599 }
600 else {
601 state.set_position(start_pos);
602 false
603 }
604 }
605 else {
606 false
607 }
608 }
609
610 fn lex_blockquote<S: Source + ?Sized>(&self, state: &mut State<S>) -> bool {
612 let start_pos = state.get_position();
613
614 let mut check_pos = start_pos;
616 while check_pos > 0 {
617 check_pos -= 1;
618 if let Some(ch) = state.source().get_char_at(check_pos) {
619 if ch == '\n' || ch == '\r' {
620 break;
621 }
622 else if ch != ' ' && ch != '\t' {
623 return false;
624 }
625 }
626 }
627
628 if let Some('>') = state.peek() {
629 state.advance(1);
630 state.add_token(MarkdownTokenType::BlockquoteMarker, start_pos, state.get_position());
631 true
632 }
633 else {
634 false
635 }
636 }
637
638 fn lex_horizontal_rule<S: Source + ?Sized>(&self, state: &mut State<S>) -> bool {
640 let start_pos = state.get_position();
641
642 let mut check_pos = start_pos;
644 while check_pos > 0 {
645 check_pos -= 1;
646 if let Some(ch) = state.source().get_char_at(check_pos) {
647 if ch == '\n' || ch == '\r' {
648 break;
649 }
650 else if ch != ' ' && ch != '\t' {
651 return false;
652 }
653 }
654 }
655
656 if let Some(ch) = state.peek() {
657 if ch == '-' || ch == '*' || ch == '_' {
658 let rule_char = ch;
659 let mut count = 0;
660 let mut pos = start_pos;
661
662 while let Some(current_ch) = state.source().get_char_at(pos) {
664 if current_ch == rule_char {
665 count += 1;
666 pos += 1
667 }
668 else if current_ch == ' ' || current_ch == '\t' {
669 pos += 1; }
671 else {
672 break;
673 }
674 }
675
676 if count >= 3 {
677 while let Some(current_ch) = state.source().get_char_at(pos) {
679 if current_ch == '\n' || current_ch == '\r' {
680 break;
681 }
682 else if current_ch == ' ' || current_ch == '\t' {
683 pos += 1
684 }
685 else {
686 return false; }
688 }
689
690 state.set_position(pos);
691 state.add_token(MarkdownTokenType::HorizontalRule, start_pos, state.get_position());
692 return true;
693 }
694 }
695 }
696 false
697 }
698
699 fn lex_math<S: Source + ?Sized>(&self, state: &mut State<S>) -> bool {
701 let start_pos = state.get_position();
702
703 if let Some('$') = state.peek() {
704 state.advance(1);
705 let mut is_block = false;
706
707 if let Some('$') = state.peek() {
708 state.advance(1);
709 is_block = true;
710 }
711
712 let mut found_end = false;
713 while let Some(ch) = state.peek() {
714 if ch == '$' {
715 if is_block {
716 if let Some('$') = state.source().get_char_at(state.get_position() + 1) {
717 state.advance(2);
718 found_end = true;
719 break;
720 }
721 }
722 else {
723 state.advance(1);
724 found_end = true;
725 break;
726 }
727 }
728 state.advance(ch.len_utf8())
729 }
730
731 if found_end {
732 let kind = if is_block { MarkdownTokenType::MathBlock } else { MarkdownTokenType::MathInline };
733 state.add_token(kind, start_pos, state.get_position());
734 true
735 }
736 else {
737 state.set_position(start_pos);
738 false
739 }
740 }
741 else {
742 false
743 }
744 }
745
746 fn lex_front_matter<S: Source + ?Sized>(&self, state: &mut State<S>) -> bool {
748 let start_pos = state.get_position();
749
750 if start_pos != 0 {
752 return false;
753 }
754
755 if state.peek() == Some('-') && state.source().get_char_at(1) == Some('-') && state.source().get_char_at(2) == Some('-') {
756 state.advance(3);
757 let mut found_end = false;
759 while state.not_at_end() {
760 if state.peek() == Some('\n') || state.peek() == Some('\r') {
761 state.advance(1);
762 if state.peek() == Some('\n') {
763 state.advance(1)
764 }
765 if state.peek() == Some('-') && state.source().get_char_at(state.get_position() + 1) == Some('-') && state.source().get_char_at(state.get_position() + 2) == Some('-') {
766 state.advance(3);
767 found_end = true;
768 break;
769 }
770 }
771 else {
772 state.advance(1)
773 }
774 }
775
776 if found_end {
777 state.add_token(MarkdownTokenType::FrontMatter, start_pos, state.get_position());
778 true
779 }
780 else {
781 state.set_position(start_pos);
782 false
783 }
784 }
785 else {
786 false
787 }
788 }
789
790 fn lex_footnote<S: Source + ?Sized>(&self, state: &mut State<S>) -> bool {
792 let start_pos = state.get_position();
793
794 if let Some('^') = state.peek() {
795 let check_pos = start_pos;
797 if check_pos > 0 && state.source().get_char_at(check_pos - 1) == Some('[') {
798 state.advance(1);
799 while let Some(ch) = state.peek() {
800 if ch == ']' {
801 state.advance(1);
802 if state.peek() == Some(':') {
804 state.advance(1);
805 state.add_token(MarkdownTokenType::FootnoteDefinition, start_pos - 1, state.get_position())
806 }
807 else {
808 state.add_token(MarkdownTokenType::FootnoteReference, start_pos - 1, state.get_position())
809 }
810 return true;
811 }
812 else if ch == '\n' || ch == '\r' {
813 break;
814 }
815 state.advance(ch.len_utf8())
816 }
817 }
818 state.set_position(start_pos);
819 }
820 false
821 }
822
823 fn lex_sub_superscript<S: Source + ?Sized>(&self, state: &mut State<S>) -> bool {
825 let start_pos = state.get_position();
826
827 if let Some(ch) = state.peek() {
828 let marker = ch;
829 if marker == '^' || marker == '~' {
830 state.advance(1);
831 let mut found_end = false;
832 while let Some(next_ch) = state.peek() {
833 if next_ch == marker {
834 state.advance(1);
835 found_end = true;
836 break;
837 }
838 else if next_ch == ' ' || next_ch == '\t' || next_ch == '\n' || next_ch == '\r' {
839 break;
840 }
841 state.advance(next_ch.len_utf8())
842 }
843
844 if found_end {
845 let kind = if marker == '^' { MarkdownTokenType::Superscript } else { MarkdownTokenType::Subscript };
846 state.add_token(kind, start_pos, state.get_position());
847 true
848 }
849 else {
850 state.set_position(start_pos);
851 false
852 }
853 }
854 else {
855 false
856 }
857 }
858 else {
859 false
860 }
861 }
862
863 fn lex_indented_code_block<S: Source + ?Sized>(&self, state: &mut State<S>) -> bool {
865 let start_pos = state.get_position();
866
867 if start_pos > 0 {
869 if let Some(prev_char) = state.source().get_char_at(start_pos - 1) {
870 if prev_char != '\n' && prev_char != '\r' {
871 return false;
872 }
873 }
874 }
875
876 let mut indent_count = 0;
878 let mut pos = start_pos;
879 while let Some(ch) = state.source().get_char_at(pos) {
880 if ch == ' ' {
881 indent_count += 1;
882 pos += 1;
883 if indent_count >= 4 {
884 break;
885 }
886 }
887 else if ch == '\t' {
888 indent_count = 4;
889 pos += 1;
890 break;
891 }
892 else {
893 break;
894 }
895 }
896
897 if indent_count >= 4 {
898 state.set_position(pos);
899 state.add_token(MarkdownTokenType::CodeBlock, start_pos, state.get_position());
900 true
901 }
902 else {
903 false
904 }
905 }
906
907 fn lex_special_char<S: Source + ?Sized>(&self, state: &mut State<S>) -> bool {
909 let start_pos = state.get_position();
910
911 if let Some(ch) = state.peek() {
912 let token_kind = match ch {
913 '[' => MarkdownTokenType::LBracket,
914 ']' => MarkdownTokenType::RBracket,
915 '(' => MarkdownTokenType::LParen,
916 ')' => MarkdownTokenType::RParen,
917 '<' => MarkdownTokenType::Less,
918 '>' => MarkdownTokenType::Greater,
919 '*' => MarkdownTokenType::Asterisk,
920 '_' => MarkdownTokenType::Underscore,
921 '`' => MarkdownTokenType::Backtick,
922 '~' => MarkdownTokenType::Tilde,
923 '#' => MarkdownTokenType::Hash,
924 '|' => MarkdownTokenType::Pipe,
925 '-' => MarkdownTokenType::Dash,
926 '+' => MarkdownTokenType::Plus,
927 '.' => MarkdownTokenType::Dot,
928 ':' => MarkdownTokenType::Colon,
929 '!' => MarkdownTokenType::Exclamation,
930 '\\' => MarkdownTokenType::Escape,
931 '$' => MarkdownTokenType::Dollar,
932 '^' => MarkdownTokenType::Caret,
933 _ => return false,
934 };
935
936 state.advance(ch.len_utf8());
937 state.add_token(token_kind, start_pos, state.get_position());
938 true
939 }
940 else {
941 false
942 }
943 }
944
945 fn lex_text<S: Source + ?Sized>(&self, state: &mut State<S>) -> bool {
947 let start_pos = state.get_position();
948
949 while let Some(ch) = state.peek() {
950 match ch {
952 ' ' | '\t' | '\n' | '\r' | '#' | '*' | '_' | '`' | '~' | '[' | ']' | '(' | ')' | '<' | '>' | '|' | '-' | '+' | '.' | ':' | '!' | '\\' | '$' | '^' => break,
953 _ => {
954 state.advance(ch.len_utf8());
955 }
956 }
957 }
958
959 if state.get_position() > start_pos {
960 state.add_token(MarkdownTokenType::Text, start_pos, state.get_position());
961 true
962 }
963 else {
964 false
965 }
966 }
967}
968
impl<'config> Lexer<MarkdownLanguage> for MarkdownLexer<'config> {
    /// Lexes `text` into Markdown tokens, appending an EOF token on success
    /// and finalizing the output through the provided cache.
    ///
    /// NOTE(review): `_edits` is currently ignored — incremental relexing is
    /// not implemented here; every call retokenizes from scratch.
    fn lex<'a, S: Source + ?Sized>(&self, text: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<MarkdownLanguage>) -> LexOutput<MarkdownLanguage> {
        let mut state = State::new(text);
        let result = self.run(&mut state);
        if result.is_ok() {
            state.add_eof();
        }
        state.finish_with_cache(result, cache)
    }
}
979
impl<'config> MarkdownLexer<'config> {
    /// Convenience entry point that lexes `source` without a lexer cache.
    ///
    /// NOTE(review): unlike the [`Lexer::lex`] implementation, this does not
    /// append an EOF token on success — confirm callers do not depend on it.
    pub fn lex_internal<'a, S: Source + ?Sized>(&self, source: &'a S) -> LexOutput<MarkdownLanguage> {
        let mut state = State::new(source);
        let result = self.run(&mut state);
        state.finish(result)
    }
}