1use alloc::{borrow::Cow, string::String, vec, vec::Vec};
7
8use crate::{
9 ast::*,
10 diagnostic::{Diagnostic, DiagnosticCode, DiagnosticSeverity},
11 entities::named_character_reference,
12 options::{SyntaxConfigError, SyntaxOptions},
13 span::Span,
14 validate::is_directive_name,
15};
16
/// Result of parsing a source document.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParseOutput {
    /// Root of the parsed tree.
    pub document: Document,
    /// Diagnostics collected while parsing.
    pub diagnostics: Vec<Diagnostic>,
}
26
/// Failure returned by [`SyntaxOptions::parse_strict`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ParseStrictError {
    /// The syntax options failed validation, so no parsing took place.
    Config(SyntaxConfigError),
    /// The first diagnostic with error severity reported by the parse.
    Diagnostic(Diagnostic),
}
35
/// Destination and optional title of a link resource.
///
/// NOTE(review): the code that builds and consumes this type is outside the
/// excerpt reviewed here; the field notes follow the field names and types
/// only and should be confirmed against that code.
#[derive(Clone, Debug, Eq, PartialEq)]
struct ParsedLinkResource {
    /// Link destination text.
    destination: String,
    /// Presumably the syntactic form the destination was written in.
    destination_kind: LinkDestinationKind,
    /// Link title, when one is present.
    title: Option<String>,
    /// Presumably the delimiter style of the title, when one is present.
    title_kind: Option<LinkTitleKind>,
}
43
// NOTE(review): judging by the name, the upper bound on the number of
// characters in a reference label; its use is outside this excerpt — confirm.
const REFERENCE_LABEL_MAX_CHARS: usize = 999;
// NOTE(review): judging by the name, the upper bound on the byte length of a
// wikilink; its use is outside this excerpt — confirm.
const WIKILINK_MAX_BYTES: usize = 999;
46
/// One physical line of the text being parsed, together with its offsets.
///
/// Produced by `collect_lines`; offsets are the position inside the text that
/// was split plus the `base_offset` passed to that call.
#[derive(Clone, Copy, Debug)]
struct Line<'a> {
    /// Line content without its line terminator.
    text: &'a str,
    /// The terminator that followed the line (`"\n"`, `"\r"` or `"\r\n"`), or
    /// `""` for a final line that has none.
    eol: &'a str,
    /// Offset of the first byte of `text`.
    start: usize,
    /// Offset just past the last byte of `text`.
    end: usize,
    /// Offset just past `eol`.
    end_with_eol: usize,
    /// Always `false` as produced by `collect_lines`. Block quote and list
    /// item parsing set it on child lines that were accepted as lazy
    /// continuations (no `>` marker, or less indentation than the item
    /// content).
    lazy: bool,
}
60
/// Description of the list marker found at the start of a line.
///
/// NOTE(review): values are produced by `list_marker_info`, which is outside
/// this excerpt. Notes marked "presumably" are inferred from field names.
#[derive(Clone, Copy, Debug)]
struct ListMarkerInfo<'a> {
    /// Copied into `List::ordered` by `parse_list`.
    ordered: bool,
    /// Copied into `List::start` by `parse_list`.
    start: Option<u64>,
    /// Copied into `List::delimiter` by `parse_list`.
    delimiter: ListDelimiter,
    /// Passed to `strip_list_continuation` as the list's own indentation;
    /// presumably the columns of whitespace before the marker — confirm.
    indent: usize,
    /// Presumably the length of the marker itself — confirm.
    marker_len: usize,
    /// Indentation threshold a later line must reach (per
    /// `leading_indent_columns`) to count as indented item content; also
    /// added to the item's start offset to form the child base offset.
    content_indent: usize,
    /// Presumably the text that follows the marker on its line — confirm.
    content: &'a str,
}
71
/// Marker information for a description-list details line.
///
/// NOTE(review): producers and consumers are outside this excerpt; the field
/// notes are inferred from names only — confirm.
#[derive(Clone, Copy, Debug)]
struct DescriptionMarker<'a> {
    /// Presumably the offset at which `content` begins within its line.
    content_offset: usize,
    /// Presumably the text that follows the marker.
    content: &'a str,
}
77
/// Information about a description-list term, as returned by
/// `description_term`.
///
/// NOTE(review): the code that fills and reads these fields is outside this
/// excerpt; the field notes are inferred from names only — confirm.
#[derive(Clone, Debug)]
struct DescriptionTerm {
    /// Presumably the index of the line holding the first details marker.
    marker_index: usize,
    /// Presumably the line index or offset at which the term ends.
    term_end: usize,
    /// Presumably whether a blank line separates the term from its details.
    blank_after_term: bool,
    /// Presumably the source text of the term.
    source: String,
    /// Presumably the document offset at which `source` starts.
    source_offset: usize,
}
86
/// Classification of an HTML block.
///
/// NOTE(review): the HTML block parser is outside this excerpt; the variant
/// notes are inferred from names only — confirm.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum HtmlBlockKind {
    /// Presumably a raw-text element.
    RawTag,
    /// Presumably a block-level tag.
    BlockTag,
    /// Presumably a block that runs until the given terminator text.
    Until(&'static str),
    /// Presumably a block that runs until the next blank line.
    UntilBlank,
}
94
95pub fn parse(input: &str) -> ParseOutput {
98 SyntaxOptions::default().parse(input)
99}
100
101impl SyntaxOptions {
102 pub fn parse(&self, input: &str) -> ParseOutput {
107 match parse_checked(input, self) {
108 Ok(output) => output,
109 Err(error) => ParseOutput {
110 document: Document::default(),
111 diagnostics: vec![Diagnostic::new(
112 DiagnosticSeverity::Error,
113 DiagnosticCode::StrictParse,
114 Span::new(0, input.len()),
115 error.message(),
116 )],
117 },
118 }
119 }
120
121 pub fn parse_strict(&self, input: &str) -> Result<ParseOutput, ParseStrictError> {
124 let output = parse_checked(input, self).map_err(ParseStrictError::Config)?;
125 if let Some(diagnostic) = output
126 .diagnostics
127 .iter()
128 .find(|diagnostic| diagnostic.severity == DiagnosticSeverity::Error)
129 {
130 return Err(ParseStrictError::Diagnostic(diagnostic.clone()));
131 }
132 Ok(output)
133 }
134}
135
136fn parse_checked(input: &str, options: &SyntaxOptions) -> Result<ParseOutput, SyntaxConfigError> {
137 options.validate()?;
138 let mut diagnostics = Vec::new();
139 let definitions = collect_definitions(input, options);
140 let children = parse_blocks(input, 0, true, options, &definitions, &mut diagnostics);
141
142 Ok(ParseOutput {
143 document: Document {
144 meta: NodeMeta::new(Some(Span::new(0, input.len()))),
145 children,
146 },
147 diagnostics,
148 })
149}
150
151fn parse_blocks(
152 input: &str,
153 base_offset: usize,
154 allow_frontmatter: bool,
155 options: &SyntaxOptions,
156 definitions: &[String],
157 diagnostics: &mut Vec<Diagnostic>,
158) -> Vec<Block> {
159 let lines = collect_lines(input, base_offset);
160 parse_blocks_from_lines(&lines, allow_frontmatter, options, definitions, diagnostics)
161}
162
163fn parse_blocks_from_lines(
164 lines: &[Line<'_>],
165 allow_frontmatter: bool,
166 options: &SyntaxOptions,
167 definitions: &[String],
168 diagnostics: &mut Vec<Diagnostic>,
169) -> Vec<Block> {
170 let mut blocks = Vec::new();
171 let mut index = 0;
172
173 while index < lines.len() {
174 let line = lines[index];
175 if line.text.trim().is_empty() {
176 index += 1;
177 continue;
178 }
179 let after_definition_unbroken = index > 0
180 && !lines[index - 1].text.trim().is_empty()
181 && matches!(blocks.last(), Some(Block::Definition(_)));
182
183 if allow_frontmatter && index == 0 {
184 if let Some((block, next)) = parse_frontmatter(lines, index, options) {
185 blocks.push(block);
186 index = next;
187 continue;
188 }
189 }
190
191 if let Some((block, next)) =
192 parse_container_directive(lines, index, options, definitions, diagnostics)
193 {
194 blocks.push(block);
195 index = next;
196 continue;
197 }
198
199 if let Some((block, next)) = parse_math_block(lines, index, options) {
200 blocks.push(block);
201 index = next;
202 continue;
203 }
204
205 if let Some((block, next)) = parse_fenced_code(lines, index, options) {
206 blocks.push(block);
207 index = next;
208 continue;
209 }
210
211 if let Some((block, next)) =
212 parse_block_quote(lines, index, options, definitions, diagnostics)
213 {
214 blocks.push(block);
215 index = next;
216 continue;
217 }
218
219 if let Some(block) = parse_atx_heading(line, options, definitions) {
220 blocks.push(block);
221 index += 1;
222 continue;
223 }
224
225 if let Some(block) = parse_thematic_break(line) {
226 blocks.push(block);
227 index += 1;
228 continue;
229 }
230
231 if let Some((block, next)) = parse_list(lines, index, options, definitions, diagnostics) {
232 blocks.push(block);
233 index = next;
234 continue;
235 }
236
237 if let Some((block, next)) =
238 parse_footnote_definition(lines, index, options, definitions, diagnostics)
239 {
240 blocks.push(block);
241 index = next;
242 continue;
243 }
244
245 if let Some((block, next)) =
246 parse_definition(lines, index, options, after_definition_unbroken)
247 {
248 blocks.push(block);
249 index = next;
250 continue;
251 }
252
253 if let Some(block) = parse_leaf_directive(line, options, definitions, diagnostics) {
254 blocks.push(block);
255 index += 1;
256 continue;
257 }
258
259 if let Some((block, next)) = parse_html_block(lines, index, options) {
260 blocks.push(block);
261 index = next;
262 continue;
263 }
264
265 if let Some((block, next)) = parse_mdx_flow(lines, index, options, diagnostics) {
266 blocks.push(block);
267 index = next;
268 continue;
269 }
270
271 if !after_definition_unbroken {
272 if let Some((block, next)) = parse_indented_code(lines, index, options) {
273 blocks.push(block);
274 index = next;
275 continue;
276 }
277 }
278
279 if let Some((block, next)) = parse_table(lines, index, options, definitions, diagnostics) {
280 blocks.push(block);
281 index = next;
282 continue;
283 }
284
285 if let Some((block, next)) = parse_setext_heading(lines, index, options, definitions) {
286 blocks.push(block);
287 index = next;
288 continue;
289 }
290
291 if let Some((block, next)) =
292 parse_description_list(lines, index, options, definitions, diagnostics)
293 {
294 blocks.push(block);
295 index = next;
296 continue;
297 }
298
299 let (block, next) = parse_paragraph(lines, index, options, definitions, diagnostics);
300 blocks.push(block);
301 index = next;
302 }
303
304 blocks
305}
306
307fn collect_lines(input: &str, base_offset: usize) -> Vec<Line<'_>> {
308 let bytes = input.as_bytes();
309 let mut lines = Vec::new();
310 let mut start = 0;
311 let mut index = 0;
312
313 while index < bytes.len() {
314 match bytes[index] {
315 b'\n' => {
316 let end = index;
317 lines.push(Line {
318 text: &input[start..end],
319 eol: &input[index..index + 1],
320 start: base_offset + start,
321 end: base_offset + end,
322 end_with_eol: base_offset + index + 1,
323 lazy: false,
324 });
325 index += 1;
326 start = index;
327 }
328 b'\r' => {
329 let end = index;
330 let eol_end = if index + 1 < bytes.len() && bytes[index + 1] == b'\n' {
331 index + 2
332 } else {
333 index + 1
334 };
335 lines.push(Line {
336 text: &input[start..end],
337 eol: &input[index..eol_end],
338 start: base_offset + start,
339 end: base_offset + end,
340 end_with_eol: base_offset + eol_end,
341 lazy: false,
342 });
343 index = eol_end;
344 start = index;
345 }
346 _ => index += 1,
347 }
348 }
349
350 if start < bytes.len() || input.is_empty() {
351 lines.push(Line {
352 text: &input[start..],
353 eol: "",
354 start: base_offset + start,
355 end: base_offset + bytes.len(),
356 end_with_eol: base_offset + bytes.len(),
357 lazy: false,
358 });
359 }
360
361 lines
362}
363
364fn collect_definitions(input: &str, options: &SyntaxOptions) -> Vec<String> {
365 let mut diagnostics = Vec::new();
366 let blocks = parse_blocks(input, 0, true, options, &[], &mut diagnostics);
367 let mut definitions = Vec::new();
368 collect_definition_refs_from_blocks(&blocks, &mut definitions);
369 definitions
370}
371
372fn collect_definition_refs_from_blocks(blocks: &[Block], definitions: &mut Vec<String>) {
373 for block in blocks {
374 match block {
375 Block::Definition(definition) => {
376 if definitions
377 .iter()
378 .all(|identifier| identifier != &definition.identifier)
379 {
380 definitions.push(definition.identifier.clone());
381 }
382 }
383 Block::BlockQuote(node) => {
384 collect_definition_refs_from_blocks(&node.children, definitions);
385 }
386 Block::Alert(node) => {
387 collect_definition_refs_from_blocks(&node.children, definitions);
388 }
389 Block::List(node) => {
390 for item in &node.children {
391 collect_definition_refs_from_blocks(&item.children, definitions);
392 }
393 }
394 Block::DescriptionList(node) => {
395 for item in &node.children {
396 for details in &item.details {
397 collect_definition_refs_from_blocks(&details.children, definitions);
398 }
399 }
400 }
401 Block::FootnoteDefinition(node) => {
402 collect_definition_refs_from_blocks(&node.children, definitions);
403 }
404 Block::ContainerDirective(node) => {
405 collect_definition_refs_from_blocks(&node.children, definitions);
406 }
407 _ => {}
408 }
409 }
410}
411
412fn parse_frontmatter(
413 lines: &[Line<'_>],
414 index: usize,
415 options: &SyntaxOptions,
416) -> Option<(Block, usize)> {
417 if !options.constructs.frontmatter {
418 return None;
419 }
420 let kind = frontmatter_fence_kind(lines[index].text)?;
421
422 let mut value = String::new();
423 let mut cursor = index + 1;
424 while cursor < lines.len() {
425 if frontmatter_fence_kind(lines[cursor].text) == Some(kind) {
426 let span = Span::new(lines[index].start, lines[cursor].end_with_eol);
427 return Some((
428 Block::Frontmatter(Frontmatter {
429 meta: NodeMeta::new(Some(span)),
430 kind,
431 value,
432 }),
433 cursor + 1,
434 ));
435 }
436 push_line(&mut value, lines[cursor].text);
437 cursor += 1;
438 }
439
440 None
441}
442
443fn frontmatter_fence_kind(line: &str) -> Option<FrontmatterKind> {
444 match line.trim_end_matches([' ', '\t']) {
445 "---" => Some(FrontmatterKind::Yaml),
446 "+++" => Some(FrontmatterKind::Toml),
447 _ => None,
448 }
449}
450
451fn parse_container_directive(
452 lines: &[Line<'_>],
453 index: usize,
454 options: &SyntaxOptions,
455 definitions: &[String],
456 diagnostics: &mut Vec<Diagnostic>,
457) -> Option<(Block, usize)> {
458 if !options.constructs.directive_container {
459 return None;
460 }
461 let trimmed = trim_up_to_three_spaces(lines[index].text)?;
462 let Some((fence_len, opener_rest)) = directive_container_opener_prefix(trimmed) else {
463 return None;
464 };
465 let opener_base = lines[index].start + (lines[index].text.len() - trimmed.len()) + fence_len;
466
467 let Some((name, label_source, attributes, _consumed)) = parse_directive_opener(opener_rest)
468 else {
469 diagnostics.push(Diagnostic::new(
470 DiagnosticSeverity::Error,
471 DiagnosticCode::InvalidDirectiveName,
472 Span::new(lines[index].start, lines[index].end),
473 "container directive must have a valid name",
474 ));
475 return None;
476 };
477 let label_base = opener_base + name.len() + 1;
478
479 let mut content = String::new();
480 let mut cursor = index + 1;
481 let mut nested_fences = Vec::new();
482 while cursor < lines.len() {
483 let line = lines[cursor].text;
484 let trimmed = trim_up_to_three_spaces(line);
485 if let Some(trimmed) = trimmed {
486 if let Some(nested_len) = nested_fences.last().copied() {
487 if directive_container_closing_fence(trimmed, nested_len).is_some() {
488 nested_fences.pop();
489 push_line(&mut content, line);
490 cursor += 1;
491 continue;
492 }
493 } else if directive_container_closing_fence(trimmed, fence_len).is_some() {
494 let label = label_source
495 .map(|source| {
496 parse_inlines(source, label_base, options, definitions, diagnostics)
497 })
498 .unwrap_or_default();
499 let children = parse_blocks(
500 &content,
501 lines[index + 1].start,
502 false,
503 options,
504 definitions,
505 diagnostics,
506 );
507 return Some((
508 Block::ContainerDirective(ContainerDirective {
509 meta: NodeMeta::new(Some(Span::new(
510 lines[index].start,
511 lines[cursor].end_with_eol,
512 ))),
513 name,
514 label,
515 attributes,
516 children,
517 }),
518 cursor + 1,
519 ));
520 }
521
522 if let Some((nested_len, nested_rest)) = directive_container_opener_prefix(trimmed) {
523 if parse_directive_opener(nested_rest).is_some() {
524 nested_fences.push(nested_len);
525 }
526 }
527 }
528
529 push_line(&mut content, line);
530 cursor += 1;
531 }
532
533 diagnostics.push(Diagnostic::new(
534 DiagnosticSeverity::Error,
535 DiagnosticCode::UnclosedDirectiveContainer,
536 Span::new(lines[index].start, lines[index].end),
537 "container directive is missing a closing fence",
538 ));
539 Some((
540 Block::ContainerDirective(ContainerDirective {
541 meta: NodeMeta::new(Some(Span::new(
542 lines[index].start,
543 lines.last()?.end_with_eol,
544 ))),
545 name,
546 label: label_source
547 .map(|source| parse_inlines(source, label_base, options, definitions, diagnostics))
548 .unwrap_or_default(),
549 attributes,
550 children: parse_blocks(
551 &content,
552 lines
553 .get(index + 1)
554 .map(|line| line.start)
555 .unwrap_or(lines[index].end),
556 false,
557 options,
558 definitions,
559 diagnostics,
560 ),
561 }),
562 lines.len(),
563 ))
564}
565
/// Splits a leading run of three or more colons off `input`.
///
/// Returns the number of colons and the text after them, or `None` when
/// `input` starts with fewer than three colons.
fn directive_container_opener_prefix(input: &str) -> Option<(usize, &str)> {
    let rest = input.trim_start_matches(':');
    let fence_len = input.len() - rest.len();
    (fence_len >= 3).then_some((fence_len, rest))
}
578
/// Checks whether `input` is a closing fence of at least `min_len` colons.
///
/// The line must consist of a run of colons followed only by whitespace.
/// Returns the length of the colon run when it qualifies.
fn directive_container_closing_fence(input: &str, min_len: usize) -> Option<usize> {
    let after_fence = input.trim_start_matches(':');
    let fence_len = input.len() - after_fence.len();
    let closes = fence_len >= min_len && after_fence.trim().is_empty();
    closes.then_some(fence_len)
}
591
592fn parse_math_block(
593 lines: &[Line<'_>],
594 index: usize,
595 options: &SyntaxOptions,
596) -> Option<(Block, usize)> {
597 if !options.constructs.math_block {
598 return None;
599 }
600 let opener = trim_up_to_three_spaces(lines[index].text)?;
606 let fence_length = math_block_fence_length(opener)?;
607 let opening_indent = leading_indent_columns(lines[index].text);
608
609 let mut value = String::new();
610 let mut content_lines = 0usize;
611 let mut cursor = index + 1;
612 while cursor < lines.len() {
613 if let Some(close_line) = trim_up_to_three_spaces(lines[cursor].text) {
614 if math_block_fence_closes(close_line, fence_length) {
615 return Some((
616 Block::MathBlock(MathBlock {
617 meta: NodeMeta::new(Some(Span::new(
618 lines[index].start,
619 lines[cursor].end_with_eol,
620 ))),
621 value,
622 }),
623 cursor + 1,
624 ));
625 }
626 }
627 if content_lines > 0 {
628 ensure_line_separator(&mut value);
632 }
633 let stripped = strip_leading_indent_columns(lines[cursor].text, opening_indent);
634 value.push_str(&stripped);
635 value.push_str(lines[cursor].eol);
636 content_lines += 1;
637 cursor += 1;
638 }
639
640 Some((
643 Block::MathBlock(MathBlock {
644 meta: NodeMeta::new(Some(Span::new(
645 lines[index].start,
646 lines.last()?.end_with_eol,
647 ))),
648 value,
649 }),
650 lines.len(),
651 ))
652}
653
/// Returns the length of an opening math fence.
///
/// `input` must begin with at least two `$` characters and must not contain
/// any further `$` after that run; otherwise `None` is returned.
fn math_block_fence_length(input: &str) -> Option<usize> {
    let after_fence = input.trim_start_matches('$');
    let length = input.len() - after_fence.len();
    (length >= 2 && !after_fence.contains('$')).then_some(length)
}
667
/// Reports whether `input` closes a math fence of the given `length`.
///
/// A closing fence is a run of at least `length` `$` characters followed
/// only by whitespace.
fn math_block_fence_closes(input: &str, length: usize) -> bool {
    let trailing = input.trim_start_matches('$');
    let run = input.len() - trailing.len();
    run >= length && trailing.trim().is_empty()
}
678
679fn parse_fenced_code(
680 lines: &[Line<'_>],
681 index: usize,
682 options: &SyntaxOptions,
683) -> Option<(Block, usize)> {
684 let line = fence_line(lines[index].text, options)?;
685 let (marker, length) = fence_start(line)?;
686 let opening_indent = leading_indent_columns(lines[index].text);
689 let info = line[length..].trim();
690 if marker == FenceMarker::Backtick && info.contains('`') {
691 return None;
692 }
693 let info = if info.is_empty() {
694 None
695 } else {
696 Some(unescape_string(info))
697 };
698
699 let mut value = String::new();
700 let mut content_lines = 0usize;
705 let mut cursor = index + 1;
706 while cursor < lines.len() {
707 if let Some(close_line) = fence_line(lines[cursor].text, options) {
708 if fence_close(close_line, marker, length) {
709 return Some((
710 Block::CodeBlock(CodeBlock {
711 meta: NodeMeta::new(Some(Span::new(
712 lines[index].start,
713 lines[cursor].end_with_eol,
714 ))),
715 kind: CodeBlockKind::Fenced { marker, length },
716 info,
717 value,
718 }),
719 cursor + 1,
720 ));
721 }
722 }
723 if content_lines > 0 {
724 ensure_line_separator(&mut value);
728 }
729 let stripped = strip_leading_indent_columns(lines[cursor].text, opening_indent);
730 value.push_str(&stripped);
731 value.push_str(lines[cursor].eol);
732 content_lines += 1;
733 cursor += 1;
734 }
735 Some((
736 Block::CodeBlock(CodeBlock {
737 meta: NodeMeta::new(Some(Span::new(
738 lines[index].start,
739 lines.last()?.end_with_eol,
740 ))),
741 kind: CodeBlockKind::Fenced { marker, length },
742 info,
743 value,
744 }),
745 lines.len(),
746 ))
747}
748
749fn fence_line<'a>(line: &'a str, options: &SyntaxOptions) -> Option<&'a str> {
750 if options.constructs.indented_code {
751 trim_up_to_three_spaces(line)
752 } else {
753 Some(trim_ascii_start(line))
754 }
755}
756
757fn container_closed_after_unclosed_fence(
758 lines: &[Line<'_>],
759 cursor: usize,
760 last_content_index: usize,
761 content: &str,
762 options: &SyntaxOptions,
763) -> bool {
764 !lines[last_content_index].eol.is_empty()
765 && (cursor >= lines.len() || lines[cursor].text.trim().is_empty())
766 && content_has_unclosed_fenced_code(content, options)
767}
768
769fn content_has_unclosed_fenced_code(content: &str, options: &SyntaxOptions) -> bool {
770 let lines = collect_lines(content, 0);
771 let mut open_fence = None;
772 for line in lines {
773 let Some(trimmed) = fence_line(line.text, options) else {
774 continue;
775 };
776 if let Some((marker, length, has_nonblank_content)) = open_fence {
777 if fence_close(trimmed, marker, length) {
778 open_fence = None;
779 } else {
780 open_fence = Some((
781 marker,
782 length,
783 has_nonblank_content || !trimmed.trim().is_empty(),
784 ));
785 }
786 continue;
787 }
788 let Some((marker, length)) = fence_start(trimmed) else {
789 continue;
790 };
791 let info = trimmed[length..].trim();
792 if marker != FenceMarker::Backtick || !info.contains('`') {
793 open_fence = Some((marker, length, false));
794 }
795 }
796 open_fence.is_some_and(|(_, _, has_nonblank_content)| !has_nonblank_content)
797}
798
799fn block_quote_content_paragraph_open(content: &str, options: &SyntaxOptions) -> bool {
809 let Some(trimmed) = trim_up_to_three_spaces(content) else {
810 return false;
812 };
813 if trimmed.is_empty() {
814 return false;
815 }
816 if let Some(rest) = trimmed.strip_prefix('>') {
817 let rest = rest.strip_prefix(' ').unwrap_or(rest);
818 return block_quote_content_paragraph_open(rest, options);
819 }
820 if let Some(marker) = list_marker_info(trimmed) {
821 let first_content = list_marker_first_content(trimmed, marker);
822 return block_quote_content_paragraph_open(&first_content, options);
823 }
824 !lazy_line_starts_block(trimmed, options)
825}
826
827fn lazy_line_starts_block(input: &str, options: &SyntaxOptions) -> bool {
833 likely_block_start(input, options)
834 || (options.constructs.html_block && line_starts_html_block(input))
835 || trim_up_to_three_spaces(input).is_some_and(|t| t.starts_with('`') || t.starts_with('~'))
840}
841
842fn parse_block_quote(
843 lines: &[Line<'_>],
844 index: usize,
845 options: &SyntaxOptions,
846 definitions: &[String],
847 diagnostics: &mut Vec<Diagnostic>,
848) -> Option<(Block, usize)> {
849 if !trim_up_to_three_spaces(lines[index].text)?.starts_with('>') {
850 return None;
851 }
852
853 let mut content = String::new();
854 let mut lazy_flags: Vec<bool> = Vec::new();
858 let mut cursor = index;
859 let mut paragraph_open = false;
860 let mut in_table = false;
861 let mut last_content_line: Option<String> = None;
862 let mut content_base_offset = None;
863 while cursor < lines.len() {
864 let raw = lines[cursor].text;
865 let trimmed_opt = trim_up_to_three_spaces(raw);
866 let marked = trimmed_opt.is_some_and(|trimmed| trimmed.starts_with('>'));
867 let quote_rest_owned: String;
868 if let Some(trimmed) = trimmed_opt {
869 if trimmed.is_empty() {
870 break;
871 }
872 }
873 let (line, line_start) = if marked {
874 let trimmed = trimmed_opt.expect("marked implies a trimmed line");
875 let trimmed_start = lines[cursor].start + (raw.len() - trimmed.len());
876 let mut rest_start = 1;
877 let mut rest = &trimmed[rest_start..];
878 if rest.starts_with(' ') {
879 rest_start += 1;
880 rest = &rest[1..];
881 } else if rest.starts_with('\t') {
882 let marker_end_column = leading_indent_columns(raw) + 1;
883 match strip_leading_indent_columns_from(rest, 1, marker_end_column) {
884 Cow::Borrowed(stripped) => rest = stripped,
885 Cow::Owned(stripped) => {
886 quote_rest_owned = stripped;
887 rest = "e_rest_owned;
888 }
889 }
890 }
891 (rest, trimmed_start + rest_start)
892 } else if in_table {
893 break;
896 } else if paragraph_open && !lazy_line_starts_block(raw, options) {
897 (raw, lines[cursor].start)
901 } else {
902 break;
903 };
904
905 let mut escaped_lazy = String::new();
906 let line = if !marked
907 && last_content_line.as_deref().is_some_and(|previous| {
908 table_can_start_source(
909 previous,
910 line,
911 options.constructs.indented_code,
912 options.constructs.spoiler,
913 )
914 }) {
915 escaped_lazy.push_str(line);
916 if let Some(offset) = escaped_lazy.find('-') {
917 escaped_lazy.insert(offset, '\\');
918 }
919 &escaped_lazy
920 } else {
921 line
922 };
923
924 let starts_table = last_content_line.as_deref().is_some_and(|previous| {
925 table_can_start_source(
926 previous,
927 line,
928 options.constructs.indented_code,
929 options.constructs.spoiler,
930 )
931 });
932 if marked && starts_table {
933 paragraph_open = false;
934 in_table = true;
935 } else if marked && in_table && block_quote_table_body_row(line, options) {
936 paragraph_open = false;
937 } else {
938 in_table = false;
939 paragraph_open = block_quote_content_paragraph_open(line, options);
942 }
943 last_content_line = Some(line.into());
944 if content_base_offset.is_none() {
945 content_base_offset = Some(line_start);
946 }
947 push_line(&mut content, line);
948 lazy_flags.push(!marked);
949 cursor += 1;
950 }
951
952 let span = Span::new(lines[index].start, lines[cursor - 1].end_with_eol);
953 let child_base_offset = content_base_offset.unwrap_or(lines[index].start);
954 if !lines[cursor - 1].eol.is_empty() && !ends_with_line_ending(&content) {
955 content.push_str(lines[cursor - 1].eol);
956 }
957 if container_closed_after_unclosed_fence(lines, cursor, cursor - 1, &content, options) {
958 content.push('\n');
959 }
960 if let Some(alert) = parse_alert_from_block_quote(
961 &content,
962 child_base_offset,
963 span,
964 options,
965 definitions,
966 diagnostics,
967 ) {
968 return Some((alert, cursor));
969 }
970
971 let mut child_lines = collect_lines(&content, child_base_offset);
972 for (child, &lazy) in child_lines.iter_mut().zip(lazy_flags.iter()) {
973 child.lazy = lazy;
974 }
975 let children = parse_blocks_from_lines(&child_lines, false, options, definitions, diagnostics);
976 Some((
977 Block::BlockQuote(BlockQuote {
978 meta: NodeMeta::new(Some(span)),
979 children,
980 }),
981 cursor,
982 ))
983}
984
985fn parse_alert_from_block_quote(
986 content: &str,
987 base_offset: usize,
988 span: Span,
989 options: &SyntaxOptions,
990 definitions: &[String],
991 diagnostics: &mut Vec<Diagnostic>,
992) -> Option<Block> {
993 if !options.constructs.gfm_alert {
994 return None;
995 }
996 let (first_line, rest) = content.split_once('\n').unwrap_or((content, ""));
997 let (kind, title) = parse_alert_marker(first_line)?;
998 let rest_base_offset = base_offset + first_line.len() + usize::from(!rest.is_empty());
999 let children = if rest.is_empty() {
1000 Vec::new()
1001 } else {
1002 parse_blocks(
1003 rest,
1004 rest_base_offset,
1005 false,
1006 options,
1007 definitions,
1008 diagnostics,
1009 )
1010 };
1011 Some(Block::Alert(Alert {
1012 meta: NodeMeta::new(Some(span)),
1013 kind,
1014 title,
1015 children,
1016 }))
1017}
1018
1019fn parse_alert_marker(line: &str) -> Option<(AlertKind, Option<String>)> {
1020 let close = line.find(']')?;
1021 let marker = line.get(0..close + 1)?;
1022 if !marker.starts_with("[!") {
1023 return None;
1024 }
1025 let kind = match &marker[2..close].to_ascii_lowercase()[..] {
1026 "note" => AlertKind::Note,
1027 "tip" => AlertKind::Tip,
1028 "important" => AlertKind::Important,
1029 "warning" => AlertKind::Warning,
1030 "caution" => AlertKind::Caution,
1031 _ => return None,
1032 };
1033 let title = line[close + 1..].trim();
1034 Some((
1035 kind,
1036 if title.is_empty() {
1037 None
1038 } else {
1039 Some(title.into())
1040 },
1041 ))
1042}
1043
1044fn block_quote_table_body_row(line: &str, options: &SyntaxOptions) -> bool {
1045 table_indent_line(line, options.constructs.indented_code).is_some_and(|row| {
1046 !row.trim().is_empty() && contains_unescaped_pipe(row, options.constructs.spoiler)
1047 })
1048}
1049
/// Parses a list whose first item starts on `lines[index]`.
///
/// Returns `None` when the line carries no list marker. Otherwise items are
/// collected for as long as consecutive item lines carry a marker that
/// `same_list_marker` accepts and are not thematic breaks. Each item's
/// content is gathered with the marker and item indentation stripped, then
/// parsed as child blocks. Returns the list block and the index of the first
/// line after it.
///
/// NOTE(review): the final span uses `lines[cursor - 1]`, which requires that
/// at least one item was consumed. That holds as long as the first line is
/// not also a thematic break; `parse_blocks_from_lines` tries thematic breaks
/// before lists, so the visible caller satisfies this.
fn parse_list(
    lines: &[Line<'_>],
    index: usize,
    options: &SyntaxOptions,
    definitions: &[String],
    diagnostics: &mut Vec<Diagnostic>,
) -> Option<(Block, usize)> {
    let first_marker = list_marker_info(lines[index].text)?;
    let mut items = Vec::new();
    let mut cursor = index;
    // The list is tight only while every item is tight.
    let mut tight = true;

    // One iteration per list item.
    while cursor < lines.len() {
        // A thematic break ends the list even if it also looks like a marker.
        if parse_thematic_break(lines[cursor]).is_some() {
            break;
        }
        let Some(marker) = list_marker_info(lines[cursor].text) else {
            break;
        };
        if !same_list_marker(first_marker, marker) {
            break;
        }

        let item_start = cursor;
        let mut item_end = cursor;
        let mut item_tight = true;
        // Offsets within `content` at which blank lines were inserted.
        let mut item_blank_offsets: Vec<usize> = Vec::new();
        let mut content = String::new();
        // One flag per content line: `true` for lazy paragraph continuations.
        let mut lazy_flags: Vec<bool> = Vec::new();
        // Fence state maintained by `update_list_item_fence`.
        let mut open_fence = None;
        let first_content = list_marker_first_content(lines[cursor].text, marker);
        let mut last_content_line: Option<String> = Some(first_content.as_ref().into());
        let mut paragraph_open = list_item_paragraph_stays_open(None, &first_content, options);
        // True until the item receives its first non-blank continuation line,
        // if the text after the marker was blank.
        let mut item_started_blank = first_content.trim().is_empty();
        push_line(&mut content, &first_content);
        lazy_flags.push(false);
        update_list_item_fence(&first_content, &mut open_fence);
        cursor += 1;

        // Gather the lines that continue this item.
        while cursor < lines.len() {
            if lines[cursor].text.trim().is_empty() {
                if open_fence.is_some() {
                    // Blank lines inside an open fence always stay in the item.
                    let stripped = strip_list_continuation(
                        lines[cursor].text,
                        marker.content_indent,
                        first_marker.indent,
                    );
                    push_line(&mut content, &stripped);
                    lazy_flags.push(false);
                    update_list_item_fence(&stripped, &mut open_fence);
                    item_end = cursor;
                    cursor += 1;
                    continue;
                }
                // Look past the run of blank lines to decide whether the item
                // continues afterwards.
                let next = next_nonblank_line(lines, cursor + 1);
                if item_started_blank
                    || next >= lines.len()
                    || sibling_list_marker_at_line(
                        lines[next].text,
                        first_marker,
                        marker.content_indent,
                    )
                    || leading_indent_columns(lines[next].text) < marker.content_indent
                {
                    // The item ends here. A blank line followed by a sibling
                    // item makes this item loose.
                    if next < lines.len()
                        && sibling_list_marker_at_line(
                            lines[next].text,
                            first_marker,
                            marker.content_indent,
                        )
                    {
                        item_tight = false;
                    }
                    cursor = next;
                    break;
                }
                // The blank line is interior to the item: record where it
                // lands in `content` and close any open paragraph.
                item_blank_offsets.push(content.len() + usize::from(!content.is_empty()));
                paragraph_open = false;
                push_line(&mut content, "");
                lazy_flags.push(false);
                item_end = cursor;
                cursor += 1;
                continue;
            }

            item_started_blank = false;

            // A sibling marker starts the next item.
            if sibling_list_marker_at_line(lines[cursor].text, first_marker, marker.content_indent)
            {
                break;
            }

            // An under-indented line with a marker of a different list ends
            // the item.
            if leading_indent_columns(lines[cursor].text) < marker.content_indent
                && !same_list_marker_line(lines[cursor].text, first_marker)
                && list_marker_info(lines[cursor].text).is_some()
            {
                break;
            }

            // Any other under-indented line is only kept as a lazy
            // continuation of an open paragraph.
            if leading_indent_columns(lines[cursor].text) < marker.content_indent {
                if likely_block_start(lines[cursor].text, options) || !paragraph_open {
                    break;
                }
            }

            let lazy = paragraph_open
                && leading_indent_columns(lines[cursor].text) < marker.content_indent;
            let stripped = strip_list_continuation(
                lines[cursor].text,
                marker.content_indent,
                first_marker.indent,
            );
            let starts_table = last_content_line.as_deref().is_some_and(|previous| {
                table_can_start_source(
                    previous,
                    &stripped,
                    options.constructs.indented_code,
                    options.constructs.spoiler,
                )
            });
            // A line that turns the previous one into a table header closes
            // the paragraph.
            paragraph_open = if starts_table {
                false
            } else {
                list_item_paragraph_stays_open(Some(paragraph_open), &stripped, options)
            };
            push_line(&mut content, &stripped);
            lazy_flags.push(lazy);
            update_list_item_fence(&stripped, &mut open_fence);
            last_content_line = Some(stripped.into_owned());
            item_end = cursor;
            cursor += 1;
        }

        // Child offsets are based at the item's first line plus its content
        // indentation.
        let child_base = lines[item_start].start + marker.content_indent;
        if !lines[item_end].eol.is_empty() && !ends_with_line_ending(&content) {
            content.push_str(lines[item_end].eol);
        }
        if container_closed_after_unclosed_fence(lines, cursor, item_end, &content, options) {
            content.push('\n');
        }
        let mut child_lines = collect_lines(&content, child_base);
        for (child, &lazy) in child_lines.iter_mut().zip(lazy_flags.iter()) {
            child.lazy = lazy;
        }
        let mut children =
            parse_blocks_from_lines(&child_lines, false, options, definitions, diagnostics);
        let checked = if options.constructs.gfm_task_list_item {
            take_task_marker_from_children(&mut children)
        } else {
            None
        };

        // An interior blank line that separates child blocks also makes the
        // item loose.
        if item_tight
            && blank_separates_top_level_blocks(&item_blank_offsets, &children, child_base)
        {
            item_tight = false;
        }
        tight = tight && item_tight;
        items.push(ListItem {
            meta: NodeMeta::new(Some(Span::new(
                lines[item_start].start,
                lines[item_end].end_with_eol,
            ))),
            checked,
            children,
        });
    }

    Some((
        Block::List(List {
            meta: NodeMeta::new(Some(Span::new(
                lines[index].start,
                lines[cursor - 1].end_with_eol,
            ))),
            ordered: first_marker.ordered,
            start: first_marker.start,
            delimiter: first_marker.delimiter,
            tight,
            children: items,
        }),
        cursor,
    ))
}
1270
1271fn blank_separates_top_level_blocks(
1284 blank_offsets: &[usize],
1285 children: &[Block],
1286 child_base: usize,
1287) -> bool {
1288 if blank_offsets.is_empty() || children.len() < 2 {
1289 return false;
1290 }
1291 let Some(&first_blank) = blank_offsets.iter().min() else {
1292 return false;
1293 };
1294 children.iter().any(|child| {
1295 block_span(child).is_some_and(|span| span.start.saturating_sub(child_base) > first_blank)
1296 })
1297}
1298
1299fn block_span(block: &Block) -> Option<Span> {
1300 let meta = match block {
1301 Block::Paragraph(node) => &node.meta,
1302 Block::Heading(node) => &node.meta,
1303 Block::ThematicBreak(node) => &node.meta,
1304 Block::BlockQuote(node) => &node.meta,
1305 Block::Alert(node) => &node.meta,
1306 Block::List(node) => &node.meta,
1307 Block::DescriptionList(node) => &node.meta,
1308 Block::CodeBlock(node) => &node.meta,
1309 Block::HtmlBlock(node) => &node.meta,
1310 Block::Definition(node) => &node.meta,
1311 Block::FootnoteDefinition(node) => &node.meta,
1312 Block::Table(node) => &node.meta,
1313 Block::MathBlock(node) => &node.meta,
1314 Block::Frontmatter(node) => &node.meta,
1315 Block::MdxEsm(node) => &node.meta,
1316 Block::MdxExpression(node) => &node.meta,
1317 Block::MdxJsx(node) => &node.meta,
1318 Block::LeafDirective(node) => &node.meta,
1319 Block::ContainerDirective(node) => &node.meta,
1320 };
1321 meta.span
1322}
1323
1324fn list_item_paragraph_stays_open(
1325 previous_open: Option<bool>,
1326 line: &str,
1327 options: &SyntaxOptions,
1328) -> bool {
1329 if line.trim().is_empty() {
1330 return false;
1331 }
1332 if previous_open == Some(false) {
1333 return false;
1334 }
1335 block_quote_content_paragraph_open(line, options)
1336}
1337
1338fn parse_description_list(
1339 lines: &[Line<'_>],
1340 index: usize,
1341 options: &SyntaxOptions,
1342 definitions: &[String],
1343 diagnostics: &mut Vec<Diagnostic>,
1344) -> Option<(Block, usize)> {
1345 if !options.constructs.description_list || !is_description_term_line(lines[index].text, options)
1346 {
1347 return None;
1348 }
1349
1350 let mut cursor = index;
1351 let mut items = Vec::new();
1352 let mut tight = true;
1353 let mut list_end = lines[index].end_with_eol;
1354
1355 while cursor < lines.len() {
1356 if !is_description_term_line(lines[cursor].text, options) {
1357 break;
1358 }
1359 let Some(term) = description_term(lines, cursor, options) else {
1360 break;
1361 };
1362 let term_line = lines[cursor];
1363 let mut details = Vec::new();
1364 let item_start = term_line.start;
1365 let mut item_end = lines[term.term_end].end_with_eol;
1366 tight = tight && !term.blank_after_term;
1367 cursor = term.marker_index;
1368
1369 loop {
1370 let Some(marker) = description_marker(lines[cursor].text) else {
1371 break;
1372 };
1373 let (detail, next, detail_tight) = parse_description_details(
1374 lines,
1375 cursor,
1376 marker,
1377 options,
1378 definitions,
1379 diagnostics,
1380 )?;
1381 tight = tight && detail_tight;
1382 item_end = detail
1383 .meta
1384 .span
1385 .map(|span| span.end)
1386 .unwrap_or(lines[cursor].end_with_eol);
1387 details.push(detail);
1388 cursor = next;
1389
1390 let next_nonblank = next_nonblank_line(lines, cursor);
1391 if next_nonblank < lines.len()
1392 && description_marker(lines[next_nonblank].text).is_some()
1393 {
1394 if next_nonblank != cursor {
1395 tight = false;
1396 }
1397 cursor = next_nonblank;
1398 continue;
1399 }
1400 break;
1401 }
1402
1403 if details.is_empty() {
1404 return None;
1405 }
1406 list_end = item_end;
1407 items.push(DescriptionItem {
1408 meta: NodeMeta::new(Some(Span::new(item_start, item_end))),
1409 term: parse_inlines(
1410 &term.source,
1411 term.source_offset,
1412 options,
1413 definitions,
1414 diagnostics,
1415 ),
1416 details,
1417 });
1418
1419 let next_item = next_nonblank_line(lines, cursor);
1420 if next_item >= lines.len() {
1421 cursor = next_item;
1422 break;
1423 }
1424 if description_term(lines, next_item, options).is_some() {
1425 if next_item != cursor {
1426 tight = false;
1427 }
1428 cursor = next_item;
1429 continue;
1430 }
1431 cursor = next_item;
1432 break;
1433 }
1434
1435 (!items.is_empty()).then_some((
1436 Block::DescriptionList(DescriptionList {
1437 meta: NodeMeta::new(Some(Span::new(lines[index].start, list_end))),
1438 tight,
1439 children: items,
1440 }),
1441 cursor,
1442 ))
1443}
1444
1445fn parse_description_details(
1446 lines: &[Line<'_>],
1447 index: usize,
1448 marker: DescriptionMarker<'_>,
1449 options: &SyntaxOptions,
1450 definitions: &[String],
1451 diagnostics: &mut Vec<Diagnostic>,
1452) -> Option<(DescriptionDetails, usize, bool)> {
1453 let mut content = String::new();
1454 push_line(&mut content, marker.content);
1455 let mut cursor = index + 1;
1456 let mut end = lines[index].end_with_eol;
1457 let mut tight = true;
1458 let mut paragraph_open = paragraph_stays_open(marker.content, options);
1459
1460 while cursor < lines.len() {
1461 if lines[cursor].text.trim().is_empty() {
1462 let next = next_nonblank_line(lines, cursor + 1);
1463 if next >= lines.len() || description_term(lines, next, options).is_some() {
1470 cursor = next;
1471 break;
1472 }
1473 if description_marker(lines[next].text).is_some() {
1474 tight = false;
1475 cursor = next;
1476 break;
1477 }
1478 if strip_indent_continuation(lines[next].text).is_none() {
1479 break;
1480 }
1481 push_line(&mut content, "");
1482 paragraph_open = false;
1483 tight = false;
1484 end = lines[cursor].end_with_eol;
1485 cursor += 1;
1486 continue;
1487 }
1488
1489 if description_marker(lines[cursor].text).is_some()
1490 || description_term(lines, cursor, options).is_some()
1491 {
1492 break;
1493 }
1494
1495 let continuation = if let Some(continuation) = strip_indent_continuation(lines[cursor].text)
1496 {
1497 continuation
1498 } else if paragraph_open && !likely_block_start(lines[cursor].text, options) {
1499 trim_ascii_start(lines[cursor].text)
1500 } else {
1501 break;
1502 };
1503 paragraph_open = paragraph_stays_open(continuation, options);
1504 push_line(&mut content, continuation);
1505 end = lines[cursor].end_with_eol;
1506 cursor += 1;
1507 }
1508
1509 if content.trim().is_empty() {
1510 return None;
1511 }
1512
1513 Some((
1514 DescriptionDetails {
1515 meta: NodeMeta::new(Some(Span::new(lines[index].start, end))),
1516 children: parse_blocks(
1517 &content,
1518 lines[index].start + marker.content_offset,
1519 false,
1520 options,
1521 definitions,
1522 diagnostics,
1523 ),
1524 },
1525 cursor,
1526 tight,
1527 ))
1528}
1529
1530fn description_term(
1531 lines: &[Line<'_>],
1532 term_index: usize,
1533 options: &SyntaxOptions,
1534) -> Option<DescriptionTerm> {
1535 if term_index >= lines.len() || !is_description_term_line(lines[term_index].text, options) {
1536 return None;
1537 }
1538 let mut source = String::new();
1539 let mut term_end = term_index;
1540 let mut cursor = term_index;
1541 while cursor < lines.len() && is_description_term_line(lines[cursor].text, options) {
1542 if !source.is_empty() {
1543 source.push('\n');
1544 }
1545 source.push_str(trim_ascii_start(lines[cursor].text).trim_end());
1546 term_end = cursor;
1547 cursor += 1;
1548 }
1549
1550 let mut marker_index = cursor;
1551 let mut blank_after_term = false;
1552 while marker_index < lines.len() && lines[marker_index].text.trim().is_empty() {
1553 blank_after_term = true;
1554 marker_index += 1;
1555 }
1556 (marker_index < lines.len() && description_marker(lines[marker_index].text).is_some()).then(
1557 || DescriptionTerm {
1558 marker_index,
1559 term_end,
1560 blank_after_term,
1561 source,
1562 source_offset: lines[term_index].start + leading_trim_bytes(lines[term_index].text),
1563 },
1564 )
1565}
1566
1567fn is_description_term_line(line: &str, options: &SyntaxOptions) -> bool {
1568 leading_indent_columns(line) <= 3
1569 && !line.trim().is_empty()
1570 && description_marker(line).is_none()
1571 && !likely_block_start(line, options)
1572}
1573
1574fn description_marker(line: &str) -> Option<DescriptionMarker<'_>> {
1575 let (columns, bytes) = leading_indent(line);
1576 if columns > 2 || !matches!(line.as_bytes().get(bytes), Some(b':' | b'~')) {
1577 return None;
1578 }
1579 if line
1580 .as_bytes()
1581 .get(bytes + 1)
1582 .is_some_and(|byte| !matches!(*byte, b' ' | b'\t'))
1583 {
1584 return None;
1585 }
1586 let mut content_offset = bytes + 1;
1587 while line
1588 .as_bytes()
1589 .get(content_offset)
1590 .is_some_and(|byte| matches!(*byte, b' ' | b'\t'))
1591 {
1592 content_offset += 1;
1593 }
1594 Some(DescriptionMarker {
1595 content_offset,
1596 content: &line[content_offset..],
1597 })
1598}
1599
1600fn paragraph_stays_open(line: &str, options: &SyntaxOptions) -> bool {
1604 !line.trim().is_empty() && !likely_block_start(line, options)
1605}
1606
/// Strips one level of continuation indent from `input`.
///
/// Returns the remainder after the space prefix below or, failing that,
/// after a single leading tab; `None` when neither prefix is present.
// NOTE(review): confirm the width of the space prefix literal against the
// checked-in file before relying on it.
fn strip_indent_continuation(input: &str) -> Option<&str> {
    if let Some(rest) = input.strip_prefix(" ") {
        return Some(rest);
    }
    input.strip_prefix('\t')
}
1613
1614fn parse_atx_heading(
1615 line: Line<'_>,
1616 options: &SyntaxOptions,
1617 definitions: &[String],
1618) -> Option<Block> {
1619 let text = trim_up_to_three_spaces(line.text)?;
1620 let depth = text
1621 .as_bytes()
1622 .iter()
1623 .take_while(|byte| **byte == b'#')
1624 .count();
1625 if depth == 0 || depth > 6 {
1626 return None;
1627 }
1628 if text
1629 .as_bytes()
1630 .get(depth)
1631 .is_some_and(|byte| !matches!(*byte, b' ' | b'\t'))
1632 && text.len() != depth
1633 {
1634 return None;
1635 }
1636 let after_opening = &text[depth..];
1637 let content_start_in_text = depth + leading_trim_bytes(after_opening);
1638 let content = trim_closing_hashes(after_opening.trim_start());
1639 let content_start = line.start + (line.text.len() - text.len()) + content_start_in_text;
1640 Some(Block::Heading(Heading {
1641 meta: NodeMeta::new(Some(Span::new(line.start, line.end))),
1642 depth: depth as u8,
1643 kind: HeadingKind::Atx,
1644 children: parse_inlines(
1645 content,
1646 content_start,
1647 options,
1648 definitions,
1649 &mut Vec::new(),
1650 ),
1651 }))
1652}
1653
1654fn parse_thematic_break(line: Line<'_>) -> Option<Block> {
1655 let text = trim_up_to_three_spaces(line.text)?.trim();
1656 let mut marker = None;
1657 let mut count = 0;
1658 for char in text.chars() {
1659 if char == ' ' || char == '\t' {
1660 continue;
1661 }
1662 let current = match char {
1663 '-' => ThematicBreakMarker::Dash,
1664 '*' => ThematicBreakMarker::Asterisk,
1665 '_' => ThematicBreakMarker::Underscore,
1666 _ => return None,
1667 };
1668 if marker.is_some_and(|marker| marker != current) {
1669 return None;
1670 }
1671 marker = Some(current);
1672 count += 1;
1673 }
1674 if count >= 3 {
1675 Some(Block::ThematicBreak(ThematicBreak {
1676 meta: NodeMeta::new(Some(Span::new(line.start, line.end))),
1677 marker: marker?,
1678 }))
1679 } else {
1680 None
1681 }
1682}
1683
1684fn parse_definition(
1685 lines: &[Line<'_>],
1686 index: usize,
1687 options: &SyntaxOptions,
1688 allow_subsequent_indent: bool,
1689) -> Option<(Block, usize)> {
1690 let line = lines[index];
1691 let text = trim_definition_start(line.text, allow_subsequent_indent)?;
1692 if !text.starts_with('[') {
1693 return None;
1694 }
1695
1696 let mut accumulated = String::from(text);
1704 let mut label_end_line = index;
1705 let close = loop {
1706 if let Some(close) = find_reference_label_end(&accumulated, 0) {
1707 if accumulated.as_bytes().get(close + 1) == Some(&b':') {
1708 break close;
1709 }
1710 return None;
1712 }
1713 let next = label_end_line + 1;
1714 if next >= lines.len() || lines[next].text.trim().is_empty() {
1715 return None;
1716 }
1717 if likely_block_start(lines[next].text, options)
1724 || setext_underline_depth(lines[next].text).is_some()
1725 || table_can_start(lines, next, options)
1726 {
1727 return None;
1728 }
1729 accumulated.push('\n');
1730 accumulated.push_str(lines[next].text);
1731 label_end_line = next;
1732 };
1733 let label = String::from(&accumulated[1..close]);
1734 if normalize_label(&label).is_empty() {
1735 return None;
1736 }
1737 let label = label.as_str();
1738 let mut source = String::from(&accumulated[close + 2..]);
1739 let mut cursor = label_end_line;
1740 let mut best_without_title = None;
1741
1742 loop {
1743 if let Some(resource) = parse_definition_destination_title(&source) {
1744 if resource.title.is_some() {
1745 return Some((
1746 Block::Definition(Definition {
1747 meta: NodeMeta::new(Some(Span::new(
1748 line.start,
1749 lines[cursor].end_with_eol,
1750 ))),
1751 label: label.into(),
1752 identifier: normalize_label(label),
1753 destination: resource.destination,
1754 destination_kind: resource.destination_kind,
1755 title: resource.title,
1756 title_kind: resource.title_kind,
1757 }),
1758 cursor + 1,
1759 ));
1760 }
1761
1762 best_without_title = Some((resource, cursor + 1));
1763 let next = cursor + 1;
1764 if next >= lines.len()
1765 || lines[next].text.trim().is_empty()
1766 || !line_can_start_definition_title(lines[next].text)
1767 {
1768 break;
1769 }
1770 }
1771
1772 let next = cursor + 1;
1773 if next >= lines.len() || lines[next].text.trim().is_empty() {
1774 break;
1775 }
1776 if likely_block_start(lines[next].text, options)
1782 || setext_underline_depth(lines[next].text).is_some()
1783 {
1784 break;
1785 }
1786 source.push('\n');
1787 source.push_str(lines[next].text);
1788 cursor = next;
1789 }
1790
1791 let (resource, next) = best_without_title?;
1792 let end = lines[next - 1].end_with_eol;
1793 Some((
1794 Block::Definition(Definition {
1795 meta: NodeMeta::new(Some(Span::new(line.start, end))),
1796 label: label.into(),
1797 identifier: normalize_label(label),
1798 destination: resource.destination,
1799 destination_kind: resource.destination_kind,
1800 title: resource.title,
1801 title_kind: resource.title_kind,
1802 }),
1803 next,
1804 ))
1805}
1806
1807fn trim_definition_start(input: &str, allow_subsequent_indent: bool) -> Option<&str> {
1808 if let Some(trimmed) = trim_up_to_three_spaces(input) {
1809 return Some(trimmed);
1810 }
1811 if allow_subsequent_indent {
1812 let (columns, bytes) = leading_indent(input);
1813 if columns == 4 {
1814 return Some(&input[bytes..]);
1815 }
1816 }
1817 None
1818}
1819
1820fn parse_footnote_definition(
1821 lines: &[Line<'_>],
1822 index: usize,
1823 options: &SyntaxOptions,
1824 definitions: &[String],
1825 diagnostics: &mut Vec<Diagnostic>,
1826) -> Option<(Block, usize)> {
1827 if !options.constructs.footnote_definition {
1828 return None;
1829 }
1830 let line = lines[index];
1831 let text = line.text.trim();
1832 if !text.starts_with("[^") {
1833 return None;
1834 }
1835 let close = find_footnote_definition_label_end(text)?;
1836 let label = &text[2..close];
1837 if !is_footnote_label(label) {
1838 return None;
1839 }
1840 let rest = text[close + 2..].trim();
1841 let mut content = String::new();
1842 push_line(&mut content, rest);
1843 let mut cursor = index + 1;
1844 let mut end = line.end_with_eol;
1845 let mut paragraph_open = paragraph_stays_open(rest, options);
1846
1847 while cursor < lines.len() {
1848 if lines[cursor].text.trim().is_empty() {
1849 let next = next_nonblank_line(lines, cursor + 1);
1850 if next >= lines.len() || !is_footnote_continuation(lines[next].text) {
1851 break;
1852 }
1853 push_line(&mut content, "");
1854 paragraph_open = false;
1855 end = lines[cursor].end_with_eol;
1856 cursor += 1;
1857 continue;
1858 }
1859
1860 let continuation = if let Some(continuation) = strip_indent_continuation(lines[cursor].text)
1861 {
1862 continuation
1863 } else if paragraph_open && !likely_block_start(lines[cursor].text, options) {
1864 trim_ascii_start(lines[cursor].text)
1865 } else {
1866 break;
1867 };
1868 paragraph_open = paragraph_stays_open(continuation, options);
1869 push_line(&mut content, continuation);
1870 end = lines[cursor].end_with_eol;
1871 cursor += 1;
1872 }
1873
1874 Some((
1875 Block::FootnoteDefinition(FootnoteDefinition {
1876 meta: NodeMeta::new(Some(Span::new(line.start, end))),
1877 label: label.into(),
1878 identifier: normalize_label(label),
1879 children: parse_blocks(
1880 &content,
1881 line.end.saturating_sub(rest.len()),
1882 false,
1883 options,
1884 definitions,
1885 diagnostics,
1886 ),
1887 }),
1888 cursor,
1889 ))
1890}
1891
1892fn is_footnote_continuation(input: &str) -> bool {
1893 strip_indent_continuation(input).is_some()
1894}
1895
1896fn parse_leaf_directive(
1897 line: Line<'_>,
1898 options: &SyntaxOptions,
1899 definitions: &[String],
1900 diagnostics: &mut Vec<Diagnostic>,
1901) -> Option<Block> {
1902 if !options.constructs.directive_leaf {
1903 return None;
1904 }
1905 let trimmed = line.text.trim_start();
1906 if trimmed.starts_with(":::") || !trimmed.starts_with("::") {
1907 return None;
1908 }
1909 let opener_base = line.start + (line.text.len() - trimmed.len()) + 2;
1910 let Some((name, label_source, attributes, _)) = parse_directive_opener(&trimmed[2..]) else {
1911 diagnostics.push(Diagnostic::new(
1912 DiagnosticSeverity::Error,
1913 DiagnosticCode::InvalidDirectiveName,
1914 Span::new(line.start, line.end),
1915 "leaf directive must have a valid name",
1916 ));
1917 return None;
1918 };
1919 let label = label_source
1920 .map(|source| {
1921 parse_inlines(
1922 source,
1923 opener_base + name.len() + 1,
1924 options,
1925 definitions,
1926 diagnostics,
1927 )
1928 })
1929 .unwrap_or_default();
1930 Some(Block::LeafDirective(LeafDirective {
1931 meta: NodeMeta::new(Some(Span::new(line.start, line.end))),
1932 name,
1933 label,
1934 attributes,
1935 }))
1936}
1937
1938fn parse_html_block(
1939 lines: &[Line<'_>],
1940 index: usize,
1941 options: &SyntaxOptions,
1942) -> Option<(Block, usize)> {
1943 if !options.constructs.html_block {
1944 return None;
1945 }
1946
1947 let trimmed = trim_up_to_three_spaces(lines[index].text)?;
1948 let kind = html_block_start(trimmed)?;
1949 let mut value = String::new();
1950 let mut cursor = index;
1951 match kind {
1952 HtmlBlockKind::RawTag => {
1953 while cursor < lines.len() {
1957 push_line(&mut value, lines[cursor].text);
1958 if ["script", "pre", "style", "textarea"]
1959 .iter()
1960 .any(|tag| line_contains_raw_closing_tag(lines[cursor].text, tag))
1961 {
1962 cursor += 1;
1963 break;
1964 }
1965 cursor += 1;
1966 }
1967 }
1968 HtmlBlockKind::BlockTag => {
1969 while cursor < lines.len() && !lines[cursor].text.trim().is_empty() {
1970 push_line(&mut value, lines[cursor].text);
1971 cursor += 1;
1972 }
1973 }
1974 HtmlBlockKind::Until(end) => {
1975 while cursor < lines.len() {
1976 push_line(&mut value, lines[cursor].text);
1977 if lines[cursor].text.contains(end) {
1978 cursor += 1;
1979 break;
1980 }
1981 cursor += 1;
1982 }
1983 }
1984 HtmlBlockKind::UntilBlank => {
1985 while cursor < lines.len() && !lines[cursor].text.trim().is_empty() {
1986 push_line(&mut value, lines[cursor].text);
1987 cursor += 1;
1988 }
1989 }
1990 }
1991 Some((
1992 Block::HtmlBlock(HtmlBlock {
1993 meta: NodeMeta::new(Some(Span::new(
1994 lines[index].start,
1995 lines[cursor - 1].end_with_eol,
1996 ))),
1997 value,
1998 }),
1999 cursor,
2000 ))
2001}
2002
2003fn html_block_start(input: &str) -> Option<HtmlBlockKind> {
2004 let trimmed = input.trim_end();
2005 if !trimmed.starts_with('<') {
2006 return None;
2007 }
2008
2009 if raw_html_tag_start(trimmed) {
2010 return Some(HtmlBlockKind::RawTag);
2011 }
2012 if trimmed.starts_with("<!--") {
2013 return Some(HtmlBlockKind::Until("-->"));
2014 }
2015 if trimmed.starts_with("<?") {
2016 return Some(HtmlBlockKind::Until("?>"));
2017 }
2018 if is_declaration_start(trimmed) {
2019 return Some(HtmlBlockKind::Until(">"));
2020 }
2021 if trimmed.starts_with("<![CDATA[") {
2022 return Some(HtmlBlockKind::Until("]]>"));
2023 }
2024
2025 if html_block_tag_start(trimmed) {
2026 return Some(HtmlBlockKind::BlockTag);
2027 }
2028
2029 let Some((end, _tag_name)) = parse_html_tag(trimmed, 0) else {
2030 return None;
2031 };
2032 let rest = trimmed[end..].trim();
2033 if rest.is_empty() {
2034 Some(HtmlBlockKind::UntilBlank)
2035 } else {
2036 None
2037 }
2038}
2039
2040pub(crate) fn line_starts_html_block(input: &str) -> bool {
2041 trim_up_to_three_spaces(input)
2042 .and_then(html_block_start)
2043 .is_some()
2044}
2045
2046fn raw_html_tag_start(input: &str) -> bool {
2047 for tag in ["script", "pre", "style", "textarea"] {
2048 if html_raw_open_tag_prefix(input, tag) {
2049 return true;
2050 }
2051 }
2052 false
2053}
2054
/// Tells whether `input` starts with an opening tag named `tag`.
///
/// The name is compared ASCII case-insensitively and must be followed by the
/// end of input, a space, tab, line feed, carriage return or `>`, or by `/>`
/// ending the input. Closing tags (`</...`) never match.
fn html_raw_open_tag_prefix(input: &str, tag: &str) -> bool {
    let Some(after_angle) = input.strip_prefix('<') else {
        return false;
    };
    let after_angle = after_angle.as_bytes();
    if after_angle.first() == Some(&b'/') || after_angle.len() < tag.len() {
        return false;
    }

    let (name, tail) = after_angle.split_at(tag.len());
    if !name.eq_ignore_ascii_case(tag.as_bytes()) {
        return false;
    }
    matches!(
        tail,
        [] | [b' ' | b'\t' | b'\n' | b'\r' | b'>', ..] | [b'/', b'>']
    )
}
2079
/// Tells whether `input` contains a closing tag for `tag` anywhere.
///
/// A match is `</`, the tag name (ASCII case-insensitive), optional ASCII
/// whitespace, and then `>`.
fn line_contains_raw_closing_tag(input: &str, tag: &str) -> bool {
    let haystack = input.as_bytes();
    let name = tag.as_bytes();
    let opener_len = 2 + name.len();
    if haystack.len() < opener_len {
        return false;
    }

    (0..=haystack.len() - opener_len).any(|start| {
        let (opener, tail) = haystack[start..].split_at(opener_len);
        if !(opener.starts_with(b"</") && opener[2..].eq_ignore_ascii_case(name)) {
            return false;
        }
        // Whitespace may sit between the name and the closing angle bracket.
        let gap = tail
            .iter()
            .take_while(|byte| byte.is_ascii_whitespace())
            .count();
        tail.get(gap) == Some(&b'>')
    })
}
2116
2117fn html_block_tag_start(input: &str) -> bool {
2118 let bytes = input.as_bytes();
2119 if bytes.first() != Some(&b'<') {
2120 return false;
2121 }
2122
2123 let mut cursor = 1;
2124 if bytes.get(cursor) == Some(&b'/') {
2125 cursor += 1;
2126 }
2127
2128 let name_start = cursor;
2129 if !bytes
2130 .get(cursor)
2131 .is_some_and(|byte| byte.is_ascii_alphabetic())
2132 {
2133 return false;
2134 }
2135 cursor += 1;
2136 while bytes.get(cursor).is_some_and(|byte| html_name_byte(*byte)) {
2137 cursor += 1;
2138 }
2139
2140 let name = &input[name_start..cursor];
2141 if !html_block_tag(name) {
2142 return false;
2143 }
2144
2145 match bytes.get(cursor) {
2146 None | Some(b' ' | b'\t' | b'\n' | b'\r' | b'>') => true,
2147 Some(b'/') if bytes.get(cursor + 1) == Some(&b'>') => true,
2148 _ => false,
2149 }
2150}
2151
/// Tells whether `tag` is one of the block-level HTML tag names listed
/// below, compared ASCII case-insensitively.
fn html_block_tag(tag: &str) -> bool {
    const BLOCK_TAGS: &[&str] = &[
        "address",
        "article",
        "aside",
        "base",
        "basefont",
        "blockquote",
        "body",
        "caption",
        "center",
        "col",
        "colgroup",
        "dd",
        "details",
        "dialog",
        "dir",
        "div",
        "dl",
        "dt",
        "fieldset",
        "figcaption",
        "figure",
        "footer",
        "form",
        "frame",
        "frameset",
        "h1",
        "h2",
        "h3",
        "h4",
        "h5",
        "h6",
        "head",
        "header",
        "hr",
        "html",
        "iframe",
        "legend",
        "li",
        "link",
        "main",
        "menu",
        "menuitem",
        "nav",
        "noframes",
        "ol",
        "optgroup",
        "option",
        "p",
        "param",
        "search",
        "section",
        "summary",
        "table",
        "tbody",
        "td",
        "tfoot",
        "th",
        "thead",
        "title",
        "tr",
        "track",
        "ul",
    ];
    // Every entry is lowercase ASCII, so an ASCII case-insensitive comparison
    // matches exactly what lowercasing `tag` first would match.
    BLOCK_TAGS
        .iter()
        .any(|name| tag.eq_ignore_ascii_case(name))
}
2219
/// Tells whether `input` starts with `<!` immediately followed by an ASCII
/// letter.
fn is_declaration_start(input: &str) -> bool {
    match input.as_bytes() {
        [b'<', b'!', third, ..] => third.is_ascii_alphabetic(),
        _ => false,
    }
}
2226
2227fn parse_mdx_flow(
2228 lines: &[Line<'_>],
2229 index: usize,
2230 options: &SyntaxOptions,
2231 diagnostics: &mut Vec<Diagnostic>,
2232) -> Option<(Block, usize)> {
2233 if options.constructs.mdx_esm {
2234 if let Some((block, next)) = parse_mdx_esm_flow(lines, index, diagnostics) {
2235 return Some((block, next));
2236 }
2237 }
2238
2239 let line = lines[index];
2240 let trimmed = line.text.trim_start();
2241 if options.constructs.mdx_expression_block && trimmed.starts_with('{') {
2242 let open_byte = line.text.len() - trimmed.len();
2243 if let Some((close_line, close_byte)) = find_mdx_expression_close(lines, index, open_byte) {
2244 return Some((
2245 Block::MdxExpression(MdxExpression {
2246 meta: NodeMeta::new(Some(Span::new(line.start, lines[close_line].end))),
2247 value: collect_mdx_expression_value(
2248 lines, index, open_byte, close_line, close_byte,
2249 ),
2250 }),
2251 close_line + 1,
2252 ));
2253 }
2254 diagnostics.push(Diagnostic::new(
2255 DiagnosticSeverity::Error,
2256 DiagnosticCode::InvalidMdx,
2257 Span::new(line.start + open_byte, lines.last()?.end_with_eol),
2258 "MDX expression block is missing a closing brace",
2259 ));
2260 }
2261 if options.constructs.mdx_jsx_block && trimmed.starts_with('<') {
2262 if let Some(close_line) = find_mdx_jsx_close(lines, index) {
2263 return Some((
2264 Block::MdxJsx(MdxJsx {
2265 meta: NodeMeta::new(Some(Span::new(line.start, lines[close_line].end))),
2266 value: collect_line_range(lines, index, close_line),
2267 }),
2268 close_line + 1,
2269 ));
2270 }
2271 let start_byte = line.text.len() - trimmed.len();
2272 if let Some(root) = mdx_jsx_tag_start(line.text, start_byte) {
2273 if !root.closing {
2274 if let Some((_tag_end_line, _tag_end_byte, self_closing)) =
2275 find_mdx_jsx_tag_end(lines, index, start_byte)
2276 {
2277 if !self_closing {
2278 diagnostics.push(Diagnostic::new(
2279 DiagnosticSeverity::Error,
2280 DiagnosticCode::InvalidMdx,
2281 Span::new(line.start + start_byte, lines.last()?.end_with_eol),
2282 "MDX JSX block is missing a closing tag",
2283 ));
2284 }
2285 }
2286 }
2287 }
2288 }
2289 None
2290}
2291
/// Scanner state carried from line to line while reading an MDX ESM block.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
struct MdxEsmState {
    /// Number of `{` seen outside strings/comments that are not yet closed.
    brace_depth: usize,
    /// Number of `[` seen outside strings/comments that are not yet closed.
    bracket_depth: usize,
    /// Number of `(` seen outside strings/comments that are not yet closed.
    paren_depth: usize,
    /// Set while inside a `/* ... */` comment.
    block_comment: bool,
    /// Delimiter byte (`'`, `"` or `` ` ``) of the string being scanned.
    quote: Option<u8>,
    /// Set when the previous byte inside a string was a backslash.
    escaped: bool,
}
2301
/// Lexical context tracked while searching for the brace that closes an MDX
/// expression.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MdxBraceState {
    /// Plain code; braces count and quotes/comments may open.
    Normal,
    /// Inside a `'...'` string.
    SingleQuoted,
    /// Inside a `"..."` string.
    DoubleQuoted,
    /// Inside a `` `...` `` template.
    Template,
    /// Inside a `// ...` comment.
    LineComment,
    /// Inside a `/* ... */` comment.
    BlockComment,
}
2311
/// Identity of an MDX JSX tag.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MdxJsxTag<'a> {
    /// A tag without a name (presumably the `<>` / `</>` fragment syntax).
    Fragment,
    /// A tag with the given name, borrowed from the source text.
    Named(&'a str),
}
2317
2318#[derive(Clone, Copy, Debug, Eq, PartialEq)]
2319struct MdxJsxTagStart<'a> {
2320 tag: MdxJsxTag<'a>,
2321 closing: bool,
2322}
2323
2324fn parse_mdx_esm_flow(
2325 lines: &[Line<'_>],
2326 index: usize,
2327 diagnostics: &mut Vec<Diagnostic>,
2328) -> Option<(Block, usize)> {
2329 if !is_mdx_esm_start(lines[index].text) {
2330 return None;
2331 }
2332
2333 let mut value = String::new();
2334 let mut state = MdxEsmState::default();
2335 let mut cursor = index;
2336 while cursor < lines.len() {
2337 let line = lines[cursor].text;
2338 if cursor > index && !is_mdx_esm_continuation(line, &state) {
2339 break;
2340 }
2341 if cursor > index {
2342 value.push('\n');
2343 }
2344 value.push_str(line);
2345 update_mdx_esm_state(line, &mut state);
2346 cursor += 1;
2347 }
2348 if cursor >= lines.len() && state_has_open_mdx_esm_construct(&state) {
2349 diagnostics.push(Diagnostic::new(
2350 DiagnosticSeverity::Error,
2351 DiagnosticCode::InvalidMdx,
2352 Span::new(lines[index].start, lines[cursor - 1].end_with_eol),
2353 "MDX ESM block is missing a closing delimiter",
2354 ));
2355 }
2356
2357 Some((
2358 Block::MdxEsm(MdxEsm {
2359 meta: NodeMeta::new(Some(Span::new(lines[index].start, lines[cursor - 1].end))),
2360 value,
2361 }),
2362 cursor,
2363 ))
2364}
2365
/// Tells whether `line` begins, at its very first byte, with the `import` or
/// `export` keyword followed by a space.
fn is_mdx_esm_start(line: &str) -> bool {
    ["import ", "export "]
        .iter()
        .any(|keyword| line.starts_with(*keyword))
}
2369
2370fn is_mdx_esm_continuation(line: &str, state: &MdxEsmState) -> bool {
2371 if state_has_open_mdx_esm_construct(state) {
2372 return true;
2373 }
2374 let trimmed = line.trim_start();
2375 if trimmed.is_empty() {
2376 return false;
2377 }
2378 is_mdx_esm_start(line) || trimmed.starts_with("//") || trimmed.starts_with("/*")
2379}
2380
2381fn state_has_open_mdx_esm_construct(state: &MdxEsmState) -> bool {
2382 state.brace_depth > 0
2383 || state.bracket_depth > 0
2384 || state.paren_depth > 0
2385 || state.block_comment
2386 || state.quote == Some(b'`')
2387}
2388
2389fn update_mdx_esm_state(line: &str, state: &mut MdxEsmState) {
2390 let bytes = line.as_bytes();
2391 let mut index = 0;
2392 while index < bytes.len() {
2393 let byte = bytes[index];
2394 if state.block_comment {
2395 if byte == b'*' && bytes.get(index + 1) == Some(&b'/') {
2396 state.block_comment = false;
2397 index += 1;
2398 }
2399 index += 1;
2400 continue;
2401 }
2402
2403 if let Some(delimiter) = state.quote {
2404 if state.escaped {
2405 state.escaped = false;
2406 } else if byte == b'\\' {
2407 state.escaped = true;
2408 } else if byte == delimiter {
2409 state.quote = None;
2410 }
2411 index += 1;
2412 continue;
2413 }
2414
2415 match byte {
2416 b'\'' | b'"' | b'`' => state.quote = Some(byte),
2417 b'/' if bytes.get(index + 1) == Some(&b'/') => break,
2418 b'/' if bytes.get(index + 1) == Some(&b'*') => {
2419 state.block_comment = true;
2420 index += 1;
2421 }
2422 b'{' => state.brace_depth += 1,
2423 b'}' => state.brace_depth = state.brace_depth.saturating_sub(1),
2424 b'[' => state.bracket_depth += 1,
2425 b']' => state.bracket_depth = state.bracket_depth.saturating_sub(1),
2426 b'(' => state.paren_depth += 1,
2427 b')' => state.paren_depth = state.paren_depth.saturating_sub(1),
2428 _ => {}
2429 }
2430 index += 1;
2431 }
2432}
2433
2434fn find_mdx_expression_close(
2435 lines: &[Line<'_>],
2436 index: usize,
2437 open_byte: usize,
2438) -> Option<(usize, usize)> {
2439 let mut depth = 0usize;
2440 let mut state = MdxBraceState::Normal;
2441 let mut escaped = false;
2442 let mut cursor = index;
2443
2444 while cursor < lines.len() {
2445 let bytes = lines[cursor].text.as_bytes();
2446 let mut byte_index = if cursor == index { open_byte } else { 0 };
2447 while byte_index < bytes.len() {
2448 let byte = bytes[byte_index];
2449 match state {
2450 MdxBraceState::Normal => match byte {
2451 b'\'' => state = MdxBraceState::SingleQuoted,
2452 b'"' => state = MdxBraceState::DoubleQuoted,
2453 b'`' => state = MdxBraceState::Template,
2454 b'/' if bytes.get(byte_index + 1) == Some(&b'/') => {
2455 state = MdxBraceState::LineComment;
2456 break;
2457 }
2458 b'/' if bytes.get(byte_index + 1) == Some(&b'*') => {
2459 state = MdxBraceState::BlockComment;
2460 byte_index += 1;
2461 }
2462 b'{' => depth += 1,
2463 b'}' => {
2464 depth = depth.checked_sub(1)?;
2465 if depth == 0 {
2466 return lines[cursor].text[byte_index + 1..]
2467 .trim()
2468 .is_empty()
2469 .then_some((cursor, byte_index));
2470 }
2471 }
2472 _ => {}
2473 },
2474 MdxBraceState::SingleQuoted => {
2475 update_mdx_quote_state(byte, b'\'', &mut state, &mut escaped);
2476 }
2477 MdxBraceState::DoubleQuoted => {
2478 update_mdx_quote_state(byte, b'"', &mut state, &mut escaped);
2479 }
2480 MdxBraceState::Template => {
2481 update_mdx_quote_state(byte, b'`', &mut state, &mut escaped);
2482 }
2483 MdxBraceState::LineComment => break,
2484 MdxBraceState::BlockComment => {
2485 if byte == b'*' && bytes.get(byte_index + 1) == Some(&b'/') {
2486 state = MdxBraceState::Normal;
2487 byte_index += 1;
2488 }
2489 }
2490 }
2491 byte_index += 1;
2492 }
2493 if state == MdxBraceState::LineComment {
2494 state = MdxBraceState::Normal;
2495 }
2496 cursor += 1;
2497 }
2498
2499 None
2500}
2501
2502fn update_mdx_quote_state(byte: u8, delimiter: u8, state: &mut MdxBraceState, escaped: &mut bool) {
2503 if *escaped {
2504 *escaped = false;
2505 return;
2506 }
2507 if byte == b'\\' {
2508 *escaped = true;
2509 return;
2510 }
2511 if byte == delimiter {
2512 *state = MdxBraceState::Normal;
2513 }
2514}
2515
2516fn find_mdx_expression_inline_close(input: &str, open_byte: usize) -> Option<usize> {
2517 let bytes = input.as_bytes();
2518 if bytes.get(open_byte) != Some(&b'{') {
2519 return None;
2520 }
2521
2522 let mut depth = 0usize;
2523 let mut state = MdxBraceState::Normal;
2524 let mut escaped = false;
2525 let mut cursor = open_byte;
2526 while cursor < bytes.len() {
2527 let byte = bytes[cursor];
2528 match state {
2529 MdxBraceState::Normal => match byte {
2530 b'\'' => state = MdxBraceState::SingleQuoted,
2531 b'"' => state = MdxBraceState::DoubleQuoted,
2532 b'`' => state = MdxBraceState::Template,
2533 b'/' if bytes.get(cursor + 1) == Some(&b'/') => {
2534 state = MdxBraceState::LineComment;
2535 cursor += 1;
2536 }
2537 b'/' if bytes.get(cursor + 1) == Some(&b'*') => {
2538 state = MdxBraceState::BlockComment;
2539 cursor += 1;
2540 }
2541 b'{' => depth += 1,
2542 b'}' => {
2543 depth = depth.checked_sub(1)?;
2544 if depth == 0 {
2545 return Some(cursor);
2546 }
2547 }
2548 _ => {}
2549 },
2550 MdxBraceState::SingleQuoted => {
2551 update_mdx_quote_state(byte, b'\'', &mut state, &mut escaped);
2552 }
2553 MdxBraceState::DoubleQuoted => {
2554 update_mdx_quote_state(byte, b'"', &mut state, &mut escaped);
2555 }
2556 MdxBraceState::Template => {
2557 update_mdx_quote_state(byte, b'`', &mut state, &mut escaped);
2558 }
2559 MdxBraceState::LineComment => {
2560 if byte == b'\n' {
2561 state = MdxBraceState::Normal;
2562 }
2563 }
2564 MdxBraceState::BlockComment => {
2565 if byte == b'*' && bytes.get(cursor + 1) == Some(&b'/') {
2566 state = MdxBraceState::Normal;
2567 cursor += 1;
2568 }
2569 }
2570 }
2571 cursor += 1;
2572 }
2573 None
2574}
2575
2576fn collect_mdx_expression_value(
2577 lines: &[Line<'_>],
2578 start_line: usize,
2579 open_byte: usize,
2580 close_line: usize,
2581 close_byte: usize,
2582) -> String {
2583 let mut value = String::new();
2584 let mut cursor = start_line;
2585 while cursor <= close_line {
2586 if cursor > start_line {
2587 value.push('\n');
2588 }
2589 let line = lines[cursor].text;
2590 let segment = if cursor == start_line && cursor == close_line {
2591 &line[open_byte + 1..close_byte]
2592 } else if cursor == start_line {
2593 &line[open_byte + 1..]
2594 } else if cursor == close_line {
2595 &line[..close_byte]
2596 } else {
2597 line
2598 };
2599 value.push_str(segment);
2600 cursor += 1;
2601 }
2602 value
2603}
2604
2605fn find_mdx_jsx_close<'a>(lines: &'a [Line<'a>], index: usize) -> Option<usize> {
2606 let line = lines[index];
2607 let trimmed = line.text.trim_start();
2608 let start_byte = line.text.len() - trimmed.len();
2609 let root = mdx_jsx_tag_start(line.text, start_byte)?;
2610 if root.closing {
2611 return None;
2612 }
2613
2614 let (mut cursor_line, mut cursor_byte, self_closing) =
2615 find_mdx_jsx_tag_end(lines, index, start_byte)?;
2616 if self_closing {
2617 return Some(cursor_line);
2618 }
2619
2620 let mut depth = 1usize;
2621 cursor_byte += 1;
2622 'scan: while cursor_line < lines.len() {
2623 let line = lines[cursor_line].text;
2624 while cursor_byte < line.len() {
2625 let Some(relative_start) = line[cursor_byte..].find('<') else {
2626 break;
2627 };
2628 let tag_start_byte = cursor_byte + relative_start;
2629 let Some(candidate) = mdx_jsx_tag_start(line, tag_start_byte) else {
2630 cursor_byte = tag_start_byte + 1;
2631 continue;
2632 };
2633 let Some((tag_end_line, tag_end_byte, candidate_self_closing)) =
2634 find_mdx_jsx_tag_end(lines, cursor_line, tag_start_byte)
2635 else {
2636 return None;
2637 };
2638
2639 if mdx_jsx_tag_matches(root.tag, candidate.tag) {
2640 if candidate.closing {
2641 depth = depth.saturating_sub(1);
2642 if depth == 0 {
2643 return Some(tag_end_line);
2644 }
2645 } else if !candidate_self_closing {
2646 depth += 1;
2647 }
2648 }
2649
2650 cursor_byte = tag_end_byte + 1;
2651 if tag_end_line != cursor_line {
2652 cursor_line = tag_end_line;
2653 continue 'scan;
2654 }
2655 }
2656 cursor_line += 1;
2657 cursor_byte = 0;
2658 }
2659 None
2660}
2661
2662fn parse_mdx_jsx_inline(input: &str, index: usize) -> Option<(usize, String)> {
2663 let root = mdx_jsx_tag_start(input, index)?;
2664 if root.closing {
2665 return None;
2666 }
2667
2668 let (mut cursor, self_closing) = find_mdx_jsx_tag_end_in_text(input, index)?;
2669 if self_closing {
2670 let end = cursor + 1;
2671 return Some((end, input[index..end].into()));
2672 }
2673
2674 let mut depth = 1usize;
2675 cursor += 1;
2676 while cursor < input.len() {
2677 let Some(relative_start) = input[cursor..].find('<') else {
2678 return None;
2679 };
2680 let tag_start_byte = cursor + relative_start;
2681 let Some(candidate) = mdx_jsx_tag_start(input, tag_start_byte) else {
2682 cursor = tag_start_byte + 1;
2683 continue;
2684 };
2685 let Some((tag_end, candidate_self_closing)) =
2686 find_mdx_jsx_tag_end_in_text(input, tag_start_byte)
2687 else {
2688 return None;
2689 };
2690
2691 if mdx_jsx_tag_matches(root.tag, candidate.tag) {
2692 if candidate.closing {
2693 depth = depth.saturating_sub(1);
2694 if depth == 0 {
2695 let end = tag_end + 1;
2696 return Some((end, input[index..end].into()));
2697 }
2698 } else if !candidate_self_closing {
2699 depth += 1;
2700 }
2701 }
2702 cursor = tag_end + 1;
2703 }
2704 None
2705}
2706
2707fn mdx_jsx_tag_start(input: &str, start: usize) -> Option<MdxJsxTagStart<'_>> {
2708 let bytes = input.as_bytes();
2709 if bytes.get(start) != Some(&b'<') {
2710 return None;
2711 }
2712
2713 match bytes.get(start + 1) {
2714 Some(b'>') => {
2715 return Some(MdxJsxTagStart {
2716 tag: MdxJsxTag::Fragment,
2717 closing: false,
2718 });
2719 }
2720 Some(b'/') if bytes.get(start + 2) == Some(&b'>') => {
2721 return Some(MdxJsxTagStart {
2722 tag: MdxJsxTag::Fragment,
2723 closing: true,
2724 });
2725 }
2726 Some(b'!' | b'?') | None => return None,
2727 _ => {}
2728 }
2729
2730 let closing = bytes.get(start + 1) == Some(&b'/');
2731 let name_start = start + if closing { 2 } else { 1 };
2732 if !bytes
2733 .get(name_start)
2734 .is_some_and(|byte| is_mdx_jsx_name_start_byte(*byte))
2735 {
2736 return None;
2737 }
2738
2739 let mut name_end = name_start + 1;
2740 while bytes
2741 .get(name_end)
2742 .is_some_and(|byte| is_mdx_jsx_name_byte(*byte))
2743 {
2744 name_end += 1;
2745 }
2746 if name_end == name_start {
2747 return None;
2748 }
2749 if bytes
2750 .get(name_end)
2751 .is_some_and(|byte| !is_mdx_jsx_name_delimiter(*byte))
2752 {
2753 return None;
2754 }
2755 Some(MdxJsxTagStart {
2756 tag: MdxJsxTag::Named(&input[name_start..name_end]),
2757 closing,
2758 })
2759}
2760
2761fn mdx_jsx_tag_matches(left: MdxJsxTag<'_>, right: MdxJsxTag<'_>) -> bool {
2762 match (left, right) {
2763 (MdxJsxTag::Fragment, MdxJsxTag::Fragment) => true,
2764 (MdxJsxTag::Named(left), MdxJsxTag::Named(right)) => left == right,
2765 _ => false,
2766 }
2767}
2768
2769fn find_mdx_jsx_tag_end(
2770 lines: &[Line<'_>],
2771 start_line: usize,
2772 start_byte: usize,
2773) -> Option<(usize, usize, bool)> {
2774 let mut line_index = start_line;
2775 let mut byte_index = start_byte + 1;
2776 let mut quote = None;
2777 let mut escaped = false;
2778 let mut expression_depth = 0usize;
2779 let mut expression_state = MdxBraceState::Normal;
2780 let mut expression_escaped = false;
2781
2782 while line_index < lines.len() {
2783 let bytes = lines[line_index].text.as_bytes();
2784 while byte_index < bytes.len() {
2785 let byte = bytes[byte_index];
2786 if expression_depth > 0 {
2787 if update_mdx_jsx_expression_state(
2788 byte,
2789 bytes.get(byte_index + 1).copied(),
2790 &mut expression_depth,
2791 &mut expression_state,
2792 &mut expression_escaped,
2793 ) {
2794 byte_index += 1;
2795 }
2796 byte_index += 1;
2797 continue;
2798 }
2799
2800 if let Some(delimiter) = quote {
2801 if escaped {
2802 escaped = false;
2803 } else if byte == b'\\' {
2804 escaped = true;
2805 } else if byte == delimiter {
2806 quote = None;
2807 }
2808 byte_index += 1;
2809 continue;
2810 }
2811
2812 match byte {
2813 b'\'' | b'"' => quote = Some(byte),
2814 b'{' => {
2815 expression_depth = 1;
2816 expression_state = MdxBraceState::Normal;
2817 expression_escaped = false;
2818 }
2819 b'>' if expression_depth == 0 => {
2820 let self_closing =
2821 previous_nonspace_before(lines, line_index, byte_index) == Some(b'/');
2822 return Some((line_index, byte_index, self_closing));
2823 }
2824 _ => {}
2825 }
2826 byte_index += 1;
2827 }
2828 if expression_state == MdxBraceState::LineComment {
2829 expression_state = MdxBraceState::Normal;
2830 }
2831 line_index += 1;
2832 byte_index = 0;
2833 }
2834 None
2835}
2836
2837fn previous_nonspace_before(
2838 lines: &[Line<'_>],
2839 line_index: usize,
2840 byte_index: usize,
2841) -> Option<u8> {
2842 let mut cursor_line = line_index;
2843 let mut cursor_byte = byte_index;
2844
2845 loop {
2846 if let Some(byte) = lines[cursor_line].text.as_bytes()[..cursor_byte]
2847 .iter()
2848 .rev()
2849 .copied()
2850 .find(|byte| !byte.is_ascii_whitespace())
2851 {
2852 return Some(byte);
2853 }
2854 if cursor_line == 0 {
2855 return None;
2856 }
2857 cursor_line -= 1;
2858 cursor_byte = lines[cursor_line].text.len();
2859 }
2860}
2861
2862fn find_mdx_jsx_tag_end_in_text(input: &str, start_byte: usize) -> Option<(usize, bool)> {
2863 let bytes = input.as_bytes();
2864 let mut byte_index = start_byte + 1;
2865 let mut quote = None;
2866 let mut escaped = false;
2867 let mut expression_depth = 0usize;
2868 let mut expression_state = MdxBraceState::Normal;
2869 let mut expression_escaped = false;
2870
2871 while byte_index < bytes.len() {
2872 let byte = bytes[byte_index];
2873 if expression_depth > 0 {
2874 if update_mdx_jsx_expression_state(
2875 byte,
2876 bytes.get(byte_index + 1).copied(),
2877 &mut expression_depth,
2878 &mut expression_state,
2879 &mut expression_escaped,
2880 ) {
2881 byte_index += 1;
2882 }
2883 byte_index += 1;
2884 continue;
2885 }
2886
2887 if let Some(delimiter) = quote {
2888 if escaped {
2889 escaped = false;
2890 } else if byte == b'\\' {
2891 escaped = true;
2892 } else if byte == delimiter {
2893 quote = None;
2894 }
2895 byte_index += 1;
2896 continue;
2897 }
2898
2899 match byte {
2900 b'\'' | b'"' => quote = Some(byte),
2901 b'{' => {
2902 expression_depth = 1;
2903 expression_state = MdxBraceState::Normal;
2904 expression_escaped = false;
2905 }
2906 b'>' if expression_depth == 0 => {
2907 let self_closing = previous_nonspace_before_text(input, byte_index) == Some(b'/');
2908 return Some((byte_index, self_closing));
2909 }
2910 _ => {}
2911 }
2912 byte_index += 1;
2913 }
2914 None
2915}
2916
/// Returns the last byte of `input[..byte_index]` that is not ASCII
/// whitespace, or `None` when that prefix is empty or all whitespace.
///
/// Panics if `byte_index` exceeds the length of `input`.
fn previous_nonspace_before_text(input: &str, byte_index: usize) -> Option<u8> {
    let preceding = &input.as_bytes()[..byte_index];
    preceding
        .iter()
        .rposition(|byte| !byte.is_ascii_whitespace())
        .map(|found| preceding[found])
}
2924
2925fn update_mdx_jsx_expression_state(
2926 byte: u8,
2927 next: Option<u8>,
2928 depth: &mut usize,
2929 state: &mut MdxBraceState,
2930 escaped: &mut bool,
2931) -> bool {
2932 match *state {
2933 MdxBraceState::Normal => match byte {
2934 b'\'' => *state = MdxBraceState::SingleQuoted,
2935 b'"' => *state = MdxBraceState::DoubleQuoted,
2936 b'`' => *state = MdxBraceState::Template,
2937 b'/' if next == Some(b'/') => {
2938 *state = MdxBraceState::LineComment;
2939 return true;
2940 }
2941 b'/' if next == Some(b'*') => {
2942 *state = MdxBraceState::BlockComment;
2943 return true;
2944 }
2945 b'{' => *depth += 1,
2946 b'}' => {
2947 *depth = (*depth).saturating_sub(1);
2948 if *depth == 0 {
2949 *state = MdxBraceState::Normal;
2950 *escaped = false;
2951 }
2952 }
2953 _ => {}
2954 },
2955 MdxBraceState::SingleQuoted => {
2956 update_mdx_quote_state(byte, b'\'', state, escaped);
2957 }
2958 MdxBraceState::DoubleQuoted => {
2959 update_mdx_quote_state(byte, b'"', state, escaped);
2960 }
2961 MdxBraceState::Template => {
2962 update_mdx_quote_state(byte, b'`', state, escaped);
2963 }
2964 MdxBraceState::LineComment => {
2965 if byte == b'\n' {
2966 *state = MdxBraceState::Normal;
2967 }
2968 }
2969 MdxBraceState::BlockComment => {
2970 if byte == b'*' && next == Some(b'/') {
2971 *state = MdxBraceState::Normal;
2972 return true;
2973 }
2974 }
2975 }
2976 false
2977}
2978
/// Whether `byte` may begin a JSX tag name: an ASCII letter, `_` or `$`.
fn is_mdx_jsx_name_start_byte(byte: u8) -> bool {
    matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'$')
}
2982
/// Whether `byte` may continue a JSX tag name: an ASCII letter or digit, or
/// one of `.`, `:`, `_`, `-`, `$`.
fn is_mdx_jsx_name_byte(byte: u8) -> bool {
    matches!(
        byte,
        b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'.' | b':' | b'_' | b'-' | b'$'
    )
}
2986
/// Whether `byte` may directly follow a JSX tag name: ASCII whitespace
/// (space, tab, line feed, form feed, carriage return) or one of `/`, `>`,
/// `{`, `}`.
fn is_mdx_jsx_name_delimiter(byte: u8) -> bool {
    matches!(
        byte,
        b' ' | b'\t' | b'\n' | b'\x0C' | b'\r' | b'/' | b'>' | b'{' | b'}'
    )
}
2990
2991fn collect_line_range(lines: &[Line<'_>], start: usize, end: usize) -> String {
2992 let mut value = String::new();
2993 let mut cursor = start;
2994 while cursor <= end {
2995 if cursor > start {
2996 value.push('\n');
2997 }
2998 value.push_str(lines[cursor].text);
2999 cursor += 1;
3000 }
3001 value
3002}
3003
3004fn parse_indented_code(
3005 lines: &[Line<'_>],
3006 index: usize,
3007 options: &SyntaxOptions,
3008) -> Option<(Block, usize)> {
3009 if !options.constructs.indented_code || strip_indented_code_prefix(lines[index].text).is_none()
3010 {
3011 return None;
3012 }
3013 let mut value = String::new();
3014 let mut cursor = index;
3015 let mut content_end = index;
3018 let mut content_end_len = 0usize;
3019 while cursor < lines.len() {
3020 if let Some(text) = strip_indented_code_prefix(lines[cursor].text) {
3021 ensure_line_separator(&mut value);
3022 value.push_str(text);
3023 value.push_str(lines[cursor].eol);
3024 if !text.trim().is_empty() {
3025 content_end = cursor;
3026 content_end_len = value.len();
3027 }
3028 cursor += 1;
3029 continue;
3030 }
3031
3032 if !lines[cursor].text.trim().is_empty() {
3033 break;
3034 }
3035 ensure_line_separator(&mut value);
3036 value.push_str(lines[cursor].eol);
3037 cursor += 1;
3038 }
3039 value.truncate(content_end_len);
3041 Some((
3042 Block::CodeBlock(CodeBlock {
3043 meta: NodeMeta::new(Some(Span::new(
3044 lines[index].start,
3045 lines[content_end].end_with_eol,
3046 ))),
3047 kind: CodeBlockKind::Indented,
3048 info: None,
3049 value,
3050 }),
3051 cursor,
3052 ))
3053}
3054
/// Strips the leading indentation that marks an indented code line.
///
/// Leading spaces count one column each and a tab advances to the next
/// multiple of four. As soon as four columns are reached, everything after the
/// byte that reached them is returned. Returns `None` if a non-space, non-tab
/// byte (or the end of the input) is met before four columns are covered.
fn strip_indented_code_prefix(input: &str) -> Option<&str> {
    let mut width = 0usize;
    for (position, byte) in input.bytes().enumerate() {
        width = match byte {
            b' ' => width + 1,
            b'\t' => (width / 4 + 1) * 4,
            _ => return None,
        };
        if width >= 4 {
            return Some(&input[position + 1..]);
        }
    }
    None
}
3076
3077fn parse_table(
3078 lines: &[Line<'_>],
3079 index: usize,
3080 options: &SyntaxOptions,
3081 definitions: &[String],
3082 diagnostics: &mut Vec<Diagnostic>,
3083) -> Option<(Block, usize)> {
3084 if !options.constructs.gfm_table || index + 1 >= lines.len() {
3085 return None;
3086 }
3087 let delimiter = table_indent_line(lines[index + 1].text, options.constructs.indented_code)?;
3088 if list_marker_info(delimiter).is_some() {
3089 return None;
3090 }
3091 if !table_has_separator(lines[index].text, delimiter, options.constructs.spoiler) {
3092 return None;
3093 }
3094 let alignments = parse_table_delimiter(delimiter, options.constructs.spoiler)?;
3095 let headers = split_table_row(lines[index].text, options.constructs.spoiler);
3096 if headers.len() != alignments.len() {
3097 return None;
3098 }
3099
3100 let mut rows = Vec::new();
3101 rows.push(TableRow {
3102 meta: NodeMeta::new(Some(Span::new(lines[index].start, lines[index].end))),
3103 cells: headers
3104 .iter()
3105 .map(|cell| TableCell {
3106 meta: NodeMeta::default(),
3107 children: parse_inlines(
3108 cell.trim(),
3109 lines[index].start,
3110 options,
3111 definitions,
3112 diagnostics,
3113 ),
3114 })
3115 .collect(),
3116 });
3117
3118 let mut cursor = index + 2;
3119 while cursor < lines.len() {
3120 let Some(row) = table_indent_line(lines[cursor].text, options.constructs.indented_code)
3121 else {
3122 break;
3123 };
3124 if row.trim().is_empty() || table_body_line_ends_table(lines[cursor].text, options) {
3128 break;
3129 }
3130 let cells = split_table_row(row, options.constructs.spoiler);
3131 rows.push(TableRow {
3132 meta: NodeMeta::new(Some(Span::new(lines[cursor].start, lines[cursor].end))),
3133 cells: alignments
3134 .iter()
3135 .enumerate()
3136 .map(|(cell_index, _)| {
3137 let value = cells.get(cell_index).map(String::as_str).unwrap_or("");
3138 TableCell {
3139 meta: NodeMeta::default(),
3140 children: parse_inlines(
3141 value.trim(),
3142 lines[cursor].start,
3143 options,
3144 definitions,
3145 diagnostics,
3146 ),
3147 }
3148 })
3149 .collect(),
3150 });
3151 cursor += 1;
3152 }
3153
3154 Some((
3155 Block::Table(Table {
3156 meta: NodeMeta::new(Some(Span::new(
3157 lines[index].start,
3158 lines[cursor - 1].end_with_eol,
3159 ))),
3160 alignments,
3161 rows,
3162 }),
3163 cursor,
3164 ))
3165}
3166
3167fn parse_setext_heading(
3168 lines: &[Line<'_>],
3169 index: usize,
3170 options: &SyntaxOptions,
3171 definitions: &[String],
3172) -> Option<(Block, usize)> {
3173 if index + 1 >= lines.len() || lines[index].text.trim().is_empty() {
3174 return None;
3175 }
3176
3177 let mut underline_index = index + 1;
3182 loop {
3183 let underline_depth = if lines[underline_index].lazy {
3189 None
3190 } else {
3191 setext_underline_depth(lines[underline_index].text)
3192 };
3193 if let Some(depth) = underline_depth {
3194 let mut value = String::new();
3195 for line in &lines[index..underline_index] {
3196 push_line(&mut value, trim_ascii_start(line.text));
3199 }
3200 return Some((
3201 Block::Heading(Heading {
3202 meta: NodeMeta::new(Some(Span::new(
3203 lines[index].start,
3204 lines[underline_index].end,
3205 ))),
3206 depth,
3207 kind: HeadingKind::Setext,
3208 children: parse_inlines(
3209 &value,
3210 lines[index].start,
3211 options,
3212 definitions,
3213 &mut Vec::new(),
3214 ),
3215 }),
3216 underline_index + 1,
3217 ));
3218 }
3219
3220 let line = lines[underline_index].text;
3223 if line.trim().is_empty()
3224 || table_can_start(lines, underline_index, options)
3225 || likely_block_start(line, options)
3226 {
3227 return None;
3228 }
3229 underline_index += 1;
3230 if underline_index >= lines.len() {
3231 return None;
3232 }
3233 }
3234}
3235
3236fn setext_underline_depth(input: &str) -> Option<u8> {
3237 let underline = trim_up_to_three_spaces(input)?.trim();
3238 match underline {
3239 text if !text.is_empty() && text.chars().all(|char| char == '=') => Some(1),
3240 text if !text.is_empty() && text.chars().all(|char| char == '-') => Some(2),
3241 _ => None,
3242 }
3243}
3244
3245fn parse_paragraph(
3246 lines: &[Line<'_>],
3247 index: usize,
3248 options: &SyntaxOptions,
3249 definitions: &[String],
3250 diagnostics: &mut Vec<Diagnostic>,
3251) -> (Block, usize) {
3252 let mut value = String::new();
3253 let start = lines[index].start;
3254 let mut cursor = index;
3255 while cursor < lines.len() {
3256 if lines[cursor].text.trim().is_empty() {
3257 break;
3258 }
3259 if cursor > index && !lines[cursor].lazy {
3263 if table_can_start(lines, cursor, options) {
3264 break;
3265 }
3266 if likely_block_start(lines[cursor].text, options) {
3267 break;
3268 }
3269 }
3270 if !value.is_empty() {
3271 value.push('\n');
3272 }
3273 value.push_str(trim_ascii_start(lines[cursor].text));
3274 cursor += 1;
3275 }
3276
3277 let end = lines[cursor - 1].end;
3278 (
3279 Block::Paragraph(Paragraph {
3280 meta: NodeMeta::new(Some(Span::new(start, end))),
3281 children: parse_inlines(&value, start, options, definitions, diagnostics),
3282 }),
3283 cursor,
3284 )
3285}
3286
/// Bookkeeping entry for one run of emphasis or strikethrough delimiter
/// bytes recorded during inline parsing.
#[derive(Copy, Clone)]
struct DelimMarker {
    /// Index in the inline node list of the text node holding this run.
    node_index: usize,
    /// The delimiter byte the run is made of.
    marker: u8,
    /// Number of delimiter bytes in the run that are still unmatched.
    length: usize,
    /// Whether the run may open emphasis.
    can_open: bool,
    /// Whether the run may close emphasis.
    can_close: bool,
    /// Absolute source offset used when computing the spans of wrapped nodes;
    /// initialised to the offset of the run's first byte.
    span_start: usize,
    /// Set once the run must no longer take part in delimiter matching.
    inactive: bool,
}
3305
3306fn record_emphasis_delimiter(
3320 nodes: &mut Vec<Inline>,
3321 delimiters: &mut Vec<DelimMarker>,
3322 input: &str,
3323 index: usize,
3324 base_offset: usize,
3325 marker: u8,
3326 strikethrough: bool,
3327) {
3328 let length = delimiter_byte_run_len(input, index, marker);
3329 let (mut can_open, mut can_close) = if marker == b'_' {
3330 (
3331 can_open_underscore(input, index, length),
3332 can_close_underscore(input, index, length),
3333 )
3334 } else {
3335 (
3336 can_open_delimited(input, index, length),
3337 can_close_delimited(input, index, length),
3338 )
3339 };
3340
3341 if strikethrough && marker != b'~' {
3345 let before = input[..index].chars().next_back();
3346 let after = input[index + length..].chars().next();
3347 if after == Some('~') {
3348 can_open = true;
3349 }
3350 if before == Some('~') {
3351 can_close = true;
3352 }
3353 }
3354
3355 let value = String::from(marker as char).repeat(length);
3356
3357 let node_index = nodes.len();
3358 nodes.push(Inline::Text(Text {
3359 meta: NodeMeta::new(Some(Span::new(
3360 base_offset + index,
3361 base_offset + index + length,
3362 ))),
3363 value,
3364 }));
3365
3366 delimiters.push(DelimMarker {
3367 node_index,
3368 marker,
3369 length,
3370 can_open,
3371 can_close,
3372 span_start: base_offset + index,
3373 inactive: false,
3374 });
3375}
3376
3377fn process_emphasis(mut nodes: Vec<Inline>, mut delimiters: Vec<DelimMarker>) -> Vec<Inline> {
3380 if delimiters.is_empty() {
3381 return nodes;
3382 }
3383
3384 let mut openers_bottom: [Option<usize>; 18] = [None; 18];
3389 let mut closer_idx = 0;
3390
3391 while closer_idx < delimiters.len() {
3392 let closer = delimiters[closer_idx];
3393 if closer.inactive || !closer.can_close {
3394 closer_idx += 1;
3395 continue;
3396 }
3397
3398 let key = openers_bottom_key(&closer);
3399 let bottom = openers_bottom[key];
3400
3401 let mut opener_idx = None;
3403 let mut search = closer_idx;
3404 while search > 0 {
3405 search -= 1;
3406 if let Some(bottom) = bottom {
3407 if search < bottom {
3408 break;
3409 }
3410 }
3411 let candidate = delimiters[search];
3412 if candidate.inactive || candidate.marker != closer.marker || !candidate.can_open {
3413 continue;
3414 }
3415 if emphasis_delimiters_match(&candidate, &closer) {
3416 opener_idx = Some(search);
3417 break;
3418 }
3419 }
3420
3421 let Some(opener_idx) = opener_idx else {
3422 openers_bottom[key] = Some(closer_idx);
3426 if !closer.can_open {
3427 delimiters[closer_idx].inactive = true;
3428 }
3429 closer_idx += 1;
3430 continue;
3431 };
3432
3433 let (used, wrap) = if closer.marker == b'~' {
3434 let length = delimiters[closer_idx].length;
3437 let marker = if length >= 2 {
3438 DeleteMarker::DoubleTilde
3439 } else {
3440 DeleteMarker::SingleTilde
3441 };
3442 (length, EmphasisWrap::Delete(marker))
3443 } else {
3444 let strong = delimiters[opener_idx].length >= 2 && delimiters[closer_idx].length >= 2;
3445 let used = if strong { 2 } else { 1 };
3446 let wrap = if strong {
3447 EmphasisWrap::Strong
3448 } else {
3449 EmphasisWrap::Emphasis
3450 };
3451 (used, wrap)
3452 };
3453
3454 apply_emphasis(
3455 &mut nodes,
3456 &mut delimiters,
3457 opener_idx,
3458 closer_idx,
3459 used,
3460 wrap,
3461 );
3462
3463 let mut inner = opener_idx + 1;
3466 while inner < closer_idx {
3467 delimiters[inner].inactive = true;
3468 inner += 1;
3469 }
3470
3471 if delimiters[opener_idx].length == 0 {
3472 delimiters[opener_idx].inactive = true;
3473 }
3474 if delimiters[closer_idx].length == 0 {
3475 delimiters[closer_idx].inactive = true;
3476 closer_idx += 1;
3477 }
3478 }
3481
3482 merge_adjacent_text(&mut nodes);
3486 nodes
3487}
3488
3489fn merge_adjacent_text(nodes: &mut Vec<Inline>) {
3493 let mut write = 0;
3494 for read in 0..nodes.len() {
3495 if read != write {
3496 nodes.swap(read, write);
3497 }
3498 if write > 0 {
3499 let (head, tail) = nodes.split_at_mut(write);
3500 if let (Inline::Text(previous), Inline::Text(current)) =
3501 (&mut head[write - 1], &tail[0])
3502 {
3503 previous.value.push_str(¤t.value);
3504 if let (Some(previous_span), Some(current_span)) =
3505 (previous.meta.span.as_mut(), current.meta.span)
3506 {
3507 previous_span.end = current_span.end;
3508 }
3509 continue;
3510 }
3511 }
3512 write += 1;
3513 }
3514 nodes.truncate(write);
3515
3516 for node in nodes.iter_mut() {
3517 match node {
3518 Inline::Emphasis(emphasis) => merge_adjacent_text(&mut emphasis.children),
3519 Inline::Strong(strong) => merge_adjacent_text(&mut strong.children),
3520 Inline::Delete(delete) => merge_adjacent_text(&mut delete.children),
3521 _ => {}
3522 }
3523 }
3524}
3525
3526fn openers_bottom_key(closer: &DelimMarker) -> usize {
3528 let marker = match closer.marker {
3529 b'_' => 1,
3530 b'~' => 2,
3531 _ => 0,
3532 };
3533 let both = usize::from(closer.can_open && closer.can_close);
3534 let modulo = closer.length % 3;
3535 ((marker * 2) + both) * 3 + modulo
3536}
3537
3538fn emphasis_delimiters_match(opener: &DelimMarker, closer: &DelimMarker) -> bool {
3540 if opener.marker == b'~' {
3543 return opener.length == closer.length;
3544 }
3545
3546 let opener_both = opener.can_open && opener.can_close;
3550 let closer_both = closer.can_open && closer.can_close;
3551 if opener_both || closer_both {
3552 let sum = opener.length + closer.length;
3553 if sum % 3 == 0 && !(opener.length % 3 == 0 && closer.length % 3 == 0) {
3554 return false;
3555 }
3556 }
3557 true
3558}
3559
3560#[derive(Clone, Copy)]
3562enum EmphasisWrap {
3563 Emphasis,
3564 Strong,
3565 Delete(DeleteMarker),
3566}
3567
3568fn apply_emphasis(
3572 nodes: &mut Vec<Inline>,
3573 delimiters: &mut [DelimMarker],
3574 opener_idx: usize,
3575 closer_idx: usize,
3576 used: usize,
3577 wrap: EmphasisWrap,
3578) {
3579 let opener_node = delimiters[opener_idx].node_index;
3580 let closer_node = delimiters[closer_idx].node_index;
3581
3582 trim_delimiter_text_tail(&mut nodes[opener_node], used);
3585 delimiters[opener_idx].length -= used;
3586 delimiters[opener_idx].span_start += used;
3587
3588 trim_delimiter_text_head(&mut nodes[closer_node], used);
3589 delimiters[closer_idx].length -= used;
3590
3591 let span_start = delimiters[opener_idx].span_start - used;
3594 let span_end = delimiters[closer_idx].span_start + delimiters[closer_idx].length + used;
3595
3596 let children_start = opener_node + 1;
3599 let children_end = closer_node; let children: Vec<Inline> = nodes.drain(children_start..children_end).collect();
3601 let removed = children.len();
3602
3603 let meta = NodeMeta::new(Some(Span::new(span_start, span_end)));
3604 let wrapped = match wrap {
3605 EmphasisWrap::Strong => Inline::Strong(Strong { meta, children }),
3606 EmphasisWrap::Emphasis => Inline::Emphasis(Emphasis { meta, children }),
3607 EmphasisWrap::Delete(marker) => Inline::Delete(Delete {
3608 meta,
3609 marker,
3610 children,
3611 }),
3612 };
3613 nodes.insert(children_start, wrapped);
3614
3615 reindex_delimiters(delimiters, children_end, 1 - removed as isize);
3619
3620 if delimiters[closer_idx].length == 0 {
3624 let pos = delimiters[closer_idx].node_index;
3625 nodes.remove(pos);
3626 reindex_delimiters(delimiters, pos, -1);
3627 }
3628 if delimiters[opener_idx].length == 0 {
3629 let pos = delimiters[opener_idx].node_index;
3630 nodes.remove(pos);
3631 reindex_delimiters(delimiters, pos, -1);
3632 }
3633}
3634
3635fn reindex_delimiters(delimiters: &mut [DelimMarker], from: usize, delta: isize) {
3637 if delta == 0 {
3638 return;
3639 }
3640 for delimiter in delimiters.iter_mut() {
3641 if delimiter.node_index >= from {
3642 delimiter.node_index = (delimiter.node_index as isize + delta) as usize;
3643 }
3644 }
3645}
3646
3647fn trim_delimiter_text_tail(node: &mut Inline, count: usize) {
3649 if let Inline::Text(text) = node {
3650 let new_len = text.value.len().saturating_sub(count);
3651 text.value.truncate(new_len);
3652 if let Some(span) = text.meta.span.as_mut() {
3653 span.end = span.end.saturating_sub(count);
3654 }
3655 }
3656}
3657
3658fn trim_delimiter_text_head(node: &mut Inline, count: usize) {
3660 if let Inline::Text(text) = node {
3661 let count = count.min(text.value.len());
3662 text.value.drain(..count);
3663 if let Some(span) = text.meta.span.as_mut() {
3664 span.start += count;
3665 }
3666 }
3667}
3668
3669fn parse_inlines(
3670 input: &str,
3671 base_offset: usize,
3672 options: &SyntaxOptions,
3673 definitions: &[String],
3674 diagnostics: &mut Vec<Diagnostic>,
3675) -> Vec<Inline> {
3676 parse_inlines_with_context(
3677 input,
3678 base_offset,
3679 options,
3680 definitions,
3681 diagnostics,
3682 InlineContext::default(),
3683 )
3684}
3685
/// Settings threaded through nested inline parsing.
#[derive(Copy, Clone)]
struct InlineContext {
    /// Link-related switch consulted by the inline parser.
    /// NOTE(review): presumably turned off while parsing link label content;
    /// the code that reads it is outside this block, so confirm there.
    allow_links: bool,
}

impl Default for InlineContext {
    /// The default context has `allow_links` switched on.
    fn default() -> Self {
        InlineContext { allow_links: true }
    }
}
3696
3697fn parse_inlines_with_context(
3698 input: &str,
3699 base_offset: usize,
3700 options: &SyntaxOptions,
3701 definitions: &[String],
3702 diagnostics: &mut Vec<Diagnostic>,
3703 context: InlineContext,
3704) -> Vec<Inline> {
3705 let bytes = input.as_bytes();
3706 let mut nodes = Vec::new();
3707 let mut text_start = 0;
3708 let mut text = String::new();
3709 let mut index = 0;
3710 let mut delimiters: Vec<DelimMarker> = Vec::new();
3715
3716 while index < bytes.len() {
3717 if bytes[index] == b'\\' {
3718 if let Some((next_index, char)) = next_char(input, index + 1) {
3719 if char.is_ascii_punctuation() {
3720 if options.parse.preserve_character_escapes {
3721 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3722 nodes.push(Inline::Escape(Escape {
3723 meta: NodeMeta::new(Some(Span::new(
3724 base_offset + index,
3725 base_offset + next_index,
3726 ))),
3727 value: char,
3728 }));
3729 index = next_index;
3730 text_start = index;
3731 continue;
3732 }
3733 if text.is_empty() {
3734 text_start = base_offset + index;
3735 }
3736 if gfm_link_label_preserves_url_dot_escape(&text, char, options, context) {
3737 text.push('\\');
3738 }
3739 text.push(char);
3740 index = next_index;
3741 continue;
3742 }
3743 }
3744 }
3745
3746 if bytes[index] == b'&' {
3747 if let Some((end, value)) = parse_character_reference(input, index) {
3748 if options.parse.preserve_character_references {
3749 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3750 nodes.push(Inline::CharacterReference(CharacterReference {
3751 meta: NodeMeta::new(Some(Span::new(
3752 base_offset + index,
3753 base_offset + end,
3754 ))),
3755 reference: input[index..end].into(),
3756 value,
3757 }));
3758 index = end;
3759 text_start = index;
3760 continue;
3761 }
3762 if text.is_empty() {
3763 text_start = base_offset + index;
3764 }
3765 text.push_str(&value);
3766 index = end;
3767 continue;
3768 }
3769 }
3770
3771 if bytes[index] == b'\n' {
3772 if text.ends_with('\\') {
3773 text.pop();
3774 flush_text(
3775 &mut nodes,
3776 &mut text,
3777 text_start,
3778 base_offset + index.saturating_sub(1),
3779 );
3780 nodes.push(Inline::LineBreak(LineBreak {
3781 meta: NodeMeta::new(Some(Span::new(
3782 base_offset + index.saturating_sub(1),
3783 base_offset + index + 1,
3784 ))),
3785 kind: LineBreakKind::Backslash,
3786 }));
3787 index += 1;
3788 text_start = index;
3789 continue;
3790 }
3791 let trailing_spaces = trailing_space_count(&text);
3792 if is_hard_break_suffix(&text, trailing_spaces) {
3793 text.truncate(text.len() - trailing_spaces);
3794 flush_text(
3795 &mut nodes,
3796 &mut text,
3797 text_start,
3798 base_offset + index.saturating_sub(trailing_spaces),
3799 );
3800 nodes.push(Inline::LineBreak(LineBreak {
3801 meta: NodeMeta::new(Some(Span::new(
3802 base_offset + index.saturating_sub(trailing_spaces),
3803 base_offset + index + 1,
3804 ))),
3805 kind: LineBreakKind::Spaces,
3806 }));
3807 index += 1;
3808 text_start = index;
3809 continue;
3810 }
3811 if trailing_spaces > 0 {
3812 text.truncate(text.len() - trailing_spaces);
3813 }
3814 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3815 nodes.push(Inline::SoftBreak(SoftBreak {
3816 meta: NodeMeta::new(Some(Span::new(
3817 base_offset + index,
3818 base_offset + index + 1,
3819 ))),
3820 }));
3821 index += 1;
3822 text_start = index;
3823 continue;
3824 }
3825
3826 if bytes[index] == b'`' {
3827 if let Some((end, code_span)) = parse_code_span(input, index) {
3828 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3829 nodes.push(Inline::Code(CodeInline {
3830 meta: NodeMeta::new(Some(Span::new(base_offset + index, base_offset + end))),
3831 value: code_span.value,
3832 raw: code_span.raw,
3833 fence_length: code_span.fence_length,
3834 }));
3835 index = end;
3836 text_start = index;
3837 continue;
3838 } else {
3839 let run = bytes[index..]
3845 .iter()
3846 .take_while(|byte| **byte == b'`')
3847 .count();
3848 if text.is_empty() {
3849 text_start = base_offset + index;
3850 }
3851 for _ in 0..run {
3852 text.push('`');
3853 }
3854 index += run;
3855 continue;
3856 }
3857 }
3858
3859 if options.constructs.spoiler
3860 && bytes.get(index) == Some(&b'|')
3861 && bytes.get(index + 1) == Some(&b'|')
3862 && bytes.get(index + 2) != Some(&b'|')
3863 {
3864 if let Some(end) = find_spoiler_close(input, index + 2) {
3865 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3866 let inner = &input[index + 2..end];
3867 nodes.push(Inline::Spoiler(Spoiler {
3868 meta: NodeMeta::new(Some(Span::new(
3869 base_offset + index,
3870 base_offset + end + 2,
3871 ))),
3872 children: parse_inlines_with_context(
3873 inner,
3874 base_offset + index + 2,
3875 options,
3876 definitions,
3877 diagnostics,
3878 context,
3879 ),
3880 }));
3881 index = end + 2;
3882 text_start = index;
3883 continue;
3884 }
3885 }
3886
3887 if bytes[index] == b'*' && delimiter_byte_run_start(input, index, b'*') == index {
3888 let run_len = delimiter_byte_run_len(input, index, b'*');
3889 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3890 record_emphasis_delimiter(
3891 &mut nodes,
3892 &mut delimiters,
3893 input,
3894 index,
3895 base_offset,
3896 b'*',
3897 options.constructs.gfm_strikethrough,
3898 );
3899 index += run_len;
3900 text_start = index;
3901 continue;
3902 }
3903
3904 if options.constructs.underline
3905 && bytes.get(index) == Some(&b'_')
3906 && bytes.get(index + 1) == Some(&b'_')
3907 && bytes.get(index + 2) == Some(&b'_')
3908 && can_open_underscore(input, index, 1)
3909 {
3910 if let Some(end) = find_closing_delimiter(input, index + 3, "___", true) {
3911 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3912 let inner = &input[index + 3..end];
3913 let underline = Inline::Underline(Underline {
3914 meta: NodeMeta::new(Some(Span::new(
3915 base_offset + index + 1,
3916 base_offset + end + 2,
3917 ))),
3918 children: parse_inlines_with_context(
3919 inner,
3920 base_offset + index + 3,
3921 options,
3922 definitions,
3923 diagnostics,
3924 context,
3925 ),
3926 });
3927 nodes.push(Inline::Emphasis(Emphasis {
3928 meta: NodeMeta::new(Some(Span::new(
3929 base_offset + index,
3930 base_offset + end + 3,
3931 ))),
3932 children: vec![underline],
3933 }));
3934 index = end + 3;
3935 text_start = index;
3936 continue;
3937 }
3938 }
3939
3940 if options.constructs.underline
3941 && bytes.get(index) == Some(&b'_')
3942 && bytes.get(index + 1) == Some(&b'_')
3943 && can_open_underscore(input, index, 2)
3944 {
3945 if let Some(end) = find_closing_delimiter(input, index + 2, "__", true) {
3946 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3947 let inner = &input[index + 2..end];
3948 nodes.push(Inline::Underline(Underline {
3949 meta: NodeMeta::new(Some(Span::new(
3950 base_offset + index,
3951 base_offset + end + 2,
3952 ))),
3953 children: parse_inlines_with_context(
3954 inner,
3955 base_offset + index + 2,
3956 options,
3957 definitions,
3958 diagnostics,
3959 context,
3960 ),
3961 }));
3962 index = end + 2;
3963 text_start = index;
3964 continue;
3965 }
3966 }
3967
3968 if bytes[index] == b'_' && delimiter_byte_run_start(input, index, b'_') == index {
3974 if (options.constructs.gfm_autolink_literal || options.constructs.relaxed_autolinks)
3979 && context.allow_links
3980 {
3981 if let Some((end, destination)) = parse_literal_autolink(
3982 input,
3983 index,
3984 options.constructs.gfm_autolink_literal,
3985 options.constructs.relaxed_autolinks,
3986 ) {
3987 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3988 nodes.push(Inline::Autolink(Autolink {
3989 meta: NodeMeta::new(Some(Span::new(
3990 base_offset + index,
3991 base_offset + end,
3992 ))),
3993 destination,
3994 kind: AutolinkKind::GfmLiteral {
3995 original: input[index..end].into(),
3996 },
3997 }));
3998 index = end;
3999 text_start = index;
4000 continue;
4001 }
4002 }
4003 let run_len = delimiter_byte_run_len(input, index, b'_');
4004 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4005 record_emphasis_delimiter(
4006 &mut nodes,
4007 &mut delimiters,
4008 input,
4009 index,
4010 base_offset,
4011 b'_',
4012 options.constructs.gfm_strikethrough,
4013 );
4014 index += run_len;
4015 text_start = index;
4016 continue;
4017 }
4018
4019 if options.constructs.insert
4020 && bytes.get(index) == Some(&b'+')
4021 && bytes.get(index + 1) == Some(&b'+')
4022 && bytes.get(index + 2) != Some(&b'+')
4023 && can_open_delimited(input, index, 2)
4024 {
4025 if let Some(end) = find_closing_delimiter(input, index + 2, "++", false) {
4026 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4027 let inner = &input[index + 2..end];
4028 nodes.push(Inline::Insert(Insert {
4029 meta: NodeMeta::new(Some(Span::new(
4030 base_offset + index,
4031 base_offset + end + 2,
4032 ))),
4033 children: parse_inlines_with_context(
4034 inner,
4035 base_offset + index + 2,
4036 options,
4037 definitions,
4038 diagnostics,
4039 context,
4040 ),
4041 }));
4042 index = end + 2;
4043 text_start = index;
4044 continue;
4045 }
4046 }
4047
4048 if options.constructs.highlight
4049 && bytes.get(index) == Some(&b'=')
4050 && bytes.get(index + 1) == Some(&b'=')
4051 && bytes.get(index + 2) != Some(&b'=')
4052 && can_open_delimited(input, index, 2)
4053 {
4054 if let Some(end) = find_closing_delimiter(input, index + 2, "==", false) {
4055 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4056 let inner = &input[index + 2..end];
4057 nodes.push(Inline::Mark(Mark {
4058 meta: NodeMeta::new(Some(Span::new(
4059 base_offset + index,
4060 base_offset + end + 2,
4061 ))),
4062 children: parse_inlines_with_context(
4063 inner,
4064 base_offset + index + 2,
4065 options,
4066 definitions,
4067 diagnostics,
4068 context,
4069 ),
4070 }));
4071 index = end + 2;
4072 text_start = index;
4073 continue;
4074 }
4075 }
4076
4077 if options.constructs.subscript
4078 && starts_exact_byte_run(input, index, b'~', 1)
4079 && !single_tilde_delete_takes_precedence(options, input, index)
4080 {
4081 if let Some(end) = find_simple_inline_close(input, index + 1, b'~') {
4082 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4083 let inner = &input[index + 1..end];
4084 nodes.push(Inline::Subscript(Subscript {
4085 meta: NodeMeta::new(Some(Span::new(
4086 base_offset + index,
4087 base_offset + end + 1,
4088 ))),
4089 children: parse_inlines_with_context(
4090 inner,
4091 base_offset + index + 1,
4092 options,
4093 definitions,
4094 diagnostics,
4095 context,
4096 ),
4097 }));
4098 index = end + 1;
4099 text_start = index;
4100 continue;
4101 }
4102 }
4103
4104 if options.constructs.inline_footnote
4105 && options.constructs.footnote_reference
4106 && bytes.get(index) == Some(&b'^')
4107 && bytes.get(index + 1) == Some(&b'[')
4108 {
4109 if let Some(close) = find_inline_footnote_end(input, index + 2) {
4110 let inner = &input[index + 2..close];
4111 if !inner.trim().is_empty() {
4112 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4113 nodes.push(Inline::InlineFootnote(InlineFootnote {
4114 meta: NodeMeta::new(Some(Span::new(
4115 base_offset + index,
4116 base_offset + close + 1,
4117 ))),
4118 children: parse_inlines_with_context(
4119 inner,
4120 base_offset + index + 2,
4121 options,
4122 definitions,
4123 diagnostics,
4124 context,
4125 ),
4126 }));
4127 index = close + 1;
4128 text_start = index;
4129 continue;
4130 }
4131 }
4132 }
4133
4134 if options.constructs.superscript
4135 && bytes.get(index) == Some(&b'^')
4136 && !(options.constructs.inline_footnote && bytes.get(index + 1) == Some(&b'['))
4137 {
4138 if let Some(end) = find_simple_inline_close(input, index + 1, b'^') {
4139 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4140 let inner = &input[index + 1..end];
4141 nodes.push(Inline::Superscript(Superscript {
4142 meta: NodeMeta::new(Some(Span::new(
4143 base_offset + index,
4144 base_offset + end + 1,
4145 ))),
4146 children: parse_inlines_with_context(
4147 inner,
4148 base_offset + index + 1,
4149 options,
4150 definitions,
4151 diagnostics,
4152 context,
4153 ),
4154 }));
4155 index = end + 1;
4156 text_start = index;
4157 continue;
4158 }
4159 }
4160
4161 if options.constructs.gfm_strikethrough
4168 && bytes[index] == b'~'
4169 && delimiter_byte_run_start(input, index, b'~') == index
4170 {
4171 let run_len = delimiter_byte_run_len(input, index, b'~');
4172 let recordable =
4173 run_len == 2 || (run_len == 1 && options.parse.single_tilde_strikethrough);
4174 if recordable {
4175 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4176 record_emphasis_delimiter(
4177 &mut nodes,
4178 &mut delimiters,
4179 input,
4180 index,
4181 base_offset,
4182 b'~',
4183 true,
4184 );
4185 index += run_len;
4186 text_start = index;
4187 continue;
4188 }
4189 }
4190
4191 if bytes[index] == b'!' && index + 1 < bytes.len() && bytes[index + 1] == b'[' {
4192 if let Some((end, image)) =
4193 parse_image(input, index, base_offset, options, definitions, diagnostics)
4194 {
4195 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4196 nodes.push(image);
4197 index = end;
4198 text_start = index;
4199 continue;
4200 }
4201 }
4202
4203 if bytes[index] == b'[' {
4204 if let Some((end, wikilink)) = parse_wikilink(input, index, base_offset, options) {
4205 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4206 nodes.push(wikilink);
4207 index = end;
4208 text_start = index;
4209 continue;
4210 }
4211 if let Some((end, link)) = parse_link(
4212 input,
4213 index,
4214 base_offset,
4215 options,
4216 definitions,
4217 diagnostics,
4218 context,
4219 ) {
4220 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4221 nodes.push(link);
4222 index = end;
4223 text_start = index;
4224 continue;
4225 }
4226 if options.constructs.footnote_reference
4227 && bytes.get(index) == Some(&b'[')
4228 && bytes.get(index + 1) == Some(&b'^')
4229 {
4230 if let Some(close) = find_footnote_reference_label_end(input, index + 2) {
4231 let label = &input[index + 2..close];
4232 if is_footnote_label(label) {
4233 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4234 nodes.push(Inline::FootnoteReference(FootnoteReference {
4235 meta: NodeMeta::new(Some(Span::new(
4236 base_offset + index,
4237 base_offset + close + 1,
4238 ))),
4239 label: label.into(),
4240 identifier: normalize_label(label),
4241 }));
4242 index = close + 1;
4243 text_start = index;
4244 continue;
4245 }
4246 }
4247 }
4248 }
4249
4250 if bytes[index] == b'$' && options.constructs.math_inline {
4251 if let Some((end, value, kind)) = parse_math_inline(input, index) {
4252 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4253 nodes.push(Inline::Math(MathInline {
4254 meta: NodeMeta::new(Some(Span::new(base_offset + index, base_offset + end))),
4255 value,
4256 kind,
4257 }));
4258 index = end;
4259 text_start = index;
4260 continue;
4261 }
4262 let run = bytes[index..]
4269 .iter()
4270 .take_while(|byte| **byte == b'$')
4271 .count();
4272 if run > 1 {
4273 if text.is_empty() {
4274 text_start = base_offset + index;
4275 }
4276 text.push_str(&input[index..index + run]);
4277 index += run;
4278 continue;
4279 }
4280 }
4281
4282 if (options.constructs.gfm_autolink_literal || options.constructs.relaxed_autolinks)
4285 && context.allow_links
4286 {
4287 if let Some((end, destination)) = parse_literal_autolink(
4288 input,
4289 index,
4290 options.constructs.gfm_autolink_literal,
4291 options.constructs.relaxed_autolinks,
4292 ) {
4293 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4294 nodes.push(Inline::Autolink(Autolink {
4295 meta: NodeMeta::new(Some(Span::new(base_offset + index, base_offset + end))),
4296 destination,
4297 kind: AutolinkKind::GfmLiteral {
4298 original: input[index..end].into(),
4299 },
4300 }));
4301 index = end;
4302 text_start = index;
4303 continue;
4304 }
4305 }
4306
4307 if bytes[index] == b'<' {
4308 if let Some(end) = parse_autolink_end(input, index) {
4309 let raw = &input[index..end];
4310 if is_autolink(raw) {
4311 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4312 if context.allow_links {
4313 nodes.push(Inline::Autolink(Autolink {
4314 meta: NodeMeta::new(Some(Span::new(
4315 base_offset + index,
4316 base_offset + end,
4317 ))),
4318 destination: raw[1..raw.len() - 1].into(),
4319 kind: AutolinkKind::Angle,
4320 }));
4321 } else {
4322 nodes.push(Inline::Text(Text {
4323 meta: NodeMeta::new(Some(Span::new(
4324 base_offset + index,
4325 base_offset + end,
4326 ))),
4327 value: raw[1..raw.len() - 1].into(),
4328 }));
4329 }
4330 index = end;
4331 text_start = index;
4332 continue;
4333 }
4334 }
4335 if options.constructs.mdx_jsx_inline {
4336 if let Some((end, raw)) = parse_mdx_jsx_inline(input, index) {
4337 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4338 nodes.push(Inline::MdxJsx(MdxJsxInline {
4339 meta: NodeMeta::new(Some(Span::new(
4340 base_offset + index,
4341 base_offset + end,
4342 ))),
4343 value: raw,
4344 }));
4345 index = end;
4346 text_start = index;
4347 continue;
4348 }
4349 }
4350 if let Some((end, raw)) = parse_html_inline(input, index) {
4351 if options.constructs.html_inline {
4352 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4353 nodes.push(Inline::Html(HtmlInline {
4354 meta: NodeMeta::new(Some(Span::new(
4355 base_offset + index,
4356 base_offset + end,
4357 ))),
4358 value: raw,
4359 }));
4360 index = end;
4361 text_start = index;
4362 continue;
4363 }
4364 }
4365 }
4366
4367 if bytes[index] == b'{' && options.constructs.mdx_expression_inline {
4368 if let Some(end) = find_mdx_expression_inline_close(input, index) {
4369 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4370 nodes.push(Inline::MdxExpression(MdxExpressionInline {
4371 meta: NodeMeta::new(Some(Span::new(
4372 base_offset + index,
4373 base_offset + end + 1,
4374 ))),
4375 value: input[index + 1..end].into(),
4376 }));
4377 index = end + 1;
4378 text_start = index;
4379 continue;
4380 } else {
4381 diagnostics.push(Diagnostic::new(
4382 DiagnosticSeverity::Error,
4383 DiagnosticCode::InvalidMdx,
4384 Span::new(base_offset + index, base_offset + input.len()),
4385 "MDX expression is missing a closing brace",
4386 ));
4387 }
4388 }
4389
4390 if bytes[index] == b':' && options.constructs.shortcode {
4391 if let Some((end, name)) = parse_shortcode(input, index) {
4392 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4393 nodes.push(Inline::Shortcode(Shortcode {
4394 meta: NodeMeta::new(Some(Span::new(base_offset + index, base_offset + end))),
4395 name,
4396 }));
4397 index = end;
4398 text_start = index;
4399 continue;
4400 }
4401 }
4402
4403 if bytes[index] == b':' && options.constructs.directive_text {
4404 if let Some((end, directive)) =
4405 parse_text_directive(input, index, base_offset, options, definitions, diagnostics)
4406 {
4407 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4408 nodes.push(directive);
4409 index = end;
4410 text_start = index;
4411 continue;
4412 }
4413 }
4414
4415 let (next_index, char) = next_char(input, index).expect("valid UTF-8 byte index");
4416 if text.is_empty() {
4417 text_start = base_offset + index;
4418 }
4419 text.push(if char == '\0' { '\u{FFFD}' } else { char });
4420 index = next_index;
4421 }
4422
4423 flush_text(&mut nodes, &mut text, text_start, base_offset + input.len());
4424 process_emphasis(nodes, delimiters)
4425}
4426
4427fn parse_shortcode(input: &str, index: usize) -> Option<(usize, String)> {
4428 if input[index..].starts_with("::") {
4429 return None;
4430 }
4431
4432 let mut cursor = index + 1;
4433 while let Some((next, char)) = next_char(input, cursor) {
4434 if char == ':' {
4435 if cursor == index + 1 {
4436 return None;
4437 }
4438 return Some((next, input[index + 1..cursor].into()));
4439 }
4440 if !(char.is_ascii_alphanumeric() || matches!(char, '_' | '-' | '+')) {
4441 return None;
4442 }
4443 cursor = next;
4444 }
4445 None
4446}
4447
4448fn parse_wikilink(
4449 input: &str,
4450 index: usize,
4451 base_offset: usize,
4452 options: &SyntaxOptions,
4453) -> Option<(usize, Inline)> {
4454 let configured_order = if options.constructs.wikilink_title_after_pipe {
4455 WikiLinkLabelOrder::AfterPipe
4456 } else if options.constructs.wikilink_title_before_pipe {
4457 WikiLinkLabelOrder::BeforePipe
4458 } else {
4459 return None;
4460 };
4461 if input.as_bytes().get(index) != Some(&b'[') || input.as_bytes().get(index + 1) != Some(&b'[')
4462 {
4463 return None;
4464 }
4465
4466 let close = find_wikilink_close(input, index + 2)?;
4467 let source = &input[index + 2..close];
4468 if source.is_empty() || source.len() > WIKILINK_MAX_BYTES {
4469 return None;
4470 }
4471
4472 let (target_source, label_source, label_order) =
4473 if let Some(separator) = find_wikilink_separator(source) {
4474 match configured_order {
4475 WikiLinkLabelOrder::AfterPipe => (
4476 &source[..separator],
4477 &source[separator + 1..],
4478 WikiLinkLabelOrder::AfterPipe,
4479 ),
4480 WikiLinkLabelOrder::BeforePipe => (
4481 &source[separator + 1..],
4482 &source[..separator],
4483 WikiLinkLabelOrder::BeforePipe,
4484 ),
4485 }
4486 } else {
4487 (source, source, configured_order)
4488 };
4489
4490 let target = unescape_string(target_source);
4491 if target.is_empty() {
4492 return None;
4493 }
4494 let label = unescape_string(label_source);
4495 let end = close + 2;
4496 Some((
4497 end,
4498 Inline::WikiLink(WikiLink {
4499 meta: NodeMeta::new(Some(Span::new(base_offset + index, base_offset + end))),
4500 target,
4501 label,
4502 label_order,
4503 }),
4504 ))
4505}
4506
4507fn find_wikilink_close(input: &str, start: usize) -> Option<usize> {
4508 let bytes = input.as_bytes();
4509 let mut cursor = start;
4510 while cursor < input.len() {
4511 match bytes[cursor] {
4512 b'\\' => {
4513 cursor += 1;
4514 if cursor < input.len() {
4515 cursor = next_char(input, cursor)?.0;
4516 }
4517 }
4518 b'\n' | b'\r' => return None,
4519 b']' if bytes.get(cursor + 1) == Some(&b']') => return Some(cursor),
4520 _ => cursor = next_char(input, cursor)?.0,
4521 }
4522 }
4523 None
4524}
4525
4526fn find_wikilink_separator(input: &str) -> Option<usize> {
4527 let bytes = input.as_bytes();
4528 let mut cursor = 0;
4529 while cursor < input.len() {
4530 match bytes[cursor] {
4531 b'\\' => {
4532 cursor += 1;
4533 if cursor < input.len() {
4534 cursor = next_char(input, cursor)?.0;
4535 }
4536 }
4537 b'|' => return Some(cursor),
4538 _ => cursor = next_char(input, cursor)?.0,
4539 }
4540 }
4541 None
4542}
4543
/// Counts how many bytes at the end of `input` are spaces or tabs.
fn trailing_space_count(input: &str) -> usize {
    // Space and tab are single-byte in UTF-8, so the length difference after
    // trimming equals the number of trailing space/tab bytes.
    let without_padding = input.trim_end_matches(|ch: char| ch == ' ' || ch == '\t');
    input.len() - without_padding.len()
}
4552
/// Reports whether the last `trailing` bytes of `input` form a hard-break
/// suffix: at least two bytes, every one of them a space (tabs do not count).
///
/// `trailing` is expected to be the value produced by `trailing_space_count`
/// for the same `input`. A `trailing` larger than the input length cannot
/// describe a real suffix, so it yields `false` instead of panicking on the
/// length subtraction.
fn is_hard_break_suffix(input: &str, trailing: usize) -> bool {
    if trailing < 2 {
        return false;
    }
    let bytes = input.as_bytes();
    match bytes.len().checked_sub(trailing) {
        Some(start) => bytes[start..].iter().all(|&byte| byte == b' '),
        None => false,
    }
}
4563
4564fn parse_image(
4565 input: &str,
4566 index: usize,
4567 base_offset: usize,
4568 options: &SyntaxOptions,
4569 definitions: &[String],
4570 diagnostics: &mut Vec<Diagnostic>,
4571) -> Option<(usize, Inline)> {
4572 let label_start = index + 2;
4573 let label_end = find_link_label_end(input, index + 1)?;
4574 let alt_source = &input[label_start..label_end];
4575 let after_label = label_end + 1;
4576 if input.as_bytes().get(after_label) == Some(&b'(') {
4577 let (close, resource) = parse_link_resource(input, after_label)?;
4578 return Some((
4579 close,
4580 Inline::Image(Image {
4581 meta: NodeMeta::new(Some(Span::new(base_offset + index, base_offset + close))),
4582 destination: resource.destination,
4583 destination_kind: resource.destination_kind,
4584 title: resource.title,
4585 title_kind: resource.title_kind,
4586 alt: parse_inlines(
4587 alt_source,
4588 base_offset + label_start,
4589 options,
4590 definitions,
4591 diagnostics,
4592 ),
4593 }),
4594 ));
4595 }
4596 if input.as_bytes().get(after_label) == Some(&b'[') {
4597 let close = find_reference_label_end(input, after_label)?;
4598 let label = &input[after_label + 1..close];
4599 let identifier = if label.is_empty() { alt_source } else { label };
4600 if definition_exists(definitions, identifier) {
4601 return Some((
4602 close + 1,
4603 Inline::ImageReference(ImageReference {
4604 meta: NodeMeta::new(Some(Span::new(
4605 base_offset + index,
4606 base_offset + close + 1,
4607 ))),
4608 identifier: normalize_label(identifier),
4609 label: identifier.into(),
4610 kind: if label.is_empty() {
4611 ReferenceKind::Collapsed
4612 } else {
4613 ReferenceKind::Full
4614 },
4615 alt: parse_inlines(
4616 alt_source,
4617 base_offset + label_start,
4618 options,
4619 definitions,
4620 diagnostics,
4621 ),
4622 }),
4623 ));
4624 }
4625 return None;
4628 }
4629 if definition_exists(definitions, alt_source) {
4632 return Some((
4633 after_label,
4634 Inline::ImageReference(ImageReference {
4635 meta: NodeMeta::new(Some(Span::new(
4636 base_offset + index,
4637 base_offset + after_label,
4638 ))),
4639 identifier: normalize_label(alt_source),
4640 label: alt_source.into(),
4641 kind: ReferenceKind::Shortcut,
4642 alt: parse_inlines(
4643 alt_source,
4644 base_offset + label_start,
4645 options,
4646 definitions,
4647 diagnostics,
4648 ),
4649 }),
4650 ));
4651 }
4652 None
4653}
4654
4655fn parse_link(
4656 input: &str,
4657 index: usize,
4658 base_offset: usize,
4659 options: &SyntaxOptions,
4660 definitions: &[String],
4661 diagnostics: &mut Vec<Diagnostic>,
4662 context: InlineContext,
4663) -> Option<(usize, Inline)> {
4664 if !context.allow_links {
4665 return None;
4666 }
4667 let label_end = find_link_label_end(input, index)?;
4668 let label_source = &input[index + 1..label_end];
4669 if label_contains_link(label_source, base_offset + index + 1, options, definitions) {
4670 return None;
4671 }
4672 let after_label = label_end + 1;
4673 if input.as_bytes().get(after_label) == Some(&b'(') {
4674 if let Some((close, resource)) = parse_link_resource(input, after_label) {
4679 return Some((
4680 close,
4681 Inline::Link(Link {
4682 meta: NodeMeta::new(Some(Span::new(base_offset + index, base_offset + close))),
4683 destination: resource.destination,
4684 destination_kind: resource.destination_kind,
4685 title: resource.title,
4686 title_kind: resource.title_kind,
4687 children: parse_inlines_with_context(
4688 label_source,
4689 base_offset + index + 1,
4690 options,
4691 definitions,
4692 diagnostics,
4693 InlineContext { allow_links: false },
4694 ),
4695 }),
4696 ));
4697 }
4698 }
4699 if input.as_bytes().get(after_label) == Some(&b'[') {
4700 let close = find_reference_label_end(input, after_label)?;
4701 let label = &input[after_label + 1..close];
4702 let identifier = if label.is_empty() {
4703 label_source
4704 } else {
4705 label
4706 };
4707 if definition_exists(definitions, identifier) {
4708 return Some((
4709 close + 1,
4710 Inline::LinkReference(LinkReference {
4711 meta: NodeMeta::new(Some(Span::new(
4712 base_offset + index,
4713 base_offset + close + 1,
4714 ))),
4715 identifier: normalize_label(identifier),
4716 label: identifier.into(),
4717 kind: if label.is_empty() {
4718 ReferenceKind::Collapsed
4719 } else {
4720 ReferenceKind::Full
4721 },
4722 children: parse_inlines_with_context(
4723 label_source,
4724 base_offset + index + 1,
4725 options,
4726 definitions,
4727 diagnostics,
4728 InlineContext { allow_links: false },
4729 ),
4730 }),
4731 ));
4732 }
4733 return None;
4738 }
4739 if definition_exists(definitions, label_source) {
4740 return Some((
4741 after_label,
4742 Inline::LinkReference(LinkReference {
4743 meta: NodeMeta::new(Some(Span::new(
4744 base_offset + index,
4745 base_offset + after_label,
4746 ))),
4747 identifier: normalize_label(label_source),
4748 label: label_source.into(),
4749 kind: ReferenceKind::Shortcut,
4750 children: parse_inlines_with_context(
4751 label_source,
4752 base_offset + index + 1,
4753 options,
4754 definitions,
4755 diagnostics,
4756 InlineContext { allow_links: false },
4757 ),
4758 }),
4759 ));
4760 }
4761 None
4762}
4763
4764fn find_reference_label_end(input: &str, open: usize) -> Option<usize> {
4765 if input.as_bytes().get(open) != Some(&b'[') {
4768 return None;
4769 }
4770
4771 let mut cursor = open + 1;
4772 while cursor < input.len() {
4773 let (next, char) = next_char(input, cursor)?;
4774 match char {
4775 '\\' => {
4776 cursor = next_char(input, next)
4777 .map(|(after_escape, _)| after_escape)
4778 .unwrap_or(next);
4779 continue;
4780 }
4781 '[' => return None,
4782 ']' => {
4783 return reference_label_is_within_limit(&input[open + 1..cursor]).then_some(cursor);
4784 }
4785 _ => {}
4786 }
4787 cursor = next;
4788 }
4789 None
4790}
4791
4792fn label_contains_link(
4793 label_source: &str,
4794 base_offset: usize,
4795 options: &SyntaxOptions,
4796 definitions: &[String],
4797) -> bool {
4798 let mut diagnostics = Vec::new();
4799 let inlines = parse_inlines_with_context(
4800 label_source,
4801 base_offset,
4802 options,
4803 definitions,
4804 &mut diagnostics,
4805 InlineContext::default(),
4806 );
4807 contains_link_inline(&inlines)
4808}
4809
4810fn contains_link_inline(inlines: &[Inline]) -> bool {
4811 inlines.iter().any(|inline| match inline {
4812 Inline::Link(_) | Inline::LinkReference(_) => true,
4813 Inline::Emphasis(node) => contains_link_inline(&node.children),
4814 Inline::Strong(node) => contains_link_inline(&node.children),
4815 Inline::Delete(node) => contains_link_inline(&node.children),
4816 Inline::TextDirective(node) => contains_link_inline(&node.label),
4817 _ => false,
4818 })
4819}
4820
4821fn find_link_label_end(input: &str, open: usize) -> Option<usize> {
4822 if input.as_bytes().get(open) != Some(&b'[') {
4823 return None;
4824 }
4825
4826 let mut depth = 1usize;
4827 let mut cursor = open + 1;
4828 while cursor < input.len() {
4829 let (next, char) = next_char(input, cursor)?;
4830 match char {
4831 '\\' => {
4832 cursor = next_char(input, next)
4833 .map(|(after_escape, _)| after_escape)
4834 .unwrap_or(next);
4835 continue;
4836 }
4837 '`' => {
4838 if let Some((end, _)) = parse_code_span(input, cursor) {
4839 cursor = end;
4840 continue;
4841 }
4842 }
4843 '<' => {
4844 if let Some(end) = parse_autolink_end(input, cursor) {
4845 let raw = &input[cursor..end];
4846 if is_autolink(raw) {
4847 cursor = end;
4848 continue;
4849 }
4850 }
4851 if let Some((end, _)) = parse_html_inline(input, cursor) {
4852 cursor = end;
4853 continue;
4854 }
4855 }
4856 '[' => depth += 1,
4857 ']' => {
4858 depth = depth.checked_sub(1)?;
4859 if depth == 0 {
4860 return Some(cursor);
4861 }
4862 }
4863 _ => {}
4864 }
4865 cursor = next;
4866 }
4867 None
4868}
4869
4870fn parse_text_directive(
4871 input: &str,
4872 index: usize,
4873 base_offset: usize,
4874 options: &SyntaxOptions,
4875 definitions: &[String],
4876 diagnostics: &mut Vec<Diagnostic>,
4877) -> Option<(usize, Inline)> {
4878 if input[index..].starts_with("::") {
4879 return None;
4880 }
4881 if index > 0 {
4882 let previous = input[..index].chars().next_back()?;
4883 if !previous.is_whitespace() && !matches!(previous, '(' | '[' | '{') {
4884 return None;
4885 }
4886 }
4887 let opener_source = &input[index + 1..];
4888 let (name, label_source, attributes, consumed) = match parse_directive_opener(opener_source) {
4889 Some(opener) => opener,
4890 None => {
4891 if directive_opener_looks_malformed(opener_source) {
4892 diagnostics.push(Diagnostic::new(
4893 DiagnosticSeverity::Error,
4894 DiagnosticCode::InvalidDirectiveName,
4895 Span::new(base_offset + index, base_offset + input.len()),
4896 "text directive opener is malformed",
4897 ));
4898 }
4899 return None;
4900 }
4901 };
4902 let label = label_source
4903 .map(|source| {
4904 parse_inlines(
4905 source,
4906 base_offset + index + 1 + name.len() + 1,
4907 options,
4908 definitions,
4909 diagnostics,
4910 )
4911 })
4912 .unwrap_or_default();
4913 Some((
4914 index + 1 + consumed,
4915 Inline::TextDirective(TextDirective {
4916 meta: NodeMeta::new(Some(Span::new(
4917 base_offset + index,
4918 base_offset + index + 1 + consumed,
4919 ))),
4920 name,
4921 label,
4922 attributes,
4923 }),
4924 ))
4925}
4926
4927fn parse_directive_opener(
4928 input: &str,
4929) -> Option<(String, Option<&str>, Vec<DirectiveAttribute>, usize)> {
4930 let mut index = 0;
4931 while let Some((next, char)) = next_char(input, index) {
4932 if char.is_ascii_alphanumeric() || char == '_' || char == '-' {
4933 index = next;
4934 } else {
4935 break;
4936 }
4937 }
4938 let name = &input[..index];
4939 if !is_directive_name(name) {
4940 return None;
4941 }
4942
4943 let mut label = None;
4944 let mut attributes = Vec::new();
4945 let mut consumed = index;
4946 if input.as_bytes().get(consumed) == Some(&b'[') {
4947 let close = find_link_label_end(input, consumed)?;
4948 label = Some(&input[consumed + 1..close]);
4949 consumed = close + 1;
4950 }
4951 if input.as_bytes().get(consumed) == Some(&b'{') {
4952 let close = find_directive_attributes_close(input, consumed)?;
4953 attributes = parse_attributes(&input[consumed + 1..close]);
4954 consumed = close + 1;
4955 }
4956
4957 Some((name.into(), label, attributes, consumed))
4958}
4959
4960fn directive_opener_looks_malformed(input: &str) -> bool {
4961 let mut index = 0;
4962 while let Some((next, char)) = next_char(input, index) {
4963 if char.is_ascii_alphanumeric() || char == '_' || char == '-' {
4964 index = next;
4965 } else {
4966 break;
4967 }
4968 }
4969 index > 0
4970 && is_directive_name(&input[..index])
4971 && matches!(input.as_bytes().get(index), Some(b'[' | b'{'))
4972}
4973
/// Finds the `}` that closes the attribute block opened by the `{` at `open`.
///
/// A `}` inside a single- or double-quoted section does not count, and a
/// backslash makes the byte after it inert (inside or outside quotes).
/// Returns the index of the closing `}`, or `None` when `open` is not a `{`
/// or no closing brace is found.
fn find_directive_attributes_close(input: &str, open: usize) -> Option<usize> {
    let bytes = input.as_bytes();
    if bytes.get(open) != Some(&b'{') {
        return None;
    }

    let mut open_quote: Option<u8> = None;
    let mut skip_next = false;
    for (offset, &byte) in bytes.iter().enumerate().skip(open + 1) {
        if skip_next {
            skip_next = false;
            continue;
        }
        match (byte, open_quote) {
            (b'\\', _) => skip_next = true,
            (_, Some(delimiter)) => {
                if byte == delimiter {
                    open_quote = None;
                }
            }
            (b'"', None) | (b'\'', None) => open_quote = Some(byte),
            (b'}', None) => return Some(offset),
            _ => {}
        }
    }
    None
}
5011
5012fn parse_attributes(input: &str) -> Vec<DirectiveAttribute> {
5013 let mut attributes = Vec::new();
5014 let mut cursor = 0;
5015 while cursor < input.len() {
5016 cursor = skip_spaces(input, cursor);
5017 if cursor >= input.len() {
5018 break;
5019 }
5020
5021 if input.as_bytes().get(cursor) == Some(&b'#') {
5022 let (id, next) = parse_attribute_token(input, cursor + 1);
5023 if !id.is_empty() {
5024 attributes.push(DirectiveAttribute {
5025 name: "id".into(),
5026 value: Some(id.into()),
5027 });
5028 }
5029 cursor = next;
5030 continue;
5031 }
5032
5033 if input.as_bytes().get(cursor) == Some(&b'.') {
5034 let (class, next) = parse_attribute_token(input, cursor + 1);
5035 if !class.is_empty() {
5036 attributes.push(DirectiveAttribute {
5037 name: "class".into(),
5038 value: Some(class.into()),
5039 });
5040 }
5041 cursor = next;
5042 continue;
5043 }
5044
5045 let (name, next) = parse_attribute_name(input, cursor);
5046 if name.is_empty() {
5047 break;
5048 }
5049 cursor = skip_spaces(input, next);
5050 if input.as_bytes().get(cursor) == Some(&b'=') {
5051 cursor = skip_spaces(input, cursor + 1);
5052 if let Some((value, next)) = parse_attribute_value(input, cursor) {
5053 attributes.push(DirectiveAttribute {
5054 name: name.into(),
5055 value: Some(value),
5056 });
5057 cursor = next;
5058 } else {
5059 attributes.push(DirectiveAttribute {
5060 name: name.into(),
5061 value: Some(String::new()),
5062 });
5063 }
5064 } else {
5065 attributes.push(DirectiveAttribute {
5066 name: name.into(),
5067 value: None,
5068 });
5069 }
5070 }
5071 attributes
5072}
5073
/// Reads the run of non-whitespace characters starting at `index`.
///
/// Returns the token and the byte index just past it.
fn parse_attribute_token(input: &str, index: usize) -> (&str, usize) {
    let rest = &input[index..];
    let length = rest.find(char::is_whitespace).unwrap_or(rest.len());
    (&rest[..length], index + length)
}
5084
/// Reads an attribute name starting at `index`, stopping at whitespace or `=`.
///
/// Returns the name and the byte index just past it.
fn parse_attribute_name(input: &str, index: usize) -> (&str, usize) {
    let rest = &input[index..];
    let length = rest
        .find(|candidate: char| candidate.is_whitespace() || candidate == '=')
        .unwrap_or(rest.len());
    (&rest[..length], index + length)
}
5095
5096fn parse_attribute_value(input: &str, index: usize) -> Option<(String, usize)> {
5097 let quote = input.as_bytes().get(index).copied();
5098 if matches!(quote, Some(b'"' | b'\'')) {
5099 let quote = quote?;
5100 let mut cursor = index + 1;
5101 while cursor < input.len() {
5102 let (next, char) = next_char(input, cursor)?;
5103 if char as u8 == quote && !is_escaped_at(input, cursor) {
5104 return Some((unescape_ascii_punctuation(&input[index + 1..cursor]), next));
5105 }
5106 cursor = next;
5107 }
5108 return None;
5109 }
5110
5111 let (value, next) = parse_attribute_token(input, index);
5112 Some((
5113 unescape_selected(value, |char| matches!(char, '\\' | '&')),
5114 next,
5115 ))
5116}
5117
/// Pieces of an inline code span as produced by `parse_code_span`.
struct CodeSpanSource {
    /// Span content after `normalize_code_span` has been applied.
    value: String,
    /// Span content exactly as written between the two backtick runs.
    raw: String,
    /// Number of backticks in the opening run (the closing run has the same length).
    fence_length: usize,
}
5123
5124fn parse_code_span(input: &str, index: usize) -> Option<(usize, CodeSpanSource)> {
5125 let len = input[index..]
5126 .as_bytes()
5127 .iter()
5128 .take_while(|byte| **byte == b'`')
5129 .count();
5130 let search_start = index + len;
5131 let close = find_code_span_close(input, search_start, len)?;
5132 let raw = &input[search_start..close];
5133 Some((
5134 close + len,
5135 CodeSpanSource {
5136 value: normalize_code_span(raw),
5137 raw: raw.into(),
5138 fence_length: len,
5139 },
5140 ))
5141}
5142
5143fn find_code_span_close(input: &str, start: usize, marker_len: usize) -> Option<usize> {
5144 let bytes = input.as_bytes();
5145 let mut cursor = start;
5146 while cursor < bytes.len() {
5147 if bytes[cursor] != b'`' {
5148 cursor = next_char(input, cursor)
5149 .map(|(next, _)| next)
5150 .unwrap_or(bytes.len());
5151 continue;
5152 }
5153 let run_len = bytes[cursor..]
5154 .iter()
5155 .take_while(|byte| **byte == b'`')
5156 .count();
5157 if run_len == marker_len {
5158 return Some(cursor);
5159 }
5160 cursor += run_len;
5161 }
5162 None
5163}
5164
/// Normalizes raw code-span content.
///
/// Every line ending (`\r\n`, `\r` or `\n`) becomes a single space. If the result both
/// starts and ends with a space and is not made of spaces only, one space is removed
/// from each end.
fn normalize_code_span(input: &str) -> String {
    let mut flattened = String::new();
    let mut chars = input.chars().peekable();
    while let Some(current) = chars.next() {
        match current {
            '\r' => {
                // A following `\n` belongs to the same line ending.
                if chars.peek() == Some(&'\n') {
                    chars.next();
                }
                flattened.push(' ');
            }
            '\n' => flattened.push(' '),
            other => flattened.push(other),
        }
    }

    let padded = flattened.starts_with(' ') && flattened.ends_with(' ');
    if padded && flattened.chars().any(|char| char != ' ') {
        flattened[1..flattened.len() - 1].into()
    } else {
        flattened
    }
}
5197
5198fn can_open_delimited(input: &str, index: usize, marker_len: usize) -> bool {
5199 delimiter_flanking(input, index, marker_len).left
5200}
5201
5202fn can_close_delimited(input: &str, index: usize, marker_len: usize) -> bool {
5203 delimiter_flanking(input, index, marker_len).right
5204}
5205
5206fn find_closing_delimiter(
5207 input: &str,
5208 start: usize,
5209 marker: &str,
5210 underscore: bool,
5211) -> Option<usize> {
5212 let marker_len = marker.len();
5213 let mut cursor = start;
5214 let mut nested = 0usize;
5215 while cursor <= input.len() {
5216 let candidate = input[cursor..].find(marker).map(|offset| cursor + offset)?;
5217 if is_escaped_at(input, candidate) {
5218 cursor = candidate + marker_len;
5219 continue;
5220 }
5221 if delimiter_candidate_precedes_link_close(input, start, candidate, marker_len) {
5222 cursor = candidate + marker_len;
5223 continue;
5224 }
5225 if marker_len == 1 && nested == 0 && starts_longer_delimiter_run(input, candidate, marker) {
5226 cursor = candidate + delimiter_run_len(input, candidate, marker);
5227 continue;
5228 }
5229
5230 let can_open = if underscore {
5231 can_open_underscore(input, candidate, marker_len)
5232 } else {
5233 can_open_delimited(input, candidate, marker_len)
5234 };
5235 let can_close = if underscore {
5236 can_close_underscore(input, candidate, marker_len)
5237 } else {
5238 can_close_delimited(input, candidate, marker_len)
5239 };
5240
5241 if can_close {
5242 if nested == 0 {
5243 return Some(candidate);
5244 }
5245 nested -= 1;
5246 cursor = candidate + marker_len;
5247 continue;
5248 }
5249 if can_open {
5250 nested += 1;
5251 }
5252 cursor = candidate + marker_len;
5253 }
5254 None
5255}
5256
5257fn find_single_tilde_delete_close(input: &str, start: usize) -> Option<usize> {
5258 let mut cursor = start;
5259 while cursor < input.len() {
5260 let Some(candidate) = input[cursor..].find('~').map(|index| cursor + index) else {
5261 break;
5262 };
5263 if !is_escaped_at(input, candidate) && single_tilde_can_close_delete(input, candidate) {
5264 return Some(candidate);
5265 }
5266 cursor = candidate + 1;
5267 }
5268 None
5269}
5270
5271fn single_tilde_can_open_delete(input: &str, index: usize) -> bool {
5272 starts_exact_byte_run(input, index, b'~', 1)
5273 && can_open_delimited(input, index, 1)
5274 && !tilde_is_alphanumeric_interior(input, index)
5275}
5276
5277fn single_tilde_can_close_delete(input: &str, index: usize) -> bool {
5278 starts_exact_byte_run(input, index, b'~', 1)
5279 && can_close_delimited(input, index, 1)
5280 && !tilde_is_alphanumeric_interior(input, index)
5281}
5282
5283fn single_tilde_delete_takes_precedence(
5284 options: &SyntaxOptions,
5285 input: &str,
5286 index: usize,
5287) -> bool {
5288 options.constructs.gfm_strikethrough
5289 && options.parse.single_tilde_strikethrough
5290 && single_tilde_can_open_delete(input, index)
5291 && find_single_tilde_delete_close(input, index + 1).is_some()
5292}
5293
/// Whether the one-byte marker at `index` has an alphanumeric character directly on both
/// sides.
fn tilde_is_alphanumeric_interior(input: &str, index: usize) -> bool {
    let (before, after) = (&input[..index], &input[index + 1..]);
    matches!(before.chars().next_back(), Some(left) if left.is_alphanumeric())
        && matches!(after.chars().next(), Some(right) if right.is_alphanumeric())
}
5300
5301fn starts_exact_byte_run(input: &str, index: usize, marker: u8, len: usize) -> bool {
5302 input.as_bytes().get(index) == Some(&marker)
5303 && delimiter_byte_run_start(input, index, marker) == index
5304 && delimiter_byte_run_len(input, index, marker) == len
5305}
5306
5307fn delimiter_byte_run_start(input: &str, index: usize, marker: u8) -> usize {
5308 let bytes = input.as_bytes();
5309 let mut start = index;
5310 while start > 0 && bytes[start - 1] == marker && !is_escaped_at(input, start - 1) {
5311 start -= 1;
5312 }
5313 start
5314}
5315
/// Counts the consecutive `marker` bytes starting at `index` (zero when `index` is out of
/// range or holds a different byte).
fn delimiter_byte_run_len(input: &str, index: usize, marker: u8) -> usize {
    input.as_bytes().get(index..).map_or(0, |tail| {
        tail.iter().take_while(|&&byte| byte == marker).count()
    })
}
5324
5325fn find_simple_inline_close(input: &str, start: usize, marker: u8) -> Option<usize> {
5326 let bytes = input.as_bytes();
5327 let mut cursor = start;
5328 while cursor < input.len() {
5329 match bytes[cursor] {
5330 b'\\' => {
5331 cursor += 1;
5332 if cursor < input.len() {
5333 cursor = next_char(input, cursor)?.0;
5334 }
5335 }
5336 b'\n' | b'\r' => return None,
5337 byte if byte == marker => return (cursor > start).then_some(cursor),
5338 _ => cursor = next_char(input, cursor)?.0,
5339 }
5340 }
5341 None
5342}
5343
5344fn find_spoiler_close(input: &str, start: usize) -> Option<usize> {
5345 let bytes = input.as_bytes();
5346 let mut cursor = start;
5347 while cursor + 1 < input.len() {
5348 match bytes[cursor] {
5349 b'\\' => {
5350 cursor += 1;
5351 if cursor < input.len() {
5352 cursor = next_char(input, cursor)?.0;
5353 }
5354 }
5355 b'\n' | b'\r' => return None,
5356 b'|' if bytes.get(cursor + 1) == Some(&b'|')
5357 && cursor > start
5358 && bytes.get(cursor.wrapping_sub(1)) != Some(&b'|') =>
5359 {
5360 return Some(cursor);
5361 }
5362 _ => cursor = next_char(input, cursor)?.0,
5363 }
5364 }
5365 None
5366}
5367
/// Whether `index` is the first marker of a run containing at least two consecutive
/// copies of `marker`.
fn starts_longer_delimiter_run(input: &str, index: usize, marker: &str) -> bool {
    let (before, after) = input.split_at(index);
    match after.strip_prefix(marker) {
        Some(rest) => !before.ends_with(marker) && rest.starts_with(marker),
        None => false,
    }
}
5373
/// Returns the byte length of the run of back-to-back `marker` copies starting at `index`.
fn delimiter_run_len(input: &str, index: usize, marker: &str) -> usize {
    let mut rest = &input[index..];
    let mut consumed = 0;
    while let Some(tail) = rest.strip_prefix(marker) {
        consumed += marker.len();
        rest = tail;
    }
    consumed
}
5381
5382fn delimiter_candidate_precedes_link_close(
5383 input: &str,
5384 start: usize,
5385 candidate: usize,
5386 marker_len: usize,
5387) -> bool {
5388 let bytes = input.as_bytes();
5389 if bytes.get(candidate + marker_len) != Some(&b']') {
5390 return false;
5391 }
5392 if !matches!(bytes.get(candidate + marker_len + 1), Some(b'(' | b'[')) {
5393 return false;
5394 }
5395
5396 let mut depth = 0usize;
5397 let mut cursor = start;
5398 while cursor < candidate {
5399 let Some((next, char)) = next_char(input, cursor) else {
5400 break;
5401 };
5402 match char {
5403 '\\' => {
5404 cursor = next_char(input, next)
5405 .map(|(after_escape, _)| after_escape)
5406 .unwrap_or(next);
5407 continue;
5408 }
5409 '`' => {
5410 if let Some((end, _)) = parse_code_span(input, cursor) {
5411 cursor = end;
5412 continue;
5413 }
5414 }
5415 '[' => depth += 1,
5416 ']' => depth = depth.saturating_sub(1),
5417 _ => {}
5418 }
5419 cursor = next;
5420 }
5421 depth > 0
5422}
5423
5424fn can_open_underscore(input: &str, index: usize, marker_len: usize) -> bool {
5425 let flanking = delimiter_flanking(input, index, marker_len);
5426 flanking.left
5427 && (!flanking.right || flanking.previous.is_some_and(|c| c.is_ascii_punctuation()))
5428}
5429
5430fn can_close_underscore(input: &str, index: usize, marker_len: usize) -> bool {
5431 let flanking = delimiter_flanking(input, index, marker_len);
5432 flanking.right && (!flanking.left || flanking.next.is_some_and(|c| c.is_ascii_punctuation()))
5433}
5434
/// Result of `delimiter_flanking`: how a delimiter run relates to its neighbours.
#[derive(Clone, Copy)]
struct DelimiterFlanking {
    /// The run is left-flanking.
    left: bool,
    /// The run is right-flanking.
    right: bool,
    /// Character directly before the run, if any.
    previous: Option<char>,
    /// Character directly after the run, if any.
    next: Option<char>,
}
5442
5443fn delimiter_flanking(input: &str, index: usize, marker_len: usize) -> DelimiterFlanking {
5444 let previous = input[..index].chars().next_back();
5445 let next = input[index + marker_len..].chars().next();
5446
5447 let previous_whitespace = previous.is_none_or(char::is_whitespace);
5448 let next_whitespace = next.is_none_or(char::is_whitespace);
5449 let previous_punctuation = previous.is_some_and(is_flanking_punctuation);
5450 let next_punctuation = next.is_some_and(is_flanking_punctuation);
5451
5452 let left = next.is_some()
5453 && !next_whitespace
5454 && !(next_punctuation && !previous_whitespace && !previous_punctuation);
5455 let right = previous.is_some()
5456 && !previous_whitespace
5457 && !(previous_punctuation && !next_whitespace && !next_punctuation);
5458
5459 DelimiterFlanking {
5460 left,
5461 right,
5462 previous,
5463 next,
5464 }
5465}
5466
5467fn parse_math_inline(input: &str, index: usize) -> Option<(usize, String, MathInlineKind)> {
5488 if let Some((end, value)) = parse_math_code_inline(input, index) {
5489 return Some((end, value, MathInlineKind::Code));
5490 }
5491
5492 let bytes = input.as_bytes();
5493 let open_dollars = bytes[index..]
5494 .iter()
5495 .take_while(|byte| **byte == b'$')
5496 .count();
5497 if open_dollars == 0 || open_dollars > 2 {
5500 return None;
5501 }
5502
5503 let content_start = index + open_dollars;
5504 let close = scan_to_closing_dollar(input, content_start, open_dollars)?;
5505 let content_end = close - open_dollars;
5506 if content_end <= content_start {
5509 return None;
5510 }
5511
5512 let raw = &input[content_start..content_end];
5513 let value = if open_dollars == 1 {
5514 normalize_math_text(raw)
5515 } else {
5516 raw.into()
5517 };
5518 let dollars = u8::try_from(open_dollars).unwrap_or(u8::MAX);
5519 Some((close, value, MathInlineKind::Dollar { dollars }))
5520}
5521
5522fn scan_to_closing_dollar(input: &str, start: usize, open_dollars: usize) -> Option<usize> {
5526 let bytes = input.as_bytes();
5527 if open_dollars == 1 && bytes.get(start).is_some_and(|byte| is_math_space(*byte)) {
5529 return None;
5530 }
5531
5532 let mut cursor = start;
5533 loop {
5534 while cursor < bytes.len() && bytes[cursor] != b'$' {
5535 cursor += 1;
5536 }
5537 if cursor >= bytes.len() {
5538 return None;
5539 }
5540 let prev = bytes[cursor - 1];
5543 if open_dollars == 1 && is_math_space(prev) {
5544 return None;
5545 }
5546 if open_dollars == 1 && prev == b'\\' {
5547 cursor += 1;
5550 continue;
5551 }
5552 let run = bytes[cursor..]
5553 .iter()
5554 .take(open_dollars)
5555 .take_while(|byte| **byte == b'$')
5556 .count();
5557 if open_dollars == 1 && bytes.get(cursor + run).is_some_and(u8::is_ascii_digit) {
5559 return None;
5560 }
5561 if run == open_dollars {
5562 return Some(cursor + run);
5563 }
5564 cursor += run;
5565 }
5566}
5567
/// Whether `byte` is a tab, line feed, carriage return or space.
fn is_math_space(byte: u8) -> bool {
    [b'\t', b'\n', b'\r', b' '].contains(&byte)
}
5572
/// Normalizes single-dollar math content.
///
/// Every line ending (`\r\n`, `\r` or `\n`) becomes a single space. If the result both
/// starts and ends with a space and is not made of spaces only, one space is removed
/// from each end.
fn normalize_math_text(input: &str) -> String {
    let mut collapsed = String::new();
    let mut previous_was_cr = false;
    for current in input.chars() {
        let follows_cr = core::mem::replace(&mut previous_was_cr, current == '\r');
        match current {
            // The `\n` of a `\r\n` pair was already accounted for by the `\r`.
            '\n' if follows_cr => {}
            '\r' | '\n' => collapsed.push(' '),
            other => collapsed.push(other),
        }
    }

    let padded = collapsed.starts_with(' ') && collapsed.ends_with(' ');
    let only_spaces = collapsed.chars().all(|char| char == ' ');
    if padded && !only_spaces {
        collapsed[1..collapsed.len() - 1].into()
    } else {
        collapsed
    }
}
5608
/// Parses the `` $`…`$ `` inline math form starting at `index`.
///
/// Returns the byte index just past the closing `` `$ `` and the content between the
/// delimiters. Empty content or a missing closer yields `None`.
fn parse_math_code_inline(input: &str, index: usize) -> Option<(usize, String)> {
    let body = input[index..].strip_prefix("$`")?;
    let length = body.find("`$")?;
    if length == 0 {
        return None;
    }
    let body_start = index + 2;
    Some((body_start + length + 2, body[..length].into()))
}
5624
5625fn parse_link_resource(input: &str, open: usize) -> Option<(usize, ParsedLinkResource)> {
5626 let bytes = input.as_bytes();
5627 if bytes.get(open) != Some(&b'(') {
5628 return None;
5629 }
5630 let (mut cursor, initial_space) = skip_link_resource_space_with_info(input, open + 1)?;
5631 if bytes.get(cursor) == Some(&b')') {
5632 return Some((
5633 cursor + 1,
5634 ParsedLinkResource {
5635 destination: String::new(),
5636 destination_kind: LinkDestinationKind::Omitted,
5637 title: None,
5638 title_kind: None,
5639 },
5640 ));
5641 }
5642 if initial_space && matches!(bytes.get(cursor), Some(b'"' | b'\'' | b'(')) {
5643 let (title, title_kind, next) = parse_link_title(input, cursor)?;
5644 cursor = skip_link_resource_space(input, next)?;
5645 if bytes.get(cursor) == Some(&b')') {
5646 return Some((
5647 cursor + 1,
5648 ParsedLinkResource {
5649 destination: String::new(),
5650 destination_kind: LinkDestinationKind::Omitted,
5651 title: Some(title),
5652 title_kind: Some(title_kind),
5653 },
5654 ));
5655 }
5656 return None;
5657 }
5658 let (destination, destination_kind, next) = parse_link_destination(input, cursor)?;
5659 let (after_destination, had_space) = skip_link_resource_space_with_info(input, next)?;
5660 cursor = after_destination;
5661 if bytes.get(cursor) == Some(&b')') {
5662 return Some((
5663 cursor + 1,
5664 ParsedLinkResource {
5665 destination,
5666 destination_kind,
5667 title: None,
5668 title_kind: None,
5669 },
5670 ));
5671 }
5672 if !had_space {
5673 return None;
5674 }
5675
5676 let (title, title_kind, next) = parse_link_title(input, cursor)?;
5677 cursor = skip_link_resource_space(input, next)?;
5678 if bytes.get(cursor) == Some(&b')') {
5679 Some((
5680 cursor + 1,
5681 ParsedLinkResource {
5682 destination,
5683 destination_kind,
5684 title: Some(title),
5685 title_kind: Some(title_kind),
5686 },
5687 ))
5688 } else {
5689 None
5690 }
5691}
5692
5693fn parse_link_destination(
5694 input: &str,
5695 index: usize,
5696) -> Option<(String, LinkDestinationKind, usize)> {
5697 if input.as_bytes().get(index) == Some(&b'<') {
5698 let mut cursor = index + 1;
5699 while cursor < input.len() {
5700 let (next, char) = next_char(input, cursor)?;
5701 if char == '>' && !is_escaped_at(input, cursor) {
5702 return Some((
5703 unescape_ascii_punctuation(&input[index + 1..cursor]),
5704 LinkDestinationKind::Angle,
5705 next,
5706 ));
5707 }
5708 if (char == '<' && !is_escaped_at(input, cursor)) || char == '\n' || char == '\r' {
5709 return None;
5710 }
5711 cursor = next;
5712 }
5713 return None;
5714 }
5715
5716 let mut cursor = index;
5717 let mut depth = 0usize;
5718 while cursor < input.len() {
5719 let (next, char) = next_char(input, cursor)?;
5720 if (char == ' ' || char.is_ascii_control()) && depth == 0 {
5725 break;
5726 }
5727 if char == '(' && !is_escaped_at(input, cursor) {
5728 depth += 1;
5729 if depth > 32 {
5731 return None;
5732 }
5733 } else if char == ')' && !is_escaped_at(input, cursor) {
5734 if depth == 0 {
5735 break;
5736 }
5737 depth -= 1;
5738 }
5739 cursor = next;
5740 }
5741
5742 if cursor == index || depth > 0 {
5743 None
5744 } else {
5745 Some((
5746 unescape_ascii_punctuation(&input[index..cursor]),
5747 LinkDestinationKind::Bare,
5748 cursor,
5749 ))
5750 }
5751}
5752
5753fn parse_link_title(input: &str, index: usize) -> Option<(String, LinkTitleKind, usize)> {
5754 let opener = input.as_bytes().get(index).copied()?;
5755 let (closer, title_kind) = match opener {
5756 b'"' => ('"', LinkTitleKind::DoubleQuote),
5757 b'\'' => ('\'', LinkTitleKind::SingleQuote),
5758 b'(' => (')', LinkTitleKind::Paren),
5759 _ => return None,
5760 };
5761 let mut cursor = index + 1;
5762 while cursor < input.len() {
5763 let (next, char) = next_char(input, cursor)?;
5764 if char == closer && !is_escaped_at(input, cursor) {
5765 if contains_blank_line(&input[index + 1..cursor]) {
5766 return None;
5767 }
5768 return Some((
5769 unescape_ascii_punctuation(&input[index + 1..cursor]),
5770 title_kind,
5771 next,
5772 ));
5773 }
5774 if opener == b'(' && char == '(' && !is_escaped_at(input, cursor) {
5775 return None;
5776 }
5777 cursor = next;
5778 }
5779 None
5780}
5781
5782fn contains_blank_line(input: &str) -> bool {
5783 if !input.bytes().any(|byte| matches!(byte, b'\n' | b'\r')) {
5784 return false;
5785 }
5786 let lines = collect_lines(input, 0);
5791 let interior = lines.len().saturating_sub(1);
5792 lines
5793 .iter()
5794 .take(interior)
5795 .skip(1)
5796 .any(|line| line.text.trim().is_empty())
5797}
5798
5799fn skip_link_resource_space(input: &str, index: usize) -> Option<usize> {
5800 skip_link_resource_space_with_info(input, index).map(|(index, _)| index)
5801}
5802
/// Skips spaces, tabs and at most one line ending starting at `index`.
///
/// Returns the index of the first byte that was not skipped and whether anything was
/// skipped. A second line ending (`\r\n` counts as one) yields `None`.
fn skip_link_resource_space_with_info(input: &str, index: usize) -> Option<(usize, bool)> {
    let bytes = input.as_bytes();
    let mut position = index;
    let mut seen_line_break = false;
    loop {
        let step = match bytes.get(position) {
            Some(b' ' | b'\t') => 1,
            Some(b'\n') => {
                if seen_line_break {
                    return None;
                }
                seen_line_break = true;
                1
            }
            Some(b'\r') => {
                if seen_line_break {
                    return None;
                }
                seen_line_break = true;
                if bytes.get(position + 1) == Some(&b'\n') {
                    2
                } else {
                    1
                }
            }
            _ => break,
        };
        position += step;
    }
    Some((position, position != index))
}
5836
5837pub(crate) fn parse_character_reference(input: &str, index: usize) -> Option<(usize, String)> {
5838 let rest = input.get(index..)?;
5839 if let Some(rest) = rest
5840 .strip_prefix("&#x")
5841 .or_else(|| rest.strip_prefix("&#X"))
5842 {
5843 let digits = rest.find(';')?;
5844 if digits == 0 || digits > 6 || !rest[..digits].bytes().all(|byte| byte.is_ascii_hexdigit())
5845 {
5846 return None;
5847 }
5848 let value = u32::from_str_radix(&rest[..digits], 16).ok()?;
5849 return Some((
5850 index + 3 + digits + 1,
5851 character_reference_value(value).into(),
5852 ));
5853 }
5854 if let Some(rest) = rest.strip_prefix("&#") {
5855 let digits = rest.find(';')?;
5856 if digits == 0 || digits > 7 || !rest[..digits].bytes().all(|byte| byte.is_ascii_digit()) {
5857 return None;
5858 }
5859 let value = rest[..digits].parse::<u32>().ok()?;
5860 return Some((
5861 index + 2 + digits + 1,
5862 character_reference_value(value).into(),
5863 ));
5864 }
5865
5866 let name_end = rest.find(';')?;
5867 if name_end == 0 || name_end > 32 {
5868 return None;
5869 }
5870 let name = &rest[1..name_end];
5871 named_character_reference(name).map(|value| (index + name_end + 1, value.into()))
5872}
5873
/// Maps a numeric character reference to a character. Zero and values that are not valid
/// Unicode scalar values become U+FFFD.
pub(crate) fn character_reference_value(value: u32) -> char {
    match char::from_u32(value) {
        Some('\0') | None => '\u{FFFD}',
        Some(decoded) => decoded,
    }
}
5897
/// Whether the byte at `index` is preceded by an odd number of consecutive backslashes.
pub(crate) fn is_escaped_at(input: &str, index: usize) -> bool {
    let backslashes = input.as_bytes()[..index]
        .iter()
        .rev()
        .take_while(|&&byte| byte == b'\\')
        .count();
    backslashes % 2 == 1
}
5908
5909fn parse_definition_destination_title(input: &str) -> Option<ParsedLinkResource> {
5910 let (mut cursor, _) = skip_link_resource_space_with_info(input, 0)?;
5911 let (destination, destination_kind, next) = parse_link_destination(input, cursor)?;
5912 cursor = next;
5913
5914 let (next, had_space) = skip_link_resource_space_with_info(input, cursor)?;
5915 cursor = next;
5916 if cursor >= input.len() {
5917 return Some(ParsedLinkResource {
5918 destination,
5919 destination_kind,
5920 title: None,
5921 title_kind: None,
5922 });
5923 }
5924 if !had_space {
5925 return None;
5926 }
5927
5928 let (title, title_kind, next) = parse_link_title(input, cursor)?;
5929 let after_title = skip_link_resource_space(input, next)?;
5930 (after_title == input.len()).then_some(ParsedLinkResource {
5931 destination,
5932 destination_kind,
5933 title: Some(title),
5934 title_kind: Some(title_kind),
5935 })
5936}
5937
/// Whether the first non-whitespace character of `input` is a title opener (`"`, `'` or
/// `(`).
fn line_can_start_definition_title(input: &str) -> bool {
    matches!(
        input.trim_start().chars().next(),
        Some('"' | '\'' | '(')
    )
}
5942
5943fn unescape_ascii_punctuation(input: &str) -> String {
5944 unescape_selected(input, |char| char.is_ascii_punctuation())
5946}
5947
5948fn unescape_string(input: &str) -> String {
5949 unescape_selected(input, |char| char.is_ascii_punctuation() || char == '&')
5950}
5951
5952fn unescape_selected(input: &str, should_unescape: impl Fn(char) -> bool) -> String {
5953 let mut output = String::new();
5954 let mut cursor = 0;
5955 while cursor < input.len() {
5956 if input.as_bytes().get(cursor) == Some(&b'&') {
5957 if let Some((end, value)) = parse_character_reference(input, cursor) {
5958 output.push_str(&value);
5959 cursor = end;
5960 continue;
5961 }
5962 }
5963 let (next, char) = next_char(input, cursor).expect("valid UTF-8 byte index");
5964 if char == '\\' {
5965 if let Some((after_escape, escaped)) = next_char(input, next) {
5966 if should_unescape(escaped) {
5967 output.push(escaped);
5968 } else {
5969 output.push(char);
5970 output.push(escaped);
5971 }
5972 cursor = after_escape;
5973 } else {
5974 output.push(char);
5975 cursor = next;
5976 }
5977 } else {
5978 output.push(if char == '\0' { '\u{FFFD}' } else { char });
5979 cursor = next;
5980 }
5981 }
5982 output
5983}
5984
/// Appends `line` to `output`, preceded by `\n` unless `output` is still empty.
fn push_line(output: &mut String, line: &str) {
    if output.is_empty() {
        output.push_str(line);
        return;
    }
    output.push('\n');
    output.push_str(line);
}
5991
5992fn ensure_line_separator(output: &mut String) {
5993 if !output.is_empty() && !ends_with_line_ending(output) {
5994 output.push('\n');
5995 }
5996}
5997
/// Whether the last character of `input` is `\n` or `\r`.
fn ends_with_line_ending(input: &str) -> bool {
    matches!(input.as_bytes().last(), Some(b'\n' | b'\r'))
}
6001
6002fn flush_text(nodes: &mut Vec<Inline>, text: &mut String, text_start: usize, end: usize) {
6003 if !text.is_empty() {
6004 nodes.push(Inline::Text(Text {
6005 meta: NodeMeta::new(Some(Span::new(text_start, end))),
6006 value: core::mem::take(text),
6007 }));
6008 }
6009}
6010
6011fn gfm_link_label_preserves_url_dot_escape(
6012 text: &str,
6013 escaped: char,
6014 options: &SyntaxOptions,
6015 context: InlineContext,
6016) -> bool {
6017 escaped == '.'
6018 && !context.allow_links
6019 && options.constructs.gfm_autolink_literal
6020 && (text.starts_with("www.") || text.starts_with("http://") || text.starts_with("https://"))
6021}
6022
/// Decodes the character starting at byte `index` and returns it together with the byte
/// index just past it, or `None` at the end of `input`.
fn next_char(input: &str, index: usize) -> Option<(usize, char)> {
    input[index..]
        .chars()
        .next()
        .map(|current| (index + current.len_utf8(), current))
}
6027
6028fn is_flanking_punctuation(value: char) -> bool {
6033 value.is_ascii_punctuation() || crate::unicode_punctuation::is_unicode_punctuation(value)
6034}
6035
/// Normalizes a reference label for comparison.
///
/// `ẞ` is rewritten to `ss`, whitespace runs collapse to single spaces with leading and
/// trailing whitespace removed, and the result is upper-cased and then lower-cased.
pub(crate) fn normalize_label(label: &str) -> String {
    let folded_sharp_s = label.replace('ẞ', "ss");
    let mut collapsed = String::with_capacity(folded_sharp_s.len());
    for word in folded_sharp_s.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed.to_uppercase().to_lowercase()
}
6058
6059fn definition_exists(definitions: &[String], label: &str) -> bool {
6060 if label.is_empty() || !reference_label_is_within_limit(label) {
6061 return false;
6062 }
6063
6064 let identifier = normalize_label(label);
6065 definitions
6066 .iter()
6067 .any(|definition| definition == &identifier)
6068}
6069
6070fn reference_label_is_within_limit(label: &str) -> bool {
6071 label.chars().take(REFERENCE_LABEL_MAX_CHARS + 1).count() <= REFERENCE_LABEL_MAX_CHARS
6072}
6073
6074fn trim_up_to_three_spaces(input: &str) -> Option<&str> {
6075 let (columns, bytes) = leading_indent(input);
6076 if columns <= 3 {
6077 Some(&input[bytes..])
6078 } else {
6079 None
6080 }
6081}
6082
6083fn fence_start(input: &str) -> Option<(FenceMarker, usize)> {
6084 let marker = match input.as_bytes().first()? {
6085 b'`' => FenceMarker::Backtick,
6086 b'~' => FenceMarker::Tilde,
6087 _ => return None,
6088 };
6089 let byte = match marker {
6090 FenceMarker::Backtick => b'`',
6091 FenceMarker::Tilde => b'~',
6092 };
6093 let length = input
6094 .as_bytes()
6095 .iter()
6096 .take_while(|item| **item == byte)
6097 .count();
6098 if length >= 3 {
6099 Some((marker, length))
6100 } else {
6101 None
6102 }
6103}
6104
6105fn fence_close(input: &str, marker: FenceMarker, length: usize) -> bool {
6106 let byte = match marker {
6107 FenceMarker::Backtick => b'`',
6108 FenceMarker::Tilde => b'~',
6109 };
6110 let count = input
6111 .as_bytes()
6112 .iter()
6113 .take_while(|item| **item == byte)
6114 .count();
6115 count >= length && input[count..].trim().is_empty()
6116}
6117
/// Removes a closing `#` sequence from heading content.
///
/// Trailing whitespace is always trimmed. A trailing run of `#` is removed only when it
/// is the whole content or is preceded by a space or tab; otherwise it is kept.
fn trim_closing_hashes(input: &str) -> &str {
    let trimmed = input.trim_end();
    let before = trimmed.trim_end_matches('#');
    if before.len() == trimmed.len() {
        // No trailing hashes at all.
        return trimmed;
    }
    if before.is_empty() {
        return "";
    }
    if before.ends_with(|last: char| last == ' ' || last == '\t') {
        before.trim_end()
    } else {
        trimmed
    }
}
6135
6136fn list_marker_info(input: &str) -> Option<ListMarkerInfo<'_>> {
6137 let trimmed = trim_up_to_three_spaces(input)?;
6138 let indent = input.len() - trimmed.len();
6139 let bytes = trimmed.as_bytes();
6140 match bytes.first()? {
6141 b'-' | b'*' | b'+' if is_list_padding_byte(bytes.get(1).copied()) => {
6142 let delimiter = match bytes[0] {
6143 b'-' => ListDelimiter::Dash,
6144 b'*' => ListDelimiter::Asterisk,
6145 _ => ListDelimiter::Plus,
6146 };
6147 let (content_offset, content_indent) = list_content_offset(trimmed, 1, indent);
6148 Some(ListMarkerInfo {
6149 ordered: false,
6150 start: None,
6151 delimiter,
6152 indent,
6153 marker_len: 1,
6154 content_indent,
6155 content: &trimmed[content_offset..],
6156 })
6157 }
6158 byte if byte.is_ascii_digit() => {
6159 let mut end = 0;
6160 while bytes.get(end).is_some_and(|byte| byte.is_ascii_digit()) {
6161 end += 1;
6162 }
6163 if end > 9 {
6164 return None;
6165 }
6166 let delimiter = match bytes.get(end)? {
6167 b'.' => ListDelimiter::Period,
6168 b')' => ListDelimiter::Paren,
6169 _ => return None,
6170 };
6171 if !is_list_padding_byte(bytes.get(end + 1).copied()) {
6172 return None;
6173 }
6174 let start = trimmed[..end].parse().ok()?;
6175 let marker_len = end + 1;
6176 let (content_offset, content_indent) = list_content_offset(trimmed, marker_len, indent);
6177 Some(ListMarkerInfo {
6178 ordered: true,
6179 start: Some(start),
6180 delimiter,
6181 indent,
6182 marker_len,
6183 content_indent,
6184 content: &trimmed[content_offset..],
6185 })
6186 }
6187 _ => None,
6188 }
6189}
6190
/// Computes where list item content begins after a marker.
///
/// `input` starts at the marker, `marker_len` is the marker's byte length and `indent` is
/// the column of the marker. Returns `(byte offset of the content in input, content
/// column)`. Tabs advance to the next multiple of four columns. When the padding after
/// the marker is 1 to 4 columns wide and content follows, the content starts after the
/// padding; otherwise the content column is one past the marker.
fn list_content_offset(input: &str, marker_len: usize, indent: usize) -> (usize, usize) {
    let marker_end_column = indent + marker_len;
    let minimal_column = marker_end_column + 1;
    let Some(after_marker) = input
        .as_bytes()
        .get(marker_len..)
        .filter(|rest| !rest.is_empty())
    else {
        return (marker_len, minimal_column);
    };

    let mut column = marker_end_column;
    let mut consumed = 0;
    for &byte in after_marker {
        let width = match byte {
            b' ' => 1,
            b'\t' => 4 - (column % 4),
            _ => break,
        };
        column += width;
        consumed += 1;
    }

    let content_offset = marker_len + consumed;
    if consumed == after_marker.len() {
        // Nothing but padding after the marker.
        return (content_offset, minimal_column);
    }
    match column - marker_end_column {
        1..=4 => (content_offset, column),
        _ => (marker_len + 1, minimal_column),
    }
}
6221
6222fn list_marker_first_content<'a>(input: &'a str, marker: ListMarkerInfo<'a>) -> Cow<'a, str> {
6223 let Some(trimmed) = trim_up_to_three_spaces(input) else {
6224 return Cow::Borrowed(marker.content);
6225 };
6226 let after_marker = &trimmed[marker.marker_len..];
6227 if after_marker.starts_with('\t') {
6228 strip_leading_indent_columns_from(after_marker, 1, marker.indent + marker.marker_len)
6229 } else {
6230 Cow::Borrowed(marker.content)
6231 }
6232}
6233
/// Whether the byte after a list marker is acceptable: a space, a tab, or nothing at all
/// (end of line).
fn is_list_padding_byte(byte: Option<u8>) -> bool {
    byte.map_or(true, |value| value == b' ' || value == b'\t')
}
6237
6238fn same_list_marker(left: ListMarkerInfo<'_>, right: ListMarkerInfo<'_>) -> bool {
6239 left.ordered == right.ordered && left.delimiter == right.delimiter
6243}
6244
6245fn sibling_list_marker_at_line(
6252 input: &str,
6253 first_marker: ListMarkerInfo<'_>,
6254 content_indent: usize,
6255) -> bool {
6256 list_marker_info(input).is_some_and(|candidate| {
6257 same_list_marker(first_marker, candidate) && candidate.indent < content_indent
6258 })
6259}
6260
6261fn same_list_marker_line(input: &str, first_marker: ListMarkerInfo<'_>) -> bool {
6266 list_marker_info(input).is_some_and(|candidate| same_list_marker(first_marker, candidate))
6267}
6268
6269fn next_nonblank_line(lines: &[Line<'_>], mut index: usize) -> usize {
6270 while index < lines.len() && lines[index].text.trim().is_empty() {
6271 index += 1;
6272 }
6273 index
6274}
6275
/// Measures the leading run of spaces and tabs in `input`.
///
/// Returns `(columns, bytes)`: the visual width of the run, with each tab
/// advancing to the next multiple of four columns, and its length in bytes.
fn leading_indent(input: &str) -> (usize, usize) {
    let prefix_len = input
        .bytes()
        .take_while(|byte| matches!(byte, b' ' | b'\t'))
        .count();
    let columns = input.as_bytes()[..prefix_len]
        .iter()
        .fold(0usize, |column, &byte| {
            if byte == b'\t' {
                column + 4 - column % 4
            } else {
                column + 1
            }
        });
    (columns, prefix_len)
}
6289
6290fn leading_indent_columns(input: &str) -> usize {
6291 leading_indent(input).0
6292}
6293
6294fn strip_leading_indent_columns(input: &str, max_columns: usize) -> Cow<'_, str> {
6301 strip_leading_indent_columns_from(input, max_columns, 0)
6302}
6303
/// Removes up to `max_columns` columns of leading spaces/tabs from `input`,
/// treating the first byte of `input` as sitting at `start_column` so that
/// tab stops (every four columns) line up with the original line.
///
/// The result borrows from `input` whenever the cut falls between bytes.
/// When the cut falls inside a tab, the columns of that tab beyond the cut
/// are kept as spaces, any spaces/tabs immediately after it are expanded to
/// spaces as well, and an owned string is returned. An input consisting only
/// of whitespace that fits within the budget yields the empty string.
fn strip_leading_indent_columns_from(
    input: &str,
    max_columns: usize,
    start_column: usize,
) -> Cow<'_, str> {
    /// Appends `count` spaces to `buffer`.
    fn push_spaces(buffer: &mut String, count: usize) {
        for _ in 0..count {
            buffer.push(' ');
        }
    }

    let bytes = input.as_bytes();
    let limit = start_column + max_columns;
    let mut column = start_column;

    for (index, &byte) in bytes.iter().enumerate() {
        let width = match byte {
            b' ' => 1,
            b'\t' => 4 - column % 4,
            // First non-whitespace byte: everything before it was stripped.
            _ => return Cow::Borrowed(&input[index..]),
        };
        let reached = column + width;
        if reached <= limit {
            column = reached;
            continue;
        }

        // This byte would overshoot the budget. Only a tab that starts
        // before the limit needs splitting; anything else is kept whole.
        if byte != b'\t' || column >= limit {
            return Cow::Borrowed(&input[index..]);
        }

        let tail = &bytes[index + 1..];
        let kept_columns = reached - limit;
        let mut expanded = String::with_capacity(kept_columns + tail.len());
        push_spaces(&mut expanded, kept_columns);

        // Expand the whitespace that follows the split tab so later tabs
        // keep the width they had in the original line.
        let mut tail_column = reached;
        let mut tail_whitespace = 0;
        for &tail_byte in tail {
            let tail_width = match tail_byte {
                b' ' => 1,
                b'\t' => 4 - tail_column % 4,
                _ => break,
            };
            push_spaces(&mut expanded, tail_width);
            tail_column += tail_width;
            tail_whitespace += 1;
        }
        expanded.push_str(&input[index + 1 + tail_whitespace..]);
        return Cow::Owned(expanded);
    }

    Cow::Borrowed("")
}
6355
6356fn strip_list_continuation(input: &str, content_indent: usize, list_indent: usize) -> Cow<'_, str> {
6357 let (indent_columns, indent_bytes) = leading_indent(input);
6358 if indent_columns >= content_indent {
6359 strip_leading_indent_columns(input, content_indent)
6364 } else if indent_columns > list_indent {
6365 Cow::Borrowed(&input[indent_bytes..])
6366 } else {
6367 Cow::Borrowed(trim_ascii_start(input))
6368 }
6369}
6370
6371fn take_task_marker_from_children(children: &mut [Block]) -> Option<bool> {
6372 let Some(Block::Paragraph(paragraph)) = children.first_mut() else {
6373 return None;
6374 };
6375 take_task_marker_from_inlines(&mut paragraph.children)
6376}
6377
6378fn take_task_marker_from_inlines(inlines: &mut Vec<Inline>) -> Option<bool> {
6379 let Some(Inline::Text(text)) = inlines.first() else {
6380 return None;
6381 };
6382 let first = text.value.clone();
6383
6384 if let Some((checked, consumed)) = task_marker_inline_prefix(&first) {
6385 if !first[consumed..].is_empty() || inlines_have_content_after(inlines, 1) {
6386 remove_text_prefix(inlines, consumed);
6387 return Some(checked);
6388 }
6389 }
6390
6391 if let Some(checked) = task_marker_at_text_end(&first) {
6392 if inlines
6393 .get(1)
6394 .is_some_and(|inline| matches!(inline, Inline::SoftBreak(_)))
6395 && inlines_have_content_after(inlines, 2)
6396 {
6397 inlines.remove(1);
6398 inlines.remove(0);
6399 return Some(checked);
6400 }
6401 }
6402
6403 if task_marker_split_open(&first)
6404 && inlines
6405 .get(1)
6406 .is_some_and(|inline| matches!(inline, Inline::SoftBreak(_)))
6407 {
6408 let Some(Inline::Text(next)) = inlines.get(2) else {
6409 return None;
6410 };
6411 if let Some((checked, consumed)) = task_marker_split_close_prefix(&next.value) {
6412 if !next.value[consumed..].is_empty() || inlines_have_content_after(inlines, 3) {
6413 inlines.remove(1);
6414 inlines.remove(0);
6415 remove_text_prefix(inlines, consumed);
6416 return Some(checked);
6417 }
6418 }
6419 }
6420
6421 None
6422}
6423
6424fn task_marker_inline_prefix(input: &str) -> Option<(bool, usize)> {
6425 let start = leading_trim_bytes(input);
6426 let rest = &input[start..];
6427 let checked = task_marker_checked(rest)?;
6428 let after_marker = start + 3;
6429 match input.as_bytes().get(after_marker) {
6430 Some(b' ' | b'\t') => Some((checked, after_marker + 1)),
6431 _ => None,
6432 }
6433}
6434
6435fn task_marker_at_text_end(input: &str) -> Option<bool> {
6436 let start = leading_trim_bytes(input);
6437 let rest = &input[start..];
6438 let checked = task_marker_checked(rest)?;
6439 if rest.len() == 3 {
6440 Some(checked)
6441 } else {
6442 None
6443 }
6444}
6445
6446fn task_marker_split_open(input: &str) -> bool {
6447 let start = leading_trim_bytes(input);
6448 input[start..] == *"["
6449}
6450
/// Matches the closing half of a task marker at the start of `input`.
///
/// `]` followed by a space or tab yields `(false, 2)`; `x]` or `X]` followed
/// by a space or tab yields `(true, 3)`. Anything else yields `None`.
fn task_marker_split_close_prefix(input: &str) -> Option<(bool, usize)> {
    match input.as_bytes() {
        [b']', b' ' | b'\t', ..] => Some((false, 2)),
        [b'x' | b'X', b']', b' ' | b'\t', ..] => Some((true, 3)),
        _ => None,
    }
}
6459
/// Reads a task marker at the very start of `input`: `[ ]` is unchecked,
/// `[x]` or `[X]` is checked, and anything else is not a marker.
fn task_marker_checked(input: &str) -> Option<bool> {
    match input.as_bytes().get(..3)? {
        b"[ ]" => Some(false),
        b"[x]" | b"[X]" => Some(true),
        _ => None,
    }
}
6469
6470fn remove_text_prefix(inlines: &mut Vec<Inline>, consumed: usize) {
6471 if let Some(Inline::Text(text)) = inlines.first_mut() {
6472 text.value = text.value[consumed..].into();
6473 if text.value.is_empty() {
6474 inlines.remove(0);
6475 }
6476 }
6477}
6478
6479fn inlines_have_content_after(inlines: &[Inline], start: usize) -> bool {
6480 inlines.iter().skip(start).any(|inline| match inline {
6481 Inline::Text(text) => !text.value.is_empty(),
6482 Inline::SoftBreak(_) | Inline::LineBreak(_) => false,
6483 _ => true,
6484 })
6485}
6486
6487fn update_list_item_fence(line: &str, open_fence: &mut Option<(FenceMarker, usize)>) {
6488 let Some(trimmed) = trim_up_to_three_spaces(line) else {
6489 return;
6490 };
6491 if let Some((marker, length)) = *open_fence {
6492 if fence_close(trimmed, marker, length) {
6493 *open_fence = None;
6494 }
6495 return;
6496 }
6497 if let Some((marker, length)) = fence_start(trimmed) {
6498 *open_fence = Some((marker, length));
6499 }
6500}
6501
/// Trims leading ASCII spaces and tabs only; other whitespace is kept.
fn trim_ascii_start(input: &str) -> &str {
    input.trim_start_matches([' ', '\t'])
}
6505
/// Number of leading bytes of `input` that are ASCII spaces or tabs.
fn leading_trim_bytes(input: &str) -> usize {
    input
        .bytes()
        .take_while(|byte| matches!(byte, b' ' | b'\t'))
        .count()
}
6509
6510fn parse_table_delimiter(input: &str, spoiler: bool) -> Option<Vec<TableAlignment>> {
6511 let cells = split_table_row(input, spoiler);
6512 if cells.is_empty() {
6513 return None;
6514 }
6515 let mut alignments = Vec::new();
6516 for cell in cells {
6517 alignments.push(table_delimiter_alignment(cell.trim())?);
6518 }
6519 Some(alignments)
6520}
6521
6522fn table_delimiter_alignment(cell: &str) -> Option<TableAlignment> {
6525 let bytes = cell.as_bytes();
6526 let mut cursor = 0;
6527 let left = bytes.first() == Some(&b':');
6528 if left {
6529 cursor += 1;
6530 }
6531 let dash_start = cursor;
6532 while bytes.get(cursor) == Some(&b'-') {
6533 cursor += 1;
6534 }
6535 if cursor == dash_start {
6536 return None;
6537 }
6538 let right = bytes.get(cursor) == Some(&b':');
6539 if right {
6540 cursor += 1;
6541 }
6542 if cursor != bytes.len() {
6543 return None;
6544 }
6545 Some(match (left, right) {
6546 (true, true) => TableAlignment::Center,
6547 (true, false) => TableAlignment::Left,
6548 (false, true) => TableAlignment::Right,
6549 (false, false) => TableAlignment::None,
6550 })
6551}
6552
6553fn table_indent_line(input: &str, indented_code: bool) -> Option<&str> {
6557 if indented_code {
6558 trim_up_to_three_spaces(input)
6559 } else {
6560 Some(input)
6561 }
6562}
6563
/// Reports whether a backtick run of exactly `length` backticks occurs
/// somewhere after the run that starts at byte `start` and spans `length`
/// bytes. Runs of a different length are skipped as a whole.
fn backtick_run_has_close(input: &str, start: usize, length: usize) -> bool {
    let bytes = input.as_bytes();
    let mut cursor = start + length;
    while let Some(&byte) = bytes.get(cursor) {
        if byte != b'`' {
            cursor += 1;
            continue;
        }
        let run = bytes[cursor..]
            .iter()
            .take_while(|&&candidate| candidate == b'`')
            .count();
        if run == length {
            return true;
        }
        cursor += run;
    }
    false
}
6588
/// Examines a run of backslashes starting at `cursor` that is immediately
/// followed by `|`.
///
/// Returns the byte index of that `|` together with whether the run length
/// is odd. Returns `None` when `cursor` is not on a backslash or the run is
/// not followed by a pipe.
fn table_backslash_pipe_run(input: &str, cursor: usize) -> Option<(usize, bool)> {
    let tail = input.as_bytes().get(cursor..)?;
    let backslashes = tail.iter().take_while(|&&byte| byte == b'\\').count();
    if backslashes == 0 || tail.get(backslashes) != Some(&b'|') {
        return None;
    }
    Some((cursor + backslashes, backslashes % 2 == 1))
}
6600
6601fn split_table_row(input: &str, spoiler: bool) -> Vec<String> {
6602 let trimmed = input.trim();
6603 let mut cells = Vec::new();
6604 let mut cell = String::new();
6605 let mut cursor = 0;
6606 let mut code_fence = None;
6607 let mut spoiler_open = false;
6608 let mut trailing_delimiter_end = None;
6614
6615 while cursor < trimmed.len() {
6616 let (next, char) = next_char(trimmed, cursor).expect("valid UTF-8 byte index");
6617 if char == '\\' {
6622 if let Some((pipe, escaped)) = table_backslash_pipe_run(trimmed, cursor) {
6623 if escaped {
6624 for _ in 0..pipe - cursor - 1 {
6625 cell.push('\\');
6626 }
6627 cell.push('|');
6628 cursor = pipe + 1;
6629 } else {
6630 for _ in 0..pipe - cursor {
6631 cell.push('\\');
6632 }
6633 cursor = pipe;
6634 }
6635 continue;
6636 }
6637 }
6638 if char == '`' {
6642 let length = trimmed[cursor..]
6643 .as_bytes()
6644 .iter()
6645 .take_while(|byte| **byte == b'`')
6646 .count();
6647 if code_fence == Some(length) {
6648 code_fence = None;
6649 } else if code_fence.is_none() && backtick_run_has_close(trimmed, cursor, length) {
6650 code_fence = Some(length);
6651 }
6652 cell.push_str(&trimmed[cursor..cursor + length]);
6653 cursor += length;
6654 continue;
6655 }
6656
6657 if spoiler
6658 && char == '|'
6659 && trimmed.as_bytes().get(cursor + 1) == Some(&b'|')
6660 && code_fence.is_some()
6661 {
6662 cell.push_str("||");
6663 cursor += 2;
6664 continue;
6665 }
6666
6667 if spoiler
6668 && char == '|'
6669 && trimmed.as_bytes().get(cursor + 1) == Some(&b'|')
6670 && code_fence.is_none()
6671 && !is_escaped_at(trimmed, cursor)
6672 {
6673 let closes_spoiler =
6674 spoiler_open && trimmed.as_bytes().get(cursor.wrapping_sub(1)) != Some(&b'|');
6675 let opens_spoiler = !spoiler_open
6676 && trimmed.as_bytes().get(cursor + 2) != Some(&b'|')
6677 && find_spoiler_close(trimmed, cursor + 2).is_some();
6678 if closes_spoiler || opens_spoiler {
6679 spoiler_open = opens_spoiler;
6680 cell.push_str("||");
6681 cursor += 2;
6682 continue;
6683 }
6684 }
6685
6686 if char == '|' && !spoiler_open && !is_escaped_at(trimmed, cursor) {
6687 cells.push(core::mem::take(&mut cell));
6688 spoiler_open = false;
6690 trailing_delimiter_end = Some(next);
6691 } else {
6692 cell.push(char);
6693 }
6694 cursor = next;
6695 }
6696 cells.push(cell);
6697
6698 if trimmed.starts_with('|') {
6699 cells.remove(0);
6700 }
6701 if let Some(end) = trailing_delimiter_end {
6704 if trimmed[end..].trim().is_empty() {
6705 cells.pop();
6706 }
6707 }
6708 cells
6709}
6710
6711fn table_can_start(lines: &[Line<'_>], index: usize, options: &SyntaxOptions) -> bool {
6712 if !options.constructs.gfm_table || index + 1 >= lines.len() {
6713 return false;
6714 }
6715 table_can_start_source(
6716 lines[index].text,
6717 lines[index + 1].text,
6718 options.constructs.indented_code,
6719 options.constructs.spoiler,
6720 )
6721}
6722
6723pub(crate) fn gfm_table_can_start_source(header: &str, delimiter: &str) -> bool {
6724 table_can_start_source(header, delimiter, true, false)
6725}
6726
6727fn table_can_start_source(
6728 header: &str,
6729 delimiter: &str,
6730 indented_code: bool,
6731 spoiler: bool,
6732) -> bool {
6733 let Some(delimiter) = table_indent_line(delimiter, indented_code) else {
6734 return false;
6735 };
6736 if list_marker_info(delimiter).is_some() {
6737 return false;
6738 }
6739 if !table_has_separator(header, delimiter, spoiler) {
6740 return false;
6741 }
6742 let Some(alignments) = parse_table_delimiter(delimiter, spoiler) else {
6743 return false;
6744 };
6745 split_table_row(header, spoiler).len() == alignments.len()
6746}
6747
6748fn table_has_separator(header: &str, delimiter: &str, spoiler: bool) -> bool {
6749 let Some(alignments) = parse_table_delimiter(delimiter, spoiler) else {
6757 return true;
6758 };
6759 if alignments.len() == 1 {
6760 return contains_unescaped_pipe(header, spoiler)
6761 || contains_unescaped_pipe(delimiter, spoiler)
6762 || delimiter.contains(':');
6763 }
6764 true
6765}
6766
6767fn contains_unescaped_pipe(input: &str, spoiler: bool) -> bool {
6770 let mut cursor = 0;
6771 let mut code_fence = None;
6772 let mut spoiler_open = false;
6773 while cursor < input.len() {
6774 let (next, char) = next_char(input, cursor).expect("valid UTF-8 byte index");
6775 if char == '\\' {
6776 if let Some((pipe, escaped)) = table_backslash_pipe_run(input, cursor) {
6777 cursor = if escaped { pipe + 1 } else { pipe };
6778 continue;
6779 }
6780 }
6781 if char == '`' {
6783 let length = input[cursor..]
6784 .as_bytes()
6785 .iter()
6786 .take_while(|byte| **byte == b'`')
6787 .count();
6788 if code_fence == Some(length) {
6789 code_fence = None;
6790 } else if code_fence.is_none() {
6791 code_fence = Some(length);
6792 }
6793 cursor += length;
6794 continue;
6795 }
6796 if spoiler
6797 && char == '|'
6798 && input.as_bytes().get(cursor + 1) == Some(&b'|')
6799 && code_fence.is_some()
6800 {
6801 cursor += 2;
6802 continue;
6803 }
6804 if spoiler
6805 && char == '|'
6806 && input.as_bytes().get(cursor + 1) == Some(&b'|')
6807 && code_fence.is_none()
6808 && !is_escaped_at(input, cursor)
6809 {
6810 let closes_spoiler =
6811 spoiler_open && input.as_bytes().get(cursor.wrapping_sub(1)) != Some(&b'|');
6812 let opens_spoiler = !spoiler_open
6813 && input.as_bytes().get(cursor + 2) != Some(&b'|')
6814 && find_spoiler_close(input, cursor + 2).is_some();
6815 if closes_spoiler || opens_spoiler {
6816 spoiler_open = opens_spoiler;
6817 cursor += 2;
6818 continue;
6819 }
6820 }
6821 if char == '|' && !spoiler_open && !is_escaped_at(input, cursor) {
6822 return true;
6823 }
6824 cursor = next;
6825 }
6826 false
6827}
6828
6829fn likely_block_start(input: &str, options: &SyntaxOptions) -> bool {
6830 let Some(trimmed) = trim_up_to_three_spaces(input) else {
6835 return false;
6836 };
6837 trimmed.starts_with('#')
6838 || trimmed.starts_with('>')
6839 || trimmed.starts_with("```")
6840 || trimmed.starts_with("~~~")
6841 || list_marker_can_interrupt_paragraph(input)
6842 || parse_thematic_break(Line {
6843 text: input,
6844 eol: "",
6845 start: 0,
6846 end: input.len(),
6847 end_with_eol: input.len(),
6848 lazy: false,
6849 })
6850 .is_some()
6851 || (options.constructs.html_block && line_starts_interrupting_html_block(input))
6852 || (options.constructs.math_block && math_block_fence_length(trimmed).is_some())
6853 || (options.constructs.directive_container && trimmed.starts_with(":::"))
6854 || (options.constructs.directive_leaf && trimmed.starts_with("::"))
6855 || (options.constructs.footnote_definition && line_starts_footnote_definition(trimmed))
6856}
6857
6858fn line_starts_footnote_definition(trimmed: &str) -> bool {
6861 trimmed.starts_with("[^")
6862 && find_footnote_definition_label_end(trimmed)
6863 .is_some_and(|close| is_footnote_label(&trimmed[2..close]))
6864}
6865
6866fn list_marker_can_interrupt_paragraph(input: &str) -> bool {
6867 list_marker_info(input).is_some_and(|marker| {
6868 !marker.content.trim().is_empty() && (!marker.ordered || marker.start == Some(1))
6871 })
6872}
6873
6874fn table_body_line_ends_table(line: &str, options: &SyntaxOptions) -> bool {
6879 likely_block_start(line, options)
6880 || list_marker_info(line).is_some()
6881 || (options.constructs.html_block && line_starts_html_block(line))
6882}
6883
6884fn line_starts_interrupting_html_block(input: &str) -> bool {
6885 match trim_up_to_three_spaces(input).and_then(html_block_start) {
6886 Some(HtmlBlockKind::UntilBlank) | None => false,
6887 Some(_) => true,
6888 }
6889}
6890
/// Finds the first `>` at or after `index` and returns the byte offset just
/// past it, or `None` when there is none.
fn parse_autolink_end(input: &str, index: usize) -> Option<usize> {
    let offset = input[index..].find('>')?;
    Some(index + offset + 1)
}
6894
6895fn parse_html_inline(input: &str, index: usize) -> Option<(usize, String)> {
6896 let rest = &input[index..];
6897 if rest.starts_with("<!--") {
6898 let end = rest.find("-->")? + 3;
6899 return Some((index + end, rest[..end].into()));
6900 }
6901 if rest.starts_with("<?") {
6902 let end = rest.find("?>")? + 2;
6903 return Some((index + end, rest[..end].into()));
6904 }
6905 if rest.starts_with("<![CDATA[") {
6906 let end = rest.find("]]>")? + 3;
6907 return Some((index + end, rest[..end].into()));
6908 }
6909 if is_declaration_start(rest) {
6910 let end = rest.find('>')? + 1;
6911 return Some((index + end, rest[..end].into()));
6912 }
6913
6914 let (end, _) = parse_html_tag(input, index)?;
6915 Some((end, input[index..end].into()))
6916}
6917
6918fn parse_html_tag(input: &str, index: usize) -> Option<(usize, &str)> {
6919 let bytes = input.as_bytes();
6920 if bytes.get(index) != Some(&b'<') {
6921 return None;
6922 }
6923
6924 let closing = bytes.get(index + 1) == Some(&b'/');
6925 let name_start = index + if closing { 2 } else { 1 };
6926 let first = *bytes.get(name_start)?;
6927 if !first.is_ascii_alphabetic() {
6928 return None;
6929 }
6930
6931 let mut cursor = name_start + 1;
6932 while bytes.get(cursor).is_some_and(|byte| html_name_byte(*byte)) {
6933 cursor += 1;
6934 }
6935 let name = &input[name_start..cursor];
6936
6937 if closing {
6938 cursor = skip_spaces(input, cursor);
6939 if bytes.get(cursor) == Some(&b'>') {
6940 return Some((cursor + 1, name));
6941 }
6942 return None;
6943 }
6944
6945 let mut needs_space = false;
6946 loop {
6947 let before_spaces = cursor;
6948 cursor = skip_spaces(input, cursor);
6949 let had_space = cursor > before_spaces;
6950 match bytes.get(cursor) {
6951 Some(b'>') => return Some((cursor + 1, name)),
6952 Some(b'/') if bytes.get(cursor + 1) == Some(&b'>') => return Some((cursor + 2, name)),
6953 Some(byte) if had_space && html_attribute_name_start(*byte) => {
6954 cursor += 1;
6955 while bytes
6956 .get(cursor)
6957 .is_some_and(|byte| html_attribute_name_byte(*byte))
6958 {
6959 cursor += 1;
6960 }
6961 let after_name = cursor;
6962 let after_spaces = skip_spaces(input, cursor);
6963 if bytes.get(after_spaces) == Some(&b'=') {
6964 cursor = skip_spaces(input, after_spaces + 1);
6965 cursor = parse_html_attribute_value(input, cursor)?;
6966 } else {
6967 cursor = after_name;
6968 }
6969 needs_space = true;
6970 }
6971 Some(_) if needs_space => return None,
6972 _ => return None,
6973 }
6974 }
6975}
6976
/// Parses an HTML attribute value starting at `index` and returns the byte
/// offset just past it.
///
/// * Quoted (`"` or `'`): runs to the matching quote; `None` if unterminated.
/// * Unquoted: a non-empty run of bytes that are neither ASCII whitespace
///   nor one of `"`, `'`, `=`, `<`, `>`, or a backtick.
fn parse_html_attribute_value(input: &str, index: usize) -> Option<usize> {
    let bytes = input.as_bytes();
    let first = *bytes.get(index)?;
    match first {
        b'"' | b'\'' => {
            let body_start = index + 1;
            bytes[body_start..]
                .iter()
                .position(|&byte| byte == first)
                .map(|offset| body_start + offset + 1)
        }
        b'=' | b'<' | b'>' | b'`' => None,
        _ => {
            let value_len = bytes[index..]
                .iter()
                .take_while(|byte| {
                    !byte.is_ascii_whitespace()
                        && !matches!(**byte, b'"' | b'\'' | b'=' | b'<' | b'>' | b'`')
                })
                .count();
            (value_len > 0).then_some(index + value_len)
        }
    }
}
7008
/// Bytes allowed after the first letter of an HTML tag name: ASCII letters,
/// digits and `-`.
fn html_name_byte(byte: u8) -> bool {
    matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'-')
}
7012
/// Bytes that may begin an HTML attribute name: ASCII letters, `_` and `:`.
fn html_attribute_name_start(byte: u8) -> bool {
    matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'_' | b':')
}
7016
/// Bytes allowed after the first byte of an HTML attribute name: ASCII
/// letters, digits, `_`, `:`, `.` and `-`.
fn html_attribute_name_byte(byte: u8) -> bool {
    matches!(
        byte,
        b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b':' | b'.' | b'-'
    )
}
7020
/// Advances `index` past any run of spaces, tabs, line feeds and carriage
/// returns and returns the new offset. An `index` beyond the end of `input`
/// is returned unchanged.
fn skip_spaces(input: &str, index: usize) -> usize {
    let Some(tail) = input.as_bytes().get(index..) else {
        return index;
    };
    let skipped = tail
        .iter()
        .take_while(|byte| matches!(**byte, b' ' | b'\t' | b'\n' | b'\r'))
        .count();
    index + skipped
}
7031
7032fn is_autolink(input: &str) -> bool {
7033 let inner = &input[1..input.len() - 1];
7034 is_uri_autolink(inner) || is_email_autolink(inner)
7035}
7036
/// Validates the inside of a URI autolink.
///
/// The text before the first `:` must be a scheme of 2 to 32 bytes: an ASCII
/// letter followed by ASCII letters, digits, `+`, `.` or `-`. The text after
/// the colon must not contain `<`, `>`, control characters or whitespace.
fn is_uri_autolink(input: &str) -> bool {
    let Some((scheme, rest)) = input.split_once(':') else {
        return false;
    };
    if !(2..=32).contains(&scheme.len()) {
        return false;
    }
    let scheme_ok = scheme.bytes().enumerate().all(|(position, byte)| {
        if position == 0 {
            byte.is_ascii_alphabetic()
        } else {
            byte.is_ascii_alphanumeric() || matches!(byte, b'+' | b'.' | b'-')
        }
    });
    scheme_ok
        && !rest.chars().any(|character| {
            matches!(character, '<' | '>') || character.is_control() || character.is_whitespace()
        })
}
7056
7057fn is_email_autolink(input: &str) -> bool {
7058 if input.chars().any(char::is_whitespace) {
7059 return false;
7060 }
7061 let Some(at) = input.find('@') else {
7062 return false;
7063 };
7064 if at == 0 || at + 1 >= input.len() {
7065 return false;
7066 }
7067 is_email_local_part(&input[..at]) && is_email_domain(&input[at + 1..], 1)
7070}
7071
7072fn parse_literal_autolink(
7080 input: &str,
7081 index: usize,
7082 gfm: bool,
7083 relaxed: bool,
7084) -> Option<(usize, String)> {
7085 let rest = &input[index..];
7086
7087 if gfm {
7088 if let Some(scheme_len) = rest
7091 .starts_with("http://")
7092 .then_some(7)
7093 .or_else(|| rest.starts_with("https://").then_some(8))
7094 {
7095 if !literal_scheme_prefix_ok(input, index) {
7096 return None;
7097 }
7098 let host = &input[index + scheme_len..];
7099 if !http_literal_host_ok(host) {
7102 if relaxed {
7103 } else {
7106 return None;
7107 }
7108 } else {
7109 let end = autolink_url_end(input, index + scheme_len, index + scheme_len, relaxed);
7113 if end <= index + scheme_len {
7114 return None;
7115 }
7116 if literal_autolink_suppressed_by_link_label(input, index, end, relaxed, gfm) {
7117 return None;
7118 }
7119 return Some((end, input[index..end].into()));
7120 }
7121 }
7122
7123 if rest
7126 .as_bytes()
7127 .get(..4)
7128 .is_some_and(|prefix| prefix.eq_ignore_ascii_case(b"www."))
7129 {
7130 if !literal_www_prefix_ok(input, index) {
7131 return None;
7132 }
7133 check_domain(rest, false)?;
7134 let end = autolink_url_end(input, index, index, relaxed);
7135 if end <= index || (!relaxed && end <= index + 3 && !literal_starts_line(input, index))
7136 {
7137 return None;
7138 }
7139 if literal_autolink_suppressed_by_link_label(input, index, end, relaxed, gfm) {
7140 return None;
7141 }
7142 let mut destination = String::from("http://");
7143 destination.push_str(&input[index..end]);
7144 return Some((end, destination));
7145 }
7146
7147 if let Some(email) = parse_literal_email(input, index) {
7148 return Some(email);
7149 }
7150 }
7151
7152 if relaxed {
7153 if literal_scheme_prefix_ok(input, index) {
7160 if let Some(after_slashes) = relaxed_scheme_after_slashes(rest) {
7161 let body_start = index + after_slashes;
7162 let next = input[body_start..].chars().next();
7163 if next.is_none_or(|char| char.is_whitespace()) && after_slashes == 3 {
7164 return None;
7165 }
7166 let end = autolink_url_end(input, body_start, body_start, true);
7167 if end > index {
7168 if literal_autolink_suppressed_by_link_label(input, index, end, relaxed, gfm) {
7169 return None;
7170 }
7171 return Some((end, input[index..end].into()));
7172 }
7173 }
7174 }
7175 }
7176
7177 None
7178}
7179
/// Matches a `scheme://` prefix at the start of `rest` and returns the byte
/// offset just past the slashes.
///
/// A bare `://` matches with offset 3. Otherwise the scheme must be an ASCII
/// letter followed by ASCII letters, digits, `+`, `.` or `-`, and must be
/// followed directly by `://`.
fn relaxed_scheme_after_slashes(rest: &str) -> Option<usize> {
    let bytes = rest.as_bytes();
    if bytes.starts_with(b"://") {
        return Some(3);
    }
    let (first, tail) = bytes.split_first()?;
    if !first.is_ascii_alphabetic() {
        return None;
    }
    let scheme_len = 1 + tail
        .iter()
        .take_while(|byte| byte.is_ascii_alphanumeric() || matches!(**byte, b'+' | b'.' | b'-'))
        .count();
    bytes[scheme_len..]
        .starts_with(b"://")
        .then_some(scheme_len + 3)
}
7208
/// A scheme-based literal autolink may start at `index` unless the character
/// immediately before it is an ASCII letter.
fn literal_scheme_prefix_ok(input: &str, index: usize) -> bool {
    !input[..index]
        .chars()
        .next_back()
        .is_some_and(|previous| previous.is_ascii_alphabetic())
}
7221
/// A `www.` literal autolink may start at `index` only at the start of the
/// input or right after one of `*`, `_`, `~`, `(`, `[`, `]`, space, tab,
/// line feed or carriage return.
fn literal_www_prefix_ok(input: &str, index: usize) -> bool {
    match input[..index].chars().next_back() {
        None => true,
        Some(previous) => matches!(
            previous,
            '*' | '_' | '~' | '(' | '[' | ']' | ' ' | '\t' | '\n' | '\r'
        ),
    }
}
7237
/// Reports whether `index` is at the start of the input or directly after a
/// line feed or carriage return byte.
fn literal_starts_line(input: &str, index: usize) -> bool {
    match index.checked_sub(1) {
        None => true,
        Some(previous) => matches!(input.as_bytes().get(previous), Some(b'\n' | b'\r')),
    }
}
7245
7246fn literal_autolink_suppressed_by_link_label(
7247 input: &str,
7248 index: usize,
7249 end: usize,
7250 relaxed: bool,
7251 gfm_autolink_literal: bool,
7252) -> bool {
7253 if !has_unclosed_link_label_opener(input, index) {
7254 return false;
7255 }
7256 if input[end..].starts_with("](") && !link_resource_tail_has_close(input, end + 2) {
7257 return true;
7258 }
7259 !relaxed && !gfm_autolink_literal && input.as_bytes().get(end).is_some_and(|byte| *byte == b']')
7260}
7261
7262fn has_unclosed_link_label_opener(input: &str, index: usize) -> bool {
7263 let line_start = input[..index]
7264 .rfind(['\n', '\r'])
7265 .map_or(0, |offset| offset + 1);
7266 let mut depth = 0usize;
7267 let mut cursor = line_start;
7268 while cursor < index {
7269 let Some((next, char)) = next_char(input, cursor) else {
7270 break;
7271 };
7272 match char {
7273 '\\' => {
7274 cursor = next_char(input, next)
7275 .map(|(after_escape, _)| after_escape)
7276 .unwrap_or(next);
7277 continue;
7278 }
7279 '[' => depth += 1,
7280 ']' => {
7281 depth = depth.saturating_sub(1);
7282 }
7283 _ => {}
7284 }
7285 cursor = next;
7286 }
7287 depth > 0
7288}
7289
7290fn link_resource_tail_has_close(input: &str, start: usize) -> bool {
7291 let mut cursor = start;
7292 while cursor < input.len() {
7293 let Some((next, char)) = next_char(input, cursor) else {
7294 break;
7295 };
7296 match char {
7297 '\\' => {
7298 cursor = next_char(input, next)
7299 .map(|(after_escape, _)| after_escape)
7300 .unwrap_or(next);
7301 continue;
7302 }
7303 '\n' | '\r' => return false,
7304 ')' => return true,
7305 _ => {}
7306 }
7307 cursor = next;
7308 }
7309 false
7310}
7311
7312fn http_literal_host_ok(host: &str) -> bool {
7313 if host.starts_with('[') {
7314 return bracketed_ipv6_host_end(host).is_some();
7315 }
7316 match host.chars().next() {
7317 Some(char) if char.is_ascii() && char.is_ascii_alphanumeric() => {
7318 check_domain(host, true).is_some()
7319 }
7320 Some(char) if !char.is_ascii() && is_valid_hostchar(char) => {
7321 check_domain(host, true).is_some()
7322 }
7323 _ => false,
7324 }
7325}
7326
/// Returns the byte offset just past the first `]` in `host`, provided that
/// `]` lies beyond index 1 (so a leading `[` encloses at least one byte).
fn bracketed_ipv6_host_end(host: &str) -> Option<usize> {
    host.find(']')
        .filter(|&close| close > 1)
        .map(|close| close + 1)
}
7331
7332fn is_valid_hostchar(char: char) -> bool {
7335 !char.is_whitespace() && !crate::unicode_punctuation::is_unicode_punctuation(char)
7336}
7337
7338fn check_domain(data: &str, allow_short: bool) -> Option<usize> {
7349 let mut np = 0usize;
7350 let mut uscore1 = 0usize;
7351 let mut uscore2 = 0usize;
7352 let mut host_len = 0usize;
7353
7354 let mut chars = data.char_indices().peekable();
7355 while let Some((offset, char)) = chars.next() {
7356 let account = offset != 0 && chars.peek().is_some();
7361 match char {
7362 '\\' => {
7363 host_len = offset + char.len_utf8();
7365 if let Some((next_off, next)) = chars.next() {
7366 host_len = next_off + next.len_utf8();
7367 }
7368 }
7369 '_' if account => {
7370 uscore2 += 1;
7371 host_len = offset + char.len_utf8();
7372 }
7373 '.' if account => {
7374 uscore1 = uscore2;
7375 uscore2 = 0;
7376 np += 1;
7377 host_len = offset + char.len_utf8();
7378 }
7379 '_' | '.' | '-' => {
7380 host_len = offset + char.len_utf8();
7381 }
7382 _ => {
7383 if !is_valid_hostchar(char) {
7384 break;
7385 }
7386 host_len = offset + char.len_utf8();
7387 }
7388 }
7389 }
7390
7391 if (uscore1 > 0 || uscore2 > 0) && np <= 10 {
7392 return None;
7393 }
7394
7395 if allow_short || np > 0 {
7396 Some(host_len)
7397 } else {
7398 None
7399 }
7400}
7401
7402fn autolink_url_end(input: &str, start: usize, trim_from: usize, balanced: bool) -> usize {
7408 let bytes = input.as_bytes();
7409 let mut end = start;
7410 let mut bracket_depth = 0i32;
7417 let mut curly_depth = 0i32;
7418 let mut strict_has_open_bracket = false;
7419 let mut strict_inside_backticks = false;
7420 for (offset, char) in input[start..].char_indices() {
7421 if char.is_whitespace() || char == '<' || is_autolink_terminating_control(char) {
7422 break;
7423 }
7424 if balanced {
7425 match char {
7426 '[' => bracket_depth += 1,
7427 ']' => {
7428 if bracket_depth > 0 {
7429 bracket_depth -= 1;
7430 } else {
7431 break;
7432 }
7433 }
7434 '{' => curly_depth += 1,
7435 '}' => {
7436 if curly_depth > 0 {
7437 curly_depth -= 1;
7438 } else {
7439 break;
7440 }
7441 }
7442 _ => {}
7443 }
7444 } else {
7445 match char {
7446 '[' => strict_has_open_bracket = true,
7447 '`' => strict_inside_backticks = !strict_inside_backticks,
7448 ']' if !strict_has_open_bracket && !strict_inside_backticks => break,
7449 _ => {}
7450 }
7451 }
7452 if char == '\\' {
7461 if let Some(&next) = bytes.get(start + offset + 1) {
7462 let next_is_escapable_punct = next.is_ascii_punctuation() && next != b'.';
7463 if next_is_escapable_punct {
7464 break;
7465 }
7466 }
7467 }
7468 end = start + offset + char.len_utf8();
7469 }
7470 autolink_delim(input, trim_from, end)
7471}
7472
/// Characters in the range U+2066 to U+2069 end a literal autolink.
fn is_autolink_terminating_control(char: char) -> bool {
    ('\u{2066}'..='\u{2069}').contains(&char)
}
7476
7477fn autolink_delim(input: &str, start: usize, mut end: usize) -> usize {
7482 let bytes = input.as_bytes();
7483 let mut opening = 0usize;
7484 let mut closing = 0usize;
7485 for &byte in &bytes[start..end] {
7486 match byte {
7487 b'(' => opening += 1,
7488 b')' => closing += 1,
7489 _ => {}
7490 }
7491 }
7492
7493 while end > start {
7494 match bytes[end - 1] {
7495 b')' => {
7496 if closing <= opening {
7497 break;
7498 }
7499 closing -= 1;
7500 end -= 1;
7501 }
7502 b'?' | b'!' | b'.' | b',' | b':' | b'*' | b'_' | b'~' | b'\'' | b'"' => {
7503 end -= 1;
7504 }
7505 b';' => {
7506 if let Some(amp) = trailing_hex_entity_run_start(bytes, start, end) {
7513 end = amp;
7514 } else {
7515 let mut new_end = end - 1;
7518 while new_end > start && bytes[new_end - 1].is_ascii_alphanumeric() {
7519 new_end -= 1;
7520 }
7521 if new_end > start && new_end < end - 1 && bytes[new_end - 1] == b'&' {
7522 end = new_end - 1;
7523 } else {
7524 end -= 1;
7525 }
7526 }
7527 }
7528 _ => break,
7529 }
7530 }
7531 end
7532}
7533
/// Detects a hexadecimal numeric character reference (`&#x` or `&#X`, one or
/// more hex digits, then `;`) that ends exactly at `end` and lies entirely
/// within `start..end`. Returns the index of its `&`.
fn trailing_hex_entity_run_start(bytes: &[u8], start: usize, end: usize) -> Option<usize> {
    if end <= start {
        return None;
    }
    let (&last, body) = bytes[start..end].split_last()?;
    if last != b';' {
        return None;
    }
    let digits = body
        .iter()
        .rev()
        .take_while(|byte| byte.is_ascii_hexdigit())
        .count();
    if digits == 0 {
        return None;
    }
    let prefix = &body[..body.len() - digits];
    if prefix.ends_with(b"&#x") || prefix.ends_with(b"&#X") {
        Some(start + prefix.len() - 3)
    } else {
        None
    }
}
7558
7559fn parse_literal_email(input: &str, index: usize) -> Option<(usize, String)> {
7565 let rest = &input[index..];
7566 let at = rest.find('@')?;
7567 if at == 0 {
7568 return None;
7569 }
7570 let local = &rest[..at];
7571
7572 let (auto_mailto, is_xmpp) = classify_email_local(local);
7576
7577 if !email_left_boundary_ok(input, index, auto_mailto) {
7582 return None;
7583 }
7584
7585 if !email_local_is_valid(local, auto_mailto) {
7586 return None;
7587 }
7588
7589 let domain_start = index + at + 1;
7590 let domain_end = literal_email_domain_end(input, domain_start, is_xmpp)?;
7591 let trimmed = autolink_delim(input, domain_start, domain_end);
7592 if trimmed <= domain_start {
7593 return None;
7594 }
7595
7596 let domain = &input[domain_start..trimmed];
7597 if !is_gfm_email_domain(domain, is_xmpp) {
7598 return None;
7599 }
7600
7601 let mut destination = String::new();
7602 if auto_mailto {
7603 destination.push_str("mailto:");
7604 }
7605 destination.push_str(&input[index..trimmed]);
7606 Some((trimmed, destination))
7607}
7608
7609fn classify_email_local(local: &str) -> (bool, bool) {
7614 if let Some(rest) = strip_ci_prefix(local, "mailto:") {
7615 if !rest.is_empty() {
7616 return (false, false);
7617 }
7618 }
7619 if let Some(rest) = strip_ci_prefix(local, "xmpp:") {
7620 if !rest.is_empty() {
7621 return (false, true);
7622 }
7623 }
7624 (true, false)
7625}
7626
/// Removes `prefix` from the front of `input`, comparing ASCII letters
/// case-insensitively and every other byte exactly.
///
/// Returns the remainder of `input` after the prefix, or `None` when `input`
/// does not start with it.
fn strip_ci_prefix<'a>(input: &'a str, prefix: &str) -> Option<&'a str> {
    let split = prefix.len();
    // `get` yields `None` when `input` is too short or `split` falls inside a
    // multi-byte character; neither case can be a match.
    let head = input.get(..split)?;
    head.eq_ignore_ascii_case(prefix).then(|| &input[split..])
}
7636
7637fn email_left_boundary_ok(input: &str, index: usize, auto_mailto: bool) -> bool {
7644 if index == 0 {
7645 return true;
7646 }
7647 let Some(previous) = input[..index].chars().next_back() else {
7648 return true;
7649 };
7650 if previous.is_ascii_alphanumeric() {
7651 if auto_mailto
7652 && input[index..].starts_with('+')
7653 && prefix_ends_with_gfm_email(input, index)
7654 {
7655 return true;
7656 }
7657 return false;
7658 }
7659 if auto_mailto && previous == '/' {
7660 return false;
7661 }
7662 true
7663}
7664
7665fn prefix_ends_with_gfm_email(input: &str, end: usize) -> bool {
7666 let start = input[..end]
7667 .rfind(char::is_whitespace)
7668 .map_or(0, |offset| offset + 1);
7669 let candidate = &input[start..end];
7670 let Some(at) = candidate.rfind('@') else {
7671 return false;
7672 };
7673 email_local_is_valid(&candidate[..at], true) && is_gfm_email_domain(&candidate[at + 1..], false)
7674}
7675
7676fn email_local_is_valid(local: &str, auto_mailto: bool) -> bool {
7680 let body = if auto_mailto {
7681 local
7682 } else if let Some(rest) = strip_ci_prefix(local, "mailto:") {
7683 rest
7684 } else if let Some(rest) = strip_ci_prefix(local, "xmpp:") {
7685 rest
7686 } else {
7687 local
7688 };
7689 !body.is_empty() && body.bytes().all(is_gfm_email_local_byte)
7690}
7691
/// Reports whether `byte` may appear in the local part of an email autolink:
/// an ASCII letter or digit, or one of `.`, `+`, `_`, `-`.
fn is_gfm_email_local_byte(byte: u8) -> bool {
    match byte {
        b'.' | b'+' | b'_' | b'-' => true,
        other => other.is_ascii_alphanumeric(),
    }
}
7697
7698fn is_email_local_part(input: &str) -> bool {
7699 !input.is_empty()
7700 && input
7701 .split('.')
7702 .all(|segment| !segment.is_empty() && segment.bytes().all(is_email_atext))
7703}
7704
/// Reports whether `byte` is an email "atext" byte: an ASCII letter or digit,
/// or one of the punctuation bytes ``! # $ % & ' * + / = ? ^ _ ` { | } ~ -``.
fn is_email_atext(byte: u8) -> bool {
    const ATEXT_PUNCTUATION: &[u8] = b"!#$%&'*+/=?^_`{|}~-";
    byte.is_ascii_alphanumeric() || ATEXT_PUNCTUATION.contains(&byte)
}
7729
/// Scans the domain of a literal email autolink starting at byte `index` and
/// returns the exclusive end offset of the scanned run.
///
/// The scan accepts ASCII alphanumerics, `-`, `_`, a `.` that is directly
/// followed by an ASCII alphanumeric, and (only when `is_xmpp` is set) `/`.
/// It stops at the first other byte or at the end of `input`.
///
/// Returns `None` when nothing was scanned, when no `.` was accepted, or when
/// the last scanned byte is neither an ASCII letter nor `.`.
fn literal_email_domain_end(input: &str, index: usize, is_xmpp: bool) -> Option<usize> {
    let bytes = input.as_bytes();
    let mut cursor = index;
    let mut inner_dots = 0usize;

    while let Some(&byte) = bytes.get(cursor) {
        let accepted = match byte {
            b'.' => {
                // Only a dot that introduces another label is part of the domain.
                let introduces_label = bytes
                    .get(cursor + 1)
                    .is_some_and(u8::is_ascii_alphanumeric);
                if introduces_label {
                    inner_dots += 1;
                }
                introduces_label
            }
            b'-' | b'_' => true,
            b'/' => is_xmpp,
            other => other.is_ascii_alphanumeric(),
        };
        if !accepted {
            break;
        }
        cursor += 1;
    }

    if cursor == index || inner_dots == 0 {
        return None;
    }
    let last = bytes[cursor - 1];
    (last.is_ascii_alphabetic() || last == b'.').then_some(cursor)
}
7766
/// Validates the domain of a literal email autolink.
///
/// When `is_xmpp` is set, only the part of `input` before the first `/` is
/// examined; otherwise the whole of `input` is. The examined host must
/// contain at least one `.`, must not end in `-` or `_`, and each of its
/// dot-separated labels must be non-empty and consist only of ASCII
/// alphanumerics, `-` or `_`.
fn is_gfm_email_domain(input: &str, is_xmpp: bool) -> bool {
    let host = match input.split_once('/') {
        Some((before_slash, _)) if is_xmpp => before_slash,
        _ => input,
    };

    // A host without a dot (which includes the empty host) is never valid.
    if !host.contains('.') || host.ends_with(['-', '_']) {
        return false;
    }

    host.split('.').all(|label| {
        !label.is_empty()
            && label
                .bytes()
                .all(|byte| byte == b'-' || byte == b'_' || byte.is_ascii_alphanumeric())
    })
}
7798
/// Validates a dotted email domain and requires at least `min_labels` labels.
///
/// Every dot-separated label must be 1 to 63 bytes long, begin and end with
/// an ASCII alphanumeric, and contain only ASCII alphanumerics or `-`.
fn is_email_domain(input: &str, min_labels: usize) -> bool {
    let label_is_valid = |label: &str| {
        let bytes = label.as_bytes();
        match (bytes.first(), bytes.last()) {
            (Some(first), Some(last)) => {
                bytes.len() <= 63
                    && first.is_ascii_alphanumeric()
                    && last.is_ascii_alphanumeric()
                    && bytes
                        .iter()
                        .all(|byte| byte.is_ascii_alphanumeric() || *byte == b'-')
            }
            // No first/last byte means the label is empty.
            _ => false,
        }
    };

    // Count labels, bailing out with `None` on the first invalid one.
    input
        .split('.')
        .try_fold(0usize, |seen, label| label_is_valid(label).then_some(seen + 1))
        .is_some_and(|seen| seen >= min_labels)
}
7821
7822fn is_footnote_label(label: &str) -> bool {
7823 !label.is_empty()
7824 && reference_label_is_within_limit(label)
7825 && !label.chars().any(char::is_whitespace)
7826}
7827
7828fn find_footnote_definition_label_end(input: &str) -> Option<usize> {
7829 let close = find_footnote_reference_label_end(input, 2)?;
7830 if input.as_bytes().get(close + 1) == Some(&b':') {
7831 Some(close)
7832 } else {
7833 None
7834 }
7835}
7836
7837fn find_footnote_reference_label_end(input: &str, mut cursor: usize) -> Option<usize> {
7838 while cursor < input.len() {
7839 let (next, char) = next_char(input, cursor)?;
7840 if char == ']' && !is_escaped_at(input, cursor) {
7841 return Some(cursor);
7842 }
7843 cursor = next;
7844 }
7845 None
7846}
7847
7848fn find_inline_footnote_end(input: &str, mut cursor: usize) -> Option<usize> {
7849 let mut depth = 0usize;
7850 while cursor < input.len() {
7851 let (next, char) = next_char(input, cursor)?;
7852 if !is_escaped_at(input, cursor) {
7853 match char {
7854 '[' => depth += 1,
7855 ']' if depth == 0 => return Some(cursor),
7856 ']' => depth = depth.saturating_sub(1),
7857 _ => {}
7858 }
7859 }
7860 cursor = next;
7861 }
7862 None
7863}