1use alloc::{borrow::Cow, string::String, vec, vec::Vec};
7
8use crate::{
9 ast::*,
10 diagnostic::{Diagnostic, DiagnosticCode, DiagnosticSeverity},
11 entities::named_character_reference,
12 options::{SyntaxConfigError, SyntaxOptions},
13 span::Span,
14 validate::is_directive_name,
15};
16
/// Result of a parse: the document tree plus every diagnostic collected
/// while building it.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParseOutput {
    /// Root of the parsed tree.
    pub document: Document,
    /// Diagnostics reported while parsing.
    pub diagnostics: Vec<Diagnostic>,
}
26
/// Failure returned by `SyntaxOptions::parse_strict`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ParseStrictError {
    /// The syntax options were rejected before any parsing happened.
    Config(SyntaxConfigError),
    /// Parsing completed but reported this error-severity diagnostic
    /// (the first one found in the output).
    Diagnostic(Diagnostic),
}
35
/// Destination and optional title of a link resource, each paired with the
/// syntactic kind it was written in.
// NOTE(review): the producer and consumers of this struct are outside this
// part of the file; the field descriptions are inferred from names and types.
#[derive(Clone, Debug, Eq, PartialEq)]
struct ParsedLinkResource {
    /// Link destination text.
    destination: String,
    /// Syntactic form of the destination.
    destination_kind: LinkDestinationKind,
    /// Link title, when one was present.
    title: Option<String>,
    /// Syntactic form of the title, when one was present.
    title_kind: Option<LinkTitleKind>,
}
43
// NOTE(review): neither constant is used in this part of the file. The names
// suggest upper bounds on reference-label length (in characters) and wikilink
// length (in bytes) — confirm against the inline parser.
const REFERENCE_LABEL_MAX_CHARS: usize = 999;
const WIKILINK_MAX_BYTES: usize = 999;
46
/// One physical line of input together with its position in the source.
///
/// Produced by `collect_lines`; all offsets already include the base offset
/// that was passed to it.
#[derive(Clone, Copy, Debug)]
struct Line<'a> {
    /// Line content without its terminator.
    text: &'a str,
    /// The terminator that followed the line: `"\n"`, `"\r"`, `"\r\n"`, or
    /// `""` for a final unterminated line.
    eol: &'a str,
    /// Offset of the first byte of `text`.
    start: usize,
    /// Offset just past `text`, before the terminator.
    end: usize,
    /// Offset just past the terminator.
    end_with_eol: usize,
    /// `false` as produced by `collect_lines`; the block-quote and list
    /// parsers set it on the child lines they pass down, from the lazy flags
    /// they recorded for each continuation line.
    lazy: bool,
}
60
/// Description of a list marker found at the start of a line.
// NOTE(review): the function that builds this (`list_marker_info`) is not in
// this part of the file; field meanings below are taken from how `parse_list`
// reads them, or hedged where it does not.
#[derive(Clone, Copy, Debug)]
struct ListMarkerInfo<'a> {
    /// Copied into `List::ordered` by `parse_list`.
    ordered: bool,
    /// Copied into `List::start` by `parse_list`.
    start: Option<u64>,
    /// Copied into `List::delimiter` by `parse_list`.
    delimiter: ListDelimiter,
    /// Indentation of the marker; passed to `strip_list_continuation`.
    indent: usize,
    /// Presumably the length of the marker itself — TODO confirm.
    marker_len: usize,
    /// Column at which the item's content begins; lines indented less than
    /// this are treated by `parse_list` as outside the item unless they are
    /// lazy continuations.
    content_indent: usize,
    /// Presumably the text following the marker — TODO confirm.
    content: &'a str,
}
71
/// A description-list details marker located on a line.
// NOTE(review): producer and consumers are outside this part of the file;
// field meanings are inferred from their names — confirm.
#[derive(Clone, Copy, Debug)]
struct DescriptionMarker<'a> {
    /// Presumably the offset within the line at which `content` starts.
    content_offset: usize,
    /// Presumably the text following the marker.
    content: &'a str,
}
77
/// Bookkeeping for one term of a description list.
// NOTE(review): this struct is built and read outside this part of the file;
// the notes below are inferred from field names only — confirm before relying
// on them.
#[derive(Clone, Debug)]
struct DescriptionTerm {
    /// Presumably the index of the line holding the first details marker.
    marker_index: usize,
    /// Presumably the index just past the term's last line.
    term_end: usize,
    /// Presumably whether a blank line separated the term from its details.
    blank_after_term: bool,
    /// Presumably the term's source text.
    source: String,
    /// Presumably the offset of `source` in the input.
    source_offset: usize,
}
86
/// Classification of an HTML block.
// NOTE(review): the HTML block parser is outside this part of the file; the
// variant notes are inferred from names and payloads — confirm.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum HtmlBlockKind {
    /// Presumably a raw-text element.
    RawTag,
    /// Presumably a block-level tag.
    BlockTag,
    /// Presumably a block that runs until the given terminator text is seen.
    Until(&'static str),
    /// Presumably a block that runs until the next blank line.
    UntilBlank,
}
94
95pub fn parse(input: &str) -> ParseOutput {
98 SyntaxOptions::default().parse(input)
99}
100
101impl SyntaxOptions {
102 pub fn parse(&self, input: &str) -> ParseOutput {
107 match parse_checked(input, self) {
108 Ok(output) => output,
109 Err(error) => ParseOutput {
110 document: Document::default(),
111 diagnostics: vec![Diagnostic::new(
112 DiagnosticSeverity::Error,
113 DiagnosticCode::StrictParse,
114 Span::new(0, input.len()),
115 error.message(),
116 )],
117 },
118 }
119 }
120
121 pub fn parse_strict(&self, input: &str) -> Result<ParseOutput, ParseStrictError> {
124 let output = parse_checked(input, self).map_err(ParseStrictError::Config)?;
125 if let Some(diagnostic) = output
126 .diagnostics
127 .iter()
128 .find(|diagnostic| diagnostic.severity == DiagnosticSeverity::Error)
129 {
130 return Err(ParseStrictError::Diagnostic(diagnostic.clone()));
131 }
132 Ok(output)
133 }
134}
135
136fn parse_checked(input: &str, options: &SyntaxOptions) -> Result<ParseOutput, SyntaxConfigError> {
137 options.validate()?;
138 let input = input.strip_prefix('\u{feff}').unwrap_or(input);
141 let input: Cow<'_, str> = if input.contains('\u{0}') {
144 Cow::Owned(input.replace('\u{0}', "\u{fffd}"))
145 } else {
146 Cow::Borrowed(input)
147 };
148 let input = input.as_ref();
149 let mut diagnostics = Vec::new();
150 let definitions = collect_definitions(input, options);
151 let children = parse_blocks(input, 0, true, options, &definitions, &mut diagnostics);
152
153 Ok(ParseOutput {
154 document: Document {
155 meta: NodeMeta::new(Some(Span::new(0, input.len()))),
156 children,
157 },
158 diagnostics,
159 })
160}
161
162fn parse_blocks(
163 input: &str,
164 base_offset: usize,
165 allow_frontmatter: bool,
166 options: &SyntaxOptions,
167 definitions: &[String],
168 diagnostics: &mut Vec<Diagnostic>,
169) -> Vec<Block> {
170 let lines = collect_lines(input, base_offset);
171 parse_blocks_from_lines(&lines, allow_frontmatter, options, definitions, diagnostics)
172}
173
174fn parse_blocks_from_lines(
175 lines: &[Line<'_>],
176 allow_frontmatter: bool,
177 options: &SyntaxOptions,
178 definitions: &[String],
179 diagnostics: &mut Vec<Diagnostic>,
180) -> Vec<Block> {
181 let mut blocks = Vec::new();
182 let mut index = 0;
183
184 while index < lines.len() {
185 let line = lines[index];
186 if line.text.trim().is_empty() {
187 index += 1;
188 continue;
189 }
190 let after_definition_unbroken = index > 0
191 && !lines[index - 1].text.trim().is_empty()
192 && matches!(blocks.last(), Some(Block::Definition(_)));
193
194 if allow_frontmatter && index == 0 {
195 if let Some((block, next)) = parse_frontmatter(lines, index, options) {
196 blocks.push(block);
197 index = next;
198 continue;
199 }
200 }
201
202 if let Some((block, next)) =
203 parse_container_directive(lines, index, options, definitions, diagnostics)
204 {
205 blocks.push(block);
206 index = next;
207 continue;
208 }
209
210 if let Some((block, next)) = parse_math_block(lines, index, options) {
211 blocks.push(block);
212 index = next;
213 continue;
214 }
215
216 if let Some((block, next)) = parse_fenced_code(lines, index, options) {
217 blocks.push(block);
218 index = next;
219 continue;
220 }
221
222 if let Some((block, next)) =
223 parse_block_quote(lines, index, options, definitions, diagnostics)
224 {
225 blocks.push(block);
226 index = next;
227 continue;
228 }
229
230 if let Some(block) = parse_atx_heading(line, options, definitions) {
231 blocks.push(block);
232 index += 1;
233 continue;
234 }
235
236 if let Some(block) = parse_thematic_break(line) {
237 blocks.push(block);
238 index += 1;
239 continue;
240 }
241
242 if let Some((block, next)) = parse_list(lines, index, options, definitions, diagnostics) {
243 blocks.push(block);
244 index = next;
245 continue;
246 }
247
248 if let Some((block, next)) =
249 parse_footnote_definition(lines, index, options, definitions, diagnostics)
250 {
251 blocks.push(block);
252 index = next;
253 continue;
254 }
255
256 if let Some((block, next)) =
257 parse_definition(lines, index, options, after_definition_unbroken)
258 {
259 blocks.push(block);
260 index = next;
261 continue;
262 }
263
264 if let Some(block) = parse_leaf_directive(line, options, definitions, diagnostics) {
265 blocks.push(block);
266 index += 1;
267 continue;
268 }
269
270 if let Some((block, next)) = parse_html_block(lines, index, options) {
271 blocks.push(block);
272 index = next;
273 continue;
274 }
275
276 if let Some((block, next)) = parse_mdx_flow(lines, index, options, diagnostics) {
277 blocks.push(block);
278 index = next;
279 continue;
280 }
281
282 if !after_definition_unbroken {
283 if let Some((block, next)) = parse_indented_code(lines, index, options) {
284 blocks.push(block);
285 index = next;
286 continue;
287 }
288 }
289
290 if let Some((block, next)) = parse_table(lines, index, options, definitions, diagnostics) {
291 blocks.push(block);
292 index = next;
293 continue;
294 }
295
296 if let Some((block, next)) = parse_setext_heading(lines, index, options, definitions) {
297 blocks.push(block);
298 index = next;
299 continue;
300 }
301
302 if let Some((block, next)) =
303 parse_description_list(lines, index, options, definitions, diagnostics)
304 {
305 blocks.push(block);
306 index = next;
307 continue;
308 }
309
310 let (block, next) = parse_paragraph(lines, index, options, definitions, diagnostics);
311 blocks.push(block);
312 index = next;
313 }
314
315 blocks
316}
317
318fn collect_lines(input: &str, base_offset: usize) -> Vec<Line<'_>> {
319 let bytes = input.as_bytes();
320 let mut lines = Vec::new();
321 let mut start = 0;
322 let mut index = 0;
323
324 while index < bytes.len() {
325 match bytes[index] {
326 b'\n' => {
327 let end = index;
328 lines.push(Line {
329 text: &input[start..end],
330 eol: &input[index..index + 1],
331 start: base_offset + start,
332 end: base_offset + end,
333 end_with_eol: base_offset + index + 1,
334 lazy: false,
335 });
336 index += 1;
337 start = index;
338 }
339 b'\r' => {
340 let end = index;
341 let eol_end = if index + 1 < bytes.len() && bytes[index + 1] == b'\n' {
342 index + 2
343 } else {
344 index + 1
345 };
346 lines.push(Line {
347 text: &input[start..end],
348 eol: &input[index..eol_end],
349 start: base_offset + start,
350 end: base_offset + end,
351 end_with_eol: base_offset + eol_end,
352 lazy: false,
353 });
354 index = eol_end;
355 start = index;
356 }
357 _ => index += 1,
358 }
359 }
360
361 if start < bytes.len() || input.is_empty() {
362 lines.push(Line {
363 text: &input[start..],
364 eol: "",
365 start: base_offset + start,
366 end: base_offset + bytes.len(),
367 end_with_eol: base_offset + bytes.len(),
368 lazy: false,
369 });
370 }
371
372 lines
373}
374
375fn collect_definitions(input: &str, options: &SyntaxOptions) -> Vec<String> {
376 let mut diagnostics = Vec::new();
377 let blocks = parse_blocks(input, 0, true, options, &[], &mut diagnostics);
378 let mut definitions = Vec::new();
379 collect_definition_refs_from_blocks(&blocks, &mut definitions);
380 definitions
381}
382
383fn collect_definition_refs_from_blocks(blocks: &[Block], definitions: &mut Vec<String>) {
384 for block in blocks {
385 match block {
386 Block::Definition(definition) => {
387 if definitions
388 .iter()
389 .all(|identifier| identifier != &definition.identifier)
390 {
391 definitions.push(definition.identifier.clone());
392 }
393 }
394 Block::BlockQuote(node) => {
395 collect_definition_refs_from_blocks(&node.children, definitions);
396 }
397 Block::Alert(node) => {
398 collect_definition_refs_from_blocks(&node.children, definitions);
399 }
400 Block::List(node) => {
401 for item in &node.children {
402 collect_definition_refs_from_blocks(&item.children, definitions);
403 }
404 }
405 Block::DescriptionList(node) => {
406 for item in &node.children {
407 for details in &item.details {
408 collect_definition_refs_from_blocks(&details.children, definitions);
409 }
410 }
411 }
412 Block::FootnoteDefinition(node) => {
413 collect_definition_refs_from_blocks(&node.children, definitions);
414 }
415 Block::ContainerDirective(node) => {
416 collect_definition_refs_from_blocks(&node.children, definitions);
417 }
418 _ => {}
419 }
420 }
421}
422
423fn parse_frontmatter(
424 lines: &[Line<'_>],
425 index: usize,
426 options: &SyntaxOptions,
427) -> Option<(Block, usize)> {
428 if !options.constructs.frontmatter {
429 return None;
430 }
431 let kind = frontmatter_fence_kind(lines[index].text)?;
432
433 let mut value = String::new();
434 let mut cursor = index + 1;
435 while cursor < lines.len() {
436 if frontmatter_fence_kind(lines[cursor].text) == Some(kind) {
437 let span = Span::new(lines[index].start, lines[cursor].end_with_eol);
438 return Some((
439 Block::Frontmatter(Frontmatter {
440 meta: NodeMeta::new(Some(span)),
441 kind,
442 value,
443 }),
444 cursor + 1,
445 ));
446 }
447 push_line(&mut value, lines[cursor].text);
448 cursor += 1;
449 }
450
451 None
452}
453
454fn frontmatter_fence_kind(line: &str) -> Option<FrontmatterKind> {
455 match line.trim_end_matches([' ', '\t']) {
456 "---" => Some(FrontmatterKind::Yaml),
457 "+++" => Some(FrontmatterKind::Toml),
458 _ => None,
459 }
460}
461
462fn parse_container_directive(
463 lines: &[Line<'_>],
464 index: usize,
465 options: &SyntaxOptions,
466 definitions: &[String],
467 diagnostics: &mut Vec<Diagnostic>,
468) -> Option<(Block, usize)> {
469 if !options.constructs.directive_container {
470 return None;
471 }
472 let trimmed = trim_up_to_three_spaces(lines[index].text)?;
473 let Some((fence_len, opener_rest)) = directive_container_opener_prefix(trimmed) else {
474 return None;
475 };
476 let opener_base = lines[index].start + (lines[index].text.len() - trimmed.len()) + fence_len;
477
478 let Some((name, label_source, attributes, _consumed)) = parse_directive_opener(opener_rest)
479 else {
480 diagnostics.push(Diagnostic::new(
481 DiagnosticSeverity::Error,
482 DiagnosticCode::InvalidDirectiveName,
483 Span::new(lines[index].start, lines[index].end),
484 "container directive must have a valid name",
485 ));
486 return None;
487 };
488 let label_base = opener_base + name.len() + 1;
489
490 let mut content = String::new();
491 let mut cursor = index + 1;
492 let mut nested_fences = Vec::new();
493 while cursor < lines.len() {
494 let line = lines[cursor].text;
495 let trimmed = trim_up_to_three_spaces(line);
496 if let Some(trimmed) = trimmed {
497 if let Some(nested_len) = nested_fences.last().copied() {
498 if directive_container_closing_fence(trimmed, nested_len).is_some() {
499 nested_fences.pop();
500 push_line(&mut content, line);
501 cursor += 1;
502 continue;
503 }
504 } else if directive_container_closing_fence(trimmed, fence_len).is_some() {
505 let label = label_source
506 .map(|source| {
507 parse_inlines(source, label_base, options, definitions, diagnostics)
508 })
509 .unwrap_or_default();
510 let children = parse_blocks(
511 &content,
512 lines[index + 1].start,
513 false,
514 options,
515 definitions,
516 diagnostics,
517 );
518 return Some((
519 Block::ContainerDirective(ContainerDirective {
520 meta: NodeMeta::new(Some(Span::new(
521 lines[index].start,
522 lines[cursor].end_with_eol,
523 ))),
524 name,
525 label,
526 attributes,
527 children,
528 }),
529 cursor + 1,
530 ));
531 }
532
533 if let Some((nested_len, nested_rest)) = directive_container_opener_prefix(trimmed) {
534 if parse_directive_opener(nested_rest).is_some() {
535 nested_fences.push(nested_len);
536 }
537 }
538 }
539
540 push_line(&mut content, line);
541 cursor += 1;
542 }
543
544 diagnostics.push(Diagnostic::new(
545 DiagnosticSeverity::Error,
546 DiagnosticCode::UnclosedDirectiveContainer,
547 Span::new(lines[index].start, lines[index].end),
548 "container directive is missing a closing fence",
549 ));
550 Some((
551 Block::ContainerDirective(ContainerDirective {
552 meta: NodeMeta::new(Some(Span::new(
553 lines[index].start,
554 lines.last()?.end_with_eol,
555 ))),
556 name,
557 label: label_source
558 .map(|source| parse_inlines(source, label_base, options, definitions, diagnostics))
559 .unwrap_or_default(),
560 attributes,
561 children: parse_blocks(
562 &content,
563 lines
564 .get(index + 1)
565 .map(|line| line.start)
566 .unwrap_or(lines[index].end),
567 false,
568 options,
569 definitions,
570 diagnostics,
571 ),
572 }),
573 lines.len(),
574 ))
575}
576
/// Splits a container-directive opening fence off the front of `input`.
///
/// Returns the number of leading colons and the text after them when there
/// are at least three; otherwise `None`.
fn directive_container_opener_prefix(input: &str) -> Option<(usize, &str)> {
    let rest = input.trim_start_matches(':');
    let fence_len = input.len() - rest.len();
    (fence_len >= 3).then_some((fence_len, rest))
}
589
/// Recognizes a container-directive closing fence.
///
/// `input` is a closing fence when it starts with at least `min_len` colons
/// and nothing but whitespace follows them; the colon count is returned.
fn directive_container_closing_fence(input: &str, min_len: usize) -> Option<usize> {
    let after_colons = input.trim_start_matches(':');
    let fence_len = input.len() - after_colons.len();
    if fence_len < min_len || !after_colons.trim().is_empty() {
        return None;
    }
    Some(fence_len)
}
602
603fn parse_math_block(
604 lines: &[Line<'_>],
605 index: usize,
606 options: &SyntaxOptions,
607) -> Option<(Block, usize)> {
608 if !options.constructs.math_block {
609 return None;
610 }
611 let opener = trim_up_to_three_spaces(lines[index].text)?;
617 let fence_length = math_block_fence_length(opener)?;
618 let opening_indent = leading_indent_columns(lines[index].text);
619
620 let mut value = String::new();
621 let mut content_lines = 0usize;
622 let mut cursor = index + 1;
623 while cursor < lines.len() {
624 if let Some(close_line) = trim_up_to_three_spaces(lines[cursor].text) {
625 if math_block_fence_closes(close_line, fence_length) {
626 return Some((
627 Block::MathBlock(MathBlock {
628 meta: NodeMeta::new(Some(Span::new(
629 lines[index].start,
630 lines[cursor].end_with_eol,
631 ))),
632 value,
633 }),
634 cursor + 1,
635 ));
636 }
637 }
638 if content_lines > 0 {
639 ensure_line_separator(&mut value);
643 }
644 let stripped = strip_leading_indent_columns(lines[cursor].text, opening_indent);
645 value.push_str(&stripped);
646 value.push_str(lines[cursor].eol);
647 content_lines += 1;
648 cursor += 1;
649 }
650
651 Some((
654 Block::MathBlock(MathBlock {
655 meta: NodeMeta::new(Some(Span::new(
656 lines[index].start,
657 lines.last()?.end_with_eol,
658 ))),
659 value,
660 }),
661 lines.len(),
662 ))
663}
664
/// Returns the length of a math-block opening fence at the start of `input`.
///
/// The fence is a run of at least two `$`; the remainder of the line must
/// not contain any further `$`.
fn math_block_fence_length(input: &str) -> Option<usize> {
    let rest = input.trim_start_matches('$');
    let length = input.len() - rest.len();
    (length >= 2 && !rest.contains('$')).then_some(length)
}
678
/// Reports whether `input` closes a math block opened with `length` dollars:
/// it must start with at least that many `$` followed only by whitespace.
fn math_block_fence_closes(input: &str, length: usize) -> bool {
    let rest = input.trim_start_matches('$');
    let count = input.len() - rest.len();
    count >= length && rest.trim().is_empty()
}
689
690fn parse_fenced_code(
691 lines: &[Line<'_>],
692 index: usize,
693 options: &SyntaxOptions,
694) -> Option<(Block, usize)> {
695 let line = fence_line(lines[index].text, options)?;
696 let (marker, length) = fence_start(line)?;
697 let opening_indent = leading_indent_columns(lines[index].text);
700 let info = line[length..].trim();
701 if marker == FenceMarker::Backtick && info.contains('`') {
702 return None;
703 }
704 let info = if info.is_empty() {
705 None
706 } else {
707 Some(unescape_string(info))
708 };
709
710 let mut value = String::new();
711 let mut content_lines = 0usize;
716 let mut cursor = index + 1;
717 while cursor < lines.len() {
718 if let Some(close_line) = fence_line(lines[cursor].text, options) {
719 if fence_close(close_line, marker, length) {
720 return Some((
721 Block::CodeBlock(CodeBlock {
722 meta: NodeMeta::new(Some(Span::new(
723 lines[index].start,
724 lines[cursor].end_with_eol,
725 ))),
726 kind: CodeBlockKind::Fenced { marker, length },
727 info,
728 value,
729 }),
730 cursor + 1,
731 ));
732 }
733 }
734 if content_lines > 0 {
735 ensure_line_separator(&mut value);
739 }
740 let stripped = strip_leading_indent_columns(lines[cursor].text, opening_indent);
741 value.push_str(&stripped);
742 value.push_str(lines[cursor].eol);
743 content_lines += 1;
744 cursor += 1;
745 }
746 Some((
747 Block::CodeBlock(CodeBlock {
748 meta: NodeMeta::new(Some(Span::new(
749 lines[index].start,
750 lines.last()?.end_with_eol,
751 ))),
752 kind: CodeBlockKind::Fenced { marker, length },
753 info,
754 value,
755 }),
756 lines.len(),
757 ))
758}
759
760fn fence_line<'a>(line: &'a str, options: &SyntaxOptions) -> Option<&'a str> {
761 if options.constructs.indented_code {
762 trim_up_to_three_spaces(line)
763 } else {
764 Some(trim_ascii_start(line))
765 }
766}
767
768fn container_closed_after_unclosed_fence(
769 lines: &[Line<'_>],
770 cursor: usize,
771 last_content_index: usize,
772 content: &str,
773 options: &SyntaxOptions,
774) -> bool {
775 !lines[last_content_index].eol.is_empty()
776 && (cursor >= lines.len() || lines[cursor].text.trim().is_empty())
777 && content_has_unclosed_fenced_code(content, options)
778}
779
780fn content_has_unclosed_fenced_code(content: &str, options: &SyntaxOptions) -> bool {
781 let lines = collect_lines(content, 0);
782 let mut open_fence = None;
783 for line in lines {
784 let Some(trimmed) = fence_line(line.text, options) else {
785 continue;
786 };
787 if let Some((marker, length, has_nonblank_content)) = open_fence {
788 if fence_close(trimmed, marker, length) {
789 open_fence = None;
790 } else {
791 open_fence = Some((
792 marker,
793 length,
794 has_nonblank_content || !trimmed.trim().is_empty(),
795 ));
796 }
797 continue;
798 }
799 let Some((marker, length)) = fence_start(trimmed) else {
800 continue;
801 };
802 let info = trimmed[length..].trim();
803 if marker != FenceMarker::Backtick || !info.contains('`') {
804 open_fence = Some((marker, length, false));
805 }
806 }
807 open_fence.is_some_and(|(_, _, has_nonblank_content)| !has_nonblank_content)
808}
809
810fn block_quote_content_paragraph_open(content: &str, options: &SyntaxOptions) -> bool {
820 let Some(trimmed) = trim_up_to_three_spaces(content) else {
821 return false;
823 };
824 if trimmed.is_empty() {
825 return false;
826 }
827 if let Some(rest) = trimmed.strip_prefix('>') {
828 let rest = rest.strip_prefix(' ').unwrap_or(rest);
829 return block_quote_content_paragraph_open(rest, options);
830 }
831 if let Some(marker) = list_marker_info(trimmed) {
832 let first_content = list_marker_first_content(trimmed, marker);
833 return block_quote_content_paragraph_open(&first_content, options);
834 }
835 !lazy_line_starts_block(trimmed, options)
836}
837
838fn lazy_line_starts_block(input: &str, options: &SyntaxOptions) -> bool {
844 likely_block_start(input, options)
845 || (options.constructs.html_block && line_starts_html_block(input))
846 || trim_up_to_three_spaces(input).is_some_and(|t| t.starts_with('`') || t.starts_with('~'))
851}
852
853fn parse_block_quote(
854 lines: &[Line<'_>],
855 index: usize,
856 options: &SyntaxOptions,
857 definitions: &[String],
858 diagnostics: &mut Vec<Diagnostic>,
859) -> Option<(Block, usize)> {
860 if !trim_up_to_three_spaces(lines[index].text)?.starts_with('>') {
861 return None;
862 }
863
864 let mut content = String::new();
865 let mut lazy_flags: Vec<bool> = Vec::new();
869 let mut cursor = index;
870 let mut paragraph_open = false;
871 let mut in_table = false;
872 let mut last_content_line: Option<String> = None;
873 let mut content_base_offset = None;
874 while cursor < lines.len() {
875 let raw = lines[cursor].text;
876 let trimmed_opt = trim_up_to_three_spaces(raw);
877 let marked = trimmed_opt.is_some_and(|trimmed| trimmed.starts_with('>'));
878 let quote_rest_owned: String;
879 if let Some(trimmed) = trimmed_opt {
880 if trimmed.is_empty() {
881 break;
882 }
883 }
884 let (line, line_start) = if marked {
885 let trimmed = trimmed_opt.expect("marked implies a trimmed line");
886 let trimmed_start = lines[cursor].start + (raw.len() - trimmed.len());
887 let mut rest_start = 1;
888 let mut rest = &trimmed[rest_start..];
889 if rest.starts_with(' ') {
890 rest_start += 1;
891 rest = &rest[1..];
892 } else if rest.starts_with('\t') {
893 let marker_end_column = leading_indent_columns(raw) + 1;
894 match strip_leading_indent_columns_from(rest, 1, marker_end_column) {
895 Cow::Borrowed(stripped) => rest = stripped,
896 Cow::Owned(stripped) => {
897 quote_rest_owned = stripped;
898 rest = "e_rest_owned;
899 }
900 }
901 }
902 (rest, trimmed_start + rest_start)
903 } else if in_table {
904 break;
907 } else if paragraph_open && !lazy_line_starts_block(raw, options) {
908 (raw, lines[cursor].start)
912 } else {
913 break;
914 };
915
916 let mut escaped_lazy = String::new();
917 let line = if !marked
918 && last_content_line.as_deref().is_some_and(|previous| {
919 table_can_start_source(
920 previous,
921 line,
922 options.constructs.indented_code,
923 options.constructs.spoiler,
924 )
925 }) {
926 escaped_lazy.push_str(line);
927 if let Some(offset) = escaped_lazy.find('-') {
928 escaped_lazy.insert(offset, '\\');
929 }
930 &escaped_lazy
931 } else {
932 line
933 };
934
935 let starts_table = last_content_line.as_deref().is_some_and(|previous| {
936 table_can_start_source(
937 previous,
938 line,
939 options.constructs.indented_code,
940 options.constructs.spoiler,
941 )
942 });
943 if marked && starts_table {
944 paragraph_open = false;
945 in_table = true;
946 } else if marked && in_table && block_quote_table_body_row(line, options) {
947 paragraph_open = false;
948 } else {
949 in_table = false;
950 paragraph_open = block_quote_content_paragraph_open(line, options);
953 }
954 last_content_line = Some(line.into());
955 if content_base_offset.is_none() {
956 content_base_offset = Some(line_start);
957 }
958 push_line(&mut content, line);
959 lazy_flags.push(!marked);
960 cursor += 1;
961 }
962
963 let span = Span::new(lines[index].start, lines[cursor - 1].end_with_eol);
964 let child_base_offset = content_base_offset.unwrap_or(lines[index].start);
965 if !lines[cursor - 1].eol.is_empty() && !ends_with_line_ending(&content) {
966 content.push_str(lines[cursor - 1].eol);
967 }
968 if container_closed_after_unclosed_fence(lines, cursor, cursor - 1, &content, options) {
969 content.push('\n');
970 }
971 if let Some(alert) = parse_alert_from_block_quote(
972 &content,
973 child_base_offset,
974 span,
975 options,
976 definitions,
977 diagnostics,
978 ) {
979 return Some((alert, cursor));
980 }
981
982 let mut child_lines = collect_lines(&content, child_base_offset);
983 for (child, &lazy) in child_lines.iter_mut().zip(lazy_flags.iter()) {
984 child.lazy = lazy;
985 }
986 let children = parse_blocks_from_lines(&child_lines, false, options, definitions, diagnostics);
987 Some((
988 Block::BlockQuote(BlockQuote {
989 meta: NodeMeta::new(Some(span)),
990 children,
991 }),
992 cursor,
993 ))
994}
995
996fn parse_alert_from_block_quote(
997 content: &str,
998 base_offset: usize,
999 span: Span,
1000 options: &SyntaxOptions,
1001 definitions: &[String],
1002 diagnostics: &mut Vec<Diagnostic>,
1003) -> Option<Block> {
1004 if !options.constructs.gfm_alert {
1005 return None;
1006 }
1007 let (first_line, rest) = content.split_once('\n').unwrap_or((content, ""));
1008 let (kind, title) = parse_alert_marker(first_line)?;
1009 let rest_base_offset = base_offset + first_line.len() + usize::from(!rest.is_empty());
1010 let children = if rest.is_empty() {
1011 Vec::new()
1012 } else {
1013 parse_blocks(
1014 rest,
1015 rest_base_offset,
1016 false,
1017 options,
1018 definitions,
1019 diagnostics,
1020 )
1021 };
1022 Some(Block::Alert(Alert {
1023 meta: NodeMeta::new(Some(span)),
1024 kind,
1025 title,
1026 children,
1027 }))
1028}
1029
1030fn parse_alert_marker(line: &str) -> Option<(AlertKind, Option<String>)> {
1031 let close = line.find(']')?;
1032 let marker = line.get(0..close + 1)?;
1033 if !marker.starts_with("[!") {
1034 return None;
1035 }
1036 let kind = match &marker[2..close].to_ascii_lowercase()[..] {
1037 "note" => AlertKind::Note,
1038 "tip" => AlertKind::Tip,
1039 "important" => AlertKind::Important,
1040 "warning" => AlertKind::Warning,
1041 "caution" => AlertKind::Caution,
1042 _ => return None,
1043 };
1044 let title = line[close + 1..].trim();
1045 Some((
1046 kind,
1047 if title.is_empty() {
1048 None
1049 } else {
1050 Some(title.into())
1051 },
1052 ))
1053}
1054
1055fn block_quote_table_body_row(line: &str, options: &SyntaxOptions) -> bool {
1056 table_indent_line(line, options.constructs.indented_code).is_some_and(|row| {
1057 !row.trim().is_empty() && contains_unescaped_pipe(row, options.constructs.spoiler)
1058 })
1059}
1060
1061fn parse_list(
1062 lines: &[Line<'_>],
1063 index: usize,
1064 options: &SyntaxOptions,
1065 definitions: &[String],
1066 diagnostics: &mut Vec<Diagnostic>,
1067) -> Option<(Block, usize)> {
1068 let first_marker = list_marker_info(lines[index].text)?;
1069 let mut items = Vec::new();
1070 let mut cursor = index;
1071 let mut tight = true;
1072
1073 while cursor < lines.len() {
1074 if parse_thematic_break(lines[cursor]).is_some() {
1079 break;
1080 }
1081 let Some(marker) = list_marker_info(lines[cursor].text) else {
1082 break;
1083 };
1084 if !same_list_marker(first_marker, marker) {
1085 break;
1086 }
1087
1088 let item_start = cursor;
1089 let mut item_end = cursor;
1090 let mut item_tight = true;
1091 let mut item_blank_offsets: Vec<usize> = Vec::new();
1097 let mut content = String::new();
1098 let mut lazy_flags: Vec<bool> = Vec::new();
1105 let mut open_fence = None;
1106 let first_content = list_marker_first_content(lines[cursor].text, marker);
1107 let mut last_content_line: Option<String> = Some(first_content.as_ref().into());
1108 let mut paragraph_open = list_item_paragraph_stays_open(None, &first_content, options);
1109 let mut item_started_blank = first_content.trim().is_empty();
1114 push_line(&mut content, &first_content);
1115 lazy_flags.push(false);
1116 update_list_item_fence(&first_content, &mut open_fence);
1117 cursor += 1;
1118
1119 while cursor < lines.len() {
1120 if lines[cursor].text.trim().is_empty() {
1121 if open_fence.is_some() {
1124 let stripped = strip_list_continuation(
1125 lines[cursor].text,
1126 marker.content_indent,
1127 first_marker.indent,
1128 );
1129 push_line(&mut content, &stripped);
1130 lazy_flags.push(false);
1131 update_list_item_fence(&stripped, &mut open_fence);
1132 item_end = cursor;
1133 cursor += 1;
1134 continue;
1135 }
1136 let next = next_nonblank_line(lines, cursor + 1);
1137 if item_started_blank
1138 || next >= lines.len()
1139 || sibling_list_marker_at_line(
1140 lines[next].text,
1141 first_marker,
1142 marker.content_indent,
1143 )
1144 || leading_indent_columns(lines[next].text) < marker.content_indent
1145 {
1146 if next < lines.len()
1147 && sibling_list_marker_at_line(
1148 lines[next].text,
1149 first_marker,
1150 marker.content_indent,
1151 )
1152 {
1153 item_tight = false;
1154 }
1155 cursor = next;
1156 break;
1157 }
1158 item_blank_offsets.push(content.len() + usize::from(!content.is_empty()));
1166 paragraph_open = false;
1167 push_line(&mut content, "");
1168 lazy_flags.push(false);
1169 item_end = cursor;
1170 cursor += 1;
1171 continue;
1172 }
1173
1174 item_started_blank = false;
1175
1176 if sibling_list_marker_at_line(lines[cursor].text, first_marker, marker.content_indent)
1177 {
1178 break;
1179 }
1180
1181 if leading_indent_columns(lines[cursor].text) < marker.content_indent
1186 && !same_list_marker_line(lines[cursor].text, first_marker)
1187 && list_marker_info(lines[cursor].text).is_some()
1188 {
1189 break;
1190 }
1191
1192 if leading_indent_columns(lines[cursor].text) < marker.content_indent {
1193 if likely_block_start(lines[cursor].text, options) || !paragraph_open {
1194 break;
1195 }
1196 }
1197
1198 let lazy = paragraph_open
1204 && leading_indent_columns(lines[cursor].text) < marker.content_indent;
1205 let stripped = strip_list_continuation(
1206 lines[cursor].text,
1207 marker.content_indent,
1208 first_marker.indent,
1209 );
1210 let starts_table = last_content_line.as_deref().is_some_and(|previous| {
1211 table_can_start_source(
1212 previous,
1213 &stripped,
1214 options.constructs.indented_code,
1215 options.constructs.spoiler,
1216 )
1217 });
1218 paragraph_open = if starts_table {
1219 false
1220 } else {
1221 list_item_paragraph_stays_open(Some(paragraph_open), &stripped, options)
1222 };
1223 push_line(&mut content, &stripped);
1224 lazy_flags.push(lazy);
1225 update_list_item_fence(&stripped, &mut open_fence);
1226 last_content_line = Some(stripped.into_owned());
1227 item_end = cursor;
1228 cursor += 1;
1229 }
1230
1231 let child_base = lines[item_start].start + marker.content_indent;
1232 if !lines[item_end].eol.is_empty() && !ends_with_line_ending(&content) {
1233 content.push_str(lines[item_end].eol);
1234 }
1235 if container_closed_after_unclosed_fence(lines, cursor, item_end, &content, options) {
1236 content.push('\n');
1237 }
1238 let mut child_lines = collect_lines(&content, child_base);
1239 for (child, &lazy) in child_lines.iter_mut().zip(lazy_flags.iter()) {
1240 child.lazy = lazy;
1241 }
1242 let mut children =
1243 parse_blocks_from_lines(&child_lines, false, options, definitions, diagnostics);
1244 let checked = if options.constructs.gfm_task_list_item {
1245 take_task_marker_from_children(&mut children)
1246 } else {
1247 None
1248 };
1249
1250 if item_tight
1251 && blank_separates_top_level_blocks(&item_blank_offsets, &children, child_base)
1252 {
1253 item_tight = false;
1254 }
1255 tight = tight && item_tight;
1256 items.push(ListItem {
1257 meta: NodeMeta::new(Some(Span::new(
1258 lines[item_start].start,
1259 lines[item_end].end_with_eol,
1260 ))),
1261 checked,
1262 children,
1263 });
1264 }
1265
1266 Some((
1267 Block::List(List {
1268 meta: NodeMeta::new(Some(Span::new(
1269 lines[index].start,
1270 lines[cursor - 1].end_with_eol,
1271 ))),
1272 ordered: first_marker.ordered,
1273 start: first_marker.start,
1274 delimiter: first_marker.delimiter,
1275 tight,
1276 children: items,
1277 }),
1278 cursor,
1279 ))
1280}
1281
1282fn blank_separates_top_level_blocks(
1295 blank_offsets: &[usize],
1296 children: &[Block],
1297 child_base: usize,
1298) -> bool {
1299 if blank_offsets.is_empty() || children.len() < 2 {
1300 return false;
1301 }
1302 let Some(&first_blank) = blank_offsets.iter().min() else {
1303 return false;
1304 };
1305 children.iter().any(|child| {
1306 block_span(child).is_some_and(|span| span.start.saturating_sub(child_base) > first_blank)
1307 })
1308}
1309
1310fn block_span(block: &Block) -> Option<Span> {
1311 let meta = match block {
1312 Block::Paragraph(node) => &node.meta,
1313 Block::Heading(node) => &node.meta,
1314 Block::ThematicBreak(node) => &node.meta,
1315 Block::BlockQuote(node) => &node.meta,
1316 Block::Alert(node) => &node.meta,
1317 Block::List(node) => &node.meta,
1318 Block::DescriptionList(node) => &node.meta,
1319 Block::CodeBlock(node) => &node.meta,
1320 Block::HtmlBlock(node) => &node.meta,
1321 Block::Definition(node) => &node.meta,
1322 Block::FootnoteDefinition(node) => &node.meta,
1323 Block::Table(node) => &node.meta,
1324 Block::MathBlock(node) => &node.meta,
1325 Block::Frontmatter(node) => &node.meta,
1326 Block::MdxEsm(node) => &node.meta,
1327 Block::MdxExpression(node) => &node.meta,
1328 Block::MdxJsx(node) => &node.meta,
1329 Block::LeafDirective(node) => &node.meta,
1330 Block::ContainerDirective(node) => &node.meta,
1331 };
1332 meta.span
1333}
1334
1335fn list_item_paragraph_stays_open(
1336 previous_open: Option<bool>,
1337 line: &str,
1338 options: &SyntaxOptions,
1339) -> bool {
1340 if line.trim().is_empty() {
1341 return false;
1342 }
1343 if previous_open == Some(false) {
1344 return false;
1345 }
1346 block_quote_content_paragraph_open(line, options)
1347}
1348
/// Parses a description list starting at `lines[index]`.
///
/// Returns the list block together with the index of the first line that was not
/// consumed, or `None` when the construct is disabled, the first line is not a term
/// line, no item could be built, or one of the details fails to parse.
fn parse_description_list(
    lines: &[Line<'_>],
    index: usize,
    options: &SyntaxOptions,
    definitions: &[String],
    diagnostics: &mut Vec<Diagnostic>,
) -> Option<(Block, usize)> {
    if !options.constructs.description_list || !is_description_term_line(lines[index].text, options)
    {
        return None;
    }

    let mut cursor = index;
    let mut items = Vec::new();
    // The list stays tight until a blank line is seen between any of its parts.
    let mut tight = true;
    let mut list_end = lines[index].end_with_eol;

    // One iteration per item: a term followed by one or more details.
    while cursor < lines.len() {
        if !is_description_term_line(lines[cursor].text, options) {
            break;
        }
        let Some(term) = description_term(lines, cursor, options) else {
            break;
        };
        let term_line = lines[cursor];
        let mut details = Vec::new();
        let item_start = term_line.start;
        let mut item_end = lines[term.term_end].end_with_eol;
        tight = tight && !term.blank_after_term;
        // `description_term` only succeeds when `marker_index` is a valid marker line.
        cursor = term.marker_index;

        // Collect every consecutive details block that belongs to this term.
        loop {
            let Some(marker) = description_marker(lines[cursor].text) else {
                break;
            };
            // A details block that fails to parse aborts the whole list via `?`.
            let (detail, next, detail_tight) = parse_description_details(
                lines,
                cursor,
                marker,
                options,
                definitions,
                diagnostics,
            )?;
            tight = tight && detail_tight;
            item_end = detail
                .meta
                .span
                .map(|span| span.end)
                .unwrap_or(lines[cursor].end_with_eol);
            details.push(detail);
            cursor = next;

            // Another marker after optional blank lines continues the same item;
            // skipped blank lines make the list loose.
            let next_nonblank = next_nonblank_line(lines, cursor);
            if next_nonblank < lines.len()
                && description_marker(lines[next_nonblank].text).is_some()
            {
                if next_nonblank != cursor {
                    tight = false;
                }
                cursor = next_nonblank;
                continue;
            }
            break;
        }

        if details.is_empty() {
            return None;
        }
        list_end = item_end;
        items.push(DescriptionItem {
            meta: NodeMeta::new(Some(Span::new(item_start, item_end))),
            term: parse_inlines(
                &term.source,
                term.source_offset,
                options,
                definitions,
                diagnostics,
            ),
            details,
        });

        // Every path below moves the cursor past any blank lines that follow the item.
        let next_item = next_nonblank_line(lines, cursor);
        if next_item >= lines.len() {
            cursor = next_item;
            break;
        }
        if description_term(lines, next_item, options).is_some() {
            // Blank lines between two items make the list loose.
            if next_item != cursor {
                tight = false;
            }
            cursor = next_item;
            continue;
        }
        cursor = next_item;
        break;
    }

    (!items.is_empty()).then_some((
        Block::DescriptionList(DescriptionList {
            meta: NodeMeta::new(Some(Span::new(lines[index].start, list_end))),
            tight,
            children: items,
        }),
        cursor,
    ))
}
1455
/// Parses one details block whose marker sits on `lines[index]`.
///
/// Gathers the text after the marker plus any continuation lines, parses the result as
/// nested blocks, and returns `(details, next_line_index, tight)`. Returns `None` when
/// the gathered content is blank.
fn parse_description_details(
    lines: &[Line<'_>],
    index: usize,
    marker: DescriptionMarker<'_>,
    options: &SyntaxOptions,
    definitions: &[String],
    diagnostics: &mut Vec<Diagnostic>,
) -> Option<(DescriptionDetails, usize, bool)> {
    let mut content = String::new();
    push_line(&mut content, marker.content);
    let mut cursor = index + 1;
    let mut end = lines[index].end_with_eol;
    let mut tight = true;
    // Tracks whether an unindented line may still continue the current paragraph.
    let mut paragraph_open = paragraph_stays_open(marker.content, options);

    while cursor < lines.len() {
        if lines[cursor].text.trim().is_empty() {
            // Look past the blank run to decide whether the details continue.
            let next = next_nonblank_line(lines, cursor + 1);
            if next >= lines.len() || description_term(lines, next, options).is_some() {
                cursor = next;
                break;
            }
            if description_marker(lines[next].text).is_some() {
                // A sibling details block separated by blank lines: loose spacing.
                tight = false;
                cursor = next;
                break;
            }
            if strip_indent_continuation(lines[next].text).is_none() {
                // The following content is not indented, so it is not part of these details;
                // the cursor is left on the blank line.
                break;
            }
            // Keep the blank line inside the content; it ends the open paragraph.
            push_line(&mut content, "");
            paragraph_open = false;
            tight = false;
            end = lines[cursor].end_with_eol;
            cursor += 1;
            continue;
        }

        if description_marker(lines[cursor].text).is_some()
            || description_term(lines, cursor, options).is_some()
        {
            break;
        }

        // Accept indented continuations, or unindented lines while a paragraph is still
        // open and the line does not look like the start of another block.
        let continuation = if let Some(continuation) = strip_indent_continuation(lines[cursor].text)
        {
            continuation
        } else if paragraph_open && !likely_block_start(lines[cursor].text, options) {
            trim_ascii_start(lines[cursor].text)
        } else {
            break;
        };
        paragraph_open = paragraph_stays_open(continuation, options);
        push_line(&mut content, continuation);
        end = lines[cursor].end_with_eol;
        cursor += 1;
    }

    if content.trim().is_empty() {
        return None;
    }

    Some((
        DescriptionDetails {
            meta: NodeMeta::new(Some(Span::new(lines[index].start, end))),
            // Child offsets are based on where the marker's content begins on the first line.
            children: parse_blocks(
                &content,
                lines[index].start + marker.content_offset,
                false,
                options,
                definitions,
                diagnostics,
            ),
        },
        cursor,
        tight,
    ))
}
1540
1541fn description_term(
1542 lines: &[Line<'_>],
1543 term_index: usize,
1544 options: &SyntaxOptions,
1545) -> Option<DescriptionTerm> {
1546 if term_index >= lines.len() || !is_description_term_line(lines[term_index].text, options) {
1547 return None;
1548 }
1549 let mut source = String::new();
1550 let mut term_end = term_index;
1551 let mut cursor = term_index;
1552 while cursor < lines.len() && is_description_term_line(lines[cursor].text, options) {
1553 if !source.is_empty() {
1554 source.push('\n');
1555 }
1556 source.push_str(trim_ascii_start(lines[cursor].text).trim_end());
1557 term_end = cursor;
1558 cursor += 1;
1559 }
1560
1561 let mut marker_index = cursor;
1562 let mut blank_after_term = false;
1563 while marker_index < lines.len() && lines[marker_index].text.trim().is_empty() {
1564 blank_after_term = true;
1565 marker_index += 1;
1566 }
1567 (marker_index < lines.len() && description_marker(lines[marker_index].text).is_some()).then(
1568 || DescriptionTerm {
1569 marker_index,
1570 term_end,
1571 blank_after_term,
1572 source,
1573 source_offset: lines[term_index].start + leading_trim_bytes(lines[term_index].text),
1574 },
1575 )
1576}
1577
1578fn is_description_term_line(line: &str, options: &SyntaxOptions) -> bool {
1579 leading_indent_columns(line) <= 3
1580 && !line.trim().is_empty()
1581 && description_marker(line).is_none()
1582 && !likely_block_start(line, options)
1583}
1584
1585fn description_marker(line: &str) -> Option<DescriptionMarker<'_>> {
1586 let (columns, bytes) = leading_indent(line);
1587 if columns > 2 || !matches!(line.as_bytes().get(bytes), Some(b':' | b'~')) {
1588 return None;
1589 }
1590 if line
1591 .as_bytes()
1592 .get(bytes + 1)
1593 .is_some_and(|byte| !matches!(*byte, b' ' | b'\t'))
1594 {
1595 return None;
1596 }
1597 let mut content_offset = bytes + 1;
1598 while line
1599 .as_bytes()
1600 .get(content_offset)
1601 .is_some_and(|byte| matches!(*byte, b' ' | b'\t'))
1602 {
1603 content_offset += 1;
1604 }
1605 Some(DescriptionMarker {
1606 content_offset,
1607 content: &line[content_offset..],
1608 })
1609}
1610
1611fn paragraph_stays_open(line: &str, options: &SyntaxOptions) -> bool {
1615 !line.trim().is_empty() && !likely_block_start(line, options)
1616}
1617
/// Strips one level of continuation indent from `input`.
///
/// Returns the remainder after a leading space prefix or, failing that, after a single
/// leading tab; returns `None` when `input` starts with neither.
// NOTE(review): the width of the space prefix literal is reproduced as it appears in
// this view of the file; confirm it against the canonical source before relying on it.
fn strip_indent_continuation(input: &str) -> Option<&str> {
    input
        .strip_prefix(" ")
        .or_else(|| input.strip_prefix('\t'))
}
1624
1625fn parse_atx_heading(
1626 line: Line<'_>,
1627 options: &SyntaxOptions,
1628 definitions: &[String],
1629) -> Option<Block> {
1630 let text = trim_up_to_three_spaces(line.text)?;
1631 let depth = text
1632 .as_bytes()
1633 .iter()
1634 .take_while(|byte| **byte == b'#')
1635 .count();
1636 if depth == 0 || depth > 6 {
1637 return None;
1638 }
1639 if text
1640 .as_bytes()
1641 .get(depth)
1642 .is_some_and(|byte| !matches!(*byte, b' ' | b'\t'))
1643 && text.len() != depth
1644 {
1645 return None;
1646 }
1647 let after_opening = &text[depth..];
1648 let content_start_in_text = depth + leading_trim_bytes(after_opening);
1649 let content = trim_closing_hashes(after_opening.trim_start());
1650 let content_start = line.start + (line.text.len() - text.len()) + content_start_in_text;
1651 Some(Block::Heading(Heading {
1652 meta: NodeMeta::new(Some(Span::new(line.start, line.end))),
1653 depth: depth as u8,
1654 kind: HeadingKind::Atx,
1655 children: parse_inlines(
1656 content,
1657 content_start,
1658 options,
1659 definitions,
1660 &mut Vec::new(),
1661 ),
1662 }))
1663}
1664
1665fn parse_thematic_break(line: Line<'_>) -> Option<Block> {
1666 let text = trim_up_to_three_spaces(line.text)?.trim();
1667 let mut marker = None;
1668 let mut count = 0;
1669 for char in text.chars() {
1670 if char == ' ' || char == '\t' {
1671 continue;
1672 }
1673 let current = match char {
1674 '-' => ThematicBreakMarker::Dash,
1675 '*' => ThematicBreakMarker::Asterisk,
1676 '_' => ThematicBreakMarker::Underscore,
1677 _ => return None,
1678 };
1679 if marker.is_some_and(|marker| marker != current) {
1680 return None;
1681 }
1682 marker = Some(current);
1683 count += 1;
1684 }
1685 if count >= 3 {
1686 Some(Block::ThematicBreak(ThematicBreak {
1687 meta: NodeMeta::new(Some(Span::new(line.start, line.end))),
1688 marker: marker?,
1689 }))
1690 } else {
1691 None
1692 }
1693}
1694
/// Parses a link reference definition (`[label]: destination "title"`) starting at
/// `lines[index]`.
///
/// Both the label and the destination/title may span several lines. Returns the
/// definition block and the index of the first unconsumed line, or `None` when the
/// lines do not form a definition.
fn parse_definition(
    lines: &[Line<'_>],
    index: usize,
    options: &SyntaxOptions,
    allow_subsequent_indent: bool,
) -> Option<(Block, usize)> {
    let line = lines[index];
    let text = trim_definition_start(line.text, allow_subsequent_indent)?;
    if !text.starts_with('[') {
        return None;
    }

    // Phase 1: find the end of the label, pulling in following lines while the label
    // is still unterminated.
    let mut accumulated = String::from(text);
    let mut label_end_line = index;
    let close = loop {
        if let Some(close) = find_reference_label_end(&accumulated, 0) {
            // The label end must be immediately followed by `:`.
            if accumulated.as_bytes().get(close + 1) == Some(&b':') {
                break close;
            }
            return None;
        }
        let next = label_end_line + 1;
        // A blank line or the end of input ends the attempt.
        if next >= lines.len() || lines[next].text.trim().is_empty() {
            return None;
        }
        // So does a line that would start another construct.
        if likely_block_start(lines[next].text, options)
            || setext_underline_depth(lines[next].text).is_some()
            || table_can_start(lines, next, options)
        {
            return None;
        }
        accumulated.push('\n');
        accumulated.push_str(lines[next].text);
        label_end_line = next;
    };
    let label = String::from(&accumulated[1..close]);
    if normalize_label(&label).is_empty() {
        return None;
    }
    let label = label.as_str();
    // Everything after the `]:` is the candidate destination/title source.
    let mut source = String::from(&accumulated[close + 2..]);
    let mut cursor = label_end_line;
    // Remembers the latest parse that had a destination but no title, together with the
    // line index that follows it, as a fallback if no title is ever completed.
    let mut best_without_title = None;

    // Phase 2: parse destination and optional title, extending `source` line by line.
    loop {
        if let Some(resource) = parse_definition_destination_title(&source) {
            if resource.title.is_some() {
                // Destination and title both present: the definition is complete.
                return Some((
                    Block::Definition(Definition {
                        meta: NodeMeta::new(Some(Span::new(
                            line.start,
                            lines[cursor].end_with_eol,
                        ))),
                        label: label.into(),
                        identifier: normalize_label(label),
                        destination: resource.destination,
                        destination_kind: resource.destination_kind,
                        title: resource.title,
                        title_kind: resource.title_kind,
                    }),
                    cursor + 1,
                ));
            }

            best_without_title = Some((resource, cursor + 1));
            // Only keep going if the next line could begin a title.
            let next = cursor + 1;
            if next >= lines.len()
                || lines[next].text.trim().is_empty()
                || !line_can_start_definition_title(lines[next].text)
            {
                break;
            }
        }

        let next = cursor + 1;
        if next >= lines.len() || lines[next].text.trim().is_empty() {
            break;
        }
        if likely_block_start(lines[next].text, options)
            || setext_underline_depth(lines[next].text).is_some()
        {
            break;
        }
        source.push('\n');
        source.push_str(lines[next].text);
        cursor = next;
    }

    // No titled form was completed: fall back to the title-less parse, if there was one.
    let (resource, next) = best_without_title?;
    let end = lines[next - 1].end_with_eol;
    Some((
        Block::Definition(Definition {
            meta: NodeMeta::new(Some(Span::new(line.start, end))),
            label: label.into(),
            identifier: normalize_label(label),
            destination: resource.destination,
            destination_kind: resource.destination_kind,
            title: resource.title,
            title_kind: resource.title_kind,
        }),
        next,
    ))
}
1817
1818fn trim_definition_start(input: &str, allow_subsequent_indent: bool) -> Option<&str> {
1819 if let Some(trimmed) = trim_up_to_three_spaces(input) {
1820 return Some(trimmed);
1821 }
1822 if allow_subsequent_indent {
1823 let (columns, bytes) = leading_indent(input);
1824 if columns == 4 {
1825 return Some(&input[bytes..]);
1826 }
1827 }
1828 None
1829}
1830
1831fn parse_footnote_definition(
1832 lines: &[Line<'_>],
1833 index: usize,
1834 options: &SyntaxOptions,
1835 definitions: &[String],
1836 diagnostics: &mut Vec<Diagnostic>,
1837) -> Option<(Block, usize)> {
1838 if !options.constructs.footnote_definition {
1839 return None;
1840 }
1841 let line = lines[index];
1842 let text = line.text.trim();
1843 if !text.starts_with("[^") {
1844 return None;
1845 }
1846 let close = find_footnote_definition_label_end(text)?;
1847 let label = &text[2..close];
1848 if !is_footnote_label(label) {
1849 return None;
1850 }
1851 let rest = text[close + 2..].trim();
1852 let mut content = String::new();
1853 push_line(&mut content, rest);
1854 let mut cursor = index + 1;
1855 let mut end = line.end_with_eol;
1856 let mut paragraph_open = paragraph_stays_open(rest, options);
1857
1858 while cursor < lines.len() {
1859 if lines[cursor].text.trim().is_empty() {
1860 let next = next_nonblank_line(lines, cursor + 1);
1861 if next >= lines.len() || !is_footnote_continuation(lines[next].text) {
1862 break;
1863 }
1864 push_line(&mut content, "");
1865 paragraph_open = false;
1866 end = lines[cursor].end_with_eol;
1867 cursor += 1;
1868 continue;
1869 }
1870
1871 let continuation = if let Some(continuation) = strip_indent_continuation(lines[cursor].text)
1872 {
1873 continuation
1874 } else if paragraph_open && !likely_block_start(lines[cursor].text, options) {
1875 trim_ascii_start(lines[cursor].text)
1876 } else {
1877 break;
1878 };
1879 paragraph_open = paragraph_stays_open(continuation, options);
1880 push_line(&mut content, continuation);
1881 end = lines[cursor].end_with_eol;
1882 cursor += 1;
1883 }
1884
1885 Some((
1886 Block::FootnoteDefinition(FootnoteDefinition {
1887 meta: NodeMeta::new(Some(Span::new(line.start, end))),
1888 label: label.into(),
1889 identifier: normalize_label(label),
1890 children: parse_blocks(
1891 &content,
1892 line.end.saturating_sub(rest.len()),
1893 false,
1894 options,
1895 definitions,
1896 diagnostics,
1897 ),
1898 }),
1899 cursor,
1900 ))
1901}
1902
/// Reports whether `input` carries the indent that `strip_indent_continuation` accepts,
/// i.e. whether the line can continue a footnote definition after a blank line.
fn is_footnote_continuation(input: &str) -> bool {
    strip_indent_continuation(input).is_some()
}
1906
1907fn parse_leaf_directive(
1908 line: Line<'_>,
1909 options: &SyntaxOptions,
1910 definitions: &[String],
1911 diagnostics: &mut Vec<Diagnostic>,
1912) -> Option<Block> {
1913 if !options.constructs.directive_leaf {
1914 return None;
1915 }
1916 let trimmed = line.text.trim_start();
1917 if trimmed.starts_with(":::") || !trimmed.starts_with("::") {
1918 return None;
1919 }
1920 let opener_base = line.start + (line.text.len() - trimmed.len()) + 2;
1921 let Some((name, label_source, attributes, _)) = parse_directive_opener(&trimmed[2..]) else {
1922 diagnostics.push(Diagnostic::new(
1923 DiagnosticSeverity::Error,
1924 DiagnosticCode::InvalidDirectiveName,
1925 Span::new(line.start, line.end),
1926 "leaf directive must have a valid name",
1927 ));
1928 return None;
1929 };
1930 let label = label_source
1931 .map(|source| {
1932 parse_inlines(
1933 source,
1934 opener_base + name.len() + 1,
1935 options,
1936 definitions,
1937 diagnostics,
1938 )
1939 })
1940 .unwrap_or_default();
1941 Some(Block::LeafDirective(LeafDirective {
1942 meta: NodeMeta::new(Some(Span::new(line.start, line.end))),
1943 name,
1944 label,
1945 attributes,
1946 }))
1947}
1948
1949fn parse_html_block(
1950 lines: &[Line<'_>],
1951 index: usize,
1952 options: &SyntaxOptions,
1953) -> Option<(Block, usize)> {
1954 if !options.constructs.html_block {
1955 return None;
1956 }
1957
1958 let trimmed = trim_up_to_three_spaces(lines[index].text)?;
1959 let kind = html_block_start(trimmed)?;
1960 let mut value = String::new();
1961 let mut cursor = index;
1962 match kind {
1963 HtmlBlockKind::RawTag => {
1964 while cursor < lines.len() {
1968 push_line(&mut value, lines[cursor].text);
1969 if ["script", "pre", "style", "textarea"]
1970 .iter()
1971 .any(|tag| line_contains_raw_closing_tag(lines[cursor].text, tag))
1972 {
1973 cursor += 1;
1974 break;
1975 }
1976 cursor += 1;
1977 }
1978 }
1979 HtmlBlockKind::BlockTag => {
1980 while cursor < lines.len() && !lines[cursor].text.trim().is_empty() {
1981 push_line(&mut value, lines[cursor].text);
1982 cursor += 1;
1983 }
1984 }
1985 HtmlBlockKind::Until(end) => {
1986 while cursor < lines.len() {
1987 push_line(&mut value, lines[cursor].text);
1988 if lines[cursor].text.contains(end) {
1989 cursor += 1;
1990 break;
1991 }
1992 cursor += 1;
1993 }
1994 }
1995 HtmlBlockKind::UntilBlank => {
1996 while cursor < lines.len() && !lines[cursor].text.trim().is_empty() {
1997 push_line(&mut value, lines[cursor].text);
1998 cursor += 1;
1999 }
2000 }
2001 }
2002 Some((
2003 Block::HtmlBlock(HtmlBlock {
2004 meta: NodeMeta::new(Some(Span::new(
2005 lines[index].start,
2006 lines[cursor - 1].end_with_eol,
2007 ))),
2008 value,
2009 }),
2010 cursor,
2011 ))
2012}
2013
2014fn html_block_start(input: &str) -> Option<HtmlBlockKind> {
2015 let trimmed = input.trim_end();
2016 if !trimmed.starts_with('<') {
2017 return None;
2018 }
2019
2020 if raw_html_tag_start(trimmed) {
2021 return Some(HtmlBlockKind::RawTag);
2022 }
2023 if trimmed.starts_with("<!--") {
2024 return Some(HtmlBlockKind::Until("-->"));
2025 }
2026 if trimmed.starts_with("<?") {
2027 return Some(HtmlBlockKind::Until("?>"));
2028 }
2029 if is_declaration_start(trimmed) {
2030 return Some(HtmlBlockKind::Until(">"));
2031 }
2032 if trimmed.starts_with("<![CDATA[") {
2033 return Some(HtmlBlockKind::Until("]]>"));
2034 }
2035
2036 if html_block_tag_start(trimmed) {
2037 return Some(HtmlBlockKind::BlockTag);
2038 }
2039
2040 let Some((end, _tag_name)) = parse_html_tag(trimmed, 0) else {
2041 return None;
2042 };
2043 let rest = trimmed[end..].trim();
2044 if rest.is_empty() {
2045 Some(HtmlBlockKind::UntilBlank)
2046 } else {
2047 None
2048 }
2049}
2050
2051pub(crate) fn line_starts_html_block(input: &str) -> bool {
2052 trim_up_to_three_spaces(input)
2053 .and_then(html_block_start)
2054 .is_some()
2055}
2056
2057fn raw_html_tag_start(input: &str) -> bool {
2058 for tag in ["script", "pre", "style", "textarea"] {
2059 if html_raw_open_tag_prefix(input, tag) {
2060 return true;
2061 }
2062 }
2063 false
2064}
2065
/// Reports whether `input` begins with an opening tag named `tag` (ASCII
/// case-insensitive).
///
/// After `<` and the name, the input must end, continue with a space, tab, line feed,
/// carriage return, or `>`, or consist of exactly `/>` and nothing more. Closing tags
/// (`</...`) never match.
fn html_raw_open_tag_prefix(input: &str, tag: &str) -> bool {
    let after_lt = match input.as_bytes() {
        [b'<', after_lt @ ..] => after_lt,
        _ => return false,
    };
    if after_lt.first() == Some(&b'/') {
        return false;
    }

    let name = tag.as_bytes();
    if after_lt.len() < name.len() || !after_lt[..name.len()].eq_ignore_ascii_case(name) {
        return false;
    }

    matches!(
        &after_lt[name.len()..],
        [] | [b' ' | b'\t' | b'\n' | b'\r' | b'>', ..] | [b'/', b'>']
    )
}
2090
/// Reports whether `input` contains a closing tag for `tag` anywhere on the line.
///
/// A match is `</` followed by the tag name (ASCII case-insensitive), optional ASCII
/// whitespace, and then `>`.
fn line_contains_raw_closing_tag(input: &str, tag: &str) -> bool {
    let bytes = input.as_bytes();
    let name = tag.as_bytes();
    let opener_len = 2 + name.len();

    bytes
        .windows(opener_len)
        .enumerate()
        .any(|(start, window)| {
            window.starts_with(b"</")
                && window[2..].eq_ignore_ascii_case(name)
                // The first non-whitespace byte after the name must be `>`.
                && bytes[start + opener_len..]
                    .iter()
                    .find(|byte| !byte.is_ascii_whitespace())
                    == Some(&b'>')
        })
}
2127
2128fn html_block_tag_start(input: &str) -> bool {
2129 let bytes = input.as_bytes();
2130 if bytes.first() != Some(&b'<') {
2131 return false;
2132 }
2133
2134 let mut cursor = 1;
2135 if bytes.get(cursor) == Some(&b'/') {
2136 cursor += 1;
2137 }
2138
2139 let name_start = cursor;
2140 if !bytes
2141 .get(cursor)
2142 .is_some_and(|byte| byte.is_ascii_alphabetic())
2143 {
2144 return false;
2145 }
2146 cursor += 1;
2147 while bytes.get(cursor).is_some_and(|byte| html_name_byte(*byte)) {
2148 cursor += 1;
2149 }
2150
2151 let name = &input[name_start..cursor];
2152 if !html_block_tag(name) {
2153 return false;
2154 }
2155
2156 match bytes.get(cursor) {
2157 None | Some(b' ' | b'\t' | b'\n' | b'\r' | b'>') => true,
2158 Some(b'/') if bytes.get(cursor + 1) == Some(&b'>') => true,
2159 _ => false,
2160 }
2161}
2162
/// Reports whether `tag` is one of the block-level HTML tag names that can start an
/// HTML block, compared ASCII case-insensitively.
///
/// The comparison is done in place with `eq_ignore_ascii_case`, so no lowercase copy of
/// `tag` is allocated per call.
fn html_block_tag(tag: &str) -> bool {
    const BLOCK_TAGS: &[&str] = &[
        "address",
        "article",
        "aside",
        "base",
        "basefont",
        "blockquote",
        "body",
        "caption",
        "center",
        "col",
        "colgroup",
        "dd",
        "details",
        "dialog",
        "dir",
        "div",
        "dl",
        "dt",
        "fieldset",
        "figcaption",
        "figure",
        "footer",
        "form",
        "frame",
        "frameset",
        "h1",
        "h2",
        "h3",
        "h4",
        "h5",
        "h6",
        "head",
        "header",
        "hr",
        "html",
        "iframe",
        "legend",
        "li",
        "link",
        "main",
        "menu",
        "menuitem",
        "nav",
        "noframes",
        "ol",
        "optgroup",
        "option",
        "p",
        "param",
        "search",
        "section",
        "summary",
        "table",
        "tbody",
        "td",
        "tfoot",
        "th",
        "thead",
        "title",
        "tr",
        "track",
        "ul",
    ];
    BLOCK_TAGS
        .iter()
        .any(|name| name.eq_ignore_ascii_case(tag))
}
2230
/// Reports whether `input` begins with `<!` immediately followed by an ASCII letter.
fn is_declaration_start(input: &str) -> bool {
    matches!(
        input.as_bytes(),
        [b'<', b'!', third, ..] if third.is_ascii_alphabetic()
    )
}
2237
2238fn parse_mdx_flow(
2239 lines: &[Line<'_>],
2240 index: usize,
2241 options: &SyntaxOptions,
2242 diagnostics: &mut Vec<Diagnostic>,
2243) -> Option<(Block, usize)> {
2244 if options.constructs.mdx_esm {
2245 if let Some((block, next)) = parse_mdx_esm_flow(lines, index, diagnostics) {
2246 return Some((block, next));
2247 }
2248 }
2249
2250 let line = lines[index];
2251 let trimmed = line.text.trim_start();
2252 if options.constructs.mdx_expression_block && trimmed.starts_with('{') {
2253 let open_byte = line.text.len() - trimmed.len();
2254 if let Some((close_line, close_byte)) = find_mdx_expression_close(lines, index, open_byte) {
2255 return Some((
2256 Block::MdxExpression(MdxExpression {
2257 meta: NodeMeta::new(Some(Span::new(line.start, lines[close_line].end))),
2258 value: collect_mdx_expression_value(
2259 lines, index, open_byte, close_line, close_byte,
2260 ),
2261 }),
2262 close_line + 1,
2263 ));
2264 }
2265 diagnostics.push(Diagnostic::new(
2266 DiagnosticSeverity::Error,
2267 DiagnosticCode::InvalidMdx,
2268 Span::new(line.start + open_byte, lines.last()?.end_with_eol),
2269 "MDX expression block is missing a closing brace",
2270 ));
2271 }
2272 if options.constructs.mdx_jsx_block && trimmed.starts_with('<') {
2273 if let Some(close_line) = find_mdx_jsx_close(lines, index) {
2274 return Some((
2275 Block::MdxJsx(MdxJsx {
2276 meta: NodeMeta::new(Some(Span::new(line.start, lines[close_line].end))),
2277 value: collect_line_range(lines, index, close_line),
2278 }),
2279 close_line + 1,
2280 ));
2281 }
2282 let start_byte = line.text.len() - trimmed.len();
2283 if let Some(root) = mdx_jsx_tag_start(line.text, start_byte) {
2284 if !root.closing {
2285 if let Some((_tag_end_line, _tag_end_byte, self_closing)) =
2286 find_mdx_jsx_tag_end(lines, index, start_byte)
2287 {
2288 if !self_closing {
2289 diagnostics.push(Diagnostic::new(
2290 DiagnosticSeverity::Error,
2291 DiagnosticCode::InvalidMdx,
2292 Span::new(line.start + start_byte, lines.last()?.end_with_eol),
2293 "MDX JSX block is missing a closing tag",
2294 ));
2295 }
2296 }
2297 }
2298 }
2299 }
2300 None
2301}
2302
/// Scanner state carried from line to line while reading an MDX ESM block.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
struct MdxEsmState {
    /// Number of `{` seen that have not been matched by `}`.
    brace_depth: usize,
    /// Number of `[` seen that have not been matched by `]`.
    bracket_depth: usize,
    /// Number of `(` seen that have not been matched by `)`.
    paren_depth: usize,
    /// Set while the scanner is inside a `/* ... */` comment.
    block_comment: bool,
    /// The quote byte (`'`, `"`, or a backtick) of the string currently open, if any.
    quote: Option<u8>,
    /// Set when the previous byte inside a quoted string was a backslash.
    escaped: bool,
}
2312
/// Lexical context tracked while searching for the brace that closes an MDX expression.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum MdxBraceState {
    /// Outside any string or comment; braces are counted here.
    Normal,
    /// Inside a `'...'` string.
    SingleQuoted,
    /// Inside a `"..."` string.
    DoubleQuoted,
    /// Inside a backtick-delimited string.
    Template,
    /// Inside a `//` comment.
    LineComment,
    /// Inside a `/* ... */` comment.
    BlockComment,
}
2322
/// Identity of an MDX JSX tag.
// NOTE(review): the code that constructs these variants is outside this part of the
// file; presumably `Fragment` is the nameless `<>` form and `Named` borrows the tag
// name from the source line. Confirm against `mdx_jsx_tag_start`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum MdxJsxTag<'a> {
    Fragment,
    Named(&'a str),
}
2328
/// Result of recognizing the start of an MDX JSX tag.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
struct MdxJsxTagStart<'a> {
    /// Which tag was recognized.
    tag: MdxJsxTag<'a>,
    /// Whether the tag is a closing tag; `parse_mdx_flow` only reports a missing
    /// closing tag when this is `false`.
    closing: bool,
}
2334
2335fn parse_mdx_esm_flow(
2336 lines: &[Line<'_>],
2337 index: usize,
2338 diagnostics: &mut Vec<Diagnostic>,
2339) -> Option<(Block, usize)> {
2340 if !is_mdx_esm_start(lines[index].text) {
2341 return None;
2342 }
2343
2344 let mut value = String::new();
2345 let mut state = MdxEsmState::default();
2346 let mut cursor = index;
2347 while cursor < lines.len() {
2348 let line = lines[cursor].text;
2349 if cursor > index && !is_mdx_esm_continuation(line, &state) {
2350 break;
2351 }
2352 if cursor > index {
2353 value.push('\n');
2354 }
2355 value.push_str(line);
2356 update_mdx_esm_state(line, &mut state);
2357 cursor += 1;
2358 }
2359 if cursor >= lines.len() && state_has_open_mdx_esm_construct(&state) {
2360 diagnostics.push(Diagnostic::new(
2361 DiagnosticSeverity::Error,
2362 DiagnosticCode::InvalidMdx,
2363 Span::new(lines[index].start, lines[cursor - 1].end_with_eol),
2364 "MDX ESM block is missing a closing delimiter",
2365 ));
2366 }
2367
2368 Some((
2369 Block::MdxEsm(MdxEsm {
2370 meta: NodeMeta::new(Some(Span::new(lines[index].start, lines[cursor - 1].end))),
2371 value,
2372 }),
2373 cursor,
2374 ))
2375}
2376
/// Reports whether `line` begins, with no leading whitespace, with the keyword
/// `import` or `export` followed by a space.
fn is_mdx_esm_start(line: &str) -> bool {
    ["import ", "export "]
        .iter()
        .any(|keyword| line.starts_with(*keyword))
}
2380
2381fn is_mdx_esm_continuation(line: &str, state: &MdxEsmState) -> bool {
2382 if state_has_open_mdx_esm_construct(state) {
2383 return true;
2384 }
2385 let trimmed = line.trim_start();
2386 if trimmed.is_empty() {
2387 return false;
2388 }
2389 is_mdx_esm_start(line) || trimmed.starts_with("//") || trimmed.starts_with("/*")
2390}
2391
2392fn state_has_open_mdx_esm_construct(state: &MdxEsmState) -> bool {
2393 state.brace_depth > 0
2394 || state.bracket_depth > 0
2395 || state.paren_depth > 0
2396 || state.block_comment
2397 || state.quote == Some(b'`')
2398}
2399
2400fn update_mdx_esm_state(line: &str, state: &mut MdxEsmState) {
2401 let bytes = line.as_bytes();
2402 let mut index = 0;
2403 while index < bytes.len() {
2404 let byte = bytes[index];
2405 if state.block_comment {
2406 if byte == b'*' && bytes.get(index + 1) == Some(&b'/') {
2407 state.block_comment = false;
2408 index += 1;
2409 }
2410 index += 1;
2411 continue;
2412 }
2413
2414 if let Some(delimiter) = state.quote {
2415 if state.escaped {
2416 state.escaped = false;
2417 } else if byte == b'\\' {
2418 state.escaped = true;
2419 } else if byte == delimiter {
2420 state.quote = None;
2421 }
2422 index += 1;
2423 continue;
2424 }
2425
2426 match byte {
2427 b'\'' | b'"' | b'`' => state.quote = Some(byte),
2428 b'/' if bytes.get(index + 1) == Some(&b'/') => break,
2429 b'/' if bytes.get(index + 1) == Some(&b'*') => {
2430 state.block_comment = true;
2431 index += 1;
2432 }
2433 b'{' => state.brace_depth += 1,
2434 b'}' => state.brace_depth = state.brace_depth.saturating_sub(1),
2435 b'[' => state.bracket_depth += 1,
2436 b']' => state.bracket_depth = state.bracket_depth.saturating_sub(1),
2437 b'(' => state.paren_depth += 1,
2438 b')' => state.paren_depth = state.paren_depth.saturating_sub(1),
2439 _ => {}
2440 }
2441 index += 1;
2442 }
2443}
2444
/// Finds the `}` that closes the MDX expression opened at byte `open_byte` of
/// `lines[index]`, searching across lines.
///
/// Braces inside strings and comments are ignored. Returns `(line_index, byte_index)`
/// of the closing brace, but only if nothing except whitespace follows it on its line.
/// Returns `None` when no balanced close exists or when a `}` appears at depth zero.
fn find_mdx_expression_close(
    lines: &[Line<'_>],
    index: usize,
    open_byte: usize,
) -> Option<(usize, usize)> {
    let mut depth = 0usize;
    let mut state = MdxBraceState::Normal;
    let mut escaped = false;
    let mut cursor = index;

    while cursor < lines.len() {
        let bytes = lines[cursor].text.as_bytes();
        // Scanning starts at the opening brace on the first line, at column 0 afterwards.
        let mut byte_index = if cursor == index { open_byte } else { 0 };
        while byte_index < bytes.len() {
            let byte = bytes[byte_index];
            match state {
                MdxBraceState::Normal => match byte {
                    b'\'' => state = MdxBraceState::SingleQuoted,
                    b'"' => state = MdxBraceState::DoubleQuoted,
                    b'`' => state = MdxBraceState::Template,
                    b'/' if bytes.get(byte_index + 1) == Some(&b'/') => {
                        // Skip the rest of this line; the state is reset after the inner loop.
                        state = MdxBraceState::LineComment;
                        break;
                    }
                    b'/' if bytes.get(byte_index + 1) == Some(&b'*') => {
                        state = MdxBraceState::BlockComment;
                        // Step over the `*` as well.
                        byte_index += 1;
                    }
                    b'{' => depth += 1,
                    b'}' => {
                        // An unmatched `}` aborts the search.
                        depth = depth.checked_sub(1)?;
                        if depth == 0 {
                            // Only whitespace may follow the closing brace on its line.
                            return lines[cursor].text[byte_index + 1..]
                                .trim()
                                .is_empty()
                                .then_some((cursor, byte_index));
                        }
                    }
                    _ => {}
                },
                MdxBraceState::SingleQuoted => {
                    update_mdx_quote_state(byte, b'\'', &mut state, &mut escaped);
                }
                MdxBraceState::DoubleQuoted => {
                    update_mdx_quote_state(byte, b'"', &mut state, &mut escaped);
                }
                MdxBraceState::Template => {
                    update_mdx_quote_state(byte, b'`', &mut state, &mut escaped);
                }
                MdxBraceState::LineComment => break,
                MdxBraceState::BlockComment => {
                    if byte == b'*' && bytes.get(byte_index + 1) == Some(&b'/') {
                        state = MdxBraceState::Normal;
                        // Step over the `/` as well.
                        byte_index += 1;
                    }
                }
            }
            byte_index += 1;
        }
        // A line comment never extends past the end of its line.
        if state == MdxBraceState::LineComment {
            state = MdxBraceState::Normal;
        }
        cursor += 1;
    }

    None
}
2512
2513fn update_mdx_quote_state(byte: u8, delimiter: u8, state: &mut MdxBraceState, escaped: &mut bool) {
2514 if *escaped {
2515 *escaped = false;
2516 return;
2517 }
2518 if byte == b'\\' {
2519 *escaped = true;
2520 return;
2521 }
2522 if byte == delimiter {
2523 *state = MdxBraceState::Normal;
2524 }
2525}
2526
/// Finds the `}` that closes the MDX expression opened at byte `open_byte` of `input`.
///
/// Braces inside strings and comments are ignored. Returns the byte index of the
/// closing brace, or `None` when `input[open_byte]` is not `{`, when no balanced close
/// exists, or when a `}` appears at depth zero.
fn find_mdx_expression_inline_close(input: &str, open_byte: usize) -> Option<usize> {
    let bytes = input.as_bytes();
    if bytes.get(open_byte) != Some(&b'{') {
        return None;
    }

    let mut depth = 0usize;
    let mut state = MdxBraceState::Normal;
    let mut escaped = false;
    // Start on the opening brace itself so that it is counted in `depth`.
    let mut cursor = open_byte;
    while cursor < bytes.len() {
        let byte = bytes[cursor];
        match state {
            MdxBraceState::Normal => match byte {
                b'\'' => state = MdxBraceState::SingleQuoted,
                b'"' => state = MdxBraceState::DoubleQuoted,
                b'`' => state = MdxBraceState::Template,
                b'/' if bytes.get(cursor + 1) == Some(&b'/') => {
                    state = MdxBraceState::LineComment;
                    // Step over the second `/` as well.
                    cursor += 1;
                }
                b'/' if bytes.get(cursor + 1) == Some(&b'*') => {
                    state = MdxBraceState::BlockComment;
                    // Step over the `*` as well.
                    cursor += 1;
                }
                b'{' => depth += 1,
                b'}' => {
                    // An unmatched `}` aborts the search.
                    depth = depth.checked_sub(1)?;
                    if depth == 0 {
                        return Some(cursor);
                    }
                }
                _ => {}
            },
            MdxBraceState::SingleQuoted => {
                update_mdx_quote_state(byte, b'\'', &mut state, &mut escaped);
            }
            MdxBraceState::DoubleQuoted => {
                update_mdx_quote_state(byte, b'"', &mut state, &mut escaped);
            }
            MdxBraceState::Template => {
                update_mdx_quote_state(byte, b'`', &mut state, &mut escaped);
            }
            MdxBraceState::LineComment => {
                // A line comment runs until the next line feed in the input.
                if byte == b'\n' {
                    state = MdxBraceState::Normal;
                }
            }
            MdxBraceState::BlockComment => {
                if byte == b'*' && bytes.get(cursor + 1) == Some(&b'/') {
                    state = MdxBraceState::Normal;
                    // Step over the `/` as well.
                    cursor += 1;
                }
            }
        }
        cursor += 1;
    }
    None
}
2586
2587fn collect_mdx_expression_value(
2588 lines: &[Line<'_>],
2589 start_line: usize,
2590 open_byte: usize,
2591 close_line: usize,
2592 close_byte: usize,
2593) -> String {
2594 let mut value = String::new();
2595 let mut cursor = start_line;
2596 while cursor <= close_line {
2597 if cursor > start_line {
2598 value.push('\n');
2599 }
2600 let line = lines[cursor].text;
2601 let segment = if cursor == start_line && cursor == close_line {
2602 &line[open_byte + 1..close_byte]
2603 } else if cursor == start_line {
2604 &line[open_byte + 1..]
2605 } else if cursor == close_line {
2606 &line[..close_byte]
2607 } else {
2608 line
2609 };
2610 value.push_str(segment);
2611 cursor += 1;
2612 }
2613 value
2614}
2615
2616fn find_mdx_jsx_close<'a>(lines: &'a [Line<'a>], index: usize) -> Option<usize> {
2617 let line = lines[index];
2618 let trimmed = line.text.trim_start();
2619 let start_byte = line.text.len() - trimmed.len();
2620 let root = mdx_jsx_tag_start(line.text, start_byte)?;
2621 if root.closing {
2622 return None;
2623 }
2624
2625 let (mut cursor_line, mut cursor_byte, self_closing) =
2626 find_mdx_jsx_tag_end(lines, index, start_byte)?;
2627 if self_closing {
2628 return Some(cursor_line);
2629 }
2630
2631 let mut depth = 1usize;
2632 cursor_byte += 1;
2633 'scan: while cursor_line < lines.len() {
2634 let line = lines[cursor_line].text;
2635 while cursor_byte < line.len() {
2636 let Some(relative_start) = line[cursor_byte..].find('<') else {
2637 break;
2638 };
2639 let tag_start_byte = cursor_byte + relative_start;
2640 let Some(candidate) = mdx_jsx_tag_start(line, tag_start_byte) else {
2641 cursor_byte = tag_start_byte + 1;
2642 continue;
2643 };
2644 let Some((tag_end_line, tag_end_byte, candidate_self_closing)) =
2645 find_mdx_jsx_tag_end(lines, cursor_line, tag_start_byte)
2646 else {
2647 return None;
2648 };
2649
2650 if mdx_jsx_tag_matches(root.tag, candidate.tag) {
2651 if candidate.closing {
2652 depth = depth.saturating_sub(1);
2653 if depth == 0 {
2654 return Some(tag_end_line);
2655 }
2656 } else if !candidate_self_closing {
2657 depth += 1;
2658 }
2659 }
2660
2661 cursor_byte = tag_end_byte + 1;
2662 if tag_end_line != cursor_line {
2663 cursor_line = tag_end_line;
2664 continue 'scan;
2665 }
2666 }
2667 cursor_line += 1;
2668 cursor_byte = 0;
2669 }
2670 None
2671}
2672
2673fn parse_mdx_jsx_inline(input: &str, index: usize) -> Option<(usize, String)> {
2674 let root = mdx_jsx_tag_start(input, index)?;
2675 if root.closing {
2676 return None;
2677 }
2678
2679 let (mut cursor, self_closing) = find_mdx_jsx_tag_end_in_text(input, index)?;
2680 if self_closing {
2681 let end = cursor + 1;
2682 return Some((end, input[index..end].into()));
2683 }
2684
2685 let mut depth = 1usize;
2686 cursor += 1;
2687 while cursor < input.len() {
2688 let Some(relative_start) = input[cursor..].find('<') else {
2689 return None;
2690 };
2691 let tag_start_byte = cursor + relative_start;
2692 let Some(candidate) = mdx_jsx_tag_start(input, tag_start_byte) else {
2693 cursor = tag_start_byte + 1;
2694 continue;
2695 };
2696 let Some((tag_end, candidate_self_closing)) =
2697 find_mdx_jsx_tag_end_in_text(input, tag_start_byte)
2698 else {
2699 return None;
2700 };
2701
2702 if mdx_jsx_tag_matches(root.tag, candidate.tag) {
2703 if candidate.closing {
2704 depth = depth.saturating_sub(1);
2705 if depth == 0 {
2706 let end = tag_end + 1;
2707 return Some((end, input[index..end].into()));
2708 }
2709 } else if !candidate_self_closing {
2710 depth += 1;
2711 }
2712 }
2713 cursor = tag_end + 1;
2714 }
2715 None
2716}
2717
2718fn mdx_jsx_tag_start(input: &str, start: usize) -> Option<MdxJsxTagStart<'_>> {
2719 let bytes = input.as_bytes();
2720 if bytes.get(start) != Some(&b'<') {
2721 return None;
2722 }
2723
2724 match bytes.get(start + 1) {
2725 Some(b'>') => {
2726 return Some(MdxJsxTagStart {
2727 tag: MdxJsxTag::Fragment,
2728 closing: false,
2729 });
2730 }
2731 Some(b'/') if bytes.get(start + 2) == Some(&b'>') => {
2732 return Some(MdxJsxTagStart {
2733 tag: MdxJsxTag::Fragment,
2734 closing: true,
2735 });
2736 }
2737 Some(b'!' | b'?') | None => return None,
2738 _ => {}
2739 }
2740
2741 let closing = bytes.get(start + 1) == Some(&b'/');
2742 let name_start = start + if closing { 2 } else { 1 };
2743 if !bytes
2744 .get(name_start)
2745 .is_some_and(|byte| is_mdx_jsx_name_start_byte(*byte))
2746 {
2747 return None;
2748 }
2749
2750 let mut name_end = name_start + 1;
2751 while bytes
2752 .get(name_end)
2753 .is_some_and(|byte| is_mdx_jsx_name_byte(*byte))
2754 {
2755 name_end += 1;
2756 }
2757 if name_end == name_start {
2758 return None;
2759 }
2760 if bytes
2761 .get(name_end)
2762 .is_some_and(|byte| !is_mdx_jsx_name_delimiter(*byte))
2763 {
2764 return None;
2765 }
2766 Some(MdxJsxTagStart {
2767 tag: MdxJsxTag::Named(&input[name_start..name_end]),
2768 closing,
2769 })
2770}
2771
2772fn mdx_jsx_tag_matches(left: MdxJsxTag<'_>, right: MdxJsxTag<'_>) -> bool {
2773 match (left, right) {
2774 (MdxJsxTag::Fragment, MdxJsxTag::Fragment) => true,
2775 (MdxJsxTag::Named(left), MdxJsxTag::Named(right)) => left == right,
2776 _ => false,
2777 }
2778}
2779
2780fn find_mdx_jsx_tag_end(
2781 lines: &[Line<'_>],
2782 start_line: usize,
2783 start_byte: usize,
2784) -> Option<(usize, usize, bool)> {
2785 let mut line_index = start_line;
2786 let mut byte_index = start_byte + 1;
2787 let mut quote = None;
2788 let mut escaped = false;
2789 let mut expression_depth = 0usize;
2790 let mut expression_state = MdxBraceState::Normal;
2791 let mut expression_escaped = false;
2792
2793 while line_index < lines.len() {
2794 let bytes = lines[line_index].text.as_bytes();
2795 while byte_index < bytes.len() {
2796 let byte = bytes[byte_index];
2797 if expression_depth > 0 {
2798 if update_mdx_jsx_expression_state(
2799 byte,
2800 bytes.get(byte_index + 1).copied(),
2801 &mut expression_depth,
2802 &mut expression_state,
2803 &mut expression_escaped,
2804 ) {
2805 byte_index += 1;
2806 }
2807 byte_index += 1;
2808 continue;
2809 }
2810
2811 if let Some(delimiter) = quote {
2812 if escaped {
2813 escaped = false;
2814 } else if byte == b'\\' {
2815 escaped = true;
2816 } else if byte == delimiter {
2817 quote = None;
2818 }
2819 byte_index += 1;
2820 continue;
2821 }
2822
2823 match byte {
2824 b'\'' | b'"' => quote = Some(byte),
2825 b'{' => {
2826 expression_depth = 1;
2827 expression_state = MdxBraceState::Normal;
2828 expression_escaped = false;
2829 }
2830 b'>' if expression_depth == 0 => {
2831 let self_closing =
2832 previous_nonspace_before(lines, line_index, byte_index) == Some(b'/');
2833 return Some((line_index, byte_index, self_closing));
2834 }
2835 _ => {}
2836 }
2837 byte_index += 1;
2838 }
2839 if expression_state == MdxBraceState::LineComment {
2840 expression_state = MdxBraceState::Normal;
2841 }
2842 line_index += 1;
2843 byte_index = 0;
2844 }
2845 None
2846}
2847
2848fn previous_nonspace_before(
2849 lines: &[Line<'_>],
2850 line_index: usize,
2851 byte_index: usize,
2852) -> Option<u8> {
2853 let mut cursor_line = line_index;
2854 let mut cursor_byte = byte_index;
2855
2856 loop {
2857 if let Some(byte) = lines[cursor_line].text.as_bytes()[..cursor_byte]
2858 .iter()
2859 .rev()
2860 .copied()
2861 .find(|byte| !byte.is_ascii_whitespace())
2862 {
2863 return Some(byte);
2864 }
2865 if cursor_line == 0 {
2866 return None;
2867 }
2868 cursor_line -= 1;
2869 cursor_byte = lines[cursor_line].text.len();
2870 }
2871}
2872
2873fn find_mdx_jsx_tag_end_in_text(input: &str, start_byte: usize) -> Option<(usize, bool)> {
2874 let bytes = input.as_bytes();
2875 let mut byte_index = start_byte + 1;
2876 let mut quote = None;
2877 let mut escaped = false;
2878 let mut expression_depth = 0usize;
2879 let mut expression_state = MdxBraceState::Normal;
2880 let mut expression_escaped = false;
2881
2882 while byte_index < bytes.len() {
2883 let byte = bytes[byte_index];
2884 if expression_depth > 0 {
2885 if update_mdx_jsx_expression_state(
2886 byte,
2887 bytes.get(byte_index + 1).copied(),
2888 &mut expression_depth,
2889 &mut expression_state,
2890 &mut expression_escaped,
2891 ) {
2892 byte_index += 1;
2893 }
2894 byte_index += 1;
2895 continue;
2896 }
2897
2898 if let Some(delimiter) = quote {
2899 if escaped {
2900 escaped = false;
2901 } else if byte == b'\\' {
2902 escaped = true;
2903 } else if byte == delimiter {
2904 quote = None;
2905 }
2906 byte_index += 1;
2907 continue;
2908 }
2909
2910 match byte {
2911 b'\'' | b'"' => quote = Some(byte),
2912 b'{' => {
2913 expression_depth = 1;
2914 expression_state = MdxBraceState::Normal;
2915 expression_escaped = false;
2916 }
2917 b'>' if expression_depth == 0 => {
2918 let self_closing = previous_nonspace_before_text(input, byte_index) == Some(b'/');
2919 return Some((byte_index, self_closing));
2920 }
2921 _ => {}
2922 }
2923 byte_index += 1;
2924 }
2925 None
2926}
2927
/// Returns the last byte of `input[..byte_index]` that is not ASCII
/// whitespace, or `None` when that prefix is empty or all whitespace.
fn previous_nonspace_before_text(input: &str, byte_index: usize) -> Option<u8> {
    let prefix = &input.as_bytes()[..byte_index];
    prefix
        .iter()
        .rposition(|byte| !byte.is_ascii_whitespace())
        .map(|at| prefix[at])
}
2935
2936fn update_mdx_jsx_expression_state(
2937 byte: u8,
2938 next: Option<u8>,
2939 depth: &mut usize,
2940 state: &mut MdxBraceState,
2941 escaped: &mut bool,
2942) -> bool {
2943 match *state {
2944 MdxBraceState::Normal => match byte {
2945 b'\'' => *state = MdxBraceState::SingleQuoted,
2946 b'"' => *state = MdxBraceState::DoubleQuoted,
2947 b'`' => *state = MdxBraceState::Template,
2948 b'/' if next == Some(b'/') => {
2949 *state = MdxBraceState::LineComment;
2950 return true;
2951 }
2952 b'/' if next == Some(b'*') => {
2953 *state = MdxBraceState::BlockComment;
2954 return true;
2955 }
2956 b'{' => *depth += 1,
2957 b'}' => {
2958 *depth = (*depth).saturating_sub(1);
2959 if *depth == 0 {
2960 *state = MdxBraceState::Normal;
2961 *escaped = false;
2962 }
2963 }
2964 _ => {}
2965 },
2966 MdxBraceState::SingleQuoted => {
2967 update_mdx_quote_state(byte, b'\'', state, escaped);
2968 }
2969 MdxBraceState::DoubleQuoted => {
2970 update_mdx_quote_state(byte, b'"', state, escaped);
2971 }
2972 MdxBraceState::Template => {
2973 update_mdx_quote_state(byte, b'`', state, escaped);
2974 }
2975 MdxBraceState::LineComment => {
2976 if byte == b'\n' {
2977 *state = MdxBraceState::Normal;
2978 }
2979 }
2980 MdxBraceState::BlockComment => {
2981 if byte == b'*' && next == Some(b'/') {
2982 *state = MdxBraceState::Normal;
2983 return true;
2984 }
2985 }
2986 }
2987 false
2988}
2989
/// Reports whether `byte` may begin a JSX tag name: an ASCII letter, `_` or `$`.
fn is_mdx_jsx_name_start_byte(byte: u8) -> bool {
    matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'$')
}
2993
/// Reports whether `byte` may continue a JSX tag name: an ASCII letter or
/// digit, or one of `.`, `:`, `_`, `-`, `$`.
fn is_mdx_jsx_name_byte(byte: u8) -> bool {
    matches!(
        byte,
        b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'.' | b':' | b'_' | b'-' | b'$'
    )
}
2997
/// Reports whether `byte` may directly follow a JSX tag name: ASCII
/// whitespace or one of `/`, `>`, `{`, `}`.
fn is_mdx_jsx_name_delimiter(byte: u8) -> bool {
    b"/>{}".contains(&byte) || byte.is_ascii_whitespace()
}
3001
3002fn collect_line_range(lines: &[Line<'_>], start: usize, end: usize) -> String {
3003 let mut value = String::new();
3004 let mut cursor = start;
3005 while cursor <= end {
3006 if cursor > start {
3007 value.push('\n');
3008 }
3009 value.push_str(lines[cursor].text);
3010 cursor += 1;
3011 }
3012 value
3013}
3014
3015fn parse_indented_code(
3016 lines: &[Line<'_>],
3017 index: usize,
3018 options: &SyntaxOptions,
3019) -> Option<(Block, usize)> {
3020 if !options.constructs.indented_code || strip_indented_code_prefix(lines[index].text).is_none()
3021 {
3022 return None;
3023 }
3024 let mut value = String::new();
3025 let mut cursor = index;
3026 let mut content_end = index;
3029 let mut content_end_len = 0usize;
3030 while cursor < lines.len() {
3031 if let Some(text) = strip_indented_code_prefix(lines[cursor].text) {
3032 ensure_line_separator(&mut value);
3033 value.push_str(text);
3034 value.push_str(lines[cursor].eol);
3035 if !text.trim().is_empty() {
3036 content_end = cursor;
3037 content_end_len = value.len();
3038 }
3039 cursor += 1;
3040 continue;
3041 }
3042
3043 if !lines[cursor].text.trim().is_empty() {
3044 break;
3045 }
3046 ensure_line_separator(&mut value);
3047 value.push_str(lines[cursor].eol);
3048 cursor += 1;
3049 }
3050 value.truncate(content_end_len);
3052 Some((
3053 Block::CodeBlock(CodeBlock {
3054 meta: NodeMeta::new(Some(Span::new(
3055 lines[index].start,
3056 lines[content_end].end_with_eol,
3057 ))),
3058 kind: CodeBlockKind::Indented,
3059 info: None,
3060 value,
3061 }),
3062 cursor,
3063 ))
3064}
3065
/// Strips the leading whitespace that makes `input` an indented code line.
///
/// Spaces advance the column by one and tabs advance it to the next multiple
/// of four. As soon as column four is reached, the text after the byte that
/// reached it is returned. Returns `None` if any other byte, or the end of the
/// input, is met first.
fn strip_indented_code_prefix(input: &str) -> Option<&str> {
    let mut column = 0usize;
    for (index, byte) in input.bytes().enumerate() {
        column = match byte {
            b' ' => column + 1,
            b'\t' => (column / 4 + 1) * 4,
            _ => return None,
        };
        if column >= 4 {
            return Some(&input[index + 1..]);
        }
    }
    None
}
3087
3088fn parse_table(
3089 lines: &[Line<'_>],
3090 index: usize,
3091 options: &SyntaxOptions,
3092 definitions: &[String],
3093 diagnostics: &mut Vec<Diagnostic>,
3094) -> Option<(Block, usize)> {
3095 if !options.constructs.gfm_table || index + 1 >= lines.len() {
3096 return None;
3097 }
3098 let delimiter = table_indent_line(lines[index + 1].text, options.constructs.indented_code)?;
3099 if list_marker_info(delimiter).is_some() {
3100 return None;
3101 }
3102 if !table_has_separator(lines[index].text, delimiter, options.constructs.spoiler) {
3103 return None;
3104 }
3105 let alignments = parse_table_delimiter(delimiter, options.constructs.spoiler)?;
3106 let headers = split_table_row(lines[index].text, options.constructs.spoiler);
3107 if headers.len() != alignments.len() {
3108 return None;
3109 }
3110
3111 let mut rows = Vec::new();
3112 rows.push(TableRow {
3113 meta: NodeMeta::new(Some(Span::new(lines[index].start, lines[index].end))),
3114 cells: headers
3115 .iter()
3116 .map(|cell| TableCell {
3117 meta: NodeMeta::default(),
3118 children: parse_inlines(
3119 cell.trim(),
3120 lines[index].start,
3121 options,
3122 definitions,
3123 diagnostics,
3124 ),
3125 })
3126 .collect(),
3127 });
3128
3129 let mut cursor = index + 2;
3130 while cursor < lines.len() {
3131 let Some(row) = table_indent_line(lines[cursor].text, options.constructs.indented_code)
3132 else {
3133 break;
3134 };
3135 if row.trim().is_empty() || table_body_line_ends_table(lines[cursor].text, options) {
3139 break;
3140 }
3141 let cells = split_table_row(row, options.constructs.spoiler);
3142 rows.push(TableRow {
3143 meta: NodeMeta::new(Some(Span::new(lines[cursor].start, lines[cursor].end))),
3144 cells: alignments
3145 .iter()
3146 .enumerate()
3147 .map(|(cell_index, _)| {
3148 let value = cells.get(cell_index).map(String::as_str).unwrap_or("");
3149 TableCell {
3150 meta: NodeMeta::default(),
3151 children: parse_inlines(
3152 value.trim(),
3153 lines[cursor].start,
3154 options,
3155 definitions,
3156 diagnostics,
3157 ),
3158 }
3159 })
3160 .collect(),
3161 });
3162 cursor += 1;
3163 }
3164
3165 Some((
3166 Block::Table(Table {
3167 meta: NodeMeta::new(Some(Span::new(
3168 lines[index].start,
3169 lines[cursor - 1].end_with_eol,
3170 ))),
3171 alignments,
3172 rows,
3173 }),
3174 cursor,
3175 ))
3176}
3177
3178fn parse_setext_heading(
3179 lines: &[Line<'_>],
3180 index: usize,
3181 options: &SyntaxOptions,
3182 definitions: &[String],
3183) -> Option<(Block, usize)> {
3184 if index + 1 >= lines.len() || lines[index].text.trim().is_empty() {
3185 return None;
3186 }
3187
3188 let mut underline_index = index + 1;
3193 loop {
3194 let underline_depth = if lines[underline_index].lazy {
3200 None
3201 } else {
3202 setext_underline_depth(lines[underline_index].text)
3203 };
3204 if let Some(depth) = underline_depth {
3205 let mut value = String::new();
3206 for line in &lines[index..underline_index] {
3207 push_line(&mut value, trim_ascii_start(line.text));
3210 }
3211 return Some((
3212 Block::Heading(Heading {
3213 meta: NodeMeta::new(Some(Span::new(
3214 lines[index].start,
3215 lines[underline_index].end,
3216 ))),
3217 depth,
3218 kind: HeadingKind::Setext,
3219 children: parse_inlines(
3220 &value,
3221 lines[index].start,
3222 options,
3223 definitions,
3224 &mut Vec::new(),
3225 ),
3226 }),
3227 underline_index + 1,
3228 ));
3229 }
3230
3231 let line = lines[underline_index].text;
3234 if line.trim().is_empty()
3235 || table_can_start(lines, underline_index, options)
3236 || likely_block_start(line, options)
3237 {
3238 return None;
3239 }
3240 underline_index += 1;
3241 if underline_index >= lines.len() {
3242 return None;
3243 }
3244 }
3245}
3246
3247fn setext_underline_depth(input: &str) -> Option<u8> {
3248 let underline = trim_up_to_three_spaces(input)?.trim();
3249 match underline {
3250 text if !text.is_empty() && text.chars().all(|char| char == '=') => Some(1),
3251 text if !text.is_empty() && text.chars().all(|char| char == '-') => Some(2),
3252 _ => None,
3253 }
3254}
3255
3256fn parse_paragraph(
3257 lines: &[Line<'_>],
3258 index: usize,
3259 options: &SyntaxOptions,
3260 definitions: &[String],
3261 diagnostics: &mut Vec<Diagnostic>,
3262) -> (Block, usize) {
3263 let mut value = String::new();
3264 let start = lines[index].start;
3265 let mut cursor = index;
3266 while cursor < lines.len() {
3267 if lines[cursor].text.trim().is_empty() {
3268 break;
3269 }
3270 if cursor > index && !lines[cursor].lazy {
3274 if table_can_start(lines, cursor, options) {
3275 break;
3276 }
3277 if likely_block_start(lines[cursor].text, options) {
3278 break;
3279 }
3280 }
3281 if !value.is_empty() {
3282 value.push('\n');
3283 }
3284 value.push_str(trim_ascii_start(lines[cursor].text));
3285 cursor += 1;
3286 }
3287
3288 let end = lines[cursor - 1].end;
3289 (
3290 Block::Paragraph(Paragraph {
3291 meta: NodeMeta::new(Some(Span::new(start, end))),
3292 children: parse_inlines(&value, start, options, definitions, diagnostics),
3293 }),
3294 cursor,
3295 )
3296}
3297
/// Bookkeeping for one run of emphasis-style delimiter bytes found during
/// inline parsing. Each run is also present in the inline node list as a
/// text node until it is consumed.
#[derive(Clone, Copy)]
struct DelimMarker {
    /// Index, in the inline node list, of the text node holding this run.
    node_index: usize,
    /// The delimiter byte the run is made of.
    marker: u8,
    /// Number of delimiter bytes in the run that have not been consumed yet.
    length: usize,
    /// Whether the run may open an emphasis-style span.
    can_open: bool,
    /// Whether the run may close an emphasis-style span.
    can_close: bool,
    /// Source offset recorded for the run (`base_offset + index` when it is
    /// pushed); `apply_emphasis` reads and updates it to compute wrapper spans.
    span_start: usize,
    /// Set once the run must be ignored by delimiter matching.
    inactive: bool,
}
3316
3317fn record_emphasis_delimiter(
3331 nodes: &mut Vec<Inline>,
3332 delimiters: &mut Vec<DelimMarker>,
3333 input: &str,
3334 index: usize,
3335 base_offset: usize,
3336 marker: u8,
3337 strikethrough: bool,
3338) {
3339 let length = delimiter_byte_run_len(input, index, marker);
3340 let (mut can_open, mut can_close) = if marker == b'_' {
3341 (
3342 can_open_underscore(input, index, length),
3343 can_close_underscore(input, index, length),
3344 )
3345 } else {
3346 (
3347 can_open_delimited(input, index, length),
3348 can_close_delimited(input, index, length),
3349 )
3350 };
3351
3352 if strikethrough && marker != b'~' {
3356 let before = input[..index].chars().next_back();
3357 let after = input[index + length..].chars().next();
3358 if after == Some('~') {
3359 can_open = true;
3360 }
3361 if before == Some('~') {
3362 can_close = true;
3363 }
3364 }
3365
3366 let value = String::from(marker as char).repeat(length);
3367
3368 let node_index = nodes.len();
3369 nodes.push(Inline::Text(Text {
3370 meta: NodeMeta::new(Some(Span::new(
3371 base_offset + index,
3372 base_offset + index + length,
3373 ))),
3374 value,
3375 }));
3376
3377 delimiters.push(DelimMarker {
3378 node_index,
3379 marker,
3380 length,
3381 can_open,
3382 can_close,
3383 span_start: base_offset + index,
3384 inactive: false,
3385 });
3386}
3387
/// Resolves the recorded delimiter runs into emphasis, strong and delete
/// nodes and returns the rewritten node list.
///
/// Closers are visited left to right. For each one the nearest earlier
/// compatible opener is looked up; on a match the nodes between the two are
/// wrapped via `apply_emphasis`, and the same closer is revisited while it
/// still has unconsumed markers. Adjacent text nodes are merged at the end.
fn process_emphasis(mut nodes: Vec<Inline>, mut delimiters: Vec<DelimMarker>) -> Vec<Inline> {
    if delimiters.is_empty() {
        return nodes;
    }

    // Per closer class (see `openers_bottom_key`): the lowest delimiter index
    // that is still worth searching for an opener.
    let mut openers_bottom: [Option<usize>; 18] = [None; 18];
    let mut closer_idx = 0;

    while closer_idx < delimiters.len() {
        // Snapshot of the closer; re-read on every pass because a partial
        // match changes its remaining length.
        let closer = delimiters[closer_idx];
        if closer.inactive || !closer.can_close {
            closer_idx += 1;
            continue;
        }

        let key = openers_bottom_key(&closer);
        let bottom = openers_bottom[key];

        let mut opener_idx = None;
        // Walk backwards from the closer towards the recorded bottom.
        let mut search = closer_idx;
        while search > 0 {
            search -= 1;
            if let Some(bottom) = bottom {
                if search < bottom {
                    break;
                }
            }
            let candidate = delimiters[search];
            if candidate.inactive || candidate.marker != closer.marker || !candidate.can_open {
                continue;
            }
            if emphasis_delimiters_match(&candidate, &closer) {
                opener_idx = Some(search);
                break;
            }
        }

        let Some(opener_idx) = opener_idx else {
            // No opener: later closers of this class need not look below here.
            openers_bottom[key] = Some(closer_idx);
            // A run that cannot open either is of no further use.
            if !closer.can_open {
                delimiters[closer_idx].inactive = true;
            }
            closer_idx += 1;
            continue;
        };

        let (used, wrap) = if closer.marker == b'~' {
            // Tilde runs are consumed whole; the run length picks the marker.
            let length = delimiters[closer_idx].length;
            let marker = if length >= 2 {
                DeleteMarker::DoubleTilde
            } else {
                DeleteMarker::SingleTilde
            };
            (length, EmphasisWrap::Delete(marker))
        } else {
            // Strong needs two markers on both sides, emphasis one.
            let strong = delimiters[opener_idx].length >= 2 && delimiters[closer_idx].length >= 2;
            let used = if strong { 2 } else { 1 };
            let wrap = if strong {
                EmphasisWrap::Strong
            } else {
                EmphasisWrap::Emphasis
            };
            (used, wrap)
        };

        apply_emphasis(
            &mut nodes,
            &mut delimiters,
            opener_idx,
            closer_idx,
            used,
            wrap,
        );

        // Runs between the pair are now inside the wrapper; take them out of
        // further matching.
        let mut inner = opener_idx + 1;
        while inner < closer_idx {
            delimiters[inner].inactive = true;
            inner += 1;
        }

        if delimiters[opener_idx].length == 0 {
            delimiters[opener_idx].inactive = true;
        }
        // Advance only once the closer is exhausted; otherwise it is matched
        // again on the next pass.
        if delimiters[closer_idx].length == 0 {
            delimiters[closer_idx].inactive = true;
            closer_idx += 1;
        }
    }

    merge_adjacent_text(&mut nodes);
    nodes
}
3499
3500fn merge_adjacent_text(nodes: &mut Vec<Inline>) {
3504 let mut write = 0;
3505 for read in 0..nodes.len() {
3506 if read != write {
3507 nodes.swap(read, write);
3508 }
3509 if write > 0 {
3510 let (head, tail) = nodes.split_at_mut(write);
3511 if let (Inline::Text(previous), Inline::Text(current)) =
3512 (&mut head[write - 1], &tail[0])
3513 {
3514 previous.value.push_str(¤t.value);
3515 if let (Some(previous_span), Some(current_span)) =
3516 (previous.meta.span.as_mut(), current.meta.span)
3517 {
3518 previous_span.end = current_span.end;
3519 }
3520 continue;
3521 }
3522 }
3523 write += 1;
3524 }
3525 nodes.truncate(write);
3526
3527 for node in nodes.iter_mut() {
3528 match node {
3529 Inline::Emphasis(emphasis) => merge_adjacent_text(&mut emphasis.children),
3530 Inline::Strong(strong) => merge_adjacent_text(&mut strong.children),
3531 Inline::Delete(delete) => merge_adjacent_text(&mut delete.children),
3532 _ => {}
3533 }
3534 }
3535}
3536
3537fn openers_bottom_key(closer: &DelimMarker) -> usize {
3539 let marker = match closer.marker {
3540 b'_' => 1,
3541 b'~' => 2,
3542 _ => 0,
3543 };
3544 let both = usize::from(closer.can_open && closer.can_close);
3545 let modulo = closer.length % 3;
3546 ((marker * 2) + both) * 3 + modulo
3547}
3548
3549fn emphasis_delimiters_match(opener: &DelimMarker, closer: &DelimMarker) -> bool {
3551 if opener.marker == b'~' {
3554 return opener.length == closer.length;
3555 }
3556
3557 let opener_both = opener.can_open && opener.can_close;
3561 let closer_both = closer.can_open && closer.can_close;
3562 if opener_both || closer_both {
3563 let sum = opener.length + closer.length;
3564 if sum % 3 == 0 && !(opener.length % 3 == 0 && closer.length % 3 == 0) {
3565 return false;
3566 }
3567 }
3568 true
3569}
3570
/// The kind of wrapper node `apply_emphasis` builds around the children
/// enclosed by a matched opener/closer pair.
#[derive(Clone, Copy)]
enum EmphasisWrap {
    /// Wrap the children in an `Inline::Emphasis` node.
    Emphasis,
    /// Wrap the children in an `Inline::Strong` node.
    Strong,
    /// Wrap the children in an `Inline::Delete` node carrying the given marker.
    Delete(DeleteMarker),
}
3578
3579fn apply_emphasis(
3583 nodes: &mut Vec<Inline>,
3584 delimiters: &mut [DelimMarker],
3585 opener_idx: usize,
3586 closer_idx: usize,
3587 used: usize,
3588 wrap: EmphasisWrap,
3589) {
3590 let opener_node = delimiters[opener_idx].node_index;
3591 let closer_node = delimiters[closer_idx].node_index;
3592
3593 trim_delimiter_text_tail(&mut nodes[opener_node], used);
3596 delimiters[opener_idx].length -= used;
3597 delimiters[opener_idx].span_start += used;
3598
3599 trim_delimiter_text_head(&mut nodes[closer_node], used);
3600 delimiters[closer_idx].length -= used;
3601
3602 let span_start = delimiters[opener_idx].span_start - used;
3605 let span_end = delimiters[closer_idx].span_start + delimiters[closer_idx].length + used;
3606
3607 let children_start = opener_node + 1;
3610 let children_end = closer_node; let children: Vec<Inline> = nodes.drain(children_start..children_end).collect();
3612 let removed = children.len();
3613
3614 let meta = NodeMeta::new(Some(Span::new(span_start, span_end)));
3615 let wrapped = match wrap {
3616 EmphasisWrap::Strong => Inline::Strong(Strong { meta, children }),
3617 EmphasisWrap::Emphasis => Inline::Emphasis(Emphasis { meta, children }),
3618 EmphasisWrap::Delete(marker) => Inline::Delete(Delete {
3619 meta,
3620 marker,
3621 children,
3622 }),
3623 };
3624 nodes.insert(children_start, wrapped);
3625
3626 reindex_delimiters(delimiters, children_end, 1 - removed as isize);
3630
3631 if delimiters[closer_idx].length == 0 {
3635 let pos = delimiters[closer_idx].node_index;
3636 nodes.remove(pos);
3637 reindex_delimiters(delimiters, pos, -1);
3638 }
3639 if delimiters[opener_idx].length == 0 {
3640 let pos = delimiters[opener_idx].node_index;
3641 nodes.remove(pos);
3642 reindex_delimiters(delimiters, pos, -1);
3643 }
3644}
3645
3646fn reindex_delimiters(delimiters: &mut [DelimMarker], from: usize, delta: isize) {
3648 if delta == 0 {
3649 return;
3650 }
3651 for delimiter in delimiters.iter_mut() {
3652 if delimiter.node_index >= from {
3653 delimiter.node_index = (delimiter.node_index as isize + delta) as usize;
3654 }
3655 }
3656}
3657
3658fn trim_delimiter_text_tail(node: &mut Inline, count: usize) {
3660 if let Inline::Text(text) = node {
3661 let new_len = text.value.len().saturating_sub(count);
3662 text.value.truncate(new_len);
3663 if let Some(span) = text.meta.span.as_mut() {
3664 span.end = span.end.saturating_sub(count);
3665 }
3666 }
3667}
3668
3669fn trim_delimiter_text_head(node: &mut Inline, count: usize) {
3671 if let Inline::Text(text) = node {
3672 let count = count.min(text.value.len());
3673 text.value.drain(..count);
3674 if let Some(span) = text.meta.span.as_mut() {
3675 span.start += count;
3676 }
3677 }
3678}
3679
3680fn parse_inlines(
3681 input: &str,
3682 base_offset: usize,
3683 options: &SyntaxOptions,
3684 definitions: &[String],
3685 diagnostics: &mut Vec<Diagnostic>,
3686) -> Vec<Inline> {
3687 parse_inlines_with_context(
3688 input,
3689 base_offset,
3690 options,
3691 definitions,
3692 diagnostics,
3693 InlineContext::default(),
3694 )
3695}
3696
/// Settings handed down through nested inline parsing calls.
#[derive(Clone, Copy)]
struct InlineContext {
    /// Link-related switch; `true` in the default context.
    // NOTE(review): presumably cleared while parsing link text so that links
    // cannot nest — confirm against `parse_inlines_with_context`.
    allow_links: bool,
}
3701
3702impl Default for InlineContext {
3703 fn default() -> Self {
3704 Self { allow_links: true }
3705 }
3706}
3707
3708fn parse_inlines_with_context(
3709 input: &str,
3710 base_offset: usize,
3711 options: &SyntaxOptions,
3712 definitions: &[String],
3713 diagnostics: &mut Vec<Diagnostic>,
3714 context: InlineContext,
3715) -> Vec<Inline> {
3716 let bytes = input.as_bytes();
3717 let mut nodes = Vec::new();
3718 let mut text_start = 0;
3719 let mut text = String::new();
3720 let mut index = 0;
3721 let mut delimiters: Vec<DelimMarker> = Vec::new();
3726
3727 while index < bytes.len() {
3728 if bytes[index] == b'\\' {
3729 if let Some((next_index, char)) = next_char(input, index + 1) {
3730 if char.is_ascii_punctuation() {
3731 if options.parse.preserve_character_escapes {
3732 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3733 nodes.push(Inline::Escape(Escape {
3734 meta: NodeMeta::new(Some(Span::new(
3735 base_offset + index,
3736 base_offset + next_index,
3737 ))),
3738 value: char,
3739 }));
3740 index = next_index;
3741 text_start = index;
3742 continue;
3743 }
3744 if text.is_empty() {
3745 text_start = base_offset + index;
3746 }
3747 if gfm_link_label_preserves_url_dot_escape(&text, char, options, context) {
3748 text.push('\\');
3749 }
3750 text.push(char);
3751 index = next_index;
3752 continue;
3753 }
3754 }
3755 }
3756
3757 if bytes[index] == b'&' {
3758 if let Some((end, value)) = parse_character_reference(input, index) {
3759 if options.parse.preserve_character_references {
3760 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3761 nodes.push(Inline::CharacterReference(CharacterReference {
3762 meta: NodeMeta::new(Some(Span::new(
3763 base_offset + index,
3764 base_offset + end,
3765 ))),
3766 reference: input[index..end].into(),
3767 value,
3768 }));
3769 index = end;
3770 text_start = index;
3771 continue;
3772 }
3773 if text.is_empty() {
3774 text_start = base_offset + index;
3775 }
3776 text.push_str(&value);
3777 index = end;
3778 continue;
3779 }
3780 }
3781
3782 if bytes[index] == b'\n' {
3783 if text.ends_with('\\') {
3784 text.pop();
3785 flush_text(
3786 &mut nodes,
3787 &mut text,
3788 text_start,
3789 base_offset + index.saturating_sub(1),
3790 );
3791 nodes.push(Inline::LineBreak(LineBreak {
3792 meta: NodeMeta::new(Some(Span::new(
3793 base_offset + index.saturating_sub(1),
3794 base_offset + index + 1,
3795 ))),
3796 kind: LineBreakKind::Backslash,
3797 }));
3798 index += 1;
3799 text_start = index;
3800 continue;
3801 }
3802 let trailing_spaces = trailing_space_count(&text);
3803 if is_hard_break_suffix(&text, trailing_spaces) {
3804 text.truncate(text.len() - trailing_spaces);
3805 flush_text(
3806 &mut nodes,
3807 &mut text,
3808 text_start,
3809 base_offset + index.saturating_sub(trailing_spaces),
3810 );
3811 nodes.push(Inline::LineBreak(LineBreak {
3812 meta: NodeMeta::new(Some(Span::new(
3813 base_offset + index.saturating_sub(trailing_spaces),
3814 base_offset + index + 1,
3815 ))),
3816 kind: LineBreakKind::Spaces,
3817 }));
3818 index += 1;
3819 text_start = index;
3820 continue;
3821 }
3822 if trailing_spaces > 0 {
3823 text.truncate(text.len() - trailing_spaces);
3824 }
3825 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3826 nodes.push(Inline::SoftBreak(SoftBreak {
3827 meta: NodeMeta::new(Some(Span::new(
3828 base_offset + index,
3829 base_offset + index + 1,
3830 ))),
3831 }));
3832 index += 1;
3833 text_start = index;
3834 continue;
3835 }
3836
3837 if bytes[index] == b'`' {
3838 if let Some((end, code_span)) = parse_code_span(input, index) {
3839 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3840 nodes.push(Inline::Code(CodeInline {
3841 meta: NodeMeta::new(Some(Span::new(base_offset + index, base_offset + end))),
3842 value: code_span.value,
3843 raw: code_span.raw,
3844 fence_length: code_span.fence_length,
3845 }));
3846 index = end;
3847 text_start = index;
3848 continue;
3849 } else {
3850 let run = bytes[index..]
3856 .iter()
3857 .take_while(|byte| **byte == b'`')
3858 .count();
3859 if text.is_empty() {
3860 text_start = base_offset + index;
3861 }
3862 for _ in 0..run {
3863 text.push('`');
3864 }
3865 index += run;
3866 continue;
3867 }
3868 }
3869
3870 if options.constructs.spoiler
3871 && bytes.get(index) == Some(&b'|')
3872 && bytes.get(index + 1) == Some(&b'|')
3873 && bytes.get(index + 2) != Some(&b'|')
3874 {
3875 if let Some(end) = find_spoiler_close(input, index + 2) {
3876 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3877 let inner = &input[index + 2..end];
3878 nodes.push(Inline::Spoiler(Spoiler {
3879 meta: NodeMeta::new(Some(Span::new(
3880 base_offset + index,
3881 base_offset + end + 2,
3882 ))),
3883 children: parse_inlines_with_context(
3884 inner,
3885 base_offset + index + 2,
3886 options,
3887 definitions,
3888 diagnostics,
3889 context,
3890 ),
3891 }));
3892 index = end + 2;
3893 text_start = index;
3894 continue;
3895 }
3896 }
3897
3898 if bytes[index] == b'*' && delimiter_byte_run_start(input, index, b'*') == index {
3899 let run_len = delimiter_byte_run_len(input, index, b'*');
3900 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3901 record_emphasis_delimiter(
3902 &mut nodes,
3903 &mut delimiters,
3904 input,
3905 index,
3906 base_offset,
3907 b'*',
3908 options.constructs.gfm_strikethrough,
3909 );
3910 index += run_len;
3911 text_start = index;
3912 continue;
3913 }
3914
3915 if options.constructs.underline
3916 && bytes.get(index) == Some(&b'_')
3917 && bytes.get(index + 1) == Some(&b'_')
3918 && bytes.get(index + 2) == Some(&b'_')
3919 && can_open_underscore(input, index, 1)
3920 {
3921 if let Some(end) = find_closing_delimiter(input, index + 3, "___", true) {
3922 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3923 let inner = &input[index + 3..end];
3924 let underline = Inline::Underline(Underline {
3925 meta: NodeMeta::new(Some(Span::new(
3926 base_offset + index + 1,
3927 base_offset + end + 2,
3928 ))),
3929 children: parse_inlines_with_context(
3930 inner,
3931 base_offset + index + 3,
3932 options,
3933 definitions,
3934 diagnostics,
3935 context,
3936 ),
3937 });
3938 nodes.push(Inline::Emphasis(Emphasis {
3939 meta: NodeMeta::new(Some(Span::new(
3940 base_offset + index,
3941 base_offset + end + 3,
3942 ))),
3943 children: vec![underline],
3944 }));
3945 index = end + 3;
3946 text_start = index;
3947 continue;
3948 }
3949 }
3950
3951 if options.constructs.underline
3952 && bytes.get(index) == Some(&b'_')
3953 && bytes.get(index + 1) == Some(&b'_')
3954 && can_open_underscore(input, index, 2)
3955 {
3956 if let Some(end) = find_closing_delimiter(input, index + 2, "__", true) {
3957 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3958 let inner = &input[index + 2..end];
3959 nodes.push(Inline::Underline(Underline {
3960 meta: NodeMeta::new(Some(Span::new(
3961 base_offset + index,
3962 base_offset + end + 2,
3963 ))),
3964 children: parse_inlines_with_context(
3965 inner,
3966 base_offset + index + 2,
3967 options,
3968 definitions,
3969 diagnostics,
3970 context,
3971 ),
3972 }));
3973 index = end + 2;
3974 text_start = index;
3975 continue;
3976 }
3977 }
3978
3979 if bytes[index] == b'_' && delimiter_byte_run_start(input, index, b'_') == index {
3985 if (options.constructs.gfm_autolink_literal || options.constructs.relaxed_autolinks)
3990 && context.allow_links
3991 {
3992 if let Some((end, destination)) = parse_literal_autolink(
3993 input,
3994 index,
3995 options.constructs.gfm_autolink_literal,
3996 options.constructs.relaxed_autolinks,
3997 ) {
3998 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3999 nodes.push(Inline::Autolink(Autolink {
4000 meta: NodeMeta::new(Some(Span::new(
4001 base_offset + index,
4002 base_offset + end,
4003 ))),
4004 destination,
4005 kind: AutolinkKind::GfmLiteral {
4006 original: input[index..end].into(),
4007 },
4008 }));
4009 index = end;
4010 text_start = index;
4011 continue;
4012 }
4013 }
4014 let run_len = delimiter_byte_run_len(input, index, b'_');
4015 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4016 record_emphasis_delimiter(
4017 &mut nodes,
4018 &mut delimiters,
4019 input,
4020 index,
4021 base_offset,
4022 b'_',
4023 options.constructs.gfm_strikethrough,
4024 );
4025 index += run_len;
4026 text_start = index;
4027 continue;
4028 }
4029
4030 if options.constructs.insert
4031 && bytes.get(index) == Some(&b'+')
4032 && bytes.get(index + 1) == Some(&b'+')
4033 && bytes.get(index + 2) != Some(&b'+')
4034 && can_open_delimited(input, index, 2)
4035 {
4036 if let Some(end) = find_closing_delimiter(input, index + 2, "++", false) {
4037 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4038 let inner = &input[index + 2..end];
4039 nodes.push(Inline::Insert(Insert {
4040 meta: NodeMeta::new(Some(Span::new(
4041 base_offset + index,
4042 base_offset + end + 2,
4043 ))),
4044 children: parse_inlines_with_context(
4045 inner,
4046 base_offset + index + 2,
4047 options,
4048 definitions,
4049 diagnostics,
4050 context,
4051 ),
4052 }));
4053 index = end + 2;
4054 text_start = index;
4055 continue;
4056 }
4057 }
4058
4059 if options.constructs.highlight
4060 && bytes.get(index) == Some(&b'=')
4061 && bytes.get(index + 1) == Some(&b'=')
4062 && bytes.get(index + 2) != Some(&b'=')
4063 && can_open_delimited(input, index, 2)
4064 {
4065 if let Some(end) = find_closing_delimiter(input, index + 2, "==", false) {
4066 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4067 let inner = &input[index + 2..end];
4068 nodes.push(Inline::Mark(Mark {
4069 meta: NodeMeta::new(Some(Span::new(
4070 base_offset + index,
4071 base_offset + end + 2,
4072 ))),
4073 children: parse_inlines_with_context(
4074 inner,
4075 base_offset + index + 2,
4076 options,
4077 definitions,
4078 diagnostics,
4079 context,
4080 ),
4081 }));
4082 index = end + 2;
4083 text_start = index;
4084 continue;
4085 }
4086 }
4087
4088 if options.constructs.subscript
4089 && starts_exact_byte_run(input, index, b'~', 1)
4090 && !single_tilde_delete_takes_precedence(options, input, index)
4091 {
4092 if let Some(end) = find_simple_inline_close(input, index + 1, b'~') {
4093 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4094 let inner = &input[index + 1..end];
4095 nodes.push(Inline::Subscript(Subscript {
4096 meta: NodeMeta::new(Some(Span::new(
4097 base_offset + index,
4098 base_offset + end + 1,
4099 ))),
4100 children: parse_inlines_with_context(
4101 inner,
4102 base_offset + index + 1,
4103 options,
4104 definitions,
4105 diagnostics,
4106 context,
4107 ),
4108 }));
4109 index = end + 1;
4110 text_start = index;
4111 continue;
4112 }
4113 }
4114
4115 if options.constructs.inline_footnote
4116 && options.constructs.footnote_reference
4117 && bytes.get(index) == Some(&b'^')
4118 && bytes.get(index + 1) == Some(&b'[')
4119 {
4120 if let Some(close) = find_inline_footnote_end(input, index + 2) {
4121 let inner = &input[index + 2..close];
4122 if !inner.trim().is_empty() {
4123 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4124 nodes.push(Inline::InlineFootnote(InlineFootnote {
4125 meta: NodeMeta::new(Some(Span::new(
4126 base_offset + index,
4127 base_offset + close + 1,
4128 ))),
4129 children: parse_inlines_with_context(
4130 inner,
4131 base_offset + index + 2,
4132 options,
4133 definitions,
4134 diagnostics,
4135 context,
4136 ),
4137 }));
4138 index = close + 1;
4139 text_start = index;
4140 continue;
4141 }
4142 }
4143 }
4144
4145 if options.constructs.superscript
4146 && bytes.get(index) == Some(&b'^')
4147 && !(options.constructs.inline_footnote && bytes.get(index + 1) == Some(&b'['))
4148 {
4149 if let Some(end) = find_simple_inline_close(input, index + 1, b'^') {
4150 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4151 let inner = &input[index + 1..end];
4152 nodes.push(Inline::Superscript(Superscript {
4153 meta: NodeMeta::new(Some(Span::new(
4154 base_offset + index,
4155 base_offset + end + 1,
4156 ))),
4157 children: parse_inlines_with_context(
4158 inner,
4159 base_offset + index + 1,
4160 options,
4161 definitions,
4162 diagnostics,
4163 context,
4164 ),
4165 }));
4166 index = end + 1;
4167 text_start = index;
4168 continue;
4169 }
4170 }
4171
4172 if options.constructs.gfm_strikethrough
4179 && bytes[index] == b'~'
4180 && delimiter_byte_run_start(input, index, b'~') == index
4181 {
4182 let run_len = delimiter_byte_run_len(input, index, b'~');
4183 let recordable =
4184 run_len == 2 || (run_len == 1 && options.parse.single_tilde_strikethrough);
4185 if recordable {
4186 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4187 record_emphasis_delimiter(
4188 &mut nodes,
4189 &mut delimiters,
4190 input,
4191 index,
4192 base_offset,
4193 b'~',
4194 true,
4195 );
4196 index += run_len;
4197 text_start = index;
4198 continue;
4199 }
4200 }
4201
4202 if bytes[index] == b'!' && index + 1 < bytes.len() && bytes[index + 1] == b'[' {
4203 if let Some((end, image)) =
4204 parse_image(input, index, base_offset, options, definitions, diagnostics)
4205 {
4206 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4207 nodes.push(image);
4208 index = end;
4209 text_start = index;
4210 continue;
4211 }
4212 }
4213
4214 if bytes[index] == b'[' {
4215 if let Some((end, wikilink)) = parse_wikilink(input, index, base_offset, options) {
4216 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4217 nodes.push(wikilink);
4218 index = end;
4219 text_start = index;
4220 continue;
4221 }
4222 if let Some((end, link)) = parse_link(
4223 input,
4224 index,
4225 base_offset,
4226 options,
4227 definitions,
4228 diagnostics,
4229 context,
4230 ) {
4231 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4232 nodes.push(link);
4233 index = end;
4234 text_start = index;
4235 continue;
4236 }
4237 if options.constructs.footnote_reference
4238 && bytes.get(index) == Some(&b'[')
4239 && bytes.get(index + 1) == Some(&b'^')
4240 {
4241 if let Some(close) = find_footnote_reference_label_end(input, index + 2) {
4242 let label = &input[index + 2..close];
4243 if is_footnote_label(label) {
4244 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4245 nodes.push(Inline::FootnoteReference(FootnoteReference {
4246 meta: NodeMeta::new(Some(Span::new(
4247 base_offset + index,
4248 base_offset + close + 1,
4249 ))),
4250 label: label.into(),
4251 identifier: normalize_label(label),
4252 }));
4253 index = close + 1;
4254 text_start = index;
4255 continue;
4256 }
4257 }
4258 }
4259 }
4260
4261 if bytes[index] == b'$' && options.constructs.math_inline {
4262 if let Some((end, value, kind)) = parse_math_inline(input, index) {
4263 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4264 nodes.push(Inline::Math(MathInline {
4265 meta: NodeMeta::new(Some(Span::new(base_offset + index, base_offset + end))),
4266 value,
4267 kind,
4268 }));
4269 index = end;
4270 text_start = index;
4271 continue;
4272 }
4273 let run = bytes[index..]
4280 .iter()
4281 .take_while(|byte| **byte == b'$')
4282 .count();
4283 if run > 1 {
4284 if text.is_empty() {
4285 text_start = base_offset + index;
4286 }
4287 text.push_str(&input[index..index + run]);
4288 index += run;
4289 continue;
4290 }
4291 }
4292
4293 if (options.constructs.gfm_autolink_literal || options.constructs.relaxed_autolinks)
4296 && context.allow_links
4297 {
4298 if let Some((end, destination)) = parse_literal_autolink(
4299 input,
4300 index,
4301 options.constructs.gfm_autolink_literal,
4302 options.constructs.relaxed_autolinks,
4303 ) {
4304 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4305 nodes.push(Inline::Autolink(Autolink {
4306 meta: NodeMeta::new(Some(Span::new(base_offset + index, base_offset + end))),
4307 destination,
4308 kind: AutolinkKind::GfmLiteral {
4309 original: input[index..end].into(),
4310 },
4311 }));
4312 index = end;
4313 text_start = index;
4314 continue;
4315 }
4316 }
4317
4318 if bytes[index] == b'<' {
4319 if let Some(end) = parse_autolink_end(input, index) {
4320 let raw = &input[index..end];
4321 if is_autolink(raw) {
4322 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4323 if context.allow_links {
4324 nodes.push(Inline::Autolink(Autolink {
4325 meta: NodeMeta::new(Some(Span::new(
4326 base_offset + index,
4327 base_offset + end,
4328 ))),
4329 destination: raw[1..raw.len() - 1].into(),
4330 kind: AutolinkKind::Angle,
4331 }));
4332 } else {
4333 nodes.push(Inline::Text(Text {
4334 meta: NodeMeta::new(Some(Span::new(
4335 base_offset + index,
4336 base_offset + end,
4337 ))),
4338 value: raw[1..raw.len() - 1].into(),
4339 }));
4340 }
4341 index = end;
4342 text_start = index;
4343 continue;
4344 }
4345 }
4346 if options.constructs.mdx_jsx_inline {
4347 if let Some((end, raw)) = parse_mdx_jsx_inline(input, index) {
4348 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4349 nodes.push(Inline::MdxJsx(MdxJsxInline {
4350 meta: NodeMeta::new(Some(Span::new(
4351 base_offset + index,
4352 base_offset + end,
4353 ))),
4354 value: raw,
4355 }));
4356 index = end;
4357 text_start = index;
4358 continue;
4359 }
4360 }
4361 if let Some((end, raw)) = parse_html_inline(input, index) {
4362 if options.constructs.html_inline {
4363 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4364 nodes.push(Inline::Html(HtmlInline {
4365 meta: NodeMeta::new(Some(Span::new(
4366 base_offset + index,
4367 base_offset + end,
4368 ))),
4369 value: raw,
4370 }));
4371 index = end;
4372 text_start = index;
4373 continue;
4374 }
4375 }
4376 }
4377
4378 if bytes[index] == b'{' && options.constructs.mdx_expression_inline {
4379 if let Some(end) = find_mdx_expression_inline_close(input, index) {
4380 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4381 nodes.push(Inline::MdxExpression(MdxExpressionInline {
4382 meta: NodeMeta::new(Some(Span::new(
4383 base_offset + index,
4384 base_offset + end + 1,
4385 ))),
4386 value: input[index + 1..end].into(),
4387 }));
4388 index = end + 1;
4389 text_start = index;
4390 continue;
4391 } else {
4392 diagnostics.push(Diagnostic::new(
4393 DiagnosticSeverity::Error,
4394 DiagnosticCode::InvalidMdx,
4395 Span::new(base_offset + index, base_offset + input.len()),
4396 "MDX expression is missing a closing brace",
4397 ));
4398 }
4399 }
4400
4401 if bytes[index] == b':' && options.constructs.shortcode {
4402 if let Some((end, name)) = parse_shortcode(input, index) {
4403 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4404 nodes.push(Inline::Shortcode(Shortcode {
4405 meta: NodeMeta::new(Some(Span::new(base_offset + index, base_offset + end))),
4406 name,
4407 }));
4408 index = end;
4409 text_start = index;
4410 continue;
4411 }
4412 }
4413
4414 if bytes[index] == b':' && options.constructs.directive_text {
4415 if let Some((end, directive)) =
4416 parse_text_directive(input, index, base_offset, options, definitions, diagnostics)
4417 {
4418 flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4419 nodes.push(directive);
4420 index = end;
4421 text_start = index;
4422 continue;
4423 }
4424 }
4425
4426 let (next_index, char) = next_char(input, index).expect("valid UTF-8 byte index");
4427 if text.is_empty() {
4428 text_start = base_offset + index;
4429 }
4430 text.push(if char == '\0' { '\u{FFFD}' } else { char });
4431 index = next_index;
4432 }
4433
4434 flush_text(&mut nodes, &mut text, text_start, base_offset + input.len());
4435 process_emphasis(nodes, delimiters)
4436}
4437
4438fn parse_shortcode(input: &str, index: usize) -> Option<(usize, String)> {
4439 if input[index..].starts_with("::") {
4440 return None;
4441 }
4442
4443 let mut cursor = index + 1;
4444 while let Some((next, char)) = next_char(input, cursor) {
4445 if char == ':' {
4446 if cursor == index + 1 {
4447 return None;
4448 }
4449 return Some((next, input[index + 1..cursor].into()));
4450 }
4451 if !(char.is_ascii_alphanumeric() || matches!(char, '_' | '-' | '+')) {
4452 return None;
4453 }
4454 cursor = next;
4455 }
4456 None
4457}
4458
4459fn parse_wikilink(
4460 input: &str,
4461 index: usize,
4462 base_offset: usize,
4463 options: &SyntaxOptions,
4464) -> Option<(usize, Inline)> {
4465 let configured_order = if options.constructs.wikilink_title_after_pipe {
4466 WikiLinkLabelOrder::AfterPipe
4467 } else if options.constructs.wikilink_title_before_pipe {
4468 WikiLinkLabelOrder::BeforePipe
4469 } else {
4470 return None;
4471 };
4472 if input.as_bytes().get(index) != Some(&b'[') || input.as_bytes().get(index + 1) != Some(&b'[')
4473 {
4474 return None;
4475 }
4476
4477 let close = find_wikilink_close(input, index + 2)?;
4478 let source = &input[index + 2..close];
4479 if source.is_empty() || source.len() > WIKILINK_MAX_BYTES {
4480 return None;
4481 }
4482
4483 let (target_source, label_source, label_order) =
4484 if let Some(separator) = find_wikilink_separator(source) {
4485 match configured_order {
4486 WikiLinkLabelOrder::AfterPipe => (
4487 &source[..separator],
4488 &source[separator + 1..],
4489 WikiLinkLabelOrder::AfterPipe,
4490 ),
4491 WikiLinkLabelOrder::BeforePipe => (
4492 &source[separator + 1..],
4493 &source[..separator],
4494 WikiLinkLabelOrder::BeforePipe,
4495 ),
4496 }
4497 } else {
4498 (source, source, configured_order)
4499 };
4500
4501 let target = unescape_string(target_source);
4502 if target.is_empty() {
4503 return None;
4504 }
4505 let label = unescape_string(label_source);
4506 let end = close + 2;
4507 Some((
4508 end,
4509 Inline::WikiLink(WikiLink {
4510 meta: NodeMeta::new(Some(Span::new(base_offset + index, base_offset + end))),
4511 target,
4512 label,
4513 label_order,
4514 }),
4515 ))
4516}
4517
4518fn find_wikilink_close(input: &str, start: usize) -> Option<usize> {
4519 let bytes = input.as_bytes();
4520 let mut cursor = start;
4521 while cursor < input.len() {
4522 match bytes[cursor] {
4523 b'\\' => {
4524 cursor += 1;
4525 if cursor < input.len() {
4526 cursor = next_char(input, cursor)?.0;
4527 }
4528 }
4529 b'\n' | b'\r' => return None,
4530 b']' if bytes.get(cursor + 1) == Some(&b']') => return Some(cursor),
4531 _ => cursor = next_char(input, cursor)?.0,
4532 }
4533 }
4534 None
4535}
4536
4537fn find_wikilink_separator(input: &str) -> Option<usize> {
4538 let bytes = input.as_bytes();
4539 let mut cursor = 0;
4540 while cursor < input.len() {
4541 match bytes[cursor] {
4542 b'\\' => {
4543 cursor += 1;
4544 if cursor < input.len() {
4545 cursor = next_char(input, cursor)?.0;
4546 }
4547 }
4548 b'|' => return Some(cursor),
4549 _ => cursor = next_char(input, cursor)?.0,
4550 }
4551 }
4552 None
4553}
4554
/// Counts how many bytes at the end of `input` are ASCII spaces or tabs.
fn trailing_space_count(input: &str) -> usize {
    // Both characters are single-byte, so the length difference is the count.
    let without_trailing = input.trim_end_matches(|c: char| matches!(c, ' ' | '\t'));
    input.len() - without_trailing.len()
}
4563
/// Reports whether the last `trailing` bytes of `input` form a hard-break
/// suffix: at least two bytes, every one of them an ASCII space.
///
/// A tab anywhere in the trailing run makes the result `false`. If `trailing`
/// is larger than `input`, the result is `false` rather than a panic.
fn is_hard_break_suffix(input: &str, trailing: usize) -> bool {
    if trailing < 2 {
        return false;
    }
    let bytes = input.as_bytes();
    // `checked_sub` guards against a `trailing` value longer than the input.
    bytes
        .len()
        .checked_sub(trailing)
        .and_then(|suffix_start| bytes.get(suffix_start..))
        .map_or(false, |suffix| suffix.iter().all(|byte| *byte == b' '))
}
4574
4575fn parse_image(
4576 input: &str,
4577 index: usize,
4578 base_offset: usize,
4579 options: &SyntaxOptions,
4580 definitions: &[String],
4581 diagnostics: &mut Vec<Diagnostic>,
4582) -> Option<(usize, Inline)> {
4583 let label_start = index + 2;
4584 let label_end = find_link_label_end(input, index + 1)?;
4585 let alt_source = &input[label_start..label_end];
4586 let after_label = label_end + 1;
4587 if input.as_bytes().get(after_label) == Some(&b'(') {
4588 let (close, resource) = parse_link_resource(input, after_label)?;
4589 return Some((
4590 close,
4591 Inline::Image(Image {
4592 meta: NodeMeta::new(Some(Span::new(base_offset + index, base_offset + close))),
4593 destination: resource.destination,
4594 destination_kind: resource.destination_kind,
4595 title: resource.title,
4596 title_kind: resource.title_kind,
4597 alt: parse_inlines(
4598 alt_source,
4599 base_offset + label_start,
4600 options,
4601 definitions,
4602 diagnostics,
4603 ),
4604 }),
4605 ));
4606 }
4607 if input.as_bytes().get(after_label) == Some(&b'[') {
4608 let close = find_reference_label_end(input, after_label)?;
4609 let label = &input[after_label + 1..close];
4610 let identifier = if label.is_empty() { alt_source } else { label };
4611 if definition_exists(definitions, identifier) {
4612 return Some((
4613 close + 1,
4614 Inline::ImageReference(ImageReference {
4615 meta: NodeMeta::new(Some(Span::new(
4616 base_offset + index,
4617 base_offset + close + 1,
4618 ))),
4619 identifier: normalize_label(identifier),
4620 label: identifier.into(),
4621 kind: if label.is_empty() {
4622 ReferenceKind::Collapsed
4623 } else {
4624 ReferenceKind::Full
4625 },
4626 alt: parse_inlines(
4627 alt_source,
4628 base_offset + label_start,
4629 options,
4630 definitions,
4631 diagnostics,
4632 ),
4633 }),
4634 ));
4635 }
4636 return None;
4639 }
4640 if definition_exists(definitions, alt_source) {
4643 return Some((
4644 after_label,
4645 Inline::ImageReference(ImageReference {
4646 meta: NodeMeta::new(Some(Span::new(
4647 base_offset + index,
4648 base_offset + after_label,
4649 ))),
4650 identifier: normalize_label(alt_source),
4651 label: alt_source.into(),
4652 kind: ReferenceKind::Shortcut,
4653 alt: parse_inlines(
4654 alt_source,
4655 base_offset + label_start,
4656 options,
4657 definitions,
4658 diagnostics,
4659 ),
4660 }),
4661 ));
4662 }
4663 None
4664}
4665
4666fn parse_link(
4667 input: &str,
4668 index: usize,
4669 base_offset: usize,
4670 options: &SyntaxOptions,
4671 definitions: &[String],
4672 diagnostics: &mut Vec<Diagnostic>,
4673 context: InlineContext,
4674) -> Option<(usize, Inline)> {
4675 if !context.allow_links {
4676 return None;
4677 }
4678 let label_end = find_link_label_end(input, index)?;
4679 let label_source = &input[index + 1..label_end];
4680 if label_contains_link(label_source, base_offset + index + 1, options, definitions) {
4681 return None;
4682 }
4683 let after_label = label_end + 1;
4684 if input.as_bytes().get(after_label) == Some(&b'(') {
4685 if let Some((close, resource)) = parse_link_resource(input, after_label) {
4690 return Some((
4691 close,
4692 Inline::Link(Link {
4693 meta: NodeMeta::new(Some(Span::new(base_offset + index, base_offset + close))),
4694 destination: resource.destination,
4695 destination_kind: resource.destination_kind,
4696 title: resource.title,
4697 title_kind: resource.title_kind,
4698 children: parse_inlines_with_context(
4699 label_source,
4700 base_offset + index + 1,
4701 options,
4702 definitions,
4703 diagnostics,
4704 InlineContext { allow_links: false },
4705 ),
4706 }),
4707 ));
4708 }
4709 }
4710 if input.as_bytes().get(after_label) == Some(&b'[') {
4711 let close = find_reference_label_end(input, after_label)?;
4712 let label = &input[after_label + 1..close];
4713 let identifier = if label.is_empty() {
4714 label_source
4715 } else {
4716 label
4717 };
4718 if definition_exists(definitions, identifier) {
4719 return Some((
4720 close + 1,
4721 Inline::LinkReference(LinkReference {
4722 meta: NodeMeta::new(Some(Span::new(
4723 base_offset + index,
4724 base_offset + close + 1,
4725 ))),
4726 identifier: normalize_label(identifier),
4727 label: identifier.into(),
4728 kind: if label.is_empty() {
4729 ReferenceKind::Collapsed
4730 } else {
4731 ReferenceKind::Full
4732 },
4733 children: parse_inlines_with_context(
4734 label_source,
4735 base_offset + index + 1,
4736 options,
4737 definitions,
4738 diagnostics,
4739 InlineContext { allow_links: false },
4740 ),
4741 }),
4742 ));
4743 }
4744 return None;
4749 }
4750 if definition_exists(definitions, label_source) {
4751 return Some((
4752 after_label,
4753 Inline::LinkReference(LinkReference {
4754 meta: NodeMeta::new(Some(Span::new(
4755 base_offset + index,
4756 base_offset + after_label,
4757 ))),
4758 identifier: normalize_label(label_source),
4759 label: label_source.into(),
4760 kind: ReferenceKind::Shortcut,
4761 children: parse_inlines_with_context(
4762 label_source,
4763 base_offset + index + 1,
4764 options,
4765 definitions,
4766 diagnostics,
4767 InlineContext { allow_links: false },
4768 ),
4769 }),
4770 ));
4771 }
4772 None
4773}
4774
4775fn find_reference_label_end(input: &str, open: usize) -> Option<usize> {
4776 if input.as_bytes().get(open) != Some(&b'[') {
4779 return None;
4780 }
4781
4782 let mut cursor = open + 1;
4783 while cursor < input.len() {
4784 let (next, char) = next_char(input, cursor)?;
4785 match char {
4786 '\\' => {
4787 cursor = next_char(input, next)
4788 .map(|(after_escape, _)| after_escape)
4789 .unwrap_or(next);
4790 continue;
4791 }
4792 '[' => return None,
4793 ']' => {
4794 return reference_label_is_within_limit(&input[open + 1..cursor]).then_some(cursor);
4795 }
4796 _ => {}
4797 }
4798 cursor = next;
4799 }
4800 None
4801}
4802
4803fn label_contains_link(
4804 label_source: &str,
4805 base_offset: usize,
4806 options: &SyntaxOptions,
4807 definitions: &[String],
4808) -> bool {
4809 let mut diagnostics = Vec::new();
4810 let inlines = parse_inlines_with_context(
4811 label_source,
4812 base_offset,
4813 options,
4814 definitions,
4815 &mut diagnostics,
4816 InlineContext::default(),
4817 );
4818 contains_link_inline(&inlines)
4819}
4820
4821fn contains_link_inline(inlines: &[Inline]) -> bool {
4822 inlines.iter().any(|inline| match inline {
4823 Inline::Link(_) | Inline::LinkReference(_) => true,
4824 Inline::Emphasis(node) => contains_link_inline(&node.children),
4825 Inline::Strong(node) => contains_link_inline(&node.children),
4826 Inline::Delete(node) => contains_link_inline(&node.children),
4827 Inline::TextDirective(node) => contains_link_inline(&node.label),
4828 _ => false,
4829 })
4830}
4831
4832fn find_link_label_end(input: &str, open: usize) -> Option<usize> {
4833 if input.as_bytes().get(open) != Some(&b'[') {
4834 return None;
4835 }
4836
4837 let mut depth = 1usize;
4838 let mut cursor = open + 1;
4839 while cursor < input.len() {
4840 let (next, char) = next_char(input, cursor)?;
4841 match char {
4842 '\\' => {
4843 cursor = next_char(input, next)
4844 .map(|(after_escape, _)| after_escape)
4845 .unwrap_or(next);
4846 continue;
4847 }
4848 '`' => {
4849 if let Some((end, _)) = parse_code_span(input, cursor) {
4850 cursor = end;
4851 continue;
4852 }
4853 }
4854 '<' => {
4855 if let Some(end) = parse_autolink_end(input, cursor) {
4856 let raw = &input[cursor..end];
4857 if is_autolink(raw) {
4858 cursor = end;
4859 continue;
4860 }
4861 }
4862 if let Some((end, _)) = parse_html_inline(input, cursor) {
4863 cursor = end;
4864 continue;
4865 }
4866 }
4867 '[' => depth += 1,
4868 ']' => {
4869 depth = depth.checked_sub(1)?;
4870 if depth == 0 {
4871 return Some(cursor);
4872 }
4873 }
4874 _ => {}
4875 }
4876 cursor = next;
4877 }
4878 None
4879}
4880
4881fn parse_text_directive(
4882 input: &str,
4883 index: usize,
4884 base_offset: usize,
4885 options: &SyntaxOptions,
4886 definitions: &[String],
4887 diagnostics: &mut Vec<Diagnostic>,
4888) -> Option<(usize, Inline)> {
4889 if input[index..].starts_with("::") {
4890 return None;
4891 }
4892 if index > 0 {
4893 let previous = input[..index].chars().next_back()?;
4894 if !previous.is_whitespace() && !matches!(previous, '(' | '[' | '{') {
4895 return None;
4896 }
4897 }
4898 let opener_source = &input[index + 1..];
4899 let (name, label_source, attributes, consumed) = match parse_directive_opener(opener_source) {
4900 Some(opener) => opener,
4901 None => {
4902 if directive_opener_looks_malformed(opener_source) {
4903 diagnostics.push(Diagnostic::new(
4904 DiagnosticSeverity::Error,
4905 DiagnosticCode::InvalidDirectiveName,
4906 Span::new(base_offset + index, base_offset + input.len()),
4907 "text directive opener is malformed",
4908 ));
4909 }
4910 return None;
4911 }
4912 };
4913 let label = label_source
4914 .map(|source| {
4915 parse_inlines(
4916 source,
4917 base_offset + index + 1 + name.len() + 1,
4918 options,
4919 definitions,
4920 diagnostics,
4921 )
4922 })
4923 .unwrap_or_default();
4924 Some((
4925 index + 1 + consumed,
4926 Inline::TextDirective(TextDirective {
4927 meta: NodeMeta::new(Some(Span::new(
4928 base_offset + index,
4929 base_offset + index + 1 + consumed,
4930 ))),
4931 name,
4932 label,
4933 attributes,
4934 }),
4935 ))
4936}
4937
4938fn parse_directive_opener(
4939 input: &str,
4940) -> Option<(String, Option<&str>, Vec<DirectiveAttribute>, usize)> {
4941 let mut index = 0;
4942 while let Some((next, char)) = next_char(input, index) {
4943 if char.is_ascii_alphanumeric() || char == '_' || char == '-' {
4944 index = next;
4945 } else {
4946 break;
4947 }
4948 }
4949 let name = &input[..index];
4950 if !is_directive_name(name) {
4951 return None;
4952 }
4953
4954 let mut label = None;
4955 let mut attributes = Vec::new();
4956 let mut consumed = index;
4957 if input.as_bytes().get(consumed) == Some(&b'[') {
4958 let close = find_link_label_end(input, consumed)?;
4959 label = Some(&input[consumed + 1..close]);
4960 consumed = close + 1;
4961 }
4962 if input.as_bytes().get(consumed) == Some(&b'{') {
4963 let close = find_directive_attributes_close(input, consumed)?;
4964 attributes = parse_attributes(&input[consumed + 1..close]);
4965 consumed = close + 1;
4966 }
4967
4968 Some((name.into(), label, attributes, consumed))
4969}
4970
4971fn directive_opener_looks_malformed(input: &str) -> bool {
4972 let mut index = 0;
4973 while let Some((next, char)) = next_char(input, index) {
4974 if char.is_ascii_alphanumeric() || char == '_' || char == '-' {
4975 index = next;
4976 } else {
4977 break;
4978 }
4979 }
4980 index > 0
4981 && is_directive_name(&input[..index])
4982 && matches!(input.as_bytes().get(index), Some(b'[' | b'{'))
4983}
4984
/// Finds the `}` that closes the attribute block opened by the `{` at byte
/// `open`, returning its index.
///
/// A backslash skips the byte that follows it, inside or outside quotes.
/// Inside a `"` or `'` quoted section, `}` is ignored until the matching
/// quote byte is seen. Returns `None` when `open` is not a `{` or no closing
/// brace is found.
fn find_directive_attributes_close(input: &str, open: usize) -> Option<usize> {
    let bytes = input.as_bytes();
    if bytes.get(open).copied() != Some(b'{') {
        return None;
    }

    let mut active_quote: Option<u8> = None;
    let mut skip_next = false;
    for (position, &current) in bytes.iter().enumerate().skip(open + 1) {
        if skip_next {
            skip_next = false;
            continue;
        }
        match (current, active_quote) {
            (b'\\', _) => skip_next = true,
            (_, Some(delimiter)) => {
                if current == delimiter {
                    active_quote = None;
                }
            }
            (b'"', None) | (b'\'', None) => active_quote = Some(current),
            (b'}', None) => return Some(position),
            _ => {}
        }
    }
    None
}
5022
5023fn parse_attributes(input: &str) -> Vec<DirectiveAttribute> {
5024 let mut attributes = Vec::new();
5025 let mut cursor = 0;
5026 while cursor < input.len() {
5027 cursor = skip_spaces(input, cursor);
5028 if cursor >= input.len() {
5029 break;
5030 }
5031
5032 if input.as_bytes().get(cursor) == Some(&b'#') {
5033 let (id, next) = parse_attribute_token(input, cursor + 1);
5034 if !id.is_empty() {
5035 attributes.push(DirectiveAttribute {
5036 name: "id".into(),
5037 value: Some(id.into()),
5038 });
5039 }
5040 cursor = next;
5041 continue;
5042 }
5043
5044 if input.as_bytes().get(cursor) == Some(&b'.') {
5045 let (class, next) = parse_attribute_token(input, cursor + 1);
5046 if !class.is_empty() {
5047 attributes.push(DirectiveAttribute {
5048 name: "class".into(),
5049 value: Some(class.into()),
5050 });
5051 }
5052 cursor = next;
5053 continue;
5054 }
5055
5056 let (name, next) = parse_attribute_name(input, cursor);
5057 if name.is_empty() {
5058 break;
5059 }
5060 cursor = skip_spaces(input, next);
5061 if input.as_bytes().get(cursor) == Some(&b'=') {
5062 cursor = skip_spaces(input, cursor + 1);
5063 if let Some((value, next)) = parse_attribute_value(input, cursor) {
5064 attributes.push(DirectiveAttribute {
5065 name: name.into(),
5066 value: Some(value),
5067 });
5068 cursor = next;
5069 } else {
5070 attributes.push(DirectiveAttribute {
5071 name: name.into(),
5072 value: Some(String::new()),
5073 });
5074 }
5075 } else {
5076 attributes.push(DirectiveAttribute {
5077 name: name.into(),
5078 value: None,
5079 });
5080 }
5081 }
5082 attributes
5083}
5084
/// Reads the run of non-whitespace characters starting at byte `index`.
///
/// Returns the token slice and the byte index just past it.
fn parse_attribute_token(input: &str, index: usize) -> (&str, usize) {
    let rest = &input[index..];
    let length = rest.find(char::is_whitespace).unwrap_or(rest.len());
    (&rest[..length], index + length)
}
5095
/// Reads an attribute name starting at byte `index`.
///
/// The name ends at the first whitespace character or `=`. Returns the name
/// slice and the byte index just past it.
fn parse_attribute_name(input: &str, index: usize) -> (&str, usize) {
    let rest = &input[index..];
    let length = rest
        .find(|ch: char| ch.is_whitespace() || ch == '=')
        .unwrap_or(rest.len());
    (&rest[..length], index + length)
}
5106
5107fn parse_attribute_value(input: &str, index: usize) -> Option<(String, usize)> {
5108 let quote = input.as_bytes().get(index).copied();
5109 if matches!(quote, Some(b'"' | b'\'')) {
5110 let quote = quote?;
5111 let mut cursor = index + 1;
5112 while cursor < input.len() {
5113 let (next, char) = next_char(input, cursor)?;
5114 if char as u8 == quote && !is_escaped_at(input, cursor) {
5115 return Some((unescape_ascii_punctuation(&input[index + 1..cursor]), next));
5116 }
5117 cursor = next;
5118 }
5119 return None;
5120 }
5121
5122 let (value, next) = parse_attribute_token(input, index);
5123 Some((
5124 unescape_selected(value, |char| matches!(char, '\\' | '&')),
5125 next,
5126 ))
5127}
5128
/// Source pieces of a matched code span, as filled in by `parse_code_span`.
struct CodeSpanSource {
    /// Span content after `normalize_code_span` has been applied to `raw`.
    value: String,
    /// Text between the opening and closing backtick runs, unmodified.
    raw: String,
    /// Number of backticks in the opening run.
    fence_length: usize,
}
5134
5135fn parse_code_span(input: &str, index: usize) -> Option<(usize, CodeSpanSource)> {
5136 let len = input[index..]
5137 .as_bytes()
5138 .iter()
5139 .take_while(|byte| **byte == b'`')
5140 .count();
5141 let search_start = index + len;
5142 let close = find_code_span_close(input, search_start, len)?;
5143 let raw = &input[search_start..close];
5144 Some((
5145 close + len,
5146 CodeSpanSource {
5147 value: normalize_code_span(raw),
5148 raw: raw.into(),
5149 fence_length: len,
5150 },
5151 ))
5152}
5153
5154fn find_code_span_close(input: &str, start: usize, marker_len: usize) -> Option<usize> {
5155 let bytes = input.as_bytes();
5156 let mut cursor = start;
5157 while cursor < bytes.len() {
5158 if bytes[cursor] != b'`' {
5159 cursor = next_char(input, cursor)
5160 .map(|(next, _)| next)
5161 .unwrap_or(bytes.len());
5162 continue;
5163 }
5164 let run_len = bytes[cursor..]
5165 .iter()
5166 .take_while(|byte| **byte == b'`')
5167 .count();
5168 if run_len == marker_len {
5169 return Some(cursor);
5170 }
5171 cursor += run_len;
5172 }
5173 None
5174}
5175
/// Normalizes the raw content of a code span.
///
/// Each line ending (`\r\n`, `\r` or `\n`) becomes a single space. If the
/// result both starts and ends with a space and is not made of spaces only,
/// one space is removed from each end.
fn normalize_code_span(input: &str) -> String {
    let mut collapsed = String::new();
    let mut chars = input.chars().peekable();
    while let Some(ch) = chars.next() {
        match ch {
            '\r' => {
                // `\r\n` counts as one line ending.
                if chars.peek() == Some(&'\n') {
                    chars.next();
                }
                collapsed.push(' ');
            }
            '\n' => collapsed.push(' '),
            other => collapsed.push(other),
        }
    }

    let padded = collapsed.starts_with(' ')
        && collapsed.ends_with(' ')
        && collapsed.chars().any(|ch| ch != ' ');
    if padded {
        collapsed[1..collapsed.len() - 1].into()
    } else {
        collapsed
    }
}
5208
5209fn can_open_delimited(input: &str, index: usize, marker_len: usize) -> bool {
5210 delimiter_flanking(input, index, marker_len).left
5211}
5212
5213fn can_close_delimited(input: &str, index: usize, marker_len: usize) -> bool {
5214 delimiter_flanking(input, index, marker_len).right
5215}
5216
5217fn find_closing_delimiter(
5218 input: &str,
5219 start: usize,
5220 marker: &str,
5221 underscore: bool,
5222) -> Option<usize> {
5223 let marker_len = marker.len();
5224 let mut cursor = start;
5225 let mut nested = 0usize;
5226 while cursor <= input.len() {
5227 let candidate = input[cursor..].find(marker).map(|offset| cursor + offset)?;
5228 if is_escaped_at(input, candidate) {
5229 cursor = candidate + marker_len;
5230 continue;
5231 }
5232 if delimiter_candidate_precedes_link_close(input, start, candidate, marker_len) {
5233 cursor = candidate + marker_len;
5234 continue;
5235 }
5236 if marker_len == 1 && nested == 0 && starts_longer_delimiter_run(input, candidate, marker) {
5237 cursor = candidate + delimiter_run_len(input, candidate, marker);
5238 continue;
5239 }
5240
5241 let can_open = if underscore {
5242 can_open_underscore(input, candidate, marker_len)
5243 } else {
5244 can_open_delimited(input, candidate, marker_len)
5245 };
5246 let can_close = if underscore {
5247 can_close_underscore(input, candidate, marker_len)
5248 } else {
5249 can_close_delimited(input, candidate, marker_len)
5250 };
5251
5252 if can_close {
5253 if nested == 0 {
5254 return Some(candidate);
5255 }
5256 nested -= 1;
5257 cursor = candidate + marker_len;
5258 continue;
5259 }
5260 if can_open {
5261 nested += 1;
5262 }
5263 cursor = candidate + marker_len;
5264 }
5265 None
5266}
5267
5268fn find_single_tilde_delete_close(input: &str, start: usize) -> Option<usize> {
5269 let mut cursor = start;
5270 while cursor < input.len() {
5271 let Some(candidate) = input[cursor..].find('~').map(|index| cursor + index) else {
5272 break;
5273 };
5274 if !is_escaped_at(input, candidate) && single_tilde_can_close_delete(input, candidate) {
5275 return Some(candidate);
5276 }
5277 cursor = candidate + 1;
5278 }
5279 None
5280}
5281
5282fn single_tilde_can_open_delete(input: &str, index: usize) -> bool {
5283 starts_exact_byte_run(input, index, b'~', 1)
5284 && can_open_delimited(input, index, 1)
5285 && !tilde_is_alphanumeric_interior(input, index)
5286}
5287
5288fn single_tilde_can_close_delete(input: &str, index: usize) -> bool {
5289 starts_exact_byte_run(input, index, b'~', 1)
5290 && can_close_delimited(input, index, 1)
5291 && !tilde_is_alphanumeric_interior(input, index)
5292}
5293
5294fn single_tilde_delete_takes_precedence(
5295 options: &SyntaxOptions,
5296 input: &str,
5297 index: usize,
5298) -> bool {
5299 options.constructs.gfm_strikethrough
5300 && options.parse.single_tilde_strikethrough
5301 && single_tilde_can_open_delete(input, index)
5302 && find_single_tilde_delete_close(input, index + 1).is_some()
5303}
5304
/// Reports whether the one-byte marker at `index` has an alphanumeric
/// character immediately before it and immediately after it.
fn tilde_is_alphanumeric_interior(input: &str, index: usize) -> bool {
    let (before, after) = (&input[..index], &input[index + 1..]);
    matches!(
        (before.chars().next_back(), after.chars().next()),
        (Some(left), Some(right)) if left.is_alphanumeric() && right.is_alphanumeric()
    )
}
5311
5312fn starts_exact_byte_run(input: &str, index: usize, marker: u8, len: usize) -> bool {
5313 input.as_bytes().get(index) == Some(&marker)
5314 && delimiter_byte_run_start(input, index, marker) == index
5315 && delimiter_byte_run_len(input, index, marker) == len
5316}
5317
5318fn delimiter_byte_run_start(input: &str, index: usize, marker: u8) -> usize {
5319 let bytes = input.as_bytes();
5320 let mut start = index;
5321 while start > 0 && bytes[start - 1] == marker && !is_escaped_at(input, start - 1) {
5322 start -= 1;
5323 }
5324 start
5325}
5326
/// Counts the consecutive `marker` bytes starting at byte `index`.
///
/// Returns 0 when `index` is past the end of `input`.
fn delimiter_byte_run_len(input: &str, index: usize, marker: u8) -> usize {
    input.as_bytes().get(index..).map_or(0, |rest| {
        rest.iter().take_while(|&&byte| byte == marker).count()
    })
}
5335
5336fn find_simple_inline_close(input: &str, start: usize, marker: u8) -> Option<usize> {
5337 let bytes = input.as_bytes();
5338 let mut cursor = start;
5339 while cursor < input.len() {
5340 match bytes[cursor] {
5341 b'\\' => {
5342 cursor += 1;
5343 if cursor < input.len() {
5344 cursor = next_char(input, cursor)?.0;
5345 }
5346 }
5347 b'\n' | b'\r' => return None,
5348 byte if byte == marker => return (cursor > start).then_some(cursor),
5349 _ => cursor = next_char(input, cursor)?.0,
5350 }
5351 }
5352 None
5353}
5354
5355fn find_spoiler_close(input: &str, start: usize) -> Option<usize> {
5356 let bytes = input.as_bytes();
5357 let mut cursor = start;
5358 while cursor + 1 < input.len() {
5359 match bytes[cursor] {
5360 b'\\' => {
5361 cursor += 1;
5362 if cursor < input.len() {
5363 cursor = next_char(input, cursor)?.0;
5364 }
5365 }
5366 b'\n' | b'\r' => return None,
5367 b'|' if bytes.get(cursor + 1) == Some(&b'|')
5368 && cursor > start
5369 && bytes.get(cursor.wrapping_sub(1)) != Some(&b'|') =>
5370 {
5371 return Some(cursor);
5372 }
5373 _ => cursor = next_char(input, cursor)?.0,
5374 }
5375 }
5376 None
5377}
5378
/// Reports whether `marker` at byte `index` is the first of at least two
/// consecutive occurrences: it is not preceded by `marker` and is directly
/// followed by another `marker`.
fn starts_longer_delimiter_run(input: &str, index: usize, marker: &str) -> bool {
    let (before, from_index) = input.split_at(index);
    match from_index.strip_prefix(marker) {
        Some(after_first) => !before.ends_with(marker) && after_first.starts_with(marker),
        None => false,
    }
}
5384
/// Returns the byte length of the consecutive repetitions of `marker` that
/// start at byte `index`.
fn delimiter_run_len(input: &str, index: usize, marker: &str) -> usize {
    let mut rest = &input[index..];
    let mut length = 0;
    while let Some(tail) = rest.strip_prefix(marker) {
        rest = tail;
        length += marker.len();
    }
    length
}
5392
5393fn delimiter_candidate_precedes_link_close(
5394 input: &str,
5395 start: usize,
5396 candidate: usize,
5397 marker_len: usize,
5398) -> bool {
5399 let bytes = input.as_bytes();
5400 if bytes.get(candidate + marker_len) != Some(&b']') {
5401 return false;
5402 }
5403 if !matches!(bytes.get(candidate + marker_len + 1), Some(b'(' | b'[')) {
5404 return false;
5405 }
5406
5407 let mut depth = 0usize;
5408 let mut cursor = start;
5409 while cursor < candidate {
5410 let Some((next, char)) = next_char(input, cursor) else {
5411 break;
5412 };
5413 match char {
5414 '\\' => {
5415 cursor = next_char(input, next)
5416 .map(|(after_escape, _)| after_escape)
5417 .unwrap_or(next);
5418 continue;
5419 }
5420 '`' => {
5421 if let Some((end, _)) = parse_code_span(input, cursor) {
5422 cursor = end;
5423 continue;
5424 }
5425 }
5426 '[' => depth += 1,
5427 ']' => depth = depth.saturating_sub(1),
5428 _ => {}
5429 }
5430 cursor = next;
5431 }
5432 depth > 0
5433}
5434
5435fn can_open_underscore(input: &str, index: usize, marker_len: usize) -> bool {
5436 let flanking = delimiter_flanking(input, index, marker_len);
5437 flanking.left
5438 && (!flanking.right || flanking.previous.is_some_and(|c| c.is_ascii_punctuation()))
5439}
5440
5441fn can_close_underscore(input: &str, index: usize, marker_len: usize) -> bool {
5442 let flanking = delimiter_flanking(input, index, marker_len);
5443 flanking.right && (!flanking.left || flanking.next.is_some_and(|c| c.is_ascii_punctuation()))
5444}
5445
/// Flanking classification of a delimiter run, as computed by
/// `delimiter_flanking`.
#[derive(Clone, Copy)]
struct DelimiterFlanking {
    /// The run is left-flanking.
    left: bool,
    /// The run is right-flanking.
    right: bool,
    /// Character immediately before the run, if any.
    previous: Option<char>,
    /// Character immediately after the run, if any.
    next: Option<char>,
}
5453
5454fn delimiter_flanking(input: &str, index: usize, marker_len: usize) -> DelimiterFlanking {
5455 let previous = input[..index].chars().next_back();
5456 let next = input[index + marker_len..].chars().next();
5457
5458 let previous_whitespace = previous.is_none_or(char::is_whitespace);
5459 let next_whitespace = next.is_none_or(char::is_whitespace);
5460 let previous_punctuation = previous.is_some_and(is_flanking_punctuation);
5461 let next_punctuation = next.is_some_and(is_flanking_punctuation);
5462
5463 let left = next.is_some()
5464 && !next_whitespace
5465 && !(next_punctuation && !previous_whitespace && !previous_punctuation);
5466 let right = previous.is_some()
5467 && !previous_whitespace
5468 && !(previous_punctuation && !next_whitespace && !next_punctuation);
5469
5470 DelimiterFlanking {
5471 left,
5472 right,
5473 previous,
5474 next,
5475 }
5476}
5477
5478fn parse_math_inline(input: &str, index: usize) -> Option<(usize, String, MathInlineKind)> {
5499 if let Some((end, value)) = parse_math_code_inline(input, index) {
5500 return Some((end, value, MathInlineKind::Code));
5501 }
5502
5503 let bytes = input.as_bytes();
5504 let open_dollars = bytes[index..]
5505 .iter()
5506 .take_while(|byte| **byte == b'$')
5507 .count();
5508 if open_dollars == 0 || open_dollars > 2 {
5511 return None;
5512 }
5513
5514 let content_start = index + open_dollars;
5515 let close = scan_to_closing_dollar(input, content_start, open_dollars)?;
5516 let content_end = close - open_dollars;
5517 if content_end <= content_start {
5520 return None;
5521 }
5522
5523 let raw = &input[content_start..content_end];
5524 let value = if open_dollars == 1 {
5525 normalize_math_text(raw)
5526 } else {
5527 raw.into()
5528 };
5529 let dollars = u8::try_from(open_dollars).unwrap_or(u8::MAX);
5530 Some((close, value, MathInlineKind::Dollar { dollars }))
5531}
5532
5533fn scan_to_closing_dollar(input: &str, start: usize, open_dollars: usize) -> Option<usize> {
5537 let bytes = input.as_bytes();
5538 if open_dollars == 1 && bytes.get(start).is_some_and(|byte| is_math_space(*byte)) {
5540 return None;
5541 }
5542
5543 let mut cursor = start;
5544 loop {
5545 while cursor < bytes.len() && bytes[cursor] != b'$' {
5546 cursor += 1;
5547 }
5548 if cursor >= bytes.len() {
5549 return None;
5550 }
5551 let prev = bytes[cursor - 1];
5554 if open_dollars == 1 && is_math_space(prev) {
5555 return None;
5556 }
5557 if open_dollars == 1 && prev == b'\\' {
5558 cursor += 1;
5561 continue;
5562 }
5563 let run = bytes[cursor..]
5564 .iter()
5565 .take(open_dollars)
5566 .take_while(|byte| **byte == b'$')
5567 .count();
5568 if open_dollars == 1 && bytes.get(cursor + run).is_some_and(u8::is_ascii_digit) {
5570 return None;
5571 }
5572 if run == open_dollars {
5573 return Some(cursor + run);
5574 }
5575 cursor += run;
5576 }
5577}
5578
/// Reports whether `byte` is a tab, line feed, carriage return or space.
fn is_math_space(byte: u8) -> bool {
    [b'\t', b'\n', b'\r', b' '].contains(&byte)
}
5583
/// Normalizes the raw content of single-dollar inline math.
///
/// Each line ending (`\r\n`, `\r` or `\n`) becomes a single space. If the
/// result both starts and ends with a space and is not made of spaces only,
/// one space is removed from each end.
fn normalize_math_text(input: &str) -> String {
    // Collapse `\r\n` pairs first so that each pair yields exactly one space.
    let flattened: String = input
        .replace("\r\n", " ")
        .chars()
        .map(|ch| if ch == '\r' || ch == '\n' { ' ' } else { ch })
        .collect();

    let padded = flattened.starts_with(' ')
        && flattened.ends_with(' ')
        && flattened.chars().any(|ch| ch != ' ');
    if padded {
        flattened[1..flattened.len() - 1].into()
    } else {
        flattened
    }
}
5619
/// Parses the `` $`…`$ `` inline math form starting at byte `index`.
///
/// Returns the byte index just past the closing `` `$ `` and the text between
/// the delimiters, or `None` when the opener is absent, no closer exists, or
/// the content is empty.
fn parse_math_code_inline(input: &str, index: usize) -> Option<(usize, String)> {
    let body = input[index..].strip_prefix("$`")?;
    let length = body.find("`$")?;
    if length == 0 {
        return None;
    }
    let body_start = index + 2;
    Some((body_start + length + 2, body[..length].into()))
}
5635
5636fn parse_link_resource(input: &str, open: usize) -> Option<(usize, ParsedLinkResource)> {
5637 let bytes = input.as_bytes();
5638 if bytes.get(open) != Some(&b'(') {
5639 return None;
5640 }
5641 let (mut cursor, initial_space) = skip_link_resource_space_with_info(input, open + 1)?;
5642 if bytes.get(cursor) == Some(&b')') {
5643 return Some((
5644 cursor + 1,
5645 ParsedLinkResource {
5646 destination: String::new(),
5647 destination_kind: LinkDestinationKind::Omitted,
5648 title: None,
5649 title_kind: None,
5650 },
5651 ));
5652 }
5653 if initial_space && matches!(bytes.get(cursor), Some(b'"' | b'\'' | b'(')) {
5654 let (title, title_kind, next) = parse_link_title(input, cursor)?;
5655 cursor = skip_link_resource_space(input, next)?;
5656 if bytes.get(cursor) == Some(&b')') {
5657 return Some((
5658 cursor + 1,
5659 ParsedLinkResource {
5660 destination: String::new(),
5661 destination_kind: LinkDestinationKind::Omitted,
5662 title: Some(title),
5663 title_kind: Some(title_kind),
5664 },
5665 ));
5666 }
5667 return None;
5668 }
5669 let (destination, destination_kind, next) = parse_link_destination(input, cursor)?;
5670 let (after_destination, had_space) = skip_link_resource_space_with_info(input, next)?;
5671 cursor = after_destination;
5672 if bytes.get(cursor) == Some(&b')') {
5673 return Some((
5674 cursor + 1,
5675 ParsedLinkResource {
5676 destination,
5677 destination_kind,
5678 title: None,
5679 title_kind: None,
5680 },
5681 ));
5682 }
5683 if !had_space {
5684 return None;
5685 }
5686
5687 let (title, title_kind, next) = parse_link_title(input, cursor)?;
5688 cursor = skip_link_resource_space(input, next)?;
5689 if bytes.get(cursor) == Some(&b')') {
5690 Some((
5691 cursor + 1,
5692 ParsedLinkResource {
5693 destination,
5694 destination_kind,
5695 title: Some(title),
5696 title_kind: Some(title_kind),
5697 },
5698 ))
5699 } else {
5700 None
5701 }
5702}
5703
5704fn parse_link_destination(
5705 input: &str,
5706 index: usize,
5707) -> Option<(String, LinkDestinationKind, usize)> {
5708 if input.as_bytes().get(index) == Some(&b'<') {
5709 let mut cursor = index + 1;
5710 while cursor < input.len() {
5711 let (next, char) = next_char(input, cursor)?;
5712 if char == '>' && !is_escaped_at(input, cursor) {
5713 return Some((
5714 unescape_ascii_punctuation(&input[index + 1..cursor]),
5715 LinkDestinationKind::Angle,
5716 next,
5717 ));
5718 }
5719 if (char == '<' && !is_escaped_at(input, cursor)) || char == '\n' || char == '\r' {
5720 return None;
5721 }
5722 cursor = next;
5723 }
5724 return None;
5725 }
5726
5727 let mut cursor = index;
5728 let mut depth = 0usize;
5729 while cursor < input.len() {
5730 let (next, char) = next_char(input, cursor)?;
5731 if (char == ' ' || char.is_ascii_control()) && depth == 0 {
5736 break;
5737 }
5738 if char == '(' && !is_escaped_at(input, cursor) {
5739 depth += 1;
5740 if depth > 32 {
5742 return None;
5743 }
5744 } else if char == ')' && !is_escaped_at(input, cursor) {
5745 if depth == 0 {
5746 break;
5747 }
5748 depth -= 1;
5749 }
5750 cursor = next;
5751 }
5752
5753 if cursor == index || depth > 0 {
5754 None
5755 } else {
5756 Some((
5757 unescape_ascii_punctuation(&input[index..cursor]),
5758 LinkDestinationKind::Bare,
5759 cursor,
5760 ))
5761 }
5762}
5763
5764fn parse_link_title(input: &str, index: usize) -> Option<(String, LinkTitleKind, usize)> {
5765 let opener = input.as_bytes().get(index).copied()?;
5766 let (closer, title_kind) = match opener {
5767 b'"' => ('"', LinkTitleKind::DoubleQuote),
5768 b'\'' => ('\'', LinkTitleKind::SingleQuote),
5769 b'(' => (')', LinkTitleKind::Paren),
5770 _ => return None,
5771 };
5772 let mut cursor = index + 1;
5773 while cursor < input.len() {
5774 let (next, char) = next_char(input, cursor)?;
5775 if char == closer && !is_escaped_at(input, cursor) {
5776 if contains_blank_line(&input[index + 1..cursor]) {
5777 return None;
5778 }
5779 return Some((
5780 unescape_ascii_punctuation(&input[index + 1..cursor]),
5781 title_kind,
5782 next,
5783 ));
5784 }
5785 if opener == b'(' && char == '(' && !is_escaped_at(input, cursor) {
5786 return None;
5787 }
5788 cursor = next;
5789 }
5790 None
5791}
5792
5793fn contains_blank_line(input: &str) -> bool {
5794 if !input.bytes().any(|byte| matches!(byte, b'\n' | b'\r')) {
5795 return false;
5796 }
5797 let lines = collect_lines(input, 0);
5802 let interior = lines.len().saturating_sub(1);
5803 lines
5804 .iter()
5805 .take(interior)
5806 .skip(1)
5807 .any(|line| line.text.trim().is_empty())
5808}
5809
5810fn skip_link_resource_space(input: &str, index: usize) -> Option<usize> {
5811 skip_link_resource_space_with_info(input, index).map(|(index, _)| index)
5812}
5813
/// Skips spaces, tabs and at most one line ending starting at byte `index`.
///
/// `\r\n` counts as a single line ending. Returns the byte index of the first
/// byte that was not skipped and whether anything was skipped, or `None` when
/// a second line ending is encountered.
fn skip_link_resource_space_with_info(input: &str, mut index: usize) -> Option<(usize, bool)> {
    let bytes = input.as_bytes();
    let start = index;
    let mut seen_line_ending = false;
    loop {
        let step = match bytes.get(index) {
            Some(b' ' | b'\t') => 1,
            Some(b'\n') => {
                if seen_line_ending {
                    return None;
                }
                seen_line_ending = true;
                1
            }
            Some(b'\r') => {
                if seen_line_ending {
                    return None;
                }
                seen_line_ending = true;
                if bytes.get(index + 1) == Some(&b'\n') {
                    2
                } else {
                    1
                }
            }
            _ => break,
        };
        index += step;
    }
    // Every step advances, so any progress means whitespace was consumed.
    Some((index, index > start))
}
5847
5848pub(crate) fn parse_character_reference(input: &str, index: usize) -> Option<(usize, String)> {
5849 let rest = input.get(index..)?;
5850 if let Some(rest) = rest
5851 .strip_prefix("&#x")
5852 .or_else(|| rest.strip_prefix("&#X"))
5853 {
5854 let digits = rest.find(';')?;
5855 if digits == 0 || digits > 6 || !rest[..digits].bytes().all(|byte| byte.is_ascii_hexdigit())
5856 {
5857 return None;
5858 }
5859 let value = u32::from_str_radix(&rest[..digits], 16).ok()?;
5860 return Some((
5861 index + 3 + digits + 1,
5862 character_reference_value(value).into(),
5863 ));
5864 }
5865 if let Some(rest) = rest.strip_prefix("&#") {
5866 let digits = rest.find(';')?;
5867 if digits == 0 || digits > 7 || !rest[..digits].bytes().all(|byte| byte.is_ascii_digit()) {
5868 return None;
5869 }
5870 let value = rest[..digits].parse::<u32>().ok()?;
5871 return Some((
5872 index + 2 + digits + 1,
5873 character_reference_value(value).into(),
5874 ));
5875 }
5876
5877 let name_end = rest.find(';')?;
5878 if name_end == 0 || name_end > 32 {
5879 return None;
5880 }
5881 let name = &rest[1..name_end];
5882 named_character_reference(name).map(|value| (index + name_end + 1, value.into()))
5883}
5884
/// Converts a numeric character reference value to a character.
///
/// Zero and values that are not valid Unicode scalar values map to U+FFFD.
pub(crate) fn character_reference_value(value: u32) -> char {
    const REPLACEMENT: char = '\u{FFFD}';
    match char::from_u32(value) {
        Some('\0') | None => REPLACEMENT,
        Some(decoded) => decoded,
    }
}
5908
/// Reports whether the byte at `index` is preceded by an odd number of
/// consecutive backslashes.
pub(crate) fn is_escaped_at(input: &str, index: usize) -> bool {
    let backslashes = input.as_bytes()[..index]
        .iter()
        .rev()
        .take_while(|&&byte| byte == b'\\')
        .count();
    backslashes % 2 == 1
}
5919
5920fn parse_definition_destination_title(input: &str) -> Option<ParsedLinkResource> {
5921 let (mut cursor, _) = skip_link_resource_space_with_info(input, 0)?;
5922 let (destination, destination_kind, next) = parse_link_destination(input, cursor)?;
5923 cursor = next;
5924
5925 let (next, had_space) = skip_link_resource_space_with_info(input, cursor)?;
5926 cursor = next;
5927 if cursor >= input.len() {
5928 return Some(ParsedLinkResource {
5929 destination,
5930 destination_kind,
5931 title: None,
5932 title_kind: None,
5933 });
5934 }
5935 if !had_space {
5936 return None;
5937 }
5938
5939 let (title, title_kind, next) = parse_link_title(input, cursor)?;
5940 let after_title = skip_link_resource_space(input, next)?;
5941 (after_title == input.len()).then_some(ParsedLinkResource {
5942 destination,
5943 destination_kind,
5944 title: Some(title),
5945 title_kind: Some(title_kind),
5946 })
5947}
5948
/// Reports whether the first non-whitespace character of `input` is `"`, `'`
/// or `(`.
fn line_can_start_definition_title(input: &str) -> bool {
    matches!(
        input.trim_start().chars().next(),
        Some('"' | '\'' | '(')
    )
}
5953
5954fn unescape_ascii_punctuation(input: &str) -> String {
5955 unescape_selected(input, |char| char.is_ascii_punctuation())
5957}
5958
5959fn unescape_string(input: &str) -> String {
5960 unescape_selected(input, |char| char.is_ascii_punctuation() || char == '&')
5961}
5962
5963fn unescape_selected(input: &str, should_unescape: impl Fn(char) -> bool) -> String {
5964 let mut output = String::new();
5965 let mut cursor = 0;
5966 while cursor < input.len() {
5967 if input.as_bytes().get(cursor) == Some(&b'&') {
5968 if let Some((end, value)) = parse_character_reference(input, cursor) {
5969 output.push_str(&value);
5970 cursor = end;
5971 continue;
5972 }
5973 }
5974 let (next, char) = next_char(input, cursor).expect("valid UTF-8 byte index");
5975 if char == '\\' {
5976 if let Some((after_escape, escaped)) = next_char(input, next) {
5977 if should_unescape(escaped) {
5978 output.push(escaped);
5979 } else {
5980 output.push(char);
5981 output.push(escaped);
5982 }
5983 cursor = after_escape;
5984 } else {
5985 output.push(char);
5986 cursor = next;
5987 }
5988 } else {
5989 output.push(if char == '\0' { '\u{FFFD}' } else { char });
5990 cursor = next;
5991 }
5992 }
5993 output
5994}
5995
/// Appends `line` to `output`, inserting a `\n` first when `output` already
/// has content.
fn push_line(output: &mut String, line: &str) {
    let needs_separator = !output.is_empty();
    if needs_separator {
        output.push('\n');
    }
    *output += line;
}
6002
6003fn ensure_line_separator(output: &mut String) {
6004 if !output.is_empty() && !ends_with_line_ending(output) {
6005 output.push('\n');
6006 }
6007}
6008
/// Reports whether the last byte of `input` is `\n` or `\r`.
fn ends_with_line_ending(input: &str) -> bool {
    matches!(input.as_bytes().last(), Some(b'\n' | b'\r'))
}
6012
6013fn flush_text(nodes: &mut Vec<Inline>, text: &mut String, text_start: usize, end: usize) {
6014 if !text.is_empty() {
6015 nodes.push(Inline::Text(Text {
6016 meta: NodeMeta::new(Some(Span::new(text_start, end))),
6017 value: core::mem::take(text),
6018 }));
6019 }
6020}
6021
6022fn gfm_link_label_preserves_url_dot_escape(
6023 text: &str,
6024 escaped: char,
6025 options: &SyntaxOptions,
6026 context: InlineContext,
6027) -> bool {
6028 escaped == '.'
6029 && !context.allow_links
6030 && options.constructs.gfm_autolink_literal
6031 && (text.starts_with("www.") || text.starts_with("http://") || text.starts_with("https://"))
6032}
6033
/// Decodes the character that starts at byte `index`.
///
/// Returns the byte index just past the character together with the
/// character, or `None` at the end of `input`.
fn next_char(input: &str, index: usize) -> Option<(usize, char)> {
    input[index..]
        .chars()
        .next()
        .map(|decoded| (index + decoded.len_utf8(), decoded))
}
6038
6039fn is_flanking_punctuation(value: char) -> bool {
6044 value.is_ascii_punctuation() || crate::unicode_punctuation::is_unicode_punctuation(value)
6045}
6046
/// Normalizes a reference label for comparison.
///
/// `ẞ` is replaced by `ss`, surrounding whitespace is dropped, inner
/// whitespace runs collapse to one space, and the result is upper-cased and
/// then lower-cased.
pub(crate) fn normalize_label(label: &str) -> String {
    let replaced = label.replace('ẞ', "ss");
    let mut collapsed = String::with_capacity(replaced.len());
    for word in replaced.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed.to_uppercase().to_lowercase()
}
6069
6070fn definition_exists(definitions: &[String], label: &str) -> bool {
6071 if label.is_empty() || !reference_label_is_within_limit(label) {
6072 return false;
6073 }
6074
6075 let identifier = normalize_label(label);
6076 definitions
6077 .iter()
6078 .any(|definition| definition == &identifier)
6079}
6080
6081fn reference_label_is_within_limit(label: &str) -> bool {
6082 label.chars().take(REFERENCE_LABEL_MAX_CHARS + 1).count() <= REFERENCE_LABEL_MAX_CHARS
6083}
6084
6085fn trim_up_to_three_spaces(input: &str) -> Option<&str> {
6086 let (columns, bytes) = leading_indent(input);
6087 if columns <= 3 {
6088 Some(&input[bytes..])
6089 } else {
6090 None
6091 }
6092}
6093
6094fn fence_start(input: &str) -> Option<(FenceMarker, usize)> {
6095 let marker = match input.as_bytes().first()? {
6096 b'`' => FenceMarker::Backtick,
6097 b'~' => FenceMarker::Tilde,
6098 _ => return None,
6099 };
6100 let byte = match marker {
6101 FenceMarker::Backtick => b'`',
6102 FenceMarker::Tilde => b'~',
6103 };
6104 let length = input
6105 .as_bytes()
6106 .iter()
6107 .take_while(|item| **item == byte)
6108 .count();
6109 if length >= 3 {
6110 Some((marker, length))
6111 } else {
6112 None
6113 }
6114}
6115
6116fn fence_close(input: &str, marker: FenceMarker, length: usize) -> bool {
6117 let byte = match marker {
6118 FenceMarker::Backtick => b'`',
6119 FenceMarker::Tilde => b'~',
6120 };
6121 let count = input
6122 .as_bytes()
6123 .iter()
6124 .take_while(|item| **item == byte)
6125 .count();
6126 count >= length && input[count..].trim().is_empty()
6127}
6128
/// Removes a closing run of `#` characters from heading content.
///
/// Trailing whitespace is always trimmed. The trailing `#` run is removed
/// only when it makes up the whole text (yielding `""`) or is preceded by a
/// space or tab; otherwise the hashes are kept.
fn trim_closing_hashes(input: &str) -> &str {
    let trimmed = input.trim_end();
    let body = trimmed.trim_end_matches('#');
    if body.len() == trimmed.len() {
        // No trailing hashes at all.
        return trimmed;
    }
    if body.is_empty() {
        return "";
    }
    if body.ends_with(' ') || body.ends_with('\t') {
        body.trim_end()
    } else {
        trimmed
    }
}
6146
6147fn list_marker_info(input: &str) -> Option<ListMarkerInfo<'_>> {
6148 let trimmed = trim_up_to_three_spaces(input)?;
6149 let indent = input.len() - trimmed.len();
6150 let bytes = trimmed.as_bytes();
6151 match bytes.first()? {
6152 b'-' | b'*' | b'+' if is_list_padding_byte(bytes.get(1).copied()) => {
6153 let delimiter = match bytes[0] {
6154 b'-' => ListDelimiter::Dash,
6155 b'*' => ListDelimiter::Asterisk,
6156 _ => ListDelimiter::Plus,
6157 };
6158 let (content_offset, content_indent) = list_content_offset(trimmed, 1, indent);
6159 Some(ListMarkerInfo {
6160 ordered: false,
6161 start: None,
6162 delimiter,
6163 indent,
6164 marker_len: 1,
6165 content_indent,
6166 content: &trimmed[content_offset..],
6167 })
6168 }
6169 byte if byte.is_ascii_digit() => {
6170 let mut end = 0;
6171 while bytes.get(end).is_some_and(|byte| byte.is_ascii_digit()) {
6172 end += 1;
6173 }
6174 if end > 9 {
6175 return None;
6176 }
6177 let delimiter = match bytes.get(end)? {
6178 b'.' => ListDelimiter::Period,
6179 b')' => ListDelimiter::Paren,
6180 _ => return None,
6181 };
6182 if !is_list_padding_byte(bytes.get(end + 1).copied()) {
6183 return None;
6184 }
6185 let start = trimmed[..end].parse().ok()?;
6186 let marker_len = end + 1;
6187 let (content_offset, content_indent) = list_content_offset(trimmed, marker_len, indent);
6188 Some(ListMarkerInfo {
6189 ordered: true,
6190 start: Some(start),
6191 delimiter,
6192 indent,
6193 marker_len,
6194 content_indent,
6195 content: &trimmed[content_offset..],
6196 })
6197 }
6198 _ => None,
6199 }
6200}
6201
/// Computes where list item content begins after a marker.
///
/// `input` starts at the marker, which is `marker_len` bytes long and sits at
/// column `indent`. Returns `(byte offset of the content in input, content
/// column)`. Tabs advance to the next multiple of four columns.
///
/// - Nothing after the marker, or only spaces/tabs: the content column is one
///   past the marker.
/// - One to four columns of padding before content: content starts right
///   after the padding.
/// - More padding than that: only one column after the marker is consumed.
fn list_content_offset(input: &str, marker_len: usize, indent: usize) -> (usize, usize) {
    let bytes = input.as_bytes();
    let marker_end_column = indent + marker_len;
    let Some(padding) = bytes.get(marker_len..).filter(|rest| !rest.is_empty()) else {
        return (marker_len, marker_end_column + 1);
    };

    let mut column = marker_end_column;
    let mut consumed = 0;
    for &byte in padding {
        match byte {
            b' ' => column += 1,
            b'\t' => column += 4 - (column % 4),
            _ => break,
        }
        consumed += 1;
    }

    let cursor = marker_len + consumed;
    if cursor >= bytes.len() {
        return (cursor, marker_end_column + 1);
    }
    match column.saturating_sub(marker_end_column) {
        1..=4 => (cursor, column),
        _ => (marker_len + 1, marker_end_column + 1),
    }
}
6232
6233fn list_marker_first_content<'a>(input: &'a str, marker: ListMarkerInfo<'a>) -> Cow<'a, str> {
6234 let Some(trimmed) = trim_up_to_three_spaces(input) else {
6235 return Cow::Borrowed(marker.content);
6236 };
6237 let after_marker = &trimmed[marker.marker_len..];
6238 if after_marker.starts_with('\t') {
6239 strip_leading_indent_columns_from(after_marker, 1, marker.indent + marker.marker_len)
6240 } else {
6241 Cow::Borrowed(marker.content)
6242 }
6243}
6244
/// Reports whether `byte` may follow a list marker: a space, a tab, or no
/// byte at all (end of line).
fn is_list_padding_byte(byte: Option<u8>) -> bool {
    byte.map_or(true, |value| value == b' ' || value == b'\t')
}
6248
6249fn same_list_marker(left: ListMarkerInfo<'_>, right: ListMarkerInfo<'_>) -> bool {
6250 left.ordered == right.ordered && left.delimiter == right.delimiter
6254}
6255
6256fn sibling_list_marker_at_line(
6263 input: &str,
6264 first_marker: ListMarkerInfo<'_>,
6265 content_indent: usize,
6266) -> bool {
6267 list_marker_info(input).is_some_and(|candidate| {
6268 same_list_marker(first_marker, candidate) && candidate.indent < content_indent
6269 })
6270}
6271
6272fn same_list_marker_line(input: &str, first_marker: ListMarkerInfo<'_>) -> bool {
6277 list_marker_info(input).is_some_and(|candidate| same_list_marker(first_marker, candidate))
6278}
6279
6280fn next_nonblank_line(lines: &[Line<'_>], mut index: usize) -> usize {
6281 while index < lines.len() && lines[index].text.trim().is_empty() {
6282 index += 1;
6283 }
6284 index
6285}
6286
/// Measures the leading run of spaces and tabs in `input`.
///
/// Returns `(columns, bytes)`: the visual width of the run, with tabs
/// advancing to the next multiple of four, and the number of bytes it spans.
fn leading_indent(input: &str) -> (usize, usize) {
    let mut columns = 0usize;
    let consumed = input
        .bytes()
        .take_while(|byte| match *byte {
            b' ' => {
                columns += 1;
                true
            }
            b'\t' => {
                columns += 4 - (columns % 4);
                true
            }
            _ => false,
        })
        .count();
    (columns, consumed)
}
6300
6301fn leading_indent_columns(input: &str) -> usize {
6302 leading_indent(input).0
6303}
6304
6305fn strip_leading_indent_columns(input: &str, max_columns: usize) -> Cow<'_, str> {
6312 strip_leading_indent_columns_from(input, max_columns, 0)
6313}
6314
/// Strips up to `max_columns` columns of leading spaces/tabs from `input`,
/// where the first byte of `input` sits at column `start_column`.
///
/// Tabs advance to the next multiple of four columns. The result borrows from
/// `input` unless a tab straddles the cut-off column; in that case the part of
/// the tab beyond the cut-off, and all whitespace immediately following it, is
/// rewritten as spaces and an owned string is returned.
fn strip_leading_indent_columns_from(
    input: &str,
    max_columns: usize,
    start_column: usize,
) -> Cow<'_, str> {
    let mut column = start_column;
    // Absolute column at which stripping must stop.
    let target_column = start_column + max_columns;
    for (index, byte) in input.as_bytes().iter().enumerate() {
        // Column reached if this byte were consumed.
        let next = match *byte {
            b' ' => column + 1,
            b'\t' => column + (4 - (column % 4)),
            // First non-whitespace byte: everything before it was within budget.
            _ => return Cow::Borrowed(&input[index..]),
        };
        if next > target_column {
            if *byte == b'\t' && column < target_column {
                // The tab starts before the cut-off but ends after it: keep
                // the columns beyond the cut-off as spaces.
                let residual = next - target_column;
                let mut owned = String::with_capacity(residual + input.len() - (index + 1));
                for _ in 0..residual {
                    owned.push(' ');
                }
                // Expand the whitespace that follows so later tabs keep the
                // width they had at their original column.
                let mut rest_column = next;
                let mut rest_index = index + 1;
                while let Some(rest_byte) = input.as_bytes().get(rest_index) {
                    match *rest_byte {
                        b' ' => {
                            owned.push(' ');
                            rest_column += 1;
                            rest_index += 1;
                        }
                        b'\t' => {
                            let width = 4 - (rest_column % 4);
                            for _ in 0..width {
                                owned.push(' ');
                            }
                            rest_column += width;
                            rest_index += 1;
                        }
                        _ => break,
                    }
                }
                owned.push_str(&input[rest_index..]);
                return Cow::Owned(owned);
            }
            // Budget already exhausted: this byte and everything after it stay.
            return Cow::Borrowed(&input[index..]);
        }
        column = next;
    }
    // The whole input was whitespace within the budget.
    Cow::Borrowed("")
}
6366
6367fn strip_list_continuation(input: &str, content_indent: usize, list_indent: usize) -> Cow<'_, str> {
6368 let (indent_columns, indent_bytes) = leading_indent(input);
6369 if indent_columns >= content_indent {
6370 strip_leading_indent_columns(input, content_indent)
6375 } else if indent_columns > list_indent {
6376 Cow::Borrowed(&input[indent_bytes..])
6377 } else {
6378 Cow::Borrowed(trim_ascii_start(input))
6379 }
6380}
6381
6382fn take_task_marker_from_children(children: &mut [Block]) -> Option<bool> {
6383 let Some(Block::Paragraph(paragraph)) = children.first_mut() else {
6384 return None;
6385 };
6386 take_task_marker_from_inlines(&mut paragraph.children)
6387}
6388
6389fn take_task_marker_from_inlines(inlines: &mut Vec<Inline>) -> Option<bool> {
6390 let Some(Inline::Text(text)) = inlines.first() else {
6391 return None;
6392 };
6393 let first = text.value.clone();
6394
6395 if let Some((checked, consumed)) = task_marker_inline_prefix(&first) {
6396 if !first[consumed..].is_empty() || inlines_have_content_after(inlines, 1) {
6397 remove_text_prefix(inlines, consumed);
6398 return Some(checked);
6399 }
6400 }
6401
6402 if let Some(checked) = task_marker_at_text_end(&first) {
6403 if inlines
6404 .get(1)
6405 .is_some_and(|inline| matches!(inline, Inline::SoftBreak(_)))
6406 && inlines_have_content_after(inlines, 2)
6407 {
6408 inlines.remove(1);
6409 inlines.remove(0);
6410 return Some(checked);
6411 }
6412 }
6413
6414 if task_marker_split_open(&first)
6415 && inlines
6416 .get(1)
6417 .is_some_and(|inline| matches!(inline, Inline::SoftBreak(_)))
6418 {
6419 let Some(Inline::Text(next)) = inlines.get(2) else {
6420 return None;
6421 };
6422 if let Some((checked, consumed)) = task_marker_split_close_prefix(&next.value) {
6423 if !next.value[consumed..].is_empty() || inlines_have_content_after(inlines, 3) {
6424 inlines.remove(1);
6425 inlines.remove(0);
6426 remove_text_prefix(inlines, consumed);
6427 return Some(checked);
6428 }
6429 }
6430 }
6431
6432 None
6433}
6434
6435fn task_marker_inline_prefix(input: &str) -> Option<(bool, usize)> {
6436 let start = leading_trim_bytes(input);
6437 let rest = &input[start..];
6438 let checked = task_marker_checked(rest)?;
6439 let after_marker = start + 3;
6440 match input.as_bytes().get(after_marker) {
6441 Some(b' ' | b'\t') => Some((checked, after_marker + 1)),
6442 _ => None,
6443 }
6444}
6445
6446fn task_marker_at_text_end(input: &str) -> Option<bool> {
6447 let start = leading_trim_bytes(input);
6448 let rest = &input[start..];
6449 let checked = task_marker_checked(rest)?;
6450 if rest.len() == 3 {
6451 Some(checked)
6452 } else {
6453 None
6454 }
6455}
6456
6457fn task_marker_split_open(input: &str) -> bool {
6458 let start = leading_trim_bytes(input);
6459 input[start..] == *"["
6460}
6461
/// Recognises the closing half of a task marker whose `[` ended the previous
/// line: `]` or `x]`/`X]`, each followed by a space or tab.
///
/// Returns the checked state and the number of bytes the closing half
/// (including its separator) occupies.
fn task_marker_split_close_prefix(input: &str) -> Option<(bool, usize)> {
    match input.as_bytes() {
        [b']', b' ' | b'\t', ..] => Some((false, 2)),
        [b'x' | b'X', b']', b' ' | b'\t', ..] => Some((true, 3)),
        _ => None,
    }
}
6470
/// Decodes a task marker at the very start of `input`: `[ ]` is unchecked,
/// `[x]` and `[X]` are checked. Anything else yields `None`.
fn task_marker_checked(input: &str) -> Option<bool> {
    match input.as_bytes() {
        [b'[', b' ', b']', ..] => Some(false),
        [b'[', b'x' | b'X', b']', ..] => Some(true),
        _ => None,
    }
}
6480
6481fn remove_text_prefix(inlines: &mut Vec<Inline>, consumed: usize) {
6482 if let Some(Inline::Text(text)) = inlines.first_mut() {
6483 text.value = text.value[consumed..].into();
6484 if text.value.is_empty() {
6485 inlines.remove(0);
6486 }
6487 }
6488}
6489
6490fn inlines_have_content_after(inlines: &[Inline], start: usize) -> bool {
6491 inlines.iter().skip(start).any(|inline| match inline {
6492 Inline::Text(text) => !text.value.is_empty(),
6493 Inline::SoftBreak(_) | Inline::LineBreak(_) => false,
6494 _ => true,
6495 })
6496}
6497
6498fn update_list_item_fence(line: &str, open_fence: &mut Option<(FenceMarker, usize)>) {
6499 let Some(trimmed) = trim_up_to_three_spaces(line) else {
6500 return;
6501 };
6502 if let Some((marker, length)) = *open_fence {
6503 if fence_close(trimmed, marker, length) {
6504 *open_fence = None;
6505 }
6506 return;
6507 }
6508 if let Some((marker, length)) = fence_start(trimmed) {
6509 *open_fence = Some((marker, length));
6510 }
6511}
6512
/// Strips leading spaces and tabs (and nothing else) from `input`.
fn trim_ascii_start(input: &str) -> &str {
    input.trim_start_matches([' ', '\t'])
}
6516
/// Number of leading bytes of `input` that are spaces or tabs.
fn leading_trim_bytes(input: &str) -> usize {
    input
        .bytes()
        .take_while(|byte| matches!(byte, b' ' | b'\t'))
        .count()
}
6520
6521fn parse_table_delimiter(input: &str, spoiler: bool) -> Option<Vec<TableAlignment>> {
6522 let cells = split_table_row(input, spoiler);
6523 if cells.is_empty() {
6524 return None;
6525 }
6526 let mut alignments = Vec::new();
6527 for cell in cells {
6528 alignments.push(table_delimiter_alignment(cell.trim())?);
6529 }
6530 Some(alignments)
6531}
6532
6533fn table_delimiter_alignment(cell: &str) -> Option<TableAlignment> {
6536 let bytes = cell.as_bytes();
6537 let mut cursor = 0;
6538 let left = bytes.first() == Some(&b':');
6539 if left {
6540 cursor += 1;
6541 }
6542 let dash_start = cursor;
6543 while bytes.get(cursor) == Some(&b'-') {
6544 cursor += 1;
6545 }
6546 if cursor == dash_start {
6547 return None;
6548 }
6549 let right = bytes.get(cursor) == Some(&b':');
6550 if right {
6551 cursor += 1;
6552 }
6553 if cursor != bytes.len() {
6554 return None;
6555 }
6556 Some(match (left, right) {
6557 (true, true) => TableAlignment::Center,
6558 (true, false) => TableAlignment::Left,
6559 (false, true) => TableAlignment::Right,
6560 (false, false) => TableAlignment::None,
6561 })
6562}
6563
6564fn table_indent_line(input: &str, indented_code: bool) -> Option<&str> {
6568 if indented_code {
6569 trim_up_to_three_spaces(input)
6570 } else {
6571 Some(input)
6572 }
6573}
6574
/// Reports whether the backtick run of `length` bytes starting at `start` has
/// a closing run of exactly the same length later in `input`. Runs of a
/// different length are skipped as a whole.
fn backtick_run_has_close(input: &str, start: usize, length: usize) -> bool {
    let bytes = input.as_bytes();
    let mut cursor = start + length;
    loop {
        // Jump to the next backtick, if any.
        let next_tick = bytes
            .get(cursor..)
            .and_then(|tail| tail.iter().position(|byte| *byte == b'`'));
        let Some(offset) = next_tick else {
            return false;
        };
        let run_start = cursor + offset;
        let run = bytes[run_start..]
            .iter()
            .take_while(|byte| **byte == b'`')
            .count();
        if run == length {
            return true;
        }
        cursor = run_start + run;
    }
}
6599
/// Inspects a run of backslashes starting at `cursor` that is immediately
/// followed by `|`.
///
/// Returns the byte index of that pipe and whether the run length is odd
/// (i.e. the pipe itself is escaped). Returns `None` when there is no
/// backslash at `cursor` or the run is not followed by a pipe.
fn table_backslash_pipe_run(input: &str, cursor: usize) -> Option<(usize, bool)> {
    let tail = input.as_bytes().get(cursor..)?;
    let backslashes = tail.iter().take_while(|byte| **byte == b'\\').count();
    if backslashes == 0 || tail.get(backslashes) != Some(&b'|') {
        return None;
    }
    Some((cursor + backslashes, backslashes % 2 == 1))
}
6611
/// Splits one table row into its raw cell strings.
///
/// The row is trimmed, then cut at every unescaped `|` that is not inside an
/// open spoiler. A pipe escaped by a backslash is stored in the cell as a
/// bare `|`. When `spoiler` is set, `||` pairs are kept in the cell text and
/// may open or close a spoiler. A leading pipe and a trailing pipe do not
/// produce empty edge cells. Cells are returned untrimmed.
fn split_table_row(input: &str, spoiler: bool) -> Vec<String> {
    let trimmed = input.trim();
    let mut cells = Vec::new();
    let mut cell = String::new();
    let mut cursor = 0;
    // Length of the backtick run that opened the current code span, if any.
    let mut code_fence = None;
    let mut spoiler_open = false;
    // Byte index just past the most recent cell delimiter.
    let mut trailing_delimiter_end = None;

    while cursor < trimmed.len() {
        let (next, char) = next_char(trimmed, cursor).expect("valid UTF-8 byte index");
        if char == '\\' {
            // Only backslash runs that end at a pipe are handled here.
            if let Some((pipe, escaped)) = table_backslash_pipe_run(trimmed, cursor) {
                if escaped {
                    // Odd run: the last backslash escapes the pipe. Keep the
                    // other backslashes and emit the pipe as cell text.
                    for _ in 0..pipe - cursor - 1 {
                        cell.push('\\');
                    }
                    cell.push('|');
                    cursor = pipe + 1;
                } else {
                    // Even run: keep every backslash and leave the cursor on
                    // the pipe so the code below sees it.
                    for _ in 0..pipe - cursor {
                        cell.push('\\');
                    }
                    cursor = pipe;
                }
                continue;
            }
        }
        if char == '`' {
            let length = trimmed[cursor..]
                .as_bytes()
                .iter()
                .take_while(|byte| **byte == b'`')
                .count();
            if code_fence == Some(length) {
                code_fence = None;
            } else if code_fence.is_none() && backtick_run_has_close(trimmed, cursor, length) {
                // A run only opens a code span if a matching run closes it.
                code_fence = Some(length);
            }
            cell.push_str(&trimmed[cursor..cursor + length]);
            cursor += length;
            continue;
        }

        // Inside a code span a `||` pair is plain cell text.
        if spoiler
            && char == '|'
            && trimmed.as_bytes().get(cursor + 1) == Some(&b'|')
            && code_fence.is_some()
        {
            cell.push_str("||");
            cursor += 2;
            continue;
        }

        if spoiler
            && char == '|'
            && trimmed.as_bytes().get(cursor + 1) == Some(&b'|')
            && code_fence.is_none()
            && !is_escaped_at(trimmed, cursor)
        {
            // `wrapping_sub` makes the lookup miss (rather than underflow)
            // when the pair sits at the very start of the row.
            let closes_spoiler =
                spoiler_open && trimmed.as_bytes().get(cursor.wrapping_sub(1)) != Some(&b'|');
            let opens_spoiler = !spoiler_open
                && trimmed.as_bytes().get(cursor + 2) != Some(&b'|')
                && find_spoiler_close(trimmed, cursor + 2).is_some();
            if closes_spoiler || opens_spoiler {
                spoiler_open = opens_spoiler;
                cell.push_str("||");
                cursor += 2;
                continue;
            }
        }

        if char == '|' && !spoiler_open && !is_escaped_at(trimmed, cursor) {
            // Cell delimiter: finish the current cell.
            cells.push(core::mem::take(&mut cell));
            spoiler_open = false;
            trailing_delimiter_end = Some(next);
        } else {
            cell.push(char);
        }
        cursor = next;
    }
    cells.push(cell);

    // A leading pipe yields an empty first cell that is not a real column.
    if trimmed.starts_with('|') {
        cells.remove(0);
    }
    if let Some(end) = trailing_delimiter_end {
        // Likewise for a trailing pipe: drop the empty cell after it.
        if trimmed[end..].trim().is_empty() {
            cells.pop();
        }
    }
    cells
}
6721
6722fn table_can_start(lines: &[Line<'_>], index: usize, options: &SyntaxOptions) -> bool {
6723 if !options.constructs.gfm_table || index + 1 >= lines.len() {
6724 return false;
6725 }
6726 table_can_start_source(
6727 lines[index].text,
6728 lines[index + 1].text,
6729 options.constructs.indented_code,
6730 options.constructs.spoiler,
6731 )
6732}
6733
6734pub(crate) fn gfm_table_can_start_source(header: &str, delimiter: &str) -> bool {
6735 table_can_start_source(header, delimiter, true, false)
6736}
6737
6738fn table_can_start_source(
6739 header: &str,
6740 delimiter: &str,
6741 indented_code: bool,
6742 spoiler: bool,
6743) -> bool {
6744 let Some(delimiter) = table_indent_line(delimiter, indented_code) else {
6745 return false;
6746 };
6747 if list_marker_info(delimiter).is_some() {
6748 return false;
6749 }
6750 if !table_has_separator(header, delimiter, spoiler) {
6751 return false;
6752 }
6753 let Some(alignments) = parse_table_delimiter(delimiter, spoiler) else {
6754 return false;
6755 };
6756 split_table_row(header, spoiler).len() == alignments.len()
6757}
6758
6759fn table_has_separator(header: &str, delimiter: &str, spoiler: bool) -> bool {
6760 let Some(alignments) = parse_table_delimiter(delimiter, spoiler) else {
6768 return true;
6769 };
6770 if alignments.len() == 1 {
6771 return contains_unescaped_pipe(header, spoiler)
6772 || contains_unescaped_pipe(delimiter, spoiler)
6773 || delimiter.contains(':');
6774 }
6775 true
6776}
6777
/// Reports whether `input` contains a `|` that would act as a table cell
/// delimiter: not backslash-escaped and not inside an open spoiler.
///
/// The scan follows the same steps as `split_table_row` but stops at the
/// first delimiter instead of collecting cells.
fn contains_unescaped_pipe(input: &str, spoiler: bool) -> bool {
    let mut cursor = 0;
    // Length of the backtick run that opened the current code span, if any.
    let mut code_fence = None;
    let mut spoiler_open = false;
    while cursor < input.len() {
        let (next, char) = next_char(input, cursor).expect("valid UTF-8 byte index");
        if char == '\\' {
            if let Some((pipe, escaped)) = table_backslash_pipe_run(input, cursor) {
                // Odd run: the pipe is escaped, skip past it. Even run: stop
                // on the pipe so the checks below examine it.
                cursor = if escaped { pipe + 1 } else { pipe };
                continue;
            }
        }
        if char == '`' {
            let length = input[cursor..]
                .as_bytes()
                .iter()
                .take_while(|byte| **byte == b'`')
                .count();
            if code_fence == Some(length) {
                code_fence = None;
            } else if code_fence.is_none() {
                // NOTE(review): unlike `split_table_row`, this opens a code
                // span without checking that a closing run exists. Confirm
                // the difference is intended.
                code_fence = Some(length);
            }
            cursor += length;
            continue;
        }
        // Inside a code span a `||` pair is never a delimiter.
        if spoiler
            && char == '|'
            && input.as_bytes().get(cursor + 1) == Some(&b'|')
            && code_fence.is_some()
        {
            cursor += 2;
            continue;
        }
        if spoiler
            && char == '|'
            && input.as_bytes().get(cursor + 1) == Some(&b'|')
            && code_fence.is_none()
            && !is_escaped_at(input, cursor)
        {
            // `wrapping_sub` makes the lookup miss (rather than underflow)
            // when the pair sits at the very start of the input.
            let closes_spoiler =
                spoiler_open && input.as_bytes().get(cursor.wrapping_sub(1)) != Some(&b'|');
            let opens_spoiler = !spoiler_open
                && input.as_bytes().get(cursor + 2) != Some(&b'|')
                && find_spoiler_close(input, cursor + 2).is_some();
            if closes_spoiler || opens_spoiler {
                spoiler_open = opens_spoiler;
                cursor += 2;
                continue;
            }
        }
        if char == '|' && !spoiler_open && !is_escaped_at(input, cursor) {
            return true;
        }
        cursor = next;
    }
    false
}
6839
6840fn likely_block_start(input: &str, options: &SyntaxOptions) -> bool {
6841 let Some(trimmed) = trim_up_to_three_spaces(input) else {
6846 return false;
6847 };
6848 trimmed.starts_with('#')
6849 || trimmed.starts_with('>')
6850 || trimmed.starts_with("```")
6851 || trimmed.starts_with("~~~")
6852 || list_marker_can_interrupt_paragraph(input)
6853 || parse_thematic_break(Line {
6854 text: input,
6855 eol: "",
6856 start: 0,
6857 end: input.len(),
6858 end_with_eol: input.len(),
6859 lazy: false,
6860 })
6861 .is_some()
6862 || (options.constructs.html_block && line_starts_interrupting_html_block(input))
6863 || (options.constructs.math_block && math_block_fence_length(trimmed).is_some())
6864 || (options.constructs.directive_container && trimmed.starts_with(":::"))
6865 || (options.constructs.directive_leaf && trimmed.starts_with("::"))
6866 || (options.constructs.footnote_definition && line_starts_footnote_definition(trimmed))
6867}
6868
6869fn line_starts_footnote_definition(trimmed: &str) -> bool {
6872 trimmed.starts_with("[^")
6873 && find_footnote_definition_label_end(trimmed)
6874 .is_some_and(|close| is_footnote_label(&trimmed[2..close]))
6875}
6876
6877fn list_marker_can_interrupt_paragraph(input: &str) -> bool {
6878 list_marker_info(input).is_some_and(|marker| {
6879 !marker.content.trim().is_empty() && (!marker.ordered || marker.start == Some(1))
6882 })
6883}
6884
6885fn table_body_line_ends_table(line: &str, options: &SyntaxOptions) -> bool {
6890 likely_block_start(line, options)
6891 || list_marker_info(line).is_some()
6892 || (options.constructs.html_block && line_starts_html_block(line))
6893}
6894
6895fn line_starts_interrupting_html_block(input: &str) -> bool {
6896 match trim_up_to_three_spaces(input).and_then(html_block_start) {
6897 Some(HtmlBlockKind::UntilBlank) | None => false,
6898 Some(_) => true,
6899 }
6900}
6901
/// Finds the first `>` at or after `index` and returns the byte index just
/// past it, or `None` when there is none.
fn parse_autolink_end(input: &str, index: usize) -> Option<usize> {
    let close = input[index..].find('>')?;
    Some(index + close + 1)
}
6905
6906fn parse_html_inline(input: &str, index: usize) -> Option<(usize, String)> {
6907 let rest = &input[index..];
6908 if rest.starts_with("<!--") {
6909 let end = rest.find("-->")? + 3;
6910 return Some((index + end, rest[..end].into()));
6911 }
6912 if rest.starts_with("<?") {
6913 let end = rest.find("?>")? + 2;
6914 return Some((index + end, rest[..end].into()));
6915 }
6916 if rest.starts_with("<![CDATA[") {
6917 let end = rest.find("]]>")? + 3;
6918 return Some((index + end, rest[..end].into()));
6919 }
6920 if is_declaration_start(rest) {
6921 let end = rest.find('>')? + 1;
6922 return Some((index + end, rest[..end].into()));
6923 }
6924
6925 let (end, _) = parse_html_tag(input, index)?;
6926 Some((end, input[index..end].into()))
6927}
6928
6929fn parse_html_tag(input: &str, index: usize) -> Option<(usize, &str)> {
6930 let bytes = input.as_bytes();
6931 if bytes.get(index) != Some(&b'<') {
6932 return None;
6933 }
6934
6935 let closing = bytes.get(index + 1) == Some(&b'/');
6936 let name_start = index + if closing { 2 } else { 1 };
6937 let first = *bytes.get(name_start)?;
6938 if !first.is_ascii_alphabetic() {
6939 return None;
6940 }
6941
6942 let mut cursor = name_start + 1;
6943 while bytes.get(cursor).is_some_and(|byte| html_name_byte(*byte)) {
6944 cursor += 1;
6945 }
6946 let name = &input[name_start..cursor];
6947
6948 if closing {
6949 cursor = skip_spaces(input, cursor);
6950 if bytes.get(cursor) == Some(&b'>') {
6951 return Some((cursor + 1, name));
6952 }
6953 return None;
6954 }
6955
6956 let mut needs_space = false;
6957 loop {
6958 let before_spaces = cursor;
6959 cursor = skip_spaces(input, cursor);
6960 let had_space = cursor > before_spaces;
6961 match bytes.get(cursor) {
6962 Some(b'>') => return Some((cursor + 1, name)),
6963 Some(b'/') if bytes.get(cursor + 1) == Some(&b'>') => return Some((cursor + 2, name)),
6964 Some(byte) if had_space && html_attribute_name_start(*byte) => {
6965 cursor += 1;
6966 while bytes
6967 .get(cursor)
6968 .is_some_and(|byte| html_attribute_name_byte(*byte))
6969 {
6970 cursor += 1;
6971 }
6972 let after_name = cursor;
6973 let after_spaces = skip_spaces(input, cursor);
6974 if bytes.get(after_spaces) == Some(&b'=') {
6975 cursor = skip_spaces(input, after_spaces + 1);
6976 cursor = parse_html_attribute_value(input, cursor)?;
6977 } else {
6978 cursor = after_name;
6979 }
6980 needs_space = true;
6981 }
6982 Some(_) if needs_space => return None,
6983 _ => return None,
6984 }
6985 }
6986}
6987
/// Parses an HTML attribute value starting at `index` and returns the byte
/// index just past it.
///
/// A value is either quoted with `"` or `'` (ending at the next identical
/// quote) or an unquoted non-empty run that contains no ASCII whitespace and
/// none of `"`, `'`, `=`, `<`, `>` or a backtick.
fn parse_html_attribute_value(input: &str, index: usize) -> Option<usize> {
    let bytes = input.as_bytes();
    let first = *bytes.get(index)?;

    if matches!(first, b'"' | b'\'') {
        // Quoted: everything up to the matching quote belongs to the value.
        return bytes[index + 1..]
            .iter()
            .position(|byte| *byte == first)
            .map(|offset| index + 1 + offset + 1);
    }

    let unquoted_len = bytes[index..]
        .iter()
        .take_while(|byte| {
            !byte.is_ascii_whitespace()
                && !matches!(**byte, b'"' | b'\'' | b'=' | b'<' | b'>' | b'`')
        })
        .count();
    (unquoted_len > 0).then_some(index + unquoted_len)
}
7019
/// Bytes allowed after the first letter of an HTML tag name: ASCII letters,
/// digits and `-`.
fn html_name_byte(byte: u8) -> bool {
    matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'-')
}
7023
/// Bytes that may begin an HTML attribute name: ASCII letters, `_` and `:`.
fn html_attribute_name_start(byte: u8) -> bool {
    matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'_' | b':')
}
7027
/// Bytes allowed after the first byte of an HTML attribute name: ASCII
/// letters, digits, `_`, `:`, `.` and `-`.
fn html_attribute_name_byte(byte: u8) -> bool {
    matches!(
        byte,
        b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b':' | b'.' | b'-'
    )
}
7031
/// Advances `index` past any run of spaces, tabs, line feeds and carriage
/// returns and returns the new position. An `index` at or past the end of
/// `input` is returned unchanged.
fn skip_spaces(input: &str, index: usize) -> usize {
    let tail = input.as_bytes().get(index..).unwrap_or_default();
    let skipped = tail
        .iter()
        .take_while(|byte| matches!(**byte, b' ' | b'\t' | b'\n' | b'\r'))
        .count();
    index + skipped
}
7042
7043fn is_autolink(input: &str) -> bool {
7044 let inner = &input[1..input.len() - 1];
7045 is_uri_autolink(inner) || is_email_autolink(inner)
7046}
7047
/// Reports whether `input` is an absolute URI as allowed inside `<...>`:
/// a scheme of 2 to 32 bytes (an ASCII letter followed by ASCII letters,
/// digits, `+`, `.` or `-`), a colon, and then any characters other than
/// `<`, `>`, control characters and whitespace.
fn is_uri_autolink(input: &str) -> bool {
    let Some((scheme, rest)) = input.split_once(':') else {
        return false;
    };
    if !(2..=32).contains(&scheme.len()) {
        return false;
    }

    let (first, tail) = match scheme.as_bytes() {
        [first, tail @ ..] => (*first, tail),
        [] => return false,
    };
    if !first.is_ascii_alphabetic() {
        return false;
    }
    let tail_ok = tail
        .iter()
        .all(|byte| byte.is_ascii_alphanumeric() || matches!(*byte, b'+' | b'.' | b'-'));
    if !tail_ok {
        return false;
    }

    rest.chars()
        .all(|char| !matches!(char, '<' | '>') && !char.is_control() && !char.is_whitespace())
}
7067
7068fn is_email_autolink(input: &str) -> bool {
7069 if input.chars().any(char::is_whitespace) {
7070 return false;
7071 }
7072 let Some(at) = input.find('@') else {
7073 return false;
7074 };
7075 if at == 0 || at + 1 >= input.len() {
7076 return false;
7077 }
7078 is_email_local_part(&input[..at]) && is_email_domain(&input[at + 1..], 1)
7081}
7082
/// Tries to parse a bare (not `<...>`-wrapped) autolink starting at `index`.
///
/// With `gfm` set, three forms are tried in order: `http://`/`https://` URLs,
/// `www.` hosts (whose destination gets an `http://` prefix), and email
/// addresses. With `relaxed` set, any `scheme://` URL is additionally
/// accepted after those. Returns the end byte index of the link text and the
/// link destination.
fn parse_literal_autolink(
    input: &str,
    index: usize,
    gfm: bool,
    relaxed: bool,
) -> Option<(usize, String)> {
    let rest = &input[index..];

    if gfm {
        if let Some(scheme_len) = rest
            .starts_with("http://")
            .then_some(7)
            .or_else(|| rest.starts_with("https://").then_some(8))
        {
            if !literal_scheme_prefix_ok(input, index) {
                return None;
            }
            let host = &input[index + scheme_len..];
            if !http_literal_host_ok(host) {
                if relaxed {
                    // Do not reject yet: fall through so the remaining
                    // checks, including the relaxed scheme scan below, can
                    // still try this position.
                } else {
                    return None;
                }
            } else {
                // Scan and trim only the part after the scheme.
                let end = autolink_url_end(input, index + scheme_len, index + scheme_len, relaxed);
                if end <= index + scheme_len {
                    return None;
                }
                if literal_autolink_suppressed_by_link_label(input, index, end, relaxed, gfm) {
                    return None;
                }
                return Some((end, input[index..end].into()));
            }
        }

        if rest
            .as_bytes()
            .get(..4)
            .is_some_and(|prefix| prefix.eq_ignore_ascii_case(b"www."))
        {
            if !literal_www_prefix_ok(input, index) {
                return None;
            }
            // Only validity matters here; the returned host length is unused.
            check_domain(rest, false)?;
            let end = autolink_url_end(input, index, index, relaxed);
            if end <= index || (!relaxed && end <= index + 3 && !literal_starts_line(input, index))
            {
                return None;
            }
            if literal_autolink_suppressed_by_link_label(input, index, end, relaxed, gfm) {
                return None;
            }
            let mut destination = String::from("http://");
            destination.push_str(&input[index..end]);
            return Some((end, destination));
        }

        if let Some(email) = parse_literal_email(input, index) {
            return Some(email);
        }
    }

    if relaxed {
        if literal_scheme_prefix_ok(input, index) {
            if let Some(after_slashes) = relaxed_scheme_after_slashes(rest) {
                let body_start = index + after_slashes;
                let next = input[body_start..].chars().next();
                // `after_slashes == 3` means `rest` began with a bare `://`;
                // that needs something non-blank after it.
                if next.is_none_or(|char| char.is_whitespace()) && after_slashes == 3 {
                    return None;
                }
                let end = autolink_url_end(input, body_start, body_start, true);
                if end > index {
                    if literal_autolink_suppressed_by_link_label(input, index, end, relaxed, gfm) {
                        return None;
                    }
                    return Some((end, input[index..end].into()));
                }
            }
        }
    }

    None
}
7190
/// Recognises a `scheme://` prefix at the start of `rest` and returns the
/// byte offset just past the `://`.
///
/// A bare `://` with no scheme is accepted (offset 3). Otherwise the scheme
/// must start with an ASCII letter and continue with ASCII letters, digits,
/// `+`, `.` or `-`.
fn relaxed_scheme_after_slashes(rest: &str) -> Option<usize> {
    const SEPARATOR: &[u8] = b"://";
    let bytes = rest.as_bytes();
    if bytes.starts_with(SEPARATOR) {
        return Some(SEPARATOR.len());
    }
    if !bytes.first()?.is_ascii_alphabetic() {
        return None;
    }
    let scheme_len = 1 + bytes[1..]
        .iter()
        .take_while(|byte| byte.is_ascii_alphanumeric() || matches!(**byte, b'+' | b'.' | b'-'))
        .count();
    bytes[scheme_len..]
        .starts_with(SEPARATOR)
        .then_some(scheme_len + SEPARATOR.len())
}
7219
/// Reports whether a scheme autolink may start at `index`: the character just
/// before it, if any, must not be an ASCII letter.
fn literal_scheme_prefix_ok(input: &str, index: usize) -> bool {
    let preceded_by_letter = input[..index]
        .chars()
        .next_back()
        .is_some_and(|previous| previous.is_ascii_alphabetic());
    !preceded_by_letter
}
7232
/// Reports whether a `www.` autolink may start at `index`: it must be at the
/// start of the input or follow one of `*`, `_`, `~`, `(`, `[`, `]`, a space,
/// a tab or a line ending.
fn literal_www_prefix_ok(input: &str, index: usize) -> bool {
    match input[..index].chars().next_back() {
        None => true,
        Some(previous) => matches!(
            previous,
            '*' | '_' | '~' | '(' | '[' | ']' | ' ' | '\t' | '\n' | '\r'
        ),
    }
}
7248
/// Reports whether `index` is at the start of a line: either the start of
/// `input` or directly after a `\n` or `\r` byte.
fn literal_starts_line(input: &str, index: usize) -> bool {
    match index.checked_sub(1) {
        None => true,
        Some(previous) => matches!(input.as_bytes().get(previous), Some(b'\n' | b'\r')),
    }
}
7256
7257fn literal_autolink_suppressed_by_link_label(
7258 input: &str,
7259 index: usize,
7260 end: usize,
7261 relaxed: bool,
7262 gfm_autolink_literal: bool,
7263) -> bool {
7264 if !has_unclosed_link_label_opener(input, index) {
7265 return false;
7266 }
7267 if input[end..].starts_with("](") && !link_resource_tail_has_close(input, end + 2) {
7268 return true;
7269 }
7270 !relaxed && !gfm_autolink_literal && input.as_bytes().get(end).is_some_and(|byte| *byte == b']')
7271}
7272
7273fn has_unclosed_link_label_opener(input: &str, index: usize) -> bool {
7274 let line_start = input[..index]
7275 .rfind(['\n', '\r'])
7276 .map_or(0, |offset| offset + 1);
7277 let mut depth = 0usize;
7278 let mut cursor = line_start;
7279 while cursor < index {
7280 let Some((next, char)) = next_char(input, cursor) else {
7281 break;
7282 };
7283 match char {
7284 '\\' => {
7285 cursor = next_char(input, next)
7286 .map(|(after_escape, _)| after_escape)
7287 .unwrap_or(next);
7288 continue;
7289 }
7290 '[' => depth += 1,
7291 ']' => {
7292 depth = depth.saturating_sub(1);
7293 }
7294 _ => {}
7295 }
7296 cursor = next;
7297 }
7298 depth > 0
7299}
7300
7301fn link_resource_tail_has_close(input: &str, start: usize) -> bool {
7302 let mut cursor = start;
7303 while cursor < input.len() {
7304 let Some((next, char)) = next_char(input, cursor) else {
7305 break;
7306 };
7307 match char {
7308 '\\' => {
7309 cursor = next_char(input, next)
7310 .map(|(after_escape, _)| after_escape)
7311 .unwrap_or(next);
7312 continue;
7313 }
7314 '\n' | '\r' => return false,
7315 ')' => return true,
7316 _ => {}
7317 }
7318 cursor = next;
7319 }
7320 false
7321}
7322
7323fn http_literal_host_ok(host: &str) -> bool {
7324 if host.starts_with('[') {
7325 return bracketed_ipv6_host_end(host).is_some();
7326 }
7327 match host.chars().next() {
7328 Some(char) if char.is_ascii() && char.is_ascii_alphanumeric() => {
7329 check_domain(host, true).is_some()
7330 }
7331 Some(char) if !char.is_ascii() && is_valid_hostchar(char) => {
7332 check_domain(host, true).is_some()
7333 }
7334 _ => false,
7335 }
7336}
7337
/// Returns the byte offset just past the first `]` in `host`, provided at
/// least one byte separates it from the start-of-host bracket (i.e. the `]`
/// is at offset 2 or later).
fn bracketed_ipv6_host_end(host: &str) -> Option<usize> {
    match host.find(']') {
        Some(close) if close > 1 => Some(close + 1),
        _ => None,
    }
}
7342
7343fn is_valid_hostchar(char: char) -> bool {
7346 !char.is_whitespace() && !crate::unicode_punctuation::is_unicode_punctuation(char)
7347}
7348
/// Scans a domain at the start of `data` and returns the byte length of the
/// accepted host prefix.
///
/// Scanning stops at the first character that is not `\`, `_`, `.`, `-` or a
/// valid host character. The domain is rejected (`None`) when either of its
/// last two dot-separated segments contains an underscore and at most ten
/// periods were counted, or when no period was counted and `allow_short` is
/// false.
fn check_domain(data: &str, allow_short: bool) -> Option<usize> {
    // Number of counted periods.
    let mut np = 0usize;
    // Underscores counted in the previous segment and in the current one.
    let mut uscore1 = 0usize;
    let mut uscore2 = 0usize;
    let mut host_len = 0usize;

    let mut chars = data.char_indices().peekable();
    while let Some((offset, char)) = chars.next() {
        // `_` and `.` are only counted when they are neither the first nor
        // the last character of `data`.
        let account = offset != 0 && chars.peek().is_some();
        match char {
            '\\' => {
                host_len = offset + char.len_utf8();
                // The character after a backslash is accepted unchecked.
                if let Some((next_off, next)) = chars.next() {
                    host_len = next_off + next.len_utf8();
                }
            }
            '_' if account => {
                uscore2 += 1;
                host_len = offset + char.len_utf8();
            }
            '.' if account => {
                // A new segment starts: shift the underscore counters.
                uscore1 = uscore2;
                uscore2 = 0;
                np += 1;
                host_len = offset + char.len_utf8();
            }
            '_' | '.' | '-' => {
                host_len = offset + char.len_utf8();
            }
            _ => {
                if !is_valid_hostchar(char) {
                    break;
                }
                host_len = offset + char.len_utf8();
            }
        }
    }

    if (uscore1 > 0 || uscore2 > 0) && np <= 10 {
        return None;
    }

    if allow_short || np > 0 {
        Some(host_len)
    } else {
        None
    }
}
7412
/// Finds where a literal autolink that is being scanned from `start` ends.
///
/// The scan stops at whitespace, `<`, a terminating control character, or a
/// backslash followed by ASCII punctuation other than `.`. With `balanced`
/// set, an unmatched `]` or `}` also ends the URL; otherwise a `]` ends it
/// unless a `[` was seen earlier or an odd number of backticks precedes it.
/// The candidate end is then passed to `autolink_delim`, which trims trailing
/// punctuation within `trim_from..end`.
fn autolink_url_end(input: &str, start: usize, trim_from: usize, balanced: bool) -> usize {
    let bytes = input.as_bytes();
    let mut end = start;
    // Nesting depth of `[`/`]` and `{`/`}` (balanced mode only).
    let mut bracket_depth = 0i32;
    let mut curly_depth = 0i32;
    // Strict-mode state: whether any `[` was seen, and backtick parity.
    let mut strict_has_open_bracket = false;
    let mut strict_inside_backticks = false;
    for (offset, char) in input[start..].char_indices() {
        if char.is_whitespace() || char == '<' || is_autolink_terminating_control(char) {
            break;
        }
        if balanced {
            match char {
                '[' => bracket_depth += 1,
                ']' => {
                    if bracket_depth > 0 {
                        bracket_depth -= 1;
                    } else {
                        // Closer without an opener inside the URL.
                        break;
                    }
                }
                '{' => curly_depth += 1,
                '}' => {
                    if curly_depth > 0 {
                        curly_depth -= 1;
                    } else {
                        break;
                    }
                }
                _ => {}
            }
        } else {
            match char {
                '[' => strict_has_open_bracket = true,
                '`' => strict_inside_backticks = !strict_inside_backticks,
                ']' if !strict_has_open_bracket && !strict_inside_backticks => break,
                _ => {}
            }
        }
        if char == '\\' {
            // A backslash escape ends the URL before the backslash; `\.` is
            // the one exception and stays part of it.
            if let Some(&next) = bytes.get(start + offset + 1) {
                let next_is_escapable_punct = next.is_ascii_punctuation() && next != b'.';
                if next_is_escapable_punct {
                    break;
                }
            }
        }
        end = start + offset + char.len_utf8();
    }
    autolink_delim(input, trim_from, end)
}
7483
/// Reports whether `char` lies in U+2066..=U+2069, the range of characters
/// that end a literal autolink.
fn is_autolink_terminating_control(char: char) -> bool {
    ('\u{2066}'..='\u{2069}').contains(&char)
}
7487
/// Trims trailing delimiters from the autolink text `input[start..end]` and
/// returns the new end index (never less than `start`).
///
/// Trailing `?`, `!`, `.`, `,`, `:`, `*`, `_`, `~`, `'` and `"` are always
/// dropped. A trailing `)` is dropped only while the text has more closing
/// than opening parentheses. A trailing `;` takes a preceding entity-like
/// sequence with it (`&#x<hex>;`, or `&` followed by one or more ASCII
/// letters/digits); otherwise only the `;` is dropped.
fn autolink_delim(input: &str, start: usize, mut end: usize) -> usize {
    let bytes = input.as_bytes();
    // Parenthesis counts over the untrimmed text.
    let mut opening = 0usize;
    let mut closing = 0usize;
    for &byte in &bytes[start..end] {
        match byte {
            b'(' => opening += 1,
            b')' => closing += 1,
            _ => {}
        }
    }

    while end > start {
        match bytes[end - 1] {
            b')' => {
                // Balanced (or surplus opening) parentheses belong to the URL.
                if closing <= opening {
                    break;
                }
                closing -= 1;
                end -= 1;
            }
            b'?' | b'!' | b'.' | b',' | b':' | b'*' | b'_' | b'~' | b'\'' | b'"' => {
                end -= 1;
            }
            b';' => {
                if let Some(amp) = trailing_hex_entity_run_start(bytes, start, end) {
                    // Cut at the `&` that opens the hex entity.
                    end = amp;
                } else {
                    // Walk back over letters/digits to look for `&name;`.
                    let mut new_end = end - 1;
                    while new_end > start && bytes[new_end - 1].is_ascii_alphanumeric() {
                        new_end -= 1;
                    }
                    if new_end > start && new_end < end - 1 && bytes[new_end - 1] == b'&' {
                        // Non-empty name preceded by `&`: cut at the `&`.
                        end = new_end - 1;
                    } else {
                        end -= 1;
                    }
                }
            }
            _ => break,
        }
    }
    end
}
7544
/// Looks for a hexadecimal character reference (`&#x` or `&#X`, one or more
/// hex digits, `;`) that ends exactly at `end` and begins no earlier than
/// `start`. Returns the index of its `&`.
fn trailing_hex_entity_run_start(bytes: &[u8], start: usize, end: usize) -> Option<usize> {
    if end <= start {
        return None;
    }
    let (&last, body) = bytes[start..end].split_last()?;
    if last != b';' {
        return None;
    }

    // Hex digits sitting directly before the semicolon.
    let digits = body
        .iter()
        .rev()
        .take_while(|byte| byte.is_ascii_hexdigit())
        .count();
    if digits == 0 {
        return None;
    }

    // What precedes the digits must end in `&#x` / `&#X`.
    let prefix = &body[..body.len() - digits];
    match prefix {
        [.., b'&', b'#', b'x' | b'X'] => Some(start + prefix.len() - 3),
        _ => None,
    }
}
7569
7570fn parse_literal_email(input: &str, index: usize) -> Option<(usize, String)> {
7576 let rest = &input[index..];
7577 let at = rest.find('@')?;
7578 if at == 0 {
7579 return None;
7580 }
7581 let local = &rest[..at];
7582
7583 let (auto_mailto, is_xmpp) = classify_email_local(local);
7587
7588 if !email_left_boundary_ok(input, index, auto_mailto) {
7593 return None;
7594 }
7595
7596 if !email_local_is_valid(local, auto_mailto) {
7597 return None;
7598 }
7599
7600 let domain_start = index + at + 1;
7601 let domain_end = literal_email_domain_end(input, domain_start, is_xmpp)?;
7602 let trimmed = autolink_delim(input, domain_start, domain_end);
7603 if trimmed <= domain_start {
7604 return None;
7605 }
7606
7607 let domain = &input[domain_start..trimmed];
7608 if !is_gfm_email_domain(domain, is_xmpp) {
7609 return None;
7610 }
7611
7612 let mut destination = String::new();
7613 if auto_mailto {
7614 destination.push_str("mailto:");
7615 }
7616 destination.push_str(&input[index..trimmed]);
7617 Some((trimmed, destination))
7618}
7619
7620fn classify_email_local(local: &str) -> (bool, bool) {
7625 if let Some(rest) = strip_ci_prefix(local, "mailto:") {
7626 if !rest.is_empty() {
7627 return (false, false);
7628 }
7629 }
7630 if let Some(rest) = strip_ci_prefix(local, "xmpp:") {
7631 if !rest.is_empty() {
7632 return (false, true);
7633 }
7634 }
7635 (true, false)
7636}
7637
/// Removes `prefix` from the front of `input`, comparing bytes without regard
/// to ASCII case. Returns the remainder on a match and `None` otherwise.
fn strip_ci_prefix<'a>(input: &'a str, prefix: &str) -> Option<&'a str> {
    let split = prefix.len();
    // `get` yields `None` when `input` is shorter than the prefix.
    let head = input.as_bytes().get(..split)?;
    head.eq_ignore_ascii_case(prefix.as_bytes())
        .then(|| &input[split..])
}
7647
7648fn email_left_boundary_ok(input: &str, index: usize, auto_mailto: bool) -> bool {
7655 if index == 0 {
7656 return true;
7657 }
7658 let Some(previous) = input[..index].chars().next_back() else {
7659 return true;
7660 };
7661 if previous.is_ascii_alphanumeric() {
7662 if auto_mailto
7663 && input[index..].starts_with('+')
7664 && prefix_ends_with_gfm_email(input, index)
7665 {
7666 return true;
7667 }
7668 return false;
7669 }
7670 if auto_mailto && previous == '/' {
7671 return false;
7672 }
7673 true
7674}
7675
7676fn prefix_ends_with_gfm_email(input: &str, end: usize) -> bool {
7677 let start = input[..end]
7678 .rfind(char::is_whitespace)
7679 .map_or(0, |offset| offset + 1);
7680 let candidate = &input[start..end];
7681 let Some(at) = candidate.rfind('@') else {
7682 return false;
7683 };
7684 email_local_is_valid(&candidate[..at], true) && is_gfm_email_domain(&candidate[at + 1..], false)
7685}
7686
7687fn email_local_is_valid(local: &str, auto_mailto: bool) -> bool {
7691 let body = if auto_mailto {
7692 local
7693 } else if let Some(rest) = strip_ci_prefix(local, "mailto:") {
7694 rest
7695 } else if let Some(rest) = strip_ci_prefix(local, "xmpp:") {
7696 rest
7697 } else {
7698 local
7699 };
7700 !body.is_empty() && body.bytes().all(is_gfm_email_local_byte)
7701}
7702
/// Reports whether `byte` may appear in the local part of a bare email
/// autolink: an ASCII letter or digit, or one of `.`, `+`, `_`, `-`.
fn is_gfm_email_local_byte(byte: u8) -> bool {
    match byte {
        b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' => true,
        b'.' | b'+' | b'_' | b'-' => true,
        _ => false,
    }
}
7708
7709fn is_email_local_part(input: &str) -> bool {
7710 !input.is_empty()
7711 && input
7712 .split('.')
7713 .all(|segment| !segment.is_empty() && segment.bytes().all(is_email_atext))
7714}
7715
/// Reports whether `byte` is an ASCII letter or digit, or one of the
/// punctuation bytes ``! # $ % & ' * + / = ? ^ _ ` { | } ~ -``.
fn is_email_atext(byte: u8) -> bool {
    const ATEXT_PUNCTUATION: &[u8] = b"!#$%&'*+/=?^_`{|}~-";
    byte.is_ascii_alphanumeric() || ATEXT_PUNCTUATION.contains(&byte)
}
7740
/// Scans the domain of a bare email autolink starting at byte `index` and
/// returns the exclusive end offset of the scanned run.
///
/// Accepted bytes are ASCII letters and digits, `-`, `_`, a `.` that is
/// directly followed by an ASCII letter or digit, and `/` when `is_xmpp` is
/// set. Returns `None` when nothing is consumed, when no such `.` was seen,
/// or when the run does not end in an ASCII letter.
fn literal_email_domain_end(input: &str, index: usize, is_xmpp: bool) -> Option<usize> {
    let bytes = input.as_bytes();
    let mut cursor = index;
    let mut inner_dots = 0usize;

    while let Some(&current) = bytes.get(cursor) {
        let accepted = match current {
            b'.' => {
                // A dot only counts when a letter or digit follows it.
                let followed = bytes
                    .get(cursor + 1)
                    .is_some_and(|next| next.is_ascii_alphanumeric());
                if followed {
                    inner_dots += 1;
                }
                followed
            }
            b'-' | b'_' => true,
            b'/' => is_xmpp,
            other => other.is_ascii_alphanumeric(),
        };
        if !accepted {
            break;
        }
        cursor += 1;
    }

    if cursor <= index {
        return None;
    }
    let last = bytes[cursor - 1];
    let well_terminated = last.is_ascii_alphabetic() || last == b'.';
    (inner_dots > 0 && well_terminated).then_some(cursor)
}
7777
/// Validates the domain of a bare email autolink.
///
/// When `is_xmpp` is set, only the text before the first `/` is examined.
/// The examined host must contain at least one `.`, must not end in `-` or
/// `_`, and every `.`-separated label must be non-empty and made only of
/// ASCII letters, digits, `-` or `_`.
fn is_gfm_email_domain(input: &str, is_xmpp: bool) -> bool {
    let host = match input.split_once('/') {
        Some((before_slash, _)) if is_xmpp => before_slash,
        _ => input,
    };

    // An empty host has no dot, so it is rejected here as well.
    let ends_with_joiner = matches!(host.as_bytes().last(), Some(b'-' | b'_'));
    if ends_with_joiner || !host.contains('.') {
        return false;
    }

    host.split('.').all(|label| {
        !label.is_empty()
            && label
                .bytes()
                .all(|byte| matches!(byte, b'-' | b'_') || byte.is_ascii_alphanumeric())
    })
}
7809
/// Validates a dotted domain name and checks that it has at least
/// `min_labels` labels.
///
/// Each `.`-separated label must be 1 to 63 bytes long, start and end with an
/// ASCII letter or digit, and otherwise contain only ASCII letters, digits
/// or `-`.
fn is_email_domain(input: &str, min_labels: usize) -> bool {
    let label_is_valid = |label: &str| {
        let bytes = label.as_bytes();
        match (bytes.first(), bytes.last()) {
            (Some(first), Some(last)) => {
                bytes.len() <= 63
                    && first.is_ascii_alphanumeric()
                    && last.is_ascii_alphanumeric()
                    && bytes
                        .iter()
                        .all(|byte| *byte == b'-' || byte.is_ascii_alphanumeric())
            }
            // Empty label.
            _ => false,
        }
    };

    let mut seen = 0usize;
    for label in input.split('.') {
        if !label_is_valid(label) {
            return false;
        }
        seen += 1;
    }
    seen >= min_labels
}
7832
7833fn is_footnote_label(label: &str) -> bool {
7834 !label.is_empty()
7835 && reference_label_is_within_limit(label)
7836 && !label.chars().any(char::is_whitespace)
7837}
7838
7839fn find_footnote_definition_label_end(input: &str) -> Option<usize> {
7840 let close = find_footnote_reference_label_end(input, 2)?;
7841 if input.as_bytes().get(close + 1) == Some(&b':') {
7842 Some(close)
7843 } else {
7844 None
7845 }
7846}
7847
7848fn find_footnote_reference_label_end(input: &str, mut cursor: usize) -> Option<usize> {
7849 while cursor < input.len() {
7850 let (next, char) = next_char(input, cursor)?;
7851 if char == ']' && !is_escaped_at(input, cursor) {
7852 return Some(cursor);
7853 }
7854 cursor = next;
7855 }
7856 None
7857}
7858
7859fn find_inline_footnote_end(input: &str, mut cursor: usize) -> Option<usize> {
7860 let mut depth = 0usize;
7861 while cursor < input.len() {
7862 let (next, char) = next_char(input, cursor)?;
7863 if !is_escaped_at(input, cursor) {
7864 match char {
7865 '[' => depth += 1,
7866 ']' if depth == 0 => return Some(cursor),
7867 ']' => depth = depth.saturating_sub(1),
7868 _ => {}
7869 }
7870 }
7871 cursor = next;
7872 }
7873 None
7874}