panache_parser/parser/blocks/
reference_links.rs1pub fn try_parse_reference_definition(
40 text: &str,
41 dialect: crate::options::Dialect,
42) -> Option<(usize, String, String, Option<String>)> {
43 try_parse_reference_definition_with_mode(text, true, dialect)
44}
45
46pub fn try_parse_reference_definition_lax(
50 text: &str,
51 dialect: crate::options::Dialect,
52) -> Option<(usize, String, String, Option<String>)> {
53 try_parse_reference_definition_with_mode(text, false, dialect)
54}
55
56fn try_parse_reference_definition_with_mode(
57 text: &str,
58 strict_eol: bool,
59 dialect: crate::options::Dialect,
60) -> Option<(usize, String, String, Option<String>)> {
61 let spans = reference_definition_spans(text, strict_eol, dialect)?;
62 let label = text[spans.indent + 1..spans.label_close].to_string();
63 let url = if spans.url_is_angle {
64 text[spans.url.start + 1..spans.url.end - 1].to_string()
65 } else {
66 text[spans.url.clone()].to_string()
67 };
68 let title = spans
69 .title
70 .as_ref()
71 .map(|r| text[r.start + 1..r.end - 1].to_string());
72 Some((spans.consumed, label, url, title))
73}
74
/// Byte offsets (relative to the start of the parsed text) of the parts of a
/// link reference definition.
#[derive(Debug, Clone)]
pub(crate) struct ReferenceSpans {
    /// Number of leading spaces, i.e. the offset of the opening `[`.
    pub indent: usize,
    /// Offset of the `]` that closes the label.
    pub label_close: usize,
    /// Offset of the `:` that follows the label.
    pub colon: usize,
    /// Range of the destination; includes `<` and `>` when `url_is_angle`.
    pub url: std::ops::Range<usize>,
    /// Whether the destination is written in `<...>` form.
    pub url_is_angle: bool,
    /// Range of the title including its delimiters, if a title was found.
    pub title: Option<std::ops::Range<usize>>,
    /// Total number of bytes covered by the definition.
    pub consumed: usize,
}
101
102pub(crate) fn reference_definition_spans(
108 text: &str,
109 strict_eol: bool,
110 dialect: crate::options::Dialect,
111) -> Option<ReferenceSpans> {
112 let leading_spaces = text.chars().take_while(|&c| c == ' ').count();
113 if leading_spaces > 3 {
114 return None;
115 }
116 let inner = &text[leading_spaces..];
117 let bytes = inner.as_bytes();
118
119 if bytes.is_empty() || bytes[0] != b'[' {
121 return None;
122 }
123
124 if bytes.len() >= 2 && bytes[1] == b'^' {
126 return None;
127 }
128
129 let mut pos = 1;
133 let mut escape_next = false;
134
135 while pos < bytes.len() {
136 if escape_next {
137 escape_next = false;
138 pos += 1;
139 continue;
140 }
141
142 match bytes[pos] {
143 b'\\' => {
144 escape_next = true;
145 pos += 1;
146 }
147 b']' => {
148 break;
149 }
150 b'[' => {
151 return None;
152 }
153 b'\n' | b'\r' => {
154 let nl_end =
155 if bytes[pos] == b'\r' && pos + 1 < bytes.len() && bytes[pos + 1] == b'\n' {
156 pos + 2
157 } else {
158 pos + 1
159 };
160 let mut probe = nl_end;
161 while probe < bytes.len() && matches!(bytes[probe], b' ' | b'\t') {
162 probe += 1;
163 }
164 if probe >= bytes.len() || bytes[probe] == b'\n' || bytes[probe] == b'\r' {
165 return None;
166 }
167 pos = nl_end;
168 }
169 _ => {
170 pos += 1;
171 }
172 }
173 }
174
175 if pos >= bytes.len() || bytes[pos] != b']' {
176 return None;
177 }
178
179 let label = &inner[1..pos];
180 if label.trim().is_empty() {
181 return None;
182 }
183 let label_close = leading_spaces + pos;
184
185 pos += 1; if pos >= bytes.len() || bytes[pos] != b':' {
189 return None;
190 }
191 let colon = leading_spaces + pos;
192 pos += 1;
193
194 pos = skip_ws_one_newline(bytes, pos)?;
196
197 let url_start = pos;
199 let url_is_angle = pos < bytes.len() && bytes[pos] == b'<';
200
201 if url_is_angle {
202 pos += 1;
203 while pos < bytes.len() && bytes[pos] != b'>' && bytes[pos] != b'\n' && bytes[pos] != b'\r'
204 {
205 pos += 1;
206 }
207 if pos >= bytes.len() || bytes[pos] != b'>' {
208 return None;
209 }
210 pos += 1; } else {
212 while pos < bytes.len() && !matches!(bytes[pos], b' ' | b'\t' | b'\n' | b'\r') {
213 pos += 1;
214 }
215 if pos == url_start {
216 return None;
217 }
218 }
219 let url = (leading_spaces + url_start)..(leading_spaces + pos);
220
221 let after_url = pos;
227 let url_line_end = consume_to_eol(bytes, after_url);
228 let url_line_end_lax = if strict_eol {
229 url_line_end
230 } else {
231 Some(consume_to_eol_lax(bytes, after_url))
232 };
233
234 let mut title: Option<std::ops::Range<usize>> = None;
235 let mut end_pos: Option<usize> = None;
236
237 if let Some(title_start) = skip_ws_one_newline(bytes, after_url) {
238 let crossed_newline = bytes[after_url..title_start]
239 .iter()
240 .any(|&b| b == b'\n' || b == b'\r');
241 let cmark_requires_separator = dialect == crate::options::Dialect::CommonMark
246 && !crossed_newline
247 && title_start == after_url;
248 if cmark_requires_separator {
249 return Some(ReferenceSpans {
250 indent: leading_spaces,
251 label_close,
252 colon,
253 url,
254 url_is_angle,
255 title: None,
256 consumed: leading_spaces + url_line_end_lax?,
257 });
258 }
259 let mut title_pos = title_start;
260 match parse_title(bytes, &mut title_pos) {
261 Some(Some(range)) => {
262 let line_end = if strict_eol {
263 consume_to_eol(bytes, title_pos)
264 } else {
265 Some(consume_to_eol_lax(bytes, title_pos))
266 };
267 if let Some(end) = line_end {
268 title = Some((leading_spaces + range.start)..(leading_spaces + range.end));
269 end_pos = Some(end);
270 } else if !crossed_newline {
271 return None;
272 }
273 }
274 None => {
275 if !crossed_newline {
276 return None;
277 }
278 }
279 Some(None) => {}
280 }
281 }
282
283 let end = match end_pos {
284 Some(p) => p,
285 None => url_line_end_lax?,
286 };
287
288 Some(ReferenceSpans {
289 indent: leading_spaces,
290 label_close,
291 colon,
292 url,
293 url_is_angle,
294 title,
295 consumed: leading_spaces + end,
296 })
297}
298
/// Advance from `pos` past the rest of the current line, whatever it contains.
///
/// Returns the offset just after the line ending (`\n`, `\r\n` or a lone
/// `\r`), or `bytes.len()` if no line ending follows.
fn consume_to_eol_lax(bytes: &[u8], mut pos: usize) -> usize {
    while pos < bytes.len() && !matches!(bytes[pos], b'\n' | b'\r') {
        pos += 1;
    }
    match bytes.get(pos) {
        None => pos,
        // Treat `\r\n` as a single line ending.
        Some(b'\r') if bytes.get(pos + 1) == Some(&b'\n') => pos + 2,
        Some(_) => pos + 1,
    }
}
314
/// Strictly consume the rest of the current line: only spaces and tabs are
/// allowed before the line ending.
///
/// Returns the offset just after the line ending (`\n`, `\r\n` or a lone
/// `\r`), or `bytes.len()` at end of input. Returns `None` if any other byte
/// appears before the line ends.
fn consume_to_eol(bytes: &[u8], mut pos: usize) -> Option<usize> {
    while pos < bytes.len() && matches!(bytes[pos], b' ' | b'\t') {
        pos += 1;
    }
    match bytes.get(pos) {
        // End of input counts as end of line.
        None => Some(pos),
        Some(b'\n') => Some(pos + 1),
        Some(b'\r') if bytes.get(pos + 1) == Some(&b'\n') => Some(pos + 2),
        Some(b'\r') => Some(pos + 1),
        Some(_) => None,
    }
}
336
/// Skip spaces/tabs starting at `pos`, optionally followed by a single line
/// ending and further spaces/tabs.
///
/// Returns the offset of the first byte after the skipped whitespace.
/// Returns `None` if, after crossing one line ending, the following line is
/// blank (i.e. a second line ending is reached).
fn skip_ws_one_newline(bytes: &[u8], mut pos: usize) -> Option<usize> {
    let is_blank = |b: u8| matches!(b, b' ' | b'\t');
    let is_newline = |b: u8| matches!(b, b'\n' | b'\r');

    while pos < bytes.len() && is_blank(bytes[pos]) {
        pos += 1;
    }

    if pos < bytes.len() && is_newline(bytes[pos]) {
        // Step over exactly one line ending (`\r\n` counts as one).
        let is_crlf = bytes[pos] == b'\r' && bytes.get(pos + 1) == Some(&b'\n');
        pos += if is_crlf { 2 } else { 1 };

        while pos < bytes.len() && is_blank(bytes[pos]) {
            pos += 1;
        }
        // A second line ending means a blank line was crossed.
        if pos < bytes.len() && is_newline(bytes[pos]) {
            return None;
        }
    }

    Some(pos)
}
360
/// Report whether `line` looks like an indented continuation line consisting
/// solely of `key=value` attribute pairs (e.g. `  width=20px class="a b"`).
///
/// The line must start with a space or tab and, once trimmed, contain one or
/// more pairs separated by spaces/tabs. A key is a non-empty run of bytes up
/// to `=`. A value is either a non-empty quoted string (`"..."` or `'...'`,
/// which must be closed) or a non-empty run of non-blank bytes.
///
/// Returns `false` for unindented lines, blank lines and any line containing
/// a token that is not a well-formed pair.
pub fn line_is_mmd_link_attribute_continuation(line: &str) -> bool {
    // Only indented lines can be continuations.
    if !matches!(line.as_bytes().first(), Some(b' ' | b'\t')) {
        return false;
    }

    let bytes = line.trim().as_bytes();
    if bytes.is_empty() {
        return false;
    }

    let is_blank = |b: u8| b == b' ' || b == b'\t';
    let len = bytes.len();
    let mut pos = 0usize;
    let mut saw_pair = false;

    while pos < len {
        // Skip the blanks separating pairs.
        while pos < len && is_blank(bytes[pos]) {
            pos += 1;
        }
        if pos >= len {
            break;
        }

        // Key: everything up to `=`; a blank before `=` means "not a pair".
        let key_start = pos;
        while pos < len && bytes[pos] != b'=' && !is_blank(bytes[pos]) {
            pos += 1;
        }
        if pos == key_start || pos >= len || bytes[pos] != b'=' {
            return false;
        }

        // Step over `=`; a value must follow.
        pos += 1;
        if pos >= len {
            return false;
        }

        if matches!(bytes[pos], b'"' | b'\'') {
            // Quoted value: non-empty and terminated by the same quote.
            let quote = bytes[pos];
            pos += 1;
            let value_start = pos;
            while pos < len && bytes[pos] != quote {
                pos += 1;
            }
            if pos == value_start || pos >= len {
                return false;
            }
            pos += 1;
        } else {
            // Bare value: non-empty run up to the next blank.
            let value_start = pos;
            while pos < len && !is_blank(bytes[pos]) {
                pos += 1;
            }
            if pos == value_start {
                return false;
            }
        }

        saw_pair = true;
    }

    saw_pair
}
425
/// Try to parse a link title (`"..."`, `'...'` or `(...)`) starting at `*pos`.
///
/// Leading spaces, tabs and line endings are skipped first. Backslash escapes
/// the following byte inside the title, so an escaped delimiter does not
/// close it. A title may span line endings.
///
/// Return value and effect on `*pos`:
/// - `Some(Some(range))`: a title was found; `range` covers it including both
///   delimiters, and `*pos` is left after the closing delimiter and any
///   spaces/tabs that follow it.
/// - `Some(None)`: no title here. If a non-delimiter byte was found, `*pos` is
///   restored to its initial value; if only whitespace remained, `*pos` is
///   left at the end of input.
/// - `None`: a title was opened but never closed; `*pos` is left at the end of
///   input.
fn parse_title(bytes: &[u8], pos: &mut usize) -> Option<Option<std::ops::Range<usize>>> {
    let base_pos = *pos;

    while *pos < bytes.len() && matches!(bytes[*pos], b' ' | b'\t' | b'\n' | b'\r') {
        *pos += 1;
    }

    if *pos >= bytes.len() {
        return Some(None);
    }

    let quote_char = bytes[*pos];
    if !matches!(quote_char, b'"' | b'\'' | b'(') {
        *pos = base_pos;
        return Some(None);
    }

    let closing_char = if quote_char == b'(' { b')' } else { quote_char };

    let open = *pos;
    *pos += 1;

    let mut escape_next = false;
    while *pos < bytes.len() {
        if escape_next {
            escape_next = false;
            *pos += 1;
            continue;
        }

        match bytes[*pos] {
            b'\\' => {
                escape_next = true;
                *pos += 1;
            }
            c if c == closing_char => {
                *pos += 1;
                let close_end = *pos;

                // Swallow blanks after the title so the caller can check the
                // remainder of the line directly.
                while *pos < bytes.len() && matches!(bytes[*pos], b' ' | b'\t') {
                    *pos += 1;
                }

                return Some(Some(open..close_end));
            }
            // Every other byte, line endings included, is title content.
            _ => {
                *pos += 1;
            }
        }
    }

    // Ran out of input before the closing delimiter.
    None
}
496
/// Try to parse a footnote definition marker (`[^id]:`) at the very start of
/// `line`.
///
/// The id is the non-empty text between `[^` and the first `]`, which must
/// occur before any line ending and be followed directly by `:`.
///
/// Returns `(id, content_start)` where `content_start` is the byte offset just
/// after the colon and any spaces/tabs following it. Returns `None` if `line`
/// does not start with such a marker.
pub fn try_parse_footnote_marker(line: &str) -> Option<(String, usize)> {
    let bytes = line.as_bytes();

    // Need at least `[^x]` before anything else can match.
    if bytes.len() < 4 || bytes[0] != b'[' || bytes[1] != b'^' {
        return None;
    }

    // Find the closing bracket; the id may not span lines.
    let mut pos = 2;
    while pos < bytes.len() && !matches!(bytes[pos], b']' | b'\n' | b'\r') {
        pos += 1;
    }
    if bytes.get(pos) != Some(&b']') {
        return None;
    }

    let id = &line[2..pos];
    if id.is_empty() {
        return None;
    }

    // The colon must directly follow the closing bracket.
    pos += 1;
    if bytes.get(pos) != Some(&b':') {
        return None;
    }
    pos += 1;

    // Skip blanks between the marker and the footnote content.
    while pos < bytes.len() && matches!(bytes[pos], b' ' | b'\t') {
        pos += 1;
    }

    Some((id.to_string(), pos))
}
542
#[cfg(test)]
mod tests {
    use super::{line_is_mmd_link_attribute_continuation, try_parse_reference_definition};
    use crate::syntax::SyntaxKind;

    // NOTE(review): the indentation inside the multi-line footnote literals
    // below may have been collapsed to a single space at some point; confirm
    // the intended continuation indent against the upstream file.

    /// Parsing a footnote definition with a body on the next line must
    /// round-trip the input text exactly.
    #[test]
    fn test_footnote_definition_body_layout_is_lossless() {
        let input = "[^note-on-refs]:\n Note that if `--file-scope` is used,\n";
        let tree = crate::parse(input, Some(crate::ParserOptions::default()));
        assert_eq!(tree.text().to_string(), input);
    }

    /// The footnote marker is split into start / id / end / colon tokens.
    #[test]
    fn test_footnote_definition_marker_emits_structural_tokens() {
        let input = "[^note-on-refs]: body\n";
        let tree = crate::parse(input, Some(crate::ParserOptions::default()));
        let def = tree
            .descendants()
            .find(|n| n.kind() == SyntaxKind::FOOTNOTE_DEFINITION)
            .expect("footnote definition");
        let token_kinds: Vec<_> = def
            .children_with_tokens()
            .filter_map(|e| e.into_token())
            .map(|t| t.kind())
            .collect();
        assert!(token_kinds.contains(&SyntaxKind::FOOTNOTE_LABEL_START));
        assert!(token_kinds.contains(&SyntaxKind::FOOTNOTE_LABEL_ID));
        assert!(token_kinds.contains(&SyntaxKind::FOOTNOTE_LABEL_END));
        assert!(token_kinds.contains(&SyntaxKind::FOOTNOTE_LABEL_COLON));
    }

    /// `$$ ... $$` inside a footnote body is display math, not a TeX block.
    #[test]
    fn footnote_multiline_dollar_math_parses_as_display_math_not_tex_block() {
        let input = "[^note]: Intro line before math:\n $$\n \\begin{aligned} a &= b \\\\ c &= d \\end{aligned}\n $$\n";
        let tree = crate::parse(input, Some(crate::ParserOptions::default()));

        let def = tree
            .descendants()
            .find(|n| n.kind() == SyntaxKind::FOOTNOTE_DEFINITION)
            .expect("footnote definition");

        let has_display_math = def
            .descendants()
            .any(|n| n.kind() == SyntaxKind::DISPLAY_MATH);
        let has_tex_block = def.descendants().any(|n| n.kind() == SyntaxKind::TEX_BLOCK);

        assert!(
            has_display_math,
            "Expected DISPLAY_MATH in footnote definition, got:\n{}",
            tree
        );
        assert!(
            !has_tex_block,
            "Did not expect TEX_BLOCK in footnote definition for $$...$$ math, got:\n{}",
            tree
        );
    }

    /// Three leading spaces are accepted; four are rejected.
    #[test]
    fn test_reference_definition_with_up_to_three_leading_spaces() {
        let d = crate::options::Dialect::Pandoc;
        assert!(try_parse_reference_definition("   [foo]: #bar", d).is_some());
        assert!(try_parse_reference_definition("    [foo]: #bar", d).is_none());
    }

    #[test]
    fn test_reference_definition_commonmark_requires_separator_before_title() {
        // Pandoc accepts a title glued directly onto an angle-bracket URL.
        let pandoc =
            try_parse_reference_definition("[foo]: <bar>(baz)\n", crate::options::Dialect::Pandoc);
        assert_eq!(
            pandoc
                .as_ref()
                .map(|(_, _, url, title)| (url.as_str(), title.as_deref())),
            Some(("bar", Some("baz")))
        );

        // CommonMark does not: with no separator the strict parse fails.
        let cmark = try_parse_reference_definition(
            "[foo]: <bar>(baz)\n",
            crate::options::Dialect::CommonMark,
        );
        assert!(cmark.is_none());

        // With separating whitespace CommonMark accepts the title.
        let cmark_ok = try_parse_reference_definition(
            "[foo]: <bar> (baz)\n",
            crate::options::Dialect::CommonMark,
        );
        assert_eq!(
            cmark_ok
                .as_ref()
                .map(|(_, _, url, title)| (url.as_str(), title.as_deref())),
            Some(("bar", Some("baz")))
        );
    }

    #[test]
    fn test_reference_definition_emits_structured_url_and_title() {
        let input = "[ref]: <https://example.com> \"The Title\"\n";
        let tree = crate::parse(input, Some(crate::ParserOptions::default()));
        assert_eq!(tree.text().to_string(), input, "must stay lossless");

        let def = tree
            .descendants()
            .find(|n| n.kind() == SyntaxKind::REFERENCE_DEFINITION)
            .expect("reference definition");

        let url = def
            .children()
            .find(|n| n.kind() == SyntaxKind::REFERENCE_URL)
            .expect("REFERENCE_URL node");
        assert_eq!(url.text().to_string(), "<https://example.com>");
        // The URL node carries start and end delimiter tokens.
        assert!(
            url.children_with_tokens()
                .any(|e| e.kind() == SyntaxKind::LINK_DEST_START)
        );
        assert!(
            url.children_with_tokens()
                .any(|e| e.kind() == SyntaxKind::LINK_DEST_END)
        );

        let title = def
            .children()
            .find(|n| n.kind() == SyntaxKind::REFERENCE_TITLE)
            .expect("REFERENCE_TITLE node");
        assert_eq!(title.text().to_string(), "\"The Title\"");
    }

    #[test]
    fn test_reference_definition_without_title_omits_title_node() {
        let input = "[ref]: /url\n";
        let tree = crate::parse(input, Some(crate::ParserOptions::default()));
        assert_eq!(tree.text().to_string(), input, "must stay lossless");

        let def = tree
            .descendants()
            .find(|n| n.kind() == SyntaxKind::REFERENCE_DEFINITION)
            .expect("reference definition");

        let url = def
            .children()
            .find(|n| n.kind() == SyntaxKind::REFERENCE_URL)
            .expect("REFERENCE_URL node");
        assert_eq!(url.text().to_string(), "/url");
        assert!(
            !def.children()
                .any(|n| n.kind() == SyntaxKind::REFERENCE_TITLE),
            "no title => no REFERENCE_TITLE node"
        );
    }

    #[test]
    fn mmd_link_attribute_continuation_detects_valid_tokens() {
        assert!(line_is_mmd_link_attribute_continuation(
            " width=20px height=30px id=myId"
        ));
        assert!(line_is_mmd_link_attribute_continuation(
            "\tclass=\"myClass1 myClass2\""
        ));
    }

    #[test]
    fn mmd_link_attribute_continuation_rejects_non_attribute_lines() {
        assert!(!line_is_mmd_link_attribute_continuation(
            "not-indented width=20px"
        ));
        assert!(!line_is_mmd_link_attribute_continuation(
            " not-an-attr token"
        ));
    }
}