panache_parser/parser/blocks/
reference_links.rs1pub fn try_parse_reference_definition(
40 text: &str,
41 dialect: crate::options::Dialect,
42) -> Option<(usize, String, String, Option<String>)> {
43 try_parse_reference_definition_with_mode(text, true, dialect)
44}
45
46pub fn try_parse_reference_definition_lax(
50 text: &str,
51 dialect: crate::options::Dialect,
52) -> Option<(usize, String, String, Option<String>)> {
53 try_parse_reference_definition_with_mode(text, false, dialect)
54}
55
56fn try_parse_reference_definition_with_mode(
57 text: &str,
58 strict_eol: bool,
59 dialect: crate::options::Dialect,
60) -> Option<(usize, String, String, Option<String>)> {
61 let leading_spaces = text.chars().take_while(|&c| c == ' ').count();
62 if leading_spaces > 3 {
63 return None;
64 }
65 let inner = &text[leading_spaces..];
66 let bytes = inner.as_bytes();
67
68 if bytes.is_empty() || bytes[0] != b'[' {
70 return None;
71 }
72
73 if bytes.len() >= 2 && bytes[1] == b'^' {
75 return None;
76 }
77
78 let mut pos = 1;
82 let mut escape_next = false;
83
84 while pos < bytes.len() {
85 if escape_next {
86 escape_next = false;
87 pos += 1;
88 continue;
89 }
90
91 match bytes[pos] {
92 b'\\' => {
93 escape_next = true;
94 pos += 1;
95 }
96 b']' => {
97 break;
98 }
99 b'[' => {
100 return None;
101 }
102 b'\n' | b'\r' => {
103 let nl_end =
104 if bytes[pos] == b'\r' && pos + 1 < bytes.len() && bytes[pos + 1] == b'\n' {
105 pos + 2
106 } else {
107 pos + 1
108 };
109 let mut probe = nl_end;
110 while probe < bytes.len() && matches!(bytes[probe], b' ' | b'\t') {
111 probe += 1;
112 }
113 if probe >= bytes.len() || bytes[probe] == b'\n' || bytes[probe] == b'\r' {
114 return None;
115 }
116 pos = nl_end;
117 }
118 _ => {
119 pos += 1;
120 }
121 }
122 }
123
124 if pos >= bytes.len() || bytes[pos] != b']' {
125 return None;
126 }
127
128 let label = &inner[1..pos];
129 if label.trim().is_empty() {
130 return None;
131 }
132
133 pos += 1; if pos >= bytes.len() || bytes[pos] != b':' {
137 return None;
138 }
139 pos += 1;
140
141 pos = skip_ws_one_newline(bytes, pos)?;
143
144 let url_start = pos;
146
147 let url = if pos < bytes.len() && bytes[pos] == b'<' {
148 pos += 1;
149 let url_content_start = pos;
150 while pos < bytes.len() && bytes[pos] != b'>' && bytes[pos] != b'\n' && bytes[pos] != b'\r'
151 {
152 pos += 1;
153 }
154 if pos >= bytes.len() || bytes[pos] != b'>' {
155 return None;
156 }
157 let url = inner[url_content_start..pos].to_string();
158 pos += 1; url
160 } else {
161 while pos < bytes.len() && !matches!(bytes[pos], b' ' | b'\t' | b'\n' | b'\r') {
162 pos += 1;
163 }
164 if pos == url_start {
165 return None;
166 }
167 inner[url_start..pos].to_string()
168 };
169
170 let after_url = pos;
176 let url_line_end = consume_to_eol(bytes, after_url);
177 let url_line_end_lax = if strict_eol {
178 url_line_end
179 } else {
180 Some(consume_to_eol_lax(bytes, after_url))
181 };
182
183 let mut title: Option<String> = None;
184 let mut end_pos: Option<usize> = None;
185
186 if let Some(title_start) = skip_ws_one_newline(bytes, after_url) {
187 let crossed_newline = bytes[after_url..title_start]
188 .iter()
189 .any(|&b| b == b'\n' || b == b'\r');
190 let cmark_requires_separator = dialect == crate::options::Dialect::CommonMark
195 && !crossed_newline
196 && title_start == after_url;
197 if cmark_requires_separator {
198 return Some((
199 leading_spaces + url_line_end_lax?,
200 label.to_string(),
201 url,
202 None,
203 ));
204 }
205 let mut title_pos = title_start;
206 match parse_title(inner, bytes, &mut title_pos) {
207 Some(Some(t)) => {
208 let line_end = if strict_eol {
209 consume_to_eol(bytes, title_pos)
210 } else {
211 Some(consume_to_eol_lax(bytes, title_pos))
212 };
213 if let Some(end) = line_end {
214 title = Some(t);
215 end_pos = Some(end);
216 } else if !crossed_newline {
217 return None;
218 }
219 }
220 None => {
221 if !crossed_newline {
222 return None;
223 }
224 }
225 Some(None) => {}
226 }
227 }
228
229 let end = match end_pos {
230 Some(p) => p,
231 None => url_line_end_lax?,
232 };
233
234 Some((leading_spaces + end, label.to_string(), url, title))
235}
236
/// Advances from `pos` past the rest of the current line, whatever it contains.
///
/// Returns the index just after the line terminator (`\n`, `\r`, or `\r\n`).
/// When no terminator is found the scan stops at the end of `bytes`; a `pos`
/// already at or past the end is returned unchanged.
fn consume_to_eol_lax(bytes: &[u8], mut pos: usize) -> usize {
    loop {
        match bytes.get(pos) {
            None => return pos,
            Some(b'\n') => return pos + 1,
            Some(b'\r') => {
                // Treat `\r\n` as a single terminator.
                return if bytes.get(pos + 1) == Some(&b'\n') {
                    pos + 2
                } else {
                    pos + 1
                };
            }
            Some(_) => pos += 1,
        }
    }
}
252
/// Advances from `pos` over trailing spaces/tabs and one line terminator.
///
/// Returns the index just after the terminator (`\n`, `\r`, or `\r\n`), or the
/// current index if the input ends first. Returns `None` when any other byte
/// appears before the line ends.
fn consume_to_eol(bytes: &[u8], mut pos: usize) -> Option<usize> {
    loop {
        match bytes.get(pos) {
            Some(b' ') | Some(b'\t') => pos += 1,
            None => return Some(pos),
            Some(b'\n') => return Some(pos + 1),
            Some(b'\r') => {
                // Treat `\r\n` as a single terminator.
                let step = if bytes.get(pos + 1) == Some(&b'\n') { 2 } else { 1 };
                return Some(pos + step);
            }
            Some(_) => return None,
        }
    }
}
274
/// Skips spaces/tabs from `pos`, then at most one line terminator plus the
/// spaces/tabs that follow it.
///
/// Returns the index of the first byte after the skipped run. Returns `None`
/// when the line after the terminator is blank, i.e. a second terminator is
/// reached before any other byte.
fn skip_ws_one_newline(bytes: &[u8], mut pos: usize) -> Option<usize> {
    let is_eol = |b: u8| b == b'\n' || b == b'\r';
    let skip_blanks = |mut at: usize| -> usize {
        while at < bytes.len() && matches!(bytes[at], b' ' | b'\t') {
            at += 1;
        }
        at
    };

    pos = skip_blanks(pos);
    if pos >= bytes.len() || !is_eol(bytes[pos]) {
        // No line break here: only inline whitespace was skipped.
        return Some(pos);
    }

    // Step over the terminator, counting `\r\n` as one.
    let step = if bytes[pos] == b'\r' && bytes.get(pos + 1) == Some(&b'\n') {
        2
    } else {
        1
    };
    pos = skip_blanks(pos + step);

    if pos < bytes.len() && is_eol(bytes[pos]) {
        None
    } else {
        Some(pos)
    }
}
298
/// Reports whether `line` is an indented run of `key=value` attribute pairs.
///
/// The line must begin with a space or tab and, once trimmed, consist solely of
/// whitespace-separated pairs. A value is either a non-empty bare token or a
/// non-empty string enclosed in matching single or double quotes (which may
/// contain spaces). Any token that does not fit this shape makes the whole line
/// fail.
pub fn line_is_mmd_link_attribute_continuation(line: &str) -> bool {
    if !line.starts_with(|c: char| c == ' ' || c == '\t') {
        return false;
    }

    let content = line.trim().as_bytes();
    if content.is_empty() {
        return false;
    }

    let is_blank = |b: u8| b == b' ' || b == b'\t';
    let end = content.len();
    let mut cursor = 0usize;
    let mut pairs = 0usize;

    loop {
        // Separator whitespace between pairs.
        while cursor < end && is_blank(content[cursor]) {
            cursor += 1;
        }
        if cursor >= end {
            break;
        }

        // Key: non-empty, runs up to `=` with no whitespace inside.
        let key_begin = cursor;
        while cursor < end && content[cursor] != b'=' && !is_blank(content[cursor]) {
            cursor += 1;
        }
        let key_is_valid = cursor > key_begin && cursor < end && content[cursor] == b'=';
        if !key_is_valid {
            return false;
        }

        // Step over `=`; a value must follow.
        cursor += 1;
        if cursor >= end {
            return false;
        }

        let opener = content[cursor];
        if opener == b'"' || opener == b'\'' {
            // Quoted value: must be non-empty and closed by the same quote.
            let value_begin = cursor + 1;
            match content[value_begin..].iter().position(|&b| b == opener) {
                None | Some(0) => return false,
                Some(value_len) => cursor = value_begin + value_len + 1,
            }
        } else {
            // Bare value: non-empty run up to the next space/tab.
            let value_begin = cursor;
            while cursor < end && !is_blank(content[cursor]) {
                cursor += 1;
            }
            if cursor == value_begin {
                return false;
            }
        }

        pairs += 1;
    }

    pairs > 0
}
363
/// Attempts to read a link title starting at `*pos`.
///
/// Leading spaces, tabs and line terminators are skipped first. A title is
/// delimited by `"..."`, `'...'` or `(...)`; a backslash escapes the byte that
/// follows it, so an escaped delimiter does not end the title.
///
/// `text` and `bytes` are expected to be the same content, as a string and as
/// its byte slice.
///
/// Outcomes:
/// * `Some(Some(title))` - a closed title was found; `*pos` ends up after the
///   closing delimiter and any spaces/tabs that follow it.
/// * `Some(None)` - no title opener. `*pos` is left at its entry value, except
///   when the input ran out while skipping whitespace, in which case it stays
///   at the end of the skipped run.
/// * `None` - an opener was found but never closed; `*pos` is left at the end
///   of `bytes`.
fn parse_title(text: &str, bytes: &[u8], pos: &mut usize) -> Option<Option<String>> {
    let total = bytes.len();
    let mut cursor = *pos;

    while cursor < total && matches!(bytes[cursor], b' ' | b'\t' | b'\n' | b'\r') {
        cursor += 1;
    }
    if cursor >= total {
        *pos = cursor;
        return Some(None);
    }

    let opener = bytes[cursor];
    let closer = match opener {
        b'(' => b')',
        b'"' | b'\'' => opener,
        // Not a title: `*pos` has not been touched, so the caller's position
        // is preserved.
        _ => return Some(None),
    };

    cursor += 1;
    let body_start = cursor;
    let mut escaped = false;

    while cursor < total {
        let byte = bytes[cursor];
        if escaped {
            // The byte after a backslash is taken literally.
            escaped = false;
        } else if byte == b'\\' {
            escaped = true;
        } else if byte == closer {
            let body_end = cursor;
            cursor += 1;
            while cursor < total && matches!(bytes[cursor], b' ' | b'\t') {
                cursor += 1;
            }
            *pos = cursor;
            return Some(Some(text[body_start..body_end].to_string()));
        }
        cursor += 1;
    }

    *pos = cursor;
    None
}
432
/// Recognises a footnote definition marker of the form `[^id]:` at the start
/// of `line`.
///
/// The id is everything between `[^` and the first `]`; it must be non-empty
/// and may not span a line break. The `]` must be followed directly by `:`.
///
/// Returns the id together with the byte offset of the content that follows
/// the marker (after the colon and any spaces/tabs), or `None` if `line` does
/// not start with such a marker.
pub fn try_parse_footnote_marker(line: &str) -> Option<(String, usize)> {
    const ID_START: usize = 2;

    let raw = line.as_bytes();
    if raw.len() < 4 || !raw.starts_with(b"[^") {
        return None;
    }

    // The first `]` or line break decides whether the id is terminated.
    let stop = ID_START
        + raw[ID_START..]
            .iter()
            .position(|&b| matches!(b, b']' | b'\n' | b'\r'))?;
    if raw[stop] != b']' || stop == ID_START {
        return None;
    }

    if raw.get(stop + 1) != Some(&b':') {
        return None;
    }

    // Content begins after the colon and any inline whitespace.
    let after_colon = stop + 2;
    let padding = raw[after_colon..]
        .iter()
        .take_while(|&&b| matches!(b, b' ' | b'\t'))
        .count();

    Some((line[ID_START..stop].to_string(), after_colon + padding))
}
478
#[cfg(test)]
mod tests {
    use super::{line_is_mmd_link_attribute_continuation, try_parse_reference_definition};
    use crate::syntax::SyntaxKind;

    /// Parsing a footnote definition must reproduce the input text exactly.
    #[test]
    fn test_footnote_definition_body_layout_is_lossless() {
        let input = "[^note-on-refs]:\n Note that if `--file-scope` is used,\n";
        let tree = crate::parse(input, Some(crate::ParserOptions::default()));
        assert_eq!(tree.text().to_string(), input);
    }

    /// The `[^id]:` marker is split into dedicated tokens directly under the
    /// footnote definition node.
    #[test]
    fn test_footnote_definition_marker_emits_structural_tokens() {
        let input = "[^note-on-refs]: body\n";
        let tree = crate::parse(input, Some(crate::ParserOptions::default()));
        let def = tree
            .descendants()
            .find(|n| n.kind() == SyntaxKind::FOOTNOTE_DEFINITION)
            .expect("footnote definition");
        let token_kinds: Vec<_> = def
            .children_with_tokens()
            .filter_map(|e| e.into_token())
            .map(|t| t.kind())
            .collect();
        assert!(token_kinds.contains(&SyntaxKind::FOOTNOTE_LABEL_START));
        assert!(token_kinds.contains(&SyntaxKind::FOOTNOTE_LABEL_ID));
        assert!(token_kinds.contains(&SyntaxKind::FOOTNOTE_LABEL_END));
        assert!(token_kinds.contains(&SyntaxKind::FOOTNOTE_LABEL_COLON));
    }

    /// `$$ ... $$` inside a footnote body is display math, even when its
    /// content looks like a raw TeX environment.
    // NOTE(review): the continuation lines in this literal are indented by a
    // single space; confirm that matches the intended footnote indentation.
    #[test]
    fn footnote_multiline_dollar_math_parses_as_display_math_not_tex_block() {
        let input = "[^note]: Intro line before math:\n $$\n \\begin{aligned} a &= b \\\\ c &= d \\end{aligned}\n $$\n";
        let tree = crate::parse(input, Some(crate::ParserOptions::default()));

        let def = tree
            .descendants()
            .find(|n| n.kind() == SyntaxKind::FOOTNOTE_DEFINITION)
            .expect("footnote definition");

        let has_display_math = def
            .descendants()
            .any(|n| n.kind() == SyntaxKind::DISPLAY_MATH);
        let has_tex_block = def.descendants().any(|n| n.kind() == SyntaxKind::TEX_BLOCK);

        assert!(
            has_display_math,
            "Expected DISPLAY_MATH in footnote definition, got:\n{}",
            tree
        );
        assert!(
            !has_tex_block,
            "Did not expect TEX_BLOCK in footnote definition for $$...$$ math, got:\n{}",
            tree
        );
    }

    /// Up to three leading spaces are accepted; a fourth rejects the line.
    #[test]
    fn test_reference_definition_with_up_to_three_leading_spaces() {
        let d = crate::options::Dialect::Pandoc;
        // Exactly three spaces: still a reference definition.
        assert!(try_parse_reference_definition("   [foo]: #bar", d).is_some());
        // Four spaces: over the limit, so not a reference definition.
        assert!(try_parse_reference_definition("    [foo]: #bar", d).is_none());
    }

    /// Pandoc accepts a title that directly abuts the destination; CommonMark
    /// requires whitespace between them.
    #[test]
    fn test_reference_definition_commonmark_requires_separator_before_title() {
        let pandoc =
            try_parse_reference_definition("[foo]: <bar>(baz)\n", crate::options::Dialect::Pandoc);
        assert_eq!(
            pandoc
                .as_ref()
                .map(|(_, _, url, title)| (url.as_str(), title.as_deref())),
            Some(("bar", Some("baz")))
        );

        let cmark = try_parse_reference_definition(
            "[foo]: <bar>(baz)\n",
            crate::options::Dialect::CommonMark,
        );
        assert!(cmark.is_none());

        let cmark_ok = try_parse_reference_definition(
            "[foo]: <bar> (baz)\n",
            crate::options::Dialect::CommonMark,
        );
        assert_eq!(
            cmark_ok
                .as_ref()
                .map(|(_, _, url, title)| (url.as_str(), title.as_deref())),
            Some(("bar", Some("baz")))
        );
    }

    #[test]
    fn mmd_link_attribute_continuation_detects_valid_tokens() {
        assert!(line_is_mmd_link_attribute_continuation(
            " width=20px height=30px id=myId"
        ));
        assert!(line_is_mmd_link_attribute_continuation(
            "\tclass=\"myClass1 myClass2\""
        ));
    }

    #[test]
    fn mmd_link_attribute_continuation_rejects_non_attribute_lines() {
        assert!(!line_is_mmd_link_attribute_continuation(
            "not-indented width=20px"
        ));
        assert!(!line_is_mmd_link_attribute_continuation(
            " not-an-attr token"
        ));
    }
}