panache_parser/parser/blocks/
headings.rs1use crate::options::ParserOptions;
4use crate::syntax::SyntaxKind;
5use rowan::GreenNodeBuilder;
6
7use crate::parser::utils::attributes::{
8 emit_attribute_node, try_parse_trailing_attributes_with_pos,
9};
10use crate::parser::utils::helpers::trim_end_spaces_tabs;
11use crate::parser::utils::inline_emission;
12
13fn try_parse_mmd_header_identifier_with_pos(content: &str) -> Option<(String, usize, usize)> {
14 let trimmed = trim_end_spaces_tabs(content);
15 let end = trimmed.len();
16 let bytes = trimmed.as_bytes();
17
18 if end == 0 || bytes[end - 1] != b']' {
19 return None;
20 }
21
22 let start = trimmed[..end - 1].rfind('[')?;
23 let raw = &trimmed[start..end];
24 let inner = &raw[1..raw.len() - 1];
25 if inner.trim().is_empty() {
26 return None;
27 }
28
29 let normalized = inner.split_whitespace().collect::<String>().to_lowercase();
30 if normalized.is_empty() {
31 return None;
32 }
33
34 Some((normalized, start, end))
35}
36
/// Checks whether `content` is an ATX heading line and returns its level.
///
/// A single trailing `\r\n` or `\n` is ignored. After any leading whitespace the
/// line must start with 1 to 6 `#` characters, followed by either nothing, a space,
/// or a tab. The leading whitespace may be at most 3 bytes long.
///
/// Returns `Some(level)` (the number of `#` characters) on success, `None` otherwise.
pub fn try_parse_atx_heading(content: &str) -> Option<usize> {
    let line = content
        .strip_suffix("\r\n")
        .or_else(|| content.strip_suffix('\n'))
        .unwrap_or(content);
    let body = line.trim_start();

    // `#` is ASCII, so counting bytes equals counting characters here.
    let marker_len = body.bytes().take_while(|&b| b == b'#').count();
    if !(1..=6).contains(&marker_len) {
        return None;
    }

    // The marker must be followed by end-of-line, a space, or a tab.
    let followed_by_blank = matches!(body[marker_len..].chars().next(), None | Some(' ' | '\t'));
    if !followed_by_blank {
        return None;
    }

    // Indentation is measured in bytes of stripped leading whitespace.
    let indent = line.len() - body.len();
    (indent <= 3).then_some(marker_len)
}
71
72pub fn try_parse_setext_heading(lines: &[&str], pos: usize) -> Option<(usize, char)> {
85 if pos >= lines.len() {
87 return None;
88 }
89
90 let text_line = lines[pos];
91 let next_pos = pos + 1;
92 if next_pos >= lines.len() {
93 return None;
94 }
95
96 let underline = lines[next_pos];
97
98 if crate::parser::utils::helpers::is_blank_line(text_line) {
100 return None;
101 }
102
103 let leading_spaces = text_line.len() - text_line.trim_start().len();
105 if leading_spaces >= 4 {
106 return None;
107 }
108
109 let underline_trimmed = underline.trim();
111
112 if underline_trimmed.is_empty() {
114 return None;
115 }
116
117 let first_char = underline_trimmed.chars().next()?;
119 if first_char != '=' && first_char != '-' {
120 return None;
121 }
122
123 if !underline_trimmed.chars().all(|c| c == first_char) {
125 return None;
126 }
127
128 let underline_leading_spaces = underline.len() - underline.trim_start().len();
130 if underline_leading_spaces >= 4 {
131 return None;
132 }
133
134 let level = if first_char == '=' { 1 } else { 2 };
136
137 Some((level, first_char))
138}
139
140pub(crate) fn emit_setext_heading(
145 builder: &mut GreenNodeBuilder<'static>,
146 text_line: &str,
147 underline_line: &str,
148 level: usize,
149 config: &ParserOptions,
150) {
151 builder.start_node(SyntaxKind::HEADING.into());
152 emit_setext_heading_body(builder, text_line, underline_line, level, config);
153 builder.finish_node(); }
155
156pub(crate) fn emit_setext_heading_body(
162 builder: &mut GreenNodeBuilder<'static>,
163 text_line: &str,
164 underline_line: &str,
165 _level: usize,
166 config: &ParserOptions,
167) {
168 let (text_without_newline, text_newline_str) =
170 if let Some(stripped) = text_line.strip_suffix("\r\n") {
171 (stripped, "\r\n")
172 } else if let Some(stripped) = text_line.strip_suffix('\n') {
173 (stripped, "\n")
174 } else {
175 (text_line, "")
176 };
177
178 let text_trimmed = text_without_newline.trim_start();
180 let leading_spaces = text_without_newline.len() - text_trimmed.len();
181
182 if leading_spaces > 0 {
183 builder.token(
184 SyntaxKind::WHITESPACE.into(),
185 &text_without_newline[..leading_spaces],
186 );
187 }
188
189 let (text_content, attr_text, space_before_attrs) =
191 if let Some((_attrs, text_before, start_brace_pos)) =
192 try_parse_trailing_attributes_with_pos(text_trimmed)
193 {
194 let space = &text_trimmed[text_before.len()..start_brace_pos];
195 let raw_attrs = &text_trimmed[start_brace_pos..];
196 (text_before, Some(raw_attrs), space)
197 } else if config.extensions.mmd_header_identifiers {
198 if let Some((_normalized, start_bracket_pos, end_bracket_pos)) =
199 try_parse_mmd_header_identifier_with_pos(text_trimmed)
200 {
201 let text_before = trim_end_spaces_tabs(&text_trimmed[..start_bracket_pos]);
202 let space = &text_trimmed[text_before.len()..start_bracket_pos];
203 let raw_attrs = &text_trimmed[start_bracket_pos..end_bracket_pos];
204 (text_before, Some(raw_attrs), space)
205 } else {
206 (text_trimmed, None, "")
207 }
208 } else {
209 (text_trimmed, None, "")
210 };
211
212 builder.start_node(SyntaxKind::HEADING_CONTENT.into());
214 if !text_content.is_empty() {
215 inline_emission::emit_inlines(builder, text_content, config, false);
216 }
217 builder.finish_node();
218
219 if !space_before_attrs.is_empty() {
221 builder.token(SyntaxKind::WHITESPACE.into(), space_before_attrs);
222 }
223
224 if let Some(attr_text) = attr_text {
226 emit_attribute_node(builder, attr_text);
227 }
228
229 if !text_newline_str.is_empty() {
231 builder.token(SyntaxKind::NEWLINE.into(), text_newline_str);
232 }
233
234 let (underline_without_newline, underline_newline_str) =
236 if let Some(stripped) = underline_line.strip_suffix("\r\n") {
237 (stripped, "\r\n")
238 } else if let Some(stripped) = underline_line.strip_suffix('\n') {
239 (stripped, "\n")
240 } else {
241 (underline_line, "")
242 };
243
244 let underline_trimmed = underline_without_newline.trim_start();
246 let underline_leading_spaces = underline_without_newline.len() - underline_trimmed.len();
247
248 if underline_leading_spaces > 0 {
249 builder.token(
250 SyntaxKind::WHITESPACE.into(),
251 &underline_without_newline[..underline_leading_spaces],
252 );
253 }
254
255 builder.start_node(SyntaxKind::SETEXT_HEADING_UNDERLINE.into());
257 builder.token(
258 SyntaxKind::SETEXT_HEADING_UNDERLINE.into(),
259 underline_trimmed,
260 );
261 builder.finish_node();
262
263 if !underline_newline_str.is_empty() {
265 builder.token(SyntaxKind::NEWLINE.into(), underline_newline_str);
266 }
267}
268
269pub(crate) fn emit_atx_heading(
271 builder: &mut GreenNodeBuilder<'static>,
272 content: &str,
273 level: usize,
274 config: &ParserOptions,
275) {
276 builder.start_node(SyntaxKind::HEADING.into());
277
278 let (content_without_newline, newline_str) =
280 if let Some(stripped) = content.strip_suffix("\r\n") {
281 (stripped, "\r\n")
282 } else if let Some(stripped) = content.strip_suffix('\n') {
283 (stripped, "\n")
284 } else {
285 (content, "")
286 };
287
288 let trimmed = content_without_newline.trim_start();
289 let leading_spaces = content_without_newline.len() - trimmed.len();
290
291 if leading_spaces > 0 {
293 builder.token(
294 SyntaxKind::WHITESPACE.into(),
295 &content_without_newline[..leading_spaces],
296 );
297 }
298
299 builder.start_node(SyntaxKind::ATX_HEADING_MARKER.into());
301 builder.token(SyntaxKind::ATX_HEADING_MARKER.into(), &trimmed[..level]);
302 builder.finish_node();
303
304 let after_marker = &trimmed[level..];
306 let spaces_after_marker_count = after_marker
307 .find(|c: char| !c.is_whitespace())
308 .unwrap_or(after_marker.len());
309
310 if spaces_after_marker_count > 0 {
312 builder.token(
313 SyntaxKind::WHITESPACE.into(),
314 &after_marker[..spaces_after_marker_count],
315 );
316 }
317
318 let heading_text = &after_marker[spaces_after_marker_count..];
320
321 let (heading_content, closing_suffix) = {
323 let without_trailing_ws = trim_end_spaces_tabs(heading_text);
324 let trailing_hashes = without_trailing_ws
325 .chars()
326 .rev()
327 .take_while(|&c| c == '#')
328 .count();
329
330 if trailing_hashes > 0 {
331 let hashes_start = without_trailing_ws.len() - trailing_hashes;
332 let before_hashes = &without_trailing_ws[..hashes_start];
333 let preceded_by_ws = before_hashes
338 .chars()
339 .last()
340 .is_some_and(|c| c == ' ' || c == '\t')
341 || (before_hashes.is_empty() && spaces_after_marker_count > 0);
342 if preceded_by_ws {
343 let content_end = trim_end_spaces_tabs(before_hashes).len();
344 (&heading_text[..content_end], &heading_text[content_end..])
345 } else {
346 (heading_text, "")
347 }
348 } else {
349 (heading_text, "")
350 }
351 };
352
353 let (text_content, attr_text, space_before_attrs) =
355 if let Some((_attrs, text_before, start_brace_pos)) =
356 try_parse_trailing_attributes_with_pos(heading_content)
357 {
358 let space = &heading_content[text_before.len()..start_brace_pos];
359 let raw_attrs = &heading_content[start_brace_pos..];
360 (text_before, Some(raw_attrs), space)
361 } else if config.extensions.mmd_header_identifiers {
362 if let Some((_normalized, start_bracket_pos, end_bracket_pos)) =
363 try_parse_mmd_header_identifier_with_pos(heading_content)
364 {
365 let text_before = trim_end_spaces_tabs(&heading_content[..start_bracket_pos]);
366 let space = &heading_content[text_before.len()..start_bracket_pos];
367 let raw_attrs = &heading_content[start_bracket_pos..end_bracket_pos];
368 (text_before, Some(raw_attrs), space)
369 } else {
370 (heading_content, None, "")
371 }
372 } else {
373 (heading_content, None, "")
374 };
375
376 builder.start_node(SyntaxKind::HEADING_CONTENT.into());
378 if !text_content.is_empty() {
379 inline_emission::emit_inlines(builder, text_content, config, false);
380 }
381 builder.finish_node();
382
383 if !space_before_attrs.is_empty() {
385 builder.token(SyntaxKind::WHITESPACE.into(), space_before_attrs);
386 }
387
388 if let Some(attr_text) = attr_text {
390 emit_attribute_node(builder, attr_text);
391 }
392
393 if !closing_suffix.is_empty() {
394 let closing_trimmed = trim_end_spaces_tabs(
395 crate::parser::utils::helpers::trim_start_spaces_tabs(closing_suffix),
396 );
397 let leading_ws_len = closing_suffix
398 .find(|c: char| c != ' ' && c != '\t')
399 .unwrap_or(closing_suffix.len());
400 let trailing_ws_len = closing_suffix.len() - leading_ws_len - closing_trimmed.len();
401
402 if leading_ws_len > 0 {
403 builder.token(
404 SyntaxKind::WHITESPACE.into(),
405 &closing_suffix[..leading_ws_len],
406 );
407 }
408 if !closing_trimmed.is_empty() {
409 builder.token(SyntaxKind::ATX_HEADING_MARKER.into(), closing_trimmed);
410 }
411 if trailing_ws_len > 0 {
412 builder.token(
413 SyntaxKind::WHITESPACE.into(),
414 &closing_suffix[closing_suffix.len() - trailing_ws_len..],
415 );
416 }
417 }
418
419 if !newline_str.is_empty() {
421 builder.token(SyntaxKind::NEWLINE.into(), newline_str);
422 }
423
424 builder.finish_node(); }
426
427#[cfg(test)]
428mod tests {
429 use super::*;
430
431 #[test]
432 fn test_simple_heading() {
433 assert_eq!(try_parse_atx_heading("# Heading"), Some(1));
434 }
435
436 #[test]
437 fn test_level_3_heading() {
438 assert_eq!(try_parse_atx_heading("### Level 3"), Some(3));
439 }
440
441 #[test]
442 fn test_heading_with_leading_spaces() {
443 assert_eq!(try_parse_atx_heading(" # Heading"), Some(1));
444 }
445
446 #[test]
447 fn test_atx_heading_with_attributes_losslessness() {
448 use crate::ParserOptions;
449
450 let input = "# Test {#id}\n";
452 let config = ParserOptions::default();
453 let tree = crate::parse(input, Some(config));
454
455 assert_eq!(
457 tree.text().to_string(),
458 input,
459 "Parser must preserve all bytes including space before attributes"
460 );
461
462 let heading = tree.first_child().unwrap();
464 assert_eq!(heading.kind(), SyntaxKind::HEADING);
465
466 let mut found_whitespace = false;
468 for child in heading.children_with_tokens() {
469 if child.kind() == SyntaxKind::WHITESPACE
470 && let Some(token) = child.as_token()
471 {
472 let start: usize = token.text_range().start().into();
473 if token.text() == " " && start == 6 {
474 found_whitespace = true;
475 break;
476 }
477 }
478 }
479 assert!(
480 found_whitespace,
481 "Whitespace token between heading content and attributes must be present"
482 );
483 }
484
485 #[test]
486 fn test_atx_heading_closing_hashes_are_lossless() {
487 let input = "### Extension: `smart` ###\n";
488 let tree = crate::parse(input, Some(crate::ParserOptions::default()));
489 assert_eq!(tree.text().to_string(), input);
490 }
491
492 #[test]
493 fn test_four_spaces_not_heading() {
494 assert_eq!(try_parse_atx_heading(" # Not heading"), None);
495 }
496
497 #[test]
498 fn test_no_space_after_hash() {
499 assert_eq!(try_parse_atx_heading("#NoSpace"), None);
500 }
501
502 #[test]
503 fn test_empty_heading() {
504 assert_eq!(try_parse_atx_heading("# "), Some(1));
505 }
506
507 #[test]
508 fn test_level_7_invalid() {
509 assert_eq!(try_parse_atx_heading("####### Too many"), None);
510 }
511
512 #[test]
514 fn test_setext_level_1() {
515 let lines = vec!["Heading", "======="];
516 assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
517 }
518
519 #[test]
520 fn test_setext_level_2() {
521 let lines = vec!["Heading", "-------"];
522 assert_eq!(try_parse_setext_heading(&lines, 0), Some((2, '-')));
523 }
524
525 #[test]
526 fn test_setext_any_underline_length() {
527 let lines = vec!["Heading", "="];
531 assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
532
533 let lines = vec!["Heading", "=="];
534 assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
535
536 let lines = vec!["Heading", "==="];
537 assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
538 }
539
540 #[test]
541 fn test_setext_mixed_chars_invalid() {
542 let lines = vec!["Heading", "==-=="];
543 assert_eq!(try_parse_setext_heading(&lines, 0), None);
544 }
545
546 #[test]
547 fn test_setext_with_leading_spaces() {
548 let lines = vec!["Heading", " ======="];
549 assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
550 }
551
552 #[test]
553 fn test_setext_with_trailing_spaces() {
554 let lines = vec!["Heading", "======= "];
555 assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
556 }
557
558 #[test]
559 fn test_setext_empty_text_line() {
560 let lines = vec!["", "======="];
561 assert_eq!(try_parse_setext_heading(&lines, 0), None);
562 }
563
564 #[test]
565 fn test_setext_no_next_line() {
566 let lines = vec!["Heading"];
567 assert_eq!(try_parse_setext_heading(&lines, 0), None);
568 }
569
570 #[test]
571 fn test_setext_four_spaces_indent() {
572 let lines = vec![" Heading", " ======="];
574 assert_eq!(try_parse_setext_heading(&lines, 0), None);
575 }
576
577 #[test]
578 fn test_setext_long_underline() {
579 let underline = "=".repeat(100);
580 let lines = vec!["Heading", underline.as_str()];
581 assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
582 }
583
584 #[test]
585 fn test_parse_mmd_header_identifier_normalizes_like_pandoc() {
586 let parsed = try_parse_mmd_header_identifier_with_pos("A heading [My ID]")
587 .expect("should parse mmd header identifier");
588 assert_eq!(parsed.0, "myid");
589 assert_eq!(parsed.1, 10);
590 }
591}