panache_parser/parser/blocks/
headings.rs1use crate::options::ParserOptions;
4use crate::syntax::SyntaxKind;
5use rowan::GreenNodeBuilder;
6
7use crate::parser::utils::attributes::try_parse_trailing_attributes_with_pos;
8use crate::parser::utils::helpers::trim_end_spaces_tabs;
9use crate::parser::utils::inline_emission;
10
11fn try_parse_mmd_header_identifier_with_pos(content: &str) -> Option<(String, usize, usize)> {
12 let trimmed = trim_end_spaces_tabs(content);
13 let end = trimmed.len();
14 let bytes = trimmed.as_bytes();
15
16 if end == 0 || bytes[end - 1] != b']' {
17 return None;
18 }
19
20 let start = trimmed[..end - 1].rfind('[')?;
21 let raw = &trimmed[start..end];
22 let inner = &raw[1..raw.len() - 1];
23 if inner.trim().is_empty() {
24 return None;
25 }
26
27 let normalized = inner.split_whitespace().collect::<String>().to_lowercase();
28 if normalized.is_empty() {
29 return None;
30 }
31
32 Some((normalized, start, end))
33}
34
/// Checks whether `content` is an ATX heading line and returns its level (1–6).
///
/// A single trailing `\r\n` or `\n` is ignored. The line qualifies when:
///
/// * at most three bytes of leading whitespace precede the marker;
/// * the marker is a run of one to six `#` characters;
/// * the marker is followed by a space, a tab, or the end of the line.
///
/// Returns `None` for anything else.
pub fn try_parse_atx_heading(content: &str) -> Option<usize> {
    let line = content
        .strip_suffix("\r\n")
        .or_else(|| content.strip_suffix('\n'))
        .unwrap_or(content);

    let body = line.trim_start();
    let indent = line.len() - body.len();
    if indent > 3 {
        return None;
    }

    // `#` is a single ASCII byte, so counting bytes equals counting characters.
    let level = body.bytes().take_while(|&b| b == b'#').count();
    if !(1..=6).contains(&level) {
        return None;
    }

    // The byte right after the marker decides: nothing, a space or a tab is accepted.
    match body.as_bytes().get(level) {
        None | Some(b' ') | Some(b'\t') => Some(level),
        Some(_) => None,
    }
}
69
70pub fn try_parse_setext_heading(lines: &[&str], pos: usize) -> Option<(usize, char)> {
83 if pos >= lines.len() {
85 return None;
86 }
87
88 let text_line = lines[pos];
89 let next_pos = pos + 1;
90 if next_pos >= lines.len() {
91 return None;
92 }
93
94 let underline = lines[next_pos];
95
96 if crate::parser::utils::helpers::is_blank_line(text_line) {
98 return None;
99 }
100
101 let leading_spaces = text_line.len() - text_line.trim_start().len();
103 if leading_spaces >= 4 {
104 return None;
105 }
106
107 let underline_trimmed = underline.trim();
109
110 if underline_trimmed.is_empty() {
112 return None;
113 }
114
115 let first_char = underline_trimmed.chars().next()?;
117 if first_char != '=' && first_char != '-' {
118 return None;
119 }
120
121 if !underline_trimmed.chars().all(|c| c == first_char) {
123 return None;
124 }
125
126 let underline_leading_spaces = underline.len() - underline.trim_start().len();
128 if underline_leading_spaces >= 4 {
129 return None;
130 }
131
132 let level = if first_char == '=' { 1 } else { 2 };
134
135 Some((level, first_char))
136}
137
/// Emits a complete setext heading into `builder`.
///
/// Opens a `HEADING` node, delegates every child (text line and underline line) to
/// [`emit_setext_heading_body`], then closes the node again.
///
/// `level` is forwarded unchanged to the body emitter.
pub(crate) fn emit_setext_heading(
    builder: &mut GreenNodeBuilder<'static>,
    text_line: &str,
    underline_line: &str,
    level: usize,
    config: &ParserOptions,
) {
    builder.start_node(SyntaxKind::HEADING.into());
    emit_setext_heading_body(builder, text_line, underline_line, level, config);
    // Closes the HEADING node opened above.
    builder.finish_node();
}
153
154pub(crate) fn emit_setext_heading_body(
160 builder: &mut GreenNodeBuilder<'static>,
161 text_line: &str,
162 underline_line: &str,
163 _level: usize,
164 config: &ParserOptions,
165) {
166 let (text_without_newline, text_newline_str) =
168 if let Some(stripped) = text_line.strip_suffix("\r\n") {
169 (stripped, "\r\n")
170 } else if let Some(stripped) = text_line.strip_suffix('\n') {
171 (stripped, "\n")
172 } else {
173 (text_line, "")
174 };
175
176 let text_trimmed = text_without_newline.trim_start();
178 let leading_spaces = text_without_newline.len() - text_trimmed.len();
179
180 if leading_spaces > 0 {
181 builder.token(
182 SyntaxKind::WHITESPACE.into(),
183 &text_without_newline[..leading_spaces],
184 );
185 }
186
187 let (text_content, attr_text, space_before_attrs) =
189 if let Some((_attrs, text_before, start_brace_pos)) =
190 try_parse_trailing_attributes_with_pos(text_trimmed)
191 {
192 let space = &text_trimmed[text_before.len()..start_brace_pos];
193 let raw_attrs = &text_trimmed[start_brace_pos..];
194 (text_before, Some(raw_attrs), space)
195 } else if config.extensions.mmd_header_identifiers {
196 if let Some((_normalized, start_bracket_pos, end_bracket_pos)) =
197 try_parse_mmd_header_identifier_with_pos(text_trimmed)
198 {
199 let text_before = trim_end_spaces_tabs(&text_trimmed[..start_bracket_pos]);
200 let space = &text_trimmed[text_before.len()..start_bracket_pos];
201 let raw_attrs = &text_trimmed[start_bracket_pos..end_bracket_pos];
202 (text_before, Some(raw_attrs), space)
203 } else {
204 (text_trimmed, None, "")
205 }
206 } else {
207 (text_trimmed, None, "")
208 };
209
210 builder.start_node(SyntaxKind::HEADING_CONTENT.into());
212 if !text_content.is_empty() {
213 inline_emission::emit_inlines(builder, text_content, config, false);
214 }
215 builder.finish_node();
216
217 if !space_before_attrs.is_empty() {
219 builder.token(SyntaxKind::WHITESPACE.into(), space_before_attrs);
220 }
221
222 if let Some(attr_text) = attr_text {
224 builder.start_node(SyntaxKind::ATTRIBUTE.into());
225 builder.token(SyntaxKind::ATTRIBUTE.into(), attr_text);
226 builder.finish_node();
227 }
228
229 if !text_newline_str.is_empty() {
231 builder.token(SyntaxKind::NEWLINE.into(), text_newline_str);
232 }
233
234 let (underline_without_newline, underline_newline_str) =
236 if let Some(stripped) = underline_line.strip_suffix("\r\n") {
237 (stripped, "\r\n")
238 } else if let Some(stripped) = underline_line.strip_suffix('\n') {
239 (stripped, "\n")
240 } else {
241 (underline_line, "")
242 };
243
244 let underline_trimmed = underline_without_newline.trim_start();
246 let underline_leading_spaces = underline_without_newline.len() - underline_trimmed.len();
247
248 if underline_leading_spaces > 0 {
249 builder.token(
250 SyntaxKind::WHITESPACE.into(),
251 &underline_without_newline[..underline_leading_spaces],
252 );
253 }
254
255 builder.start_node(SyntaxKind::SETEXT_HEADING_UNDERLINE.into());
257 builder.token(
258 SyntaxKind::SETEXT_HEADING_UNDERLINE.into(),
259 underline_trimmed,
260 );
261 builder.finish_node();
262
263 if !underline_newline_str.is_empty() {
265 builder.token(SyntaxKind::NEWLINE.into(), underline_newline_str);
266 }
267}
268
269pub(crate) fn emit_atx_heading(
271 builder: &mut GreenNodeBuilder<'static>,
272 content: &str,
273 level: usize,
274 config: &ParserOptions,
275) {
276 builder.start_node(SyntaxKind::HEADING.into());
277
278 let (content_without_newline, newline_str) =
280 if let Some(stripped) = content.strip_suffix("\r\n") {
281 (stripped, "\r\n")
282 } else if let Some(stripped) = content.strip_suffix('\n') {
283 (stripped, "\n")
284 } else {
285 (content, "")
286 };
287
288 let trimmed = content_without_newline.trim_start();
289 let leading_spaces = content_without_newline.len() - trimmed.len();
290
291 if leading_spaces > 0 {
293 builder.token(
294 SyntaxKind::WHITESPACE.into(),
295 &content_without_newline[..leading_spaces],
296 );
297 }
298
299 builder.start_node(SyntaxKind::ATX_HEADING_MARKER.into());
301 builder.token(SyntaxKind::ATX_HEADING_MARKER.into(), &trimmed[..level]);
302 builder.finish_node();
303
304 let after_marker = &trimmed[level..];
306 let spaces_after_marker_count = after_marker
307 .find(|c: char| !c.is_whitespace())
308 .unwrap_or(after_marker.len());
309
310 if spaces_after_marker_count > 0 {
312 builder.token(
313 SyntaxKind::WHITESPACE.into(),
314 &after_marker[..spaces_after_marker_count],
315 );
316 }
317
318 let heading_text = &after_marker[spaces_after_marker_count..];
320
321 let (heading_content, closing_suffix) = {
323 let without_trailing_ws = trim_end_spaces_tabs(heading_text);
324 let trailing_hashes = without_trailing_ws
325 .chars()
326 .rev()
327 .take_while(|&c| c == '#')
328 .count();
329
330 if trailing_hashes > 0 {
331 let hashes_start = without_trailing_ws.len() - trailing_hashes;
332 let before_hashes = &without_trailing_ws[..hashes_start];
333 let preceded_by_ws = before_hashes
338 .chars()
339 .last()
340 .is_some_and(|c| c == ' ' || c == '\t')
341 || (before_hashes.is_empty() && spaces_after_marker_count > 0);
342 if preceded_by_ws {
343 let content_end = trim_end_spaces_tabs(before_hashes).len();
344 (&heading_text[..content_end], &heading_text[content_end..])
345 } else {
346 (heading_text, "")
347 }
348 } else {
349 (heading_text, "")
350 }
351 };
352
353 let (text_content, attr_text, space_before_attrs) =
355 if let Some((_attrs, text_before, start_brace_pos)) =
356 try_parse_trailing_attributes_with_pos(heading_content)
357 {
358 let space = &heading_content[text_before.len()..start_brace_pos];
359 let raw_attrs = &heading_content[start_brace_pos..];
360 (text_before, Some(raw_attrs), space)
361 } else if config.extensions.mmd_header_identifiers {
362 if let Some((_normalized, start_bracket_pos, end_bracket_pos)) =
363 try_parse_mmd_header_identifier_with_pos(heading_content)
364 {
365 let text_before = trim_end_spaces_tabs(&heading_content[..start_bracket_pos]);
366 let space = &heading_content[text_before.len()..start_bracket_pos];
367 let raw_attrs = &heading_content[start_bracket_pos..end_bracket_pos];
368 (text_before, Some(raw_attrs), space)
369 } else {
370 (heading_content, None, "")
371 }
372 } else {
373 (heading_content, None, "")
374 };
375
376 builder.start_node(SyntaxKind::HEADING_CONTENT.into());
378 if !text_content.is_empty() {
379 inline_emission::emit_inlines(builder, text_content, config, false);
380 }
381 builder.finish_node();
382
383 if !space_before_attrs.is_empty() {
385 builder.token(SyntaxKind::WHITESPACE.into(), space_before_attrs);
386 }
387
388 if let Some(attr_text) = attr_text {
390 builder.start_node(SyntaxKind::ATTRIBUTE.into());
391 builder.token(SyntaxKind::ATTRIBUTE.into(), attr_text);
392 builder.finish_node();
393 }
394
395 if !closing_suffix.is_empty() {
396 let closing_trimmed = trim_end_spaces_tabs(
397 crate::parser::utils::helpers::trim_start_spaces_tabs(closing_suffix),
398 );
399 let leading_ws_len = closing_suffix
400 .find(|c: char| c != ' ' && c != '\t')
401 .unwrap_or(closing_suffix.len());
402 let trailing_ws_len = closing_suffix.len() - leading_ws_len - closing_trimmed.len();
403
404 if leading_ws_len > 0 {
405 builder.token(
406 SyntaxKind::WHITESPACE.into(),
407 &closing_suffix[..leading_ws_len],
408 );
409 }
410 if !closing_trimmed.is_empty() {
411 builder.token(SyntaxKind::ATX_HEADING_MARKER.into(), closing_trimmed);
412 }
413 if trailing_ws_len > 0 {
414 builder.token(
415 SyntaxKind::WHITESPACE.into(),
416 &closing_suffix[closing_suffix.len() - trailing_ws_len..],
417 );
418 }
419 }
420
421 if !newline_str.is_empty() {
423 builder.token(SyntaxKind::NEWLINE.into(), newline_str);
424 }
425
426 builder.finish_node(); }
428
429#[cfg(test)]
430mod tests {
431 use super::*;
432
433 #[test]
434 fn test_simple_heading() {
435 assert_eq!(try_parse_atx_heading("# Heading"), Some(1));
436 }
437
438 #[test]
439 fn test_level_3_heading() {
440 assert_eq!(try_parse_atx_heading("### Level 3"), Some(3));
441 }
442
443 #[test]
444 fn test_heading_with_leading_spaces() {
445 assert_eq!(try_parse_atx_heading(" # Heading"), Some(1));
446 }
447
448 #[test]
449 fn test_atx_heading_with_attributes_losslessness() {
450 use crate::ParserOptions;
451
452 let input = "# Test {#id}\n";
454 let config = ParserOptions::default();
455 let tree = crate::parse(input, Some(config));
456
457 assert_eq!(
459 tree.text().to_string(),
460 input,
461 "Parser must preserve all bytes including space before attributes"
462 );
463
464 let heading = tree.first_child().unwrap();
466 assert_eq!(heading.kind(), SyntaxKind::HEADING);
467
468 let mut found_whitespace = false;
470 for child in heading.children_with_tokens() {
471 if child.kind() == SyntaxKind::WHITESPACE
472 && let Some(token) = child.as_token()
473 {
474 let start: usize = token.text_range().start().into();
475 if token.text() == " " && start == 6 {
476 found_whitespace = true;
477 break;
478 }
479 }
480 }
481 assert!(
482 found_whitespace,
483 "Whitespace token between heading content and attributes must be present"
484 );
485 }
486
487 #[test]
488 fn test_atx_heading_closing_hashes_are_lossless() {
489 let input = "### Extension: `smart` ###\n";
490 let tree = crate::parse(input, Some(crate::ParserOptions::default()));
491 assert_eq!(tree.text().to_string(), input);
492 }
493
494 #[test]
495 fn test_four_spaces_not_heading() {
496 assert_eq!(try_parse_atx_heading(" # Not heading"), None);
497 }
498
499 #[test]
500 fn test_no_space_after_hash() {
501 assert_eq!(try_parse_atx_heading("#NoSpace"), None);
502 }
503
504 #[test]
505 fn test_empty_heading() {
506 assert_eq!(try_parse_atx_heading("# "), Some(1));
507 }
508
509 #[test]
510 fn test_level_7_invalid() {
511 assert_eq!(try_parse_atx_heading("####### Too many"), None);
512 }
513
514 #[test]
516 fn test_setext_level_1() {
517 let lines = vec!["Heading", "======="];
518 assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
519 }
520
521 #[test]
522 fn test_setext_level_2() {
523 let lines = vec!["Heading", "-------"];
524 assert_eq!(try_parse_setext_heading(&lines, 0), Some((2, '-')));
525 }
526
527 #[test]
528 fn test_setext_any_underline_length() {
529 let lines = vec!["Heading", "="];
533 assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
534
535 let lines = vec!["Heading", "=="];
536 assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
537
538 let lines = vec!["Heading", "==="];
539 assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
540 }
541
542 #[test]
543 fn test_setext_mixed_chars_invalid() {
544 let lines = vec!["Heading", "==-=="];
545 assert_eq!(try_parse_setext_heading(&lines, 0), None);
546 }
547
548 #[test]
549 fn test_setext_with_leading_spaces() {
550 let lines = vec!["Heading", " ======="];
551 assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
552 }
553
554 #[test]
555 fn test_setext_with_trailing_spaces() {
556 let lines = vec!["Heading", "======= "];
557 assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
558 }
559
560 #[test]
561 fn test_setext_empty_text_line() {
562 let lines = vec!["", "======="];
563 assert_eq!(try_parse_setext_heading(&lines, 0), None);
564 }
565
566 #[test]
567 fn test_setext_no_next_line() {
568 let lines = vec!["Heading"];
569 assert_eq!(try_parse_setext_heading(&lines, 0), None);
570 }
571
572 #[test]
573 fn test_setext_four_spaces_indent() {
574 let lines = vec![" Heading", " ======="];
576 assert_eq!(try_parse_setext_heading(&lines, 0), None);
577 }
578
579 #[test]
580 fn test_setext_long_underline() {
581 let underline = "=".repeat(100);
582 let lines = vec!["Heading", underline.as_str()];
583 assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
584 }
585
586 #[test]
587 fn test_parse_mmd_header_identifier_normalizes_like_pandoc() {
588 let parsed = try_parse_mmd_header_identifier_with_pos("A heading [My ID]")
589 .expect("should parse mmd header identifier");
590 assert_eq!(parsed.0, "myid");
591 assert_eq!(parsed.1, 10);
592 }
593}