panache_parser/parser/blocks/
headings.rs1use crate::options::ParserOptions;
4use crate::syntax::SyntaxKind;
5use rowan::GreenNodeBuilder;
6
7use crate::parser::utils::attributes::try_parse_trailing_attributes_with_pos;
8use crate::parser::utils::inline_emission;
9
/// Looks for a trailing `[identifier]` at the end of `content`.
///
/// Trailing spaces and tabs are ignored when locating the closing `]`. The opening
/// bracket is the last `[` that appears before that closing `]`.
///
/// Returns `(normalized, start, end)` where:
/// - `normalized` is the bracket contents with all whitespace removed, lowercased,
/// - `start` is the byte offset of `[` in `content`,
/// - `end` is the byte offset just past `]` in `content`.
///
/// Returns `None` when the text does not end in `]`, has no `[` before it, or the
/// bracket contents are empty or whitespace-only.
fn try_parse_mmd_header_identifier_with_pos(content: &str) -> Option<(String, usize, usize)> {
    let text = content.trim_end_matches([' ', '\t']);

    // Everything up to (but excluding) the final `]`; bails out if there is none.
    let before_close = text.strip_suffix(']')?;
    let open = before_close.rfind('[')?;

    // Squeeze out all whitespace between the brackets.
    let squeezed: String = before_close[open + 1..].split_whitespace().collect();
    if squeezed.is_empty() {
        return None;
    }

    Some((squeezed.to_lowercase(), open, text.len()))
}
33
/// Checks whether `content` is an ATX heading line and returns its level.
///
/// A single trailing `\r\n` or `\n` is ignored. The line qualifies when:
/// - its leading whitespace is at most three bytes long,
/// - it then starts with a run of one to six `#` characters, and
/// - that run is followed by a space, a tab, or the end of the line.
///
/// Returns `Some(level)` with the number of `#` characters, or `None` otherwise.
pub fn try_parse_atx_heading(content: &str) -> Option<usize> {
    let line = content
        .strip_suffix("\r\n")
        .or_else(|| content.strip_suffix('\n'))
        .unwrap_or(content);
    let body = line.trim_start();

    // More than three bytes of indentation disqualifies the line.
    if line.len() - body.len() > 3 {
        return None;
    }

    // `#` is a single byte, so the byte difference equals the marker count.
    let rest = body.trim_start_matches('#');
    let level = body.len() - rest.len();
    if !(1..=6).contains(&level) {
        return None;
    }

    // The marker must be followed by a space/tab or be the whole line.
    match rest.chars().next() {
        None | Some(' ') | Some('\t') => Some(level),
        Some(_) => None,
    }
}
68
/// Checks whether `lines[pos]` followed by `lines[pos + 1]` forms a setext heading.
///
/// The text line must be non-blank and indented by fewer than four bytes of
/// whitespace. The following line must, after trimming surrounding whitespace,
/// consist solely of `=` characters or solely of `-` characters (any length of at
/// least one), and must also be indented by fewer than four bytes.
///
/// Returns `Some((level, marker))` where `level` is 1 for `=` and 2 for `-`, and
/// `marker` is the underline character. Returns `None` if either line is missing
/// or any rule above fails.
pub fn try_parse_setext_heading(lines: &[&str], pos: usize) -> Option<(usize, char)> {
    let text_line = *lines.get(pos)?;
    let underline = *lines.get(pos + 1)?;

    // Number of leading whitespace bytes on a line.
    let indent_of = |s: &str| s.len() - s.trim_start().len();

    if text_line.trim().is_empty() || indent_of(text_line) >= 4 || indent_of(underline) >= 4 {
        return None;
    }

    let marks = underline.trim();
    let marker = marks.chars().next()?;
    let level = match marker {
        '=' => 1,
        '-' => 2,
        _ => return None,
    };

    // Every character of the underline must repeat the first one.
    marks.chars().all(|c| c == marker).then_some((level, marker))
}
136
137pub(crate) fn emit_setext_heading(
142 builder: &mut GreenNodeBuilder<'static>,
143 text_line: &str,
144 underline_line: &str,
145 _level: usize,
146 config: &ParserOptions,
147) {
148 builder.start_node(SyntaxKind::HEADING.into());
149
150 let (text_without_newline, text_newline_str) =
152 if let Some(stripped) = text_line.strip_suffix("\r\n") {
153 (stripped, "\r\n")
154 } else if let Some(stripped) = text_line.strip_suffix('\n') {
155 (stripped, "\n")
156 } else {
157 (text_line, "")
158 };
159
160 let text_trimmed = text_without_newline.trim_start();
162 let leading_spaces = text_without_newline.len() - text_trimmed.len();
163
164 if leading_spaces > 0 {
165 builder.token(
166 SyntaxKind::WHITESPACE.into(),
167 &text_without_newline[..leading_spaces],
168 );
169 }
170
171 let (text_content, attr_text, space_before_attrs) =
173 if let Some((_attrs, text_before, start_brace_pos)) =
174 try_parse_trailing_attributes_with_pos(text_trimmed)
175 {
176 let space = &text_trimmed[text_before.len()..start_brace_pos];
177 let raw_attrs = &text_trimmed[start_brace_pos..];
178 (text_before, Some(raw_attrs), space)
179 } else if config.extensions.mmd_header_identifiers {
180 if let Some((_normalized, start_bracket_pos, end_bracket_pos)) =
181 try_parse_mmd_header_identifier_with_pos(text_trimmed)
182 {
183 let text_before = text_trimmed[..start_bracket_pos].trim_end_matches([' ', '\t']);
184 let space = &text_trimmed[text_before.len()..start_bracket_pos];
185 let raw_attrs = &text_trimmed[start_bracket_pos..end_bracket_pos];
186 (text_before, Some(raw_attrs), space)
187 } else {
188 (text_trimmed, None, "")
189 }
190 } else {
191 (text_trimmed, None, "")
192 };
193
194 builder.start_node(SyntaxKind::HEADING_CONTENT.into());
196 if !text_content.is_empty() {
197 inline_emission::emit_inlines(builder, text_content, config);
198 }
199 builder.finish_node();
200
201 if !space_before_attrs.is_empty() {
203 builder.token(SyntaxKind::WHITESPACE.into(), space_before_attrs);
204 }
205
206 if let Some(attr_text) = attr_text {
208 builder.start_node(SyntaxKind::ATTRIBUTE.into());
209 builder.token(SyntaxKind::ATTRIBUTE.into(), attr_text);
210 builder.finish_node();
211 }
212
213 if !text_newline_str.is_empty() {
215 builder.token(SyntaxKind::NEWLINE.into(), text_newline_str);
216 }
217
218 let (underline_without_newline, underline_newline_str) =
220 if let Some(stripped) = underline_line.strip_suffix("\r\n") {
221 (stripped, "\r\n")
222 } else if let Some(stripped) = underline_line.strip_suffix('\n') {
223 (stripped, "\n")
224 } else {
225 (underline_line, "")
226 };
227
228 let underline_trimmed = underline_without_newline.trim_start();
230 let underline_leading_spaces = underline_without_newline.len() - underline_trimmed.len();
231
232 if underline_leading_spaces > 0 {
233 builder.token(
234 SyntaxKind::WHITESPACE.into(),
235 &underline_without_newline[..underline_leading_spaces],
236 );
237 }
238
239 builder.start_node(SyntaxKind::SETEXT_HEADING_UNDERLINE.into());
241 builder.token(
242 SyntaxKind::SETEXT_HEADING_UNDERLINE.into(),
243 underline_trimmed,
244 );
245 builder.finish_node();
246
247 if !underline_newline_str.is_empty() {
249 builder.token(SyntaxKind::NEWLINE.into(), underline_newline_str);
250 }
251
252 builder.finish_node(); }
254
255pub(crate) fn emit_atx_heading(
257 builder: &mut GreenNodeBuilder<'static>,
258 content: &str,
259 level: usize,
260 config: &ParserOptions,
261) {
262 builder.start_node(SyntaxKind::HEADING.into());
263
264 let (content_without_newline, newline_str) =
266 if let Some(stripped) = content.strip_suffix("\r\n") {
267 (stripped, "\r\n")
268 } else if let Some(stripped) = content.strip_suffix('\n') {
269 (stripped, "\n")
270 } else {
271 (content, "")
272 };
273
274 let trimmed = content_without_newline.trim_start();
275 let leading_spaces = content_without_newline.len() - trimmed.len();
276
277 if leading_spaces > 0 {
279 builder.token(
280 SyntaxKind::WHITESPACE.into(),
281 &content_without_newline[..leading_spaces],
282 );
283 }
284
285 builder.start_node(SyntaxKind::ATX_HEADING_MARKER.into());
287 builder.token(SyntaxKind::ATX_HEADING_MARKER.into(), &trimmed[..level]);
288 builder.finish_node();
289
290 let after_marker = &trimmed[level..];
292 let spaces_after_marker_count = after_marker
293 .find(|c: char| !c.is_whitespace())
294 .unwrap_or(after_marker.len());
295
296 if spaces_after_marker_count > 0 {
298 builder.token(
299 SyntaxKind::WHITESPACE.into(),
300 &after_marker[..spaces_after_marker_count],
301 );
302 }
303
304 let heading_text = &after_marker[spaces_after_marker_count..];
306
307 let (heading_content, closing_suffix) = {
309 let without_trailing_ws = heading_text.trim_end_matches([' ', '\t']);
310 let trailing_hashes = without_trailing_ws
311 .chars()
312 .rev()
313 .take_while(|&c| c == '#')
314 .count();
315
316 if trailing_hashes > 0 {
317 let hashes_start = without_trailing_ws.len() - trailing_hashes;
318 let before_hashes = &without_trailing_ws[..hashes_start];
319 let preceded_by_ws = before_hashes
324 .chars()
325 .last()
326 .is_some_and(|c| c == ' ' || c == '\t')
327 || (before_hashes.is_empty() && spaces_after_marker_count > 0);
328 if preceded_by_ws {
329 let content_end = before_hashes.trim_end_matches([' ', '\t']).len();
330 (&heading_text[..content_end], &heading_text[content_end..])
331 } else {
332 (heading_text, "")
333 }
334 } else {
335 (heading_text, "")
336 }
337 };
338
339 let (text_content, attr_text, space_before_attrs) =
341 if let Some((_attrs, text_before, start_brace_pos)) =
342 try_parse_trailing_attributes_with_pos(heading_content)
343 {
344 let space = &heading_content[text_before.len()..start_brace_pos];
345 let raw_attrs = &heading_content[start_brace_pos..];
346 (text_before, Some(raw_attrs), space)
347 } else if config.extensions.mmd_header_identifiers {
348 if let Some((_normalized, start_bracket_pos, end_bracket_pos)) =
349 try_parse_mmd_header_identifier_with_pos(heading_content)
350 {
351 let text_before =
352 heading_content[..start_bracket_pos].trim_end_matches([' ', '\t']);
353 let space = &heading_content[text_before.len()..start_bracket_pos];
354 let raw_attrs = &heading_content[start_bracket_pos..end_bracket_pos];
355 (text_before, Some(raw_attrs), space)
356 } else {
357 (heading_content, None, "")
358 }
359 } else {
360 (heading_content, None, "")
361 };
362
363 builder.start_node(SyntaxKind::HEADING_CONTENT.into());
365 if !text_content.is_empty() {
366 inline_emission::emit_inlines(builder, text_content, config);
367 }
368 builder.finish_node();
369
370 if !space_before_attrs.is_empty() {
372 builder.token(SyntaxKind::WHITESPACE.into(), space_before_attrs);
373 }
374
375 if let Some(attr_text) = attr_text {
377 builder.start_node(SyntaxKind::ATTRIBUTE.into());
378 builder.token(SyntaxKind::ATTRIBUTE.into(), attr_text);
379 builder.finish_node();
380 }
381
382 if !closing_suffix.is_empty() {
383 let closing_trimmed = closing_suffix.trim_matches(|c| c == ' ' || c == '\t');
384 let leading_ws_len = closing_suffix
385 .find(|c: char| c != ' ' && c != '\t')
386 .unwrap_or(closing_suffix.len());
387 let trailing_ws_len = closing_suffix.len() - leading_ws_len - closing_trimmed.len();
388
389 if leading_ws_len > 0 {
390 builder.token(
391 SyntaxKind::WHITESPACE.into(),
392 &closing_suffix[..leading_ws_len],
393 );
394 }
395 if !closing_trimmed.is_empty() {
396 builder.token(SyntaxKind::ATX_HEADING_MARKER.into(), closing_trimmed);
397 }
398 if trailing_ws_len > 0 {
399 builder.token(
400 SyntaxKind::WHITESPACE.into(),
401 &closing_suffix[closing_suffix.len() - trailing_ws_len..],
402 );
403 }
404 }
405
406 if !newline_str.is_empty() {
408 builder.token(SyntaxKind::NEWLINE.into(), newline_str);
409 }
410
411 builder.finish_node(); }
413
414#[cfg(test)]
415mod tests {
416 use super::*;
417
418 #[test]
419 fn test_simple_heading() {
420 assert_eq!(try_parse_atx_heading("# Heading"), Some(1));
421 }
422
423 #[test]
424 fn test_level_3_heading() {
425 assert_eq!(try_parse_atx_heading("### Level 3"), Some(3));
426 }
427
428 #[test]
429 fn test_heading_with_leading_spaces() {
430 assert_eq!(try_parse_atx_heading(" # Heading"), Some(1));
431 }
432
433 #[test]
434 fn test_atx_heading_with_attributes_losslessness() {
435 use crate::ParserOptions;
436
437 let input = "# Test {#id}\n";
439 let config = ParserOptions::default();
440 let tree = crate::parse(input, Some(config));
441
442 assert_eq!(
444 tree.text().to_string(),
445 input,
446 "Parser must preserve all bytes including space before attributes"
447 );
448
449 let heading = tree.first_child().unwrap();
451 assert_eq!(heading.kind(), SyntaxKind::HEADING);
452
453 let mut found_whitespace = false;
455 for child in heading.children_with_tokens() {
456 if child.kind() == SyntaxKind::WHITESPACE
457 && let Some(token) = child.as_token()
458 {
459 let start: usize = token.text_range().start().into();
460 if token.text() == " " && start == 6 {
461 found_whitespace = true;
462 break;
463 }
464 }
465 }
466 assert!(
467 found_whitespace,
468 "Whitespace token between heading content and attributes must be present"
469 );
470 }
471
472 #[test]
473 fn test_atx_heading_closing_hashes_are_lossless() {
474 let input = "### Extension: `smart` ###\n";
475 let tree = crate::parse(input, Some(crate::ParserOptions::default()));
476 assert_eq!(tree.text().to_string(), input);
477 }
478
479 #[test]
480 fn test_four_spaces_not_heading() {
481 assert_eq!(try_parse_atx_heading(" # Not heading"), None);
482 }
483
484 #[test]
485 fn test_no_space_after_hash() {
486 assert_eq!(try_parse_atx_heading("#NoSpace"), None);
487 }
488
489 #[test]
490 fn test_empty_heading() {
491 assert_eq!(try_parse_atx_heading("# "), Some(1));
492 }
493
494 #[test]
495 fn test_level_7_invalid() {
496 assert_eq!(try_parse_atx_heading("####### Too many"), None);
497 }
498
499 #[test]
501 fn test_setext_level_1() {
502 let lines = vec!["Heading", "======="];
503 assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
504 }
505
506 #[test]
507 fn test_setext_level_2() {
508 let lines = vec!["Heading", "-------"];
509 assert_eq!(try_parse_setext_heading(&lines, 0), Some((2, '-')));
510 }
511
512 #[test]
513 fn test_setext_any_underline_length() {
514 let lines = vec!["Heading", "="];
518 assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
519
520 let lines = vec!["Heading", "=="];
521 assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
522
523 let lines = vec!["Heading", "==="];
524 assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
525 }
526
527 #[test]
528 fn test_setext_mixed_chars_invalid() {
529 let lines = vec!["Heading", "==-=="];
530 assert_eq!(try_parse_setext_heading(&lines, 0), None);
531 }
532
533 #[test]
534 fn test_setext_with_leading_spaces() {
535 let lines = vec!["Heading", " ======="];
536 assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
537 }
538
539 #[test]
540 fn test_setext_with_trailing_spaces() {
541 let lines = vec!["Heading", "======= "];
542 assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
543 }
544
545 #[test]
546 fn test_setext_empty_text_line() {
547 let lines = vec!["", "======="];
548 assert_eq!(try_parse_setext_heading(&lines, 0), None);
549 }
550
551 #[test]
552 fn test_setext_no_next_line() {
553 let lines = vec!["Heading"];
554 assert_eq!(try_parse_setext_heading(&lines, 0), None);
555 }
556
557 #[test]
558 fn test_setext_four_spaces_indent() {
559 let lines = vec![" Heading", " ======="];
561 assert_eq!(try_parse_setext_heading(&lines, 0), None);
562 }
563
564 #[test]
565 fn test_setext_long_underline() {
566 let underline = "=".repeat(100);
567 let lines = vec!["Heading", underline.as_str()];
568 assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
569 }
570
571 #[test]
572 fn test_parse_mmd_header_identifier_normalizes_like_pandoc() {
573 let parsed = try_parse_mmd_header_identifier_with_pos("A heading [My ID]")
574 .expect("should parse mmd header identifier");
575 assert_eq!(parsed.0, "myid");
576 assert_eq!(parsed.1, 10);
577 }
578}