panache_parser/parser/blocks/
headings.rs1use crate::options::ParserOptions;
4use crate::syntax::SyntaxKind;
5use rowan::GreenNodeBuilder;
6
7use crate::parser::utils::attributes::try_parse_trailing_attributes_with_pos;
8use crate::parser::utils::inline_emission;
9
/// Finds a trailing MultiMarkdown-style `[identifier]` at the end of `content`.
///
/// Trailing spaces and tabs are ignored when looking for the closing `]`.
/// The opening bracket is the last `[` that precedes that closing `]`.
///
/// Returns `(normalized, start, end)` where:
/// - `normalized` is the bracket contents with all whitespace removed and
///   converted to lowercase,
/// - `start` is the byte offset of the `[` within `content`,
/// - `end` is the byte offset just past the `]` (which equals the length of
///   `content` once trailing spaces/tabs are trimmed).
///
/// Returns `None` when there is no trailing bracket pair or when the bracket
/// contents are empty or whitespace-only.
fn try_parse_mmd_header_identifier_with_pos(content: &str) -> Option<(String, usize, usize)> {
    let text = content.trim_end_matches([' ', '\t']);
    let close = text.len();

    // Everything before the final `]`; bails out if the text does not end in one.
    let before_close = text.strip_suffix(']')?;
    let open = before_close.rfind('[')?;
    let label = &before_close[open + 1..];

    // A label made only of whitespace collapses to an empty identifier.
    let identifier = label.split_whitespace().collect::<String>().to_lowercase();

    (!identifier.is_empty()).then_some((identifier, open, close))
}
33
/// Checks whether `content` is an ATX heading line and returns its level.
///
/// A single trailing line ending (`\r\n` or `\n`) is ignored. The line is a
/// heading when, after at most three bytes of leading whitespace, it starts
/// with a run of one to six `#` characters that is either the end of the line
/// or is followed by a space or tab.
///
/// Returns `Some(level)` with the number of `#` characters, or `None` when the
/// line is not an ATX heading.
pub fn try_parse_atx_heading(content: &str) -> Option<usize> {
    let line = content
        .strip_suffix("\r\n")
        .or_else(|| content.strip_suffix('\n'))
        .unwrap_or(content);

    let body = line.trim_start();
    let indent = line.len() - body.len();
    if indent > 3 {
        return None;
    }

    // `#` is ASCII, so counting bytes is the same as counting characters here.
    let level = body.bytes().take_while(|&b| b == b'#').count();
    if !(1..=6).contains(&level) {
        return None;
    }

    // The marker must be followed by nothing, a space, or a tab.
    match body[level..].chars().next() {
        None | Some(' ') | Some('\t') => Some(level),
        Some(_) => None,
    }
}
68
/// Checks whether `lines[pos]` followed by `lines[pos + 1]` forms a setext heading.
///
/// The text line must be non-blank and have fewer than four bytes of leading
/// whitespace. The underline, once trimmed, must be at least three characters
/// long, consist solely of `=` or solely of `-`, and also have fewer than four
/// bytes of leading whitespace.
///
/// Returns `Some((level, marker))` where `level` is 1 for `=` and 2 for `-`,
/// or `None` when either line is missing or the pair is not a setext heading.
pub fn try_parse_setext_heading(lines: &[&str], pos: usize) -> Option<(usize, char)> {
    // `get` covers both "pos out of range" and "no following line".
    let title = *lines.get(pos)?;
    let rule = *lines.get(pos + 1)?;

    let indent_of = |s: &str| s.len() - s.trim_start().len();

    if title.trim().is_empty() || indent_of(title) >= 4 || indent_of(rule) >= 4 {
        return None;
    }

    let marks = rule.trim();
    if marks.len() < 3 {
        return None;
    }

    let marker = marks.chars().next()?;
    let level = match marker {
        '=' => 1,
        '-' => 2,
        _ => return None,
    };

    // Every character of the underline must repeat the marker.
    marks
        .chars()
        .all(|c| c == marker)
        .then_some((level, marker))
}
136
137pub(crate) fn emit_setext_heading(
142 builder: &mut GreenNodeBuilder<'static>,
143 text_line: &str,
144 underline_line: &str,
145 _level: usize,
146 config: &ParserOptions,
147) {
148 builder.start_node(SyntaxKind::HEADING.into());
149
150 let (text_without_newline, text_newline_str) =
152 if let Some(stripped) = text_line.strip_suffix("\r\n") {
153 (stripped, "\r\n")
154 } else if let Some(stripped) = text_line.strip_suffix('\n') {
155 (stripped, "\n")
156 } else {
157 (text_line, "")
158 };
159
160 let text_trimmed = text_without_newline.trim_start();
162 let leading_spaces = text_without_newline.len() - text_trimmed.len();
163
164 if leading_spaces > 0 {
165 builder.token(
166 SyntaxKind::WHITESPACE.into(),
167 &text_without_newline[..leading_spaces],
168 );
169 }
170
171 let (text_content, attr_text, space_before_attrs) =
173 if let Some((_attrs, text_before, start_brace_pos)) =
174 try_parse_trailing_attributes_with_pos(text_trimmed)
175 {
176 let space = &text_trimmed[text_before.len()..start_brace_pos];
177 let raw_attrs = &text_trimmed[start_brace_pos..];
178 (text_before, Some(raw_attrs), space)
179 } else if config.extensions.mmd_header_identifiers {
180 if let Some((_normalized, start_bracket_pos, end_bracket_pos)) =
181 try_parse_mmd_header_identifier_with_pos(text_trimmed)
182 {
183 let text_before = text_trimmed[..start_bracket_pos].trim_end_matches([' ', '\t']);
184 let space = &text_trimmed[text_before.len()..start_bracket_pos];
185 let raw_attrs = &text_trimmed[start_bracket_pos..end_bracket_pos];
186 (text_before, Some(raw_attrs), space)
187 } else {
188 (text_trimmed, None, "")
189 }
190 } else {
191 (text_trimmed, None, "")
192 };
193
194 builder.start_node(SyntaxKind::HEADING_CONTENT.into());
196 if !text_content.is_empty() {
197 inline_emission::emit_inlines(builder, text_content, config);
198 }
199 builder.finish_node();
200
201 if !space_before_attrs.is_empty() {
203 builder.token(SyntaxKind::WHITESPACE.into(), space_before_attrs);
204 }
205
206 if let Some(attr_text) = attr_text {
208 builder.start_node(SyntaxKind::ATTRIBUTE.into());
209 builder.token(SyntaxKind::ATTRIBUTE.into(), attr_text);
210 builder.finish_node();
211 }
212
213 if !text_newline_str.is_empty() {
215 builder.token(SyntaxKind::NEWLINE.into(), text_newline_str);
216 }
217
218 let (underline_without_newline, underline_newline_str) =
220 if let Some(stripped) = underline_line.strip_suffix("\r\n") {
221 (stripped, "\r\n")
222 } else if let Some(stripped) = underline_line.strip_suffix('\n') {
223 (stripped, "\n")
224 } else {
225 (underline_line, "")
226 };
227
228 let underline_trimmed = underline_without_newline.trim_start();
230 let underline_leading_spaces = underline_without_newline.len() - underline_trimmed.len();
231
232 if underline_leading_spaces > 0 {
233 builder.token(
234 SyntaxKind::WHITESPACE.into(),
235 &underline_without_newline[..underline_leading_spaces],
236 );
237 }
238
239 builder.start_node(SyntaxKind::SETEXT_HEADING_UNDERLINE.into());
241 builder.token(
242 SyntaxKind::SETEXT_HEADING_UNDERLINE.into(),
243 underline_trimmed,
244 );
245 builder.finish_node();
246
247 if !underline_newline_str.is_empty() {
249 builder.token(SyntaxKind::NEWLINE.into(), underline_newline_str);
250 }
251
252 builder.finish_node(); }
254
255pub(crate) fn emit_atx_heading(
257 builder: &mut GreenNodeBuilder<'static>,
258 content: &str,
259 level: usize,
260 config: &ParserOptions,
261) {
262 builder.start_node(SyntaxKind::HEADING.into());
263
264 let (content_without_newline, newline_str) =
266 if let Some(stripped) = content.strip_suffix("\r\n") {
267 (stripped, "\r\n")
268 } else if let Some(stripped) = content.strip_suffix('\n') {
269 (stripped, "\n")
270 } else {
271 (content, "")
272 };
273
274 let trimmed = content_without_newline.trim_start();
275 let leading_spaces = content_without_newline.len() - trimmed.len();
276
277 if leading_spaces > 0 {
279 builder.token(
280 SyntaxKind::WHITESPACE.into(),
281 &content_without_newline[..leading_spaces],
282 );
283 }
284
285 builder.start_node(SyntaxKind::ATX_HEADING_MARKER.into());
287 builder.token(SyntaxKind::ATX_HEADING_MARKER.into(), &trimmed[..level]);
288 builder.finish_node();
289
290 let after_marker = &trimmed[level..];
292 let spaces_after_marker_count = after_marker
293 .find(|c: char| !c.is_whitespace())
294 .unwrap_or(after_marker.len());
295
296 if spaces_after_marker_count > 0 {
298 builder.token(
299 SyntaxKind::WHITESPACE.into(),
300 &after_marker[..spaces_after_marker_count],
301 );
302 }
303
304 let heading_text = &after_marker[spaces_after_marker_count..];
306
307 let (heading_content, closing_suffix) = {
309 let without_trailing_ws = heading_text.trim_end_matches([' ', '\t']);
310 let trailing_hashes = without_trailing_ws
311 .chars()
312 .rev()
313 .take_while(|&c| c == '#')
314 .count();
315
316 if trailing_hashes > 0 {
317 let hashes_start = without_trailing_ws.len() - trailing_hashes;
318 let before_hashes = &without_trailing_ws[..hashes_start];
319 if before_hashes
320 .chars()
321 .last()
322 .is_some_and(|c| c == ' ' || c == '\t')
323 {
324 let content_end = before_hashes.trim_end_matches([' ', '\t']).len();
325 (&heading_text[..content_end], &heading_text[content_end..])
326 } else {
327 (heading_text, "")
328 }
329 } else {
330 (heading_text, "")
331 }
332 };
333
334 let (text_content, attr_text, space_before_attrs) =
336 if let Some((_attrs, text_before, start_brace_pos)) =
337 try_parse_trailing_attributes_with_pos(heading_content)
338 {
339 let space = &heading_content[text_before.len()..start_brace_pos];
340 let raw_attrs = &heading_content[start_brace_pos..];
341 (text_before, Some(raw_attrs), space)
342 } else if config.extensions.mmd_header_identifiers {
343 if let Some((_normalized, start_bracket_pos, end_bracket_pos)) =
344 try_parse_mmd_header_identifier_with_pos(heading_content)
345 {
346 let text_before =
347 heading_content[..start_bracket_pos].trim_end_matches([' ', '\t']);
348 let space = &heading_content[text_before.len()..start_bracket_pos];
349 let raw_attrs = &heading_content[start_bracket_pos..end_bracket_pos];
350 (text_before, Some(raw_attrs), space)
351 } else {
352 (heading_content, None, "")
353 }
354 } else {
355 (heading_content, None, "")
356 };
357
358 builder.start_node(SyntaxKind::HEADING_CONTENT.into());
360 if !text_content.is_empty() {
361 inline_emission::emit_inlines(builder, text_content, config);
362 }
363 builder.finish_node();
364
365 if !space_before_attrs.is_empty() {
367 builder.token(SyntaxKind::WHITESPACE.into(), space_before_attrs);
368 }
369
370 if let Some(attr_text) = attr_text {
372 builder.start_node(SyntaxKind::ATTRIBUTE.into());
373 builder.token(SyntaxKind::ATTRIBUTE.into(), attr_text);
374 builder.finish_node();
375 }
376
377 if !closing_suffix.is_empty() {
378 let closing_trimmed = closing_suffix.trim_matches(|c| c == ' ' || c == '\t');
379 let leading_ws_len = closing_suffix
380 .find(|c: char| c != ' ' && c != '\t')
381 .unwrap_or(closing_suffix.len());
382 let trailing_ws_len = closing_suffix.len() - leading_ws_len - closing_trimmed.len();
383
384 if leading_ws_len > 0 {
385 builder.token(
386 SyntaxKind::WHITESPACE.into(),
387 &closing_suffix[..leading_ws_len],
388 );
389 }
390 if !closing_trimmed.is_empty() {
391 builder.token(SyntaxKind::ATX_HEADING_MARKER.into(), closing_trimmed);
392 }
393 if trailing_ws_len > 0 {
394 builder.token(
395 SyntaxKind::WHITESPACE.into(),
396 &closing_suffix[closing_suffix.len() - trailing_ws_len..],
397 );
398 }
399 }
400
401 if !newline_str.is_empty() {
403 builder.token(SyntaxKind::NEWLINE.into(), newline_str);
404 }
405
406 builder.finish_node(); }
408
409#[cfg(test)]
410mod tests {
411 use super::*;
412
413 #[test]
414 fn test_simple_heading() {
415 assert_eq!(try_parse_atx_heading("# Heading"), Some(1));
416 }
417
418 #[test]
419 fn test_level_3_heading() {
420 assert_eq!(try_parse_atx_heading("### Level 3"), Some(3));
421 }
422
423 #[test]
424 fn test_heading_with_leading_spaces() {
425 assert_eq!(try_parse_atx_heading(" # Heading"), Some(1));
426 }
427
428 #[test]
429 fn test_atx_heading_with_attributes_losslessness() {
430 use crate::ParserOptions;
431
432 let input = "# Test {#id}\n";
434 let config = ParserOptions::default();
435 let tree = crate::parse(input, Some(config));
436
437 assert_eq!(
439 tree.text().to_string(),
440 input,
441 "Parser must preserve all bytes including space before attributes"
442 );
443
444 let heading = tree.first_child().unwrap();
446 assert_eq!(heading.kind(), SyntaxKind::HEADING);
447
448 let mut found_whitespace = false;
450 for child in heading.children_with_tokens() {
451 if child.kind() == SyntaxKind::WHITESPACE
452 && let Some(token) = child.as_token()
453 {
454 let start: usize = token.text_range().start().into();
455 if token.text() == " " && start == 6 {
456 found_whitespace = true;
457 break;
458 }
459 }
460 }
461 assert!(
462 found_whitespace,
463 "Whitespace token between heading content and attributes must be present"
464 );
465 }
466
467 #[test]
468 fn test_atx_heading_closing_hashes_are_lossless() {
469 let input = "### Extension: `smart` ###\n";
470 let tree = crate::parse(input, Some(crate::ParserOptions::default()));
471 assert_eq!(tree.text().to_string(), input);
472 }
473
474 #[test]
475 fn test_four_spaces_not_heading() {
476 assert_eq!(try_parse_atx_heading(" # Not heading"), None);
477 }
478
479 #[test]
480 fn test_no_space_after_hash() {
481 assert_eq!(try_parse_atx_heading("#NoSpace"), None);
482 }
483
484 #[test]
485 fn test_empty_heading() {
486 assert_eq!(try_parse_atx_heading("# "), Some(1));
487 }
488
489 #[test]
490 fn test_level_7_invalid() {
491 assert_eq!(try_parse_atx_heading("####### Too many"), None);
492 }
493
494 #[test]
496 fn test_setext_level_1() {
497 let lines = vec!["Heading", "======="];
498 assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
499 }
500
501 #[test]
502 fn test_setext_level_2() {
503 let lines = vec!["Heading", "-------"];
504 assert_eq!(try_parse_setext_heading(&lines, 0), Some((2, '-')));
505 }
506
507 #[test]
508 fn test_setext_minimum_three_chars() {
509 let lines = vec!["Heading", "=="];
510 assert_eq!(try_parse_setext_heading(&lines, 0), None);
511
512 let lines = vec!["Heading", "==="];
513 assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
514 }
515
516 #[test]
517 fn test_setext_mixed_chars_invalid() {
518 let lines = vec!["Heading", "==-=="];
519 assert_eq!(try_parse_setext_heading(&lines, 0), None);
520 }
521
522 #[test]
523 fn test_setext_with_leading_spaces() {
524 let lines = vec!["Heading", " ======="];
525 assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
526 }
527
528 #[test]
529 fn test_setext_with_trailing_spaces() {
530 let lines = vec!["Heading", "======= "];
531 assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
532 }
533
534 #[test]
535 fn test_setext_empty_text_line() {
536 let lines = vec!["", "======="];
537 assert_eq!(try_parse_setext_heading(&lines, 0), None);
538 }
539
540 #[test]
541 fn test_setext_no_next_line() {
542 let lines = vec!["Heading"];
543 assert_eq!(try_parse_setext_heading(&lines, 0), None);
544 }
545
546 #[test]
547 fn test_setext_four_spaces_indent() {
548 let lines = vec![" Heading", " ======="];
550 assert_eq!(try_parse_setext_heading(&lines, 0), None);
551 }
552
553 #[test]
554 fn test_setext_long_underline() {
555 let underline = "=".repeat(100);
556 let lines = vec!["Heading", underline.as_str()];
557 assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
558 }
559
560 #[test]
561 fn test_parse_mmd_header_identifier_normalizes_like_pandoc() {
562 let parsed = try_parse_mmd_header_identifier_with_pos("A heading [My ID]")
563 .expect("should parse mmd header identifier");
564 assert_eq!(parsed.0, "myid");
565 assert_eq!(parsed.1, 10);
566 }
567}