panache_parser/parser/blocks/
headings.rs1use crate::options::ParserOptions;
4use crate::syntax::SyntaxKind;
5use rowan::GreenNodeBuilder;
6
7use crate::parser::utils::attributes::try_parse_trailing_attributes_with_pos;
8use crate::parser::utils::inline_emission;
9
/// Look for a MultiMarkdown-style `[identifier]` at the very end of `content`.
///
/// Trailing spaces and tabs are ignored when locating the closing `]`. The
/// opening bracket is the last `[` that precedes that closing bracket.
///
/// Returns `(normalized, start, end)` where:
/// - `normalized` is the bracket contents with all whitespace removed and
///   lowercased,
/// - `start` is the byte offset of `[` within `content`,
/// - `end` is the byte offset just past `]` (i.e. the length of `content`
///   once trailing spaces/tabs are removed).
///
/// Returns `None` when there is no trailing bracket pair or when the bracket
/// contents are empty or whitespace-only.
fn try_parse_mmd_header_identifier_with_pos(content: &str) -> Option<(String, usize, usize)> {
    let visible = content.trim_end_matches([' ', '\t']);

    // The identifier must be the last thing on the line.
    let before_close = visible.strip_suffix(']')?;
    let open = before_close.rfind('[')?;
    let label = &before_close[open + 1..];

    // Dropping all whitespace yields an empty string exactly when the label
    // is blank, so a single emptiness check covers both cases.
    let squeezed: String = label.split_whitespace().collect();
    if squeezed.is_empty() {
        return None;
    }

    Some((squeezed.to_lowercase(), open, visible.len()))
}
33
/// Decide whether `content` opens an ATX heading (`# ...` through `###### ...`).
///
/// Returns the heading level (the number of leading `#` characters) when:
/// - at most three bytes of leading whitespace precede the marker,
/// - the marker is between one and six `#` characters long, and
/// - the marker is followed by a space, a tab, or the end of the input.
///
/// Returns `None` otherwise.
pub fn try_parse_atx_heading(content: &str) -> Option<usize> {
    let body = content.trim_start();

    // Indentation is measured in bytes of stripped leading whitespace.
    let indent = content.len() - body.len();
    if indent > 3 {
        return None;
    }

    // `#` is ASCII, so counting bytes gives a valid slice offset.
    let level = body.bytes().take_while(|&b| b == b'#').count();
    if !(1..=6).contains(&level) {
        return None;
    }

    // Only a space, a tab, or nothing at all may follow the marker.
    match body[level..].chars().next() {
        None | Some(' ') | Some('\t') => Some(level),
        Some(_) => None,
    }
}
59
/// Decide whether `lines[pos]` followed by `lines[pos + 1]` forms a setext
/// heading (a text line underlined with `=` or `-`).
///
/// Returns `(level, underline_char)` with level 1 for `=` and level 2 for `-`.
///
/// Returns `None` when:
/// - either line is missing,
/// - the text line is blank or has four or more bytes of leading whitespace,
/// - the underline (after trimming) is shorter than three bytes, mixes
///   characters, or is not made of `=` / `-`,
/// - the underline has four or more bytes of leading whitespace.
pub fn try_parse_setext_heading(lines: &[&str], pos: usize) -> Option<(usize, char)> {
    let title = *lines.get(pos)?;
    let rule = *lines.get(pos + 1)?;

    // Number of bytes of leading whitespace on a line.
    let indent_of = |line: &str| line.len() - line.trim_start().len();

    if title.trim().is_empty() || indent_of(title) >= 4 {
        return None;
    }

    let marks = rule.trim();
    if marks.len() < 3 || indent_of(rule) >= 4 {
        return None;
    }

    let marker = marks.chars().next()?;
    let level = match marker {
        '=' => 1,
        '-' => 2,
        _ => return None,
    };

    // The whole underline must repeat the same marker character.
    marks
        .chars()
        .all(|c| c == marker)
        .then_some((level, marker))
}
127
128pub(crate) fn emit_setext_heading(
133 builder: &mut GreenNodeBuilder<'static>,
134 text_line: &str,
135 underline_line: &str,
136 _level: usize,
137 config: &ParserOptions,
138) {
139 builder.start_node(SyntaxKind::HEADING.into());
140
141 let (text_without_newline, text_newline_str) =
143 if let Some(stripped) = text_line.strip_suffix("\r\n") {
144 (stripped, "\r\n")
145 } else if let Some(stripped) = text_line.strip_suffix('\n') {
146 (stripped, "\n")
147 } else {
148 (text_line, "")
149 };
150
151 let text_trimmed = text_without_newline.trim_start();
153 let leading_spaces = text_without_newline.len() - text_trimmed.len();
154
155 if leading_spaces > 0 {
156 builder.token(
157 SyntaxKind::WHITESPACE.into(),
158 &text_without_newline[..leading_spaces],
159 );
160 }
161
162 let (text_content, attr_text, space_before_attrs) =
164 if let Some((_attrs, text_before, start_brace_pos)) =
165 try_parse_trailing_attributes_with_pos(text_trimmed)
166 {
167 let space = &text_trimmed[text_before.len()..start_brace_pos];
168 let raw_attrs = &text_trimmed[start_brace_pos..];
169 (text_before, Some(raw_attrs), space)
170 } else if config.extensions.mmd_header_identifiers {
171 if let Some((_normalized, start_bracket_pos, end_bracket_pos)) =
172 try_parse_mmd_header_identifier_with_pos(text_trimmed)
173 {
174 let text_before = text_trimmed[..start_bracket_pos].trim_end_matches([' ', '\t']);
175 let space = &text_trimmed[text_before.len()..start_bracket_pos];
176 let raw_attrs = &text_trimmed[start_bracket_pos..end_bracket_pos];
177 (text_before, Some(raw_attrs), space)
178 } else {
179 (text_trimmed, None, "")
180 }
181 } else {
182 (text_trimmed, None, "")
183 };
184
185 builder.start_node(SyntaxKind::HEADING_CONTENT.into());
187 if !text_content.is_empty() {
188 inline_emission::emit_inlines(builder, text_content, config);
189 }
190 builder.finish_node();
191
192 if !space_before_attrs.is_empty() {
194 builder.token(SyntaxKind::WHITESPACE.into(), space_before_attrs);
195 }
196
197 if let Some(attr_text) = attr_text {
199 builder.start_node(SyntaxKind::ATTRIBUTE.into());
200 builder.token(SyntaxKind::ATTRIBUTE.into(), attr_text);
201 builder.finish_node();
202 }
203
204 if !text_newline_str.is_empty() {
206 builder.token(SyntaxKind::NEWLINE.into(), text_newline_str);
207 }
208
209 let (underline_without_newline, underline_newline_str) =
211 if let Some(stripped) = underline_line.strip_suffix("\r\n") {
212 (stripped, "\r\n")
213 } else if let Some(stripped) = underline_line.strip_suffix('\n') {
214 (stripped, "\n")
215 } else {
216 (underline_line, "")
217 };
218
219 let underline_trimmed = underline_without_newline.trim_start();
221 let underline_leading_spaces = underline_without_newline.len() - underline_trimmed.len();
222
223 if underline_leading_spaces > 0 {
224 builder.token(
225 SyntaxKind::WHITESPACE.into(),
226 &underline_without_newline[..underline_leading_spaces],
227 );
228 }
229
230 builder.start_node(SyntaxKind::SETEXT_HEADING_UNDERLINE.into());
232 builder.token(
233 SyntaxKind::SETEXT_HEADING_UNDERLINE.into(),
234 underline_trimmed,
235 );
236 builder.finish_node();
237
238 if !underline_newline_str.is_empty() {
240 builder.token(SyntaxKind::NEWLINE.into(), underline_newline_str);
241 }
242
243 builder.finish_node(); }
245
246pub(crate) fn emit_atx_heading(
248 builder: &mut GreenNodeBuilder<'static>,
249 content: &str,
250 level: usize,
251 config: &ParserOptions,
252) {
253 builder.start_node(SyntaxKind::HEADING.into());
254
255 let (content_without_newline, newline_str) =
257 if let Some(stripped) = content.strip_suffix("\r\n") {
258 (stripped, "\r\n")
259 } else if let Some(stripped) = content.strip_suffix('\n') {
260 (stripped, "\n")
261 } else {
262 (content, "")
263 };
264
265 let trimmed = content_without_newline.trim_start();
266 let leading_spaces = content_without_newline.len() - trimmed.len();
267
268 if leading_spaces > 0 {
270 builder.token(
271 SyntaxKind::WHITESPACE.into(),
272 &content_without_newline[..leading_spaces],
273 );
274 }
275
276 builder.start_node(SyntaxKind::ATX_HEADING_MARKER.into());
278 builder.token(SyntaxKind::ATX_HEADING_MARKER.into(), &trimmed[..level]);
279 builder.finish_node();
280
281 let after_marker = &trimmed[level..];
283 let spaces_after_marker_count = after_marker
284 .find(|c: char| !c.is_whitespace())
285 .unwrap_or(after_marker.len());
286
287 if spaces_after_marker_count > 0 {
289 builder.token(
290 SyntaxKind::WHITESPACE.into(),
291 &after_marker[..spaces_after_marker_count],
292 );
293 }
294
295 let heading_text = &after_marker[spaces_after_marker_count..];
297
298 let (heading_content, closing_suffix) = {
300 let without_trailing_ws = heading_text.trim_end_matches([' ', '\t']);
301 let trailing_hashes = without_trailing_ws
302 .chars()
303 .rev()
304 .take_while(|&c| c == '#')
305 .count();
306
307 if trailing_hashes > 0 {
308 let hashes_start = without_trailing_ws.len() - trailing_hashes;
309 let before_hashes = &without_trailing_ws[..hashes_start];
310 if before_hashes
311 .chars()
312 .last()
313 .is_some_and(|c| c == ' ' || c == '\t')
314 {
315 let content_end = before_hashes.trim_end_matches([' ', '\t']).len();
316 (&heading_text[..content_end], &heading_text[content_end..])
317 } else {
318 (heading_text, "")
319 }
320 } else {
321 (heading_text, "")
322 }
323 };
324
325 let (text_content, attr_text, space_before_attrs) =
327 if let Some((_attrs, text_before, start_brace_pos)) =
328 try_parse_trailing_attributes_with_pos(heading_content)
329 {
330 let space = &heading_content[text_before.len()..start_brace_pos];
331 let raw_attrs = &heading_content[start_brace_pos..];
332 (text_before, Some(raw_attrs), space)
333 } else if config.extensions.mmd_header_identifiers {
334 if let Some((_normalized, start_bracket_pos, end_bracket_pos)) =
335 try_parse_mmd_header_identifier_with_pos(heading_content)
336 {
337 let text_before =
338 heading_content[..start_bracket_pos].trim_end_matches([' ', '\t']);
339 let space = &heading_content[text_before.len()..start_bracket_pos];
340 let raw_attrs = &heading_content[start_bracket_pos..end_bracket_pos];
341 (text_before, Some(raw_attrs), space)
342 } else {
343 (heading_content, None, "")
344 }
345 } else {
346 (heading_content, None, "")
347 };
348
349 builder.start_node(SyntaxKind::HEADING_CONTENT.into());
351 if !text_content.is_empty() {
352 inline_emission::emit_inlines(builder, text_content, config);
353 }
354 builder.finish_node();
355
356 if !space_before_attrs.is_empty() {
358 builder.token(SyntaxKind::WHITESPACE.into(), space_before_attrs);
359 }
360
361 if let Some(attr_text) = attr_text {
363 builder.start_node(SyntaxKind::ATTRIBUTE.into());
364 builder.token(SyntaxKind::ATTRIBUTE.into(), attr_text);
365 builder.finish_node();
366 }
367
368 if !closing_suffix.is_empty() {
369 let closing_trimmed = closing_suffix.trim_matches(|c| c == ' ' || c == '\t');
370 let leading_ws_len = closing_suffix
371 .find(|c: char| c != ' ' && c != '\t')
372 .unwrap_or(closing_suffix.len());
373 let trailing_ws_len = closing_suffix.len() - leading_ws_len - closing_trimmed.len();
374
375 if leading_ws_len > 0 {
376 builder.token(
377 SyntaxKind::WHITESPACE.into(),
378 &closing_suffix[..leading_ws_len],
379 );
380 }
381 if !closing_trimmed.is_empty() {
382 builder.token(SyntaxKind::ATX_HEADING_MARKER.into(), closing_trimmed);
383 }
384 if trailing_ws_len > 0 {
385 builder.token(
386 SyntaxKind::WHITESPACE.into(),
387 &closing_suffix[closing_suffix.len() - trailing_ws_len..],
388 );
389 }
390 }
391
392 if !newline_str.is_empty() {
394 builder.token(SyntaxKind::NEWLINE.into(), newline_str);
395 }
396
397 builder.finish_node(); }
399
#[cfg(test)]
mod tests {
    use super::*;

    // ----- ATX heading detection -------------------------------------------

    #[test]
    fn test_simple_heading() {
        assert_eq!(try_parse_atx_heading("# Heading"), Some(1));
    }

    #[test]
    fn test_level_3_heading() {
        assert_eq!(try_parse_atx_heading("### Level 3"), Some(3));
    }

    #[test]
    fn test_heading_with_leading_spaces() {
        // Three spaces is the largest indent that is still a heading.
        assert_eq!(try_parse_atx_heading("   # Heading"), Some(1));
    }

    #[test]
    fn test_atx_heading_with_attributes_losslessness() {
        use crate::ParserOptions;

        let input = "# Test {#id}\n";
        let config = ParserOptions::default();
        let tree = crate::parse(input, Some(config));

        assert_eq!(
            tree.text().to_string(),
            input,
            "Parser must preserve all bytes including space before attributes"
        );

        let heading = tree.first_child().unwrap();
        assert_eq!(heading.kind(), SyntaxKind::HEADING);

        // The single space between "Test" and "{#id}" sits at byte offset 6
        // and must be a direct WHITESPACE child of the heading.
        let found_whitespace = heading.children_with_tokens().any(|child| {
            child.kind() == SyntaxKind::WHITESPACE
                && child.as_token().is_some_and(|token| {
                    let start: usize = token.text_range().start().into();
                    token.text() == " " && start == 6
                })
        });
        assert!(
            found_whitespace,
            "Whitespace token between heading content and attributes must be present"
        );
    }

    #[test]
    fn test_atx_heading_closing_hashes_are_lossless() {
        let input = "### Extension: `smart` ###\n";
        let tree = crate::parse(input, Some(crate::ParserOptions::default()));
        assert_eq!(tree.text().to_string(), input);
    }

    #[test]
    fn test_four_spaces_not_heading() {
        // Four spaces of indentation disqualifies the line as a heading.
        assert_eq!(try_parse_atx_heading("    # Not heading"), None);
    }

    #[test]
    fn test_no_space_after_hash() {
        assert_eq!(try_parse_atx_heading("#NoSpace"), None);
    }

    #[test]
    fn test_empty_heading() {
        assert_eq!(try_parse_atx_heading("# "), Some(1));
    }

    #[test]
    fn test_level_7_invalid() {
        assert_eq!(try_parse_atx_heading("####### Too many"), None);
    }

    // ----- Setext heading detection ----------------------------------------

    #[test]
    fn test_setext_level_1() {
        let lines = vec!["Heading", "======="];
        assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
    }

    #[test]
    fn test_setext_level_2() {
        let lines = vec!["Heading", "-------"];
        assert_eq!(try_parse_setext_heading(&lines, 0), Some((2, '-')));
    }

    #[test]
    fn test_setext_minimum_three_chars() {
        let lines = vec!["Heading", "=="];
        assert_eq!(try_parse_setext_heading(&lines, 0), None);

        let lines = vec!["Heading", "==="];
        assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
    }

    #[test]
    fn test_setext_mixed_chars_invalid() {
        let lines = vec!["Heading", "==-=="];
        assert_eq!(try_parse_setext_heading(&lines, 0), None);
    }

    #[test]
    fn test_setext_with_leading_spaces() {
        // Up to three spaces before the underline are allowed.
        let lines = vec!["Heading", "   ======="];
        assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
    }

    #[test]
    fn test_setext_with_trailing_spaces() {
        let lines = vec!["Heading", "=======  "];
        assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
    }

    #[test]
    fn test_setext_empty_text_line() {
        let lines = vec!["", "======="];
        assert_eq!(try_parse_setext_heading(&lines, 0), None);
    }

    #[test]
    fn test_setext_no_next_line() {
        let lines = vec!["Heading"];
        assert_eq!(try_parse_setext_heading(&lines, 0), None);
    }

    #[test]
    fn test_setext_four_spaces_indent() {
        // Four or more spaces of indentation disqualifies a setext heading.
        let lines = vec!["    Heading", "    ======="];
        assert_eq!(try_parse_setext_heading(&lines, 0), None);
    }

    #[test]
    fn test_setext_long_underline() {
        let underline = "=".repeat(100);
        let lines = vec!["Heading", underline.as_str()];
        assert_eq!(try_parse_setext_heading(&lines, 0), Some((1, '=')));
    }

    // ----- MultiMarkdown header identifiers --------------------------------

    #[test]
    fn test_parse_mmd_header_identifier_normalizes_like_pandoc() {
        let parsed = try_parse_mmd_header_identifier_with_pos("A heading [My ID]")
            .expect("should parse mmd header identifier");
        assert_eq!(parsed.0, "myid");
        assert_eq!(parsed.1, 10);
        assert_eq!(parsed.2, 17);
    }
}