panache_parser/parser/utils/
list_item_buffer.rs1use crate::options::{Dialect, ParserOptions};
7use crate::parser::blocks::container_prefix::{
8 ContainerPrefixLine, ContainerPrefixState, emit_container_prefix_tokens,
9};
10use crate::parser::blocks::headings::{emit_atx_heading, try_parse_atx_heading};
11use crate::parser::blocks::horizontal_rules::{emit_horizontal_rule, try_parse_horizontal_rule};
12use crate::parser::blocks::html_blocks::{
13 HtmlBlockType, count_tag_balance, is_pandoc_matched_pair_tag, try_parse_html_block_start,
14};
15use crate::parser::utils::inline_emission;
16use crate::parser::utils::text_buffer::ParagraphBuffer;
17use crate::syntax::{SyntaxKind, SyntaxNode};
18use rowan::{GreenNodeBuilder, TextSize};
19
/// One buffered piece of list item content.
#[derive(Clone, Debug)]
pub(crate) enum ListItemContent {
    /// A run of raw text, stored exactly as it was pushed.
    Text(String),
    /// A blockquote marker, described by the two values the caller supplied
    /// when pushing it.
    BlockquoteMarker {
        /// Leading-space count recorded for the marker.
        leading_spaces: usize,
        /// Trailing-space flag recorded for the marker.
        has_trailing_space: bool,
    },
}
31
32#[derive(Debug, Default, Clone)]
40pub(crate) struct ListItemBuffer {
41 segments: Vec<ListItemContent>,
43}
44
45impl ListItemBuffer {
46 pub(crate) fn new() -> Self {
48 Self {
49 segments: Vec::new(),
50 }
51 }
52
53 pub(crate) fn push_text(&mut self, text: impl Into<String>) {
55 let text = text.into();
56 if text.is_empty() {
57 return;
58 }
59 self.segments.push(ListItemContent::Text(text));
60 }
61
62 pub(crate) fn push_blockquote_marker(
63 &mut self,
64 leading_spaces: usize,
65 has_trailing_space: bool,
66 ) {
67 self.segments.push(ListItemContent::BlockquoteMarker {
68 leading_spaces,
69 has_trailing_space,
70 });
71 }
72
73 pub(crate) fn is_empty(&self) -> bool {
75 self.segments.is_empty()
76 }
77
78 pub(crate) fn segment_count(&self) -> usize {
80 self.segments.len()
81 }
82
83 pub(crate) fn first_text(&self) -> Option<&str> {
85 match self.segments.first()? {
86 ListItemContent::Text(t) => Some(t.as_str()),
87 ListItemContent::BlockquoteMarker { .. } => None,
88 }
89 }
90
91 pub(crate) fn unclosed_pandoc_matched_pair_tag(
103 &self,
104 config: &ParserOptions,
105 ) -> Option<String> {
106 if config.dialect != Dialect::Pandoc {
107 return None;
108 }
109 let first = self.first_text()?;
110 let first_line_with_nl = first.split_inclusive('\n').next()?;
111 let first_line_no_nl = first_line_with_nl
112 .strip_suffix("\r\n")
113 .or_else(|| first_line_with_nl.strip_suffix('\n'))
114 .unwrap_or(first_line_with_nl);
115 let HtmlBlockType::BlockTag {
116 tag_name,
117 is_closing: false,
118 ..
119 } = try_parse_html_block_start(first_line_no_nl, false)?
120 else {
121 return None;
122 };
123 if !is_pandoc_matched_pair_tag(&tag_name) {
124 return None;
125 }
126 let mut opens = 0usize;
127 let mut closes = 0usize;
128 for segment in &self.segments {
129 if let ListItemContent::Text(t) = segment {
130 let (o, c) = count_tag_balance(t, &tag_name);
131 opens += o;
132 closes += c;
133 }
134 }
135 if opens > closes { Some(tag_name) } else { None }
136 }
137
138 pub(crate) fn has_blank_lines_between_content(&self) -> bool {
143 log::trace!(
144 "has_blank_lines_between_content: segments={} result=false",
145 self.segments.len()
146 );
147
148 false
149 }
150
151 fn get_text_for_parsing(&self) -> String {
153 let mut result = String::new();
154 for segment in &self.segments {
155 if let ListItemContent::Text(text) = segment {
156 result.push_str(text);
157 }
158 }
159 result
160 }
161
162 fn to_paragraph_buffer(&self) -> ParagraphBuffer {
163 let mut paragraph_buffer = ParagraphBuffer::new();
164 for segment in &self.segments {
165 match segment {
166 ListItemContent::Text(text) => paragraph_buffer.push_text(text),
167 ListItemContent::BlockquoteMarker {
168 leading_spaces,
169 has_trailing_space,
170 } => paragraph_buffer.push_marker(*leading_spaces, *has_trailing_space),
171 }
172 }
173 paragraph_buffer
174 }
175
176 pub(crate) fn emit_as_block(
192 &self,
193 builder: &mut GreenNodeBuilder<'static>,
194 use_paragraph: bool,
195 config: &ParserOptions,
196 content_col: usize,
197 suppress_footnote_refs: bool,
198 ) {
199 if self.is_empty() {
200 return;
201 }
202
203 let text = self.get_text_for_parsing();
205
206 if !text.is_empty() {
207 let line_without_newline = text
208 .strip_suffix("\r\n")
209 .or_else(|| text.strip_suffix('\n'));
210 if let Some(line) = line_without_newline
211 && !line.contains('\n')
212 && !line.contains('\r')
213 {
214 if let Some(level) = try_parse_atx_heading(line) {
215 emit_atx_heading(builder, &text, level, config);
216 return;
217 }
218 if try_parse_horizontal_rule(line).is_some() {
219 emit_horizontal_rule(builder, &text);
220 return;
221 }
222 }
223
224 if self
229 .segments
230 .iter()
231 .all(|s| matches!(s, ListItemContent::Text(_)))
232 && let Some(first_nl) = text.find('\n')
233 {
234 let first_line = &text[..first_nl];
235 let after_first = &text[first_nl + 1..];
236 if !after_first.is_empty()
237 && let Some(level) = try_parse_atx_heading(first_line)
238 {
239 let heading_bytes = &text[..first_nl + 1];
240 emit_atx_heading(builder, heading_bytes, level, config);
241
242 let block_kind = if use_paragraph {
243 SyntaxKind::PARAGRAPH
244 } else {
245 SyntaxKind::PLAIN
246 };
247 builder.start_node(block_kind.into());
248 inline_emission::emit_inlines(
249 builder,
250 after_first,
251 config,
252 suppress_footnote_refs,
253 );
254 builder.finish_node();
255 return;
256 }
257 }
258
259 if config.dialect == Dialect::Pandoc
274 && self
275 .segments
276 .iter()
277 .all(|s| matches!(s, ListItemContent::Text(_)))
278 && try_emit_html_block_lift(builder, &text, config, content_col, use_paragraph)
279 {
280 return;
281 }
282
283 if self
292 .segments
293 .iter()
294 .all(|s| matches!(s, ListItemContent::Text(_)))
295 && try_emit_table_or_div_lift(builder, &text, config, content_col)
296 {
297 return;
298 }
299 }
300
301 let block_kind = if use_paragraph {
302 SyntaxKind::PARAGRAPH
303 } else {
304 SyntaxKind::PLAIN
305 };
306
307 builder.start_node(block_kind.into());
308
309 let paragraph_buffer = self.to_paragraph_buffer();
310 if !paragraph_buffer.is_empty() {
311 paragraph_buffer.emit_with_inlines(builder, config, suppress_footnote_refs);
312 } else if !text.is_empty() {
313 inline_emission::emit_inlines(builder, &text, config, suppress_footnote_refs);
314 }
315
316 builder.finish_node(); }
318
319 pub(crate) fn clear(&mut self) {
321 self.segments.clear();
322 }
323}
324
325fn try_emit_html_block_lift(
346 builder: &mut GreenNodeBuilder<'static>,
347 text: &str,
348 config: &ParserOptions,
349 content_col: usize,
350 use_paragraph: bool,
351) -> bool {
352 let first_line = text.split_inclusive('\n').next().unwrap_or(text);
353 let first_line_no_nl = first_line
354 .strip_suffix("\r\n")
355 .or_else(|| first_line.strip_suffix('\n'))
356 .unwrap_or(first_line);
357 if try_parse_html_block_start(first_line_no_nl, false).is_none() {
358 return false;
359 }
360
361 let (parse_text, prefixes) = if content_col > 0 {
362 strip_list_item_indent(text, content_col)
363 } else {
364 (text.to_string(), Vec::new())
365 };
366
367 let refdefs = config.refdef_labels.clone().unwrap_or_default();
368 let inner_root = crate::parser::parse_with_refdefs(&parse_text, Some(config.clone()), refdefs);
369
370 let children: Vec<SyntaxNode> = inner_root.children().collect();
371 if children.is_empty() {
372 return false;
373 }
374 let first = &children[0];
375 if !matches!(
376 first.kind(),
377 SyntaxKind::HTML_BLOCK | SyntaxKind::HTML_BLOCK_DIV
378 ) {
379 return false;
380 }
381 let total_end = children.last().unwrap().text_range().end();
382 if total_end != TextSize::of(parse_text.as_str()) {
383 return false;
384 }
385
386 let multi_child_trailing = if children.len() == 1 {
403 false
404 } else if children.len() == 2
405 && matches!(
406 first.kind(),
407 SyntaxKind::HTML_BLOCK | SyntaxKind::HTML_BLOCK_DIV
408 )
409 && children[1].kind() == SyntaxKind::PARAGRAPH
410 {
411 true
412 } else {
413 return false;
414 };
415
416 if first.kind() == SyntaxKind::HTML_BLOCK_DIV {
417 let html_block_tag_count = first
418 .children()
419 .filter(|c| c.kind() == SyntaxKind::HTML_BLOCK_TAG)
420 .count();
421 if html_block_tag_count < 2 {
422 return false;
423 }
424 }
425
426 let prefix_lines: Vec<ContainerPrefixLine> = prefixes
427 .into_iter()
428 .map(ContainerPrefixLine::list_only)
429 .collect();
430 let mut prefix_state = ContainerPrefixState::new(prefix_lines);
431 if multi_child_trailing {
432 graft_node(builder, first, &mut prefix_state);
433 let trailing_kind = if use_paragraph {
434 SyntaxKind::PARAGRAPH
435 } else {
436 SyntaxKind::PLAIN
437 };
438 graft_node_retag_root(builder, &children[1], &mut prefix_state, trailing_kind);
439 } else {
440 graft_node(builder, first, &mut prefix_state);
441 }
442 true
443}
444
445fn try_emit_table_or_div_lift(
453 builder: &mut GreenNodeBuilder<'static>,
454 text: &str,
455 config: &ParserOptions,
456 content_col: usize,
457) -> bool {
458 let first_line = text.split_inclusive('\n').next().unwrap_or(text);
459 let first_line_no_nl = first_line
460 .strip_suffix("\r\n")
461 .or_else(|| first_line.strip_suffix('\n'))
462 .unwrap_or(first_line);
463 let trimmed = first_line_no_nl.trim_start();
464 let first_byte = trimmed.as_bytes().first().copied();
465 if !matches!(first_byte, Some(b'|') | Some(b'+') | Some(b':')) {
466 return false;
467 }
468
469 let (parse_text, prefixes) = if content_col > 0 {
470 strip_list_item_indent(text, content_col)
471 } else {
472 (text.to_string(), Vec::new())
473 };
474
475 let refdefs = config.refdef_labels.clone().unwrap_or_default();
476 let inner_root = crate::parser::parse_with_refdefs(&parse_text, Some(config.clone()), refdefs);
477
478 let children: Vec<SyntaxNode> = inner_root.children().collect();
479 if children.len() != 1 {
480 return false;
481 }
482 let first = &children[0];
483 if !matches!(
484 first.kind(),
485 SyntaxKind::PIPE_TABLE | SyntaxKind::GRID_TABLE | SyntaxKind::FENCED_DIV
486 ) {
487 return false;
488 }
489 if first.text_range().end() != TextSize::of(parse_text.as_str()) {
490 return false;
491 }
492
493 let prefix_lines: Vec<ContainerPrefixLine> = prefixes
494 .into_iter()
495 .map(ContainerPrefixLine::list_only)
496 .collect();
497 let mut prefix_state = ContainerPrefixState::new(prefix_lines);
498 graft_node(builder, first, &mut prefix_state);
499 true
500}
501
502fn graft_node_retag_root(
503 builder: &mut GreenNodeBuilder<'static>,
504 node: &SyntaxNode,
505 prefix: &mut Option<ContainerPrefixState>,
506 new_kind: SyntaxKind,
507) {
508 builder.start_node(new_kind.into());
509 for child in node.children_with_tokens() {
510 match child {
511 rowan::NodeOrToken::Node(n) => graft_node(builder, &n, prefix),
512 rowan::NodeOrToken::Token(t) => {
513 emit_grafted_token(builder, t.kind(), t.text(), prefix);
514 }
515 }
516 }
517 builder.finish_node();
518}
519
/// Removes up to `content_col` columns of leading indentation from every line
/// of `text` except the first.
///
/// Returns the stripped text together with one entry per line holding the
/// bytes that were removed from that line (always empty for the first line).
///
/// A space advances one column. A tab advances to the next multiple of four
/// and is only removed when that does not go past `content_col`. Scanning a
/// line stops at the first byte that is neither a space nor a tab.
fn strip_list_item_indent(text: &str, content_col: usize) -> (String, Vec<String>) {
    let mut stripped = String::with_capacity(text.len());
    let mut prefixes: Vec<String> = Vec::new();
    let mut lines = text.split_inclusive('\n');

    // The first line is copied through untouched.
    if let Some(first) = lines.next() {
        prefixes.push(String::new());
        stripped.push_str(first);
    }

    for line in lines {
        let mut column = 0usize;
        let mut cut = 0usize;
        for byte in line.bytes() {
            if column >= content_col {
                break;
            }
            let next_column = match byte {
                b' ' => column + 1,
                b'\t' => (column / 4 + 1) * 4,
                _ => break,
            };
            // Only a tab can overshoot; it is then left in place whole.
            if next_column > content_col {
                break;
            }
            column = next_column;
            cut += 1;
        }
        // `cut` counts only ASCII space/tab bytes, so it is a char boundary.
        let (indent, rest) = line.split_at(cut);
        prefixes.push(indent.to_string());
        stripped.push_str(rest);
    }

    (stripped, prefixes)
}
561
562fn graft_node(
563 builder: &mut GreenNodeBuilder<'static>,
564 node: &SyntaxNode,
565 prefix: &mut Option<ContainerPrefixState>,
566) {
567 builder.start_node(node.kind().into());
568 for child in node.children_with_tokens() {
569 match child {
570 rowan::NodeOrToken::Node(n) => graft_node(builder, &n, prefix),
571 rowan::NodeOrToken::Token(t) => {
572 emit_grafted_token(builder, t.kind(), t.text(), prefix);
573 }
574 }
575 }
576 builder.finish_node();
577}
578
579fn emit_grafted_token(
580 builder: &mut GreenNodeBuilder<'static>,
581 kind: SyntaxKind,
582 text: &str,
583 prefix: &mut Option<ContainerPrefixState>,
584) {
585 if let Some(state) = prefix.as_mut() {
586 if state.at_line_start {
587 if let Some(line_prefix) = state.prefixes.get(state.line_idx) {
588 emit_container_prefix_tokens(builder, line_prefix);
589 }
590 state.at_line_start = false;
591 }
592 builder.token(kind.into(), text);
593 if kind == SyntaxKind::NEWLINE || kind == SyntaxKind::BLANK_LINE {
594 state.line_idx += 1;
595 state.at_line_start = true;
596 }
597 } else {
598 builder.token(kind.into(), text);
599 }
600}
601
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed buffer holds nothing.
    #[test]
    fn test_new_buffer_is_empty() {
        let buf = ListItemBuffer::new();
        assert!(buf.is_empty());
        assert!(!buf.has_blank_lines_between_content());
    }

    /// A single pushed text is returned verbatim.
    #[test]
    fn test_push_single_text() {
        let mut buf = ListItemBuffer::new();
        buf.push_text("Hello, world!");
        assert!(!buf.is_empty());
        assert!(!buf.has_blank_lines_between_content());
        assert_eq!(buf.get_text_for_parsing(), "Hello, world!");
    }

    /// Several text segments are concatenated in push order.
    #[test]
    fn test_push_multiple_text_segments() {
        let mut buf = ListItemBuffer::new();
        for piece in ["Line 1\n", "Line 2\n", "Line 3"] {
            buf.push_text(piece);
        }
        assert_eq!(buf.get_text_for_parsing(), "Line 1\nLine 2\nLine 3");
    }

    /// Clearing drops all buffered content.
    #[test]
    fn test_clear_buffer() {
        let mut buf = ListItemBuffer::new();
        buf.push_text("Some text");
        assert!(!buf.is_empty());

        buf.clear();
        assert!(buf.is_empty());
        assert_eq!(buf.get_text_for_parsing(), "");
    }

    /// Pushing an empty string adds no segment.
    #[test]
    fn test_empty_text_ignored() {
        let mut buf = ListItemBuffer::new();
        buf.push_text("");
        assert!(buf.is_empty());
    }
}