panache_parser/parser/utils/
list_item_buffer.rs1use crate::options::{Dialect, ParserOptions};
7use crate::parser::blocks::headings::{emit_atx_heading, try_parse_atx_heading};
8use crate::parser::blocks::horizontal_rules::{emit_horizontal_rule, try_parse_horizontal_rule};
9use crate::parser::blocks::html_blocks::{
10 HtmlBlockType, count_tag_balance, is_pandoc_matched_pair_tag, try_parse_html_block_start,
11};
12use crate::parser::utils::inline_emission;
13use crate::parser::utils::text_buffer::ParagraphBuffer;
14use crate::syntax::{SyntaxKind, SyntaxNode};
15use rowan::{GreenNodeBuilder, TextSize};
16
/// One buffered piece of list-item content, stored in the order it was pushed.
///
/// `PartialEq`/`Eq` are derived in addition to `Debug`/`Clone` so that segments
/// can be compared directly (for example in assertions) instead of being
/// destructured by hand.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum ListItemContent {
    /// A run of text belonging to the list item.
    Text(String),
    /// A blockquote marker recorded between text segments.
    BlockquoteMarker {
        /// Leading-space count forwarded to `ParagraphBuffer::push_marker`.
        leading_spaces: usize,
        /// Trailing-space flag forwarded to `ParagraphBuffer::push_marker`.
        has_trailing_space: bool,
    },
}
28
29#[derive(Debug, Default, Clone)]
37pub(crate) struct ListItemBuffer {
38 segments: Vec<ListItemContent>,
40}
41
42impl ListItemBuffer {
43 pub(crate) fn new() -> Self {
45 Self {
46 segments: Vec::new(),
47 }
48 }
49
50 pub(crate) fn push_text(&mut self, text: impl Into<String>) {
52 let text = text.into();
53 if text.is_empty() {
54 return;
55 }
56 self.segments.push(ListItemContent::Text(text));
57 }
58
59 pub(crate) fn push_blockquote_marker(
60 &mut self,
61 leading_spaces: usize,
62 has_trailing_space: bool,
63 ) {
64 self.segments.push(ListItemContent::BlockquoteMarker {
65 leading_spaces,
66 has_trailing_space,
67 });
68 }
69
70 pub(crate) fn is_empty(&self) -> bool {
72 self.segments.is_empty()
73 }
74
75 pub(crate) fn segment_count(&self) -> usize {
77 self.segments.len()
78 }
79
80 pub(crate) fn first_text(&self) -> Option<&str> {
82 match self.segments.first()? {
83 ListItemContent::Text(t) => Some(t.as_str()),
84 ListItemContent::BlockquoteMarker { .. } => None,
85 }
86 }
87
88 pub(crate) fn unclosed_pandoc_matched_pair_tag(
100 &self,
101 config: &ParserOptions,
102 ) -> Option<String> {
103 if config.dialect != Dialect::Pandoc {
104 return None;
105 }
106 let first = self.first_text()?;
107 let first_line_with_nl = first.split_inclusive('\n').next()?;
108 let first_line_no_nl = first_line_with_nl
109 .strip_suffix("\r\n")
110 .or_else(|| first_line_with_nl.strip_suffix('\n'))
111 .unwrap_or(first_line_with_nl);
112 let HtmlBlockType::BlockTag {
113 tag_name,
114 is_closing: false,
115 ..
116 } = try_parse_html_block_start(first_line_no_nl, false)?
117 else {
118 return None;
119 };
120 if !is_pandoc_matched_pair_tag(&tag_name) {
121 return None;
122 }
123 let mut opens = 0usize;
124 let mut closes = 0usize;
125 for segment in &self.segments {
126 if let ListItemContent::Text(t) = segment {
127 let (o, c) = count_tag_balance(t, &tag_name);
128 opens += o;
129 closes += c;
130 }
131 }
132 if opens > closes { Some(tag_name) } else { None }
133 }
134
135 pub(crate) fn has_blank_lines_between_content(&self) -> bool {
140 log::trace!(
141 "has_blank_lines_between_content: segments={} result=false",
142 self.segments.len()
143 );
144
145 false
146 }
147
148 fn get_text_for_parsing(&self) -> String {
150 let mut result = String::new();
151 for segment in &self.segments {
152 if let ListItemContent::Text(text) = segment {
153 result.push_str(text);
154 }
155 }
156 result
157 }
158
159 fn to_paragraph_buffer(&self) -> ParagraphBuffer {
160 let mut paragraph_buffer = ParagraphBuffer::new();
161 for segment in &self.segments {
162 match segment {
163 ListItemContent::Text(text) => paragraph_buffer.push_text(text),
164 ListItemContent::BlockquoteMarker {
165 leading_spaces,
166 has_trailing_space,
167 } => paragraph_buffer.push_marker(*leading_spaces, *has_trailing_space),
168 }
169 }
170 paragraph_buffer
171 }
172
173 pub(crate) fn emit_as_block(
189 &self,
190 builder: &mut GreenNodeBuilder<'static>,
191 use_paragraph: bool,
192 config: &ParserOptions,
193 content_col: usize,
194 suppress_footnote_refs: bool,
195 ) {
196 if self.is_empty() {
197 return;
198 }
199
200 let text = self.get_text_for_parsing();
202
203 if !text.is_empty() {
204 let line_without_newline = text
205 .strip_suffix("\r\n")
206 .or_else(|| text.strip_suffix('\n'));
207 if let Some(line) = line_without_newline
208 && !line.contains('\n')
209 && !line.contains('\r')
210 {
211 if let Some(level) = try_parse_atx_heading(line) {
212 emit_atx_heading(builder, &text, level, config);
213 return;
214 }
215 if try_parse_horizontal_rule(line).is_some() {
216 emit_horizontal_rule(builder, &text);
217 return;
218 }
219 }
220
221 if self
226 .segments
227 .iter()
228 .all(|s| matches!(s, ListItemContent::Text(_)))
229 && let Some(first_nl) = text.find('\n')
230 {
231 let first_line = &text[..first_nl];
232 let after_first = &text[first_nl + 1..];
233 if !after_first.is_empty()
234 && let Some(level) = try_parse_atx_heading(first_line)
235 {
236 let heading_bytes = &text[..first_nl + 1];
237 emit_atx_heading(builder, heading_bytes, level, config);
238
239 let block_kind = if use_paragraph {
240 SyntaxKind::PARAGRAPH
241 } else {
242 SyntaxKind::PLAIN
243 };
244 builder.start_node(block_kind.into());
245 inline_emission::emit_inlines(
246 builder,
247 after_first,
248 config,
249 suppress_footnote_refs,
250 );
251 builder.finish_node();
252 return;
253 }
254 }
255
256 if config.dialect == Dialect::Pandoc
271 && self
272 .segments
273 .iter()
274 .all(|s| matches!(s, ListItemContent::Text(_)))
275 && try_emit_html_block_lift(builder, &text, config, content_col, use_paragraph)
276 {
277 return;
278 }
279 }
280
281 let block_kind = if use_paragraph {
282 SyntaxKind::PARAGRAPH
283 } else {
284 SyntaxKind::PLAIN
285 };
286
287 builder.start_node(block_kind.into());
288
289 let paragraph_buffer = self.to_paragraph_buffer();
290 if !paragraph_buffer.is_empty() {
291 paragraph_buffer.emit_with_inlines(builder, config, suppress_footnote_refs);
292 } else if !text.is_empty() {
293 inline_emission::emit_inlines(builder, &text, config, suppress_footnote_refs);
294 }
295
296 builder.finish_node(); }
298
299 pub(crate) fn clear(&mut self) {
301 self.segments.clear();
302 }
303}
304
305fn try_emit_html_block_lift(
326 builder: &mut GreenNodeBuilder<'static>,
327 text: &str,
328 config: &ParserOptions,
329 content_col: usize,
330 use_paragraph: bool,
331) -> bool {
332 let first_line = text.split_inclusive('\n').next().unwrap_or(text);
333 let first_line_no_nl = first_line
334 .strip_suffix("\r\n")
335 .or_else(|| first_line.strip_suffix('\n'))
336 .unwrap_or(first_line);
337 if try_parse_html_block_start(first_line_no_nl, false).is_none() {
338 return false;
339 }
340
341 let (parse_text, prefixes) = if content_col > 0 {
342 strip_list_item_indent(text, content_col)
343 } else {
344 (text.to_string(), Vec::new())
345 };
346
347 let refdefs = config.refdef_labels.clone().unwrap_or_default();
348 let inner_root = crate::parser::parse_with_refdefs(&parse_text, Some(config.clone()), refdefs);
349
350 let children: Vec<SyntaxNode> = inner_root.children().collect();
351 if children.is_empty() {
352 return false;
353 }
354 let first = &children[0];
355 if !matches!(
356 first.kind(),
357 SyntaxKind::HTML_BLOCK | SyntaxKind::HTML_BLOCK_DIV
358 ) {
359 return false;
360 }
361 let total_end = children.last().unwrap().text_range().end();
362 if total_end != TextSize::of(parse_text.as_str()) {
363 return false;
364 }
365
366 let multi_child_trailing = if children.len() == 1 {
383 false
384 } else if children.len() == 2
385 && matches!(
386 first.kind(),
387 SyntaxKind::HTML_BLOCK | SyntaxKind::HTML_BLOCK_DIV
388 )
389 && children[1].kind() == SyntaxKind::PARAGRAPH
390 {
391 true
392 } else {
393 return false;
394 };
395
396 if first.kind() == SyntaxKind::HTML_BLOCK_DIV {
397 let html_block_tag_count = first
398 .children()
399 .filter(|c| c.kind() == SyntaxKind::HTML_BLOCK_TAG)
400 .count();
401 if html_block_tag_count < 2 {
402 return false;
403 }
404 }
405
406 let mut prefix_state = if prefixes.is_empty() {
407 None
408 } else {
409 Some(LinePrefixState {
410 prefixes,
411 line_idx: 0,
412 at_line_start: true,
413 })
414 };
415 if multi_child_trailing {
416 graft_node(builder, first, &mut prefix_state);
417 let trailing_kind = if use_paragraph {
418 SyntaxKind::PARAGRAPH
419 } else {
420 SyntaxKind::PLAIN
421 };
422 graft_node_retag_root(builder, &children[1], &mut prefix_state, trailing_kind);
423 } else {
424 graft_node(builder, first, &mut prefix_state);
425 }
426 true
427}
428
429fn graft_node_retag_root(
430 builder: &mut GreenNodeBuilder<'static>,
431 node: &SyntaxNode,
432 prefix: &mut Option<LinePrefixState>,
433 new_kind: SyntaxKind,
434) {
435 builder.start_node(new_kind.into());
436 for child in node.children_with_tokens() {
437 match child {
438 rowan::NodeOrToken::Node(n) => graft_node(builder, &n, prefix),
439 rowan::NodeOrToken::Token(t) => {
440 emit_grafted_token(builder, t.kind(), t.text(), prefix);
441 }
442 }
443 }
444 builder.finish_node();
445}
446
/// Bookkeeping used while grafting tokens to re-insert per-line prefixes.
struct LinePrefixState {
    /// One prefix string per line of the grafted text; empty entries emit
    /// nothing.
    prefixes: Vec<String>,
    /// Index into `prefixes` for the line currently being emitted.
    line_idx: usize,
    /// `true` when the next token is the first one on its line.
    at_line_start: bool,
}
460
/// Removes up to `content_col` columns of leading indentation from every line
/// of `text` except the first.
///
/// Returns the stripped text together with one prefix per input line holding
/// exactly what was removed from that line (always empty for the first line).
/// Spaces count as one column; a tab advances to the next multiple of four and
/// is only removed when it does not overshoot `content_col`. Stripping stops at
/// the first byte that is neither a space nor a tab.
fn strip_list_item_indent(text: &str, content_col: usize) -> (String, Vec<String>) {
    // Byte length of the leading indentation of `line` that fits in `content_col` columns.
    fn indent_len(line: &str, content_col: usize) -> usize {
        let mut column = 0usize;
        let mut taken = 0usize;
        for byte in line.bytes() {
            if column >= content_col {
                break;
            }
            column = match byte {
                b' ' => column + 1,
                b'\t' => {
                    let tab_stop = (column / 4 + 1) * 4;
                    if tab_stop > content_col {
                        break;
                    }
                    tab_stop
                }
                _ => break,
            };
            taken += 1;
        }
        taken
    }

    let mut stripped = String::with_capacity(text.len());
    let mut prefixes = Vec::new();
    let mut lines = text.split_inclusive('\n');

    // The first line is copied verbatim and records an empty prefix.
    if let Some(first) = lines.next() {
        prefixes.push(String::new());
        stripped.push_str(first);
    }

    for line in lines {
        // Only ASCII spaces/tabs are counted, so the split index is a char boundary.
        let (indent, rest) = line.split_at(indent_len(line, content_col));
        prefixes.push(indent.to_string());
        stripped.push_str(rest);
    }

    (stripped, prefixes)
}
502
503fn graft_node(
504 builder: &mut GreenNodeBuilder<'static>,
505 node: &SyntaxNode,
506 prefix: &mut Option<LinePrefixState>,
507) {
508 builder.start_node(node.kind().into());
509 for child in node.children_with_tokens() {
510 match child {
511 rowan::NodeOrToken::Node(n) => graft_node(builder, &n, prefix),
512 rowan::NodeOrToken::Token(t) => {
513 emit_grafted_token(builder, t.kind(), t.text(), prefix);
514 }
515 }
516 }
517 builder.finish_node();
518}
519
520fn emit_grafted_token(
521 builder: &mut GreenNodeBuilder<'static>,
522 kind: SyntaxKind,
523 text: &str,
524 prefix: &mut Option<LinePrefixState>,
525) {
526 if let Some(state) = prefix.as_mut() {
527 if state.at_line_start {
528 if let Some(p) = state.prefixes.get(state.line_idx)
529 && !p.is_empty()
530 {
531 builder.token(SyntaxKind::WHITESPACE.into(), p);
532 }
533 state.at_line_start = false;
534 }
535 builder.token(kind.into(), text);
536 if kind == SyntaxKind::NEWLINE || kind == SyntaxKind::BLANK_LINE {
537 state.line_idx += 1;
538 state.at_line_start = true;
539 }
540 } else {
541 builder.token(kind.into(), text);
542 }
543}
544
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly created buffer holds nothing and reports no blank lines.
    #[test]
    fn test_new_buffer_is_empty() {
        let fresh = ListItemBuffer::new();
        assert!(fresh.is_empty());
        assert!(!fresh.has_blank_lines_between_content());
    }

    /// A single pushed text is returned unchanged.
    #[test]
    fn test_push_single_text() {
        let mut buf = ListItemBuffer::new();
        buf.push_text("Hello, world!");

        assert!(!buf.is_empty());
        assert!(!buf.has_blank_lines_between_content());
        assert_eq!(buf.get_text_for_parsing(), "Hello, world!");
    }

    /// Multiple text segments are concatenated in push order.
    #[test]
    fn test_push_multiple_text_segments() {
        let mut buf = ListItemBuffer::new();
        for piece in ["Line 1\n", "Line 2\n", "Line 3"] {
            buf.push_text(piece);
        }

        assert_eq!(buf.get_text_for_parsing(), "Line 1\nLine 2\nLine 3");
    }

    /// Clearing drops all content.
    #[test]
    fn test_clear_buffer() {
        let mut buf = ListItemBuffer::new();
        buf.push_text("Some text");
        assert!(!buf.is_empty());

        buf.clear();

        assert!(buf.is_empty());
        assert_eq!(buf.get_text_for_parsing(), "");
    }

    /// Pushing an empty string leaves the buffer empty.
    #[test]
    fn test_empty_text_ignored() {
        let mut buf = ListItemBuffer::new();
        buf.push_text("");
        assert!(buf.is_empty());
    }
}
591}