panache_parser/parser/utils/
list_item_buffer.rs1use crate::options::{Dialect, ParserOptions};
7use crate::parser::blocks::container_prefix::{
8 ContainerPrefixLine, ContainerPrefixState, emit_container_prefix_tokens,
9};
10use crate::parser::blocks::headings::{emit_atx_heading, try_parse_atx_heading};
11use crate::parser::blocks::horizontal_rules::{emit_horizontal_rule, try_parse_horizontal_rule};
12use crate::parser::blocks::html_blocks::{
13 HtmlBlockType, count_tag_balance, is_pandoc_matched_pair_tag, try_parse_html_block_start,
14};
15use crate::parser::utils::inline_emission;
16use crate::parser::utils::text_buffer::ParagraphBuffer;
17use crate::syntax::{SyntaxKind, SyntaxNode};
18use rowan::{GreenNodeBuilder, TextSize};
19
/// One buffered piece of a list item's pending content.
#[derive(Clone, Debug)]
pub(crate) enum ListItemContent {
    /// Raw text, stored exactly as it was pushed into the buffer.
    Text(String),
    /// A blockquote marker recorded between text segments.
    BlockquoteMarker {
        /// NOTE(review): presumably the number of spaces preceding the marker;
        /// confirm against the code that pushes it.
        leading_spaces: usize,
        /// NOTE(review): presumably whether a space follows the marker;
        /// confirm against the code that pushes it.
        has_trailing_space: bool,
    },
}
31
32#[derive(Debug, Default, Clone)]
40pub(crate) struct ListItemBuffer {
41 segments: Vec<ListItemContent>,
43}
44
45impl ListItemBuffer {
46 pub(crate) fn new() -> Self {
48 Self {
49 segments: Vec::new(),
50 }
51 }
52
53 pub(crate) fn push_text(&mut self, text: impl Into<String>) {
55 let text = text.into();
56 if text.is_empty() {
57 return;
58 }
59 self.segments.push(ListItemContent::Text(text));
60 }
61
62 pub(crate) fn push_blockquote_marker(
63 &mut self,
64 leading_spaces: usize,
65 has_trailing_space: bool,
66 ) {
67 self.segments.push(ListItemContent::BlockquoteMarker {
68 leading_spaces,
69 has_trailing_space,
70 });
71 }
72
73 pub(crate) fn is_empty(&self) -> bool {
75 self.segments.is_empty()
76 }
77
78 pub(crate) fn segment_count(&self) -> usize {
80 self.segments.len()
81 }
82
83 pub(crate) fn first_text(&self) -> Option<&str> {
85 match self.segments.first()? {
86 ListItemContent::Text(t) => Some(t.as_str()),
87 ListItemContent::BlockquoteMarker { .. } => None,
88 }
89 }
90
91 pub(crate) fn unclosed_pandoc_matched_pair_tag(
103 &self,
104 config: &ParserOptions,
105 ) -> Option<String> {
106 if config.dialect != Dialect::Pandoc {
107 return None;
108 }
109 let first = self.first_text()?;
110 let first_line_with_nl = first.split_inclusive('\n').next()?;
111 let first_line_no_nl = first_line_with_nl
112 .strip_suffix("\r\n")
113 .or_else(|| first_line_with_nl.strip_suffix('\n'))
114 .unwrap_or(first_line_with_nl);
115 let HtmlBlockType::BlockTag {
116 tag_name,
117 is_closing: false,
118 ..
119 } = try_parse_html_block_start(first_line_no_nl, false)?
120 else {
121 return None;
122 };
123 if !is_pandoc_matched_pair_tag(&tag_name) {
124 return None;
125 }
126 let mut opens = 0usize;
127 let mut closes = 0usize;
128 for segment in &self.segments {
129 if let ListItemContent::Text(t) = segment {
130 let (o, c) = count_tag_balance(t, &tag_name);
131 opens += o;
132 closes += c;
133 }
134 }
135 if opens > closes { Some(tag_name) } else { None }
136 }
137
138 pub(crate) fn has_blank_lines_between_content(&self) -> bool {
143 log::trace!(
144 "has_blank_lines_between_content: segments={} result=false",
145 self.segments.len()
146 );
147
148 false
149 }
150
151 fn get_text_for_parsing(&self) -> String {
153 let mut result = String::new();
154 for segment in &self.segments {
155 if let ListItemContent::Text(text) = segment {
156 result.push_str(text);
157 }
158 }
159 result
160 }
161
162 fn to_paragraph_buffer(&self) -> ParagraphBuffer {
163 let mut paragraph_buffer = ParagraphBuffer::new();
164 for segment in &self.segments {
165 match segment {
166 ListItemContent::Text(text) => paragraph_buffer.push_text(text),
167 ListItemContent::BlockquoteMarker {
168 leading_spaces,
169 has_trailing_space,
170 } => paragraph_buffer.push_marker(*leading_spaces, *has_trailing_space),
171 }
172 }
173 paragraph_buffer
174 }
175
176 pub(crate) fn emit_as_block(
192 &self,
193 builder: &mut GreenNodeBuilder<'static>,
194 use_paragraph: bool,
195 config: &ParserOptions,
196 content_col: usize,
197 suppress_footnote_refs: bool,
198 ) {
199 if self.is_empty() {
200 return;
201 }
202
203 let text = self.get_text_for_parsing();
205
206 if !text.is_empty() {
207 let line_without_newline = text
208 .strip_suffix("\r\n")
209 .or_else(|| text.strip_suffix('\n'));
210 if let Some(line) = line_without_newline
211 && !line.contains('\n')
212 && !line.contains('\r')
213 {
214 if let Some(level) = try_parse_atx_heading(line) {
215 emit_atx_heading(builder, &text, level, config);
216 return;
217 }
218 if try_parse_horizontal_rule(line).is_some() {
219 emit_horizontal_rule(builder, &text);
220 return;
221 }
222 }
223
224 if self
229 .segments
230 .iter()
231 .all(|s| matches!(s, ListItemContent::Text(_)))
232 && let Some(first_nl) = text.find('\n')
233 {
234 let first_line = &text[..first_nl];
235 let after_first = &text[first_nl + 1..];
236 if !after_first.is_empty()
237 && let Some(level) = try_parse_atx_heading(first_line)
238 {
239 let heading_bytes = &text[..first_nl + 1];
240 emit_atx_heading(builder, heading_bytes, level, config);
241
242 let block_kind = if use_paragraph {
243 SyntaxKind::PARAGRAPH
244 } else {
245 SyntaxKind::PLAIN
246 };
247 builder.start_node(block_kind.into());
248 inline_emission::emit_inlines(
249 builder,
250 after_first,
251 config,
252 suppress_footnote_refs,
253 );
254 builder.finish_node();
255 return;
256 }
257 }
258
259 if config.dialect == Dialect::Pandoc
274 && self
275 .segments
276 .iter()
277 .all(|s| matches!(s, ListItemContent::Text(_)))
278 && try_emit_html_block_lift(builder, &text, config, content_col, use_paragraph)
279 {
280 return;
281 }
282 }
283
284 let block_kind = if use_paragraph {
285 SyntaxKind::PARAGRAPH
286 } else {
287 SyntaxKind::PLAIN
288 };
289
290 builder.start_node(block_kind.into());
291
292 let paragraph_buffer = self.to_paragraph_buffer();
293 if !paragraph_buffer.is_empty() {
294 paragraph_buffer.emit_with_inlines(builder, config, suppress_footnote_refs);
295 } else if !text.is_empty() {
296 inline_emission::emit_inlines(builder, &text, config, suppress_footnote_refs);
297 }
298
299 builder.finish_node(); }
301
302 pub(crate) fn clear(&mut self) {
304 self.segments.clear();
305 }
306}
307
308fn try_emit_html_block_lift(
329 builder: &mut GreenNodeBuilder<'static>,
330 text: &str,
331 config: &ParserOptions,
332 content_col: usize,
333 use_paragraph: bool,
334) -> bool {
335 let first_line = text.split_inclusive('\n').next().unwrap_or(text);
336 let first_line_no_nl = first_line
337 .strip_suffix("\r\n")
338 .or_else(|| first_line.strip_suffix('\n'))
339 .unwrap_or(first_line);
340 if try_parse_html_block_start(first_line_no_nl, false).is_none() {
341 return false;
342 }
343
344 let (parse_text, prefixes) = if content_col > 0 {
345 strip_list_item_indent(text, content_col)
346 } else {
347 (text.to_string(), Vec::new())
348 };
349
350 let refdefs = config.refdef_labels.clone().unwrap_or_default();
351 let inner_root = crate::parser::parse_with_refdefs(&parse_text, Some(config.clone()), refdefs);
352
353 let children: Vec<SyntaxNode> = inner_root.children().collect();
354 if children.is_empty() {
355 return false;
356 }
357 let first = &children[0];
358 if !matches!(
359 first.kind(),
360 SyntaxKind::HTML_BLOCK | SyntaxKind::HTML_BLOCK_DIV
361 ) {
362 return false;
363 }
364 let total_end = children.last().unwrap().text_range().end();
365 if total_end != TextSize::of(parse_text.as_str()) {
366 return false;
367 }
368
369 let multi_child_trailing = if children.len() == 1 {
386 false
387 } else if children.len() == 2
388 && matches!(
389 first.kind(),
390 SyntaxKind::HTML_BLOCK | SyntaxKind::HTML_BLOCK_DIV
391 )
392 && children[1].kind() == SyntaxKind::PARAGRAPH
393 {
394 true
395 } else {
396 return false;
397 };
398
399 if first.kind() == SyntaxKind::HTML_BLOCK_DIV {
400 let html_block_tag_count = first
401 .children()
402 .filter(|c| c.kind() == SyntaxKind::HTML_BLOCK_TAG)
403 .count();
404 if html_block_tag_count < 2 {
405 return false;
406 }
407 }
408
409 let prefix_lines: Vec<ContainerPrefixLine> = prefixes
410 .into_iter()
411 .map(ContainerPrefixLine::list_only)
412 .collect();
413 let mut prefix_state = ContainerPrefixState::new(prefix_lines);
414 if multi_child_trailing {
415 graft_node(builder, first, &mut prefix_state);
416 let trailing_kind = if use_paragraph {
417 SyntaxKind::PARAGRAPH
418 } else {
419 SyntaxKind::PLAIN
420 };
421 graft_node_retag_root(builder, &children[1], &mut prefix_state, trailing_kind);
422 } else {
423 graft_node(builder, first, &mut prefix_state);
424 }
425 true
426}
427
428fn graft_node_retag_root(
429 builder: &mut GreenNodeBuilder<'static>,
430 node: &SyntaxNode,
431 prefix: &mut Option<ContainerPrefixState>,
432 new_kind: SyntaxKind,
433) {
434 builder.start_node(new_kind.into());
435 for child in node.children_with_tokens() {
436 match child {
437 rowan::NodeOrToken::Node(n) => graft_node(builder, &n, prefix),
438 rowan::NodeOrToken::Token(t) => {
439 emit_grafted_token(builder, t.kind(), t.text(), prefix);
440 }
441 }
442 }
443 builder.finish_node();
444}
445
/// Removes up to `content_col` columns of leading indentation from every line
/// of `text` except the first.
///
/// Returns the de-indented text together with one prefix string per line: the
/// exact whitespace that was removed from that line. The first line's prefix
/// is always empty, and empty input yields no prefixes at all.
///
/// Only spaces and tabs count as indentation. A tab advances to the next
/// multiple of four columns and is left in place when it would overshoot
/// `content_col`.
fn strip_list_item_indent(text: &str, content_col: usize) -> (String, Vec<String>) {
    /// Byte length of the leading whitespace of `line` that fits in `limit` columns.
    fn indent_len(line: &str, limit: usize) -> usize {
        let mut column = 0usize;
        let mut bytes = 0usize;
        for byte in line.bytes() {
            if column >= limit {
                break;
            }
            let advanced = match byte {
                b' ' => column + 1,
                b'\t' => (column / 4 + 1) * 4,
                _ => break,
            };
            // Only a tab can jump past the limit; it then stays in the content.
            if advanced > limit {
                break;
            }
            column = advanced;
            bytes += 1;
        }
        bytes
    }

    let mut body = String::with_capacity(text.len());
    let mut prefixes: Vec<String> = Vec::new();
    let mut lines = text.split_inclusive('\n');

    // The first line is copied verbatim and gets an empty prefix.
    if let Some(head) = lines.next() {
        prefixes.push(String::new());
        body.push_str(head);
    }

    for line in lines {
        let (indent, rest) = line.split_at(indent_len(line, content_col));
        prefixes.push(indent.to_owned());
        body.push_str(rest);
    }

    (body, prefixes)
}
487
488fn graft_node(
489 builder: &mut GreenNodeBuilder<'static>,
490 node: &SyntaxNode,
491 prefix: &mut Option<ContainerPrefixState>,
492) {
493 builder.start_node(node.kind().into());
494 for child in node.children_with_tokens() {
495 match child {
496 rowan::NodeOrToken::Node(n) => graft_node(builder, &n, prefix),
497 rowan::NodeOrToken::Token(t) => {
498 emit_grafted_token(builder, t.kind(), t.text(), prefix);
499 }
500 }
501 }
502 builder.finish_node();
503}
504
505fn emit_grafted_token(
506 builder: &mut GreenNodeBuilder<'static>,
507 kind: SyntaxKind,
508 text: &str,
509 prefix: &mut Option<ContainerPrefixState>,
510) {
511 if let Some(state) = prefix.as_mut() {
512 if state.at_line_start {
513 if let Some(line_prefix) = state.prefixes.get(state.line_idx) {
514 emit_container_prefix_tokens(builder, line_prefix);
515 }
516 state.at_line_start = false;
517 }
518 builder.token(kind.into(), text);
519 if kind == SyntaxKind::NEWLINE || kind == SyntaxKind::BLANK_LINE {
520 state.line_idx += 1;
521 state.at_line_start = true;
522 }
523 } else {
524 builder.token(kind.into(), text);
525 }
526}
527
#[cfg(test)]
mod tests {
    //! Unit tests for `ListItemBuffer` and the indentation-stripping helper.
    //!
    //! Besides the basic text-segment behaviour, these cover blockquote-marker
    //! segments, `first_text`, `segment_count` and `strip_list_item_indent`.

    use super::*;

    #[test]
    fn test_new_buffer_is_empty() {
        let buffer = ListItemBuffer::new();
        assert!(buffer.is_empty());
        assert!(!buffer.has_blank_lines_between_content());
    }

    #[test]
    fn test_push_single_text() {
        let mut buffer = ListItemBuffer::new();
        buffer.push_text("Hello, world!");
        assert!(!buffer.is_empty());
        assert!(!buffer.has_blank_lines_between_content());
        assert_eq!(buffer.get_text_for_parsing(), "Hello, world!");
    }

    #[test]
    fn test_push_multiple_text_segments() {
        let mut buffer = ListItemBuffer::new();
        buffer.push_text("Line 1\n");
        buffer.push_text("Line 2\n");
        buffer.push_text("Line 3");
        assert_eq!(buffer.get_text_for_parsing(), "Line 1\nLine 2\nLine 3");
    }

    #[test]
    fn test_clear_buffer() {
        let mut buffer = ListItemBuffer::new();
        buffer.push_text("Some text");
        assert!(!buffer.is_empty());

        buffer.clear();
        assert!(buffer.is_empty());
        assert_eq!(buffer.get_text_for_parsing(), "");
    }

    #[test]
    fn test_empty_text_ignored() {
        let mut buffer = ListItemBuffer::new();
        buffer.push_text("");
        assert!(buffer.is_empty());
    }

    #[test]
    fn test_blockquote_marker_is_a_segment_but_not_text() {
        let mut buffer = ListItemBuffer::new();
        buffer.push_text("quoted\n");
        buffer.push_blockquote_marker(0, true);
        buffer.push_text("more");

        // Markers count as segments but contribute nothing to the parse text.
        assert_eq!(buffer.segment_count(), 3);
        assert_eq!(buffer.get_text_for_parsing(), "quoted\nmore");
    }

    #[test]
    fn test_marker_only_buffer_is_not_empty() {
        let mut buffer = ListItemBuffer::new();
        buffer.push_blockquote_marker(2, false);
        assert!(!buffer.is_empty());
        assert_eq!(buffer.segment_count(), 1);
        assert_eq!(buffer.get_text_for_parsing(), "");
    }

    #[test]
    fn test_first_text_requires_leading_text_segment() {
        let mut buffer = ListItemBuffer::new();
        assert_eq!(buffer.first_text(), None);

        buffer.push_blockquote_marker(1, false);
        buffer.push_text("after marker");
        assert_eq!(buffer.first_text(), None);

        buffer.clear();
        buffer.push_text("lead");
        buffer.push_text("tail");
        assert_eq!(buffer.first_text(), Some("lead"));
    }

    #[test]
    fn test_strip_list_item_indent_spaces() {
        let (stripped, prefixes) = strip_list_item_indent("<div>\n  foo\n  </div>\n", 2);
        assert_eq!(stripped, "<div>\nfoo\n</div>\n");
        assert_eq!(prefixes, vec!["", "  ", "  "]);
    }

    #[test]
    fn test_strip_list_item_indent_keeps_first_line() {
        let (stripped, prefixes) = strip_list_item_indent("  a\n", 2);
        assert_eq!(stripped, "  a\n");
        assert_eq!(prefixes, vec![""]);
    }

    #[test]
    fn test_strip_list_item_indent_tabs() {
        // A tab that lands exactly on the content column is consumed.
        let (stripped, prefixes) = strip_list_item_indent("a\n\tb\n", 4);
        assert_eq!(stripped, "a\nb\n");
        assert_eq!(prefixes, vec!["", "\t"]);

        // A tab that would overshoot the content column is left in place.
        let (stripped, prefixes) = strip_list_item_indent("a\n\tb\n", 2);
        assert_eq!(stripped, "a\n\tb\n");
        assert_eq!(prefixes, vec!["", ""]);
    }
}