panache_parser/parser/utils/continuation.rs
1//! Continuation/blank-line handling policy.
2//!
3//! This module centralizes the parser's "should this line continue an existing container?"
4//! logic (especially across blank lines). Keeping this logic in one place reduces the
5//! risk of scattered ad-hoc heuristics diverging as blocks move into the dispatcher.
6
7use crate::options::{PandocCompat, ParserOptions};
8
9use crate::parser::block_dispatcher::{BlockContext, BlockParserRegistry};
10use crate::parser::blocks::blockquotes::{count_blockquote_markers, strip_n_blockquote_markers};
11use crate::parser::blocks::container_prefix::{ContainerPrefix, StrippedLines};
12use crate::parser::blocks::{definition_lists, html_blocks, lists, raw_blocks};
13use crate::parser::utils::container_stack::{ContainerStack, leading_indent};
14use crate::parser::utils::helpers::is_blank_line;
15
/// Decides whether open containers continue onto an upcoming line.
///
/// The policy only borrows its inputs; it stores no state of its own.
pub(crate) struct ContinuationPolicy<'a, 'cfg> {
    /// Parser options consulted for dialect, extension, and Pandoc-compat checks.
    config: &'cfg ParserOptions,
    /// Registry used to probe which block, if any, a line would open.
    block_registry: &'a BlockParserRegistry,
}
20
21impl<'a, 'cfg> ContinuationPolicy<'a, 'cfg> {
22 pub(crate) fn new(
23 config: &'cfg ParserOptions,
24 block_registry: &'a BlockParserRegistry,
25 ) -> Self {
26 Self {
27 config,
28 block_registry,
29 }
30 }
31
32 fn definition_min_block_indent(&self, content_col: usize) -> usize {
33 if self.config.effective_pandoc_compat() == PandocCompat::V3_7 {
34 content_col.max(4)
35 } else {
36 content_col
37 }
38 }
39
40 pub(crate) fn compute_levels_to_keep(
41 &self,
42 current_bq_depth: usize,
43 containers: &ContainerStack,
44 lines: &[&str],
45 next_line_pos: usize,
46 next_line: &str,
47 ) -> usize {
48 let (next_bq_depth, next_inner) = count_blockquote_markers(next_line);
49 let (raw_indent_cols, _) = leading_indent(next_inner);
50 let next_marker = lists::try_parse_list_marker(
51 next_inner,
52 self.config,
53 lists::open_list_hint_at_indent(containers, raw_indent_cols),
54 );
55 let next_is_definition_marker =
56 definition_lists::try_parse_definition_marker(next_inner).is_some();
57 let next_is_definition_term = !is_blank_line(next_inner)
58 && definition_lists::next_line_is_definition_marker(lines, next_line_pos).is_some();
59
60 // Re-detect the definition marker after stripping a content-container
61 // indent (e.g. the 4-space footnote body indent). Without this, a `:`
62 // line nested inside a footnote body fails the 0-3-space marker test
63 // and the parent DefinitionList/DefinitionItem incorrectly closes
64 // across blank lines, splitting one logical item into many.
65 let stripped_is_definition_marker = |content_indent_so_far: usize| -> bool {
66 if content_indent_so_far == 0 || raw_indent_cols < content_indent_so_far {
67 return false;
68 }
69 let strip_bytes = crate::parser::utils::container_stack::byte_index_at_column(
70 next_inner,
71 content_indent_so_far,
72 );
73 if strip_bytes > next_inner.len() {
74 return false;
75 }
76 definition_lists::try_parse_definition_marker(&next_inner[strip_bytes..]).is_some()
77 };
78
79 // `current_bq_depth` is used for proper indent calculation when the next line
80 // increases blockquote nesting.
81
82 let mut keep_level = 0;
83 let mut content_indent_so_far = 0usize;
84
85 // First, account for blockquotes
86 for (i, c) in containers.stack.iter().enumerate() {
87 match c {
88 crate::parser::utils::container_stack::Container::BlockQuote { .. } => {
89 let bq_count = containers.stack[..=i]
90 .iter()
91 .filter(|x| {
92 matches!(
93 x,
94 crate::parser::utils::container_stack::Container::BlockQuote { .. }
95 )
96 })
97 .count();
98 if bq_count <= next_bq_depth {
99 keep_level = i + 1;
100 }
101 }
102 crate::parser::utils::container_stack::Container::FootnoteDefinition {
103 content_col,
104 ..
105 } => {
106 content_indent_so_far += *content_col;
107 let min_indent = (*content_col).max(4);
108 if raw_indent_cols >= min_indent {
109 keep_level = i + 1;
110 }
111 }
112 crate::parser::utils::container_stack::Container::Definition {
113 content_col,
114 ..
115 } => {
116 // A blank line does not necessarily end a definition, but the continuation
117 // indent must be measured relative to any outer content containers (e.g.
118 // footnotes). Otherwise a line indented only for the footnote would wrongly
119 // continue the definition.
120 let min_indent = self.definition_min_block_indent(*content_col);
121 let effective_indent = raw_indent_cols.saturating_sub(content_indent_so_far);
122 if effective_indent >= min_indent {
123 keep_level = i + 1;
124 }
125 content_indent_so_far += *content_col;
126 }
127 crate::parser::utils::container_stack::Container::DefinitionItem { .. }
128 if next_is_definition_marker
129 || stripped_is_definition_marker(content_indent_so_far) =>
130 {
131 keep_level = i + 1;
132 }
133 crate::parser::utils::container_stack::Container::DefinitionList { .. }
134 if next_is_definition_marker
135 || next_is_definition_term
136 || stripped_is_definition_marker(content_indent_so_far) =>
137 {
138 keep_level = i + 1;
139 }
140 crate::parser::utils::container_stack::Container::List {
141 marker,
142 base_indent_cols,
143 ..
144 } => {
145 let definition_ancestor_kept = containers.stack[..i]
146 .iter()
147 .enumerate()
148 .rev()
149 .find_map(|(idx, container)| {
150 matches!(
151 container,
152 crate::parser::utils::container_stack::Container::Definition { .. }
153 )
154 .then_some(keep_level > idx)
155 })
156 .unwrap_or(true);
157 if !definition_ancestor_kept {
158 continue;
159 }
160
161 let effective_indent = raw_indent_cols.saturating_sub(content_indent_so_far);
162 let continues_list = if let Some(ref marker_match) = next_marker {
163 // Ordered markers can be right-aligned across items
164 // (e.g. `i.`, `ii.`, `iii.`), so they need a symmetric
165 // drift tolerance. Bullets are directional: a marker
166 // outdented from the list's base indent belongs to an
167 // outer list, not this one. Without that lower bound,
168 // a blank line followed by an outer-level marker keeps
169 // the inner list open and parks the BLANK_LINE inside
170 // it, breaking idempotency for nested-list outputs.
171 let indent_in_range = match marker {
172 lists::ListMarker::Ordered(_) => {
173 effective_indent.abs_diff(*base_indent_cols) <= 3
174 }
175 lists::ListMarker::Bullet(_) => {
176 // A bullet marker at indent ≥ 4 cannot continue
177 // a shallow-base bullet list across a blank line:
178 // pandoc treats the would-be marker as the start
179 // of an indented code block once the list is
180 // ineligible to absorb it as a sublist of the
181 // open item. The LIST_ITEM branch below still
182 // rescues the LIST when the previous item's
183 // content column accommodates the new indent
184 // (keep_level is monotonic), so this guard only
185 // closes the list when no item can absorb it.
186 let jumps_out_of_shallow_list =
187 effective_indent >= 4 && *base_indent_cols < 4;
188 if jumps_out_of_shallow_list {
189 false
190 } else if effective_indent >= *base_indent_cols {
191 effective_indent <= base_indent_cols + 3
192 } else {
193 // Bullets are directional, but only when an
194 // outer bullet list with matching marker can
195 // absorb the outdented marker. With no such
196 // outer list, pandoc keeps the current list
197 // open (the marker continues this list with
198 // a small leftward drift). Closing here would
199 // split one logical list into two and surface
200 // as an idempotency failure once the
201 // formatter normalizes indents.
202 let has_outer_match =
203 containers.stack[..i].iter().any(|outer| {
204 matches!(
205 outer,
206 crate::parser::utils::container_stack::Container::List {
207 marker: outer_marker,
208 base_indent_cols: outer_base,
209 ..
210 } if matches!(
211 outer_marker,
212 lists::ListMarker::Bullet(_)
213 ) && lists::markers_match(
214 outer_marker,
215 &marker_match.marker,
216 self.config.dialect,
217 ) && *outer_base <= effective_indent
218 )
219 });
220 !has_outer_match
221 && base_indent_cols.saturating_sub(effective_indent) <= 3
222 }
223 }
224 };
225 lists::markers_match(marker, &marker_match.marker, self.config.dialect)
226 && indent_in_range
227 } else {
228 let item_content_col = containers
229 .stack
230 .get(i + 1)
231 .and_then(|c| match c {
232 crate::parser::utils::container_stack::Container::ListItem {
233 content_col,
234 ..
235 } => Some(*content_col),
236 _ => None,
237 })
238 .unwrap_or(1);
239 effective_indent >= item_content_col
240 };
241 if continues_list {
242 keep_level = i + 1;
243 }
244 }
245 crate::parser::utils::container_stack::Container::ListItem {
246 content_col,
247 marker_only,
248 ..
249 } => {
250 let definition_ancestor_kept = containers.stack[..i]
251 .iter()
252 .enumerate()
253 .rev()
254 .find_map(|(idx, container)| {
255 matches!(
256 container,
257 crate::parser::utils::container_stack::Container::Definition { .. }
258 )
259 .then_some(keep_level > idx)
260 })
261 .unwrap_or(true);
262 if !definition_ancestor_kept {
263 continue;
264 }
265
266 // CommonMark §5.2: a list item that has only seen its
267 // marker line is closed by the first blank line. Any
268 // subsequent indented content is no longer part of the
269 // item. Pandoc keeps the item open across the blank.
270 if *marker_only && self.config.dialect == crate::options::Dialect::CommonMark {
271 // If the next line doesn't start another list marker,
272 // the parent List has nothing to continue with — close
273 // it too. (The List's own branch above optimistically
274 // kept itself based on indent ≥ content_col, which
275 // assumes a continuing item; that assumption fails
276 // once the empty item is closed by the blank.)
277 if next_marker.is_none() && i > 0 && keep_level == i {
278 keep_level = i - 1;
279 }
280 continue;
281 }
282
283 let effective_indent = if next_bq_depth > current_bq_depth {
284 let after_current_bq =
285 strip_n_blockquote_markers(next_line, current_bq_depth);
286 let (spaces_before_next_marker, _) = leading_indent(after_current_bq);
287 spaces_before_next_marker.saturating_sub(content_indent_so_far)
288 } else {
289 raw_indent_cols.saturating_sub(content_indent_so_far)
290 };
291
292 let is_new_item_at_outer_level = if next_marker.is_some() {
293 effective_indent < *content_col
294 } else {
295 false
296 };
297
298 if !is_new_item_at_outer_level && effective_indent >= *content_col {
299 keep_level = i + 1;
300 }
301 }
302 _ => {}
303 }
304 }
305
306 keep_level
307 }
308
309 /// Checks whether a line inside a definition should be treated as a plain continuation
310 /// (and buffered into the definition PLAIN), rather than parsed as a new block.
311 pub(crate) fn definition_plain_can_continue(
312 &self,
313 stripped_content: &str,
314 raw_content: &str,
315 content_indent: usize,
316 block_ctx: &BlockContext,
317 lines: &[&str],
318 pos: usize,
319 ) -> bool {
320 let prev_line_blank = if pos > 0 {
321 let prev_line = lines[pos - 1];
322 let (prev_bq_depth, prev_inner) = count_blockquote_markers(prev_line);
323 is_blank_line(prev_line) || (prev_bq_depth > 0 && is_blank_line(prev_inner))
324 } else {
325 false
326 };
327
328 // A blank line that isn't indented to the definition content column ends the definition.
329 let (indent_cols, _) = leading_indent(raw_content);
330 if is_blank_line(raw_content) && indent_cols < content_indent {
331 return false;
332 }
333 let min_block_indent = self.definition_min_block_indent(content_indent);
334 if prev_line_blank && indent_cols < min_block_indent {
335 return false;
336 }
337
338 // If it's a block element marker, don't continue as plain.
339 if definition_lists::try_parse_definition_marker(stripped_content).is_some()
340 && leading_indent(raw_content).0 <= 3
341 && !stripped_content.starts_with(':')
342 {
343 let is_next_definition = {
344 let prefix = ContainerPrefix::from_ctx(block_ctx);
345 let stripped = StrippedLines::new(lines, pos, &prefix);
346 self.block_registry
347 .detect_prepared(block_ctx, &stripped)
348 .map(|match_result| {
349 match_result.effect
350 == crate::parser::block_dispatcher::BlockEffect::OpenDefinitionList
351 })
352 .unwrap_or(false)
353 };
354 if is_next_definition {
355 return false;
356 }
357 }
358 if lists::try_parse_list_marker(stripped_content, self.config, block_ctx.open_alpha_hint)
359 .is_some()
360 {
361 if prev_line_blank {
362 return false;
363 }
364 if block_ctx.in_list {
365 return false;
366 }
367 // A list marker indented to the definition's content column opens a
368 // nested list inside the definition (matches pandoc-native), even
369 // without a separating blank line.
370 let (raw_indent_cols, _) = leading_indent(raw_content);
371 if content_indent > 0 && raw_indent_cols >= content_indent {
372 return false;
373 }
374 }
375 if count_blockquote_markers(stripped_content).0 > 0 {
376 return false;
377 }
378 if self.config.extensions.raw_html
379 && html_blocks::try_parse_html_block_start(
380 stripped_content,
381 self.config.dialect == crate::options::Dialect::CommonMark,
382 )
383 .is_some()
384 {
385 return false;
386 }
387 if self.config.extensions.raw_tex
388 && raw_blocks::extract_environment_name(stripped_content).is_some()
389 {
390 return false;
391 }
392
393 let prefix = ContainerPrefix::from_ctx(block_ctx);
394 let stripped = StrippedLines::new(lines, pos, &prefix);
395 if let Some(match_result) = self.block_registry.detect_prepared(block_ctx, &stripped) {
396 if match_result.effect == crate::parser::block_dispatcher::BlockEffect::OpenList
397 && !prev_line_blank
398 {
399 return true;
400 }
401 if match_result.effect
402 == crate::parser::block_dispatcher::BlockEffect::OpenDefinitionList
403 && match_result
404 .payload
405 .as_ref()
406 .and_then(|payload| {
407 payload
408 .downcast_ref::<crate::parser::block_dispatcher::DefinitionPrepared>()
409 })
410 .is_some_and(|prepared| {
411 matches!(
412 prepared,
413 crate::parser::block_dispatcher::DefinitionPrepared::Term { .. }
414 )
415 })
416 {
417 return true;
418 }
419 return false;
420 }
421
422 true
423 }
424}