panache_parser/parser/utils/continuation.rs
1//! Continuation/blank-line handling policy.
2//!
3//! This module centralizes the parser's "should this line continue an existing container?"
4//! logic (especially across blank lines). Keeping this logic in one place reduces the
5//! risk of scattered ad-hoc heuristics diverging as blocks move into the dispatcher.
6
7use crate::options::{PandocCompat, ParserOptions};
8
9use crate::parser::block_dispatcher::{BlockContext, BlockParserRegistry};
10use crate::parser::blocks::blockquotes::{count_blockquote_markers, strip_n_blockquote_markers};
11use crate::parser::blocks::{definition_lists, html_blocks, lists, raw_blocks};
12use crate::parser::utils::container_stack::{ContainerStack, leading_indent};
13use crate::parser::utils::helpers::is_blank_line;
14
/// Decides whether open containers continue onto an upcoming line.
///
/// The policy owns no state of its own; it only borrows the parser
/// configuration and the block parser registry for the duration of a query.
pub(crate) struct ContinuationPolicy<'a, 'cfg> {
    /// Parser options; consulted for the dialect, the effective pandoc
    /// compatibility level and the enabled extensions.
    config: &'cfg ParserOptions,
    /// Registry used to detect which block (if any) would start at a line.
    block_registry: &'a BlockParserRegistry,
}
19
20impl<'a, 'cfg> ContinuationPolicy<'a, 'cfg> {
21 pub(crate) fn new(
22 config: &'cfg ParserOptions,
23 block_registry: &'a BlockParserRegistry,
24 ) -> Self {
25 Self {
26 config,
27 block_registry,
28 }
29 }
30
31 fn definition_min_block_indent(&self, content_col: usize) -> usize {
32 if self.config.effective_pandoc_compat() == PandocCompat::V3_7 {
33 content_col.max(4)
34 } else {
35 content_col
36 }
37 }
38
39 pub(crate) fn compute_levels_to_keep(
40 &self,
41 current_bq_depth: usize,
42 containers: &ContainerStack,
43 lines: &[&str],
44 next_line_pos: usize,
45 next_line: &str,
46 ) -> usize {
47 let (next_bq_depth, next_inner) = count_blockquote_markers(next_line);
48 let (raw_indent_cols, _) = leading_indent(next_inner);
49 let next_marker = lists::try_parse_list_marker(next_inner, self.config);
50 let next_is_definition_marker =
51 definition_lists::try_parse_definition_marker(next_inner).is_some();
52 let next_is_definition_term = !is_blank_line(next_inner)
53 && definition_lists::next_line_is_definition_marker(lines, next_line_pos).is_some();
54
55 // Re-detect the definition marker after stripping a content-container
56 // indent (e.g. the 4-space footnote body indent). Without this, a `:`
57 // line nested inside a footnote body fails the 0-3-space marker test
58 // and the parent DefinitionList/DefinitionItem incorrectly closes
59 // across blank lines, splitting one logical item into many.
60 let stripped_is_definition_marker = |content_indent_so_far: usize| -> bool {
61 if content_indent_so_far == 0 || raw_indent_cols < content_indent_so_far {
62 return false;
63 }
64 let strip_bytes = crate::parser::utils::container_stack::byte_index_at_column(
65 next_inner,
66 content_indent_so_far,
67 );
68 if strip_bytes > next_inner.len() {
69 return false;
70 }
71 definition_lists::try_parse_definition_marker(&next_inner[strip_bytes..]).is_some()
72 };
73
74 // `current_bq_depth` is used for proper indent calculation when the next line
75 // increases blockquote nesting.
76
77 let mut keep_level = 0;
78 let mut content_indent_so_far = 0usize;
79
80 // First, account for blockquotes
81 for (i, c) in containers.stack.iter().enumerate() {
82 match c {
83 crate::parser::utils::container_stack::Container::BlockQuote { .. } => {
84 let bq_count = containers.stack[..=i]
85 .iter()
86 .filter(|x| {
87 matches!(
88 x,
89 crate::parser::utils::container_stack::Container::BlockQuote { .. }
90 )
91 })
92 .count();
93 if bq_count <= next_bq_depth {
94 keep_level = i + 1;
95 }
96 }
97 crate::parser::utils::container_stack::Container::FootnoteDefinition {
98 content_col,
99 ..
100 } => {
101 content_indent_so_far += *content_col;
102 let min_indent = (*content_col).max(4);
103 if raw_indent_cols >= min_indent {
104 keep_level = i + 1;
105 }
106 }
107 crate::parser::utils::container_stack::Container::Definition {
108 content_col,
109 ..
110 } => {
111 // A blank line does not necessarily end a definition, but the continuation
112 // indent must be measured relative to any outer content containers (e.g.
113 // footnotes). Otherwise a line indented only for the footnote would wrongly
114 // continue the definition.
115 let min_indent = self.definition_min_block_indent(*content_col);
116 let effective_indent = raw_indent_cols.saturating_sub(content_indent_so_far);
117 if effective_indent >= min_indent {
118 keep_level = i + 1;
119 }
120 content_indent_so_far += *content_col;
121 }
122 crate::parser::utils::container_stack::Container::DefinitionItem { .. }
123 if next_is_definition_marker
124 || stripped_is_definition_marker(content_indent_so_far) =>
125 {
126 keep_level = i + 1;
127 }
128 crate::parser::utils::container_stack::Container::DefinitionList { .. }
129 if next_is_definition_marker
130 || next_is_definition_term
131 || stripped_is_definition_marker(content_indent_so_far) =>
132 {
133 keep_level = i + 1;
134 }
135 crate::parser::utils::container_stack::Container::List {
136 marker,
137 base_indent_cols,
138 ..
139 } => {
140 let definition_ancestor_kept = containers.stack[..i]
141 .iter()
142 .enumerate()
143 .rev()
144 .find_map(|(idx, container)| {
145 matches!(
146 container,
147 crate::parser::utils::container_stack::Container::Definition { .. }
148 )
149 .then_some(keep_level > idx)
150 })
151 .unwrap_or(true);
152 if !definition_ancestor_kept {
153 continue;
154 }
155
156 let effective_indent = raw_indent_cols.saturating_sub(content_indent_so_far);
157 let continues_list = if let Some(ref marker_match) = next_marker {
158 // Ordered markers can be right-aligned across items
159 // (e.g. `i.`, `ii.`, `iii.`), so they need a symmetric
160 // drift tolerance. Bullets are directional: a marker
161 // outdented from the list's base indent belongs to an
162 // outer list, not this one. Without that lower bound,
163 // a blank line followed by an outer-level marker keeps
164 // the inner list open and parks the BLANK_LINE inside
165 // it, breaking idempotency for nested-list outputs.
166 let indent_in_range = match marker {
167 lists::ListMarker::Ordered(_) => {
168 effective_indent.abs_diff(*base_indent_cols) <= 3
169 }
170 lists::ListMarker::Bullet(_) => {
171 // A bullet marker at indent ≥ 4 cannot continue
172 // a shallow-base bullet list across a blank line:
173 // pandoc treats the would-be marker as the start
174 // of an indented code block once the list is
175 // ineligible to absorb it as a sublist of the
176 // open item. The LIST_ITEM branch below still
177 // rescues the LIST when the previous item's
178 // content column accommodates the new indent
179 // (keep_level is monotonic), so this guard only
180 // closes the list when no item can absorb it.
181 let jumps_out_of_shallow_list =
182 effective_indent >= 4 && *base_indent_cols < 4;
183 if jumps_out_of_shallow_list {
184 false
185 } else if effective_indent >= *base_indent_cols {
186 effective_indent <= base_indent_cols + 3
187 } else {
188 // Bullets are directional, but only when an
189 // outer bullet list with matching marker can
190 // absorb the outdented marker. With no such
191 // outer list, pandoc keeps the current list
192 // open (the marker continues this list with
193 // a small leftward drift). Closing here would
194 // split one logical list into two and surface
195 // as an idempotency failure once the
196 // formatter normalizes indents.
197 let has_outer_match =
198 containers.stack[..i].iter().any(|outer| {
199 matches!(
200 outer,
201 crate::parser::utils::container_stack::Container::List {
202 marker: outer_marker,
203 base_indent_cols: outer_base,
204 ..
205 } if matches!(
206 outer_marker,
207 lists::ListMarker::Bullet(_)
208 ) && lists::markers_match(
209 outer_marker,
210 &marker_match.marker,
211 self.config.dialect,
212 ) && *outer_base <= effective_indent
213 )
214 });
215 !has_outer_match
216 && base_indent_cols.saturating_sub(effective_indent) <= 3
217 }
218 }
219 };
220 lists::markers_match(marker, &marker_match.marker, self.config.dialect)
221 && indent_in_range
222 } else {
223 let item_content_col = containers
224 .stack
225 .get(i + 1)
226 .and_then(|c| match c {
227 crate::parser::utils::container_stack::Container::ListItem {
228 content_col,
229 ..
230 } => Some(*content_col),
231 _ => None,
232 })
233 .unwrap_or(1);
234 effective_indent >= item_content_col
235 };
236 if continues_list {
237 keep_level = i + 1;
238 }
239 }
240 crate::parser::utils::container_stack::Container::ListItem {
241 content_col,
242 marker_only,
243 ..
244 } => {
245 let definition_ancestor_kept = containers.stack[..i]
246 .iter()
247 .enumerate()
248 .rev()
249 .find_map(|(idx, container)| {
250 matches!(
251 container,
252 crate::parser::utils::container_stack::Container::Definition { .. }
253 )
254 .then_some(keep_level > idx)
255 })
256 .unwrap_or(true);
257 if !definition_ancestor_kept {
258 continue;
259 }
260
261 // CommonMark §5.2: a list item that has only seen its
262 // marker line is closed by the first blank line. Any
263 // subsequent indented content is no longer part of the
264 // item. Pandoc keeps the item open across the blank.
265 if *marker_only && self.config.dialect == crate::options::Dialect::CommonMark {
266 // If the next line doesn't start another list marker,
267 // the parent List has nothing to continue with — close
268 // it too. (The List's own branch above optimistically
269 // kept itself based on indent ≥ content_col, which
270 // assumes a continuing item; that assumption fails
271 // once the empty item is closed by the blank.)
272 if next_marker.is_none() && i > 0 && keep_level == i {
273 keep_level = i - 1;
274 }
275 continue;
276 }
277
278 let effective_indent = if next_bq_depth > current_bq_depth {
279 let after_current_bq =
280 strip_n_blockquote_markers(next_line, current_bq_depth);
281 let (spaces_before_next_marker, _) = leading_indent(after_current_bq);
282 spaces_before_next_marker.saturating_sub(content_indent_so_far)
283 } else {
284 raw_indent_cols.saturating_sub(content_indent_so_far)
285 };
286
287 let is_new_item_at_outer_level = if next_marker.is_some() {
288 effective_indent < *content_col
289 } else {
290 false
291 };
292
293 if !is_new_item_at_outer_level && effective_indent >= *content_col {
294 keep_level = i + 1;
295 }
296 }
297 _ => {}
298 }
299 }
300
301 keep_level
302 }
303
304 /// Checks whether a line inside a definition should be treated as a plain continuation
305 /// (and buffered into the definition PLAIN), rather than parsed as a new block.
306 pub(crate) fn definition_plain_can_continue(
307 &self,
308 stripped_content: &str,
309 raw_content: &str,
310 content_indent: usize,
311 block_ctx: &BlockContext,
312 lines: &[&str],
313 pos: usize,
314 ) -> bool {
315 let prev_line_blank = if pos > 0 {
316 let prev_line = lines[pos - 1];
317 let (prev_bq_depth, prev_inner) = count_blockquote_markers(prev_line);
318 is_blank_line(prev_line) || (prev_bq_depth > 0 && is_blank_line(prev_inner))
319 } else {
320 false
321 };
322
323 // A blank line that isn't indented to the definition content column ends the definition.
324 let (indent_cols, _) = leading_indent(raw_content);
325 if is_blank_line(raw_content) && indent_cols < content_indent {
326 return false;
327 }
328 let min_block_indent = self.definition_min_block_indent(content_indent);
329 if prev_line_blank && indent_cols < min_block_indent {
330 return false;
331 }
332
333 // If it's a block element marker, don't continue as plain.
334 if definition_lists::try_parse_definition_marker(stripped_content).is_some()
335 && leading_indent(raw_content).0 <= 3
336 && !stripped_content.starts_with(':')
337 {
338 let is_next_definition = self
339 .block_registry
340 .detect_prepared(block_ctx, lines, pos)
341 .map(|match_result| {
342 match_result.effect
343 == crate::parser::block_dispatcher::BlockEffect::OpenDefinitionList
344 })
345 .unwrap_or(false);
346 if is_next_definition {
347 return false;
348 }
349 }
350 if lists::try_parse_list_marker(stripped_content, self.config).is_some() {
351 if prev_line_blank {
352 return false;
353 }
354 if block_ctx.in_list {
355 return false;
356 }
357 // A list marker indented to the definition's content column opens a
358 // nested list inside the definition (matches pandoc-native), even
359 // without a separating blank line.
360 let (raw_indent_cols, _) = leading_indent(raw_content);
361 if content_indent > 0 && raw_indent_cols >= content_indent {
362 return false;
363 }
364 }
365 if count_blockquote_markers(stripped_content).0 > 0 {
366 return false;
367 }
368 if self.config.extensions.raw_html
369 && html_blocks::try_parse_html_block_start(
370 stripped_content,
371 self.config.dialect == crate::options::Dialect::CommonMark,
372 )
373 .is_some()
374 {
375 return false;
376 }
377 if self.config.extensions.raw_tex
378 && raw_blocks::extract_environment_name(stripped_content).is_some()
379 {
380 return false;
381 }
382
383 if let Some(match_result) = self.block_registry.detect_prepared(block_ctx, lines, pos) {
384 if match_result.effect == crate::parser::block_dispatcher::BlockEffect::OpenList
385 && !prev_line_blank
386 {
387 return true;
388 }
389 if match_result.effect
390 == crate::parser::block_dispatcher::BlockEffect::OpenDefinitionList
391 && match_result
392 .payload
393 .as_ref()
394 .and_then(|payload| {
395 payload
396 .downcast_ref::<crate::parser::block_dispatcher::DefinitionPrepared>()
397 })
398 .is_some_and(|prepared| {
399 matches!(
400 prepared,
401 crate::parser::block_dispatcher::DefinitionPrepared::Term { .. }
402 )
403 })
404 {
405 return true;
406 }
407 return false;
408 }
409
410 true
411 }
412}