Skip to main content

panache_parser/parser/utils/
continuation.rs

1//! Continuation/blank-line handling policy.
2//!
3//! This module centralizes the parser's "should this line continue an existing container?"
4//! logic (especially across blank lines). Keeping this logic in one place reduces the
5//! risk of scattered ad-hoc heuristics diverging as blocks move into the dispatcher.
6
7use crate::options::{PandocCompat, ParserOptions};
8
9use crate::parser::block_dispatcher::{BlockContext, BlockParserRegistry};
10use crate::parser::blocks::blockquotes::{count_blockquote_markers, strip_n_blockquote_markers};
11use crate::parser::blocks::{definition_lists, html_blocks, lists, raw_blocks};
12use crate::parser::utils::container_stack::{ContainerStack, leading_indent};
13use crate::parser::utils::helpers::is_blank_line;
14
/// Policy object answering "does this line continue an already-open container?".
///
/// It only borrows its inputs: the parser options and the block parser registry.
pub(crate) struct ContinuationPolicy<'a, 'cfg> {
    /// Parser options; the methods read the dialect, the enabled extensions and the
    /// effective pandoc compatibility level from here.
    config: &'cfg ParserOptions,
    /// Block parser registry, queried via `detect_prepared` to see whether a line would
    /// start a new block.
    block_registry: &'a BlockParserRegistry,
}
19
20impl<'a, 'cfg> ContinuationPolicy<'a, 'cfg> {
21    pub(crate) fn new(
22        config: &'cfg ParserOptions,
23        block_registry: &'a BlockParserRegistry,
24    ) -> Self {
25        Self {
26            config,
27            block_registry,
28        }
29    }
30
31    fn definition_min_block_indent(&self, content_col: usize) -> usize {
32        if self.config.effective_pandoc_compat() == PandocCompat::V3_7 {
33            content_col.max(4)
34        } else {
35            content_col
36        }
37    }
38
39    pub(crate) fn compute_levels_to_keep(
40        &self,
41        current_bq_depth: usize,
42        containers: &ContainerStack,
43        lines: &[&str],
44        next_line_pos: usize,
45        next_line: &str,
46    ) -> usize {
47        let (next_bq_depth, next_inner) = count_blockquote_markers(next_line);
48        let (raw_indent_cols, _) = leading_indent(next_inner);
49        let next_marker = lists::try_parse_list_marker(next_inner, self.config);
50        let next_is_definition_marker =
51            definition_lists::try_parse_definition_marker(next_inner).is_some();
52        let next_is_definition_term = !is_blank_line(next_inner)
53            && definition_lists::next_line_is_definition_marker(lines, next_line_pos).is_some();
54
55        // Re-detect the definition marker after stripping a content-container
56        // indent (e.g. the 4-space footnote body indent). Without this, a `:`
57        // line nested inside a footnote body fails the 0-3-space marker test
58        // and the parent DefinitionList/DefinitionItem incorrectly closes
59        // across blank lines, splitting one logical item into many.
60        let stripped_is_definition_marker = |content_indent_so_far: usize| -> bool {
61            if content_indent_so_far == 0 || raw_indent_cols < content_indent_so_far {
62                return false;
63            }
64            let strip_bytes = crate::parser::utils::container_stack::byte_index_at_column(
65                next_inner,
66                content_indent_so_far,
67            );
68            if strip_bytes > next_inner.len() {
69                return false;
70            }
71            definition_lists::try_parse_definition_marker(&next_inner[strip_bytes..]).is_some()
72        };
73
74        // `current_bq_depth` is used for proper indent calculation when the next line
75        // increases blockquote nesting.
76
77        let mut keep_level = 0;
78        let mut content_indent_so_far = 0usize;
79
80        // First, account for blockquotes
81        for (i, c) in containers.stack.iter().enumerate() {
82            match c {
83                crate::parser::utils::container_stack::Container::BlockQuote { .. } => {
84                    let bq_count = containers.stack[..=i]
85                        .iter()
86                        .filter(|x| {
87                            matches!(
88                                x,
89                                crate::parser::utils::container_stack::Container::BlockQuote { .. }
90                            )
91                        })
92                        .count();
93                    if bq_count <= next_bq_depth {
94                        keep_level = i + 1;
95                    }
96                }
97                crate::parser::utils::container_stack::Container::FootnoteDefinition {
98                    content_col,
99                    ..
100                } => {
101                    content_indent_so_far += *content_col;
102                    let min_indent = (*content_col).max(4);
103                    if raw_indent_cols >= min_indent {
104                        keep_level = i + 1;
105                    }
106                }
107                crate::parser::utils::container_stack::Container::Definition {
108                    content_col,
109                    ..
110                } => {
111                    // A blank line does not necessarily end a definition, but the continuation
112                    // indent must be measured relative to any outer content containers (e.g.
113                    // footnotes). Otherwise a line indented only for the footnote would wrongly
114                    // continue the definition.
115                    let min_indent = self.definition_min_block_indent(*content_col);
116                    let effective_indent = raw_indent_cols.saturating_sub(content_indent_so_far);
117                    if effective_indent >= min_indent {
118                        keep_level = i + 1;
119                    }
120                    content_indent_so_far += *content_col;
121                }
122                crate::parser::utils::container_stack::Container::DefinitionItem { .. }
123                    if next_is_definition_marker
124                        || stripped_is_definition_marker(content_indent_so_far) =>
125                {
126                    keep_level = i + 1;
127                }
128                crate::parser::utils::container_stack::Container::DefinitionList { .. }
129                    if next_is_definition_marker
130                        || next_is_definition_term
131                        || stripped_is_definition_marker(content_indent_so_far) =>
132                {
133                    keep_level = i + 1;
134                }
135                crate::parser::utils::container_stack::Container::List {
136                    marker,
137                    base_indent_cols,
138                    ..
139                } => {
140                    let definition_ancestor_kept = containers.stack[..i]
141                        .iter()
142                        .enumerate()
143                        .rev()
144                        .find_map(|(idx, container)| {
145                            matches!(
146                                container,
147                                crate::parser::utils::container_stack::Container::Definition { .. }
148                            )
149                            .then_some(keep_level > idx)
150                        })
151                        .unwrap_or(true);
152                    if !definition_ancestor_kept {
153                        continue;
154                    }
155
156                    let effective_indent = raw_indent_cols.saturating_sub(content_indent_so_far);
157                    let continues_list = if let Some(ref marker_match) = next_marker {
158                        // Ordered markers can be right-aligned across items
159                        // (e.g. `i.`, `ii.`, `iii.`), so they need a symmetric
160                        // drift tolerance. Bullets are directional: a marker
161                        // outdented from the list's base indent belongs to an
162                        // outer list, not this one. Without that lower bound,
163                        // a blank line followed by an outer-level marker keeps
164                        // the inner list open and parks the BLANK_LINE inside
165                        // it, breaking idempotency for nested-list outputs.
166                        let indent_in_range = match marker {
167                            lists::ListMarker::Ordered(_) => {
168                                effective_indent.abs_diff(*base_indent_cols) <= 3
169                            }
170                            lists::ListMarker::Bullet(_) => {
171                                // A bullet marker at indent ≥ 4 cannot continue
172                                // a shallow-base bullet list across a blank line:
173                                // pandoc treats the would-be marker as the start
174                                // of an indented code block once the list is
175                                // ineligible to absorb it as a sublist of the
176                                // open item. The LIST_ITEM branch below still
177                                // rescues the LIST when the previous item's
178                                // content column accommodates the new indent
179                                // (keep_level is monotonic), so this guard only
180                                // closes the list when no item can absorb it.
181                                let jumps_out_of_shallow_list =
182                                    effective_indent >= 4 && *base_indent_cols < 4;
183                                if jumps_out_of_shallow_list {
184                                    false
185                                } else if effective_indent >= *base_indent_cols {
186                                    effective_indent <= base_indent_cols + 3
187                                } else {
188                                    // Bullets are directional, but only when an
189                                    // outer bullet list with matching marker can
190                                    // absorb the outdented marker. With no such
191                                    // outer list, pandoc keeps the current list
192                                    // open (the marker continues this list with
193                                    // a small leftward drift). Closing here would
194                                    // split one logical list into two and surface
195                                    // as an idempotency failure once the
196                                    // formatter normalizes indents.
197                                    let has_outer_match =
198                                        containers.stack[..i].iter().any(|outer| {
199                                            matches!(
200                                                outer,
201                                                crate::parser::utils::container_stack::Container::List {
202                                                    marker: outer_marker,
203                                                    base_indent_cols: outer_base,
204                                                    ..
205                                                } if matches!(
206                                                    outer_marker,
207                                                    lists::ListMarker::Bullet(_)
208                                                ) && lists::markers_match(
209                                                    outer_marker,
210                                                    &marker_match.marker,
211                                                    self.config.dialect,
212                                                ) && *outer_base <= effective_indent
213                                            )
214                                        });
215                                    !has_outer_match
216                                        && base_indent_cols.saturating_sub(effective_indent) <= 3
217                                }
218                            }
219                        };
220                        lists::markers_match(marker, &marker_match.marker, self.config.dialect)
221                            && indent_in_range
222                    } else {
223                        let item_content_col = containers
224                            .stack
225                            .get(i + 1)
226                            .and_then(|c| match c {
227                                crate::parser::utils::container_stack::Container::ListItem {
228                                    content_col,
229                                    ..
230                                } => Some(*content_col),
231                                _ => None,
232                            })
233                            .unwrap_or(1);
234                        effective_indent >= item_content_col
235                    };
236                    if continues_list {
237                        keep_level = i + 1;
238                    }
239                }
240                crate::parser::utils::container_stack::Container::ListItem {
241                    content_col,
242                    marker_only,
243                    ..
244                } => {
245                    let definition_ancestor_kept = containers.stack[..i]
246                        .iter()
247                        .enumerate()
248                        .rev()
249                        .find_map(|(idx, container)| {
250                            matches!(
251                                container,
252                                crate::parser::utils::container_stack::Container::Definition { .. }
253                            )
254                            .then_some(keep_level > idx)
255                        })
256                        .unwrap_or(true);
257                    if !definition_ancestor_kept {
258                        continue;
259                    }
260
261                    // CommonMark §5.2: a list item that has only seen its
262                    // marker line is closed by the first blank line. Any
263                    // subsequent indented content is no longer part of the
264                    // item. Pandoc keeps the item open across the blank.
265                    if *marker_only && self.config.dialect == crate::options::Dialect::CommonMark {
266                        // If the next line doesn't start another list marker,
267                        // the parent List has nothing to continue with — close
268                        // it too. (The List's own branch above optimistically
269                        // kept itself based on indent ≥ content_col, which
270                        // assumes a continuing item; that assumption fails
271                        // once the empty item is closed by the blank.)
272                        if next_marker.is_none() && i > 0 && keep_level == i {
273                            keep_level = i - 1;
274                        }
275                        continue;
276                    }
277
278                    let effective_indent = if next_bq_depth > current_bq_depth {
279                        let after_current_bq =
280                            strip_n_blockquote_markers(next_line, current_bq_depth);
281                        let (spaces_before_next_marker, _) = leading_indent(after_current_bq);
282                        spaces_before_next_marker.saturating_sub(content_indent_so_far)
283                    } else {
284                        raw_indent_cols.saturating_sub(content_indent_so_far)
285                    };
286
287                    let is_new_item_at_outer_level = if next_marker.is_some() {
288                        effective_indent < *content_col
289                    } else {
290                        false
291                    };
292
293                    if !is_new_item_at_outer_level && effective_indent >= *content_col {
294                        keep_level = i + 1;
295                    }
296                }
297                _ => {}
298            }
299        }
300
301        keep_level
302    }
303
304    /// Checks whether a line inside a definition should be treated as a plain continuation
305    /// (and buffered into the definition PLAIN), rather than parsed as a new block.
306    pub(crate) fn definition_plain_can_continue(
307        &self,
308        stripped_content: &str,
309        raw_content: &str,
310        content_indent: usize,
311        block_ctx: &BlockContext,
312        lines: &[&str],
313        pos: usize,
314    ) -> bool {
315        let prev_line_blank = if pos > 0 {
316            let prev_line = lines[pos - 1];
317            let (prev_bq_depth, prev_inner) = count_blockquote_markers(prev_line);
318            is_blank_line(prev_line) || (prev_bq_depth > 0 && is_blank_line(prev_inner))
319        } else {
320            false
321        };
322
323        // A blank line that isn't indented to the definition content column ends the definition.
324        let (indent_cols, _) = leading_indent(raw_content);
325        if is_blank_line(raw_content) && indent_cols < content_indent {
326            return false;
327        }
328        let min_block_indent = self.definition_min_block_indent(content_indent);
329        if prev_line_blank && indent_cols < min_block_indent {
330            return false;
331        }
332
333        // If it's a block element marker, don't continue as plain.
334        if definition_lists::try_parse_definition_marker(stripped_content).is_some()
335            && leading_indent(raw_content).0 <= 3
336            && !stripped_content.starts_with(':')
337        {
338            let is_next_definition = self
339                .block_registry
340                .detect_prepared(block_ctx, lines, pos)
341                .map(|match_result| {
342                    match_result.effect
343                        == crate::parser::block_dispatcher::BlockEffect::OpenDefinitionList
344                })
345                .unwrap_or(false);
346            if is_next_definition {
347                return false;
348            }
349        }
350        if lists::try_parse_list_marker(stripped_content, self.config).is_some() {
351            if prev_line_blank {
352                return false;
353            }
354            if block_ctx.in_list {
355                return false;
356            }
357            // A list marker indented to the definition's content column opens a
358            // nested list inside the definition (matches pandoc-native), even
359            // without a separating blank line.
360            let (raw_indent_cols, _) = leading_indent(raw_content);
361            if content_indent > 0 && raw_indent_cols >= content_indent {
362                return false;
363            }
364        }
365        if count_blockquote_markers(stripped_content).0 > 0 {
366            return false;
367        }
368        if self.config.extensions.raw_html
369            && html_blocks::try_parse_html_block_start(
370                stripped_content,
371                self.config.dialect == crate::options::Dialect::CommonMark,
372            )
373            .is_some()
374        {
375            return false;
376        }
377        if self.config.extensions.raw_tex
378            && raw_blocks::extract_environment_name(stripped_content).is_some()
379        {
380            return false;
381        }
382
383        if let Some(match_result) = self.block_registry.detect_prepared(block_ctx, lines, pos) {
384            if match_result.effect == crate::parser::block_dispatcher::BlockEffect::OpenList
385                && !prev_line_blank
386            {
387                return true;
388            }
389            if match_result.effect
390                == crate::parser::block_dispatcher::BlockEffect::OpenDefinitionList
391                && match_result
392                    .payload
393                    .as_ref()
394                    .and_then(|payload| {
395                        payload
396                            .downcast_ref::<crate::parser::block_dispatcher::DefinitionPrepared>()
397                    })
398                    .is_some_and(|prepared| {
399                        matches!(
400                            prepared,
401                            crate::parser::block_dispatcher::DefinitionPrepared::Term { .. }
402                        )
403                    })
404            {
405                return true;
406            }
407            return false;
408        }
409
410        true
411    }
412}