Skip to main content

panache_parser/parser/utils/
continuation.rs

//! Continuation/blank-line handling policy.
//!
//! This module centralizes the parser's "should this line continue an existing container?"
//! logic (especially across blank lines). Keeping this logic in one place reduces the
//! risk of scattered ad-hoc heuristics diverging as blocks move into the dispatcher.
6
7use crate::options::{PandocCompat, ParserOptions};
8
9use crate::parser::block_dispatcher::{BlockContext, BlockParserRegistry};
10use crate::parser::blocks::blockquotes::{count_blockquote_markers, strip_n_blockquote_markers};
11use crate::parser::blocks::{definition_lists, html_blocks, lists, raw_blocks};
12use crate::parser::utils::container_stack::{ContainerStack, leading_indent};
13use crate::parser::utils::helpers::is_blank_line;
14
15pub(crate) struct ContinuationPolicy<'a, 'cfg> {
16    config: &'cfg ParserOptions,
17    block_registry: &'a BlockParserRegistry,
18}
19
20impl<'a, 'cfg> ContinuationPolicy<'a, 'cfg> {
21    pub(crate) fn new(
22        config: &'cfg ParserOptions,
23        block_registry: &'a BlockParserRegistry,
24    ) -> Self {
25        Self {
26            config,
27            block_registry,
28        }
29    }
30
31    fn definition_min_block_indent(&self, content_col: usize) -> usize {
32        if self.config.effective_pandoc_compat() == PandocCompat::V3_7 {
33            content_col.max(4)
34        } else {
35            content_col
36        }
37    }
38
39    pub(crate) fn compute_levels_to_keep(
40        &self,
41        current_bq_depth: usize,
42        containers: &ContainerStack,
43        lines: &[&str],
44        next_line_pos: usize,
45        next_line: &str,
46    ) -> usize {
47        let (next_bq_depth, next_inner) = count_blockquote_markers(next_line);
48        let (raw_indent_cols, _) = leading_indent(next_inner);
49        let next_marker = lists::try_parse_list_marker(next_inner, self.config);
50        let next_is_definition_marker =
51            definition_lists::try_parse_definition_marker(next_inner).is_some();
52        let next_is_definition_term = !is_blank_line(next_inner)
53            && definition_lists::next_line_is_definition_marker(lines, next_line_pos).is_some();
54
55        // Re-detect the definition marker after stripping a content-container
56        // indent (e.g. the 4-space footnote body indent). Without this, a `:`
57        // line nested inside a footnote body fails the 0-3-space marker test
58        // and the parent DefinitionList/DefinitionItem incorrectly closes
59        // across blank lines, splitting one logical item into many.
60        let stripped_is_definition_marker = |content_indent_so_far: usize| -> bool {
61            if content_indent_so_far == 0 || raw_indent_cols < content_indent_so_far {
62                return false;
63            }
64            let strip_bytes = crate::parser::utils::container_stack::byte_index_at_column(
65                next_inner,
66                content_indent_so_far,
67            );
68            if strip_bytes > next_inner.len() {
69                return false;
70            }
71            definition_lists::try_parse_definition_marker(&next_inner[strip_bytes..]).is_some()
72        };
73
74        // `current_bq_depth` is used for proper indent calculation when the next line
75        // increases blockquote nesting.
76
77        let mut keep_level = 0;
78        let mut content_indent_so_far = 0usize;
79
80        // First, account for blockquotes
81        for (i, c) in containers.stack.iter().enumerate() {
82            match c {
83                crate::parser::utils::container_stack::Container::BlockQuote { .. } => {
84                    let bq_count = containers.stack[..=i]
85                        .iter()
86                        .filter(|x| {
87                            matches!(
88                                x,
89                                crate::parser::utils::container_stack::Container::BlockQuote { .. }
90                            )
91                        })
92                        .count();
93                    if bq_count <= next_bq_depth {
94                        keep_level = i + 1;
95                    }
96                }
97                crate::parser::utils::container_stack::Container::FootnoteDefinition {
98                    content_col,
99                    ..
100                } => {
101                    content_indent_so_far += *content_col;
102                    let min_indent = (*content_col).max(4);
103                    if raw_indent_cols >= min_indent {
104                        keep_level = i + 1;
105                    }
106                }
107                crate::parser::utils::container_stack::Container::Definition {
108                    content_col,
109                    ..
110                } => {
111                    // A blank line does not necessarily end a definition, but the continuation
112                    // indent must be measured relative to any outer content containers (e.g.
113                    // footnotes). Otherwise a line indented only for the footnote would wrongly
114                    // continue the definition.
115                    let min_indent = self.definition_min_block_indent(*content_col);
116                    let effective_indent = raw_indent_cols.saturating_sub(content_indent_so_far);
117                    if effective_indent >= min_indent {
118                        keep_level = i + 1;
119                    }
120                    content_indent_so_far += *content_col;
121                }
122                crate::parser::utils::container_stack::Container::DefinitionItem { .. }
123                    if next_is_definition_marker
124                        || stripped_is_definition_marker(content_indent_so_far) =>
125                {
126                    keep_level = i + 1;
127                }
128                crate::parser::utils::container_stack::Container::DefinitionList { .. }
129                    if next_is_definition_marker
130                        || next_is_definition_term
131                        || stripped_is_definition_marker(content_indent_so_far) =>
132                {
133                    keep_level = i + 1;
134                }
135                crate::parser::utils::container_stack::Container::List {
136                    marker,
137                    base_indent_cols,
138                    ..
139                } => {
140                    let definition_ancestor_kept = containers.stack[..i]
141                        .iter()
142                        .enumerate()
143                        .rev()
144                        .find_map(|(idx, container)| {
145                            matches!(
146                                container,
147                                crate::parser::utils::container_stack::Container::Definition { .. }
148                            )
149                            .then_some(keep_level > idx)
150                        })
151                        .unwrap_or(true);
152                    if !definition_ancestor_kept {
153                        continue;
154                    }
155
156                    let effective_indent = raw_indent_cols.saturating_sub(content_indent_so_far);
157                    let continues_list = if let Some(ref marker_match) = next_marker {
158                        // Ordered markers can be right-aligned across items
159                        // (e.g. `i.`, `ii.`, `iii.`), so they need a symmetric
160                        // drift tolerance. Bullets are directional: a marker
161                        // outdented from the list's base indent belongs to an
162                        // outer list, not this one. Without that lower bound,
163                        // a blank line followed by an outer-level marker keeps
164                        // the inner list open and parks the BLANK_LINE inside
165                        // it, breaking idempotency for nested-list outputs.
166                        let indent_in_range = match marker {
167                            lists::ListMarker::Ordered(_) => {
168                                effective_indent.abs_diff(*base_indent_cols) <= 3
169                            }
170                            lists::ListMarker::Bullet(_) => {
171                                // A bullet marker at indent ≥ 4 cannot continue
172                                // a shallow-base bullet list across a blank line:
173                                // pandoc treats the would-be marker as the start
174                                // of an indented code block once the list is
175                                // ineligible to absorb it as a sublist of the
176                                // open item. The LIST_ITEM branch below still
177                                // rescues the LIST when the previous item's
178                                // content column accommodates the new indent
179                                // (keep_level is monotonic), so this guard only
180                                // closes the list when no item can absorb it.
181                                let jumps_out_of_shallow_list =
182                                    effective_indent >= 4 && *base_indent_cols < 4;
183                                !jumps_out_of_shallow_list
184                                    && effective_indent >= *base_indent_cols
185                                    && effective_indent <= base_indent_cols + 3
186                            }
187                        };
188                        lists::markers_match(marker, &marker_match.marker, self.config.dialect)
189                            && indent_in_range
190                    } else {
191                        let item_content_col = containers
192                            .stack
193                            .get(i + 1)
194                            .and_then(|c| match c {
195                                crate::parser::utils::container_stack::Container::ListItem {
196                                    content_col,
197                                    ..
198                                } => Some(*content_col),
199                                _ => None,
200                            })
201                            .unwrap_or(1);
202                        effective_indent >= item_content_col
203                    };
204                    if continues_list {
205                        keep_level = i + 1;
206                    }
207                }
208                crate::parser::utils::container_stack::Container::ListItem {
209                    content_col,
210                    marker_only,
211                    ..
212                } => {
213                    let definition_ancestor_kept = containers.stack[..i]
214                        .iter()
215                        .enumerate()
216                        .rev()
217                        .find_map(|(idx, container)| {
218                            matches!(
219                                container,
220                                crate::parser::utils::container_stack::Container::Definition { .. }
221                            )
222                            .then_some(keep_level > idx)
223                        })
224                        .unwrap_or(true);
225                    if !definition_ancestor_kept {
226                        continue;
227                    }
228
229                    // CommonMark §5.2: a list item that has only seen its
230                    // marker line is closed by the first blank line. Any
231                    // subsequent indented content is no longer part of the
232                    // item. Pandoc keeps the item open across the blank.
233                    if *marker_only && self.config.dialect == crate::options::Dialect::CommonMark {
234                        // If the next line doesn't start another list marker,
235                        // the parent List has nothing to continue with — close
236                        // it too. (The List's own branch above optimistically
237                        // kept itself based on indent ≥ content_col, which
238                        // assumes a continuing item; that assumption fails
239                        // once the empty item is closed by the blank.)
240                        if next_marker.is_none() && i > 0 && keep_level == i {
241                            keep_level = i - 1;
242                        }
243                        continue;
244                    }
245
246                    let effective_indent = if next_bq_depth > current_bq_depth {
247                        let after_current_bq =
248                            strip_n_blockquote_markers(next_line, current_bq_depth);
249                        let (spaces_before_next_marker, _) = leading_indent(after_current_bq);
250                        spaces_before_next_marker.saturating_sub(content_indent_so_far)
251                    } else {
252                        raw_indent_cols.saturating_sub(content_indent_so_far)
253                    };
254
255                    let is_new_item_at_outer_level = if next_marker.is_some() {
256                        effective_indent < *content_col
257                    } else {
258                        false
259                    };
260
261                    if !is_new_item_at_outer_level && effective_indent >= *content_col {
262                        keep_level = i + 1;
263                    }
264                }
265                _ => {}
266            }
267        }
268
269        keep_level
270    }
271
272    /// Checks whether a line inside a definition should be treated as a plain continuation
273    /// (and buffered into the definition PLAIN), rather than parsed as a new block.
274    pub(crate) fn definition_plain_can_continue(
275        &self,
276        stripped_content: &str,
277        raw_content: &str,
278        content_indent: usize,
279        block_ctx: &BlockContext,
280        lines: &[&str],
281        pos: usize,
282    ) -> bool {
283        let prev_line_blank = if pos > 0 {
284            let prev_line = lines[pos - 1];
285            let (prev_bq_depth, prev_inner) = count_blockquote_markers(prev_line);
286            is_blank_line(prev_line) || (prev_bq_depth > 0 && is_blank_line(prev_inner))
287        } else {
288            false
289        };
290
291        // A blank line that isn't indented to the definition content column ends the definition.
292        let (indent_cols, _) = leading_indent(raw_content);
293        if is_blank_line(raw_content) && indent_cols < content_indent {
294            return false;
295        }
296        let min_block_indent = self.definition_min_block_indent(content_indent);
297        if prev_line_blank && indent_cols < min_block_indent {
298            return false;
299        }
300
301        // If it's a block element marker, don't continue as plain.
302        if definition_lists::try_parse_definition_marker(stripped_content).is_some()
303            && leading_indent(raw_content).0 <= 3
304            && !stripped_content.starts_with(':')
305        {
306            let is_next_definition = self
307                .block_registry
308                .detect_prepared(block_ctx, lines, pos)
309                .map(|match_result| {
310                    match_result.effect
311                        == crate::parser::block_dispatcher::BlockEffect::OpenDefinitionList
312                })
313                .unwrap_or(false);
314            if is_next_definition {
315                return false;
316            }
317        }
318        if lists::try_parse_list_marker(stripped_content, self.config).is_some() {
319            if prev_line_blank {
320                return false;
321            }
322            if block_ctx.in_list {
323                return false;
324            }
325            // A list marker indented to the definition's content column opens a
326            // nested list inside the definition (matches pandoc-native), even
327            // without a separating blank line.
328            let (raw_indent_cols, _) = leading_indent(raw_content);
329            if content_indent > 0 && raw_indent_cols >= content_indent {
330                return false;
331            }
332        }
333        if count_blockquote_markers(stripped_content).0 > 0 {
334            return false;
335        }
336        if self.config.extensions.raw_html
337            && html_blocks::try_parse_html_block_start(
338                stripped_content,
339                self.config.dialect == crate::options::Dialect::CommonMark,
340            )
341            .is_some()
342        {
343            return false;
344        }
345        if self.config.extensions.raw_tex
346            && raw_blocks::extract_environment_name(stripped_content).is_some()
347        {
348            return false;
349        }
350
351        if let Some(match_result) = self.block_registry.detect_prepared(block_ctx, lines, pos) {
352            if match_result.effect == crate::parser::block_dispatcher::BlockEffect::OpenList
353                && !prev_line_blank
354            {
355                return true;
356            }
357            if match_result.effect
358                == crate::parser::block_dispatcher::BlockEffect::OpenDefinitionList
359                && match_result
360                    .payload
361                    .as_ref()
362                    .and_then(|payload| {
363                        payload
364                            .downcast_ref::<crate::parser::block_dispatcher::DefinitionPrepared>()
365                    })
366                    .is_some_and(|prepared| {
367                        matches!(
368                            prepared,
369                            crate::parser::block_dispatcher::DefinitionPrepared::Term { .. }
370                        )
371                    })
372            {
373                return true;
374            }
375            return false;
376        }
377
378        true
379    }
380}