Skip to main content

panache_parser/parser/utils/
continuation.rs

1//! Continuation/blank-line handling policy.
2//!
3//! This module centralizes the parser's "should this line continue an existing container?"
4//! logic (especially across blank lines). Keeping this logic in one place reduces the
5//! risk of scattered ad-hoc heuristics diverging as blocks move into the dispatcher.
6
7use crate::options::{PandocCompat, ParserOptions};
8
9use crate::parser::block_dispatcher::{BlockContext, BlockParserRegistry};
10use crate::parser::blocks::blockquotes::{count_blockquote_markers, strip_n_blockquote_markers};
11use crate::parser::blocks::container_prefix::{ContainerPrefix, StrippedLines};
12use crate::parser::blocks::{definition_lists, html_blocks, lists, raw_blocks};
13use crate::parser::utils::container_stack::{ContainerStack, leading_indent};
14use crate::parser::utils::helpers::is_blank_line;
15
/// Continuation policy bound to one set of parser options and one block registry.
///
/// Both are borrowed, so the policy itself owns no data.
pub(crate) struct ContinuationPolicy<'a, 'cfg> {
    /// Parser options; the methods read the dialect, the effective Pandoc
    /// compatibility level, and the `raw_html` / `raw_tex` extension flags.
    config: &'cfg ParserOptions,
    /// Block parser registry, queried via `detect_prepared` to learn which
    /// block (if any) a line would open.
    block_registry: &'a BlockParserRegistry,
}
20
21impl<'a, 'cfg> ContinuationPolicy<'a, 'cfg> {
22    pub(crate) fn new(
23        config: &'cfg ParserOptions,
24        block_registry: &'a BlockParserRegistry,
25    ) -> Self {
26        Self {
27            config,
28            block_registry,
29        }
30    }
31
32    fn definition_min_block_indent(&self, content_col: usize) -> usize {
33        if self.config.effective_pandoc_compat() == PandocCompat::V3_7 {
34            content_col.max(4)
35        } else {
36            content_col
37        }
38    }
39
40    pub(crate) fn compute_levels_to_keep(
41        &self,
42        current_bq_depth: usize,
43        containers: &ContainerStack,
44        lines: &[&str],
45        next_line_pos: usize,
46        next_line: &str,
47    ) -> usize {
48        let (next_bq_depth, next_inner) = count_blockquote_markers(next_line);
49        let (raw_indent_cols, _) = leading_indent(next_inner);
50        let next_marker = lists::try_parse_list_marker(
51            next_inner,
52            self.config,
53            lists::open_list_hint_at_indent(containers, raw_indent_cols),
54        );
55        let next_is_definition_marker =
56            definition_lists::try_parse_definition_marker(next_inner).is_some();
57        let next_is_definition_term = !is_blank_line(next_inner)
58            && definition_lists::next_line_is_definition_marker(lines, next_line_pos).is_some();
59
60        // Re-detect the definition marker after stripping a content-container
61        // indent (e.g. the 4-space footnote body indent). Without this, a `:`
62        // line nested inside a footnote body fails the 0-3-space marker test
63        // and the parent DefinitionList/DefinitionItem incorrectly closes
64        // across blank lines, splitting one logical item into many.
65        let stripped_is_definition_marker = |content_indent_so_far: usize| -> bool {
66            if content_indent_so_far == 0 || raw_indent_cols < content_indent_so_far {
67                return false;
68            }
69            let strip_bytes = crate::parser::utils::container_stack::byte_index_at_column(
70                next_inner,
71                content_indent_so_far,
72            );
73            if strip_bytes > next_inner.len() {
74                return false;
75            }
76            definition_lists::try_parse_definition_marker(&next_inner[strip_bytes..]).is_some()
77        };
78
79        // `current_bq_depth` is used for proper indent calculation when the next line
80        // increases blockquote nesting.
81
82        let mut keep_level = 0;
83        let mut content_indent_so_far = 0usize;
84
85        // First, account for blockquotes
86        for (i, c) in containers.stack.iter().enumerate() {
87            match c {
88                crate::parser::utils::container_stack::Container::BlockQuote { .. } => {
89                    let bq_count = containers.stack[..=i]
90                        .iter()
91                        .filter(|x| {
92                            matches!(
93                                x,
94                                crate::parser::utils::container_stack::Container::BlockQuote { .. }
95                            )
96                        })
97                        .count();
98                    if bq_count <= next_bq_depth {
99                        keep_level = i + 1;
100                    }
101                }
102                crate::parser::utils::container_stack::Container::FootnoteDefinition {
103                    content_col,
104                    ..
105                } => {
106                    content_indent_so_far += *content_col;
107                    let min_indent = (*content_col).max(4);
108                    if raw_indent_cols >= min_indent {
109                        keep_level = i + 1;
110                    }
111                }
112                crate::parser::utils::container_stack::Container::Definition {
113                    content_col,
114                    ..
115                } => {
116                    // A blank line does not necessarily end a definition, but the continuation
117                    // indent must be measured relative to any outer content containers (e.g.
118                    // footnotes). Otherwise a line indented only for the footnote would wrongly
119                    // continue the definition.
120                    let min_indent = self.definition_min_block_indent(*content_col);
121                    let effective_indent = raw_indent_cols.saturating_sub(content_indent_so_far);
122                    if effective_indent >= min_indent {
123                        keep_level = i + 1;
124                    }
125                    content_indent_so_far += *content_col;
126                }
127                crate::parser::utils::container_stack::Container::DefinitionItem { .. }
128                    if next_is_definition_marker
129                        || stripped_is_definition_marker(content_indent_so_far) =>
130                {
131                    keep_level = i + 1;
132                }
133                crate::parser::utils::container_stack::Container::DefinitionList { .. }
134                    if next_is_definition_marker
135                        || next_is_definition_term
136                        || stripped_is_definition_marker(content_indent_so_far) =>
137                {
138                    keep_level = i + 1;
139                }
140                crate::parser::utils::container_stack::Container::List {
141                    marker,
142                    base_indent_cols,
143                    ..
144                } => {
145                    let definition_ancestor_kept = containers.stack[..i]
146                        .iter()
147                        .enumerate()
148                        .rev()
149                        .find_map(|(idx, container)| {
150                            matches!(
151                                container,
152                                crate::parser::utils::container_stack::Container::Definition { .. }
153                            )
154                            .then_some(keep_level > idx)
155                        })
156                        .unwrap_or(true);
157                    if !definition_ancestor_kept {
158                        continue;
159                    }
160
161                    let effective_indent = raw_indent_cols.saturating_sub(content_indent_so_far);
162                    let continues_list = if let Some(ref marker_match) = next_marker {
163                        // Ordered markers can be right-aligned across items
164                        // (e.g. `i.`, `ii.`, `iii.`), so they need a symmetric
165                        // drift tolerance. Bullets are directional: a marker
166                        // outdented from the list's base indent belongs to an
167                        // outer list, not this one. Without that lower bound,
168                        // a blank line followed by an outer-level marker keeps
169                        // the inner list open and parks the BLANK_LINE inside
170                        // it, breaking idempotency for nested-list outputs.
171                        let indent_in_range = match marker {
172                            lists::ListMarker::Ordered(_) => {
173                                effective_indent.abs_diff(*base_indent_cols) <= 3
174                            }
175                            lists::ListMarker::Bullet(_) => {
176                                // A bullet marker at indent ≥ 4 cannot continue
177                                // a shallow-base bullet list across a blank line:
178                                // pandoc treats the would-be marker as the start
179                                // of an indented code block once the list is
180                                // ineligible to absorb it as a sublist of the
181                                // open item. The LIST_ITEM branch below still
182                                // rescues the LIST when the previous item's
183                                // content column accommodates the new indent
184                                // (keep_level is monotonic), so this guard only
185                                // closes the list when no item can absorb it.
186                                let jumps_out_of_shallow_list =
187                                    effective_indent >= 4 && *base_indent_cols < 4;
188                                if jumps_out_of_shallow_list {
189                                    false
190                                } else if effective_indent >= *base_indent_cols {
191                                    effective_indent <= base_indent_cols + 3
192                                } else {
193                                    // Bullets are directional, but only when an
194                                    // outer bullet list with matching marker can
195                                    // absorb the outdented marker. With no such
196                                    // outer list, pandoc keeps the current list
197                                    // open (the marker continues this list with
198                                    // a small leftward drift). Closing here would
199                                    // split one logical list into two and surface
200                                    // as an idempotency failure once the
201                                    // formatter normalizes indents.
202                                    let has_outer_match =
203                                        containers.stack[..i].iter().any(|outer| {
204                                            matches!(
205                                                outer,
206                                                crate::parser::utils::container_stack::Container::List {
207                                                    marker: outer_marker,
208                                                    base_indent_cols: outer_base,
209                                                    ..
210                                                } if matches!(
211                                                    outer_marker,
212                                                    lists::ListMarker::Bullet(_)
213                                                ) && lists::markers_match(
214                                                    outer_marker,
215                                                    &marker_match.marker,
216                                                    self.config.dialect,
217                                                ) && *outer_base <= effective_indent
218                                            )
219                                        });
220                                    !has_outer_match
221                                        && base_indent_cols.saturating_sub(effective_indent) <= 3
222                                }
223                            }
224                        };
225                        lists::markers_match(marker, &marker_match.marker, self.config.dialect)
226                            && indent_in_range
227                    } else {
228                        let item_content_col = containers
229                            .stack
230                            .get(i + 1)
231                            .and_then(|c| match c {
232                                crate::parser::utils::container_stack::Container::ListItem {
233                                    content_col,
234                                    ..
235                                } => Some(*content_col),
236                                _ => None,
237                            })
238                            .unwrap_or(1);
239                        effective_indent >= item_content_col
240                    };
241                    if continues_list {
242                        keep_level = i + 1;
243                    }
244                }
245                crate::parser::utils::container_stack::Container::ListItem {
246                    content_col,
247                    marker_only,
248                    ..
249                } => {
250                    let definition_ancestor_kept = containers.stack[..i]
251                        .iter()
252                        .enumerate()
253                        .rev()
254                        .find_map(|(idx, container)| {
255                            matches!(
256                                container,
257                                crate::parser::utils::container_stack::Container::Definition { .. }
258                            )
259                            .then_some(keep_level > idx)
260                        })
261                        .unwrap_or(true);
262                    if !definition_ancestor_kept {
263                        continue;
264                    }
265
266                    // CommonMark §5.2: a list item that has only seen its
267                    // marker line is closed by the first blank line. Any
268                    // subsequent indented content is no longer part of the
269                    // item. Pandoc keeps the item open across the blank.
270                    if *marker_only && self.config.dialect == crate::options::Dialect::CommonMark {
271                        // If the next line doesn't start another list marker,
272                        // the parent List has nothing to continue with — close
273                        // it too. (The List's own branch above optimistically
274                        // kept itself based on indent ≥ content_col, which
275                        // assumes a continuing item; that assumption fails
276                        // once the empty item is closed by the blank.)
277                        if next_marker.is_none() && i > 0 && keep_level == i {
278                            keep_level = i - 1;
279                        }
280                        continue;
281                    }
282
283                    let effective_indent = if next_bq_depth > current_bq_depth {
284                        let after_current_bq =
285                            strip_n_blockquote_markers(next_line, current_bq_depth);
286                        let (spaces_before_next_marker, _) = leading_indent(after_current_bq);
287                        spaces_before_next_marker.saturating_sub(content_indent_so_far)
288                    } else {
289                        raw_indent_cols.saturating_sub(content_indent_so_far)
290                    };
291
292                    let is_new_item_at_outer_level = if next_marker.is_some() {
293                        effective_indent < *content_col
294                    } else {
295                        false
296                    };
297
298                    if !is_new_item_at_outer_level && effective_indent >= *content_col {
299                        keep_level = i + 1;
300                    }
301                }
302                _ => {}
303            }
304        }
305
306        keep_level
307    }
308
309    /// Checks whether a line inside a definition should be treated as a plain continuation
310    /// (and buffered into the definition PLAIN), rather than parsed as a new block.
311    pub(crate) fn definition_plain_can_continue(
312        &self,
313        stripped_content: &str,
314        raw_content: &str,
315        content_indent: usize,
316        block_ctx: &BlockContext,
317        lines: &[&str],
318        pos: usize,
319    ) -> bool {
320        let prev_line_blank = if pos > 0 {
321            let prev_line = lines[pos - 1];
322            let (prev_bq_depth, prev_inner) = count_blockquote_markers(prev_line);
323            is_blank_line(prev_line) || (prev_bq_depth > 0 && is_blank_line(prev_inner))
324        } else {
325            false
326        };
327
328        // A blank line that isn't indented to the definition content column ends the definition.
329        let (indent_cols, _) = leading_indent(raw_content);
330        if is_blank_line(raw_content) && indent_cols < content_indent {
331            return false;
332        }
333        let min_block_indent = self.definition_min_block_indent(content_indent);
334        if prev_line_blank && indent_cols < min_block_indent {
335            return false;
336        }
337
338        // If it's a block element marker, don't continue as plain.
339        if definition_lists::try_parse_definition_marker(stripped_content).is_some()
340            && leading_indent(raw_content).0 <= 3
341            && !stripped_content.starts_with(':')
342        {
343            let is_next_definition = {
344                let prefix = ContainerPrefix::from_ctx(block_ctx);
345                let stripped = StrippedLines::new(lines, pos, &prefix);
346                self.block_registry
347                    .detect_prepared(block_ctx, &stripped)
348                    .map(|match_result| {
349                        match_result.effect
350                            == crate::parser::block_dispatcher::BlockEffect::OpenDefinitionList
351                    })
352                    .unwrap_or(false)
353            };
354            if is_next_definition {
355                return false;
356            }
357        }
358        if lists::try_parse_list_marker(stripped_content, self.config, block_ctx.open_alpha_hint)
359            .is_some()
360        {
361            if prev_line_blank {
362                return false;
363            }
364            if block_ctx.in_list {
365                return false;
366            }
367            // A list marker indented to the definition's content column opens a
368            // nested list inside the definition (matches pandoc-native), even
369            // without a separating blank line.
370            let (raw_indent_cols, _) = leading_indent(raw_content);
371            if content_indent > 0 && raw_indent_cols >= content_indent {
372                return false;
373            }
374        }
375        if count_blockquote_markers(stripped_content).0 > 0 {
376            return false;
377        }
378        if self.config.extensions.raw_html
379            && html_blocks::try_parse_html_block_start(
380                stripped_content,
381                self.config.dialect == crate::options::Dialect::CommonMark,
382            )
383            .is_some()
384        {
385            return false;
386        }
387        if self.config.extensions.raw_tex
388            && raw_blocks::extract_environment_name(stripped_content).is_some()
389        {
390            return false;
391        }
392
393        let prefix = ContainerPrefix::from_ctx(block_ctx);
394        let stripped = StrippedLines::new(lines, pos, &prefix);
395        if let Some(match_result) = self.block_registry.detect_prepared(block_ctx, &stripped) {
396            if match_result.effect == crate::parser::block_dispatcher::BlockEffect::OpenList
397                && !prev_line_blank
398            {
399                return true;
400            }
401            if match_result.effect
402                == crate::parser::block_dispatcher::BlockEffect::OpenDefinitionList
403                && match_result
404                    .payload
405                    .as_ref()
406                    .and_then(|payload| {
407                        payload
408                            .downcast_ref::<crate::parser::block_dispatcher::DefinitionPrepared>()
409                    })
410                    .is_some_and(|prepared| {
411                        matches!(
412                            prepared,
413                            crate::parser::block_dispatcher::DefinitionPrepared::Term { .. }
414                        )
415                    })
416            {
417                return true;
418            }
419            return false;
420        }
421
422        true
423    }
424}