Skip to main content

panache_parser/parser/utils/
continuation.rs

1//! Continuation/blank-line handling policy.
2//!
3//! This module centralizes the parser's "should this line continue an existing container?"
4//! logic (especially across blank lines). Keeping this logic in one place reduces the
5//! risk of scattered ad-hoc heuristics diverging as blocks move into the dispatcher.
6
7use crate::options::{PandocCompat, ParserOptions};
8
9use crate::parser::block_dispatcher::{BlockContext, BlockParserRegistry};
10use crate::parser::blocks::blockquotes::{count_blockquote_markers, strip_n_blockquote_markers};
11use crate::parser::blocks::{definition_lists, html_blocks, lists, raw_blocks};
12use crate::parser::utils::container_stack::{ContainerStack, leading_indent};
13use crate::parser::utils::helpers::is_blank_line;
14
15pub(crate) struct ContinuationPolicy<'a, 'cfg> {
16    config: &'cfg ParserOptions,
17    block_registry: &'a BlockParserRegistry,
18}
19
20impl<'a, 'cfg> ContinuationPolicy<'a, 'cfg> {
21    pub(crate) fn new(
22        config: &'cfg ParserOptions,
23        block_registry: &'a BlockParserRegistry,
24    ) -> Self {
25        Self {
26            config,
27            block_registry,
28        }
29    }
30
31    fn definition_min_block_indent(&self, content_col: usize) -> usize {
32        if self.config.effective_pandoc_compat() == PandocCompat::V3_7 {
33            content_col.max(4)
34        } else {
35            content_col
36        }
37    }
38
39    pub(crate) fn compute_levels_to_keep(
40        &self,
41        current_bq_depth: usize,
42        containers: &ContainerStack,
43        lines: &[&str],
44        next_line_pos: usize,
45        next_line: &str,
46    ) -> usize {
47        let (next_bq_depth, next_inner) = count_blockquote_markers(next_line);
48        let (raw_indent_cols, _) = leading_indent(next_inner);
49        let next_marker = lists::try_parse_list_marker(next_inner, self.config);
50        let next_is_definition_marker =
51            definition_lists::try_parse_definition_marker(next_inner).is_some();
52        let next_is_definition_term = !is_blank_line(next_inner)
53            && definition_lists::next_line_is_definition_marker(lines, next_line_pos).is_some();
54
55        // `current_bq_depth` is used for proper indent calculation when the next line
56        // increases blockquote nesting.
57
58        let mut keep_level = 0;
59        let mut content_indent_so_far = 0usize;
60
61        // First, account for blockquotes
62        for (i, c) in containers.stack.iter().enumerate() {
63            match c {
64                crate::parser::utils::container_stack::Container::BlockQuote { .. } => {
65                    let bq_count = containers.stack[..=i]
66                        .iter()
67                        .filter(|x| {
68                            matches!(
69                                x,
70                                crate::parser::utils::container_stack::Container::BlockQuote { .. }
71                            )
72                        })
73                        .count();
74                    if bq_count <= next_bq_depth {
75                        keep_level = i + 1;
76                    }
77                }
78                crate::parser::utils::container_stack::Container::FootnoteDefinition {
79                    content_col,
80                    ..
81                } => {
82                    content_indent_so_far += *content_col;
83                    let min_indent = (*content_col).max(4);
84                    if raw_indent_cols >= min_indent {
85                        keep_level = i + 1;
86                    }
87                }
88                crate::parser::utils::container_stack::Container::Definition {
89                    content_col,
90                    ..
91                } => {
92                    // A blank line does not necessarily end a definition, but the continuation
93                    // indent must be measured relative to any outer content containers (e.g.
94                    // footnotes). Otherwise a line indented only for the footnote would wrongly
95                    // continue the definition.
96                    let min_indent = self.definition_min_block_indent(*content_col);
97                    let effective_indent = raw_indent_cols.saturating_sub(content_indent_so_far);
98                    if effective_indent >= min_indent {
99                        keep_level = i + 1;
100                    }
101                    content_indent_so_far += *content_col;
102                }
103                crate::parser::utils::container_stack::Container::DefinitionItem { .. }
104                    if next_is_definition_marker =>
105                {
106                    keep_level = i + 1;
107                }
108                crate::parser::utils::container_stack::Container::DefinitionList { .. }
109                    if next_is_definition_marker || next_is_definition_term =>
110                {
111                    keep_level = i + 1;
112                }
113                crate::parser::utils::container_stack::Container::List {
114                    marker,
115                    base_indent_cols,
116                    ..
117                } => {
118                    let definition_ancestor_kept = containers.stack[..i]
119                        .iter()
120                        .enumerate()
121                        .rev()
122                        .find_map(|(idx, container)| {
123                            matches!(
124                                container,
125                                crate::parser::utils::container_stack::Container::Definition { .. }
126                            )
127                            .then_some(keep_level > idx)
128                        })
129                        .unwrap_or(true);
130                    if !definition_ancestor_kept {
131                        continue;
132                    }
133
134                    let effective_indent = raw_indent_cols.saturating_sub(content_indent_so_far);
135                    let continues_list = if let Some(ref marker_match) = next_marker {
136                        lists::markers_match(marker, &marker_match.marker, self.config.dialect)
137                            && effective_indent <= base_indent_cols + 3
138                    } else {
139                        let item_content_col = containers
140                            .stack
141                            .get(i + 1)
142                            .and_then(|c| match c {
143                                crate::parser::utils::container_stack::Container::ListItem {
144                                    content_col,
145                                    ..
146                                } => Some(*content_col),
147                                _ => None,
148                            })
149                            .unwrap_or(1);
150                        effective_indent >= item_content_col
151                    };
152                    if continues_list {
153                        keep_level = i + 1;
154                    }
155                }
156                crate::parser::utils::container_stack::Container::ListItem {
157                    content_col,
158                    marker_only,
159                    ..
160                } => {
161                    let definition_ancestor_kept = containers.stack[..i]
162                        .iter()
163                        .enumerate()
164                        .rev()
165                        .find_map(|(idx, container)| {
166                            matches!(
167                                container,
168                                crate::parser::utils::container_stack::Container::Definition { .. }
169                            )
170                            .then_some(keep_level > idx)
171                        })
172                        .unwrap_or(true);
173                    if !definition_ancestor_kept {
174                        continue;
175                    }
176
177                    // CommonMark §5.2: a list item that has only seen its
178                    // marker line is closed by the first blank line. Any
179                    // subsequent indented content is no longer part of the
180                    // item. Pandoc keeps the item open across the blank.
181                    if *marker_only && self.config.dialect == crate::options::Dialect::CommonMark {
182                        // If the next line doesn't start another list marker,
183                        // the parent List has nothing to continue with — close
184                        // it too. (The List's own branch above optimistically
185                        // kept itself based on indent ≥ content_col, which
186                        // assumes a continuing item; that assumption fails
187                        // once the empty item is closed by the blank.)
188                        if next_marker.is_none() && i > 0 && keep_level == i {
189                            keep_level = i - 1;
190                        }
191                        continue;
192                    }
193
194                    let effective_indent = if next_bq_depth > current_bq_depth {
195                        let after_current_bq =
196                            strip_n_blockquote_markers(next_line, current_bq_depth);
197                        let (spaces_before_next_marker, _) = leading_indent(after_current_bq);
198                        spaces_before_next_marker.saturating_sub(content_indent_so_far)
199                    } else {
200                        raw_indent_cols.saturating_sub(content_indent_so_far)
201                    };
202
203                    let is_new_item_at_outer_level = if next_marker.is_some() {
204                        effective_indent < *content_col
205                    } else {
206                        false
207                    };
208
209                    if !is_new_item_at_outer_level && effective_indent >= *content_col {
210                        keep_level = i + 1;
211                    }
212                }
213                _ => {}
214            }
215        }
216
217        keep_level
218    }
219
220    /// Checks whether a line inside a definition should be treated as a plain continuation
221    /// (and buffered into the definition PLAIN), rather than parsed as a new block.
222    pub(crate) fn definition_plain_can_continue(
223        &self,
224        stripped_content: &str,
225        raw_content: &str,
226        content_indent: usize,
227        block_ctx: &BlockContext,
228        lines: &[&str],
229        pos: usize,
230    ) -> bool {
231        let prev_line_blank = if pos > 0 {
232            let prev_line = lines[pos - 1];
233            let (prev_bq_depth, prev_inner) = count_blockquote_markers(prev_line);
234            is_blank_line(prev_line) || (prev_bq_depth > 0 && is_blank_line(prev_inner))
235        } else {
236            false
237        };
238
239        // A blank line that isn't indented to the definition content column ends the definition.
240        let (indent_cols, _) = leading_indent(raw_content);
241        if is_blank_line(raw_content) && indent_cols < content_indent {
242            return false;
243        }
244        let min_block_indent = self.definition_min_block_indent(content_indent);
245        if prev_line_blank && indent_cols < min_block_indent {
246            return false;
247        }
248
249        // If it's a block element marker, don't continue as plain.
250        if definition_lists::try_parse_definition_marker(stripped_content).is_some()
251            && leading_indent(raw_content).0 <= 3
252            && !stripped_content.starts_with(':')
253        {
254            let is_next_definition = self
255                .block_registry
256                .detect_prepared(block_ctx, lines, pos)
257                .map(|match_result| {
258                    match_result.effect
259                        == crate::parser::block_dispatcher::BlockEffect::OpenDefinitionList
260                })
261                .unwrap_or(false);
262            if is_next_definition {
263                return false;
264            }
265        }
266        if lists::try_parse_list_marker(stripped_content, self.config).is_some() {
267            if prev_line_blank {
268                return false;
269            }
270            if block_ctx.in_list {
271                return false;
272            }
273        }
274        if count_blockquote_markers(stripped_content).0 > 0 {
275            return false;
276        }
277        if self.config.extensions.raw_html
278            && html_blocks::try_parse_html_block_start(
279                stripped_content,
280                self.config.dialect == crate::options::Dialect::CommonMark,
281            )
282            .is_some()
283        {
284            return false;
285        }
286        if self.config.extensions.raw_tex
287            && raw_blocks::extract_environment_name(stripped_content).is_some()
288        {
289            return false;
290        }
291
292        if let Some(match_result) = self.block_registry.detect_prepared(block_ctx, lines, pos) {
293            if match_result.effect == crate::parser::block_dispatcher::BlockEffect::OpenList
294                && !prev_line_blank
295            {
296                return true;
297            }
298            if match_result.effect
299                == crate::parser::block_dispatcher::BlockEffect::OpenDefinitionList
300                && match_result
301                    .payload
302                    .as_ref()
303                    .and_then(|payload| {
304                        payload
305                            .downcast_ref::<crate::parser::block_dispatcher::DefinitionPrepared>()
306                    })
307                    .is_some_and(|prepared| {
308                        matches!(
309                            prepared,
310                            crate::parser::block_dispatcher::DefinitionPrepared::Term { .. }
311                        )
312                    })
313            {
314                return true;
315            }
316            return false;
317        }
318
319        true
320    }
321}