Skip to main content

panache_parser/parser/blocks/
indented_code.rs

1//! Indented code block parsing utilities.
2//!
3//! A block of text indented four spaces (or one tab) is treated as verbatim text.
4//! The initial (four space or one tab) indentation is not considered part of the
5//! verbatim text and is removed in the output.
6//!
7//! Note: blank lines in the verbatim text need not begin with four spaces.
8
9use crate::syntax::SyntaxKind;
10use rowan::GreenNodeBuilder;
11
12use crate::parser::utils::helpers::strip_newline;
13
14/// Check if a line is indented enough to be part of an indented code block.
15/// Returns true if the leading whitespace expands to at least 4 columns
16/// (tabs count toward the next column-4 stop, so `  \t` is 4 cols).
17pub(crate) fn is_indented_code_line(content: &str) -> bool {
18    if content.is_empty() {
19        return false;
20    }
21    let (cols, _) = leading_indent(content);
22    cols >= 4
23}
24
25/// Parse an indented code block, consuming lines from the parser.
26/// Returns the new position after the code block.
27///
28/// An indented code block consists of consecutive lines that are either:
29/// - Indented by 4+ spaces or 1+ tab
30/// - Blank lines (which don't need indentation)
31///
32/// The block ends when we hit a non-blank line that isn't indented enough.
33/// Parse an indented code block, consuming lines from the parser.
34/// Returns the new position after the code block.
35///
36/// An indented code block consists of consecutive lines that are either:
37/// - Indented by 4+ spaces or 1+ tab (beyond base_indent)
38/// - Blank lines (which don't need indentation)
39///
40/// The block ends when we hit a non-blank line that isn't indented enough.
41pub(crate) fn parse_indented_code_block(
42    builder: &mut GreenNodeBuilder<'static>,
43    lines: &[&str],
44    start_pos: usize,
45    bq_depth: usize,
46    base_indent: usize,
47) -> usize {
48    use super::blockquotes::{
49        count_blockquote_markers, emit_one_blockquote_marker, strip_n_blockquote_markers,
50    };
51    use crate::parser::utils::marker_utils::parse_blockquote_marker_info;
52
53    builder.start_node(SyntaxKind::CODE_BLOCK.into());
54    builder.start_node(SyntaxKind::CODE_CONTENT.into());
55
56    let mut current_pos = start_pos;
57    // Total indent needed: base (e.g., footnote) + 4 for code
58    let code_indent = base_indent + 4;
59
60    while current_pos < lines.len() {
61        let line = lines[current_pos];
62
63        // Strip exactly the enclosing blockquote depth; deeper markers remain as content.
64        let (line_bq_depth, _) = count_blockquote_markers(line);
65        let inner = if bq_depth > 0 {
66            strip_n_blockquote_markers(line, bq_depth)
67        } else {
68            line
69        };
70
71        // If blockquote depth decreases, code block ends (we've left the blockquote)
72        if line_bq_depth < bq_depth {
73            break;
74        }
75
76        // Blank lines need look-ahead: only include if next non-blank line continues the code
77        if inner.trim().is_empty() {
78            // Check if code continues after this blank line
79            let mut look_pos = current_pos + 1;
80            let mut continues = false;
81            while look_pos < lines.len() {
82                let (look_bq_depth, look_inner) = count_blockquote_markers(lines[look_pos]);
83                if look_bq_depth < bq_depth {
84                    break;
85                }
86                if look_inner.trim_end_matches('\n').trim().is_empty() {
87                    look_pos += 1;
88                    continue;
89                }
90                let (look_indent, _) = leading_indent(look_inner);
91                if look_indent >= code_indent {
92                    continues = true;
93                }
94                break;
95            }
96            if !continues {
97                break;
98            }
99            if bq_depth > 0 && current_pos > start_pos {
100                let marker_info = parse_blockquote_marker_info(line);
101                for i in 0..bq_depth {
102                    if let Some(info) = marker_info.get(i) {
103                        emit_one_blockquote_marker(
104                            builder,
105                            info.leading_spaces,
106                            info.has_trailing_space,
107                        );
108                    }
109                }
110            }
111            let (blank_content, newline_str) = strip_newline(inner);
112            if !blank_content.is_empty() {
113                builder.token(SyntaxKind::WHITESPACE.into(), blank_content);
114            }
115            builder.token(SyntaxKind::TEXT.into(), "");
116            builder.token(
117                SyntaxKind::NEWLINE.into(),
118                if newline_str.is_empty() {
119                    "\n"
120                } else {
121                    newline_str
122                },
123            );
124            current_pos += 1;
125            continue;
126        }
127
128        // Check if line is indented enough (base_indent + 4 for code)
129        let (indent_cols, indent_bytes) = leading_indent(inner);
130        if indent_cols < code_indent {
131            break;
132        }
133
134        if bq_depth > 0 && current_pos > start_pos {
135            let marker_info = parse_blockquote_marker_info(line);
136            for i in 0..bq_depth {
137                if let Some(info) = marker_info.get(i) {
138                    emit_one_blockquote_marker(
139                        builder,
140                        info.leading_spaces,
141                        info.has_trailing_space,
142                    );
143                }
144            }
145        }
146
147        // For losslessness: emit ALL indentation as WHITESPACE, then emit remaining content
148        // The formatter can decide how to handle the indentation
149        if indent_bytes > 0 {
150            let indent_str = &inner[..indent_bytes];
151            builder.token(SyntaxKind::WHITESPACE.into(), indent_str);
152        }
153
154        // Get the content after the indentation
155        let content = &inner[indent_bytes..];
156
157        // Split off trailing newline if present (from split_inclusive)
158        let (content_without_newline, newline_str) = strip_newline(content);
159
160        if !content_without_newline.is_empty() {
161            builder.token(SyntaxKind::TEXT.into(), content_without_newline);
162        }
163
164        if !newline_str.is_empty() {
165            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
166        }
167
168        current_pos += 1;
169    }
170
171    builder.finish_node(); // CodeContent
172    builder.finish_node(); // CodeBlock
173
174    current_pos
175}
176
177use crate::parser::utils::container_stack::leading_indent;
178
#[cfg(test)]
mod tests {
    use super::*;

    /// Run the block parser from line 0 with no enclosing blockquote and no
    /// base indentation, returning the index of the first unconsumed line.
    fn parse_from_start(lines: &[&str]) -> usize {
        let mut builder = GreenNodeBuilder::new();
        parse_indented_code_block(&mut builder, lines, 0, 0, 0)
    }

    #[test]
    fn test_is_indented_code_line() {
        let accepted = ["    code", "        code", "\tcode"];
        for line in accepted.iter() {
            assert!(
                is_indented_code_line(line),
                "{:?} should be an indented code line",
                line
            );
        }

        let rejected = ["   not enough", "", "no indent"];
        for line in rejected.iter() {
            assert!(
                !is_indented_code_line(line),
                "{:?} should not be an indented code line",
                line
            );
        }
    }

    #[test]
    fn test_parse_simple_code_block() {
        let consumed = parse_from_start(&["    code line 1", "    code line 2"]);
        assert_eq!(consumed, 2);
    }

    #[test]
    fn test_parse_code_block_with_blank_line() {
        // The blank line is kept because indented code follows it.
        let consumed = parse_from_start(&["    code line 1", "", "    code line 2"]);
        assert_eq!(consumed, 3);
    }

    #[test]
    fn test_parse_code_block_stops_at_unindented() {
        let consumed = parse_from_start(&["    code line 1", "    code line 2", "not code"]);
        assert_eq!(consumed, 2);
    }

    #[test]
    fn test_parse_code_block_with_tab() {
        let consumed = parse_from_start(&["\tcode with tab", "\tanother line"]);
        assert_eq!(consumed, 2);
    }
}