Skip to main content

panache_parser/parser/blocks/
indented_code.rs

//! Indented code block parsing utilities.
//!
//! A block of text indented four spaces (or one tab) is treated as verbatim text.
//! The initial (four space or one tab) indentation is not considered part of the
//! verbatim text and is removed in the output.
//!
//! Note: blank lines in the verbatim text need not begin with four spaces.
8
9use crate::syntax::SyntaxKind;
10use rowan::GreenNodeBuilder;
11
12use crate::parser::utils::helpers::strip_newline;
13
/// Check if a line is indented enough to be part of an indented code block.
///
/// Indentation is measured in columns: each leading space advances one column
/// and each leading tab advances to the next multiple of four. The line
/// qualifies as soon as its leading whitespace reaches column 4, so all of
/// `"    code"`, `"\tcode"` and `"  \tcode"` are accepted.
///
/// Returns `false` for an empty line and for any line whose leading
/// space/tab run ends before column 4.
pub(crate) fn is_indented_code_line(content: &str) -> bool {
    /// Width of a tab stop, in columns.
    const TAB_STOP: usize = 4;
    /// Columns of indentation required for an indented code line.
    const CODE_INDENT: usize = 4;

    let mut columns = 0usize;
    // Spaces and tabs are single-byte ASCII, so scanning bytes is sufficient.
    for byte in content.bytes() {
        match byte {
            b' ' => columns += 1,
            // A tab jumps to the next tab stop rather than adding a fixed width,
            // so spaces that precede it are absorbed into the same stop.
            b'\t' => columns = (columns / TAB_STOP + 1) * TAB_STOP,
            _ => break,
        }
        if columns >= CODE_INDENT {
            return true;
        }
    }
    false
}
30
31/// Parse an indented code block, consuming lines from the parser.
32/// Returns the new position after the code block.
33///
34/// An indented code block consists of consecutive lines that are either:
35/// - Indented by 4+ spaces or 1+ tab
36/// - Blank lines (which don't need indentation)
37///
38/// The block ends when we hit a non-blank line that isn't indented enough.
39/// Parse an indented code block, consuming lines from the parser.
40/// Returns the new position after the code block.
41///
42/// An indented code block consists of consecutive lines that are either:
43/// - Indented by 4+ spaces or 1+ tab (beyond base_indent)
44/// - Blank lines (which don't need indentation)
45///
46/// The block ends when we hit a non-blank line that isn't indented enough.
47pub(crate) fn parse_indented_code_block(
48    builder: &mut GreenNodeBuilder<'static>,
49    lines: &[&str],
50    start_pos: usize,
51    bq_depth: usize,
52    base_indent: usize,
53) -> usize {
54    use super::blockquotes::{
55        count_blockquote_markers, emit_one_blockquote_marker, strip_n_blockquote_markers,
56    };
57    use crate::parser::utils::marker_utils::parse_blockquote_marker_info;
58
59    builder.start_node(SyntaxKind::CODE_BLOCK.into());
60    builder.start_node(SyntaxKind::CODE_CONTENT.into());
61
62    let mut current_pos = start_pos;
63    // Total indent needed: base (e.g., footnote) + 4 for code
64    let code_indent = base_indent + 4;
65
66    while current_pos < lines.len() {
67        let line = lines[current_pos];
68
69        // Strip exactly the enclosing blockquote depth; deeper markers remain as content.
70        let (line_bq_depth, _) = count_blockquote_markers(line);
71        let inner = if bq_depth > 0 {
72            strip_n_blockquote_markers(line, bq_depth)
73        } else {
74            line
75        };
76
77        // If blockquote depth decreases, code block ends (we've left the blockquote)
78        if line_bq_depth < bq_depth {
79            break;
80        }
81
82        // Blank lines need look-ahead: only include if next non-blank line continues the code
83        if inner.trim().is_empty() {
84            // Check if code continues after this blank line
85            let mut look_pos = current_pos + 1;
86            let mut continues = false;
87            while look_pos < lines.len() {
88                let (look_bq_depth, look_inner) = count_blockquote_markers(lines[look_pos]);
89                if look_bq_depth < bq_depth {
90                    break;
91                }
92                if look_inner.trim_end_matches('\n').trim().is_empty() {
93                    look_pos += 1;
94                    continue;
95                }
96                let (look_indent, _) = leading_indent(look_inner);
97                if look_indent >= code_indent {
98                    continues = true;
99                }
100                break;
101            }
102            if !continues {
103                break;
104            }
105            if bq_depth > 0 && current_pos > start_pos {
106                let marker_info = parse_blockquote_marker_info(line);
107                for i in 0..bq_depth {
108                    if let Some(info) = marker_info.get(i) {
109                        emit_one_blockquote_marker(
110                            builder,
111                            info.leading_spaces,
112                            info.has_trailing_space,
113                        );
114                    }
115                }
116            }
117            let (blank_content, newline_str) = strip_newline(inner);
118            if !blank_content.is_empty() {
119                builder.token(SyntaxKind::WHITESPACE.into(), blank_content);
120            }
121            builder.token(SyntaxKind::TEXT.into(), "");
122            builder.token(
123                SyntaxKind::NEWLINE.into(),
124                if newline_str.is_empty() {
125                    "\n"
126                } else {
127                    newline_str
128                },
129            );
130            current_pos += 1;
131            continue;
132        }
133
134        // Check if line is indented enough (base_indent + 4 for code)
135        let (indent_cols, indent_bytes) = leading_indent(inner);
136        if indent_cols < code_indent {
137            break;
138        }
139
140        if bq_depth > 0 && current_pos > start_pos {
141            let marker_info = parse_blockquote_marker_info(line);
142            for i in 0..bq_depth {
143                if let Some(info) = marker_info.get(i) {
144                    emit_one_blockquote_marker(
145                        builder,
146                        info.leading_spaces,
147                        info.has_trailing_space,
148                    );
149                }
150            }
151        }
152
153        // For losslessness: emit ALL indentation as WHITESPACE, then emit remaining content
154        // The formatter can decide how to handle the indentation
155        if indent_bytes > 0 {
156            let indent_str = &inner[..indent_bytes];
157            builder.token(SyntaxKind::WHITESPACE.into(), indent_str);
158        }
159
160        // Get the content after the indentation
161        let content = &inner[indent_bytes..];
162
163        // Split off trailing newline if present (from split_inclusive)
164        let (content_without_newline, newline_str) = strip_newline(content);
165
166        if !content_without_newline.is_empty() {
167            builder.token(SyntaxKind::TEXT.into(), content_without_newline);
168        }
169
170        if !newline_str.is_empty() {
171            builder.token(SyntaxKind::NEWLINE.into(), newline_str);
172        }
173
174        current_pos += 1;
175    }
176
177    builder.finish_node(); // CodeContent
178    builder.finish_node(); // CodeBlock
179
180    current_pos
181}
182
183use crate::parser::utils::container_stack::leading_indent;
184
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `lines` as a top-level block (no blockquote, no base indent)
    /// starting at index 0 and return the index where parsing stopped.
    fn parse_from_top(lines: &[&str]) -> usize {
        let mut builder = GreenNodeBuilder::new();
        parse_indented_code_block(&mut builder, lines, 0, 0, 0)
    }

    #[test]
    fn test_is_indented_code_line() {
        // Four or more spaces, or a leading tab, qualify.
        for &accepted in &["    code", "        code", "\tcode"] {
            assert!(is_indented_code_line(accepted));
        }
        // Too little indentation, no indentation, or an empty line do not.
        for &rejected in &["   not enough", "", "no indent"] {
            assert!(!is_indented_code_line(rejected));
        }
    }

    #[test]
    fn test_parse_simple_code_block() {
        let stop = parse_from_top(&["    code line 1", "    code line 2"]);
        assert_eq!(stop, 2);
    }

    #[test]
    fn test_parse_code_block_with_blank_line() {
        // The blank line is kept because indented code follows it.
        let stop = parse_from_top(&["    code line 1", "", "    code line 2"]);
        assert_eq!(stop, 3);
    }

    #[test]
    fn test_parse_code_block_stops_at_unindented() {
        let stop = parse_from_top(&["    code line 1", "    code line 2", "not code"]);
        assert_eq!(stop, 2);
    }

    #[test]
    fn test_parse_code_block_with_tab() {
        let stop = parse_from_top(&["\tcode with tab", "\tanother line"]);
        assert_eq!(stop, 2);
    }
}