marco_core/parser/blocks/
shared.rs

1// Shared utilities for block-level parsers
2// Contains span conversion helpers and common types
3
4pub use crate::parser::shared::{
5    opt_span, opt_span_range_inclusive as opt_span_range, to_parser_span,
6    to_parser_span_range_inclusive as to_parser_span_range, GrammarSpan,
7};
8// No local Position/Span imports required here; use canonical helpers from parser::shared
9
10#[cfg(test)]
11use nom_locate::LocatedSpan;
12
/// Strips list-item indentation from every line of `content`.
///
/// Each line is handled independently, in two conceptual steps:
///
/// 1. **Tab expansion** - every tab in the line (not only leading ones) is
///    replaced by spaces up to the next 4-column tab stop. The column counter
///    starts at `content_indent` for each line and advances by one per
///    character.
/// 2. **Indent removal** - at most `content_indent` leading spaces of the
///    expanded line are dropped. Removal stops at the first non-space
///    character, so lines with less indentation are simply left-trimmed as far
///    as their spaces go, and indentation beyond `content_indent` is kept.
///
/// # Arguments
/// * `content` - The text whose lines should be dedented
/// * `content_indent` - Maximum number of leading spaces to drop per line; also
///   the starting column used for tab-stop calculation
///
/// # Returns
/// A new `String` with the processed lines joined by `'\n'`. Lines are split
/// with [`str::lines`], so a `"\r\n"` terminator comes back as `'\n'`. A final
/// `'\n'` is emitted only when `content` itself ended with one.
pub fn dedent_list_item_content(content: &str, content_indent: usize) -> String {
    let mut out = String::with_capacity(content.len());

    for (index, raw_line) in content.lines().enumerate() {
        // Separator between lines (never before the first one).
        if index > 0 {
            out.push('\n');
        }

        let mut col = content_indent; // tab stops are measured from this column
        let mut removed = 0usize; // leading spaces dropped so far on this line
        let mut in_leading_run = true; // still inside the strippable prefix?

        for ch in raw_line.chars() {
            // A tab behaves like a run of spaces reaching the next multiple of 4;
            // any other character is emitted once and occupies one column.
            let (glyph, repeat) = if ch == '\t' {
                (' ', 4 - col % 4)
            } else {
                (ch, 1)
            };
            col += repeat;

            for _ in 0..repeat {
                if in_leading_run && removed < content_indent && glyph == ' ' {
                    removed += 1;
                } else {
                    // First kept character ends the strippable prefix for good.
                    in_leading_run = false;
                    out.push(glyph);
                }
            }
        }
    }

    // `lines()` swallows the final terminator; put it back if there was one.
    if content.ends_with('\n') {
        out.push('\n');
    }

    out
}
78
#[cfg(test)]
mod tests {
    use super::*;

    // ---------------------------------------------------------------------
    // to_parser_span
    //
    // The assertions below pin 1-based line/column values, with the end
    // column expressed in bytes past the last character on the final line.
    // ---------------------------------------------------------------------

    /// A multi-line input starts at line 1, column 1.
    #[test]
    fn smoke_test_to_parser_span() {
        let located = LocatedSpan::new("line1\nline2\nline3");
        let converted = to_parser_span(located);

        assert_eq!((converted.start.line, converted.start.column), (1, 1));
    }

    /// "**bold**" at the start of a document: 8 ASCII bytes on one line.
    #[test]
    fn test_to_parser_span_single_line_ascii() {
        let converted = to_parser_span(LocatedSpan::new("**bold**"));

        assert_eq!((converted.start.line, converted.start.column), (1, 1));
        // 8 bytes consumed, so the 1-based end column is 9.
        assert_eq!((converted.end.line, converted.end.column), (1, 9));
    }

    /// "Tëst" is 5 bytes because 'ë' encodes as two bytes (0xC3 0xAB).
    #[test]
    fn test_to_parser_span_single_line_utf8() {
        let converted = to_parser_span(LocatedSpan::new("Tëst"));

        assert_eq!((converted.start.line, converted.start.column), (1, 1));
        // 5 bytes consumed, so the 1-based end column is 6.
        assert_eq!((converted.end.line, converted.end.column), (1, 6));
    }

    /// "🎨" is a single 4-byte code point (0xF0 0x9F 0x8E 0xA8).
    #[test]
    fn test_to_parser_span_single_line_emoji() {
        let converted = to_parser_span(LocatedSpan::new("🎨"));

        assert_eq!((converted.start.line, converted.start.column), (1, 1));
        // 4 bytes consumed, so the 1-based end column is 5.
        assert_eq!((converted.end.line, converted.end.column), (1, 5));
    }

    /// A fenced code block spread over three lines.
    #[test]
    fn test_to_parser_span_multi_line_code_block() {
        let source = "```rust\nfn main() {}\n```";
        let converted = to_parser_span(LocatedSpan::new(source));

        assert_eq!((converted.start.line, converted.start.column), (1, 1));
        // Two newlines put the end on line 3; three backticks put it at column 4.
        assert_eq!((converted.end.line, converted.end.column), (3, 4));
    }

    /// Input that ends with a newline finishes at column 1 of the following line.
    #[test]
    fn test_to_parser_span_ends_with_newline() {
        let converted = to_parser_span(LocatedSpan::new("line1\nline2\n"));

        assert_eq!((converted.start.line, converted.start.column), (1, 1));
        // Two newlines: end is on line 3, column 1.
        assert_eq!((converted.end.line, converted.end.column), (3, 1));
    }

    /// Multi-line input whose last line contains a 2-byte character.
    #[test]
    fn test_to_parser_span_multi_line_utf8() {
        let converted = to_parser_span(LocatedSpan::new("Line1\nTëst"));

        assert_eq!((converted.start.line, converted.start.column), (1, 1));
        // Last line "Tëst" is 5 bytes, so the end is line 2, column 6.
        assert_eq!((converted.end.line, converted.end.column), (2, 6));
    }

    /// Absolute offsets cover the whole input.
    #[test]
    fn test_to_parser_span_offset_correctness() {
        let converted = to_parser_span(LocatedSpan::new("abc\ndef"));

        // 3 bytes + 1 newline + 3 bytes = 7.
        assert_eq!((converted.start.offset, converted.end.offset), (0, 7));
    }

    // ---------------------------------------------------------------------
    // dedent_list_item_content
    // ---------------------------------------------------------------------

    /// Every line carries exactly the indent being removed.
    #[test]
    fn smoke_test_dedent_simple() {
        let dedented = dedent_list_item_content("  Line 1\n  Line 2\n", 2);
        assert_eq!(dedented, "Line 1\nLine 2\n");
    }

    /// Indentation beyond the removed width survives.
    #[test]
    fn smoke_test_dedent_preserves_extra_indent() {
        let dedented = dedent_list_item_content("  Line 1\n    Indented\n", 2);
        assert_eq!(dedented, "Line 1\n  Indented\n");
    }

    /// Empty lines between content lines are kept as-is.
    #[test]
    fn smoke_test_dedent_preserves_blank_lines() {
        let dedented = dedent_list_item_content("  Line 1\n\n  Line 2\n", 2);
        assert_eq!(dedented, "Line 1\n\nLine 2\n");
    }

    /// A leading tab counts as indentation and is removed with an indent of 4.
    #[test]
    fn smoke_test_dedent_with_tabs() {
        let dedented = dedent_list_item_content("\tLine 1\n\tLine 2\n", 4);
        assert_eq!(dedented, "Line 1\nLine 2\n");
    }
}
marco_core/parser/blocks/shared.rs

marco_core/parser/blocks/
shared.rs