marco_core/parser/blocks/
shared.rs

1// Shared utilities for block-level parsers
2// Contains span conversion helpers and common types
3
4pub use crate::parser::shared::{
5    to_parser_span, to_parser_span_range_inclusive as to_parser_span_range, GrammarSpan,
6};
7// No local Position/Span imports required here; use canonical helpers from parser::shared
8
9#[cfg(test)]
10use nom_locate::LocatedSpan;
11
/// Strip list-item indentation from every line of `content`.
///
/// Nested list-item content is indented to the item's content column; this
/// helper removes that indentation so the content can be handled as if it
/// started at the left margin.
///
/// # Arguments
/// * `content` - Text whose lines should be dedented
/// * `content_indent` - Maximum number of leading spaces removed per line
///
/// # Returns
/// A new `String` in which, for each line:
/// - every tab (not only leading ones) has been replaced by spaces,
/// - at most `content_indent` leading spaces have been removed; lines with
///   less indentation only lose the spaces they actually have.
///
/// Lines are re-joined with `\n` (so `\r\n` endings come back as `\n`), and a
/// final `\n` is emitted only when `content` itself ended with one.
///
/// # Tab Expansion
/// The column counter for each line starts at `content_indent` rather than 0,
/// and a tab pads with spaces up to the next multiple of 4 from that counter.
/// Every other character advances the counter by one.
pub fn dedent_list_item_content(content: &str, content_indent: usize) -> String {
    const TAB_STOP: usize = 4;

    let mut dedented = String::with_capacity(content.len());
    // Scratch buffer holding the tab-expanded form of the current line.
    let mut widened = String::new();

    for (index, raw_line) in content.lines().enumerate() {
        // Separator goes *between* lines; the final newline is handled below.
        if index > 0 {
            dedented.push('\n');
        }

        // Step 1: replace tabs with spaces, tracking the running column.
        widened.clear();
        let mut col = content_indent;
        for ch in raw_line.chars() {
            if ch == '\t' {
                let pad = TAB_STOP - col % TAB_STOP;
                widened.extend(std::iter::repeat(' ').take(pad));
                col += pad;
            } else {
                widened.push(ch);
                col += 1;
            }
        }

        // Step 2: drop up to `content_indent` leading spaces. A space is a
        // single byte in UTF-8, so the count is also a valid byte offset.
        let leading = widened
            .bytes()
            .take(content_indent)
            .take_while(|&byte| byte == b' ')
            .count();
        dedented.push_str(&widened[leading..]);
    }

    // `lines()` swallows a terminating newline, so put it back if it existed.
    if content.ends_with('\n') {
        dedented.push('\n');
    }

    dedented
}
77
#[cfg(test)]
mod tests {
    use super::*;

    // ---------------------------------------------------------------------
    // to_parser_span: the assertions below pin 1-based lines and 1-based,
    // byte-counted columns, plus 0-based absolute offsets.
    // ---------------------------------------------------------------------

    #[test]
    fn smoke_test_to_parser_span() {
        let converted = to_parser_span(LocatedSpan::new("line1\nline2\nline3"));

        assert_eq!((converted.start.line, converted.start.column), (1, 1));
    }

    #[test]
    fn test_to_parser_span_single_line_ascii() {
        // "**bold**" sitting at the very start of the document.
        let converted = to_parser_span(LocatedSpan::new("**bold**"));

        assert_eq!((converted.start.line, converted.start.column), (1, 1));
        // 8 ASCII bytes -> the end column is one past them: 9.
        assert_eq!((converted.end.line, converted.end.column), (1, 9));
    }

    #[test]
    fn test_to_parser_span_single_line_utf8() {
        // 'ë' encodes as two bytes (0xC3 0xAB), so "Tëst" is 5 bytes long.
        let converted = to_parser_span(LocatedSpan::new("Tëst"));

        assert_eq!((converted.start.line, converted.start.column), (1, 1));
        // 5 bytes -> end column 6.
        assert_eq!((converted.end.line, converted.end.column), (1, 6));
    }

    #[test]
    fn test_to_parser_span_single_line_emoji() {
        // "🎨" encodes as four bytes (0xF0 0x9F 0x8E 0xA8).
        let converted = to_parser_span(LocatedSpan::new("🎨"));

        assert_eq!((converted.start.line, converted.start.column), (1, 1));
        // 4 bytes -> end column 5.
        assert_eq!((converted.end.line, converted.end.column), (1, 5));
    }

    #[test]
    fn test_to_parser_span_multi_line_code_block() {
        // A fenced code block that covers three lines.
        let converted = to_parser_span(LocatedSpan::new("```rust\nfn main() {}\n```"));

        assert_eq!((converted.start.line, converted.start.column), (1, 1));
        // Two newlines put the end on line 3; three backticks -> column 4.
        assert_eq!((converted.end.line, converted.end.column), (3, 4));
    }

    #[test]
    fn test_to_parser_span_ends_with_newline() {
        // When the text ends in a newline the end lands on column 1 of the
        // following line.
        let converted = to_parser_span(LocatedSpan::new("line1\nline2\n"));

        assert_eq!((converted.start.line, converted.start.column), (1, 1));
        assert_eq!((converted.end.line, converted.end.column), (3, 1));
    }

    #[test]
    fn test_to_parser_span_multi_line_utf8() {
        // Multi-byte character on the last of two lines.
        let converted = to_parser_span(LocatedSpan::new("Line1\nTëst"));

        assert_eq!((converted.start.line, converted.start.column), (1, 1));
        // Last line "Tëst" is 5 bytes -> end column 6 on line 2.
        assert_eq!((converted.end.line, converted.end.column), (2, 6));
    }

    #[test]
    fn test_to_parser_span_offset_correctness() {
        // Absolute offsets: 3 bytes + 1 newline + 3 bytes = 7.
        let converted = to_parser_span(LocatedSpan::new("abc\ndef"));

        assert_eq!(converted.start.offset, 0);
        assert_eq!(converted.end.offset, 7);
    }

    // ---------------------------------------------------------------------
    // dedent_list_item_content
    // ---------------------------------------------------------------------

    #[test]
    fn smoke_test_dedent_simple() {
        let dedented = dedent_list_item_content("  Line 1\n  Line 2\n", 2);

        assert_eq!(dedented, "Line 1\nLine 2\n");
    }

    #[test]
    fn smoke_test_dedent_preserves_extra_indent() {
        // Indentation beyond the requested width must survive.
        let dedented = dedent_list_item_content("  Line 1\n    Indented\n", 2);

        assert_eq!(dedented, "Line 1\n  Indented\n");
    }

    #[test]
    fn smoke_test_dedent_preserves_blank_lines() {
        let dedented = dedent_list_item_content("  Line 1\n\n  Line 2\n", 2);

        assert_eq!(dedented, "Line 1\n\nLine 2\n");
    }

    #[test]
    fn smoke_test_dedent_with_tabs() {
        // A leading tab counts as a full four-column indent here.
        let dedented = dedent_list_item_content("\tLine 1\n\tLine 2\n", 4);

        assert_eq!(dedented, "Line 1\nLine 2\n");
    }
}
marco_core/parser/blocks/shared.rs

marco_core/parser/blocks/
shared.rs