panache_parser/
range_utils.rs

1use crate::syntax::{SyntaxKind, SyntaxNode};
2
3fn is_block_element(kind: SyntaxKind) -> bool {
4    matches!(
5        kind,
6        SyntaxKind::PARAGRAPH
7            | SyntaxKind::FIGURE
8            | SyntaxKind::HEADING
9            | SyntaxKind::LIST
10            | SyntaxKind::DEFINITION_LIST
11            | SyntaxKind::BLOCK_QUOTE
12            | SyntaxKind::CODE_BLOCK
13            | SyntaxKind::SIMPLE_TABLE
14            | SyntaxKind::MULTILINE_TABLE
15            | SyntaxKind::PIPE_TABLE
16            | SyntaxKind::LINE_BLOCK
17    )
18}
19
/// Convert a 1-indexed, inclusive line range into a byte range of `text`.
///
/// Lines are delimited by `'\n'`. The returned range starts at the first byte
/// of `start_line` and ends just past the last byte of `end_line`, including
/// that line's trailing newline when it has one. A final line without a
/// trailing newline still counts as a line.
///
/// # Returns
/// `Some((start, end))` with `end` exclusive, or `None` when either line
/// number is zero, `start_line > end_line`, or `end_line` lies beyond the
/// last line of `text` (which includes every request on an empty `text`).
pub fn line_range_to_byte_offsets(
    text: &str,
    start_line: usize,
    end_line: usize,
) -> Option<(usize, usize)> {
    if start_line == 0 || end_line == 0 || start_line > end_line {
        return None;
    }

    let mut start_offset = None;
    let mut byte_offset = 0;

    for (index, line) in text.split_inclusive('\n').enumerate() {
        // `index` is bounded by the number of lines in `text`, so this cannot overflow.
        let line_number = index + 1;

        if line_number == start_line {
            start_offset = Some(byte_offset);
        }

        // Advance past this line first so `byte_offset` is the exclusive end of it.
        byte_offset += line.len();

        if line_number == end_line {
            return start_offset.map(|start| (start, byte_offset));
        }
    }

    // The loop returns as soon as it reaches `end_line`, so falling through
    // means `end_line` is past the end of the document.
    None
}
58
59/// Find the smallest block-level node containing the given offset
60fn find_enclosing_block(node: &SyntaxNode, offset: usize) -> Option<SyntaxNode> {
61    let text_offset = rowan::TextSize::try_from(offset).ok()?;
62
63    // Start with the node at this offset
64    let token = node.token_at_offset(text_offset).right_biased()?;
65    let mut current = token.parent()?;
66
67    // Walk up the tree to find the smallest block element
68    loop {
69        if is_block_element(current.kind()) {
70            return Some(current);
71        }
72
73        current = current.parent()?;
74    }
75}
76
77/// Check if a node or any of its ancestors is a container that should be expanded as a unit
78fn find_expandable_container(node: &SyntaxNode) -> Option<SyntaxNode> {
79    let mut current = node.clone();
80    let mut best: Option<SyntaxNode> = None;
81    let mut best_priority = 0u8;
82
83    loop {
84        let priority = match current.kind() {
85            SyntaxKind::LIST => 1,
86            SyntaxKind::DEFINITION_LIST => 2,
87            SyntaxKind::DEFINITION_ITEM => 3,
88            SyntaxKind::LINE_BLOCK => 2,
89            SyntaxKind::BLOCK_QUOTE | SyntaxKind::FENCED_DIV => 4,
90            _ => 0,
91        };
92        if priority >= best_priority && priority > 0 {
93            best_priority = priority;
94            best = Some(current.clone());
95        }
96
97        let Some(parent) = current.parent() else {
98            break;
99        };
100        current = parent;
101    }
102
103    best
104}
105
106/// Expand a byte range to encompass complete block-level elements (internal helper).
107///
108/// This ensures that formatting doesn't split blocks mid-content and that
109/// context-dependent formatting (lists, blockquotes) works correctly.
110///
111/// # Arguments
112/// * `tree` - The syntax tree root
113/// * `start` - Starting byte offset (inclusive)
114/// * `end` - Ending byte offset (exclusive)
115///
116/// # Returns
117/// Expanded byte range `(start, end)` that covers complete blocks
118pub fn expand_byte_range_to_blocks(tree: &SyntaxNode, start: usize, end: usize) -> (usize, usize) {
119    // Handle empty or invalid ranges
120    if start >= end {
121        // Treat as cursor position - find enclosing block
122        if let Some(block) = find_enclosing_block(tree, start) {
123            let range = block.text_range();
124            return (range.start().into(), range.end().into());
125        }
126        return (start, start);
127    }
128
129    // Find blocks at start and end positions
130    let start_block = find_enclosing_block(tree, start);
131    let end_block = find_enclosing_block(tree, end.saturating_sub(1)); // end is exclusive
132
133    let (mut expanded_start, mut expanded_end) = match (start_block, end_block) {
134        (Some(start_node), Some(end_node)) => {
135            let start_range = start_node.text_range();
136            let end_range = end_node.text_range();
137            (start_range.start().into(), end_range.end().into())
138        }
139        (Some(start_node), None) => {
140            // Only start is in a block
141            let range = start_node.text_range();
142            (range.start().into(), end)
143        }
144        (None, Some(end_node)) => {
145            // Only end is in a block
146            let range = end_node.text_range();
147            (start, range.end().into())
148        }
149        (None, None) => {
150            // Neither position is in a block (shouldn't normally happen)
151            return (start, end);
152        }
153    };
154
155    // Check if we need to expand to encompass parent containers
156    // This handles cases where the range touches list items, blockquotes, etc.
157    if let Some(start_node) = find_enclosing_block(tree, expanded_start)
158        && let Some(container) = find_expandable_container(&start_node)
159    {
160        let container_range = container.text_range();
161        expanded_start = expanded_start.min(container_range.start().into());
162        expanded_end = expanded_end.max(container_range.end().into());
163    }
164
165    if let Some(end_node) = find_enclosing_block(tree, expanded_end.saturating_sub(1))
166        && let Some(container) = find_expandable_container(&end_node)
167    {
168        let container_range = container.text_range();
169        expanded_start = expanded_start.min(container_range.start().into());
170        expanded_end = expanded_end.max(container_range.end().into());
171    }
172
173    (expanded_start, expanded_end)
174}
175
176/// Find a conservative restart offset for incremental reparsing.
177///
178/// This uses block expansion and then widens to include the previous sibling block,
179/// which helps preserve continuation-sensitive parser context.
180pub fn find_incremental_restart_offset(tree: &SyntaxNode, start: usize, end: usize) -> usize {
181    let (expanded_start, _) = expand_byte_range_to_blocks(tree, start, end);
182    let Some(block) = find_enclosing_block(tree, expanded_start) else {
183        return expanded_start;
184    };
185
186    if let Some(prev) = block.prev_sibling()
187        && is_block_element(prev.kind())
188    {
189        return prev.text_range().start().into();
190    }
191
192    expanded_start
193}
194
195/// Expand a 1-indexed line range to encompass complete block-level elements.
196///
197/// This is the public API for range formatting. It converts line numbers to byte offsets,
198/// expands to block boundaries, and returns the expanded byte range.
199///
200/// # Arguments
201/// * `tree` - The syntax tree root
202/// * `text` - The original document text
203/// * `start_line` - Starting line number (1-indexed, inclusive)
204/// * `end_line` - Ending line number (1-indexed, inclusive)
205///
206/// # Returns
207/// Expanded byte range `(start, end)` that covers complete blocks, or None if range is invalid
208pub fn expand_line_range_to_blocks(
209    tree: &SyntaxNode,
210    text: &str,
211    start_line: usize,
212    end_line: usize,
213) -> Option<(usize, usize)> {
214    let (start, end) = line_range_to_byte_offsets(text, start_line, end_line)?;
215    Some(expand_byte_range_to_blocks(tree, start, end))
216}
217
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::Config;

    /// Parse `input` with the default configuration.
    fn parse_test_doc(input: &str) -> SyntaxNode {
        crate::parse(input, Some(Config::default()))
    }

    #[test]
    fn test_line_range_to_byte_offsets() {
        let doc = "Line 1\nLine 2\nLine 3\n";

        // Line 1 (1-indexed)
        let (start, end) = line_range_to_byte_offsets(doc, 1, 1).unwrap();
        assert_eq!(&doc[start..end], "Line 1\n");

        // Line 2
        let (start, end) = line_range_to_byte_offsets(doc, 2, 2).unwrap();
        assert_eq!(&doc[start..end], "Line 2\n");

        // Lines 1-2
        let (start, end) = line_range_to_byte_offsets(doc, 1, 2).unwrap();
        assert_eq!(&doc[start..end], "Line 1\nLine 2\n");

        // Whole document
        let (start, end) = line_range_to_byte_offsets(doc, 1, 3).unwrap();
        assert_eq!(&doc[start..end], doc);

        // Last line without a trailing newline is still a line
        let unterminated = "Line 1\nLine 2";
        let (start, end) = line_range_to_byte_offsets(unterminated, 2, 2).unwrap();
        assert_eq!(&unterminated[start..end], "Line 2");

        // Invalid ranges
        assert!(line_range_to_byte_offsets(doc, 0, 1).is_none()); // 0-indexed not allowed
        assert!(line_range_to_byte_offsets(doc, 2, 1).is_none()); // start > end
        assert!(line_range_to_byte_offsets(doc, 1, 10).is_none()); // beyond document
        assert!(line_range_to_byte_offsets("", 1, 1).is_none()); // empty document has no lines
    }

    #[test]
    fn test_expand_single_paragraph() {
        let doc = "Para 1\n\nPara 2\n\nPara 3\n";
        let tree = parse_test_doc(doc);

        // Select line 3 (Para 2)
        let (start, end) = expand_line_range_to_blocks(&tree, doc, 3, 3).unwrap();

        let selected = &doc[start..end];
        assert!(selected.contains("Para 2"), "Range should include Para 2");
        assert!(
            !selected.contains("Para 1"),
            "Range should not include Para 1"
        );
        assert!(
            !selected.contains("Para 3"),
            "Range should not include Para 3"
        );
    }

    #[test]
    fn test_expand_code_block() {
        let doc = "Text before\n\n```rust\nfn main() {}\n```\n\nText after\n";
        let tree = parse_test_doc(doc);

        // Line 3 is "```rust", line 4 is "fn main() {}", line 5 is "```"
        // Select line 4 (inside code block)
        let (start, end) = expand_line_range_to_blocks(&tree, doc, 4, 4).unwrap();

        // Should expand to entire code block
        let selected = &doc[start..end];
        assert!(
            selected.contains("```rust"),
            "Range should include opening fence"
        );
        assert!(
            selected.contains("fn main() {}"),
            "Range should include code"
        );
        // The opening fence already contains "```", so count fences instead of
        // using `contains`, which would pass without the closing fence.
        assert!(
            selected.matches("```").count() >= 2,
            "Range should include closing fence"
        );
        assert!(
            !selected.contains("Text before"),
            "Range should not include text before"
        );
        assert!(
            !selected.contains("Text after"),
            "Range should not include text after"
        );
    }

    #[test]
    fn test_expand_list_item_to_full_list() {
        let doc = "Before\n\n- Item 1\n- Item 2\n- Item 3\n\nAfter\n";
        let tree = parse_test_doc(doc);

        // Line 4 is "- Item 2"
        let (start, end) = expand_line_range_to_blocks(&tree, doc, 4, 4).unwrap();

        // Should expand to entire list (all items)
        let selected = &doc[start..end];
        assert!(selected.contains("Item 1"), "Range should include Item 1");
        assert!(selected.contains("Item 2"), "Range should include Item 2");
        assert!(selected.contains("Item 3"), "Range should include Item 3");
        assert!(
            !selected.contains("Before"),
            "Range should not include Before"
        );
        assert!(
            !selected.contains("After"),
            "Range should not include After"
        );
    }

    #[test]
    fn test_single_line_expands_to_block() {
        let doc = "# Heading\n\nParagraph text here.\n";
        let tree = parse_test_doc(doc);

        // Line 3 is "Paragraph text here."
        let (start, end) = expand_line_range_to_blocks(&tree, doc, 3, 3).unwrap();

        // Should expand to entire paragraph
        let selected = &doc[start..end];
        assert!(
            selected.contains("Paragraph text here."),
            "Range should include paragraph"
        );
        assert!(
            !selected.contains("Heading"),
            "Range should not include heading"
        );
    }

    #[test]
    fn test_expand_blockquote() {
        let doc = "Before\n\n> Line 1\n> Line 2\n> Line 3\n\nAfter\n";
        let tree = parse_test_doc(doc);

        // Line 4 is "> Line 2"
        let (start, end) = expand_line_range_to_blocks(&tree, doc, 4, 4)
            .expect("Failed to expand range for line 4");

        // Should expand to the entire blockquote. `selected` is a slice of the
        // original text, so only the line contents are checked here; the exact
        // offsets (and whether the leading "> " marker falls inside the range)
        // may vary with parser changes.
        let selected = &doc[start..end];

        assert!(selected.contains("Line 1"), "Range should include Line 1");
        assert!(selected.contains("Line 2"), "Range should include Line 2");
        assert!(selected.contains("Line 3"), "Range should include Line 3");
        assert!(
            !selected.contains("Before"),
            "Range should not include Before"
        );
        assert!(
            !selected.contains("After"),
            "Range should not include After"
        );
    }
}
panache_parser/range_utils.rs

panache_parser/
range_utils.rs