rumdl_lib/utils/
range_utils.rs

1//! Utilities for position/range conversions
2
3use std::collections::HashSet;
4use std::ops::Range;
5
6#[derive(Debug)]
7pub struct LineIndex {
8    line_starts: Vec<usize>,
9    content: String,
10    code_block_lines: Option<HashSet<usize>>,
11}
12
13impl LineIndex {
14    pub fn new(content: String) -> Self {
15        let mut line_starts = vec![0];
16        let mut pos = 0;
17
18        for c in content.chars() {
19            pos += c.len_utf8();
20            if c == '\n' {
21                line_starts.push(pos);
22            }
23        }
24
25        let mut index = Self {
26            line_starts,
27            content,
28            code_block_lines: None,
29        };
30
31        // Pre-compute code block lines for better performance
32        index.compute_code_block_lines();
33
34        index
35    }
36
37    pub fn line_col_to_byte_range(&self, line: usize, column: usize) -> Range<usize> {
38        let line = line.saturating_sub(1);
39        let line_start = *self.line_starts.get(line).unwrap_or(&self.content.len());
40
41        let current_line = self.content.lines().nth(line).unwrap_or("");
42        let col = column.clamp(1, current_line.len() + 1);
43
44        let start = line_start + col - 1;
45        start..start
46    }
47
48    /// Calculate a proper byte range for replacing text with a specific length
49    /// This is the correct function to use for LSP fixes
50    pub fn line_col_to_byte_range_with_length(&self, line: usize, column: usize, length: usize) -> Range<usize> {
51        let line = line.saturating_sub(1);
52        let line_start = *self.line_starts.get(line).unwrap_or(&self.content.len());
53
54        let current_line = self.content.lines().nth(line).unwrap_or("");
55        let col = column.clamp(1, current_line.len() + 1);
56
57        let start = line_start + col - 1;
58        let end = (start + length).min(line_start + current_line.len());
59        start..end
60    }
61
62    /// Calculate byte range for entire line replacement (including newline)
63    /// This is ideal for rules that need to replace complete lines
64    pub fn whole_line_range(&self, line: usize) -> Range<usize> {
65        let line_idx = line.saturating_sub(1);
66        let start = *self.line_starts.get(line_idx).unwrap_or(&self.content.len());
67        let end = self
68            .line_starts
69            .get(line_idx + 1)
70            .copied()
71            .unwrap_or(self.content.len());
72        start..end
73    }
74
75    /// Calculate byte range for text within a line (excluding newline)
76    /// Useful for replacing specific parts of a line
77    pub fn line_text_range(&self, line: usize, start_col: usize, end_col: usize) -> Range<usize> {
78        let line_idx = line.saturating_sub(1);
79        let line_start = *self.line_starts.get(line_idx).unwrap_or(&self.content.len());
80
81        // Get the actual line content to ensure we don't exceed bounds
82        let current_line = self.content.lines().nth(line_idx).unwrap_or("");
83        let line_len = current_line.len();
84
85        let start = line_start + start_col.saturating_sub(1).min(line_len);
86        let end = line_start + end_col.saturating_sub(1).min(line_len);
87        start..end.max(start)
88    }
89
90    /// Calculate byte range from start of line to end of line content (excluding newline)
91    /// Useful for replacing line content while preserving line structure
92    pub fn line_content_range(&self, line: usize) -> Range<usize> {
93        let line_idx = line.saturating_sub(1);
94        let line_start = *self.line_starts.get(line_idx).unwrap_or(&self.content.len());
95
96        let current_line = self.content.lines().nth(line_idx).unwrap_or("");
97        let line_end = line_start + current_line.len();
98        line_start..line_end
99    }
100
101    /// Get the global start byte offset for a given 1-based line number.
102    pub fn get_line_start_byte(&self, line_num: usize) -> Option<usize> {
103        if line_num == 0 {
104            return None; // Lines are 1-based
105        }
106        // line_num is 1-based, line_starts index is 0-based
107        self.line_starts.get(line_num - 1).cloned()
108    }
109
110    /// Check if the line at the given index is within a code block
111    pub fn is_code_block(&self, line: usize) -> bool {
112        if let Some(ref code_block_lines) = self.code_block_lines {
113            code_block_lines.contains(&line)
114        } else {
115            // Fallback to a simpler check if pre-computation wasn't done
116            self.is_code_fence(line)
117        }
118    }
119
120    /// Check if the line is a code fence marker (``` or ~~~)
121    pub fn is_code_fence(&self, line: usize) -> bool {
122        self.content.lines().nth(line).is_some_and(|l| {
123            let trimmed = l.trim();
124            trimmed.starts_with("```") || trimmed.starts_with("~~~")
125        })
126    }
127
128    /// Check if the line is a tilde code fence marker (~~~)
129    pub fn is_tilde_code_block(&self, line: usize) -> bool {
130        self.content
131            .lines()
132            .nth(line)
133            .is_some_and(|l| l.trim().starts_with("~~~"))
134    }
135
136    /// Get a reference to the content
137    pub fn get_content(&self) -> &str {
138        &self.content
139    }
140
141    /// Pre-compute which lines are within code blocks for faster lookup
142    fn compute_code_block_lines(&mut self) {
143        let mut code_block_lines = HashSet::new();
144        let lines: Vec<&str> = self.content.lines().collect();
145
146        // Initialize block tracking
147        let mut in_block = false;
148        let mut active_fence_type = ' '; // '`' or '~'
149        let mut block_indent = 0;
150        let mut block_fence_length = 0;
151        let mut in_markdown_block = false;
152        let mut nested_fence_start = None;
153        let mut nested_fence_end = None;
154
155        // Process each line
156        for (i, line) in lines.iter().enumerate() {
157            let trimmed = line.trim();
158            let indent = line.len() - trimmed.len();
159
160            // 1. Detect indented code blocks (independent of fenced code blocks)
161            if line.starts_with("    ") || line.starts_with("\t") {
162                code_block_lines.insert(i);
163                continue; // Skip further processing for indented code blocks
164            }
165
166            // 2. Handle fenced code blocks (backticks and tildes)
167            if !in_block {
168                // Check for opening fences
169                if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
170                    let char_type = if trimmed.starts_with("```") { '`' } else { '~' };
171                    let count = trimmed.chars().take_while(|&c| c == char_type).count();
172                    let info_string = if trimmed.len() > count {
173                        trimmed[count..].trim()
174                    } else {
175                        ""
176                    };
177
178                    // Mark the start of a new code block
179                    in_block = true;
180                    active_fence_type = char_type;
181                    block_indent = indent;
182                    block_fence_length = count;
183                    in_markdown_block = info_string == "markdown";
184                    nested_fence_start = None;
185                    nested_fence_end = None;
186
187                    code_block_lines.insert(i);
188                }
189            } else {
190                // We're inside a code block
191                code_block_lines.insert(i);
192
193                // Detection of nested fences in markdown blocks
194                if in_markdown_block && nested_fence_start.is_none() && trimmed.starts_with("```") {
195                    // Check if this looks like a nested fence opening (has content after the backticks)
196                    let count = trimmed.chars().take_while(|&c| c == '`').count();
197                    let remaining = if trimmed.len() > count {
198                        trimmed[count..].trim()
199                    } else {
200                        ""
201                    };
202
203                    if !remaining.is_empty() {
204                        nested_fence_start = Some(i);
205                    }
206                }
207
208                // Check if we've found a nested fence end (only if we have a start)
209                if in_markdown_block
210                    && nested_fence_start.is_some()
211                    && nested_fence_end.is_none()
212                    && trimmed.starts_with("```")
213                    && trimmed.trim_start_matches('`').trim().is_empty()
214                {
215                    nested_fence_end = Some(i);
216                }
217
218                // Check if this line matches the closing fence pattern for the outer block
219                if trimmed.starts_with(&active_fence_type.to_string().repeat(3)) {
220                    let count = trimmed.chars().take_while(|&c| c == active_fence_type).count();
221                    let remaining = if trimmed.len() > count {
222                        trimmed[count..].trim()
223                    } else {
224                        ""
225                    };
226
227                    // A line is a closing fence if:
228                    // 1. It uses the same fence character as the opening fence
229                    // 2. It has at least as many fence characters as the opening fence
230                    // 3. It has no content after the fence characters (except for whitespace)
231                    // 4. Its indentation level is less than or equal to the opening fence
232                    let is_valid_closing_fence =
233                        count >= block_fence_length && remaining.is_empty() && indent <= block_indent;
234
235                    // For nested code blocks in markdown, the first backtick fence after the nested content
236                    // should be recognized as the closing fence for the outer block
237                    let is_nested_closing = nested_fence_end.is_some() && i == nested_fence_end.unwrap();
238
239                    // Skip nested closing fences
240                    if is_valid_closing_fence && !is_nested_closing {
241                        in_block = false;
242                        in_markdown_block = false;
243                    }
244                }
245            }
246        }
247
248        self.code_block_lines = Some(code_block_lines);
249    }
250}
251
/// Build a `(start_line, start_col, end_line, end_col)` tuple for a range that
/// stays on one line: it starts at `start_col` and ends `length` columns later.
pub fn calculate_single_line_range(line: usize, start_col: usize, length: usize) -> (usize, usize, usize, usize) {
    let end_col = start_col + length;
    (line, start_col, line, end_col)
}
256
/// Calculate range for entire line.
///
/// Returns `(line, 1, line, end_col)` where `end_col` is the 1-based character
/// column just past the last non-whitespace character, so trailing whitespace
/// is excluded. Columns are counted in characters rather than bytes, so
/// non-ASCII lines get the same column convention as the other range helpers.
pub fn calculate_line_range(line: usize, line_content: &str) -> (usize, usize, usize, usize) {
    let visible_chars = line_content.trim_end().chars().count();
    (line, 1, line, visible_chars + 1)
}
262
263/// Calculate range from regex match on a line
264pub fn calculate_match_range(
265    line: usize,
266    line_content: &str,
267    match_start: usize,
268    match_len: usize,
269) -> (usize, usize, usize, usize) {
270    // Bounds check to prevent panic
271    let line_len = line_content.len();
272    if match_start > line_len {
273        // If match_start is beyond line bounds, return a safe range at end of line
274        let char_count = line_content.chars().count();
275        return (line, char_count + 1, line, char_count + 1);
276    }
277
278    let safe_match_end = (match_start + match_len).min(line_len);
279    let safe_match_len = safe_match_end.saturating_sub(match_start);
280
281    // Convert byte positions to character positions
282    let char_start = line_content[..match_start].chars().count() + 1; // 1-indexed
283    let char_len = if safe_match_len > 0 {
284        line_content[match_start..safe_match_end].chars().count()
285    } else {
286        0
287    };
288    (line, char_start, line, char_start + char_len)
289}
290
/// Calculate range for trailing content (like trailing spaces).
///
/// `content_end` is the byte offset where the trailing content begins. The
/// result spans from the 1-based character column of that offset to one past
/// the last character of the line.
///
/// An offset past the end of the line is clamped to the line length and an
/// offset inside a multi-byte character is moved back to the start of that
/// character, so the function does not panic on bad input.
pub fn calculate_trailing_range(line: usize, line_content: &str, content_end: usize) -> (usize, usize, usize, usize) {
    let mut split_at = content_end.min(line_content.len());
    while !line_content.is_char_boundary(split_at) {
        split_at -= 1;
    }

    let (kept, trailing) = line_content.split_at(split_at);
    let kept_chars = kept.chars().count();
    let total_chars = kept_chars + trailing.chars().count();
    (line, kept_chars + 1, line, total_chars + 1)
}
297
298/// Calculate range for a heading (entire line)
299pub fn calculate_heading_range(line: usize, line_content: &str) -> (usize, usize, usize, usize) {
300    calculate_line_range(line, line_content)
301}
302
/// Calculate range for emphasis markers and content.
///
/// `start_pos` and `end_pos` are byte offsets into `line_content`; each is
/// converted independently to a 1-based character column.
///
/// Offsets past the end of the line are clamped to the line length and offsets
/// inside a multi-byte character are moved back to the start of that
/// character, so the function does not panic on bad input.
pub fn calculate_emphasis_range(
    line: usize,
    line_content: &str,
    start_pos: usize,
    end_pos: usize,
) -> (usize, usize, usize, usize) {
    let to_column = |byte_pos: usize| -> usize {
        let mut pos = byte_pos.min(line_content.len());
        while !line_content.is_char_boundary(pos) {
            pos -= 1;
        }
        line_content[..pos].chars().count() + 1 // 1-indexed
    };

    (line, to_column(start_pos), line, to_column(end_pos))
}
314
315/// Calculate range for HTML tags
316pub fn calculate_html_tag_range(
317    line: usize,
318    line_content: &str,
319    tag_start: usize,
320    tag_len: usize,
321) -> (usize, usize, usize, usize) {
322    calculate_match_range(line, line_content, tag_start, tag_len)
323}
324
325/// Calculate range for URLs
326pub fn calculate_url_range(
327    line: usize,
328    line_content: &str,
329    url_start: usize,
330    url_len: usize,
331) -> (usize, usize, usize, usize) {
332    calculate_match_range(line, line_content, url_start, url_len)
333}
334
335/// Calculate range for list markers
336pub fn calculate_list_marker_range(
337    line: usize,
338    line_content: &str,
339    marker_start: usize,
340    marker_len: usize,
341) -> (usize, usize, usize, usize) {
342    calculate_match_range(line, line_content, marker_start, marker_len)
343}
344
/// Range for the part of a line that goes beyond `limit` characters (for
/// example a maximum line length).
///
/// The range starts one column after the limit, or at the end of the line when
/// the line is shorter than the limit, and ends one past the last character.
pub fn calculate_excess_range(line: usize, line_content: &str, limit: usize) -> (usize, usize, usize, usize) {
    let total_chars = line_content.chars().count();
    let kept_chars = limit.min(total_chars);
    (line, kept_chars + 1, line, total_chars + 1)
}
351
#[cfg(test)]
mod tests {
    use super::*;

    /// Index over three short lines where only the last one lacks a newline.
    fn three_line_index() -> LineIndex {
        LineIndex::new(String::from("Line 1\nLine 2\nLine 3"))
    }

    #[test]
    fn test_single_line_range() {
        // Start column 10 plus length 3 ends at column 13 on the same line.
        assert_eq!(calculate_single_line_range(5, 10, 3), (5, 10, 5, 13));
    }

    #[test]
    fn test_line_range() {
        // End column is the trimmed length + 1; the two trailing spaces are excluded.
        assert_eq!(calculate_line_range(1, "# This is a heading  "), (1, 1, 1, 20));
    }

    #[test]
    fn test_match_range() {
        let content = "Text <div>content</div> more";
        let tag_start = 5; // Position of '<'
        let tag_len = 5; // Length of "<div>"

        // Columns are 1-indexed: start is 6, end is 6 + 5.
        assert_eq!(calculate_match_range(1, content, tag_start, tag_len), (1, 6, 1, 11));
    }

    #[test]
    fn test_trailing_range() {
        let content = "Text content   "; // 3 trailing spaces
        let content_end = 12; // End of "Text content"

        // Start is content_end + 1 (1-indexed); end is total length + 1.
        assert_eq!(calculate_trailing_range(1, content, content_end), (1, 13, 1, 16));
    }

    #[test]
    fn test_excess_range() {
        let content = "This line is too long for the limit";

        // Start is limit + 1; end is total length + 1 (35 chars + 1 = 36).
        assert_eq!(calculate_excess_range(1, content, 20), (1, 21, 1, 36));
    }

    #[test]
    fn test_whole_line_range() {
        let line_index = three_line_index();

        // First and middle lines include their newline; the last line has none.
        assert_eq!(line_index.whole_line_range(1), 0..7); // "Line 1\n"
        assert_eq!(line_index.whole_line_range(2), 7..14); // "Line 2\n"
        assert_eq!(line_index.whole_line_range(3), 14..20); // "Line 3"
    }

    #[test]
    fn test_line_content_range() {
        let line_index = three_line_index();

        // Content ranges never include the newline.
        assert_eq!(line_index.line_content_range(1), 0..6); // "Line 1"
        assert_eq!(line_index.line_content_range(2), 7..13); // "Line 2"
        assert_eq!(line_index.line_content_range(3), 14..20); // "Line 3"
    }

    #[test]
    fn test_line_text_range() {
        let line_index = LineIndex::new(String::from("Hello world\nAnother line"));

        // Partial text in the first line.
        assert_eq!(line_index.line_text_range(1, 1, 5), 0..4); // "Hell"

        // Partial text in the second line.
        assert_eq!(line_index.line_text_range(2, 1, 7), 12..18); // "Anothe"

        // An end column past the line is clamped to the line end.
        assert_eq!(line_index.line_text_range(1, 1, 100), 0..11); // "Hello world"
    }

    #[test]
    fn test_calculate_match_range_bounds_checking() {
        // Case 1: match_start beyond line bounds gives an empty range at
        // line length + 1.
        assert_eq!(calculate_match_range(121, "] not a link [", 57, 10), (121, 15, 121, 15));

        // Case 2: match extends beyond line end; end is clamped to line length + 1.
        assert_eq!(calculate_match_range(1, "short", 2, 10), (1, 3, 1, 6));

        // Case 3: normal case within bounds (start 7 + 1, end 7 + 4 + 1).
        assert_eq!(calculate_match_range(5, "normal text here", 7, 4), (5, 8, 5, 12));

        // Case 4: zero length match; end equals start.
        assert_eq!(calculate_match_range(10, "test line", 5, 0), (10, 6, 10, 6));
    }
}