Skip to main content

perl_position_tracking/
mapper.rs

1//! Centralized position mapping for correct LSP position handling
2//!
3//! Handles:
4//! - CRLF/LF/CR line endings
5//! - UTF-16 code units (LSP protocol)
6//! - Byte offsets (parser)
7//! - Efficient conversions using rope data structure
8
9use crate::WirePosition as Position;
10use ropey::Rope;
11use serde_json::Value;
12
/// Centralized position mapper using rope for efficiency.
///
/// Converts between byte offsets (used by the parser) and LSP positions
/// (line/character in UTF-16 code units) while handling mixed line endings.
///
/// The mapper owns its own copy of the document text (stored in a rope) and
/// remembers the line-ending style that was detected for that text.
///
/// # Examples
///
/// ```
/// use perl_position_tracking::PositionMapper;
///
/// let text = "my $x = 1;\nmy $y = 2;\n";
/// let mapper = PositionMapper::new(text);
///
/// // Convert byte offset 0 → LSP position (line 0, char 0)
/// let pos = mapper.byte_to_lsp_pos(0);
/// assert_eq!(pos.line, 0);
/// assert_eq!(pos.character, 0);
///
/// // Second line starts at byte 11
/// let pos = mapper.byte_to_lsp_pos(11);
/// assert_eq!(pos.line, 1);
/// assert_eq!(pos.character, 0);
/// ```
pub struct PositionMapper {
    /// The rope containing the document text; every conversion is answered
    /// from this copy.
    rope: Rope,
    /// Line-ending style of the current text, recomputed whenever the text
    /// is replaced (`update`) or edited (`apply_edit`).
    line_ending: LineEnding,
}
42
/// Line ending style detected in a document.
///
/// Produced by the line-ending scan that runs whenever a `PositionMapper`
/// is created or its text changes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LineEnding {
    /// Unix-style line endings (LF only). Also reported for text that
    /// contains no line breaks at all.
    Lf,
    /// Windows-style line endings (CRLF only)
    CrLf,
    /// Classic Mac line endings (CR only)
    Cr,
    /// More than one of the above styles occurs in the same text
    Mixed,
}
55
56impl PositionMapper {
57    /// Create a new position mapper from text.
58    ///
59    /// Detects line endings and builds an internal rope for efficient
60    /// position conversions.
61    ///
62    /// # Examples
63    ///
64    /// ```
65    /// use perl_position_tracking::PositionMapper;
66    ///
67    /// let mapper = PositionMapper::new("print 'hello';\n");
68    /// let pos = mapper.byte_to_lsp_pos(6);
69    /// assert_eq!(pos.line, 0);
70    /// assert_eq!(pos.character, 6);
71    /// ```
72    pub fn new(text: &str) -> Self {
73        let rope = Rope::from_str(text);
74        let line_ending = detect_line_ending(text);
75        Self { rope, line_ending }
76    }
77
78    /// Update the text content
79    pub fn update(&mut self, text: &str) {
80        self.rope = Rope::from_str(text);
81        self.line_ending = detect_line_ending(text);
82    }
83
84    /// Apply an incremental edit
85    pub fn apply_edit(&mut self, start_byte: usize, end_byte: usize, new_text: &str) {
86        // Clamp to valid range
87        let start_byte = start_byte.min(self.rope.len_bytes());
88        let end_byte = end_byte.min(self.rope.len_bytes());
89
90        // Convert byte offsets to char indices (rope uses chars!)
91        let start_char = self.rope.byte_to_char(start_byte);
92        let end_char = self.rope.byte_to_char(end_byte);
93
94        // Remove old text
95        if end_char > start_char {
96            self.rope.remove(start_char..end_char);
97        }
98
99        // Insert new text
100        if !new_text.is_empty() {
101            self.rope.insert(start_char, new_text);
102        }
103
104        // Update line ending detection
105        self.line_ending = detect_line_ending(&self.rope.to_string());
106    }
107
108    /// Convert LSP position to byte offset.
109    ///
110    /// Takes a line/character position (UTF-16 code units, as specified by the
111    /// LSP protocol) and returns the corresponding byte offset in the source.
112    ///
113    /// # Examples
114    ///
115    /// ```
116    /// use perl_position_tracking::{PositionMapper, WirePosition};
117    ///
118    /// let mapper = PositionMapper::new("my $x = 1;\nmy $y = 2;\n");
119    /// // Line 1, character 3 → "$y"
120    /// let byte = mapper.lsp_pos_to_byte(WirePosition { line: 1, character: 3 });
121    /// assert_eq!(byte, Some(14));
122    /// ```
123    pub fn lsp_pos_to_byte(&self, pos: Position) -> Option<usize> {
124        let line_idx = pos.line as usize;
125        if line_idx >= self.rope.len_lines() {
126            return None;
127        }
128
129        let line_start_byte = self.rope.line_to_byte(line_idx);
130        let line = self.rope.line(line_idx);
131
132        // Convert UTF-16 code units to byte offset
133        let mut utf16_offset = 0u32;
134        let mut byte_offset = 0;
135
136        for ch in line.chars() {
137            if utf16_offset >= pos.character {
138                break;
139            }
140            let ch_utf16_len = if ch as u32 > 0xFFFF { 2 } else { 1 };
141            utf16_offset += ch_utf16_len;
142            byte_offset += ch.len_utf8();
143        }
144
145        Some(line_start_byte + byte_offset)
146    }
147
148    /// Convert byte offset to LSP position.
149    ///
150    /// Returns line/character (UTF-16 code units) suitable for LSP responses.
151    ///
152    /// # Examples
153    ///
154    /// ```
155    /// use perl_position_tracking::PositionMapper;
156    ///
157    /// let mapper = PositionMapper::new("sub foo {\n    return 1;\n}\n");
158    /// let pos = mapper.byte_to_lsp_pos(14);  // points into "return"
159    /// assert_eq!(pos.line, 1);
160    /// assert_eq!(pos.character, 4);
161    /// ```
162    pub fn byte_to_lsp_pos(&self, byte_offset: usize) -> Position {
163        let byte_offset = byte_offset.min(self.rope.len_bytes());
164
165        let line_idx = self.rope.byte_to_line(byte_offset);
166        let line_start_byte = self.rope.line_to_byte(line_idx);
167        let byte_in_line = byte_offset - line_start_byte;
168
169        // Convert byte offset to UTF-16 code units
170        let line = self.rope.line(line_idx);
171        let mut utf16_offset = 0u32;
172        let mut current_byte = 0;
173
174        for ch in line.chars() {
175            if current_byte >= byte_in_line {
176                break;
177            }
178            let ch_len = ch.len_utf8();
179            if current_byte + ch_len > byte_in_line {
180                // We're in the middle of this character
181                break;
182            }
183            current_byte += ch_len;
184            let ch_utf16_len = if ch as u32 > 0xFFFF { 2 } else { 1 };
185            utf16_offset += ch_utf16_len;
186        }
187
188        Position { line: line_idx as u32, character: utf16_offset }
189    }
190
191    /// Get the text content
192    pub fn text(&self) -> String {
193        self.rope.to_string()
194    }
195
196    /// Get a slice of text
197    pub fn slice(&self, start_byte: usize, end_byte: usize) -> String {
198        let start = start_byte.min(self.rope.len_bytes());
199        let end = end_byte.min(self.rope.len_bytes());
200        self.rope.slice(self.rope.byte_to_char(start)..self.rope.byte_to_char(end)).to_string()
201    }
202
203    /// Get total byte length
204    pub fn len_bytes(&self) -> usize {
205        self.rope.len_bytes()
206    }
207
208    /// Get total number of lines
209    pub fn len_lines(&self) -> usize {
210        self.rope.len_lines()
211    }
212
213    /// Convert LSP position to char index (for rope operations)
214    pub fn lsp_pos_to_char(&self, pos: Position) -> Option<usize> {
215        self.lsp_pos_to_byte(pos).map(|byte| self.rope.byte_to_char(byte))
216    }
217
218    /// Convert char index to LSP position
219    pub fn char_to_lsp_pos(&self, char_idx: usize) -> Position {
220        let byte_offset = self.rope.char_to_byte(char_idx);
221        self.byte_to_lsp_pos(byte_offset)
222    }
223
224    /// Check if empty
225    pub fn is_empty(&self) -> bool {
226        self.rope.len_bytes() == 0
227    }
228
229    /// Get line ending style
230    pub fn line_ending(&self) -> LineEnding {
231        self.line_ending
232    }
233}
234
235/// Convert JSON LSP position to our Position type.
236///
237/// Extracts line and character fields from a JSON object.
238pub fn json_to_position(pos: &Value) -> Option<Position> {
239    Some(Position {
240        line: pos["line"].as_u64()? as u32,
241        character: pos["character"].as_u64()? as u32,
242    })
243}
244
245/// Convert Position to JSON for LSP.
246///
247/// Creates a JSON object with line and character fields.
248pub fn position_to_json(pos: Position) -> Value {
249    serde_json::json!({
250        "line": pos.line,
251        "character": pos.character
252    })
253}
254
255/// Detect the predominant line ending style
256fn detect_line_ending(text: &str) -> LineEnding {
257    let mut crlf_count = 0;
258    let mut lf_count = 0;
259    let mut cr_count = 0;
260
261    let bytes = text.as_bytes();
262    let mut i = 0;
263    while i < bytes.len() {
264        if i + 1 < bytes.len() && bytes[i] == b'\r' && bytes[i + 1] == b'\n' {
265            crlf_count += 1;
266            i += 2;
267        } else if bytes[i] == b'\n' {
268            lf_count += 1;
269            i += 1;
270        } else if bytes[i] == b'\r' {
271            cr_count += 1;
272            i += 1;
273        } else {
274            i += 1;
275        }
276    }
277
278    // Determine predominant style
279    if crlf_count > 0 && lf_count == 0 && cr_count == 0 {
280        LineEnding::CrLf
281    } else if lf_count > 0 && crlf_count == 0 && cr_count == 0 {
282        LineEnding::Lf
283    } else if cr_count > 0 && crlf_count == 0 && lf_count == 0 {
284        LineEnding::Cr
285    } else if crlf_count > 0 || lf_count > 0 || cr_count > 0 {
286        LineEnding::Mixed
287    } else {
288        LineEnding::Lf // Default
289    }
290}
291
/// Apply UTF-8 edit to a string.
///
/// Replaces the byte range `start_byte..old_end_byte` of `text` with
/// `replacement`.
///
/// The edit is silently skipped, leaving `text` untouched, when the range is
/// not usable: either offset lies outside the string or inside a multi-byte
/// character, or the range is inverted (`start_byte > old_end_byte`).
pub fn apply_edit_utf8(
    text: &mut String,
    start_byte: usize,
    old_end_byte: usize,
    replacement: &str,
) {
    // `replace_range` panics on an inverted range, so reject it up front
    // together with the char-boundary violations.
    let range_is_valid = start_byte <= old_end_byte
        && text.is_char_boundary(start_byte)
        && text.is_char_boundary(old_end_byte);
    if !range_is_valid {
        return;
    }
    text.replace_range(start_byte..old_end_byte, replacement);
}
307
/// Count newlines in text.
///
/// Only LF (`'\n'`) is counted; a CRLF pair therefore contributes one and a
/// lone CR contributes nothing.
pub fn newline_count(text: &str) -> usize {
    text.matches('\n').count()
}
314
/// Get the column (in UTF-8 bytes) of the last line.
///
/// This is the number of bytes that follow the final `'\n'` in `text`, or
/// the length of the whole string when it contains no `'\n'`.
pub fn last_line_column_utf8(text: &str) -> u32 {
    let last_line_start = match text.rfind('\n') {
        Some(newline_at) => newline_at + 1,
        None => 0,
    };
    (text.len() - last_line_start) as u32
}
325
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building an LSP position.
    fn at(line: u32, character: u32) -> Position {
        Position { line, character }
    }

    #[test]
    fn test_lf_positions() {
        let mapper = PositionMapper::new("line 1\nline 2\nline 3");

        // (line, character, byte): start of document, middle of the first
        // line, start of the second line. Each pair must round-trip.
        let cases: [(u32, u32, usize); 3] = [(0, 0, 0), (0, 3, 3), (1, 0, 7)];
        for &(line, character, byte) in cases.iter() {
            assert_eq!(mapper.lsp_pos_to_byte(at(line, character)), Some(byte));
            assert_eq!(mapper.byte_to_lsp_pos(byte), at(line, character));
        }
    }

    #[test]
    fn test_crlf_positions() {
        let mapper = PositionMapper::new("line 1\r\nline 2\r\nline 3");

        assert_eq!(mapper.line_ending(), LineEnding::CrLf);

        // (line, byte): each line starts right after the preceding "\r\n".
        let line_starts: [(u32, usize); 2] = [(1, 8), (2, 16)];
        for &(line, byte) in line_starts.iter() {
            assert_eq!(mapper.lsp_pos_to_byte(at(line, 0)), Some(byte));
            assert_eq!(mapper.byte_to_lsp_pos(byte), at(line, 0));
        }
    }

    #[test]
    fn test_utf16_positions() {
        // The emoji occupies 2 UTF-16 code units and 4 UTF-8 bytes.
        let mapper = PositionMapper::new("hello 😀 world");

        // Just before the emoji
        let before_emoji = mapper.lsp_pos_to_byte(at(0, 6));
        assert_eq!(before_emoji, Some(6));

        // Just after the emoji: 6 + 2 code units → 6 + 4 bytes
        let after_emoji = mapper.lsp_pos_to_byte(at(0, 8));
        assert_eq!(after_emoji, Some(10));

        // And back again
        assert_eq!(mapper.byte_to_lsp_pos(10), at(0, 8));
    }

    #[test]
    fn test_mixed_line_endings() {
        let mapper = PositionMapper::new("line 1\r\nline 2\nline 3\rline 4");

        assert_eq!(mapper.line_ending(), LineEnding::Mixed);

        // (byte, line): byte offset at which each line starts.
        let line_starts: [(usize, u32); 4] = [(0, 0), (8, 1), (15, 2), (22, 3)];
        for &(byte, line) in line_starts.iter() {
            assert_eq!(mapper.byte_to_lsp_pos(byte), at(line, 0));
        }
    }

    #[test]
    fn test_incremental_edit() {
        let mut mapper = PositionMapper::new("hello world");

        // (start, end, replacement, expected text), applied in sequence:
        // replace "world" with "Rust", insert " beautiful" in the middle,
        // then collapse " beautiful " back to a single space.
        let steps: [(usize, usize, &str, &str); 3] = [
            (6, 11, "Rust", "hello Rust"),
            (5, 5, " beautiful", "hello beautiful Rust"),
            (5, 16, " ", "hello Rust"),
        ];
        for &(start, end, replacement, expected) in steps.iter() {
            mapper.apply_edit(start, end, replacement);
            assert_eq!(mapper.text(), expected);
        }
    }
}