perl_position_tracking/
mapper.rs

1//! Centralized position mapping for correct LSP position handling
2//!
3//! Handles:
4//! - CRLF/LF/CR line endings
5//! - UTF-16 code units (LSP protocol)
6//! - Byte offsets (parser)
7//! - Efficient conversions using rope data structure
8
9use crate::WirePosition as Position;
10use ropey::Rope;
11use serde_json::Value;
12
13/// Centralized position mapper using rope for efficiency.
14///
15/// Converts between byte offsets (used by the parser) and LSP positions
16/// (line/character in UTF-16 code units) while handling mixed line endings.
17///
18/// # Examples
19///
20/// ```
21/// use perl_position_tracking::PositionMapper;
22///
23/// let text = "my $x = 1;\nmy $y = 2;\n";
24/// let mapper = PositionMapper::new(text);
25///
26/// // Convert byte offset 0 → LSP position (line 0, char 0)
27/// let pos = mapper.byte_to_lsp_pos(0);
28/// assert_eq!(pos.line, 0);
29/// assert_eq!(pos.character, 0);
30///
31/// // Second line starts at byte 11
32/// let pos = mapper.byte_to_lsp_pos(11);
33/// assert_eq!(pos.line, 1);
34/// assert_eq!(pos.character, 0);
35/// ```
36pub struct PositionMapper {
37    /// The rope containing the document text
38    rope: Rope,
39    /// Cache of line ending style
40    line_ending: LineEnding,
41}
42
/// Which line terminator(s) a document uses.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LineEnding {
    /// Only `\n` terminators (Unix)
    Lf,
    /// Only `\r\n` terminators (Windows)
    CrLf,
    /// Only bare `\r` terminators (classic Mac)
    Cr,
    /// More than one terminator style appears in the document
    Mixed,
}
55
56impl PositionMapper {
57    /// Create a new position mapper from text.
58    ///
59    /// Detects line endings and builds an internal rope for efficient
60    /// position conversions.
61    ///
62    /// # Examples
63    ///
64    /// ```
65    /// use perl_position_tracking::PositionMapper;
66    ///
67    /// let mapper = PositionMapper::new("print 'hello';\n");
68    /// let pos = mapper.byte_to_lsp_pos(6);
69    /// assert_eq!(pos.line, 0);
70    /// assert_eq!(pos.character, 6);
71    /// ```
72    pub fn new(text: &str) -> Self {
73        let rope = Rope::from_str(text);
74        let line_ending = detect_line_ending(text);
75        Self { rope, line_ending }
76    }
77
78    /// Update the text content
79    pub fn update(&mut self, text: &str) {
80        self.rope = Rope::from_str(text);
81        self.line_ending = detect_line_ending(text);
82    }
83
84    /// Apply an incremental edit
85    pub fn apply_edit(&mut self, start_byte: usize, end_byte: usize, new_text: &str) {
86        // Clamp to valid range
87        let start_byte = start_byte.min(self.rope.len_bytes());
88        let end_byte = end_byte.min(self.rope.len_bytes());
89
90        // Convert byte offsets to char indices (rope uses chars!)
91        let start_char = self.rope.byte_to_char(start_byte);
92        let end_char = self.rope.byte_to_char(end_byte);
93
94        // Remove old text
95        if end_char > start_char {
96            self.rope.remove(start_char..end_char);
97        }
98
99        // Insert new text
100        if !new_text.is_empty() {
101            self.rope.insert(start_char, new_text);
102        }
103
104        // Update line ending detection
105        self.line_ending = detect_line_ending(&self.rope.to_string());
106    }
107
108    /// Convert LSP position to byte offset.
109    ///
110    /// Takes a line/character position (UTF-16 code units, as specified by the
111    /// LSP protocol) and returns the corresponding byte offset in the source.
112    ///
113    /// # Examples
114    ///
115    /// ```
116    /// use perl_position_tracking::{PositionMapper, WirePosition};
117    ///
118    /// let mapper = PositionMapper::new("my $x = 1;\nmy $y = 2;\n");
119    /// // Line 1, character 3 → "$y"
120    /// let byte = mapper.lsp_pos_to_byte(WirePosition { line: 1, character: 3 });
121    /// assert_eq!(byte, Some(14));
122    /// ```
123    pub fn lsp_pos_to_byte(&self, pos: Position) -> Option<usize> {
124        let line_idx = pos.line as usize;
125        if line_idx >= self.rope.len_lines() {
126            return None;
127        }
128
129        let line_start_byte = self.rope.line_to_byte(line_idx);
130        let line = self.rope.line(line_idx);
131
132        // Convert UTF-16 code units to byte offset
133        let mut utf16_offset = 0u32;
134        let mut byte_offset = 0;
135
136        for ch in line.chars() {
137            if utf16_offset >= pos.character {
138                break;
139            }
140
141            let ch_utf16_len = if ch as u32 > 0xFFFF { 2 } else { 1 };
142            let next_utf16 = utf16_offset + ch_utf16_len;
143
144            // Clamp positions inside a surrogate pair to the start of the
145            // code point, matching `utf16_line_col_to_offset`.
146            if next_utf16 > pos.character {
147                break;
148            }
149
150            utf16_offset = next_utf16;
151            byte_offset += ch.len_utf8();
152        }
153
154        Some(line_start_byte + byte_offset)
155    }
156
157    /// Convert byte offset to LSP position.
158    ///
159    /// Returns line/character (UTF-16 code units) suitable for LSP responses.
160    ///
161    /// # Examples
162    ///
163    /// ```
164    /// use perl_position_tracking::PositionMapper;
165    ///
166    /// let mapper = PositionMapper::new("sub foo {\n    return 1;\n}\n");
167    /// let pos = mapper.byte_to_lsp_pos(14);  // points into "return"
168    /// assert_eq!(pos.line, 1);
169    /// assert_eq!(pos.character, 4);
170    /// ```
171    pub fn byte_to_lsp_pos(&self, byte_offset: usize) -> Position {
172        let byte_offset = byte_offset.min(self.rope.len_bytes());
173
174        let line_idx = self.rope.byte_to_line(byte_offset);
175        let line_start_byte = self.rope.line_to_byte(line_idx);
176        let byte_in_line = byte_offset - line_start_byte;
177
178        // Convert byte offset to UTF-16 code units
179        let line = self.rope.line(line_idx);
180        let mut utf16_offset = 0u32;
181        let mut current_byte = 0;
182
183        for ch in line.chars() {
184            if current_byte >= byte_in_line {
185                break;
186            }
187            let ch_len = ch.len_utf8();
188            if current_byte + ch_len > byte_in_line {
189                // We're in the middle of this character
190                break;
191            }
192            current_byte += ch_len;
193            let ch_utf16_len = if ch as u32 > 0xFFFF { 2 } else { 1 };
194            utf16_offset += ch_utf16_len;
195        }
196
197        Position { line: line_idx as u32, character: utf16_offset }
198    }
199
200    /// Get the text content
201    pub fn text(&self) -> String {
202        self.rope.to_string()
203    }
204
205    /// Get a slice of text
206    pub fn slice(&self, start_byte: usize, end_byte: usize) -> String {
207        let start = start_byte.min(self.rope.len_bytes());
208        let end = end_byte.min(self.rope.len_bytes());
209        self.rope.slice(self.rope.byte_to_char(start)..self.rope.byte_to_char(end)).to_string()
210    }
211
212    /// Get total byte length
213    pub fn len_bytes(&self) -> usize {
214        self.rope.len_bytes()
215    }
216
217    /// Get total number of lines
218    pub fn len_lines(&self) -> usize {
219        self.rope.len_lines()
220    }
221
222    /// Convert LSP position to char index (for rope operations)
223    pub fn lsp_pos_to_char(&self, pos: Position) -> Option<usize> {
224        self.lsp_pos_to_byte(pos).map(|byte| self.rope.byte_to_char(byte))
225    }
226
227    /// Convert char index to LSP position
228    pub fn char_to_lsp_pos(&self, char_idx: usize) -> Position {
229        let byte_offset = self.rope.char_to_byte(char_idx);
230        self.byte_to_lsp_pos(byte_offset)
231    }
232
233    /// Check if empty
234    pub fn is_empty(&self) -> bool {
235        self.rope.len_bytes() == 0
236    }
237
238    /// Get line ending style
239    pub fn line_ending(&self) -> LineEnding {
240        self.line_ending
241    }
242}
243
244/// Convert JSON LSP position to our Position type.
245///
246/// Extracts line and character fields from a JSON object.
247pub fn json_to_position(pos: &Value) -> Option<Position> {
248    Some(Position {
249        line: pos["line"].as_u64()? as u32,
250        character: pos["character"].as_u64()? as u32,
251    })
252}
253
254/// Convert Position to JSON for LSP.
255///
256/// Creates a JSON object with line and character fields.
257pub fn position_to_json(pos: Position) -> Value {
258    serde_json::json!({
259        "line": pos.line,
260        "character": pos.character
261    })
262}
263
264/// Detect the predominant line ending style
265fn detect_line_ending(text: &str) -> LineEnding {
266    let mut crlf_count = 0;
267    let mut lf_count = 0;
268    let mut cr_count = 0;
269
270    let bytes = text.as_bytes();
271    let mut i = 0;
272    while i < bytes.len() {
273        if i + 1 < bytes.len() && bytes[i] == b'\r' && bytes[i + 1] == b'\n' {
274            crlf_count += 1;
275            i += 2;
276        } else if bytes[i] == b'\n' {
277            lf_count += 1;
278            i += 1;
279        } else if bytes[i] == b'\r' {
280            cr_count += 1;
281            i += 1;
282        } else {
283            i += 1;
284        }
285    }
286
287    // Determine predominant style
288    if crlf_count > 0 && lf_count == 0 && cr_count == 0 {
289        LineEnding::CrLf
290    } else if lf_count > 0 && crlf_count == 0 && cr_count == 0 {
291        LineEnding::Lf
292    } else if cr_count > 0 && crlf_count == 0 && lf_count == 0 {
293        LineEnding::Cr
294    } else if crlf_count > 0 || lf_count > 0 || cr_count > 0 {
295        LineEnding::Mixed
296    } else {
297        LineEnding::Lf // Default
298    }
299}
300
/// Apply UTF-8 edit to a string.
///
/// Replaces the byte range `start_byte..old_end_byte` with `replacement`.
///
/// The edit is silently skipped, leaving `text` untouched, when the range is
/// reversed, extends past the end of `text`, or either bound falls inside a
/// multi-byte character. Any of those would otherwise make
/// `String::replace_range` panic.
pub fn apply_edit_utf8(
    text: &mut String,
    start_byte: usize,
    old_end_byte: usize,
    replacement: &str,
) {
    // `is_char_boundary` is false for offsets past the end, so it covers the
    // out-of-range case as well.
    let range_is_valid = start_byte <= old_end_byte
        && text.is_char_boundary(start_byte)
        && text.is_char_boundary(old_end_byte);

    if range_is_valid {
        text.replace_range(start_byte..old_end_byte, replacement);
    }
}
316
/// Count the LF (`\n`) characters in `text`.
///
/// A CRLF pair contributes one; a bare CR contributes none.
pub fn newline_count(text: &str) -> usize {
    // In UTF-8 the byte 0x0A only ever encodes LF, so a byte scan is exact.
    text.bytes().filter(|&byte| byte == b'\n').count()
}
323
/// Length in UTF-8 bytes of the final line of `text`.
///
/// That is the number of bytes after the last `\n`, or the whole length when
/// `text` contains no `\n`.
pub fn last_line_column_utf8(text: &str) -> u32 {
    let last_line_start = text.rfind('\n').map_or(0, |newline| newline + 1);
    (text.len() - last_line_start) as u32
}
334
#[cfg(test)]
mod tests {
    use super::*;
    use perl_tdd_support::must_some;

    /// Shorthand for building an LSP position.
    fn at(line: u32, character: u32) -> Position {
        Position { line, character }
    }

    /// Shorthand for `lsp_pos_to_byte` on a line/column pair.
    fn byte_at(mapper: &PositionMapper, line: u32, character: u32) -> Option<usize> {
        mapper.lsp_pos_to_byte(at(line, character))
    }

    #[test]
    fn test_lf_positions() {
        let mapper = PositionMapper::new("line 1\nline 2\nline 3");

        // Very first position
        assert_eq!(byte_at(&mapper, 0, 0), Some(0));
        assert_eq!(mapper.byte_to_lsp_pos(0), at(0, 0));

        // Inside the first line
        assert_eq!(byte_at(&mapper, 0, 3), Some(3));
        assert_eq!(mapper.byte_to_lsp_pos(3), at(0, 3));

        // First byte of the second line
        assert_eq!(byte_at(&mapper, 1, 0), Some(7));
        assert_eq!(mapper.byte_to_lsp_pos(7), at(1, 0));
    }

    #[test]
    fn test_crlf_positions() {
        let mapper = PositionMapper::new("line 1\r\nline 2\r\nline 3");
        assert_eq!(mapper.line_ending(), LineEnding::CrLf);

        // Second line begins right after the first \r\n
        assert_eq!(byte_at(&mapper, 1, 0), Some(8));
        assert_eq!(mapper.byte_to_lsp_pos(8), at(1, 0));

        // Third line begins right after the second \r\n
        assert_eq!(byte_at(&mapper, 2, 0), Some(16));
        assert_eq!(mapper.byte_to_lsp_pos(16), at(2, 0));
    }

    #[test]
    fn test_utf16_positions() {
        // The emoji takes 2 UTF-16 code units and 4 UTF-8 bytes.
        let mapper = PositionMapper::new("hello 😀 world");

        // Just before the emoji
        assert_eq!(byte_at(&mapper, 0, 6), Some(6));
        // Just after it: column 6 + 2 units, byte 6 + 4 bytes
        assert_eq!(byte_at(&mapper, 0, 8), Some(10));
        // And the reverse direction
        assert_eq!(mapper.byte_to_lsp_pos(10), at(0, 8));
    }

    #[test]
    fn test_utf16_positions_clamp_mid_surrogate_to_char_start() {
        let mapper = PositionMapper::new("a😀b");

        // 😀 spans code units 1..3, so column 2 falls inside it.
        assert_eq!(byte_at(&mapper, 0, 2), Some(1));
    }

    #[test]
    fn test_utf16_surrogate_pair_boundaries() {
        // 💖 (U+1F496) lies outside the BMP: 4 UTF-8 bytes, 2 UTF-16 units.
        // Bytes:   x = 0, 💖 = 1..5, y = 5
        // Columns: x = 0, 💖 = 1..3, y = 3
        let mapper = PositionMapper::new("x💖y");

        // Columns in front of the surrogate pair
        assert_eq!(byte_at(&mapper, 0, 0), Some(0));
        assert_eq!(byte_at(&mapper, 0, 1), Some(1));
        // Inside the pair: clamps back to the emoji's first byte, the same
        // way `utf16_line_col_to_offset` does.
        assert_eq!(byte_at(&mapper, 0, 2), Some(1));
        // Right after the pair
        assert_eq!(byte_at(&mapper, 0, 3), Some(5));
        // Past 'y', i.e. end of the string
        assert_eq!(byte_at(&mapper, 0, 4), Some(6));
    }

    #[test]
    fn test_utf16_max_code_point() {
        // U+10FFFF, the largest code point: 4 UTF-8 bytes, one surrogate pair.
        let text = "a\u{10FFFF}b";
        let mapper = PositionMapper::new(text);

        // Columns: a = 0, U+10FFFF = 1..3, b = 3.
        assert_eq!(byte_at(&mapper, 0, 0), Some(0));
        assert_eq!(byte_at(&mapper, 0, 1), Some(1));
        // Column 2 is mid-surrogate and clamps.
        assert_eq!(byte_at(&mapper, 0, 2), Some(1));
        assert_eq!(byte_at(&mapper, 0, 3), Some(5));
        assert_eq!(byte_at(&mapper, 0, 4), Some(6));

        // The character-aligned byte offsets map back to the same columns.
        assert_eq!(mapper.byte_to_lsp_pos(0), at(0, 0));
        assert_eq!(mapper.byte_to_lsp_pos(1), at(0, 1));
        assert_eq!(mapper.byte_to_lsp_pos(5), at(0, 3));
        assert_eq!(mapper.byte_to_lsp_pos(6), at(0, 4));
    }

    #[test]
    fn test_utf16_mixed_bmp_and_supplementary_plane() {
        // é  U+00E9   BMP            2 bytes, 1 unit
        // 💖 U+1F496  supplementary  4 bytes, 2 units
        // ñ  U+00F1   BMP            2 bytes, 1 unit
        // 🎉 U+1F389  supplementary  4 bytes, 2 units
        let mapper = PositionMapper::new("aé💖ñ🎉b");

        // Columns: a=0, é=1, 💖=2..4, ñ=4, 🎉=5..7, b=7, end=8
        assert_eq!(byte_at(&mapper, 0, 0), Some(0)); // a
        assert_eq!(byte_at(&mapper, 0, 1), Some(1)); // é
        assert_eq!(byte_at(&mapper, 0, 2), Some(3)); // start of 💖
        assert_eq!(byte_at(&mapper, 0, 3), Some(3)); // inside 💖, clamped
        assert_eq!(byte_at(&mapper, 0, 4), Some(7)); // ñ
        assert_eq!(byte_at(&mapper, 0, 5), Some(9)); // start of 🎉
        assert_eq!(byte_at(&mapper, 0, 6), Some(9)); // inside 🎉, clamped
        assert_eq!(byte_at(&mapper, 0, 7), Some(13)); // b
        assert_eq!(byte_at(&mapper, 0, 8), Some(14)); // end
    }

    #[test]
    fn test_utf16_zero_length_input() {
        let mapper = PositionMapper::new("");

        // The empty rope still exposes a single, empty line 0.
        assert_eq!(byte_at(&mapper, 0, 0), Some(0));
        // Any larger column clamps back to byte 0.
        assert_eq!(byte_at(&mapper, 0, 5), Some(0));

        // There is no line 1.
        assert!(byte_at(&mapper, 1, 0).is_none());

        // Byte 0 maps back to the origin.
        assert_eq!(mapper.byte_to_lsp_pos(0), at(0, 0));
    }

    #[test]
    fn test_utf16_consecutive_surrogate_pairs() {
        // Two adjacent supplementary-plane chars: clamping inside one must
        // never skip ahead to the other.
        // Columns: first 💖 = 0..2, second 💖 = 2..4
        // Bytes:   first 💖 = 0..4, second 💖 = 4..8
        let mapper = PositionMapper::new("💖💖");

        assert_eq!(byte_at(&mapper, 0, 0), Some(0));
        // Inside the first pair
        assert_eq!(byte_at(&mapper, 0, 1), Some(0));
        assert_eq!(byte_at(&mapper, 0, 2), Some(4));
        // Inside the second pair
        assert_eq!(byte_at(&mapper, 0, 3), Some(4));
        assert_eq!(byte_at(&mapper, 0, 4), Some(8));
    }

    #[test]
    fn test_utf16_clamp_matches_convert_helper() {
        // `PositionMapper::lsp_pos_to_byte` and
        // `convert::utf16_line_col_to_offset` are the two canonical
        // UTF-16 -> byte converters; they have to agree on every column,
        // mid-surrogate ones included.
        use crate::convert::utf16_line_col_to_offset;

        let text = "a😀b💖c\nx💡y";
        let mapper = PositionMapper::new(text);

        // Line 0 is "a😀b💖c": a=0, 😀=1..3, b=3, 💖=4..6, c=6, end=7
        for col in 0..=7 {
            let helper_byte = utf16_line_col_to_offset(text, 0, col);
            let mapper_byte = byte_at(&mapper, 0, col).unwrap_or(usize::MAX);
            assert_eq!(
                mapper_byte, helper_byte,
                "disagreement at line 0 col {col}: mapper={mapper_byte} helper={helper_byte}"
            );
        }
    }

    #[test]
    fn test_mixed_line_endings() {
        let mapper = PositionMapper::new("line 1\r\nline 2\nline 3\rline 4");
        assert_eq!(mapper.line_ending(), LineEnding::Mixed);

        // First byte of every line, whatever terminator precedes it
        assert_eq!(mapper.byte_to_lsp_pos(0), at(0, 0));
        assert_eq!(mapper.byte_to_lsp_pos(8), at(1, 0));
        assert_eq!(mapper.byte_to_lsp_pos(15), at(2, 0));
        assert_eq!(mapper.byte_to_lsp_pos(22), at(3, 0));
    }

    #[test]
    fn test_incremental_edit() {
        let mut mapper = PositionMapper::new("hello world");

        // Swap "world" for "Rust"
        mapper.apply_edit(6, 11, "Rust");
        assert_eq!(mapper.text(), "hello Rust");

        // Pure insertion (empty range)
        mapper.apply_edit(5, 5, " beautiful");
        assert_eq!(mapper.text(), "hello beautiful Rust");

        // Collapse " beautiful " back to a single space
        mapper.apply_edit(5, 16, " ");
        assert_eq!(mapper.text(), "hello Rust");
    }

    // --- Focused tests for lsp_pos_to_byte, lsp_pos_to_char and
    //     char_to_lsp_pos: multi-byte UTF-8, CRLF, out-of-bounds input ---

    /// Byte ↔ LSP position round trip over text holding 🦀 (U+1F980, 4 UTF-8
    /// bytes, 2 UTF-16 units) and é (U+00E9, 2 UTF-8 bytes, 1 UTF-16 unit).
    #[test]
    fn test_multibyte_utf8_round_trip_byte_to_lsp_pos_to_byte() {
        // Bytes:   a = 0, é = 1..3, 🦀 = 3..7, b = 7  (8 bytes total)
        // Columns: a = 0, é = 1,    🦀 = 2..4, b = 4, end = 5
        let mapper = PositionMapper::new("aé🦀b");

        // Byte → position at the start of each character
        assert_eq!(mapper.byte_to_lsp_pos(0), at(0, 0)); // a
        assert_eq!(mapper.byte_to_lsp_pos(1), at(0, 1)); // é
        assert_eq!(mapper.byte_to_lsp_pos(3), at(0, 2)); // 🦀
        assert_eq!(mapper.byte_to_lsp_pos(7), at(0, 4)); // b

        // Position → byte → position is the identity at character starts.
        for (byte_offset, col) in [(0u32, 0u32), (1, 1), (3, 2), (7, 4)] {
            let pos = at(0, col);
            let got_byte = must_some(mapper.lsp_pos_to_byte(pos));
            assert_eq!(
                got_byte, byte_offset as usize,
                "lsp_pos_to_byte for col {col} should be byte {byte_offset}"
            );
            assert_eq!(
                mapper.byte_to_lsp_pos(got_byte),
                pos,
                "byte_to_lsp_pos should round-trip for col {col}"
            );
        }
    }

    /// `lsp_pos_to_char` and `char_to_lsp_pos` invert each other on text with é.
    #[test]
    fn test_lsp_pos_to_char_and_char_to_lsp_pos_round_trip() {
        // é takes 2 UTF-8 bytes yet is a single rope char and a single UTF-16 unit.
        let mapper = PositionMapper::new("aéb");

        // Column 1 is where é starts, which is char index 1.
        let char_idx = must_some(mapper.lsp_pos_to_char(at(0, 1)));
        assert_eq!(char_idx, 1, "char index of 'é' is 1");
        assert_eq!(mapper.char_to_lsp_pos(char_idx), at(0, 1));

        // Same check for b at char index 2.
        let char_b = must_some(mapper.lsp_pos_to_char(at(0, 2)));
        assert_eq!(char_b, 2);
        assert_eq!(mapper.char_to_lsp_pos(char_b), at(0, 2));
    }

    /// On CRLF text, `lsp_pos_to_byte` yields the right offset on every line,
    /// including the first byte after each \r\n pair.
    #[test]
    fn test_crlf_lsp_pos_to_byte_per_line() {
        // bytes: a=0 b=1 c=2 \r=3 \n=4 d=5 e=6 f=7 \r=8 \n=9 g=10 h=11 i=12
        let mapper = PositionMapper::new("abc\r\ndef\r\nghi");
        assert_eq!(mapper.line_ending(), LineEnding::CrLf);

        assert_eq!(byte_at(&mapper, 0, 0), Some(0)); // line 0 start
        assert_eq!(byte_at(&mapper, 0, 2), Some(2)); // 'c'
        assert_eq!(byte_at(&mapper, 1, 0), Some(5)); // 'd'
        assert_eq!(byte_at(&mapper, 1, 2), Some(7)); // 'f'
        assert_eq!(byte_at(&mapper, 2, 0), Some(10)); // 'g'
    }

    /// A line past the end makes both `lsp_pos_to_byte` and `lsp_pos_to_char`
    /// return `None`.
    #[test]
    fn test_out_of_bounds_line_returns_none() {
        // The rope counts 3 lines in "one\ntwo\n" (0, 1 and a trailing empty
        // line 2), so line 3 is the first one that does not exist.
        let mapper = PositionMapper::new("one\ntwo\n");
        let beyond = at(3, 0);

        assert!(
            mapper.lsp_pos_to_byte(beyond).is_none(),
            "line past end of document should return None"
        );
        assert!(
            mapper.lsp_pos_to_char(beyond).is_none(),
            "line past end of document should return None for lsp_pos_to_char"
        );
    }

    /// A column past the end of a line is clamped rather than rejected.
    #[test]
    fn test_out_of_bounds_column_clamps_to_line_end() {
        let mapper = PositionMapper::new("hello\nworld\n");

        // Line 0 is "hello\n": columns 0..5 plus the newline at byte 5.
        let clamped = must_some(byte_at(&mapper, 0, 9999));

        // Whatever the exact clamp point, the result must not go beyond
        // byte 6, the end of line 0.
        assert!(clamped <= 6, "clamped byte {clamped} should not exceed end of line 0 (byte 6)");
    }
}
perl_position_tracking/mapper.rs

perl_position_tracking/
mapper.rs