Skip to main content

gram_data/
utf16.rs

1//! Map UTF-8 byte offsets to LSP positions (UTF-16 code units).
2
3use tree_sitter::Point;
4
/// Count the UTF-16 code units needed to encode `line_prefix`.
///
/// When `line_prefix` is the text of a line up to the cursor, the result is the
/// LSP `character` column of that cursor.
fn utf16_col_in_line(line_prefix: &str) -> u32 {
    line_prefix
        .chars()
        // `len_utf16` is 1 for BMP scalars and 2 for anything needing a surrogate pair.
        .map(|scalar| scalar.len_utf16() as u32)
        .sum()
}
13
14/// Convert a UTF-8 byte index in `source` to LSP `(line, character)` with UTF-16 `character`.
15pub fn byte_offset_to_position(source: &str, byte_idx: usize) -> (u32, u32) {
16    let byte_idx = byte_idx.min(source.len());
17    let mut line = 0u32;
18    let mut line_start = 0usize;
19    for (i, ch) in source.char_indices() {
20        if i == byte_idx {
21            let col = utf16_col_in_line(&source[line_start..i]);
22            return (line, col);
23        }
24        if ch == '\n' {
25            line += 1;
26            line_start = i + ch.len_utf8();
27        }
28    }
29    (line, utf16_col_in_line(&source[line_start..]))
30}
31
32/// Byte range (start inclusive, end exclusive) to LSP `(line, char)` pairs.
33pub fn byte_range_to_lsp_range(source: &str, start_byte: usize, end_byte: usize) -> ((u32, u32), (u32, u32)) {
34    let start = byte_offset_to_position(source, start_byte);
35    let end = byte_offset_to_position(source, end_byte.max(start_byte));
36    (start, end)
37}
38
39/// Map tree-sitter `Point` to byte offset then to LSP range.
40pub fn point_range_to_lsp_range(source: &str, start: Point, end: Point) -> ((u32, u32), (u32, u32)) {
41    let start_byte = point_to_byte_offset(source, start);
42    let end_byte = point_to_byte_offset(source, end).max(start_byte);
43    byte_range_to_lsp_range(source, start_byte, end_byte)
44}
45
46fn point_to_byte_offset(source: &str, point: Point) -> usize {
47    let mut row = 0usize;
48    let mut line_start = 0usize;
49    for (i, ch) in source.char_indices() {
50        if row == point.row {
51            let rest = &source[line_start..];
52            let line_len = rest.find('\n').unwrap_or(rest.len());
53            let col_b = point.column.min(line_len);
54            return line_start + col_b;
55        }
56        if ch == '\n' {
57            row += 1;
58            line_start = i + ch.len_utf8();
59        }
60    }
61    source.len()
62}
63
#[cfg(test)]
mod tests {
    use super::*;

    /// Line/column tracking on plain ASCII, including the position of the newline itself.
    #[test]
    fn ascii_lines() {
        let s = "ab\ncd";
        assert_eq!(byte_offset_to_position(s, 0), (0, 0));
        assert_eq!(byte_offset_to_position(s, 1), (0, 1));
        assert_eq!(byte_offset_to_position(s, 2), (0, 2)); // start of `\n`
        assert_eq!(byte_offset_to_position(s, 3), (1, 0)); // `c`
    }

    /// Columns are counted in UTF-16 code units, not bytes or Unicode scalars.
    #[test]
    fn non_ascii_columns_are_utf16_units() {
        // 'é': bytes 0..2 (1 unit); emoji: bytes 2..6 (2 units); 'x': byte 6; '\n': byte 7.
        let s = "é😀x\nz";
        assert_eq!(byte_offset_to_position(s, 2), (0, 1));
        assert_eq!(byte_offset_to_position(s, 6), (0, 3));
        assert_eq!(byte_offset_to_position(s, 7), (0, 4)); // start of `\n`
        assert_eq!(byte_offset_to_position(s, 8), (1, 0)); // `z`
    }

    /// Offsets beyond the end of the text resolve to the end-of-text position.
    #[test]
    fn offset_past_end_is_clamped() {
        assert_eq!(byte_offset_to_position("ab\ncd", 99), (1, 2));
        assert_eq!(byte_offset_to_position("", 0), (0, 0));
    }

    /// An inverted byte range collapses to an empty range at the start position.
    #[test]
    fn inverted_byte_range_collapses_to_start() {
        let s = "ab\ncd";
        assert_eq!(byte_range_to_lsp_range(s, 3, 1), ((1, 0), (1, 0)));
        assert_eq!(byte_range_to_lsp_range(s, 1, 4), ((0, 1), (1, 1)));
    }

    #[test]
    fn point_column_clamped_to_current_line_bytes() {
        let s = "hi\nyo";
        // Row 0, column past end of "hi" (2 bytes) must not extend into "yo" or `\n`.
        let p = Point { row: 0, column: 99 };
        assert_eq!(point_to_byte_offset(s, p), 2);
    }
}