texlab/
line_index.rs

1// The following code has been copied from rust-analyzer.
2
3//! `LineIndex` maps flat `TextSize` offsets into `(Line, Column)`
4//! representation.
5use std::iter;
6
7use rowan::{TextRange, TextSize};
8use rustc_hash::FxHashMap;
9
10#[derive(Clone, Debug, PartialEq, Eq)]
11pub struct LineIndex {
12    /// Offset the the beginning of each line, zero-based
13    pub(crate) newlines: Vec<TextSize>,
14    /// List of non-ASCII characters on each line
15    pub(crate) utf16_lines: FxHashMap<u32, Vec<Utf16Char>>,
16}
17
18#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
19pub struct LineColUtf16 {
20    /// Zero-based
21    pub line: u32,
22    /// Zero-based
23    pub col: u32,
24}
25
26#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
27pub struct LineCol {
28    /// Zero-based
29    pub line: u32,
30    /// Zero-based utf8 offset
31    pub col: u32,
32}
33
34#[derive(Clone, Debug, Hash, PartialEq, Eq)]
35pub(crate) struct Utf16Char {
36    /// Start offset of a character inside a line, zero-based
37    pub(crate) start: TextSize,
38    /// End offset of a character inside a line, zero-based
39    pub(crate) end: TextSize,
40}
41
42impl Utf16Char {
43    /// Returns the length in 8-bit UTF-8 code units.
44    fn len(&self) -> TextSize {
45        self.end - self.start
46    }
47
48    /// Returns the length in 16-bit UTF-16 code units.
49    fn len_utf16(&self) -> usize {
50        if self.len() == TextSize::from(4) {
51            2
52        } else {
53            1
54        }
55    }
56}
57
58impl LineIndex {
59    pub fn new(text: &str) -> LineIndex {
60        let mut utf16_lines = FxHashMap::default();
61        let mut utf16_chars = Vec::new();
62
63        let mut newlines = vec![0.into()];
64        let mut curr_row = 0.into();
65        let mut curr_col = 0.into();
66        let mut line = 0;
67        for c in text.chars() {
68            let c_len = TextSize::of(c);
69            curr_row += c_len;
70            if c == '\n' {
71                newlines.push(curr_row);
72
73                // Save any utf-16 characters seen in the previous line
74                if !utf16_chars.is_empty() {
75                    utf16_lines.insert(line, utf16_chars);
76                    utf16_chars = Vec::new();
77                }
78
79                // Prepare for processing the next line
80                curr_col = 0.into();
81                line += 1;
82                continue;
83            }
84
85            if !c.is_ascii() {
86                utf16_chars.push(Utf16Char {
87                    start: curr_col,
88                    end: curr_col + c_len,
89                });
90            }
91
92            curr_col += c_len;
93        }
94
95        // Save any utf-16 characters seen in the last line
96        if !utf16_chars.is_empty() {
97            utf16_lines.insert(line, utf16_chars);
98        }
99
100        LineIndex {
101            newlines,
102            utf16_lines,
103        }
104    }
105
106    pub fn line_col(&self, offset: TextSize) -> LineCol {
107        let line = partition_point(&self.newlines, |&it| it <= offset) - 1;
108        let line_start_offset = self.newlines[line];
109        let col = offset - line_start_offset;
110        LineCol {
111            line: line as u32,
112            col: col.into(),
113        }
114    }
115
116    pub fn offset(&self, line_col: LineCol) -> TextSize {
117        self.newlines[line_col.line as usize] + TextSize::from(line_col.col)
118    }
119
120    pub fn to_utf16(&self, line_col: LineCol) -> LineColUtf16 {
121        let col = self.utf8_to_utf16_col(line_col.line, line_col.col.into());
122        LineColUtf16 {
123            line: line_col.line,
124            col: col as u32,
125        }
126    }
127
128    pub fn to_utf8(&self, line_col: LineColUtf16) -> LineCol {
129        let col = self.utf16_to_utf8_col(line_col.line, line_col.col);
130        LineCol {
131            line: line_col.line,
132            col: col.into(),
133        }
134    }
135
136    pub fn lines(&self, range: TextRange) -> impl Iterator<Item = TextRange> + '_ {
137        let lo = partition_point(&self.newlines, |&it| it < range.start());
138        let hi = partition_point(&self.newlines, |&it| it <= range.end());
139        let all = iter::once(range.start())
140            .chain(self.newlines[lo..hi].iter().copied())
141            .chain(iter::once(range.end()));
142
143        all.clone()
144            .zip(all.skip(1))
145            .map(|(lo, hi)| TextRange::new(lo, hi))
146            .filter(|it| !it.is_empty())
147    }
148
149    fn utf8_to_utf16_col(&self, line: u32, col: TextSize) -> usize {
150        let mut res: usize = col.into();
151        if let Some(utf16_chars) = self.utf16_lines.get(&line) {
152            for c in utf16_chars {
153                if c.end <= col {
154                    res -= usize::from(c.len()) - c.len_utf16();
155                } else {
156                    // From here on, all utf16 characters come *after* the character we are mapping,
157                    // so we don't need to take them into account
158                    break;
159                }
160            }
161        }
162        res
163    }
164
165    fn utf16_to_utf8_col(&self, line: u32, mut col: u32) -> TextSize {
166        if let Some(utf16_chars) = self.utf16_lines.get(&line) {
167            for c in utf16_chars {
168                if col > u32::from(c.start) {
169                    col += u32::from(c.len()) - c.len_utf16() as u32;
170                } else {
171                    // From here on, all utf16 characters come *after* the character we are mapping,
172                    // so we don't need to take them into account
173                    break;
174                }
175            }
176        }
177
178        col.into()
179    }
180}
181
182/// Returns `idx` such that:
183///
184/// ```text
185///     ∀ x in slice[..idx]:  pred(x)
186///  && ∀ x in slice[idx..]: !pred(x)
187/// ```
188///
189/// https://github.com/rust-lang/rust/issues/73831
190fn partition_point<T, P>(slice: &[T], mut pred: P) -> usize
191where
192    P: FnMut(&T) -> bool,
193{
194    let mut left = 0;
195    let mut right = slice.len();
196
197    while left != right {
198        let mid = left + (right - left) / 2;
199        // SAFETY:
200        // When left < right, left <= mid < right.
201        // Therefore left always increases and right always decreases,
202        // and either of them is selected.
203        // In both cases left <= right is satisfied.
204        // Therefore if left < right in a step,
205        // left <= right is satisfied in the next step.
206        // Therefore as long as left != right, 0 <= left < right <= len is satisfied
207        // and if this case 0 <= mid < len is satisfied too.
208        let value = unsafe { slice.get_unchecked(mid) };
209        if pred(value) {
210            left = mid + 1;
211        } else {
212            right = mid;
213        }
214    }
215
216    left
217}