1use std::hash::{Hash, Hasher};
2use std::iter::zip;
3use std::ops::Range;
4use std::sync::Arc;
5
6use crate::is_newline;
7
8#[derive(Clone)]
12pub struct Lines<S>(Arc<Repr<S>>);
13
14#[derive(Clone)]
15struct Repr<T> {
16 lines: Vec<Line>,
17 text: T,
18}
19
/// Where a single line begins within the text.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Line {
    /// Offset of the line's first byte in the UTF-8 text.
    byte_idx: usize,
    /// Offset of the line's first code unit, counted as if the text were
    /// encoded in UTF-16.
    utf16_idx: usize,
}
28
29impl<T: AsRef<str>> Lines<T> {
30 pub fn new(text: T) -> Self {
32 let lines = lines(text.as_ref());
33 Lines(Arc::new(Repr { lines, text }))
34 }
35
36 pub fn text(&self) -> &str {
38 self.0.text.as_ref()
39 }
40
41 pub fn len_bytes(&self) -> usize {
43 self.0.text.as_ref().len()
44 }
45
46 pub fn len_utf16(&self) -> usize {
48 let last = self.0.lines.last().unwrap();
49 last.utf16_idx + len_utf16(&self.text()[last.byte_idx..])
50 }
51
52 pub fn len_lines(&self) -> usize {
54 self.0.lines.len()
55 }
56
57 pub fn byte_to_utf16(&self, byte_idx: usize) -> Option<usize> {
59 let line_idx = self.byte_to_line(byte_idx)?;
60 let line = self.0.lines.get(line_idx)?;
61 let head = self.text().get(line.byte_idx..byte_idx)?;
62 Some(line.utf16_idx + len_utf16(head))
63 }
64
65 pub fn byte_to_line(&self, byte_idx: usize) -> Option<usize> {
67 (byte_idx <= self.text().len()).then(|| {
68 match self.0.lines.binary_search_by_key(&byte_idx, |line| line.byte_idx) {
69 Ok(i) => i,
70 Err(i) => i - 1,
71 }
72 })
73 }
74
75 pub fn byte_to_column(&self, byte_idx: usize) -> Option<usize> {
80 let line = self.byte_to_line(byte_idx)?;
81 let start = self.line_to_byte(line)?;
82 let head = self.text().get(start..byte_idx)?;
83 Some(head.chars().count())
84 }
85
86 pub fn byte_to_line_column(&self, byte_idx: usize) -> Option<(usize, usize)> {
88 let line = self.byte_to_line(byte_idx)?;
89 let start = self.line_to_byte(line)?;
90 let head = self.text().get(start..byte_idx)?;
91 let col = head.chars().count();
92 Some((line, col))
93 }
94
95 pub fn utf16_to_byte(&self, utf16_idx: usize) -> Option<usize> {
97 let line = self.0.lines.get(
98 match self.0.lines.binary_search_by_key(&utf16_idx, |line| line.utf16_idx) {
99 Ok(i) => i,
100 Err(i) => i - 1,
101 },
102 )?;
103
104 let text = self.text();
105 let mut k = line.utf16_idx;
106 for (i, c) in text[line.byte_idx..].char_indices() {
107 if k >= utf16_idx {
108 return Some(line.byte_idx + i);
109 }
110 k += c.len_utf16();
111 }
112
113 (k == utf16_idx).then_some(text.len())
114 }
115
116 pub fn line_to_byte(&self, line_idx: usize) -> Option<usize> {
118 self.0.lines.get(line_idx).map(|line| line.byte_idx)
119 }
120
121 pub fn line_to_range(&self, line_idx: usize) -> Option<Range<usize>> {
123 let start = self.line_to_byte(line_idx)?;
124 let end = self.line_to_byte(line_idx + 1).unwrap_or(self.text().len());
125 Some(start..end)
126 }
127
128 pub fn line_column_to_byte(
133 &self,
134 line_idx: usize,
135 column_idx: usize,
136 ) -> Option<usize> {
137 let range = self.line_to_range(line_idx)?;
138 let line = self.text().get(range.clone())?;
139 let mut chars = line.chars();
140 for _ in 0..column_idx {
141 chars.next();
142 }
143 Some(range.start + (line.len() - chars.as_str().len()))
144 }
145}
146
147impl Lines<String> {
148 pub fn replace(&mut self, new: &str) -> bool {
156 let Some((prefix, suffix)) = self.replacement_range(new) else {
157 return false;
158 };
159
160 let old = self.text();
161 let replace = prefix..old.len() - suffix;
162 let with = &new[prefix..new.len() - suffix];
163 self.edit(replace, with);
164
165 true
166 }
167
168 pub fn replacement_range(&self, new: &str) -> Option<(usize, usize)> {
171 let old = self.text();
172
173 let mut prefix =
174 zip(old.bytes(), new.bytes()).take_while(|(x, y)| x == y).count();
175
176 if prefix == old.len() && prefix == new.len() {
177 return None;
178 }
179
180 while !old.is_char_boundary(prefix) || !new.is_char_boundary(prefix) {
181 prefix -= 1;
182 }
183
184 let mut suffix = zip(old[prefix..].bytes().rev(), new[prefix..].bytes().rev())
185 .take_while(|(x, y)| x == y)
186 .count();
187
188 while !old.is_char_boundary(old.len() - suffix)
189 || !new.is_char_boundary(new.len() - suffix)
190 {
191 suffix += 1;
192 }
193
194 Some((prefix, suffix))
195 }
196
197 #[track_caller]
203 pub fn edit(&mut self, replace: Range<usize>, with: &str) {
204 let start_byte = replace.start;
205 let start_utf16 = self.byte_to_utf16(start_byte).unwrap();
206 let line = self.byte_to_line(start_byte).unwrap();
207
208 let inner = Arc::make_mut(&mut self.0);
209
210 inner.text.replace_range(replace.clone(), with);
212
213 inner.lines.truncate(line + 1);
215
216 if inner.text[..start_byte].ends_with('\r') && with.starts_with('\n') {
218 inner.lines.pop();
219 }
220
221 inner.lines.extend(lines_from(
223 start_byte,
224 start_utf16,
225 &inner.text[start_byte..],
226 ));
227 }
228}
229
230impl<S: Hash> Hash for Lines<S> {
231 fn hash<H: Hasher>(&self, state: &mut H) {
232 self.0.text.hash(state);
233 }
234}
235
236impl<S: AsRef<str>> AsRef<str> for Lines<S> {
237 fn as_ref(&self) -> &str {
238 self.0.text.as_ref()
239 }
240}
241
242fn lines(text: &str) -> Vec<Line> {
244 std::iter::once(Line { byte_idx: 0, utf16_idx: 0 })
245 .chain(lines_from(0, 0, text))
246 .collect()
247}
248
249fn lines_from(
251 byte_offset: usize,
252 utf16_offset: usize,
253 text: &str,
254) -> impl Iterator<Item = Line> + '_ {
255 let mut s = unscanny::Scanner::new(text);
256 let mut utf16_idx = utf16_offset;
257
258 std::iter::from_fn(move || {
259 s.eat_until(|c: char| {
260 utf16_idx += c.len_utf16();
261 is_newline(c)
262 });
263
264 if s.done() {
265 return None;
266 }
267
268 if s.eat() == Some('\r') && s.eat_if('\n') {
269 utf16_idx += 1;
270 }
271
272 Some(Line { byte_idx: byte_offset + s.cursor(), utf16_idx })
273 })
274}
275
/// Counts the code units needed to encode `string` in UTF-16.
fn len_utf16(string: &str) -> usize {
    string.encode_utf16().count()
}
281
#[cfg(test)]
mod tests {
    use super::*;

    /// Sample text mixing multi-byte characters with `\n`, `\r\n` and `\r`
    /// line breaks.
    const TEST: &str = "ä\tcde\nf💛g\r\nhi\rjkl";

    #[test]
    fn test_source_file_new() {
        let expected = [(0, 0), (7, 6), (15, 12), (18, 15)]
            .map(|(byte_idx, utf16_idx)| Line { byte_idx, utf16_idx });
        assert_eq!(Lines::new(TEST).0.lines, expected);
    }

    #[test]
    fn test_source_file_pos_to_line() {
        let lines = Lines::new(TEST);
        let cases = [
            (0, Some(0)),
            (2, Some(0)),
            (6, Some(0)),
            (7, Some(1)),
            (8, Some(1)),
            (12, Some(1)),
            (21, Some(3)),
            (22, None),
        ];
        for (byte_idx, expected) in cases {
            assert_eq!(lines.byte_to_line(byte_idx), expected);
        }
    }

    #[test]
    fn test_source_file_pos_to_column() {
        let lines = Lines::new(TEST);
        let cases = [(0, 0), (2, 1), (6, 5), (7, 0), (8, 1), (12, 2)];
        for (byte_idx, column) in cases {
            assert_eq!(lines.byte_to_column(byte_idx), Some(column));
        }
    }

    #[test]
    fn test_source_file_utf16() {
        let lines = Lines::new(TEST);

        // Each pair is a byte offset and the UTF-16 offset it must map to;
        // converting back has to give the byte offset again.
        let cases = [(0, 0), (2, 1), (3, 2), (8, 7), (12, 9), (21, 18)];
        for (byte_idx, utf16_idx) in cases {
            let middle = lines.byte_to_utf16(byte_idx).unwrap();
            let result = lines.utf16_to_byte(middle).unwrap();
            assert_eq!(middle, utf16_idx);
            assert_eq!(result, byte_idx);
        }

        // Offsets past the end are rejected in both directions.
        assert_eq!(lines.byte_to_utf16(22), None);
        assert_eq!(lines.utf16_to_byte(19), None);
    }

    #[test]
    fn test_source_file_roundtrip() {
        let lines = Lines::new(TEST);
        for byte_idx in [0, 7, 12, 21] {
            let line = lines.byte_to_line(byte_idx).unwrap();
            let column = lines.byte_to_column(byte_idx).unwrap();
            let result = lines.line_column_to_byte(line, column).unwrap();
            assert_eq!(result, byte_idx);
        }
    }

    #[test]
    fn test_source_file_edit() {
        // Each case: text before, replaced byte range, inserted text, and
        // the text expected afterwards.
        let cases: [(&str, Range<usize>, &str, &str); 7] = [
            // Inserting at the beginning.
            ("abc\n", 0..0, "hi\n", "hi\nabc\n"),
            ("\nabc", 0..0, "hi\r", "hi\r\nabc"),
            // Replacing in the middle, across several lines.
            (TEST, 4..16, "❌", "ä\tc❌i\rjkl"),
            // Appending.
            ("abc\ndef", 7..7, "hi", "abc\ndefhi"),
            ("abc\ndef\n", 8..8, "hi", "abc\ndef\nhi"),
            // Appending a `\n` right after an existing `\r`.
            ("abc\ndef\r", 8..8, "\nghi", "abc\ndef\r\nghi"),
            // Removing everything.
            (TEST, 0..21, "", ""),
        ];

        for (prev, range, with, after) in cases {
            // Building the index from scratch serves as the reference.
            let reference = Lines::new(after);

            let mut edited = Lines::new(prev.to_string());
            edited.edit(range.clone(), with);
            assert_eq!(edited.text(), reference.text());
            assert_eq!(edited.0.lines, reference.0.lines);

            // Replacing the full text must end in the same state.
            let mut target = prev.to_string();
            target.replace_range(range, with);
            let mut replaced = Lines::new(prev.to_string());
            replaced.replace(&target);
            assert_eq!(replaced.text(), reference.text());
            assert_eq!(replaced.0.lines, reference.0.lines);
        }
    }
}