1use std::hash::{Hash, Hasher};
2use std::iter::zip;
3use std::ops::Range;
4use std::sync::Arc;
5
6use crate::is_newline;
7
8#[derive(Clone)]
12pub struct Lines<S>(Arc<LinesInner<S>>);
13
14#[derive(Clone)]
16struct LinesInner<T> {
17 lines: Vec<Line>,
18 text: T,
19}
20
21#[derive(Debug, Copy, Clone, Eq, PartialEq)]
23pub struct Line {
24 byte_idx: usize,
26 utf16_idx: usize,
28}
29
30impl<T: AsRef<str>> Lines<T> {
31 pub fn new(text: T) -> Self {
33 let lines = lines(text.as_ref());
34 Lines(Arc::new(LinesInner { lines, text }))
35 }
36
37 pub fn text(&self) -> &str {
39 self.0.text.as_ref()
40 }
41
42 pub fn len_bytes(&self) -> usize {
44 self.0.text.as_ref().len()
45 }
46
47 pub fn len_utf16(&self) -> usize {
49 let last = self.0.lines.last().unwrap();
50 last.utf16_idx + len_utf16(&self.text()[last.byte_idx..])
51 }
52
53 pub fn len_lines(&self) -> usize {
55 self.0.lines.len()
56 }
57
58 pub fn byte_to_utf16(&self, byte_idx: usize) -> Option<usize> {
60 let line_idx = self.byte_to_line(byte_idx)?;
61 let line = self.0.lines.get(line_idx)?;
62 let head = self.text().get(line.byte_idx..byte_idx)?;
63 Some(line.utf16_idx + len_utf16(head))
64 }
65
66 pub fn byte_to_line(&self, byte_idx: usize) -> Option<usize> {
68 (byte_idx <= self.text().len()).then(|| {
69 match self.0.lines.binary_search_by_key(&byte_idx, |line| line.byte_idx) {
70 Ok(i) => i,
71 Err(i) => i - 1,
72 }
73 })
74 }
75
76 pub fn byte_to_column(&self, byte_idx: usize) -> Option<usize> {
81 let line = self.byte_to_line(byte_idx)?;
82 let start = self.line_to_byte(line)?;
83 let head = self.text().get(start..byte_idx)?;
84 Some(head.chars().count())
85 }
86
87 pub fn byte_to_line_column(&self, byte_idx: usize) -> Option<(usize, usize)> {
89 let line = self.byte_to_line(byte_idx)?;
90 let start = self.line_to_byte(line)?;
91 let head = self.text().get(start..byte_idx)?;
92 let col = head.chars().count();
93 Some((line, col))
94 }
95
96 pub fn utf16_to_byte(&self, utf16_idx: usize) -> Option<usize> {
98 let line = self.0.lines.get(
99 match self.0.lines.binary_search_by_key(&utf16_idx, |line| line.utf16_idx) {
100 Ok(i) => i,
101 Err(i) => i - 1,
102 },
103 )?;
104
105 let text = self.text();
106 let mut k = line.utf16_idx;
107 for (i, c) in text[line.byte_idx..].char_indices() {
108 if k >= utf16_idx {
109 return Some(line.byte_idx + i);
110 }
111 k += c.len_utf16();
112 }
113
114 (k == utf16_idx).then_some(text.len())
115 }
116
117 pub fn line_to_byte(&self, line_idx: usize) -> Option<usize> {
119 self.0.lines.get(line_idx).map(|line| line.byte_idx)
120 }
121
122 pub fn line_to_range(&self, line_idx: usize) -> Option<Range<usize>> {
124 let start = self.line_to_byte(line_idx)?;
125 let end = self.line_to_byte(line_idx + 1).unwrap_or(self.text().len());
126 Some(start..end)
127 }
128
129 pub fn line_column_to_byte(
135 &self,
136 line_idx: usize,
137 column_idx: usize,
138 ) -> Option<usize> {
139 let range = self.line_to_range(line_idx)?;
140 let line = &self.text()[range.clone()];
141 let mut chars = line.chars();
142 for _ in 0..column_idx {
143 chars.next()?;
144 }
145 Some(range.start + (line.len() - chars.as_str().len()))
146 }
147}
148
149impl Lines<String> {
150 pub fn replace(&mut self, new: &str) -> bool {
158 let Some((prefix, suffix)) = self.replacement_range(new) else {
159 return false;
160 };
161
162 let old = self.text();
163 let replace = prefix..old.len() - suffix;
164 let with = &new[prefix..new.len() - suffix];
165 self.edit(replace, with);
166
167 true
168 }
169
170 pub fn replacement_range(&self, new: &str) -> Option<(usize, usize)> {
173 let old = self.text();
174
175 let mut prefix =
176 zip(old.bytes(), new.bytes()).take_while(|(x, y)| x == y).count();
177
178 if prefix == old.len() && prefix == new.len() {
179 return None;
180 }
181
182 while !old.is_char_boundary(prefix) || !new.is_char_boundary(prefix) {
183 prefix -= 1;
184 }
185
186 let mut suffix = zip(old[prefix..].bytes().rev(), new[prefix..].bytes().rev())
187 .take_while(|(x, y)| x == y)
188 .count();
189
190 while !old.is_char_boundary(old.len() - suffix)
191 || !new.is_char_boundary(new.len() - suffix)
192 {
193 suffix += 1;
194 }
195
196 Some((prefix, suffix))
197 }
198
199 #[track_caller]
205 pub fn edit(&mut self, replace: Range<usize>, with: &str) {
206 let start_byte = replace.start;
207 let start_utf16 = self.byte_to_utf16(start_byte).unwrap();
208 let line = self.byte_to_line(start_byte).unwrap();
209
210 let inner = Arc::make_mut(&mut self.0);
211
212 inner.text.replace_range(replace.clone(), with);
214
215 inner.lines.truncate(line + 1);
217
218 if inner.text[..start_byte].ends_with('\r') && with.starts_with('\n') {
220 inner.lines.pop();
221 }
222
223 inner.lines.extend(lines_from(
225 start_byte,
226 start_utf16,
227 &inner.text[start_byte..],
228 ));
229 }
230}
231
232impl<S: Hash> Hash for Lines<S> {
233 fn hash<H: Hasher>(&self, state: &mut H) {
234 self.0.text.hash(state);
235 }
236}
237
238impl<S: AsRef<str>> AsRef<str> for Lines<S> {
239 fn as_ref(&self) -> &str {
240 self.0.text.as_ref()
241 }
242}
243
244fn lines(text: &str) -> Vec<Line> {
246 std::iter::once(Line { byte_idx: 0, utf16_idx: 0 })
247 .chain(lines_from(0, 0, text))
248 .collect()
249}
250
251fn lines_from(
253 byte_offset: usize,
254 utf16_offset: usize,
255 text: &str,
256) -> impl Iterator<Item = Line> + '_ {
257 let mut s = unscanny::Scanner::new(text);
258 let mut utf16_idx = utf16_offset;
259
260 std::iter::from_fn(move || {
261 s.eat_until(|c: char| {
262 utf16_idx += c.len_utf16();
263 is_newline(c)
264 });
265
266 if s.done() {
267 return None;
268 }
269
270 if s.eat() == Some('\r') && s.eat_if('\n') {
271 utf16_idx += 1;
272 }
273
274 Some(Line { byte_idx: byte_offset + s.cursor(), utf16_idx })
275 })
276}
277
278fn len_utf16(string: &str) -> usize {
281 string.chars().map(char::len_utf16).sum()
282}
283
284#[cfg(test)]
285mod tests {
286 use super::*;
287
288 const TEST: &str = "ä\tcde\nf💛g\r\nhi\rjkl";
289
290 #[test]
291 fn test_source_file_new() {
292 let lines = Lines::new(TEST);
293 assert_eq!(
294 lines.0.lines,
295 [
296 Line { byte_idx: 0, utf16_idx: 0 },
297 Line { byte_idx: 7, utf16_idx: 6 },
298 Line { byte_idx: 15, utf16_idx: 12 },
299 Line { byte_idx: 18, utf16_idx: 15 },
300 ]
301 );
302 }
303
304 #[test]
305 fn test_source_file_pos_to_line() {
306 let lines = Lines::new(TEST);
307 assert_eq!(lines.byte_to_line(0), Some(0));
308 assert_eq!(lines.byte_to_line(2), Some(0));
309 assert_eq!(lines.byte_to_line(6), Some(0));
310 assert_eq!(lines.byte_to_line(7), Some(1));
311 assert_eq!(lines.byte_to_line(8), Some(1));
312 assert_eq!(lines.byte_to_line(12), Some(1));
313 assert_eq!(lines.byte_to_line(21), Some(3));
314 assert_eq!(lines.byte_to_line(22), None);
315 }
316
317 #[test]
318 fn test_source_file_pos_to_column() {
319 let lines = Lines::new(TEST);
320 assert_eq!(lines.byte_to_column(0), Some(0));
321 assert_eq!(lines.byte_to_column(2), Some(1));
322 assert_eq!(lines.byte_to_column(6), Some(5));
323 assert_eq!(lines.byte_to_column(7), Some(0));
324 assert_eq!(lines.byte_to_column(8), Some(1));
325 assert_eq!(lines.byte_to_column(12), Some(2));
326 }
327
328 #[test]
329 fn test_source_file_utf16() {
330 #[track_caller]
331 fn roundtrip(lines: &Lines<&str>, byte_idx: usize, utf16_idx: usize) {
332 let middle = lines.byte_to_utf16(byte_idx).unwrap();
333 let result = lines.utf16_to_byte(middle).unwrap();
334 assert_eq!(middle, utf16_idx);
335 assert_eq!(result, byte_idx);
336 }
337
338 let lines = Lines::new(TEST);
339 roundtrip(&lines, 0, 0);
340 roundtrip(&lines, 2, 1);
341 roundtrip(&lines, 3, 2);
342 roundtrip(&lines, 8, 7);
343 roundtrip(&lines, 12, 9);
344 roundtrip(&lines, 21, 18);
345 assert_eq!(lines.byte_to_utf16(22), None);
346 assert_eq!(lines.utf16_to_byte(19), None);
347 }
348
349 #[test]
350 fn test_source_file_roundtrip() {
351 #[track_caller]
352 fn roundtrip(lines: &Lines<&str>, byte_idx: usize) {
353 let line = lines.byte_to_line(byte_idx).unwrap();
354 let column = lines.byte_to_column(byte_idx).unwrap();
355 let result = lines.line_column_to_byte(line, column).unwrap();
356 assert_eq!(result, byte_idx);
357 }
358
359 let lines = Lines::new(TEST);
360 roundtrip(&lines, 0);
361 roundtrip(&lines, 7);
362 roundtrip(&lines, 12);
363 roundtrip(&lines, 21);
364 }
365
366 #[test]
367 fn test_source_file_edit() {
368 #[track_caller]
371 fn test(prev: &str, range: Range<usize>, with: &str, after: &str) {
372 let reference = Lines::new(after);
373
374 let mut edited = Lines::new(prev.to_string());
375 edited.edit(range.clone(), with);
376 assert_eq!(edited.text(), reference.text());
377 assert_eq!(edited.0.lines, reference.0.lines);
378
379 let mut replaced = Lines::new(prev.to_string());
380 replaced.replace(&{
381 let mut s = prev.to_string();
382 s.replace_range(range, with);
383 s
384 });
385 assert_eq!(replaced.text(), reference.text());
386 assert_eq!(replaced.0.lines, reference.0.lines);
387 }
388
389 test("abc\n", 0..0, "hi\n", "hi\nabc\n");
391 test("\nabc", 0..0, "hi\r", "hi\r\nabc");
392
393 test(TEST, 4..16, "❌", "ä\tc❌i\rjkl");
395
396 test("abc\ndef", 7..7, "hi", "abc\ndefhi");
398 test("abc\ndef\n", 8..8, "hi", "abc\ndef\nhi");
399
400 test("abc\ndef\r", 8..8, "\nghi", "abc\ndef\r\nghi");
402
403 test(TEST, 0..21, "", "");
405 }
406}