1use std::fmt::{self, Debug, Formatter};
4use std::hash::{Hash, Hasher};
5use std::iter::zip;
6use std::ops::Range;
7use std::sync::Arc;
8
9use typst_utils::LazyHash;
10
11use crate::reparser::reparse;
12use crate::{is_newline, parse, FileId, LinkedNode, Span, SyntaxNode, VirtualPath};
13
14#[derive(Clone)]
21pub struct Source(Arc<Repr>);
22
23#[derive(Clone)]
25struct Repr {
26 id: FileId,
27 text: LazyHash<String>,
28 root: LazyHash<SyntaxNode>,
29 lines: Vec<Line>,
30}
31
32impl Source {
33 pub fn new(id: FileId, text: String) -> Self {
35 let _scope = typst_timing::TimingScope::new("create source");
36 let mut root = parse(&text);
37 root.numberize(id, Span::FULL).unwrap();
38 Self(Arc::new(Repr {
39 id,
40 lines: lines(&text),
41 text: LazyHash::new(text),
42 root: LazyHash::new(root),
43 }))
44 }
45
46 pub fn detached(text: impl Into<String>) -> Self {
48 Self::new(FileId::new(None, VirtualPath::new("main.typ")), text.into())
49 }
50
51 pub fn root(&self) -> &SyntaxNode {
53 &self.0.root
54 }
55
56 pub fn id(&self) -> FileId {
58 self.0.id
59 }
60
61 pub fn text(&self) -> &str {
63 &self.0.text
64 }
65
66 pub fn get(&self, range: Range<usize>) -> Option<&str> {
68 self.text().get(range)
69 }
70
71 pub fn replace(&mut self, new: &str) -> Range<usize> {
79 let _scope = typst_timing::TimingScope::new("replace source");
80 let old = self.text();
81
82 let mut prefix =
83 zip(old.bytes(), new.bytes()).take_while(|(x, y)| x == y).count();
84
85 if prefix == old.len() && prefix == new.len() {
86 return 0..0;
87 }
88
89 while !old.is_char_boundary(prefix) || !new.is_char_boundary(prefix) {
90 prefix -= 1;
91 }
92
93 let mut suffix = zip(old[prefix..].bytes().rev(), new[prefix..].bytes().rev())
94 .take_while(|(x, y)| x == y)
95 .count();
96
97 while !old.is_char_boundary(old.len() - suffix)
98 || !new.is_char_boundary(new.len() - suffix)
99 {
100 suffix += 1;
101 }
102
103 let replace = prefix..old.len() - suffix;
104 let with = &new[prefix..new.len() - suffix];
105 self.edit(replace, with)
106 }
107
108 #[track_caller]
114 pub fn edit(&mut self, replace: Range<usize>, with: &str) -> Range<usize> {
115 let start_byte = replace.start;
116 let start_utf16 = self.byte_to_utf16(start_byte).unwrap();
117 let line = self.byte_to_line(start_byte).unwrap();
118
119 let inner = Arc::make_mut(&mut self.0);
120
121 inner.text.replace_range(replace.clone(), with);
123
124 inner.lines.truncate(line + 1);
126
127 if inner.text[..start_byte].ends_with('\r') && with.starts_with('\n') {
129 inner.lines.pop();
130 }
131
132 inner.lines.extend(lines_from(
134 start_byte,
135 start_utf16,
136 &inner.text[start_byte..],
137 ));
138
139 reparse(&mut inner.root, &inner.text, replace, with.len())
141 }
142
143 pub fn len_bytes(&self) -> usize {
145 self.text().len()
146 }
147
148 pub fn len_utf16(&self) -> usize {
150 let last = self.0.lines.last().unwrap();
151 last.utf16_idx + len_utf16(&self.0.text[last.byte_idx..])
152 }
153
154 pub fn len_lines(&self) -> usize {
156 self.0.lines.len()
157 }
158
159 pub fn find(&self, span: Span) -> Option<LinkedNode<'_>> {
163 LinkedNode::new(self.root()).find(span)
164 }
165
166 pub fn range(&self, span: Span) -> Option<Range<usize>> {
172 Some(self.find(span)?.range())
173 }
174
175 pub fn byte_to_utf16(&self, byte_idx: usize) -> Option<usize> {
177 let line_idx = self.byte_to_line(byte_idx)?;
178 let line = self.0.lines.get(line_idx)?;
179 let head = self.0.text.get(line.byte_idx..byte_idx)?;
180 Some(line.utf16_idx + len_utf16(head))
181 }
182
183 pub fn byte_to_line(&self, byte_idx: usize) -> Option<usize> {
185 (byte_idx <= self.0.text.len()).then(|| {
186 match self.0.lines.binary_search_by_key(&byte_idx, |line| line.byte_idx) {
187 Ok(i) => i,
188 Err(i) => i - 1,
189 }
190 })
191 }
192
193 pub fn byte_to_column(&self, byte_idx: usize) -> Option<usize> {
198 let line = self.byte_to_line(byte_idx)?;
199 let start = self.line_to_byte(line)?;
200 let head = self.get(start..byte_idx)?;
201 Some(head.chars().count())
202 }
203
204 pub fn utf16_to_byte(&self, utf16_idx: usize) -> Option<usize> {
206 let line = self.0.lines.get(
207 match self.0.lines.binary_search_by_key(&utf16_idx, |line| line.utf16_idx) {
208 Ok(i) => i,
209 Err(i) => i - 1,
210 },
211 )?;
212
213 let mut k = line.utf16_idx;
214 for (i, c) in self.0.text[line.byte_idx..].char_indices() {
215 if k >= utf16_idx {
216 return Some(line.byte_idx + i);
217 }
218 k += c.len_utf16();
219 }
220
221 (k == utf16_idx).then_some(self.0.text.len())
222 }
223
224 pub fn line_to_byte(&self, line_idx: usize) -> Option<usize> {
226 self.0.lines.get(line_idx).map(|line| line.byte_idx)
227 }
228
229 pub fn line_to_range(&self, line_idx: usize) -> Option<Range<usize>> {
231 let start = self.line_to_byte(line_idx)?;
232 let end = self.line_to_byte(line_idx + 1).unwrap_or(self.0.text.len());
233 Some(start..end)
234 }
235
236 pub fn line_column_to_byte(
241 &self,
242 line_idx: usize,
243 column_idx: usize,
244 ) -> Option<usize> {
245 let range = self.line_to_range(line_idx)?;
246 let line = self.get(range.clone())?;
247 let mut chars = line.chars();
248 for _ in 0..column_idx {
249 chars.next();
250 }
251 Some(range.start + (line.len() - chars.as_str().len()))
252 }
253}
254
255impl Debug for Source {
256 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
257 write!(f, "Source({:?})", self.id().vpath())
258 }
259}
260
261impl Hash for Source {
262 fn hash<H: Hasher>(&self, state: &mut H) {
263 self.0.id.hash(state);
264 self.0.text.hash(state);
265 self.0.root.hash(state);
266 }
267}
268
269impl AsRef<str> for Source {
270 fn as_ref(&self) -> &str {
271 self.text()
272 }
273}
274
275#[derive(Debug, Copy, Clone, Eq, PartialEq)]
277struct Line {
278 byte_idx: usize,
280 utf16_idx: usize,
282}
283
284fn lines(text: &str) -> Vec<Line> {
286 std::iter::once(Line { byte_idx: 0, utf16_idx: 0 })
287 .chain(lines_from(0, 0, text))
288 .collect()
289}
290
291fn lines_from(
293 byte_offset: usize,
294 utf16_offset: usize,
295 text: &str,
296) -> impl Iterator<Item = Line> + '_ {
297 let mut s = unscanny::Scanner::new(text);
298 let mut utf16_idx = utf16_offset;
299
300 std::iter::from_fn(move || {
301 s.eat_until(|c: char| {
302 utf16_idx += c.len_utf16();
303 is_newline(c)
304 });
305
306 if s.done() {
307 return None;
308 }
309
310 if s.eat() == Some('\r') && s.eat_if('\n') {
311 utf16_idx += 1;
312 }
313
314 Some(Line { byte_idx: byte_offset + s.cursor(), utf16_idx })
315 })
316}
317
318fn len_utf16(string: &str) -> usize {
321 string.chars().map(char::len_utf16).sum()
322}
323
324#[cfg(test)]
325mod tests {
326 use super::*;
327
328 const TEST: &str = "ä\tcde\nf💛g\r\nhi\rjkl";
329
330 #[test]
331 fn test_source_file_new() {
332 let source = Source::detached(TEST);
333 assert_eq!(
334 source.0.lines,
335 [
336 Line { byte_idx: 0, utf16_idx: 0 },
337 Line { byte_idx: 7, utf16_idx: 6 },
338 Line { byte_idx: 15, utf16_idx: 12 },
339 Line { byte_idx: 18, utf16_idx: 15 },
340 ]
341 );
342 }
343
344 #[test]
345 fn test_source_file_pos_to_line() {
346 let source = Source::detached(TEST);
347 assert_eq!(source.byte_to_line(0), Some(0));
348 assert_eq!(source.byte_to_line(2), Some(0));
349 assert_eq!(source.byte_to_line(6), Some(0));
350 assert_eq!(source.byte_to_line(7), Some(1));
351 assert_eq!(source.byte_to_line(8), Some(1));
352 assert_eq!(source.byte_to_line(12), Some(1));
353 assert_eq!(source.byte_to_line(21), Some(3));
354 assert_eq!(source.byte_to_line(22), None);
355 }
356
357 #[test]
358 fn test_source_file_pos_to_column() {
359 let source = Source::detached(TEST);
360 assert_eq!(source.byte_to_column(0), Some(0));
361 assert_eq!(source.byte_to_column(2), Some(1));
362 assert_eq!(source.byte_to_column(6), Some(5));
363 assert_eq!(source.byte_to_column(7), Some(0));
364 assert_eq!(source.byte_to_column(8), Some(1));
365 assert_eq!(source.byte_to_column(12), Some(2));
366 }
367
368 #[test]
369 fn test_source_file_utf16() {
370 #[track_caller]
371 fn roundtrip(source: &Source, byte_idx: usize, utf16_idx: usize) {
372 let middle = source.byte_to_utf16(byte_idx).unwrap();
373 let result = source.utf16_to_byte(middle).unwrap();
374 assert_eq!(middle, utf16_idx);
375 assert_eq!(result, byte_idx);
376 }
377
378 let source = Source::detached(TEST);
379 roundtrip(&source, 0, 0);
380 roundtrip(&source, 2, 1);
381 roundtrip(&source, 3, 2);
382 roundtrip(&source, 8, 7);
383 roundtrip(&source, 12, 9);
384 roundtrip(&source, 21, 18);
385 assert_eq!(source.byte_to_utf16(22), None);
386 assert_eq!(source.utf16_to_byte(19), None);
387 }
388
389 #[test]
390 fn test_source_file_roundtrip() {
391 #[track_caller]
392 fn roundtrip(source: &Source, byte_idx: usize) {
393 let line = source.byte_to_line(byte_idx).unwrap();
394 let column = source.byte_to_column(byte_idx).unwrap();
395 let result = source.line_column_to_byte(line, column).unwrap();
396 assert_eq!(result, byte_idx);
397 }
398
399 let source = Source::detached(TEST);
400 roundtrip(&source, 0);
401 roundtrip(&source, 7);
402 roundtrip(&source, 12);
403 roundtrip(&source, 21);
404 }
405
406 #[test]
407 fn test_source_file_edit() {
408 #[track_caller]
411 fn test(prev: &str, range: Range<usize>, with: &str, after: &str) {
412 let reference = Source::detached(after);
413
414 let mut edited = Source::detached(prev);
415 edited.edit(range.clone(), with);
416 assert_eq!(edited.text(), reference.text());
417 assert_eq!(edited.0.lines, reference.0.lines);
418
419 let mut replaced = Source::detached(prev);
420 replaced.replace(&{
421 let mut s = prev.to_string();
422 s.replace_range(range, with);
423 s
424 });
425 assert_eq!(replaced.text(), reference.text());
426 assert_eq!(replaced.0.lines, reference.0.lines);
427 }
428
429 test("abc\n", 0..0, "hi\n", "hi\nabc\n");
431 test("\nabc", 0..0, "hi\r", "hi\r\nabc");
432
433 test(TEST, 4..16, "❌", "ä\tc❌i\rjkl");
435
436 test("abc\ndef", 7..7, "hi", "abc\ndefhi");
438 test("abc\ndef\n", 8..8, "hi", "abc\ndef\nhi");
439
440 test("abc\ndef\r", 8..8, "\nghi", "abc\ndef\r\nghi");
442
443 test(TEST, 0..21, "", "");
445 }
446}