rolldown_ariadne/
source.rs

1use super::*;
2
3use std::io::Error;
4use std::{
5    collections::{hash_map::Entry, HashMap},
6    fs,
7    path::{Path, PathBuf},
8};
9
/// A trait implemented by [`Source`] caches.
///
/// A cache resolves an identifier of type `Id` to the [`Source`] it names, and knows how to
/// render that identifier for display.
pub trait Cache<Id: ?Sized> {
    /// The type used to store the string data for this cache.
    ///
    /// Alternative types other than String can be used, but at the moment, the storage must be
    /// contiguous. A primary use case for this is to use a reference-counted string instead of
    /// copying the whole contents into a [`Source`].
    type Storage: AsRef<str>;

    /// Fetch the [`Source`] identified by the given ID, if possible.
    ///
    /// # Errors
    ///
    /// Returns an error when no source can be produced for `id`. The error type is opaque: the
    /// only thing callers can rely on is that it implements [`fmt::Debug`].
    fn fetch(&mut self, id: &Id) -> Result<&Source<Self::Storage>, impl fmt::Debug>;

    /// Display the given ID as a single inline value.
    ///
    /// An implementation may return `None` if it has nothing to show for the ID.
    ///
    /// This function may make use of attributes from the [`Fmt`] trait.
    fn display<'a>(&self, id: &'a Id) -> Option<impl fmt::Display + 'a>;
}
27
28impl<C: Cache<Id>, Id: ?Sized> Cache<Id> for &mut C {
29    type Storage = C::Storage;
30
31    fn fetch(&mut self, id: &Id) -> Result<&Source<Self::Storage>, impl fmt::Debug> {
32        C::fetch(self, id)
33    }
34    fn display<'a>(&self, id: &'a Id) -> Option<impl fmt::Display + 'a> {
35        C::display(self, id)
36    }
37}
38
39impl<C: Cache<Id>, Id: ?Sized> Cache<Id> for Box<C> {
40    type Storage = C::Storage;
41
42    fn fetch(&mut self, id: &Id) -> Result<&Source<Self::Storage>, impl fmt::Debug> {
43        C::fetch(self, id)
44    }
45    fn display<'a>(&self, id: &'a Id) -> Option<impl fmt::Display + 'a> {
46        C::display(self, id)
47    }
48}
49
/// Position and extent of one line within a [`Source`], tracked in both characters and bytes.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub struct Line {
    offset: usize,
    char_len: usize,
    byte_offset: usize,
    byte_len: usize,
}

impl Line {
    /// Number of characters that precede this line in the original [`Source`].
    pub fn offset(&self) -> usize {
        self.offset
    }

    /// Number of bytes that precede this line in the original [`Source`].
    pub fn byte_offset(&self) -> usize {
        self.byte_offset
    }

    /// Length of this line, measured in characters.
    pub fn len(&self) -> usize {
        self.char_len
    }

    /// Whether this line holds zero characters.
    pub fn is_empty(&self) -> bool {
        self.char_len == 0
    }

    /// Character range this line occupies in the original [`Source`].
    pub fn span(&self) -> Range<usize> {
        let start = self.offset;
        let end = start + self.char_len;
        start..end
    }

    /// Byte range this line occupies in the original [`Source`]; suitable for slicing the
    /// source text directly.
    fn byte_span(&self) -> Range<usize> {
        let start = self.byte_offset;
        let end = start + self.byte_len;
        start..end
    }
}
91
/// A type representing a single source that may be referred to by [`Span`]s.
///
/// In most cases, a source is a single input file.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub struct Source<I: AsRef<str> = String> {
    /// The backing text, held in whatever string-like container `I` is.
    text: I,
    /// One entry per line of `text`, in order of appearance.
    lines: Vec<Line>,
    /// Total length of `text` in characters.
    len: usize,
    /// Total length of `text` in bytes.
    byte_len: usize,
    /// Offset added to line numbers when they are printed.
    display_line_offset: usize,
}
103
104impl<I: AsRef<str>> Source<I> {
105    /// Get the full text of this source file.
106    pub fn text(&self) -> &str {
107        self.text.as_ref()
108    }
109}
110
111impl<I: AsRef<str>> From<I> for Source<I> {
112    /// Generate a [`Source`] from the given [`str`].
113    ///
114    /// Note that this function can be expensive for long strings. Use an implementor of [`Cache`] where possible.
115    fn from(input: I) -> Self {
116        // `input.split_inclusive()` will not iterate at all,
117        // but an empty input still ought to count as a single empty line.
118        if input.as_ref().is_empty() {
119            return Self {
120                text: input,
121                lines: vec![Line {
122                    offset: 0,
123                    char_len: 0,
124                    byte_offset: 0,
125                    byte_len: 0,
126                }],
127                len: 0,
128                byte_len: 0,
129                display_line_offset: 0,
130            };
131        }
132
133        let mut char_offset = 0;
134        let mut byte_offset = 0;
135        let mut lines = Vec::new();
136
137        const SEPARATORS: [char; 7] = [
138            '\r',       // Carriage return
139            '\n',       // Line feed
140            '\x0B',     // Vertical tab
141            '\x0C',     // Form feed
142            '\u{0085}', // Next line
143            '\u{2028}', // Line separator
144            '\u{2029}', // Paragraph separator
145        ];
146        let mut remaining = input.as_ref().split_inclusive(SEPARATORS).peekable();
147        while let Some(line) = remaining.next() {
148            let mut byte_len = line.len();
149            let mut char_len = line.chars().count();
150            // Handle CRLF as a single terminator.
151            if line.ends_with('\r') && remaining.next_if_eq(&"\n").is_some() {
152                byte_len += 1;
153                char_len += 1;
154            }
155            lines.push(Line {
156                offset: char_offset,
157                char_len,
158                byte_offset,
159                byte_len,
160            });
161
162            char_offset += char_len;
163            byte_offset += byte_len;
164        }
165
166        Self {
167            text: input,
168            lines,
169            len: char_offset,
170            byte_len: byte_offset,
171            display_line_offset: 0,
172        }
173    }
174}
175
176impl<I: AsRef<str>> Source<I> {
177    /// Add an offset to the printed line numbers
178    pub fn with_display_line_offset(mut self, offset: usize) -> Self {
179        self.display_line_offset = offset;
180        self
181    }
182
183    /// Get the offset added to printed line numbers
184    pub fn display_line_offset(&self) -> usize {
185        self.display_line_offset
186    }
187
188    /// Get the length of the total number of characters in the source.
189    pub fn len(&self) -> usize {
190        self.len
191    }
192
193    /// Returns `true` if this source contains no characters.
194    pub fn is_empty(&self) -> bool {
195        self.len() == 0
196    }
197
198    /// Return an iterator over the characters in the source.
199    pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
200        self.text.as_ref().chars()
201    }
202
203    /// Get access to a specific, zero-indexed [`Line`].
204    pub fn line(&self, idx: usize) -> Option<Line> {
205        self.lines.get(idx).copied()
206    }
207
208    /// Return an iterator over the [`Line`]s in this source.
209    pub fn lines(&self) -> impl ExactSizeIterator<Item = Line> + '_ {
210        self.lines.iter().copied()
211    }
212
213    /// Get the line that the given offset appears on, and the line/column numbers of the offset.
214    ///
215    /// Note that the line/column numbers are zero-indexed.
216    pub fn get_offset_line(&self, offset: usize) -> Option<(Line, usize, usize)> {
217        if offset <= self.len {
218            let idx = self
219                .lines
220                .binary_search_by_key(&offset, |line| line.offset)
221                .unwrap_or_else(|idx| idx.saturating_sub(1));
222            let line = self.line(idx)?;
223            assert!(
224                offset >= line.offset,
225                "offset = {}, line.offset = {}",
226                offset,
227                line.offset
228            );
229            Some((line, idx, offset - line.offset))
230        } else {
231            None
232        }
233    }
234
235    /// Get the line that the given byte offset appears on, and the line/byte column numbers of the offset.
236    ///
237    /// Note that the line/column numbers are zero-indexed.
238    pub fn get_byte_line(&self, byte_offset: usize) -> Option<(Line, usize, usize)> {
239        if byte_offset <= self.byte_len {
240            let idx = self
241                .lines
242                .binary_search_by_key(&byte_offset, |line| line.byte_offset)
243                .unwrap_or_else(|idx| idx.saturating_sub(1));
244            let line = self.line(idx)?;
245            assert!(
246                byte_offset >= line.byte_offset,
247                "byte_offset = {}, line.byte_offset = {}",
248                byte_offset,
249                line.byte_offset
250            );
251            Some((line, idx, byte_offset - line.byte_offset))
252        } else {
253            None
254        }
255    }
256
257    /// Get the range of lines that this span runs across.
258    ///
259    /// The resulting range is guaranteed to contain valid line indices (i.e: those that can be used for
260    /// [`Source::line`]).
261    pub fn get_line_range<S: Span>(&self, span: &S) -> Range<usize> {
262        let start = self.get_offset_line(span.start()).map_or(0, |(_, l, _)| l);
263        let end = self
264            .get_offset_line(span.end().saturating_sub(1).max(span.start()))
265            .map_or(self.lines.len(), |(_, l, _)| l + 1);
266        start..end
267    }
268
269    /// Get the source text for a line, includes trailing whitespace and the newline
270    pub fn get_line_text(&self, line: Line) -> Option<&'_ str> {
271        self.text.as_ref().get(line.byte_span())
272    }
273}
274
275impl<I: AsRef<str>> Cache<()> for Source<I> {
276    type Storage = I;
277
278    fn fetch(&mut self, _: &()) -> Result<&Source<I>, impl fmt::Debug> {
279        Ok::<_, ()>(self)
280    }
281    fn display<'a>(&self, _: &'a ()) -> Option<impl fmt::Display + 'a> {
282        None::<&str>
283    }
284}
285
286impl<I: AsRef<str>> Cache<()> for &'_ Source<I> {
287    type Storage = I;
288
289    fn fetch(&mut self, _: &()) -> Result<&Source<I>, impl fmt::Debug> {
290        Ok::<_, ()>(*self)
291    }
292    fn display<'a>(&self, _: &'a ()) -> std::option::Option<impl std::fmt::Display + 'a> {
293        None::<&str>
294    }
295}
296
297impl<I: AsRef<str>, Id: fmt::Display + Eq> Cache<Id> for (Id, Source<I>) {
298    type Storage = I;
299
300    fn fetch(&mut self, id: &Id) -> Result<&Source<I>, impl fmt::Debug> {
301        if id == &self.0 {
302            Ok(&self.1)
303        } else {
304            Err(Box::new(format!("Failed to fetch source '{}'", id)))
305        }
306    }
307    fn display<'a>(&self, id: &'a Id) -> Option<impl fmt::Display + 'a> {
308        Some(Box::new(id))
309    }
310}
311
312impl<I: AsRef<str>, Id: fmt::Display + Eq> Cache<Id> for (Id, &'_ Source<I>) {
313    type Storage = I;
314
315    fn fetch(&mut self, id: &Id) -> Result<&Source<I>, impl fmt::Debug> {
316        if id == &self.0 {
317            Ok(self.1)
318        } else {
319            Err(Box::new(format!("Failed to fetch source '{}'", id)))
320        }
321    }
322    fn display<'a>(&self, id: &'a Id) -> Option<impl fmt::Display + 'a> {
323        Some(Box::new(id))
324    }
325}
326
327/// A [`Cache`] that fetches [`Source`]s from the filesystem.
328#[derive(Default, Debug, Clone)]
329pub struct FileCache {
330    files: HashMap<PathBuf, Source>,
331}
332
333impl Cache<Path> for FileCache {
334    type Storage = String;
335
336    fn fetch(&mut self, path: &Path) -> Result<&Source, impl fmt::Debug> {
337        Ok::<_, Error>(match self.files.entry(path.to_path_buf()) {
338            // TODO: Don't allocate here
339            Entry::Occupied(entry) => entry.into_mut(),
340            Entry::Vacant(entry) => entry.insert(Source::from(fs::read_to_string(path)?)),
341        })
342    }
343    fn display<'a>(&self, path: &'a Path) -> Option<impl fmt::Display + 'a> {
344        Some(Box::new(path.display()))
345    }
346}
347
/// A [`Cache`] that fetches [`Source`]s using the provided function.
///
/// Sources that are already known are kept in memory, keyed by their ID.
#[derive(Debug, Clone)]
pub struct FnCache<Id, F, I>
where
    I: AsRef<str>,
{
    /// Sources already available, either pre-inserted or produced by `get`.
    sources: HashMap<Id, Source<I>>,
    /// Function invoked to obtain the text for an ID that is not in `sources`.
    get: F,
}
357
358impl<Id, F, I> FnCache<Id, F, I>
359where
360    I: AsRef<str>,
361{
362    /// Create a new [`FnCache`] with the given fetch function.
363    pub fn new(get: F) -> Self {
364        Self {
365            sources: HashMap::default(),
366            get,
367        }
368    }
369
370    /// Pre-insert a selection of [`Source`]s into this cache.
371    pub fn with_sources(mut self, sources: HashMap<Id, Source<I>>) -> Self
372    where
373        Id: Eq + Hash,
374    {
375        self.sources.reserve(sources.len());
376        for (id, src) in sources {
377            self.sources.insert(id, src);
378        }
379        self
380    }
381}
382
383impl<Id: fmt::Display + Hash + PartialEq + Eq + Clone, F, I, E> Cache<Id> for FnCache<Id, F, I>
384where
385    I: AsRef<str>,
386    E: fmt::Debug,
387    F: for<'a> FnMut(&'a Id) -> Result<I, E>,
388{
389    type Storage = I;
390
391    fn fetch(&mut self, id: &Id) -> Result<&Source<I>, impl fmt::Debug> {
392        Ok::<_, E>(match self.sources.entry(id.clone()) {
393            Entry::Occupied(entry) => entry.into_mut(),
394            Entry::Vacant(entry) => entry.insert(Source::from((self.get)(id)?)),
395        })
396    }
397    fn display<'a>(&self, id: &'a Id) -> Option<impl fmt::Display + 'a> {
398        Some(Box::new(id))
399    }
400}
401
402/// Create a [`Cache`] from a collection of ID/strings, where each corresponds to a [`Source`].
403pub fn sources<Id, S, I>(iter: I) -> impl Cache<Id>
404where
405    Id: fmt::Display + Hash + PartialEq + Eq + Clone + 'static,
406    S: AsRef<str>,
407    I: IntoIterator<Item = (Id, S)>,
408{
409    FnCache::new((move |id| Err(format!("Failed to fetch source '{}'", id))) as fn(&_) -> _)
410        .with_sources(
411            iter.into_iter()
412                .map(|(id, s)| (id, Source::from(s)))
413                .collect(),
414        )
415}
416
#[cfg(test)]
mod tests {
    use std::iter::zip;
    use std::sync::Arc;

    use super::Source;

    /// Asserts that `source` was split into exactly the lines in `expected`, checking each
    /// line's character offset, character length and text, and finally the total length.
    fn assert_lines<I: AsRef<str>>(source: &Source<I>, expected: &[&str]) {
        assert_eq!(source.lines.len(), expected.len());

        let mut offset = 0;
        for (source_line, raw_line) in
            zip(source.lines.iter().copied(), expected.iter().copied())
        {
            assert_eq!(source_line.offset, offset);
            assert_eq!(source_line.char_len, raw_line.chars().count());
            assert_eq!(source.get_line_text(source_line).unwrap(), raw_line);
            offset += source_line.char_len;
        }

        assert_eq!(source.len, offset);
    }

    /// Concatenates `lines` into one source and checks that it splits back into `lines`.
    fn test_with_lines(lines: Vec<&str>) {
        let text: String = lines.iter().copied().collect();
        let source = Source::from(text);
        assert_lines(&source, &lines);
    }

    #[test]
    fn source_from_empty() {
        test_with_lines(vec![""]); // Empty string
    }

    #[test]
    fn source_from_single() {
        test_with_lines(vec!["Single line"]);
        test_with_lines(vec!["Single line with LF\n"]);
        test_with_lines(vec!["Single line with CRLF\r\n"]);
    }

    #[test]
    fn source_from_multi() {
        test_with_lines(vec!["Two\r\n", "lines\n"]);
        test_with_lines(vec!["Some\n", "more\r\n", "lines"]);
        test_with_lines(vec!["\n", "\r\n", "\n", "Empty Lines"]);
    }

    // The line text is compared against the raw input, so this checks that trailing
    // whitespace is kept as part of the line rather than trimmed.
    #[test]
    fn source_from_keeps_trailing_whitespace() {
        test_with_lines(vec!["Trailing spaces  \n", "are preserved\t"]);
    }

    #[test]
    fn source_from_alternate_line_endings() {
        // Line endings other than LF or CRLF
        test_with_lines(vec![
            "CR\r",
            "VT\x0B",
            "FF\x0C",
            "NEL\u{0085}",
            "LS\u{2028}",
            "PS\u{2029}",
        ]);
    }

    #[test]
    fn source_from_other_string_types() {
        let raw = r#"A raw string
            with multiple
            lines behind
            an Arc"#;
        let arc: Arc<str> = Arc::from(raw);
        let source = Source::from(arc);

        assert_eq!(source.lines.len(), 4);

        let expected: Vec<&str> = raw.split_inclusive('\n').collect();
        assert_lines(&source, &expected);
    }

    #[test]
    fn source_from_reference() {
        let raw = r#"A raw string
            with multiple
            lines"#;

        fn non_owning_source(input: &str) -> Source<&str> {
            Source::from(input)
        }

        let source = non_owning_source(raw);
        assert_eq!(source.lines.len(), 3);
    }
}