ariadne/
source.rs

1use super::*;
2
3use std::io::Error;
4use std::{
5    collections::{hash_map::Entry, HashMap},
6    fs,
7    path::{Path, PathBuf},
8};
9
/// A trait implemented by [`Source`] caches.
///
/// A cache maps an identifier of type `Id` to the [`Source`] it names, and knows how to
/// render that identifier for display.
pub trait Cache<Id: ?Sized> {
    /// The type used to store the string data for this cache.
    ///
    /// Alternative types other than String can be used, but at the moment, the storage must be
    /// contiguous. A primary use case for this is to use a reference-counted string instead of
    /// copying the whole contents into a [`Source`].
    type Storage: AsRef<str>;

    /// Fetch the [`Source`] identified by the given ID, if possible.
    ///
    /// # Errors
    ///
    /// Returns an error value (anything implementing [`fmt::Debug`]) when no source can be
    /// produced for `id`.
    fn fetch(&mut self, id: &Id) -> Result<&Source<Self::Storage>, impl fmt::Debug>;

    /// Display the given ID as a single inline value.
    ///
    /// Returns `None` when the cache has no displayable form for this ID.
    ///
    /// This function may make use of attributes from the [`Fmt`] trait.
    fn display<'a>(&self, id: &'a Id) -> Option<impl fmt::Display + 'a>;
}
27
28impl<'b, C: Cache<Id>, Id: ?Sized> Cache<Id> for &'b mut C {
29    type Storage = C::Storage;
30
31    fn fetch(&mut self, id: &Id) -> Result<&Source<Self::Storage>, impl fmt::Debug> {
32        C::fetch(self, id)
33    }
34    fn display<'a>(&self, id: &'a Id) -> Option<impl fmt::Display + 'a> {
35        C::display(self, id)
36    }
37}
38
39impl<C: Cache<Id>, Id: ?Sized> Cache<Id> for Box<C> {
40    type Storage = C::Storage;
41
42    fn fetch(&mut self, id: &Id) -> Result<&Source<Self::Storage>, impl fmt::Debug> {
43        C::fetch(self, id)
44    }
45    fn display<'a>(&self, id: &'a Id) -> Option<impl fmt::Display + 'a> {
46        C::display(self, id)
47    }
48}
49
/// A type representing a single line of a [`Source`].
///
/// A line is described by where it starts and how long it is, measured both in characters
/// and in bytes.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub struct Line {
    // Number of characters that precede this line in the source.
    offset: usize,
    // Length of this line in characters.
    char_len: usize,
    // Number of bytes that precede this line in the source.
    byte_offset: usize,
    // Length of this line in bytes.
    byte_len: usize,
}

impl Line {
    /// Get the offset of this line in the original [`Source`] (i.e: the number of characters that precede it).
    pub fn offset(&self) -> usize {
        let Line { offset, .. } = *self;
        offset
    }

    /// Get the character length of this line.
    pub fn len(&self) -> usize {
        let Line { char_len, .. } = *self;
        char_len
    }

    /// Returns `true` if this line contains no characters.
    pub fn is_empty(&self) -> bool {
        self.char_len == 0
    }

    /// Get the offset span of this line in the original [`Source`].
    pub fn span(&self) -> Range<usize> {
        let start = self.offset;
        let end = start + self.char_len;
        start..end
    }

    /// Get the byte offset span of this line in the original [`Source`]. This can be used to
    /// directly slice into its source text.
    fn byte_span(&self) -> Range<usize> {
        let start = self.byte_offset;
        let end = start + self.byte_len;
        start..end
    }
}
86
87/// A type representing a single source that may be referred to by [`Span`]s.
88///
89/// In most cases, a source is a single input file.
90#[derive(Clone, Debug, Hash, PartialEq, Eq)]
91pub struct Source<I: AsRef<str> = String> {
92    text: I,
93    lines: Vec<Line>,
94    len: usize,
95    byte_len: usize,
96    display_line_offset: usize,
97}
98
99impl<I: AsRef<str>> Source<I> {
100    /// Get the full text of this source file.
101    pub fn text(&self) -> &str {
102        self.text.as_ref()
103    }
104}
105
106impl<I: AsRef<str>> From<I> for Source<I> {
107    /// Generate a [`Source`] from the given [`str`].
108    ///
109    /// Note that this function can be expensive for long strings. Use an implementor of [`Cache`] where possible.
110    fn from(input: I) -> Self {
111        // `input.split_inclusive()` will not iterate at all,
112        // but an empty input still ought to count as a single empty line.
113        if input.as_ref().is_empty() {
114            return Self {
115                text: input,
116                lines: vec![Line {
117                    offset: 0,
118                    char_len: 0,
119                    byte_offset: 0,
120                    byte_len: 0,
121                }],
122                len: 0,
123                byte_len: 0,
124                display_line_offset: 0,
125            };
126        }
127
128        let mut char_offset = 0;
129        let mut byte_offset = 0;
130        let mut lines = Vec::new();
131
132        const SEPARATORS: [char; 7] = [
133            '\r',       // Carriage return
134            '\n',       // Line feed
135            '\x0B',     // Vertical tab
136            '\x0C',     // Form feed
137            '\u{0085}', // Next line
138            '\u{2028}', // Line separator
139            '\u{2029}', // Paragraph separator
140        ];
141        let mut remaining = input.as_ref().split_inclusive(SEPARATORS).peekable();
142        while let Some(line) = remaining.next() {
143            let mut byte_len = line.len();
144            let mut char_len = line.chars().count();
145            // Handle CRLF as a single terminator.
146            if line.ends_with('\r') && remaining.next_if_eq(&"\n").is_some() {
147                byte_len += 1;
148                char_len += 1;
149            }
150            lines.push(Line {
151                offset: char_offset,
152                char_len,
153                byte_offset,
154                byte_len,
155            });
156
157            char_offset += char_len;
158            byte_offset += byte_len;
159        }
160
161        Self {
162            text: input,
163            lines,
164            len: char_offset,
165            byte_len: byte_offset,
166            display_line_offset: 0,
167        }
168    }
169}
170
171impl<I: AsRef<str>> Source<I> {
172    /// Add an offset to the printed line numbers
173    pub fn with_display_line_offset(mut self, offset: usize) -> Self {
174        self.display_line_offset = offset;
175        self
176    }
177
178    /// Get the offset added to printed line numbers
179    pub fn display_line_offset(&self) -> usize {
180        self.display_line_offset
181    }
182
183    /// Get the length of the total number of characters in the source.
184    pub fn len(&self) -> usize {
185        self.len
186    }
187
188    /// Returns `true` if this source contains no characters.
189    pub fn is_empty(&self) -> bool {
190        self.len() == 0
191    }
192
193    /// Return an iterator over the characters in the source.
194    pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
195        self.text.as_ref().chars()
196    }
197
198    /// Get access to a specific, zero-indexed [`Line`].
199    pub fn line(&self, idx: usize) -> Option<Line> {
200        self.lines.get(idx).copied()
201    }
202
203    /// Return an iterator over the [`Line`]s in this source.
204    pub fn lines(&self) -> impl ExactSizeIterator<Item = Line> + '_ {
205        self.lines.iter().copied()
206    }
207
208    /// Get the line that the given offset appears on, and the line/column numbers of the offset.
209    ///
210    /// Note that the line/column numbers are zero-indexed.
211    pub fn get_offset_line(&self, offset: usize) -> Option<(Line, usize, usize)> {
212        if offset <= self.len {
213            let idx = self
214                .lines
215                .binary_search_by_key(&offset, |line| line.offset)
216                .unwrap_or_else(|idx| idx.saturating_sub(1));
217            let line = self.line(idx)?;
218            assert!(
219                offset >= line.offset,
220                "offset = {}, line.offset = {}",
221                offset,
222                line.offset
223            );
224            Some((line, idx, offset - line.offset))
225        } else {
226            None
227        }
228    }
229
230    /// Get the line that the given byte offset appears on, and the line/byte column numbers of the offset.
231    ///
232    /// Note that the line/column numbers are zero-indexed.
233    pub fn get_byte_line(&self, byte_offset: usize) -> Option<(Line, usize, usize)> {
234        if byte_offset <= self.byte_len {
235            let idx = self
236                .lines
237                .binary_search_by_key(&byte_offset, |line| line.byte_offset)
238                .unwrap_or_else(|idx| idx.saturating_sub(1));
239            let line = self.line(idx)?;
240            assert!(
241                byte_offset >= line.byte_offset,
242                "byte_offset = {}, line.byte_offset = {}",
243                byte_offset,
244                line.byte_offset
245            );
246            Some((line, idx, byte_offset - line.byte_offset))
247        } else {
248            None
249        }
250    }
251
252    /// Get the range of lines that this span runs across.
253    ///
254    /// The resulting range is guaranteed to contain valid line indices (i.e: those that can be used for
255    /// [`Source::line`]).
256    pub fn get_line_range<S: Span>(&self, span: &S) -> Range<usize> {
257        let start = self.get_offset_line(span.start()).map_or(0, |(_, l, _)| l);
258        let end = self
259            .get_offset_line(span.end().saturating_sub(1).max(span.start()))
260            .map_or(self.lines.len(), |(_, l, _)| l + 1);
261        start..end
262    }
263
264    /// Get the source text for a line, includes trailing whitespace and the newline
265    pub fn get_line_text(&self, line: Line) -> Option<&'_ str> {
266        self.text.as_ref().get(line.byte_span())
267    }
268}
269
270impl<I: AsRef<str>> Cache<()> for Source<I> {
271    type Storage = I;
272
273    fn fetch(&mut self, _: &()) -> Result<&Source<I>, impl fmt::Debug> {
274        Ok::<_, ()>(self)
275    }
276    fn display<'a>(&self, _: &'a ()) -> Option<impl fmt::Display + 'a> {
277        None::<&str>
278    }
279}
280
281impl<I: AsRef<str>> Cache<()> for &'_ Source<I> {
282    type Storage = I;
283
284    fn fetch(&mut self, _: &()) -> Result<&Source<I>, impl fmt::Debug> {
285        Ok::<_, ()>(*self)
286    }
287    fn display<'a>(&self, _: &'a ()) -> Option<impl fmt::Display + 'a> {
288        None::<u8>
289    }
290}
291
292impl<I: AsRef<str>, Id: fmt::Display + Eq> Cache<Id> for (Id, Source<I>) {
293    type Storage = I;
294
295    fn fetch(&mut self, id: &Id) -> Result<&Source<I>, impl fmt::Debug> {
296        if id == &self.0 {
297            Ok(&self.1)
298        } else {
299            Err(Box::new(format!("Failed to fetch source '{}'", id)))
300        }
301    }
302    fn display<'a>(&self, id: &'a Id) -> Option<impl fmt::Display + 'a> {
303        Some(Box::new(id))
304    }
305}
306
307impl<I: AsRef<str>, Id: fmt::Display + Eq> Cache<Id> for (Id, &'_ Source<I>) {
308    type Storage = I;
309
310    fn fetch(&mut self, id: &Id) -> Result<&Source<I>, impl fmt::Debug> {
311        if id == &self.0 {
312            Ok(self.1)
313        } else {
314            Err(Box::new(format!("Failed to fetch source '{}'", id)))
315        }
316    }
317    fn display<'a>(&self, id: &'a Id) -> Option<impl fmt::Display + 'a> {
318        Some(Box::new(id))
319    }
320}
321
322/// A [`Cache`] that fetches [`Source`]s from the filesystem.
323#[derive(Default, Debug, Clone)]
324pub struct FileCache {
325    files: HashMap<PathBuf, Source>,
326}
327
328impl Cache<Path> for FileCache {
329    type Storage = String;
330
331    fn fetch(&mut self, path: &Path) -> Result<&Source, impl fmt::Debug> {
332        Ok::<_, Error>(match self.files.entry(path.to_path_buf()) {
333            // TODO: Don't allocate here
334            Entry::Occupied(entry) => entry.into_mut(),
335            Entry::Vacant(entry) => entry.insert(Source::from(fs::read_to_string(path)?)),
336        })
337    }
338    fn display<'a>(&self, path: &'a Path) -> Option<impl fmt::Display + 'a> {
339        Some(Box::new(path.display()))
340    }
341}
342
343/// A [`Cache`] that fetches [`Source`]s using the provided function.
344#[derive(Debug, Clone)]
345pub struct FnCache<Id, F, I>
346where
347    I: AsRef<str>,
348{
349    sources: HashMap<Id, Source<I>>,
350    get: F,
351}
352
353impl<Id, F, I> FnCache<Id, F, I>
354where
355    I: AsRef<str>,
356{
357    /// Create a new [`FnCache`] with the given fetch function.
358    pub fn new(get: F) -> Self {
359        Self {
360            sources: HashMap::default(),
361            get,
362        }
363    }
364
365    /// Pre-insert a selection of [`Source`]s into this cache.
366    pub fn with_sources(mut self, sources: HashMap<Id, Source<I>>) -> Self
367    where
368        Id: Eq + Hash,
369    {
370        self.sources.reserve(sources.len());
371        for (id, src) in sources {
372            self.sources.insert(id, src);
373        }
374        self
375    }
376}
377
378impl<Id: fmt::Display + Hash + PartialEq + Eq + Clone, F, I, E> Cache<Id> for FnCache<Id, F, I>
379where
380    I: AsRef<str>,
381    E: fmt::Debug,
382    F: for<'a> FnMut(&'a Id) -> Result<I, E>,
383{
384    type Storage = I;
385
386    fn fetch(&mut self, id: &Id) -> Result<&Source<I>, impl fmt::Debug> {
387        Ok::<_, E>(match self.sources.entry(id.clone()) {
388            Entry::Occupied(entry) => entry.into_mut(),
389            Entry::Vacant(entry) => entry.insert(Source::from((self.get)(id)?)),
390        })
391    }
392    fn display<'a>(&self, id: &'a Id) -> Option<impl fmt::Display + 'a> {
393        Some(Box::new(id))
394    }
395}
396
397/// Create a [`Cache`] from a collection of ID/strings, where each corresponds to a [`Source`].
398pub fn sources<Id, S, I>(iter: I) -> impl Cache<Id>
399where
400    Id: fmt::Display + Hash + PartialEq + Eq + Clone + 'static,
401    I: IntoIterator<Item = (Id, S)>,
402    S: AsRef<str>,
403{
404    FnCache::new((move |id| Err(format!("Failed to fetch source '{}'", id))) as fn(&_) -> _)
405        .with_sources(
406            iter.into_iter()
407                .map(|(id, s)| (id, Source::from(s)))
408                .collect(),
409        )
410}
411
#[cfg(test)]
mod tests {
    use std::iter::zip;
    use std::sync::Arc;

    use super::Source;

    /// Concatenate `lines` into one source and check that it is split back into exactly
    /// those lines, with matching character offsets, lengths and text.
    fn test_with_lines(lines: Vec<&str>) {
        let source: String = lines.iter().copied().collect();
        let source = Source::from(source);

        assert_eq!(source.lines.len(), lines.len());

        let mut offset = 0;
        for (source_line, raw_line) in zip(source.lines.iter().copied(), lines.into_iter()) {
            assert_eq!(source_line.offset, offset);
            assert_eq!(source_line.char_len, raw_line.chars().count());
            assert_eq!(source.get_line_text(source_line).unwrap(), raw_line);
            offset += source_line.char_len;
        }

        assert_eq!(source.len, offset);
    }

    #[test]
    fn source_from_empty() {
        test_with_lines(vec![""]); // Empty string
    }

    #[test]
    fn source_from_single() {
        test_with_lines(vec!["Single line"]);
        test_with_lines(vec!["Single line with LF\n"]);
        test_with_lines(vec!["Single line with CRLF\r\n"]);
    }

    #[test]
    fn source_from_multi() {
        test_with_lines(vec!["Two\r\n", "lines\n"]);
        test_with_lines(vec!["Some\n", "more\r\n", "lines"]);
        test_with_lines(vec!["\n", "\r\n", "\n", "Empty Lines"]);
    }

    // Trailing whitespace is part of the line: the helper requires both the character
    // length and the line text to match the raw input exactly.
    #[test]
    fn source_from_preserves_trailing_spaces() {
        test_with_lines(vec!["Trailing spaces  \n", "are preserved\t"]);
    }

    #[test]
    fn source_from_alternate_line_endings() {
        // Line endings other than LF or CRLF
        test_with_lines(vec![
            "CR\r",
            "VT\x0B",
            "FF\x0C",
            "NEL\u{0085}",
            "LS\u{2028}",
            "PS\u{2029}",
        ]);
    }

    #[test]
    fn source_from_consecutive_carriage_returns() {
        // Only a CR directly followed by LF is merged; a CR followed by another CR or by
        // ordinary text terminates a line on its own.
        test_with_lines(vec!["\r", "\r\n", "\r", "x"]);
    }

    #[test]
    fn source_from_other_string_types() {
        let raw = r#"A raw string
            with multiple
            lines behind
            an Arc"#;
        let arc = Arc::from(raw);
        let source = Source::from(arc);

        assert_eq!(source.lines.len(), 4);

        let mut offset = 0;
        for (source_line, raw_line) in zip(source.lines.iter().copied(), raw.split_inclusive('\n'))
        {
            assert_eq!(source_line.offset, offset);
            assert_eq!(source_line.char_len, raw_line.chars().count());
            assert_eq!(source.get_line_text(source_line).unwrap(), raw_line);
            offset += source_line.char_len;
        }

        assert_eq!(source.len, offset);
    }

    #[test]
    fn source_from_reference() {
        let raw = r#"A raw string
            with multiple
            lines"#;

        fn non_owning_source(input: &str) -> Source<&str> {
            Source::from(input)
        }

        let source = non_owning_source(raw);
        assert_eq!(source.lines.len(), 3);
    }
}