syntax_error/
source.rs

1use super::*;
2
3use std::{
4    collections::HashMap,
5    hash::{BuildHasher, Hasher},
6    mem::replace,
7    path::PathBuf,
8};
9
/// A type representing a single line of a [`Source`].
///
/// A line records where it sits in the source (as character counts, not bytes) together with
/// its text. When built by `Source::from`, `len` counts every character of the line including
/// its terminator, while the stored text has trailing whitespace (terminator included) removed.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub struct Line {
    /// Number of characters that precede this line in the source.
    offset: usize,
    /// Number of characters this line spans in the source.
    len: usize,
    /// Text of the line.
    chars: String,
}

impl Line {
    /// Get the offset of this line in the original [`Source`] (i.e: the number of characters that precede it).
    pub fn offset(&self) -> usize {
        self.offset
    }

    /// Get the character length of this line.
    pub fn len(&self) -> usize {
        self.len
    }

    /// Return `true` if this line spans no characters at all (i.e: [`Line::len`] is zero).
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }

    /// Get the offset span of this line in the original [`Source`].
    ///
    /// The span starts at [`Line::offset`] and is [`Line::len`] characters long.
    pub fn span(&self) -> Range<usize> {
        self.offset..self.offset + self.len
    }

    /// Return an iterator over the characters of the stored line text.
    ///
    /// This may yield fewer than [`Line::len`] characters when the stored text is shorter than
    /// the span the line covers in the source.
    pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
        self.chars.chars()
    }

    /// Get the stored text of this line as a string slice.
    pub fn view(&self) -> &str {
        &self.chars
    }
}
43
44/// A type representing a single source that may be referred to by [`Span`]s.
45///
46/// In most cases, a source is a single input file.
47#[derive(Clone, Debug, Hash, PartialEq, Eq)]
48pub struct Source {
49    file_name: String,
50    lines: Vec<Line>,
51    len: usize,
52}
53
54impl<S: AsRef<str>> From<S> for Source {
55    /// Generate a [`Source`] from the given [`str`].
56    ///
57    /// Note that this function can be expensive for long strings. Use an implementor of [`Cache`] where possible.
58    fn from(s: S) -> Self {
59        let mut offset = 0;
60        // (Last line, last line ends with CR)
61        let mut last_line: Option<(Line, bool)> = None;
62        let mut lines: Vec<Line> = s
63            .as_ref()
64            .split_inclusive([
65                '\r',       // Carriage return
66                '\n',       // Line feed
67                '\x0B',     // Vertical tab
68                '\x0C',     // Form feed
69                '\u{0085}', // Next line
70                '\u{2028}', // Line separator
71                '\u{2029}', // Paragraph separator
72            ])
73            .flat_map(|line| {
74                // Returns last line and set `last_line` to current `line`
75                // A hack that makes `flat_map` deals with consecutive lines
76
77                if let Some((last, ends_with_cr)) = last_line.as_mut() {
78                    if *ends_with_cr && line == "\n" {
79                        last.len += 1;
80                        offset += 1;
81                        return replace(&mut last_line, None).map(|(l, _)| l);
82                    }
83                }
84
85                let len = line.chars().count();
86                let ends_with_cr = line.ends_with('\r');
87                let line = Line { offset, len, chars: line.trim_end().to_owned() };
88                offset += len;
89                replace(&mut last_line, Some((line, ends_with_cr))).map(|(l, _)| l)
90            })
91            .collect();
92
93        if let Some((l, _)) = last_line {
94            lines.push(l);
95        }
96
97        Self { file_name: "<anonymous>".to_string(), lines, len: offset }
98    }
99}
100
101impl Source {
102    /// Get the length of the total number of characters in the source.
103    pub fn length(&self) -> usize {
104        self.len
105    }
106
107    /// Return an iterator over the characters in the source.
108    pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
109        self.lines.iter().map(|l| l.chars()).flatten()
110    }
111
112    /// Get access to a specific, zero-indexed [`Line`].
113    pub fn line(&self, idx: usize) -> Option<&Line> {
114        self.lines.get(idx)
115    }
116
117    /// Return an iterator over the [`Line`]s in this source.
118    pub fn lines(&self) -> impl ExactSizeIterator<Item = &Line> + '_ {
119        self.lines.iter()
120    }
121
122    /// Get the line that the given offset appears on, and the line/column numbers of the offset.
123    ///
124    /// Note that the line/column numbers are zero-indexed.
125    pub fn get_offset_line(&self, offset: usize) -> Option<(&Line, usize, usize)> {
126        if offset <= self.len {
127            let idx = self.lines.binary_search_by_key(&offset, |line| line.offset).unwrap_or_else(|idx| idx.saturating_sub(1));
128            let line = &self.lines[idx];
129            assert!(offset >= line.offset, "offset = {}, line.offset = {}", offset, line.offset);
130            Some((line, idx, offset - line.offset))
131        }
132        else {
133            None
134        }
135    }
136
137    /// Get the range of lines that this span runs across.
138    ///
139    /// The resulting range is guaranteed to contain valid line indices (i.e: those that can be used for
140    /// [`Source::line`]).
141    pub fn get_line_range(&self, span: &Range<usize>) -> Range<usize> {
142        let start = self.get_offset_line(span.start).map_or(0, |(_, l, _)| l);
143        let end = self.get_offset_line(span.end.saturating_sub(1).max(span.start)).map_or(self.lines.len(), |(_, l, _)| l + 1);
144        start..end
145    }
146}
147
148/// A [`Cache`] that fetches [`Source`]s from the filesystem.
149#[derive(Default, Debug, Clone)]
150pub struct FileCache {
151    files: HashMap<FileID, Source>,
152}
153
154impl FileCache {
155    /// Create a new [`FileCache`].
156    pub fn load_local<P>(&mut self, path: P) -> Result<FileID, std::io::Error>
157    where
158        P: AsRef<PathBuf>,
159    {
160        let path = path.as_ref();
161        let hasher = self.files.hasher();
162        let name_hash = {
163            let mut hasher = hasher.build_hasher();
164            path.hash(&mut hasher);
165            FileID { hash: hasher.finish() }
166        };
167        let text = std::fs::read_to_string(path)?;
168        let source = Source::from(text);
169        self.files.insert(name_hash, source);
170        Ok(name_hash)
171    }
172    /// Create a new [`FileCache`].
173    pub fn load_text<T, N>(&mut self, text: T, name: N) -> FileID
174    where
175        T: ToString,
176        N: ToString,
177    {
178        let name = name.to_string();
179        let hasher = self.files.hasher();
180        let name_hash = {
181            let mut hasher = hasher.build_hasher();
182            name.hash(&mut hasher);
183            FileID { hash: hasher.finish() }
184        };
185        let mut source = Source::from(text.to_string());
186        source.file_name = name;
187        self.files.insert(name_hash, source);
188        name_hash
189    }
190    /// Create a new [`FileCache`].
191    pub fn fetch(&mut self, file: &FileID) -> Result<&Source, std::io::Error> {
192        match self.files.get(file) {
193            Some(source) => Ok(source),
194            None => Err(std::io::Error::new(std::io::ErrorKind::NotFound, format!("File {:?} not found", file))),
195        }
196    }
197    /// Create a new [`FileCache`].
198    pub fn display(&self, file: &FileID) -> Option<&str> {
199        Some(self.files.get(file)?.file_name.as_str())
200    }
201}