nickel_lang_parser/
files.rs

1//! This module provides [`Files`], a cheaply-clonable, persistent, `codespan`-compatible collection of files.
2
3use std::{
4    ffi::{OsStr, OsString},
5    path::PathBuf,
6    rc::Rc,
7    sync::Arc,
8};
9
10use codespan::ByteIndex;
11use codespan_reporting::files::Error;
12use nickel_lang_vector::Vector;
13
14use crate::position::RawSpan;
15
16/// A file identifier, which can be used to access a file in a [`Files`].
17///
18/// Note that there is no protection against using a `FileId` for the wrong
19/// instance of `Files`.
20#[derive(
21    Copy, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, serde::Serialize, serde::Deserialize,
22)]
23pub struct FileId(u32);
24
25#[derive(Debug, Clone)]
26struct File {
27    /// The name of the file.
28    name: OsString,
29    /// The source code of the file.
30    source: Arc<str>,
31    /// The byte index of the start of each line. The first element of this array is always 0.
32    line_starts: Rc<[ByteIndex]>,
33}
34
35impl File {
36    fn new(name: impl Into<OsString>, source: impl Into<Arc<str>>) -> Self {
37        let source = source.into();
38        let line_starts: Vec<_> = std::iter::once(ByteIndex(0))
39            .chain(
40                source
41                    .match_indices('\n')
42                    .map(|(i, _)| ByteIndex(i as u32 + 1)),
43            )
44            .collect();
45
46        File {
47            name: name.into(),
48            line_starts: line_starts.into(),
49            source,
50        }
51    }
52
53    fn line_index(&self, byte_index: ByteIndex) -> usize {
54        match self.line_starts.binary_search(&byte_index) {
55            Ok(line) => line,
56            // unwrap: we always start off the `line_starts` array with a zero,
57            // so next_line must be at least 1.
58            Err(next_line) => next_line.checked_sub(1).unwrap(),
59        }
60    }
61}
62
63/// A cheaply-clonable, persistent, `codespan`-compatible collection of files.
64///
65/// `Files` knows about the nickel standard library, and automatically loads it on creation
66/// (but it doesn't do parsing, or anything particularly expensive).
67///
68/// Cloning a `Files` is cheap, and the underlying file data will be shared between clones
69/// until one of them wants to modify a file. In that case, only the modified file(s) will
70/// be duplicated.
71#[derive(Debug, Clone, Default)]
72pub struct Files {
73    files: Vector<File, 8>,
74    first_non_stdlib: usize,
75}
76
77impl Files {
78    pub fn empty() -> Self {
79        Files::default()
80    }
81
82    /// Creates a new `Files`, initialized with the nickel standard library.
83    pub fn new<Name, Contents, I>(stdlib_modules: I) -> Self
84    where
85        Name: Into<OsString>,
86        Contents: Into<Arc<str>>,
87        I: IntoIterator<Item = (Name, Contents)>,
88    {
89        let files: Vector<_, 8> = stdlib_modules
90            .into_iter()
91            .map(|(name, contents)| File::new(name, contents))
92            .collect();
93
94        Files {
95            first_non_stdlib: files.len(),
96            files,
97        }
98    }
99
100    /// Does this file id point to a standard library file?
101    pub fn is_stdlib(&self, id: FileId) -> bool {
102        (id.0 as usize) < self.first_non_stdlib
103    }
104
105    /// Returns the list of file ids of stdlib modules, in the order they were passed
106    /// to [`Files::new`].
107    pub fn stdlib_modules(&self) -> impl Iterator<Item = FileId> + use<> {
108        (0..self.first_non_stdlib).map(|id| FileId(id as u32))
109    }
110
111    /// Adds a file to this collection, creating and returning a new file id.
112    ///
113    /// The name does not need to be unique, and this method does not affect any other files
114    /// with the same name.
115    pub fn add(&mut self, name: impl Into<OsString>, source: impl Into<Arc<str>>) -> FileId {
116        let file_id = FileId(self.files.len() as u32);
117        self.files.push(File::new(name, source));
118        file_id
119    }
120
121    /// Updates a source file in place.
122    ///
123    /// Panics if `file_id` is invalid.
124    pub fn update(&mut self, file_id: FileId, source: impl Into<Arc<str>>) {
125        // This implementation would be a little nicer if `Vector` supported mutable access.
126        // unwrap: we're allowed to panic if file_id is invalid
127        let mut old = self.get(file_id).unwrap().clone();
128        old = File::new(old.name, source);
129        self.files.set(file_id.0 as usize, old);
130    }
131
132    /// Returns a span containing all of a source.
133    ///
134    /// Panics if `file_id` is invalid.
135    pub fn source_span(&self, file_id: FileId) -> RawSpan {
136        // unwrap: we're allowed to panic if file_id is invalid
137        let len = self.get(file_id).unwrap().source.len();
138
139        RawSpan {
140            src_id: file_id,
141            start: ByteIndex(0),
142            end: ByteIndex(len as u32),
143        }
144    }
145
146    /// Returns the source's name.
147    ///
148    /// Panics if `file_id` is invalid.
149    pub fn name(&self, id: FileId) -> &OsStr {
150        &self.get(id).unwrap().name
151    }
152
153    /// Returns a source's contents.
154    pub fn source(&self, id: FileId) -> &str {
155        self.get(id).unwrap().source.as_ref()
156    }
157
158    /// Returns a cloned reference to the source's contents
159    pub fn clone_source(&self, id: FileId) -> Arc<str> {
160        self.get(id).unwrap().source.clone()
161    }
162
163    /// Returns a slice of the source's contents.
164    pub fn source_slice(&self, span: RawSpan) -> &str {
165        let start: usize = span.start.into();
166        let end: usize = span.end.into();
167        &self.source(span.src_id)[start..end]
168    }
169
170    /// Returns the `codespan::Location` (basically: line + col) corresponding
171    /// to a byte index.
172    ///
173    /// Returns an error if the byte index is out of bounds or fails to point to
174    /// a UTF-8 char boundary.
175    pub fn location(
176        &self,
177        id: FileId,
178        byte_index: impl Into<ByteIndex>,
179    ) -> Result<codespan::Location, Error> {
180        let file = self.get(id)?;
181        let byte_index = byte_index.into();
182        let idx = byte_index.to_usize();
183
184        if idx >= file.source.len() {
185            return Err(Error::IndexTooLarge {
186                given: idx,
187                max: file.source.len() - 1,
188            });
189        }
190
191        let line_idx = file.line_index(byte_index);
192        let line_start_idx = file.line_starts[line_idx];
193        let line = file
194            .source
195            .get(line_start_idx.to_usize()..idx)
196            .ok_or(Error::InvalidCharBoundary { given: idx })?;
197
198        Ok(codespan::Location {
199            line: codespan::LineIndex::from(line_idx as u32),
200            column: codespan::ColumnIndex::from(line.chars().count() as u32),
201        })
202    }
203
204    fn get(&self, id: FileId) -> Result<&File, Error> {
205        self.files.get(id.0 as usize).ok_or(Error::FileMissing)
206    }
207
208    pub fn filenames(&self) -> impl Iterator<Item = &OsStr> {
209        self.files.iter().map(|f| &*f.name)
210    }
211}
212
213impl<'a> codespan_reporting::files::Files<'a> for Files {
214    type FileId = FileId;
215    type Name = String;
216    type Source = &'a str;
217
218    fn name(&'a self, id: Self::FileId) -> Result<String, Error> {
219        Ok(PathBuf::from(&self.get(id)?.name).display().to_string())
220    }
221
222    fn source(
223        &'a self,
224        id: Self::FileId,
225    ) -> Result<Self::Source, codespan_reporting::files::Error> {
226        Ok(self.get(id)?.source.as_ref())
227    }
228
229    fn line_index(
230        &'a self,
231        id: Self::FileId,
232        byte_index: usize,
233    ) -> Result<usize, codespan_reporting::files::Error> {
234        let file = self.get(id)?;
235        Ok(file.line_index(ByteIndex(byte_index as u32)))
236    }
237
238    fn line_range(
239        &'a self,
240        id: Self::FileId,
241        line_index: usize,
242    ) -> Result<std::ops::Range<usize>, codespan_reporting::files::Error> {
243        let file = self.get(id)?;
244        let starts = &file.line_starts;
245        let end = starts
246            .get(line_index + 1)
247            .copied()
248            .unwrap_or(ByteIndex(file.source.len() as u32));
249        Ok(starts[line_index].into()..end.into())
250    }
251}