nickel_lang_core/
files.rs

1//! This module provides [`Files`], a cheaply-clonable, persistent, `codespan`-compatible collection of files.
2
3use std::{
4    ffi::{OsStr, OsString},
5    path::PathBuf,
6    rc::Rc,
7    sync::Arc,
8};
9
10use codespan::ByteIndex;
11use codespan_reporting::files::Error;
12use nickel_lang_vector::Vector;
13
14use crate::{position::RawSpan, stdlib::StdlibModule};
15
16/// A file identifier, which can be used to access a file in a [`Files`].
17///
18/// Note that there is no protection against using a `FileId` for the wrong
19/// instance of `Files`.
20#[derive(
21    Copy, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, serde::Serialize, serde::Deserialize,
22)]
23pub struct FileId(u32);
24
25#[derive(Debug, Clone)]
26struct File {
27    /// The name of the file.
28    name: OsString,
29    /// The source code of the file.
30    source: Arc<str>,
31    /// The byte index of the start of each line. The first element of this array is always 0.
32    line_starts: Rc<[ByteIndex]>,
33}
34
35impl File {
36    fn new(name: impl Into<OsString>, source: impl Into<Arc<str>>) -> Self {
37        let source = source.into();
38        let line_starts: Vec<_> = std::iter::once(ByteIndex(0))
39            .chain(
40                source
41                    .match_indices('\n')
42                    .map(|(i, _)| ByteIndex(i as u32 + 1)),
43            )
44            .collect();
45
46        File {
47            name: name.into(),
48            line_starts: line_starts.into(),
49            source,
50        }
51    }
52
53    fn line_index(&self, byte_index: ByteIndex) -> usize {
54        match self.line_starts.binary_search(&byte_index) {
55            Ok(line) => line,
56            // unwrap: we always start off the `line_starts` array with a zero,
57            // so next_line must be at least 1.
58            Err(next_line) => next_line.checked_sub(1).unwrap(),
59        }
60    }
61}
62
63/// A cheaply-clonable, persistent, `codespan`-compatible collection of files.
64///
65/// `Files` knows about the nickel standard library, and automatically loads it on creation
66/// (but it doesn't do parsing, or anything particularly expensive).
67///
68/// Cloning a `Files` is cheap, and the underlying file data will be shared between clones
69/// until one of them wants to modify a file. In that case, only the modified file(s) will
70/// be duplicated.
71#[derive(Debug, Clone)]
72pub struct Files {
73    files: Vector<File, 8>,
74    first_non_stdlib: usize,
75}
76
77impl Files {
78    /// Creates a new `Files`, initialized with the nickel standard library.
79    pub fn new() -> Self {
80        let files: Vector<_, 8> = crate::stdlib::modules()
81            .iter()
82            .map(|m| File::new(m.file_name().to_owned(), m.content()))
83            .collect();
84
85        Files {
86            first_non_stdlib: files.len(),
87            files,
88        }
89    }
90
91    /// Does this file id point to a standard library file?
92    pub fn is_stdlib(&self, id: FileId) -> bool {
93        (id.0 as usize) < self.first_non_stdlib
94    }
95
96    /// Returns the list of all standard library modules and their file ids.
97    pub fn stdlib_modules(&self) -> impl Iterator<Item = (StdlibModule, FileId)> {
98        crate::stdlib::modules()
99            .into_iter()
100            .zip(0..)
101            .map(|(m, id)| (m, FileId(id)))
102    }
103
104    /// Adds a file to this collection, creating and returning a new file id.
105    ///
106    /// The name does not need to be unique, and this method does not affect any other files
107    /// with the same name.
108    pub fn add(&mut self, name: impl Into<OsString>, source: impl Into<Arc<str>>) -> FileId {
109        let file_id = FileId(self.files.len() as u32);
110        self.files.push(File::new(name, source));
111        file_id
112    }
113
114    /// Updates a source file in place.
115    ///
116    /// Panics if `file_id` is invalid.
117    pub fn update(&mut self, file_id: FileId, source: impl Into<Arc<str>>) {
118        // This implementation would be a little nicer if `Vector` supported mutable access.
119        // unwrap: we're allowed to panic if file_id is invalid
120        let mut old = self.get(file_id).unwrap().clone();
121        old = File::new(old.name, source);
122        self.files.set(file_id.0 as usize, old);
123    }
124
125    /// Returns a span containing all of a source.
126    ///
127    /// Panics if `file_id` is invalid.
128    pub fn source_span(&self, file_id: FileId) -> RawSpan {
129        // unwrap: we're allowed to panic if file_id is invalid
130        let len = self.get(file_id).unwrap().source.len();
131
132        RawSpan {
133            src_id: file_id,
134            start: ByteIndex(0),
135            end: ByteIndex(len as u32),
136        }
137    }
138
139    /// Returns the source's name.
140    ///
141    /// Panics if `file_id` is invalid.
142    pub fn name(&self, id: FileId) -> &OsStr {
143        &self.get(id).unwrap().name
144    }
145
146    /// Returns a source's contents.
147    pub fn source(&self, id: FileId) -> &str {
148        self.get(id).unwrap().source.as_ref()
149    }
150
151    /// Returns a cloned reference to the source's contents
152    pub fn clone_source(&self, id: FileId) -> Arc<str> {
153        self.get(id).unwrap().source.clone()
154    }
155
156    /// Returns a slice of the source's contents.
157    pub fn source_slice(&self, span: RawSpan) -> &str {
158        let start: usize = span.start.into();
159        let end: usize = span.end.into();
160        &self.source(span.src_id)[start..end]
161    }
162
163    /// Returns the `codespan::Location` (basically: line + col) corresponding
164    /// to a byte index.
165    ///
166    /// Returns an error if the byte index is out of bounds or fails to point to
167    /// a UTF-8 char boundary.
168    pub fn location(
169        &self,
170        id: FileId,
171        byte_index: impl Into<ByteIndex>,
172    ) -> Result<codespan::Location, Error> {
173        let file = self.get(id)?;
174        let byte_index = byte_index.into();
175        let idx = byte_index.to_usize();
176
177        if idx >= file.source.len() {
178            return Err(Error::IndexTooLarge {
179                given: idx,
180                max: file.source.len() - 1,
181            });
182        }
183
184        let line_idx = file.line_index(byte_index);
185        let line_start_idx = file.line_starts[line_idx];
186        let line = file
187            .source
188            .get(line_start_idx.to_usize()..idx)
189            .ok_or(Error::InvalidCharBoundary { given: idx })?;
190
191        Ok(codespan::Location {
192            line: codespan::LineIndex::from(line_idx as u32),
193            column: codespan::ColumnIndex::from(line.chars().count() as u32),
194        })
195    }
196
197    fn get(&self, id: FileId) -> Result<&File, Error> {
198        self.files.get(id.0 as usize).ok_or(Error::FileMissing)
199    }
200
201    pub(crate) fn filenames(&self) -> impl Iterator<Item = &OsStr> {
202        self.files.iter().map(|f| &*f.name)
203    }
204}
205
206impl Default for Files {
207    fn default() -> Self {
208        Self::new()
209    }
210}
211
212impl<'a> codespan_reporting::files::Files<'a> for Files {
213    type FileId = FileId;
214    type Name = String;
215    type Source = &'a str;
216
217    fn name(&'a self, id: Self::FileId) -> Result<String, Error> {
218        Ok(PathBuf::from(&self.get(id)?.name).display().to_string())
219    }
220
221    fn source(
222        &'a self,
223        id: Self::FileId,
224    ) -> Result<Self::Source, codespan_reporting::files::Error> {
225        Ok(self.get(id)?.source.as_ref())
226    }
227
228    fn line_index(
229        &'a self,
230        id: Self::FileId,
231        byte_index: usize,
232    ) -> Result<usize, codespan_reporting::files::Error> {
233        let file = self.get(id)?;
234        Ok(file.line_index(ByteIndex(byte_index as u32)))
235    }
236
237    fn line_range(
238        &'a self,
239        id: Self::FileId,
240        line_index: usize,
241    ) -> Result<std::ops::Range<usize>, codespan_reporting::files::Error> {
242        let file = self.get(id)?;
243        let starts = &file.line_starts;
244        let end = starts
245            .get(line_index + 1)
246            .copied()
247            .unwrap_or(ByteIndex(file.source.len() as u32));
248        Ok(starts[line_index].into()..end.into())
249    }
250}