fea_rs/parse/
source.rs

1//! source files
2
3use std::{
4    collections::HashMap,
5    fmt::Debug,
6    num::NonZeroU32,
7    ops::Range,
8    path::{Path, PathBuf},
9    sync::Arc,
10};
11
12use crate::{util, Diagnostic};
13
/// An opaque, process-unique identifier for a source file.
///
/// Ids are non-zero, cheap to copy, and usable as map keys.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct FileId(NonZeroU32);
18
19/// A single source file, corresponding to a file on disk.
20///
21/// We keep hold of all sources used in a given compilation so that we can
22/// do error reporting.
23///
24/// Note: this type uses `Arc` internally so that it can be safely sent across
25/// threads.
26#[derive(Clone, Debug, PartialEq)]
27#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
28pub struct Source {
29    id: FileId,
30    /// The non-canonicalized path to this source, suitable for printing.
31    path: PathBuf,
32    contents: Arc<str>,
33    /// The index of each newline character, for efficiently fetching lines
34    /// (for error reporting, e.g.)
35    line_offsets: Arc<[usize]>,
36}
37
38/// A list of sources in a project.
39#[derive(Debug, Clone, Default, PartialEq)]
40#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
41pub struct SourceList {
42    ids: HashMap<PathBuf, FileId>,
43    sources: HashMap<FileId, Source>,
44}
45
/// Loads sources through a [`SourceResolver`] and accumulates them in a
/// [`SourceList`].
pub(crate) struct SourceLoader {
    /// The sources that have been loaded so far.
    sources: SourceList,
    /// The resolver used to resolve, canonicalize and read paths.
    resolver: Box<dyn SourceResolver>,
}
50
51/// A map from positions in a resolved token tree (which may contain the
52/// contents of multiple sources) to locations in specific sources.
53#[derive(Clone, Debug, Default, PartialEq)]
54#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
55pub struct SourceMap {
56    /// sorted vec of (offset_in_combined_tree, (file_id, offest_in_source_file));
57    offsets: Vec<(Range<usize>, (FileId, usize))>,
58}
59
60/// An error that occurs when trying .to load a source.
61#[derive(Clone, Debug, thiserror::Error)]
62#[error("Failed to load source at '{}': '{cause}'", Path::new(.path.as_os_str()).display())]
63pub struct SourceLoadError {
64    cause: Arc<str>,
65    path: PathBuf,
66}
67
68/// A trait that abstracts resolving a path.
69///
70/// In general, paths are resolved through the filesystem; however if you are
71/// doing something fancy (such as keeping your source files in memory) you
72/// can pass a closure or another custom implementation of this trait into the
73/// appropriate parse functions.
74///
75/// If you need a custom resolver, you can either implement this trait for some
76/// custom type, or you can use a closure with the signature,
77/// `|&Path| -> Result<String, SourceLoadError>`.
78pub trait SourceResolver {
79    /// Return the contents of the utf-8 encoded file at the provided path.
80    fn get_contents(&self, path: &Path) -> Result<Arc<str>, SourceLoadError>;
81
82    /// Given a raw path (the `$path` in `include($path)`), return the path to load.
83    /// The final path may differ based on which file the include statement occurs
84    /// in; the path of the including file (if this is not the root source) is
85    /// passed as the second argument.
86    ///
87    /// See [including files][] for more information.
88    ///
89    /// The default implementation returns the `path` argument, unchanged.
90    ///
91    /// [including files]: http://adobe-type-tools.github.io/afdko/OpenTypeFeatureFileSpecification.html#3-including-files
92    fn resolve_raw_path(&self, path: &Path, _included_from: Option<&Path>) -> PathBuf {
93        path.to_path_buf()
94    }
95
96    /// If necessary, canonicalize this path.
97    ///
98    /// There are an unbounded number of ways to represent a given path;
99    /// fot instance, the path `./features.fea` may be equivalent to the path
100    /// `./some_folder/../features.fea` or to `../../my/font/features.fea`.
101    /// This method is an opportunity to specify the canonical representaiton
102    /// of a path.
103    fn canonicalize(&self, path: &Path) -> Result<PathBuf, SourceLoadError> {
104        Ok(path.to_owned())
105    }
106
107    /// A convenience method for creating a `Source` after loading a path.
108    #[doc(hidden)]
109    fn resolve(&self, path: &Path) -> Result<Source, SourceLoadError> {
110        let contents = self.get_contents(path)?;
111        Ok(Source::new(path.to_owned(), contents))
112    }
113
114    // a little helper used in our debug impl
115    #[doc(hidden)]
116    fn type_name(&self) -> &'static str {
117        std::any::type_name::<Self>()
118    }
119}
120
121impl std::fmt::Debug for dyn SourceResolver {
122    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
123        self.type_name().fmt(f)
124    }
125}
126
127impl<F> SourceResolver for F
128where
129    F: Fn(&Path) -> Result<Arc<str>, SourceLoadError>,
130{
131    fn get_contents(&self, path: &Path) -> Result<Arc<str>, SourceLoadError> {
132        (self)(path)
133    }
134}
135
/// A [`SourceResolver`] backed by the local file system.
///
/// This is the common case.
#[derive(Default)]
pub struct FileSystemResolver {
    /// The directory against which include paths are resolved.
    project_root: PathBuf,
}

impl FileSystemResolver {
    /// Create a new resolver rooted at the provided directory.
    ///
    /// If compiling from a UFO, the root directory is the UFO directory. In other
    /// cases, it is likely the directory containing the root feature file.
    /// If the path is empty (i.e. ""), the current working directory is assumed.
    pub fn new(project_root: PathBuf) -> Self {
        FileSystemResolver { project_root }
    }
}
154
155impl SourceResolver for FileSystemResolver {
156    fn get_contents(&self, path: &Path) -> Result<Arc<str>, SourceLoadError> {
157        std::fs::read_to_string(path)
158            .map(Into::into)
159            .map_err(|cause| SourceLoadError::new(path.into(), cause))
160    }
161
162    fn resolve_raw_path(&self, path: &Path, included_from: Option<&Path>) -> PathBuf {
163        let path = Path::new(path);
164        let included_from = included_from.map(Path::new).and_then(Path::parent);
165        util::paths::resolve_path(path, &self.project_root, included_from)
166    }
167
168    fn canonicalize(&self, path: &Path) -> Result<PathBuf, SourceLoadError> {
169        std::fs::canonicalize(path).map_err(|io_err| SourceLoadError::new(path.into(), io_err))
170    }
171}
172
173impl FileId {
174    /// A reserved FileId used during parsing.
175    pub(crate) const CURRENT_FILE: FileId = FileId(NonZeroU32::new(1).unwrap());
176
177    pub(crate) fn next() -> FileId {
178        use std::sync::atomic;
179        static COUNTER: atomic::AtomicU32 = atomic::AtomicU32::new(2);
180        FileId(NonZeroU32::new(COUNTER.fetch_add(1, atomic::Ordering::Relaxed)).unwrap())
181    }
182}
183
184impl Source {
185    pub(crate) fn new(path: PathBuf, contents: Arc<str>) -> Self {
186        let line_offsets = line_offsets(&contents);
187        Source {
188            path,
189            id: FileId::next(),
190            contents,
191            line_offsets,
192        }
193    }
194
195    /// The raw text for this source
196    pub fn text(&self) -> &str {
197        &self.contents
198    }
199
200    /// The source's path.
201    ///
202    /// If the source is a file, this will be the *resolved* file path. In other
203    /// cases the exact behaviour depends on the implementation of the current
204    /// [`SourceResolver`].
205    pub fn path(&self) -> &Path {
206        &self.path
207    }
208
209    /// The `FileId` for this source.
210    pub fn id(&self) -> FileId {
211        self.id
212    }
213
214    /// Compute the line and column for a given utf-8 offset.
215    pub fn line_col_for_offset(&self, offset: usize) -> (usize, usize) {
216        let offset_idx = match self.line_offsets.binary_search(&offset) {
217            Ok(x) => x,
218            Err(x) => x - 1, // cannot underflow as 0 is always in list
219        };
220        let offset_of_line = self.line_offsets[offset_idx];
221        let offset_in_line = offset - offset_of_line;
222        (offset_idx + 1, offset_in_line)
223    }
224
225    /// returns the (1-indexed) number and text.
226    pub fn line_containing_offset(&self, offset: usize) -> (usize, &str) {
227        let offset_idx = match self.line_offsets.binary_search(&offset) {
228            Ok(x) => x,
229            Err(x) => x - 1, // cannot underflow as 0 is always in list
230        };
231        let start_offset = self.line_offsets[offset_idx];
232        let end_offset = self
233            .line_offsets
234            .get(offset_idx + 1)
235            .copied()
236            .unwrap_or(self.contents.len());
237
238        (
239            offset_idx + 1,
240            self.contents[start_offset..end_offset].trim_end_matches('\n'),
241        )
242    }
243
244    /// Return the offset of the start of the (1-indexed) line.
245    ///
246    /// Panics if the line number exceeds the total number of lines in the file.
247    pub fn offset_for_line_number(&self, line_number: usize) -> usize {
248        self.line_offsets[line_number - 1]
249    }
250}
251
/// Compute the byte offset at which each line of `text` starts.
///
/// The first entry is always `0`; every `\n` contributes the offset of the
/// byte that follows it.
fn line_offsets(text: &str) -> Arc<[usize]> {
    // we could use memchr for this; benefits would require benchmarking
    let mut starts = vec![0];
    for (newline_idx, _) in text.match_indices('\n') {
        starts.push(newline_idx + 1);
    }
    Arc::from(starts)
}
262
263impl SourceMap {
264    pub(crate) fn add_entry(&mut self, src: Range<usize>, dest: (FileId, usize)) {
265        if !src.is_empty() {
266            self.offsets.push((src, dest));
267        }
268    }
269
270    /// panics if `global_range` crosses a file barrier?
271    pub(crate) fn resolve_range(&self, global_range: Range<usize>) -> (FileId, Range<usize>) {
272        // it is hard to imagine more than a couple hundred include statements,
273        // and even that would be extremely rare, so I don't think it's really
274        // worth doing a binary search here?
275        let (chunk, (file, local_offset)) = self
276            .offsets
277            .iter()
278            .find(|item| item.0.contains(&global_range.start))
279            .unwrap();
280        let chunk_offset = global_range.start - chunk.start;
281        let range_start = *local_offset + chunk_offset;
282        let len = global_range.end - global_range.start;
283        (*file, range_start..range_start + len)
284    }
285}
286
287impl SourceLoader {
288    pub(crate) fn new(resolver: Box<dyn SourceResolver>) -> Self {
289        Self {
290            sources: Default::default(),
291            resolver,
292        }
293    }
294
295    pub(crate) fn into_inner(self) -> Arc<SourceList> {
296        Arc::new(self.sources)
297    }
298
299    pub(crate) fn get(&self, id: &FileId) -> Option<&Source> {
300        self.sources.get(id)
301    }
302
303    /// Attempt to load the source at the provided path.
304    ///
305    /// This uses the [`SourceResolver`] that was passed in at construction time,
306    /// and is used to load both the root source as well as any sources that are
307    /// referenced by `include($path)` statements. In this case, the `path` argument
308    /// is the literal (e.g. unresolved and uncanonicalized) `$path` in the
309    /// include.
310    ///
311    /// If the source cannot be resolved, returns an error.
312    pub(crate) fn source_for_path(
313        &mut self,
314        path: &Path,
315        included_by: Option<FileId>,
316    ) -> Result<FileId, SourceLoadError> {
317        let included_by = included_by.map(|id| self.sources.get(&id).unwrap().path.as_path());
318        let path = self.resolver.resolve_raw_path(path.as_ref(), included_by);
319        let canonical = self.resolver.canonicalize(&path)?;
320
321        match self.sources.id_for_path(&canonical) {
322            Some(id) => Ok(id),
323            None => {
324                let source = self.resolver.resolve(&path)?;
325                let id = source.id;
326                self.sources.add(canonical, source);
327                Ok(id)
328            }
329        }
330    }
331}
332
333impl SourceList {
334    pub(crate) fn id_for_path(&self, path: impl AsRef<Path>) -> Option<FileId> {
335        self.ids.get(path.as_ref()).copied()
336    }
337
338    pub(crate) fn get(&self, id: &FileId) -> Option<&Source> {
339        self.sources.get(id)
340    }
341
342    fn add(&mut self, canonical_path: PathBuf, source: Source) {
343        self.ids.insert(canonical_path, source.id);
344        self.sources.insert(source.id, source);
345    }
346
347    /// Generate a string suitable for presenting a [`Diagnostic`] to the user.
348    ///
349    /// This associates the message with the appropriate source location and
350    /// syntax highlighting.
351    pub(crate) fn format_diagnostic(&self, err: &Diagnostic, colorize: bool) -> String {
352        let mut s = String::new();
353        let source = self.get(&err.message.file).unwrap();
354        crate::util::highlighting::write_diagnostic(&mut s, err, source, None, colorize);
355        s
356    }
357}
358
359impl SourceLoadError {
360    /// Create a new [`SourceLoadError`].
361    ///
362    /// The `cause` argument should be some error type that communicates the
363    /// cause of the failure.
364    pub fn new(path: PathBuf, cause: impl std::fmt::Display) -> Self {
365        Self {
366            cause: cause.to_string().into(),
367            path,
368        }
369    }
370}