codespan_reporting/
files.rs

1//! Source file support for diagnostic reporting.
2//!
//! The main trait defined in this module is the [`Files`] trait, which
//! provides the minimum amount of functionality required for printing [`Diagnostics`]
//! with the [`term::emit`] function.
6//!
//! Simple implementations of this trait are provided:
8//!
9//! - [`SimpleFile`]: For single-file use-cases
10//! - [`SimpleFiles`]: For multi-file use-cases
11//!
12//! These data structures provide a pretty minimal API, however,
13//! so end-users are encouraged to create their own implementations for their
14//! own specific use-cases, such as an implementation that accesses the file
15//! system directly (and caches the line start locations), or an implementation
16//! using an incremental compilation library like [`salsa`].
17//!
18//! [`term::emit`]: crate::term::emit
19//! [`Diagnostics`]: crate::diagnostic::Diagnostic
20//! [`Files`]: Files
21//! [`SimpleFile`]: SimpleFile
22//! [`SimpleFiles`]: SimpleFiles
23//!
24//! [`salsa`]: https://crates.io/crates/salsa
25
26use alloc::vec::Vec;
27use core::ops::Range;
28
29#[cfg(feature = "std")]
30use std::error;
31
32#[cfg(not(feature = "std"))]
33use core::error;
34
/// The ways in which looking up a file, or a position inside a file, can fail.
#[derive(Debug)]
#[non_exhaustive]
pub enum Error {
    /// The requested file is not present in the file database.
    FileMissing,
    /// The file exists, but the requested byte index lies outside of it.
    IndexTooLarge {
        /// The byte index that was requested.
        given: usize,
        /// The maximum byte index reported for the file.
        max: usize,
    },
    /// The file exists, but the requested line index lies outside of it.
    LineTooLarge {
        /// The line index that was requested.
        given: usize,
        /// The maximum line index reported for the file.
        max: usize,
    },
    /// The file and the line both exist, but the requested column index lies
    /// outside of that line.
    ColumnTooLarge {
        /// The column index that was requested.
        given: usize,
        /// The maximum column index reported for the line.
        max: usize,
    },
    /// The requested index is inside the file, but does not fall on the
    /// boundary of a UTF-8 code point.
    InvalidCharBoundary {
        /// The offending byte index.
        given: usize,
    },
    /// An error occurred while performing IO.
    #[cfg(feature = "std")]
    Io(std::io::Error),
    /// An error occurred while formatting.
    FormatError,
}
55
/// Wraps an IO error in [`Error::Io`], which lets IO failures be propagated
/// with the `?` operator.
#[cfg(feature = "std")]
impl From<std::io::Error> for Error {
    fn from(io_error: std::io::Error) -> Self {
        Self::Io(io_error)
    }
}
62
63impl From<core::fmt::Error> for Error {
64    fn from(_err: core::fmt::Error) -> Error {
65        Error::FormatError
66    }
67}
68
69impl core::fmt::Display for Error {
70    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
71        match self {
72            Error::FileMissing => write!(f, "file missing"),
73            Error::IndexTooLarge { given, max } => {
74                write!(f, "invalid index {}, maximum index is {}", given, max)
75            }
76            Error::LineTooLarge { given, max } => {
77                write!(f, "invalid line {}, maximum line is {}", given, max)
78            }
79            Error::ColumnTooLarge { given, max } => {
80                write!(f, "invalid column {}, maximum column {}", given, max)
81            }
82            Error::InvalidCharBoundary { .. } => write!(f, "index is not a code point boundary"),
83            #[cfg(feature = "std")]
84            Error::Io(err) => write!(f, "{}", err),
85            Error::FormatError => write!(f, "formatting error"),
86        }
87    }
88}
89
90impl error::Error for Error {
91    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
92        match &self {
93            #[cfg(feature = "std")]
94            Error::Io(err) => Some(err),
95            _ => None,
96        }
97    }
98}
99
100/// A minimal interface for accessing source files when rendering diagnostics.
101///
102/// A lifetime parameter `'a` is provided to allow any of the returned values to returned by reference.
103/// This is to workaround the lack of higher kinded lifetime parameters.
104/// This can be ignored if this is not needed, however.
105pub trait Files<'a> {
106    /// A unique identifier for files in the file provider. This will be used
107    /// for rendering `diagnostic::Label`s in the corresponding source files.
108    type FileId: 'a + Copy + PartialEq;
109    /// The user-facing name of a file, to be displayed in diagnostics.
110    type Name: 'a + core::fmt::Display;
111    /// The source code of a file.
112    type Source: 'a + AsRef<str>;
113
114    /// The user-facing name of a file.
115    fn name(&'a self, id: Self::FileId) -> Result<Self::Name, Error>;
116
117    /// The source code of a file.
118    fn source(&'a self, id: Self::FileId) -> Result<Self::Source, Error>;
119
120    /// The index of the line at the given byte index.
121    /// If the byte index is past the end of the file, returns the maximum line index in the file.
122    /// This means that this function only fails if the file is not present.
123    ///
124    /// # Note for trait implementors
125    ///
126    /// This can be implemented efficiently by performing a binary search over
127    /// a list of line starts that was computed by calling the [`line_starts`]
128    /// function that is exported from the [`files`] module. It might be useful
129    /// to pre-compute and cache these line starts.
130    ///
131    /// [`line_starts`]: crate::files::line_starts
132    /// [`files`]: crate::files
133    fn line_index(&'a self, id: Self::FileId, byte_index: usize) -> Result<usize, Error>;
134
135    /// The user-facing line number at the given line index.
136    /// It is not necessarily checked that the specified line index
137    /// is actually in the file.
138    ///
139    /// # Note for trait implementors
140    ///
141    /// This is usually 1-indexed from the beginning of the file, but
142    /// can be useful for implementing something like the
143    /// [C preprocessor's `#line` macro][line-macro].
144    ///
145    /// [line-macro]: https://en.cppreference.com/w/c/preprocessor/line
146    #[allow(unused_variables)]
147    fn line_number(&'a self, id: Self::FileId, line_index: usize) -> Result<usize, Error> {
148        Ok(line_index + 1)
149    }
150
151    /// The user-facing column number at the given line index and byte index.
152    ///
153    /// # Note for trait implementors
154    ///
155    /// This is usually 1-indexed from the the start of the line.
156    /// A default implementation is provided, based on the [`column_index`]
157    /// function that is exported from the [`files`] module.
158    ///
159    /// [`files`]: crate::files
160    /// [`column_index`]: crate::files::column_index
161    fn column_number(
162        &'a self,
163        id: Self::FileId,
164        line_index: usize,
165        byte_index: usize,
166    ) -> Result<usize, Error> {
167        let source = self.source(id)?;
168        let line_range = self.line_range(id, line_index)?;
169        let column_index = column_index(source.as_ref(), line_range, byte_index);
170
171        Ok(column_index + 1)
172    }
173
174    /// Convenience method for returning line and column number at the given
175    /// byte index in the file.
176    fn location(&'a self, id: Self::FileId, byte_index: usize) -> Result<Location, Error> {
177        let line_index = self.line_index(id, byte_index)?;
178
179        Ok(Location {
180            line_number: self.line_number(id, line_index)?,
181            column_number: self.column_number(id, line_index, byte_index)?,
182        })
183    }
184
185    /// The byte range of line in the source of the file.
186    fn line_range(&'a self, id: Self::FileId, line_index: usize) -> Result<Range<usize>, Error>;
187}
188
/// A line/column pair describing a position in a source file, as presented to
/// the user.
///
/// This is the value produced by [`Files::location`].
///
/// [`Files::location`]: Files::location
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Location {
    /// The line number, as presented to the user.
    pub line_number: usize,
    /// The column number, as presented to the user.
    pub column_number: usize,
}
201
/// The column index at the given byte index in the source file.
/// This is the number of characters between the start of the line and the
/// given byte index.
///
/// If the byte index is smaller than the start of the line, then `0` is returned.
/// If the byte index is past the end of the line, the column index of the last
/// character `+ 1` is returned.
///
/// A byte index that falls inside a multi-byte character yields the same
/// column index as the first byte of that character.
///
/// # Example
///
/// ```rust
/// use codespan_reporting::files;
///
/// let source = "\n\n🗻∈🌏\n\n";
///
/// assert_eq!(files::column_index(source, 0..1, 0), 0);
/// assert_eq!(files::column_index(source, 2..13, 0), 0);
/// assert_eq!(files::column_index(source, 2..13, 2 + 0), 0);
/// assert_eq!(files::column_index(source, 2..13, 2 + 1), 0);
/// assert_eq!(files::column_index(source, 2..13, 2 + 4), 1);
/// assert_eq!(files::column_index(source, 2..13, 2 + 8), 2);
/// assert_eq!(files::column_index(source, 2..13, 2 + 10), 2);
/// assert_eq!(files::column_index(source, 2..13, 2 + 11), 3);
/// assert_eq!(files::column_index(source, 2..13, 2 + 12), 3);
/// ```
pub fn column_index(source: &str, line_range: Range<usize>, byte_index: usize) -> usize {
    // Never scan past the requested byte, the end of the line, or the end of
    // the source, whichever comes first.
    let end_index = byte_index.min(line_range.end).min(source.len());

    // A character is complete once the byte *after* the current one is a
    // character boundary, so counting those positions counts whole characters.
    (line_range.start..end_index)
        .filter(|&index| source.is_char_boundary(index + 1))
        .count()
}
233
/// Yields the byte index at which each line of the source string starts.
///
/// The first line always starts at `0`; every following line starts on the
/// byte right after a `\n`.
///
/// Implementors of [`Files`] can use this to pre-compute the line starts and
/// then search them to implement [`Files::line_index`], as shown in the
/// example below.
///
/// [`Files`]: Files
/// [`Files::line_index`]: Files::line_index
///
/// # Example
///
/// ```rust
/// use codespan_reporting::files;
///
/// let source = "foo\nbar\r\n\nbaz";
/// let line_starts: Vec<_> = files::line_starts(source).collect();
///
/// assert_eq!(
///     line_starts,
///     [
///         0,  // "foo\n"
///         4,  // "bar\r\n"
///         9,  // ""
///         10, // "baz"
///     ],
/// );
///
/// fn line_index(line_starts: &[usize], byte_index: usize) -> Option<usize> {
///     match line_starts.binary_search(&byte_index) {
///         Ok(line) => Some(line),
///         Err(next_line) => Some(next_line - 1),
///     }
/// }
///
/// assert_eq!(line_index(&line_starts, 5), Some(1));
/// ```
// NOTE: this is copied in `codespan::file::line_starts` and should be kept in sync.
pub fn line_starts(source: &str) -> impl '_ + Iterator<Item = usize> {
    // Each newline ends a line, so the next line begins one byte after it.
    let after_newlines = source
        .match_indices('\n')
        .map(|(newline_index, _)| newline_index + 1);

    core::iter::once(0).chain(after_newlines)
}
274
/// A file database holding exactly one source file.
///
/// Since there is only a single file in this database, `()` is used as the
/// [`FileId`].
///
/// This is handy for simple language tests, but it might be worth creating a
/// custom implementation once a language scales beyond a certain size.
///
/// [`FileId`]: Files::FileId
#[derive(Clone, Debug)]
pub struct SimpleFile<Name, Source> {
    /// The name of the file.
    name: Name,
    /// The source code of the file.
    source: Source,
    /// The byte index at which each line of the source code starts.
    line_starts: Vec<usize>,
}
292
293impl<Name, Source> SimpleFile<Name, Source>
294where
295    Name: core::fmt::Display,
296    Source: AsRef<str>,
297{
298    /// Create a new source file.
299    pub fn new(name: Name, source: Source) -> SimpleFile<Name, Source> {
300        SimpleFile {
301            name,
302            line_starts: line_starts(source.as_ref()).collect(),
303            source,
304        }
305    }
306
307    /// Return the name of the file.
308    pub fn name(&self) -> &Name {
309        &self.name
310    }
311
312    /// Return the source of the file.
313    pub fn source(&self) -> &Source {
314        &self.source
315    }
316
317    /// Return the starting byte index of the line with the specified line index.
318    /// Convenience method that already generates errors if necessary.
319    fn line_start(&self, line_index: usize) -> Result<usize, Error> {
320        use core::cmp::Ordering;
321
322        match line_index.cmp(&self.line_starts.len()) {
323            Ordering::Less => Ok(self
324                .line_starts
325                .get(line_index)
326                .cloned()
327                .expect("failed despite previous check")),
328            Ordering::Equal => Ok(self.source.as_ref().len()),
329            Ordering::Greater => Err(Error::LineTooLarge {
330                given: line_index,
331                max: self.line_starts.len() - 1,
332            }),
333        }
334    }
335}
336
337impl<'a, Name, Source> Files<'a> for SimpleFile<Name, Source>
338where
339    Name: 'a + core::fmt::Display + Clone,
340    Source: 'a + AsRef<str>,
341{
342    type FileId = ();
343    type Name = Name;
344    type Source = &'a str;
345
346    fn name(&self, (): ()) -> Result<Name, Error> {
347        Ok(self.name.clone())
348    }
349
350    fn source(&self, (): ()) -> Result<&str, Error> {
351        Ok(self.source.as_ref())
352    }
353
354    fn line_index(&self, (): (), byte_index: usize) -> Result<usize, Error> {
355        Ok(self
356            .line_starts
357            .binary_search(&byte_index)
358            .unwrap_or_else(|next_line| next_line - 1))
359    }
360
361    fn line_range(&self, (): (), line_index: usize) -> Result<Range<usize>, Error> {
362        let line_start = self.line_start(line_index)?;
363        let next_line_start = self.line_start(line_index + 1)?;
364
365        Ok(line_start..next_line_start)
366    }
367}
368
369/// A file database that can store multiple source files.
370///
371/// This is useful for simple language tests, but it might be worth creating a
372/// custom implementation when a language scales beyond a certain size.
373/// It is a glorified `Vec<SimpleFile>` that implements the `Files` trait.
374#[derive(Debug, Default, Clone)]
375pub struct SimpleFiles<Name, Source> {
376    files: Vec<SimpleFile<Name, Source>>,
377}
378
379impl<Name, Source> SimpleFiles<Name, Source>
380where
381    Name: core::fmt::Display,
382    Source: AsRef<str>,
383{
384    /// Create a new files database.
385    pub fn new() -> SimpleFiles<Name, Source> {
386        SimpleFiles { files: Vec::new() }
387    }
388
389    /// Add a file to the database, returning the handle that can be used to
390    /// refer to it again.
391    pub fn add(&mut self, name: Name, source: Source) -> usize {
392        let file_id = self.files.len();
393        self.files.push(SimpleFile::new(name, source));
394        file_id
395    }
396
397    /// Get the file corresponding to the given id.
398    pub fn get(&self, file_id: usize) -> Result<&SimpleFile<Name, Source>, Error> {
399        self.files.get(file_id).ok_or(Error::FileMissing)
400    }
401}
402
403impl<'a, Name, Source> Files<'a> for SimpleFiles<Name, Source>
404where
405    Name: 'a + core::fmt::Display + Clone,
406    Source: 'a + AsRef<str>,
407{
408    type FileId = usize;
409    type Name = Name;
410    type Source = &'a str;
411
412    fn name(&self, file_id: usize) -> Result<Name, Error> {
413        Ok(self.get(file_id)?.name().clone())
414    }
415
416    fn source(&self, file_id: usize) -> Result<&str, Error> {
417        Ok(self.get(file_id)?.source().as_ref())
418    }
419
420    fn line_index(&self, file_id: usize, byte_index: usize) -> Result<usize, Error> {
421        self.get(file_id)?.line_index((), byte_index)
422    }
423
424    fn line_range(&self, file_id: usize, line_index: usize) -> Result<Range<usize>, Error> {
425        self.get(file_id)?.line_range((), line_index)
426    }
427}
428
#[cfg(test)]
mod test {
    use super::*;

    /// Four lines, mixing `\n` and `\r\n` endings, with no trailing newline.
    const TEST_SOURCE: &str = "foo\nbar\r\n\nbaz";

    /// The line starts are computed when the file is constructed.
    #[test]
    fn line_starts() {
        let file = SimpleFile::new("test", TEST_SOURCE);

        let expected: [usize; 4] = [
            0,  // "foo\n"
            4,  // "bar\r\n"
            9,  // ""
            10, // "baz"
        ];

        assert_eq!(file.line_starts, expected);
    }

    /// Slicing the source by each line range yields the lines, including
    /// their line terminators.
    #[test]
    fn line_span_sources() {
        let file = SimpleFile::new("test", TEST_SOURCE);

        let mut line_sources = Vec::new();
        for line in 0..4 {
            let line_range = file.line_range((), line).unwrap();
            line_sources.push(&file.source[line_range]);
        }

        assert_eq!(line_sources, ["foo\n", "bar\r\n", "\n", "baz"]);
    }
}
463}