codespan_reporting/
files.rs

1//! Source file support for diagnostic reporting.
2//!
//! The main trait defined in this module is the [`Files`] trait, which
//! provides the minimum amount of functionality required for printing [`Diagnostics`]
5//! with the [`term::emit`] function.
6//!
//! Two simple implementations of this trait are provided:
8//!
9//! - [`SimpleFile`]: For single-file use-cases
10//! - [`SimpleFiles`]: For multi-file use-cases
11//!
12//! These data structures provide a pretty minimal API, however,
13//! so end-users are encouraged to create their own implementations for their
14//! own specific use-cases, such as an implementation that accesses the file
15//! system directly (and caches the line start locations), or an implementation
16//! using an incremental compilation library like [`salsa`].
17//!
18//! [`term::emit`]: crate::term::emit
19//! [`Diagnostics`]: crate::diagnostic::Diagnostic
20//! [`Files`]: Files
21//! [`SimpleFile`]: SimpleFile
22//! [`SimpleFiles`]: SimpleFiles
23//!
24//! [`salsa`]: https://crates.io/crates/salsa
25
26use alloc::vec::Vec;
27use core::ops::Range;
28
/// An enum representing an error that happened while looking up a file or a piece of content in that file.
///
/// For the `*TooLarge` variants, `given` is the value that was requested and
/// `max` is the largest value that would have been accepted.
#[derive(Debug)]
#[non_exhaustive]
pub enum Error {
    /// A required file is not in the file database.
    FileMissing,
    /// The file is present, but does not contain the specified byte index.
    IndexTooLarge { given: usize, max: usize },
    /// The file is present, but does not contain the specified line index.
    LineTooLarge { given: usize, max: usize },
    /// The file is present and contains the specified line index, but the line does not contain
    /// the specified column index.
    ColumnTooLarge { given: usize, max: usize },
    /// The given index is contained in the file, but is not a boundary of a UTF-8 code point.
    InvalidCharBoundary { given: usize },
    /// There was an error while doing IO.
    // Only available when the `std` feature is enabled, because it wraps `std::io::Error`.
    #[cfg(feature = "std")]
    Io(std::io::Error),
    /// There was an error during formatting.
    FormatError,
}
50
#[cfg(feature = "std")]
impl From<std::io::Error> for Error {
    /// Wraps an IO error in [`Error::Io`], so that `?` can be used on IO results.
    fn from(err: std::io::Error) -> Self {
        Self::Io(err)
    }
}
57
58impl From<core::fmt::Error> for Error {
59    fn from(_err: core::fmt::Error) -> Error {
60        Error::FormatError
61    }
62}
63
64impl core::fmt::Display for Error {
65    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
66        match self {
67            Error::FileMissing => write!(f, "file missing"),
68            Error::IndexTooLarge { given, max } => {
69                write!(f, "invalid index {}, maximum index is {}", given, max)
70            }
71            Error::LineTooLarge { given, max } => {
72                write!(f, "invalid line {}, maximum line is {}", given, max)
73            }
74            Error::ColumnTooLarge { given, max } => {
75                write!(f, "invalid column {}, maximum column {}", given, max)
76            }
77            Error::InvalidCharBoundary { .. } => write!(f, "index is not a code point boundary"),
78            #[cfg(feature = "std")]
79            Error::Io(err) => write!(f, "{}", err),
80            Error::FormatError => write!(f, "formatting error"),
81        }
82    }
83}
84
85#[cfg(feature = "std")]
86use std::error::Error as RustError;
87
88#[cfg(not(feature = "std"))]
89use core::error::Error as RustError;
90
91impl RustError for Error {
92    fn source(&self) -> Option<&(dyn RustError + 'static)> {
93        match &self {
94            #[cfg(feature = "std")]
95            Error::Io(err) => Some(err),
96            _ => None,
97        }
98    }
99}
100
101/// A minimal interface for accessing source files when rendering diagnostics.
102///
103/// A lifetime parameter `'a` is provided to allow any of the returned values to returned by reference.
104/// This is to workaround the lack of higher kinded lifetime parameters.
105/// This can be ignored if this is not needed, however.
106pub trait Files<'a> {
107    /// A unique identifier for files in the file provider. This will be used
108    /// for rendering `diagnostic::Label`s in the corresponding source files.
109    type FileId: 'a + Copy + PartialEq;
110    /// The user-facing name of a file, to be displayed in diagnostics.
111    type Name: 'a + core::fmt::Display;
112    /// The source code of a file.
113    type Source: 'a + AsRef<str>;
114
115    /// The user-facing name of a file.
116    fn name(&'a self, id: Self::FileId) -> Result<Self::Name, Error>;
117
118    /// The source code of a file.
119    fn source(&'a self, id: Self::FileId) -> Result<Self::Source, Error>;
120
121    /// The index of the line at the given byte index.
122    /// If the byte index is past the end of the file, returns the maximum line index in the file.
123    /// This means that this function only fails if the file is not present.
124    ///
125    /// # Note for trait implementors
126    ///
127    /// This can be implemented efficiently by performing a binary search over
128    /// a list of line starts that was computed by calling the [`line_starts`]
129    /// function that is exported from the [`files`] module. It might be useful
130    /// to pre-compute and cache these line starts.
131    ///
132    /// [`line_starts`]: crate::files::line_starts
133    /// [`files`]: crate::files
134    fn line_index(&'a self, id: Self::FileId, byte_index: usize) -> Result<usize, Error>;
135
136    /// The user-facing line number at the given line index.
137    /// It is not necessarily checked that the specified line index
138    /// is actually in the file.
139    ///
140    /// # Note for trait implementors
141    ///
142    /// This is usually 1-indexed from the beginning of the file, but
143    /// can be useful for implementing something like the
144    /// [C preprocessor's `#line` macro][line-macro].
145    ///
146    /// [line-macro]: https://en.cppreference.com/w/c/preprocessor/line
147    #[allow(unused_variables)]
148    fn line_number(&'a self, id: Self::FileId, line_index: usize) -> Result<usize, Error> {
149        Ok(line_index + 1)
150    }
151
152    /// The user-facing column number at the given line index and byte index.
153    ///
154    /// # Note for trait implementors
155    ///
156    /// This is usually 1-indexed from the the start of the line.
157    /// A default implementation is provided, based on the [`column_index`]
158    /// function that is exported from the [`files`] module.
159    ///
160    /// [`files`]: crate::files
161    /// [`column_index`]: crate::files::column_index
162    fn column_number(
163        &'a self,
164        id: Self::FileId,
165        line_index: usize,
166        byte_index: usize,
167    ) -> Result<usize, Error> {
168        let source = self.source(id)?;
169        let line_range = self.line_range(id, line_index)?;
170        let column_index = column_index(source.as_ref(), line_range, byte_index);
171
172        Ok(column_index + 1)
173    }
174
175    /// Convenience method for returning line and column number at the given
176    /// byte index in the file.
177    fn location(&'a self, id: Self::FileId, byte_index: usize) -> Result<Location, Error> {
178        let line_index = self.line_index(id, byte_index)?;
179
180        Ok(Location {
181            line_number: self.line_number(id, line_index)?,
182            column_number: self.column_number(id, line_index, byte_index)?,
183        })
184    }
185
186    /// The byte range of line in the source of the file.
187    fn line_range(&'a self, id: Self::FileId, line_index: usize) -> Result<Range<usize>, Error>;
188}
189
/// A user-facing location in a source file, expressed as a line number and
/// a column number.
///
/// Returned by [`Files::location`].
///
/// [`Files::location`]: Files::location
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct Location {
    /// The user-facing line number.
    pub line_number: usize,
    /// The user-facing column number.
    pub column_number: usize,
}
202
/// The column index at the given byte index in the source file.
/// This is the number of characters between the start of the line and the
/// given byte index.
///
/// If the byte index is smaller than the start of the line, then `0` is returned.
/// If the byte index is past the end of the line, the column index of the last
/// character `+ 1` is returned.
///
/// # Example
///
/// ```rust
/// use codespan_reporting::files;
///
/// let source = "\n\n🗻∈🌏\n\n";
///
/// assert_eq!(files::column_index(source, 0..1, 0), 0);
/// assert_eq!(files::column_index(source, 2..13, 0), 0);
/// assert_eq!(files::column_index(source, 2..13, 2 + 0), 0);
/// assert_eq!(files::column_index(source, 2..13, 2 + 1), 0);
/// assert_eq!(files::column_index(source, 2..13, 2 + 4), 1);
/// assert_eq!(files::column_index(source, 2..13, 2 + 8), 2);
/// assert_eq!(files::column_index(source, 2..13, 2 + 10), 2);
/// assert_eq!(files::column_index(source, 2..13, 2 + 11), 3);
/// assert_eq!(files::column_index(source, 2..13, 2 + 12), 3);
/// ```
#[must_use]
pub fn column_index(source: &str, line_range: Range<usize>, byte_index: usize) -> usize {
    let Range { start, end } = line_range;
    // Never look past the requested byte, the end of the line, or the end of the source.
    let stop = byte_index.min(end).min(source.len());

    // A character is counted once the byte just after its final byte is reached,
    // so a byte index in the middle of a character does not count that character.
    (start..stop)
        .filter(|&offset| source.is_char_boundary(offset + 1))
        .count()
}
235
/// Return the starting byte index of each line in the source string.
///
/// The first line always starts at `0`; every `\n` in the source starts a
/// new line at the byte directly after it.
///
/// This can make it easier to implement [`Files::line_index`] by allowing
/// implementors of [`Files`] to pre-compute the line starts, then search for
/// the corresponding line range, as shown in the example below.
///
/// [`Files`]: Files
/// [`Files::line_index`]: Files::line_index
///
/// # Example
///
/// ```rust
/// use codespan_reporting::files;
///
/// let source = "foo\nbar\r\n\nbaz";
/// let line_starts: Vec<_> = files::line_starts(source).collect();
///
/// assert_eq!(
///     line_starts,
///     [
///         0,  // "foo\n"
///         4,  // "bar\r\n"
///         9,  // ""
///         10, // "baz"
///     ],
/// );
///
/// fn line_index(line_starts: &[usize], byte_index: usize) -> Option<usize> {
///     match line_starts.binary_search(&byte_index) {
///         Ok(line) => Some(line),
///         Err(next_line) => Some(next_line - 1),
///     }
/// }
///
/// assert_eq!(line_index(&line_starts, 5), Some(1));
/// ```
// NOTE: this is copied in `codespan::file::line_starts` and should be kept in sync.
pub fn line_starts(source: &str) -> impl '_ + Iterator<Item = usize> {
    // Splitting on '\n' yields one piece per line (the last one possibly empty),
    // so the running total of `piece length + 1` gives the start of each line.
    source.split('\n').scan(0, |next_start, line| {
        let start = *next_start;
        *next_start = start + line.len() + 1;
        Some(start)
    })
}
276
/// A file database that contains a single source file.
///
/// Because there is only a single file in this database we use `()` as a [`FileId`].
///
/// This is useful for simple language tests, but it might be worth creating a
/// custom implementation when a language scales beyond a certain size.
///
/// [`FileId`]: Files::FileId
#[derive(Debug, Clone)]
pub struct SimpleFile<Name, Source> {
    /// The name of the file.
    name: Name,
    /// The source code of the file.
    source: Source,
    /// The starting byte index of each line in the source code.
    line_starts: Vec<usize>,
}
294
295impl<Name, Source> SimpleFile<Name, Source>
296where
297    Name: core::fmt::Display,
298    Source: AsRef<str>,
299{
300    /// Create a new source file.
301    pub fn new(name: Name, source: Source) -> SimpleFile<Name, Source> {
302        SimpleFile {
303            name,
304            line_starts: line_starts(source.as_ref()).collect(),
305            source,
306        }
307    }
308
309    /// Return the name of the file.
310    pub fn name(&self) -> &Name {
311        &self.name
312    }
313
314    /// Return the source of the file.
315    pub fn source(&self) -> &Source {
316        &self.source
317    }
318
319    /// Return the starting byte index of the line with the specified line index.
320    /// Convenience method that already generates errors if necessary.
321    fn line_start(&self, line_index: usize) -> Result<usize, Error> {
322        use core::cmp::Ordering;
323
324        match line_index.cmp(&self.line_starts.len()) {
325            Ordering::Less => Ok(self
326                .line_starts
327                .get(line_index)
328                .copied()
329                .expect("failed despite previous check")),
330            Ordering::Equal => Ok(self.source.as_ref().len()),
331            Ordering::Greater => Err(Error::LineTooLarge {
332                given: line_index,
333                max: self.line_starts.len() - 1,
334            }),
335        }
336    }
337}
338
339impl<'a, Name, Source> Files<'a> for SimpleFile<Name, Source>
340where
341    Name: 'a + core::fmt::Display + Clone,
342    Source: 'a + AsRef<str>,
343{
344    type FileId = ();
345    type Name = Name;
346    type Source = &'a str;
347
348    fn name(&self, (): ()) -> Result<Name, Error> {
349        Ok(self.name.clone())
350    }
351
352    fn source(&self, (): ()) -> Result<&str, Error> {
353        Ok(self.source.as_ref())
354    }
355
356    fn line_index(&self, (): (), byte_index: usize) -> Result<usize, Error> {
357        Ok(self
358            .line_starts
359            .binary_search(&byte_index)
360            .unwrap_or_else(|next_line| next_line - 1))
361    }
362
363    fn line_range(&self, (): (), line_index: usize) -> Result<Range<usize>, Error> {
364        let line_start = self.line_start(line_index)?;
365        let next_line_start = self.line_start(line_index + 1)?;
366
367        Ok(line_start..next_line_start)
368    }
369}
370
371/// A file database that can store multiple source files.
372///
373/// This is useful for simple language tests, but it might be worth creating a
374/// custom implementation when a language scales beyond a certain size.
375/// It is a glorified `Vec<SimpleFile>` that implements the `Files` trait.
376#[derive(Debug, Default, Clone)]
377pub struct SimpleFiles<Name, Source> {
378    files: Vec<SimpleFile<Name, Source>>,
379}
380
381impl<Name, Source> SimpleFiles<Name, Source>
382where
383    Name: core::fmt::Display,
384    Source: AsRef<str>,
385{
386    /// Create a new files database.
387    #[must_use]
388    pub fn new() -> SimpleFiles<Name, Source> {
389        SimpleFiles { files: Vec::new() }
390    }
391
392    /// Add a file to the database, returning the handle that can be used to
393    /// refer to it again.
394    pub fn add(&mut self, name: Name, source: Source) -> usize {
395        let file_id = self.files.len();
396        self.files.push(SimpleFile::new(name, source));
397        file_id
398    }
399
400    /// Get the file corresponding to the given id.
401    pub fn get(&self, file_id: usize) -> Result<&SimpleFile<Name, Source>, Error> {
402        self.files.get(file_id).ok_or(Error::FileMissing)
403    }
404}
405
406impl<'a, Name, Source> Files<'a> for SimpleFiles<Name, Source>
407where
408    Name: 'a + core::fmt::Display + Clone,
409    Source: 'a + AsRef<str>,
410{
411    type FileId = usize;
412    type Name = Name;
413    type Source = &'a str;
414
415    fn name(&self, file_id: usize) -> Result<Name, Error> {
416        Ok(self.get(file_id)?.name().clone())
417    }
418
419    fn source(&self, file_id: usize) -> Result<&str, Error> {
420        Ok(self.get(file_id)?.source().as_ref())
421    }
422
423    fn line_index(&self, file_id: usize, byte_index: usize) -> Result<usize, Error> {
424        self.get(file_id)?.line_index((), byte_index)
425    }
426
427    fn line_range(&self, file_id: usize, line_index: usize) -> Result<Range<usize>, Error> {
428        self.get(file_id)?.line_range((), line_index)
429    }
430}
431
#[cfg(test)]
mod test {
    use super::*;

    /// Four lines, mixing `\n` and `\r\n` terminators; the last line has none.
    const TEST_SOURCE: &str = "foo\nbar\r\n\nbaz";

    /// The stored line starts point directly after each `\n`.
    #[test]
    fn line_starts() {
        let expected: [usize; 4] = [
            0,  // "foo\n"
            4,  // "bar\r\n"
            9,  // ""
            10, // "baz"
        ];

        let file = SimpleFile::new("test", TEST_SOURCE);

        assert_eq!(file.line_starts, expected);
    }

    /// Slicing the source with each line range yields the line including its terminator.
    #[test]
    fn line_span_sources() {
        let file = SimpleFile::new("test", TEST_SOURCE);

        let mut line_sources = Vec::new();
        for line in 0..4 {
            let range = file.line_range((), line).unwrap();
            line_sources.push(&file.source[range]);
        }

        assert_eq!(line_sources, ["foo\n", "bar\r\n", "\n", "baz"]);
    }
}