codespan/
file.rs

1use codespan_reporting::files::Error;
2#[cfg(feature = "serialization")]
3use serde::{Deserialize, Serialize};
4use std::ffi::{OsStr, OsString};
5use std::num::NonZeroU32;
6
7use crate::{ByteIndex, ColumnIndex, LineIndex, LineOffset, Location, RawIndex, Span};
8
/// A handle that points to a file in the database.
///
/// The handle wraps a non-zero integer, so `Option<FileId>` needs no extra space for its
/// discriminant.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serialization", derive(Deserialize, Serialize))]
pub struct FileId(NonZeroU32);

impl FileId {
    /// Offset of our `FileId`'s numeric value to an index on `Files::files`.
    ///
    /// This is to ensure the first `FileId` is non-zero for memory layout optimisations (e.g.
    /// `Option<FileId>` is 4 bytes)
    const OFFSET: u32 = 1;

    /// Create the handle for the file stored at position `index` in `Files::files`.
    ///
    /// # Panics
    ///
    /// Panics if `index + OFFSET` does not fit in a `u32`. A plain `as u32` cast would instead
    /// wrap silently and yield a handle that aliases a different file.
    fn new(index: usize) -> FileId {
        use std::convert::TryFrom;

        let raw = u32::try_from(index)
            .ok()
            .and_then(|index| index.checked_add(Self::OFFSET))
            .and_then(NonZeroU32::new)
            .expect("file index cannot be stored");

        FileId(raw)
    }

    /// Recover the index into `Files::files` that this handle was created from.
    fn get(self) -> usize {
        // The stored value is always at least `OFFSET`, so this subtraction cannot underflow.
        (self.0.get() - Self::OFFSET) as usize
    }
}
29
/// A database of source files.
///
/// The `Source` generic parameter determines how source text is stored. Using [`String`] will have
/// `Files` take ownership of all source text. Smart pointer types such as [`Cow<'_, str>`],
/// [`Rc<str>`] or [`Arc<str>`] can be used to share the source text with the rest of the program.
///
/// [`Cow<'_, str>`]: std::borrow::Cow
/// [`Rc<str>`]: std::rc::Rc
/// [`Arc<str>`]: std::sync::Arc
#[derive(Clone, Debug)]
pub struct Files<Source> {
    /// The stored files, in the order they were added. A `FileId` maps to a position in this
    /// vector through `FileId::get`.
    files: Vec<File<Source>>,
}
43
44impl<Source> Default for Files<Source>
45where
46    Source: AsRef<str>,
47{
48    fn default() -> Self {
49        Self { files: vec![] }
50    }
51}
52
53impl<Source> Files<Source>
54where
55    Source: AsRef<str>,
56{
57    /// Create a new, empty database of files.
58    pub fn new() -> Self {
59        Files::<Source>::default()
60    }
61
62    /// Add a file to the database, returning the handle that can be used to
63    /// refer to it again.
64    pub fn add(&mut self, name: impl Into<OsString>, source: Source) -> FileId {
65        let file_id = FileId::new(self.files.len());
66        self.files.push(File::new(name.into(), source.into()));
67        file_id
68    }
69
70    /// Update a source file in place.
71    ///
72    /// This will mean that any outstanding byte indexes will now point to
73    /// invalid locations.
74    pub fn update(&mut self, file_id: FileId, source: Source) {
75        self.get_mut(file_id).update(source.into())
76    }
77
78    /// Get a the source file using the file id.
79    // FIXME: return an option or result?
80    fn get(&self, file_id: FileId) -> &File<Source> {
81        &self.files[file_id.get()]
82    }
83
84    /// Get a the source file using the file id.
85    // FIXME: return an option or result?
86    fn get_mut(&mut self, file_id: FileId) -> &mut File<Source> {
87        &mut self.files[file_id.get()]
88    }
89
90    /// Get the name of the source file.
91    ///
92    /// ```rust
93    /// use codespan::Files;
94    ///
95    /// let name = "test";
96    ///
97    /// let mut files = Files::new();
98    /// let file_id = files.add(name, "hello world!");
99    ///
100    /// assert_eq!(files.name(file_id), name);
101    /// ```
102    pub fn name(&self, file_id: FileId) -> &OsStr {
103        self.get(file_id).name()
104    }
105
106    /// Get the span at the given line index.
107    ///
108    /// ```rust
109    /// use codespan::{Files, LineIndex, Span};
110    ///
111    /// let mut files = Files::new();
112    /// let file_id = files.add("test", "foo\nbar\r\n\nbaz");
113    ///
114    /// let line_sources = (0..4)
115    ///     .map(|line| files.line_span(file_id, line).unwrap())
116    ///     .collect::<Vec<_>>();
117    ///
118    /// assert_eq!(line_sources,
119    ///     [
120    ///         Span::new(0, 4),    // 0: "foo\n"
121    ///         Span::new(4, 9),    // 1: "bar\r\n"
122    ///         Span::new(9, 10),   // 2: ""
123    ///         Span::new(10, 13),  // 3: "baz"
124    ///     ]
125    /// );
126    /// assert!(files.line_span(file_id, 4).is_err());
127    /// ```
128    pub fn line_span(
129        &self,
130        file_id: FileId,
131        line_index: impl Into<LineIndex>,
132    ) -> Result<Span, Error> {
133        self.get(file_id).line_span(line_index.into())
134    }
135
136    /// Get the line index at the given byte in the source file.
137    ///
138    /// ```rust
139    /// use codespan::{Files, LineIndex};
140    ///
141    /// let mut files = Files::new();
142    /// let file_id = files.add("test", "foo\nbar\r\n\nbaz");
143    ///
144    /// assert_eq!(files.line_index(file_id, 0), LineIndex::from(0));
145    /// assert_eq!(files.line_index(file_id, 7), LineIndex::from(1));
146    /// assert_eq!(files.line_index(file_id, 8), LineIndex::from(1));
147    /// assert_eq!(files.line_index(file_id, 9), LineIndex::from(2));
148    /// assert_eq!(files.line_index(file_id, 100), LineIndex::from(3));
149    /// ```
150    pub fn line_index(&self, file_id: FileId, byte_index: impl Into<ByteIndex>) -> LineIndex {
151        self.get(file_id).line_index(byte_index.into())
152    }
153
154    /// Get the location at the given byte index in the source file.
155    ///
156    /// ```rust
157    /// use codespan::{ByteIndex, Files, Location, Span};
158    ///
159    /// let mut files = Files::new();
160    /// let file_id = files.add("test", "foo\nbar\r\n\nbaz");
161    ///
162    /// assert_eq!(files.location(file_id, 0).unwrap(), Location::new(0, 0));
163    /// assert_eq!(files.location(file_id, 7).unwrap(), Location::new(1, 3));
164    /// assert_eq!(files.location(file_id, 8).unwrap(), Location::new(1, 4));
165    /// assert_eq!(files.location(file_id, 9).unwrap(), Location::new(2, 0));
166    /// assert!(files.location(file_id, 100).is_err());
167    /// ```
168    pub fn location(
169        &self,
170        file_id: FileId,
171        byte_index: impl Into<ByteIndex>,
172    ) -> Result<Location, Error> {
173        self.get(file_id).location(byte_index.into())
174    }
175
176    /// Get the source of the file.
177    ///
178    /// ```rust
179    /// use codespan::Files;
180    ///
181    /// let source = "hello world!";
182    ///
183    /// let mut files = Files::new();
184    /// let file_id = files.add("test", source);
185    ///
186    /// assert_eq!(*files.source(file_id), source);
187    /// ```
188    pub fn source(&self, file_id: FileId) -> &Source {
189        self.get(file_id).source()
190    }
191
192    /// Return the span of the full source.
193    ///
194    /// ```rust
195    /// use codespan::{Files, Span};
196    ///
197    /// let source = "hello world!";
198    ///
199    /// let mut files = Files::new();
200    /// let file_id = files.add("test", source);
201    ///
202    /// assert_eq!(files.source_span(file_id), Span::from_str(source));
203    /// ```
204    pub fn source_span(&self, file_id: FileId) -> Span {
205        self.get(file_id).source_span()
206    }
207
208    /// Return a slice of the source file, given a span.
209    ///
210    /// ```rust
211    /// use codespan::{Files, Span};
212    ///
213    /// let mut files = Files::new();
214    /// let file_id = files.add("test",  "hello world!");
215    ///
216    /// assert_eq!(files.source_slice(file_id, Span::new(0, 5)).unwrap(), "hello");
217    /// assert!(files.source_slice(file_id, Span::new(0, 100)).is_err());
218    /// ```
219    pub fn source_slice(&self, file_id: FileId, span: impl Into<Span>) -> Result<&str, Error> {
220        self.get(file_id).source_slice(span.into())
221    }
222}
223
224impl<'a, Source> codespan_reporting::files::Files<'a> for Files<Source>
225where
226    Source: AsRef<str>,
227{
228    type FileId = FileId;
229    type Name = String;
230    type Source = &'a str;
231
232    fn name(&self, id: FileId) -> Result<String, Error> {
233        use std::path::PathBuf;
234
235        Ok(PathBuf::from(self.name(id)).display().to_string())
236    }
237
238    fn source(&'a self, id: FileId) -> Result<&str, Error> {
239        Ok(self.source(id).as_ref())
240    }
241
242    fn line_index(&self, id: FileId, byte_index: usize) -> Result<usize, Error> {
243        Ok(self.line_index(id, byte_index as u32).to_usize())
244    }
245
246    fn line_range(
247        &'a self,
248        id: FileId,
249        line_index: usize,
250    ) -> Result<std::ops::Range<usize>, Error> {
251        let span = self.line_span(id, line_index as u32)?;
252
253        Ok(span.start().to_usize()..span.end().to_usize())
254    }
255}
256
/// A file that is stored in the database.
///
/// `line_starts` is derived from `source`; both are replaced together when the file is updated.
#[derive(Debug, Clone)]
// `Serialize` is only implemented on `OsString` for windows/unix
#[cfg_attr(
    all(feature = "serialization", any(windows, unix)),
    derive(Deserialize, Serialize)
)]
struct File<Source> {
    /// The name of the file.
    name: OsString,
    /// The source code of the file.
    source: Source,
    /// The starting byte index of each line in the source code, in ascending order.
    line_starts: Vec<ByteIndex>,
}
272
273impl<Source> File<Source>
274where
275    Source: AsRef<str>,
276{
277    fn new(name: OsString, source: Source) -> Self {
278        let line_starts = line_starts(source.as_ref())
279            .map(|i| ByteIndex::from(i as u32))
280            .collect();
281
282        File {
283            name,
284            source,
285            line_starts,
286        }
287    }
288
289    fn update(&mut self, source: Source) {
290        let line_starts = line_starts(source.as_ref())
291            .map(|i| ByteIndex::from(i as u32))
292            .collect();
293        self.source = source;
294        self.line_starts = line_starts;
295    }
296
297    fn name(&self) -> &OsStr {
298        &self.name
299    }
300
301    fn line_start(&self, line_index: LineIndex) -> Result<ByteIndex, Error> {
302        use std::cmp::Ordering;
303
304        match line_index.cmp(&self.last_line_index()) {
305            Ordering::Less => Ok(self.line_starts[line_index.to_usize()]),
306            Ordering::Equal => Ok(self.source_span().end()),
307            Ordering::Greater => Err(Error::LineTooLarge {
308                given: line_index.to_usize(),
309                max: self.last_line_index().to_usize(),
310            }),
311        }
312    }
313
314    fn last_line_index(&self) -> LineIndex {
315        LineIndex::from(self.line_starts.len() as RawIndex)
316    }
317
318    fn line_span(&self, line_index: LineIndex) -> Result<Span, Error> {
319        let line_start = self.line_start(line_index)?;
320        let next_line_start = self.line_start(line_index + LineOffset::from(1))?;
321
322        Ok(Span::new(line_start, next_line_start))
323    }
324
325    fn line_index(&self, byte_index: ByteIndex) -> LineIndex {
326        match self.line_starts.binary_search(&byte_index) {
327            // Found the start of a line
328            Ok(line) => LineIndex::from(line as u32),
329            Err(next_line) => LineIndex::from(next_line as u32 - 1),
330        }
331    }
332
333    fn location(&self, byte_index: ByteIndex) -> Result<Location, Error> {
334        let line_index = self.line_index(byte_index);
335        let line_start_index = self
336            .line_start(line_index)
337            .map_err(|_| Error::IndexTooLarge {
338                given: byte_index.to_usize(),
339                max: self.source().as_ref().len() - 1,
340            })?;
341        let line_src = self
342            .source
343            .as_ref()
344            .get(line_start_index.to_usize()..byte_index.to_usize())
345            .ok_or_else(|| {
346                let given = byte_index.to_usize();
347                let max = self.source().as_ref().len() - 1;
348                if given > max {
349                    Error::IndexTooLarge { given, max }
350                } else {
351                    Error::InvalidCharBoundary { given }
352                }
353            })?;
354
355        Ok(Location {
356            line: line_index,
357            column: ColumnIndex::from(line_src.chars().count() as u32),
358        })
359    }
360
361    fn source(&self) -> &Source {
362        &self.source
363    }
364
365    fn source_span(&self) -> Span {
366        Span::from_str(self.source.as_ref())
367    }
368
369    fn source_slice(&self, span: Span) -> Result<&str, Error> {
370        let start = span.start().to_usize();
371        let end = span.end().to_usize();
372
373        self.source.as_ref().get(start..end).ok_or_else(|| {
374            let max = self.source().as_ref().len() - 1;
375            Error::IndexTooLarge {
376                given: if start > max { start } else { end },
377                max,
378            }
379        })
380    }
381}
382
// NOTE: this is copied from `codespan_reporting::files::line_starts` and should be kept in sync.
/// Yield the byte offset at which every line of `source` begins.
///
/// The first line always starts at offset 0; each further line starts right after a `'\n'`.
fn line_starts<'source>(source: &'source str) -> impl 'source + Iterator<Item = usize> {
    let after_each_newline = source
        .char_indices()
        .filter(|&(_, ch)| ch == '\n')
        .map(|(newline_at, _)| newline_at + 1);

    std::iter::once(0).chain(after_each_newline)
}
387
#[cfg(test)]
mod test {
    use super::*;

    /// Four lines: "foo\n", "bar\r\n", "\n" and "baz".
    const TEST_SOURCE: &str = "foo\nbar\r\n\nbaz";

    /// The line index built for a file records where each line begins.
    #[test]
    fn line_starts() {
        let mut files = Files::<String>::new();
        let file_id = files.add("test", TEST_SOURCE.to_owned());

        // Starts of "foo\n", "bar\r\n", "\n" and "baz" respectively.
        let expected: Vec<ByteIndex> = [0u32, 4, 9, 10]
            .iter()
            .map(|&start| ByteIndex::from(start))
            .collect();

        assert_eq!(files.get(file_id).line_starts, expected);
    }

    /// Slicing the source by each line span gives back the lines, terminators included.
    #[test]
    fn line_span_sources() {
        // Also make sure we can use `Arc` for source
        use std::sync::Arc;

        let mut files = Files::<Arc<str>>::new();
        let file_id = files.add("test", TEST_SOURCE.into());

        let mut line_sources = Vec::new();
        for line in 0..4 {
            let span = files.line_span(file_id, line).unwrap();
            line_sources.push(files.source_slice(file_id, span).unwrap());
        }

        assert_eq!(line_sources, ["foo\n", "bar\r\n", "\n", "baz"]);
    }

    /// A `Files` database can be handed to `codespan_reporting` to render a diagnostic.
    #[test]
    fn interoperability() {
        extern crate termcolor;
        use codespan_reporting::{diagnostic::*, term::emit};
        use termcolor::{ColorChoice, StandardStream};

        let mut files = Files::<String>::new();
        let file_id = files.add("test", TEST_SOURCE.to_owned());

        // Bytes 4..7 cover "bar" on the second line.
        let label = Label::primary(file_id, 4..7).with_message("middle");
        let diagnostic = Diagnostic::note()
            .with_message("middle")
            .with_labels(vec![label]);

        let config = codespan_reporting::term::Config::default();
        let mut stdout = StandardStream::stdout(ColorChoice::Auto);

        emit(&mut stdout, &config, &files, &diagnostic).unwrap();
    }
}