codespan/
file.rs

1use alloc::string::{String, ToString};
2use alloc::vec;
3use alloc::vec::Vec;
4use core::num::NonZeroU32;
5
6use codespan_reporting::files::Error;
7
8use crate::{ByteIndex, ColumnIndex, LineIndex, LineOffset, Location, RawIndex, Span};
9
10#[cfg(feature = "serialization")]
11use serde::{Deserialize, Serialize};
12
13#[cfg(feature = "std")]
14use std::ffi::{OsStr, OsString};
15
16#[cfg(not(feature = "std"))]
17use {alloc::string::String as OsString, core::primitive::str as OsStr};
18
/// A handle that points to a file in the database.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serialization", derive(Deserialize, Serialize))]
pub struct FileId(NonZeroU32);

impl FileId {
    /// Offset of our `FileId`'s numeric value to an index on `Files::files`.
    ///
    /// This is to ensure the first `FileId` is non-zero for memory layout optimisations (e.g.
    /// `Option<FileId>` is 4 bytes)
    const OFFSET: u32 = 1;

    /// Largest index that can still be stored once `OFFSET` has been added to it.
    const MAX_INDEX: usize = (u32::MAX - Self::OFFSET) as usize;

    /// Create the handle for the file stored at position `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is larger than `MAX_INDEX`. Without this check the `as u32` cast would
    /// silently wrap and the resulting handle would alias a different file.
    #[must_use]
    fn new(index: usize) -> FileId {
        assert!(index <= Self::MAX_INDEX, "file index cannot be stored");

        // `index` is in range, so the cast cannot truncate and the addition cannot overflow.
        let raw = index as u32 + Self::OFFSET;
        FileId(NonZeroU32::new(raw).expect("file index cannot be stored"))
    }

    /// Recover the position that was passed to `new` by removing `OFFSET` again.
    fn get(self) -> usize {
        (self.0.get() - Self::OFFSET) as usize
    }
}
40
41/// A database of source files.
42///
43/// The `Source` generic parameter determines how source text is stored. Using [`String`] will have
44/// `Files` take ownership of all source text. Smart pointer types such as [`Cow<'_, str>`],
45/// [`Rc<str>`] or [`Arc<str>`] can be used to share the source text with the rest of the program.
46///
47/// [`Cow<'_, str>`]: alloc::borrow::Cow
48/// [`Rc<str>`]: alloc::rc::Rc
49/// [`Arc<str>`]: alloc::sync::Arc
50#[derive(Clone, Debug)]
51pub struct Files<Source> {
52    files: Vec<File<Source>>,
53}
54
55impl<Source> Default for Files<Source>
56where
57    Source: AsRef<str>,
58{
59    fn default() -> Self {
60        Self { files: vec![] }
61    }
62}
63
64impl<Source> Files<Source>
65where
66    Source: AsRef<str>,
67{
68    /// Create a new, empty database of files.
69    pub fn new() -> Self {
70        Files::<Source>::default()
71    }
72
73    /// Add a file to the database, returning the handle that can be used to
74    /// refer to it again.
75    pub fn add(&mut self, name: impl Into<OsString>, source: Source) -> FileId {
76        let file_id = FileId::new(self.files.len());
77        self.files.push(File::new(name.into(), source));
78        file_id
79    }
80
81    /// Update a source file in place.
82    ///
83    /// This will mean that any outstanding byte indexes will now point to
84    /// invalid locations.
85    pub fn update(&mut self, file_id: FileId, source: Source) {
86        self.get_mut(file_id).update(source);
87    }
88
89    /// Get a the source file using the file id.
90    // FIXME: return an option or result?
91    fn get(&self, file_id: FileId) -> &File<Source> {
92        &self.files[file_id.get()]
93    }
94
95    /// Get a the source file using the file id.
96    // FIXME: return an option or result?
97    fn get_mut(&mut self, file_id: FileId) -> &mut File<Source> {
98        &mut self.files[file_id.get()]
99    }
100
101    /// Get the name of the source file.
102    ///
103    /// ```rust
104    /// use codespan::Files;
105    ///
106    /// let name = "test";
107    ///
108    /// let mut files = Files::new();
109    /// let file_id = files.add(name, "hello world!");
110    ///
111    /// assert_eq!(files.name(file_id), name);
112    /// ```
113    #[must_use]
114    pub fn name(&self, file_id: FileId) -> &OsStr {
115        self.get(file_id).name()
116    }
117
118    /// Get the span at the given line index.
119    ///
120    /// ```rust
121    /// use codespan::{Files, LineIndex, Span};
122    ///
123    /// let mut files = Files::new();
124    /// let file_id = files.add("test", "foo\nbar\r\n\nbaz");
125    ///
126    /// let line_sources = (0..4)
127    ///     .map(|line| files.line_span(file_id, line).unwrap())
128    ///     .collect::<Vec<_>>();
129    ///
130    /// assert_eq!(line_sources,
131    ///     [
132    ///         Span::new(0, 4),    // 0: "foo\n"
133    ///         Span::new(4, 9),    // 1: "bar\r\n"
134    ///         Span::new(9, 10),   // 2: ""
135    ///         Span::new(10, 13),  // 3: "baz"
136    ///     ]
137    /// );
138    /// assert!(files.line_span(file_id, 4).is_err());
139    /// ```
140    pub fn line_span(
141        &self,
142        file_id: FileId,
143        line_index: impl Into<LineIndex>,
144    ) -> Result<Span, Error> {
145        self.get(file_id).line_span(line_index.into())
146    }
147
148    /// Get the line index at the given byte in the source file.
149    ///
150    /// ```rust
151    /// use codespan::{Files, LineIndex};
152    ///
153    /// let mut files = Files::new();
154    /// let file_id = files.add("test", "foo\nbar\r\n\nbaz");
155    ///
156    /// assert_eq!(files.line_index(file_id, 0), LineIndex::from(0));
157    /// assert_eq!(files.line_index(file_id, 7), LineIndex::from(1));
158    /// assert_eq!(files.line_index(file_id, 8), LineIndex::from(1));
159    /// assert_eq!(files.line_index(file_id, 9), LineIndex::from(2));
160    /// assert_eq!(files.line_index(file_id, 100), LineIndex::from(3));
161    /// ```
162    pub fn line_index(&self, file_id: FileId, byte_index: impl Into<ByteIndex>) -> LineIndex {
163        self.get(file_id).line_index(byte_index.into())
164    }
165
166    /// Get the location at the given byte index in the source file.
167    ///
168    /// ```rust
169    /// use codespan::{ByteIndex, Files, Location, Span};
170    ///
171    /// let mut files = Files::new();
172    /// let file_id = files.add("test", "foo\nbar\r\n\nbaz");
173    ///
174    /// assert_eq!(files.location(file_id, 0).unwrap(), Location::new(0, 0));
175    /// assert_eq!(files.location(file_id, 7).unwrap(), Location::new(1, 3));
176    /// assert_eq!(files.location(file_id, 8).unwrap(), Location::new(1, 4));
177    /// assert_eq!(files.location(file_id, 9).unwrap(), Location::new(2, 0));
178    /// assert!(files.location(file_id, 100).is_err());
179    /// ```
180    pub fn location(
181        &self,
182        file_id: FileId,
183        byte_index: impl Into<ByteIndex>,
184    ) -> Result<Location, Error> {
185        self.get(file_id).location(byte_index.into())
186    }
187
188    /// Get the source of the file.
189    ///
190    /// ```rust
191    /// use codespan::Files;
192    ///
193    /// let source = "hello world!";
194    ///
195    /// let mut files = Files::new();
196    /// let file_id = files.add("test", source);
197    ///
198    /// assert_eq!(*files.source(file_id), source);
199    /// ```
200    #[must_use]
201    pub fn source(&self, file_id: FileId) -> &Source {
202        self.get(file_id).source()
203    }
204
205    /// Return the span of the full source.
206    ///
207    /// ```rust
208    /// use codespan::{Files, Span};
209    ///
210    /// let source = "hello world!";
211    ///
212    /// let mut files = Files::new();
213    /// let file_id = files.add("test", source);
214    ///
215    /// assert_eq!(files.source_span(file_id), Span::from_str(source));
216    /// ```
217    pub fn source_span(&self, file_id: FileId) -> Span {
218        self.get(file_id).source_span()
219    }
220
221    /// Return a slice of the source file, given a span.
222    ///
223    /// ```rust
224    /// use codespan::{Files, Span};
225    ///
226    /// let mut files = Files::new();
227    /// let file_id = files.add("test",  "hello world!");
228    ///
229    /// assert_eq!(files.source_slice(file_id, Span::new(0, 5)).unwrap(), "hello");
230    /// assert!(files.source_slice(file_id, Span::new(0, 100)).is_err());
231    /// ```
232    pub fn source_slice(&self, file_id: FileId, span: impl Into<Span>) -> Result<&str, Error> {
233        self.get(file_id).source_slice(span.into())
234    }
235}
236
237impl<'a, Source> codespan_reporting::files::Files<'a> for Files<Source>
238where
239    Source: AsRef<str>,
240{
241    type FileId = FileId;
242    type Name = String;
243    type Source = &'a str;
244
245    fn name(&self, id: FileId) -> Result<String, Error> {
246        #[cfg(feature = "std")]
247        {
248            use std::path::PathBuf;
249
250            Ok(PathBuf::from(self.name(id)).display().to_string())
251        }
252
253        #[cfg(not(feature = "std"))]
254        {
255            Ok(self.name(id).to_string())
256        }
257    }
258
259    fn source(&'a self, id: FileId) -> Result<&'a str, Error> {
260        Ok(self.source(id).as_ref())
261    }
262
263    fn line_index(&self, id: FileId, byte_index: usize) -> Result<usize, Error> {
264        Ok(self.line_index(id, byte_index as u32).to_usize())
265    }
266
267    fn line_range(
268        &'a self,
269        id: FileId,
270        line_index: usize,
271    ) -> Result<core::ops::Range<usize>, Error> {
272        let span = self.line_span(id, line_index as u32)?;
273
274        Ok(span.start().to_usize()..span.end().to_usize())
275    }
276}
277
278/// A file that is stored in the database.
279#[derive(Debug, Clone)]
280// `Serialize` is only implemented on `OsString` for windows/unix
281#[cfg_attr(
282    all(feature = "serialization", any(windows, unix)),
283    derive(Deserialize, Serialize)
284)]
285struct File<Source> {
286    /// The name of the file.
287    name: OsString,
288    /// The source code of the file.
289    source: Source,
290    /// The starting byte indices in the source code.
291    line_starts: Vec<ByteIndex>,
292}
293
294impl<Source> File<Source>
295where
296    Source: AsRef<str>,
297{
298    fn new(name: OsString, source: Source) -> Self {
299        let line_starts = line_starts(source.as_ref())
300            .map(|i| ByteIndex::from(i as u32))
301            .collect();
302
303        File {
304            name,
305            source,
306            line_starts,
307        }
308    }
309
310    fn update(&mut self, source: Source) {
311        let line_starts = line_starts(source.as_ref())
312            .map(|i| ByteIndex::from(i as u32))
313            .collect();
314        self.source = source;
315        self.line_starts = line_starts;
316    }
317
318    fn name(&self) -> &OsStr {
319        &self.name
320    }
321
322    fn line_start(&self, line_index: LineIndex) -> Result<ByteIndex, Error> {
323        use core::cmp::Ordering;
324
325        match line_index.cmp(&self.last_line_index()) {
326            Ordering::Less => Ok(self.line_starts[line_index.to_usize()]),
327            Ordering::Equal => Ok(self.source_span().end()),
328            Ordering::Greater => Err(Error::LineTooLarge {
329                given: line_index.to_usize(),
330                max: self.last_line_index().to_usize(),
331            }),
332        }
333    }
334
335    fn last_line_index(&self) -> LineIndex {
336        LineIndex::from(self.line_starts.len() as RawIndex)
337    }
338
339    fn line_span(&self, line_index: LineIndex) -> Result<Span, Error> {
340        let line_start = self.line_start(line_index)?;
341        let next_line_start = self.line_start(line_index + LineOffset::from(1))?;
342
343        Ok(Span::new(line_start, next_line_start))
344    }
345
346    fn line_index(&self, byte_index: ByteIndex) -> LineIndex {
347        match self.line_starts.binary_search(&byte_index) {
348            // Found the start of a line
349            Ok(line) => LineIndex::from(line as u32),
350            Err(next_line) => LineIndex::from(next_line as u32 - 1),
351        }
352    }
353
354    fn location(&self, byte_index: ByteIndex) -> Result<Location, Error> {
355        let line_index = self.line_index(byte_index);
356        let line_start_index = self
357            .line_start(line_index)
358            .map_err(|_| Error::IndexTooLarge {
359                given: byte_index.to_usize(),
360                max: self.source().as_ref().len() - 1,
361            })?;
362        let line_src = self
363            .source
364            .as_ref()
365            .get(line_start_index.to_usize()..byte_index.to_usize())
366            .ok_or_else(|| {
367                let given = byte_index.to_usize();
368                let max = self.source().as_ref().len() - 1;
369                if given > max {
370                    Error::IndexTooLarge { given, max }
371                } else {
372                    Error::InvalidCharBoundary { given }
373                }
374            })?;
375
376        Ok(Location {
377            line: line_index,
378            column: ColumnIndex::from(line_src.chars().count() as u32),
379        })
380    }
381
382    fn source(&self) -> &Source {
383        &self.source
384    }
385
386    fn source_span(&self) -> Span {
387        Span::from_str(self.source.as_ref())
388    }
389
390    fn source_slice(&self, span: Span) -> Result<&str, Error> {
391        let start = span.start().to_usize();
392        let end = span.end().to_usize();
393
394        self.source.as_ref().get(start..end).ok_or_else(|| {
395            let max = self.source().as_ref().len() - 1;
396            Error::IndexTooLarge {
397                given: if start > max { start } else { end },
398                max,
399            }
400        })
401    }
402}
403
// NOTE: this mirrors `codespan_reporting::files::line_starts`; keep the results in sync.
//
// Yields the byte offset at which each line of `source` starts: always 0 for the first line,
// then the offset just past every `\n`.
fn line_starts(source: &str) -> impl '_ + Iterator<Item = usize> {
    let after_newlines = source
        .char_indices()
        .filter(|&(_, character)| character == '\n')
        // `\n` is a single byte, so the next line starts one byte further on.
        .map(|(offset, _)| offset + 1);

    core::iter::once(0).chain(after_newlines)
}
408
#[cfg(test)]
mod test {
    use alloc::borrow::ToOwned;

    use super::*;

    /// Four lines that mix `\n` and `\r\n` terminators; the last line has no terminator.
    const TEST_SOURCE: &str = "foo\nbar\r\n\nbaz";

    /// Adding a file records the byte index at which each of its lines starts.
    #[test]
    fn line_starts() {
        let mut database = Files::<String>::new();
        let id = database.add("test", TEST_SOURCE.to_owned());

        let expected = [
            ByteIndex::from(0),  // "foo\n"
            ByteIndex::from(4),  // "bar\r\n"
            ByteIndex::from(9),  // ""
            ByteIndex::from(10), // "baz"
        ];

        assert_eq!(database.get(id).line_starts, expected,);
    }

    /// Slicing the source with each line span gives back the line including its terminator.
    #[test]
    fn line_span_sources() {
        // Also make sure we can use `Arc` for source
        use alloc::sync::Arc;

        let mut database = Files::<Arc<str>>::new();
        let id = database.add("test", TEST_SOURCE.into());

        let mut line_sources = Vec::new();
        for line in 0..4 {
            let span = database.line_span(id, line).unwrap();
            line_sources.push(database.source_slice(id, span).unwrap());
        }

        assert_eq!(line_sources, ["foo\n", "bar\r\n", "\n", "baz"],);
    }

    /// A `Files` database can be handed to `codespan_reporting` to render a diagnostic.
    #[test]
    fn interoperability() {
        extern crate termcolor;
        use codespan_reporting::{diagnostic::*, term::emit_to_write_style};
        use termcolor::{ColorChoice, StandardStream};

        let mut database = Files::<String>::new();
        let id = database.add("test", TEST_SOURCE.to_owned());

        // Label bytes 4..7, i.e. "bar" on the second line.
        let label = Label::primary(id, 4..7).with_message("middle");
        let diagnostic = Diagnostic::note()
            .with_message("middle")
            .with_labels(vec![label]);

        let config = codespan_reporting::term::Config::default();
        let stdout = StandardStream::stdout(ColorChoice::Auto);
        emit_to_write_style(&mut stdout.lock(), &config, &database, &diagnostic).unwrap();
    }
}