solar_interface/source_map/
file.rs

1use crate::{BytePos, CharPos, pos::RelativeBytePos};
2use std::{
3    fmt, io,
4    ops::RangeInclusive,
5    path::{Path, PathBuf},
6    sync::Arc,
7};
8
9/// Identifies an offset of a multi-byte character in a `SourceFile`.
10#[derive(Clone, Copy, Debug, PartialEq, Eq)]
11pub struct MultiByteChar {
12    /// The relative offset of the character in the `SourceFile`.
13    pub pos: RelativeBytePos,
14    /// The number of bytes, `>= 2`.
15    pub bytes: u8,
16}
17
/// Names the origin of a piece of source code.
///
/// The source map is keyed by this type; see
/// [`SourceMap::get_file`](crate::SourceMap::get_file).
///
/// The derived ordering follows the declaration order of the variants.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum FileName {
    /// A file that lives on the file system, identified by its path.
    Real(PathBuf),
    /// Source supplied on the command line.
    Stdin,
    /// A custom source handed to the parser explicitly by a plugin or driver.
    Custom(String),
}
31
32impl PartialEq<Path> for FileName {
33    fn eq(&self, other: &Path) -> bool {
34        match self {
35            Self::Real(p) => p == other,
36            _ => false,
37        }
38    }
39}
40
41impl PartialEq<&Path> for FileName {
42    fn eq(&self, other: &&Path) -> bool {
43        match self {
44            Self::Real(p) => p == *other,
45            _ => false,
46        }
47    }
48}
49
50impl PartialEq<PathBuf> for FileName {
51    fn eq(&self, other: &PathBuf) -> bool {
52        match self {
53            Self::Real(p) => p == other,
54            _ => false,
55        }
56    }
57}
58
59impl From<PathBuf> for FileName {
60    fn from(p: PathBuf) -> Self {
61        Self::Real(p)
62    }
63}
64
65impl From<&PathBuf> for FileName {
66    fn from(p: &PathBuf) -> Self {
67        Self::Real(p.clone())
68    }
69}
70
71impl From<&Path> for FileName {
72    fn from(p: &Path) -> Self {
73        Self::Real(p.to_path_buf())
74    }
75}
76
77impl From<String> for FileName {
78    fn from(s: String) -> Self {
79        Self::Custom(s)
80    }
81}
82
83impl From<&Self> for FileName {
84    fn from(s: &Self) -> Self {
85        s.clone()
86    }
87}
88
89impl FileName {
90    /// Creates a new `FileName` from a path.
91    pub fn real(path: impl Into<PathBuf>) -> Self {
92        Self::Real(path.into())
93    }
94
95    /// Creates a new `FileName` from a string.
96    pub fn custom(s: impl Into<String>) -> Self {
97        Self::Custom(s.into())
98    }
99
100    /// Displays the filename.
101    #[inline]
102    pub fn display(&self) -> FileNameDisplay<'_> {
103        let base_path =
104            crate::SessionGlobals::try_with(|g| g.and_then(|g| g.source_map.base_path()));
105        FileNameDisplay { inner: self, base_path }
106    }
107
108    /// Returns the path if the file name is a real file.
109    #[inline]
110    pub fn as_real(&self) -> Option<&Path> {
111        match self {
112            Self::Real(path) => Some(path),
113            _ => None,
114        }
115    }
116}
117
118/// A display wrapper for `FileName`.
119///
120/// Created by [`FileName::display`].
121pub struct FileNameDisplay<'a> {
122    pub(crate) inner: &'a FileName,
123    pub(crate) base_path: Option<PathBuf>,
124}
125
126impl fmt::Display for FileNameDisplay<'_> {
127    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
128        match self.inner {
129            FileName::Real(path) => {
130                let path = if let Some(base_path) = &self.base_path
131                    && let Ok(rpath) = path.strip_prefix(base_path)
132                {
133                    rpath
134                } else {
135                    path.as_path()
136                };
137                path.display().fmt(f)
138            }
139            FileName::Stdin => f.write_str("<stdin>"),
140            FileName::Custom(s) => write!(f, "<{s}>"),
141        }
142    }
143}
144
/// Identifier of a source file: a 64-bit hash computed from its [`FileName`].
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) struct SourceFileId(u64);
147
148impl SourceFileId {
149    pub(crate) fn new(filename: &FileName) -> Self {
150        use std::hash::{Hash, Hasher};
151        let mut hasher = solar_data_structures::map::FxHasher::with_seed(0);
152        filename.hash(&mut hasher);
153        Self(hasher.finish())
154    }
155}
156
/// Error signalling that source offsets no longer fit in a `u32`: the combined length
/// of the sources exceeds [`u32::MAX`].
///
/// The private unit field keeps construction inside this crate.
#[derive(Debug)]
pub struct OffsetOverflowError(pub(crate) ());
160
161impl fmt::Display for OffsetOverflowError {
162    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
163        f.write_str("files larger than 4GiB are not supported")
164    }
165}
166
167impl std::error::Error for OffsetOverflowError {}
168
169impl From<OffsetOverflowError> for io::Error {
170    fn from(e: OffsetOverflowError) -> Self {
171        Self::new(io::ErrorKind::FileTooLarge, e)
172    }
173}
174
175/// A single source in the `SourceMap`.
176#[derive(Clone, derive_more::Debug)]
177#[non_exhaustive]
178pub struct SourceFile {
179    /// The name of the file that the source came from. Source that doesn't
180    /// originate from files has names between angle brackets by convention
181    /// (e.g., `<stdin>`).
182    pub name: FileName,
183    /// The complete source code.
184    #[debug(skip)]
185    pub src: Arc<String>,
186    /// The start position of this source in the `SourceMap`.
187    pub start_pos: BytePos,
188    /// The byte length of this source.
189    pub source_len: RelativeBytePos,
190    /// Locations of lines beginnings in the source code.
191    #[debug(skip)]
192    pub lines: Vec<RelativeBytePos>,
193    /// Locations of multi-byte characters in the source code.
194    #[debug(skip)]
195    pub multibyte_chars: Vec<MultiByteChar>,
196}
197
198impl PartialEq for SourceFile {
199    fn eq(&self, other: &Self) -> bool {
200        self.start_pos == other.start_pos
201    }
202}
203
204impl Eq for SourceFile {}
205
206impl std::hash::Hash for SourceFile {
207    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
208        self.start_pos.hash(state);
209    }
210}
211
212impl SourceFile {
213    /// Creates a new `SourceFile`. Use the [`SourceMap`](crate::SourceMap) methods instead.
214    pub(crate) fn new(
215        name: FileName,
216        id: SourceFileId,
217        mut src: String,
218    ) -> Result<Self, OffsetOverflowError> {
219        // Compute the file hash before any normalization.
220        // let src_hash = SourceFileHash::new(hash_kind, &src);
221
222        // let normalized_pos = normalize_src(&mut src);
223
224        debug_assert_eq!(id, SourceFileId::new(&name));
225        let source_len = src.len();
226        let source_len = u32::try_from(source_len).map_err(|_| OffsetOverflowError(()))?;
227
228        let (lines, multibyte_chars) = super::analyze::analyze_source_file(&src);
229
230        src.shrink_to_fit();
231        Ok(Self {
232            name,
233            src: Arc::new(src),
234            start_pos: BytePos::from_u32(0),
235            source_len: RelativeBytePos::from_u32(source_len),
236            lines,
237            multibyte_chars,
238        })
239    }
240
241    pub fn lines(&self) -> &[RelativeBytePos] {
242        &self.lines
243    }
244
245    pub fn count_lines(&self) -> usize {
246        self.lines().len()
247    }
248
249    #[inline]
250    pub fn absolute_position(&self, pos: RelativeBytePos) -> BytePos {
251        BytePos::from_u32(pos.to_u32() + self.start_pos.to_u32())
252    }
253
254    #[inline]
255    pub fn relative_position(&self, pos: BytePos) -> RelativeBytePos {
256        RelativeBytePos::from_u32(pos.to_u32() - self.start_pos.to_u32())
257    }
258
259    #[inline]
260    pub fn end_position(&self) -> BytePos {
261        self.absolute_position(self.source_len)
262    }
263
264    /// Finds the line containing the given position. The return value is the
265    /// index into the `lines` array of this `SourceFile`, not the 1-based line
266    /// number. If the source_file is empty or the position is located before the
267    /// first line, `None` is returned.
268    pub fn lookup_line(&self, pos: RelativeBytePos) -> Option<usize> {
269        self.lines().partition_point(|x| x <= &pos).checked_sub(1)
270    }
271
272    /// Returns the relative byte position of the start of the line at the given
273    /// 0-based line index.
274    pub fn line_position(&self, line_number: usize) -> Option<usize> {
275        self.lines().get(line_number).map(|x| x.to_usize())
276    }
277
278    /// Converts a `RelativeBytePos` to a `CharPos` relative to the `SourceFile`.
279    pub(crate) fn bytepos_to_file_charpos(&self, bpos: RelativeBytePos) -> CharPos {
280        // The number of extra bytes due to multibyte chars in the `SourceFile`.
281        let mut total_extra_bytes = 0;
282
283        for mbc in self.multibyte_chars.iter() {
284            if mbc.pos < bpos {
285                // Every character is at least one byte, so we only
286                // count the actual extra bytes.
287                total_extra_bytes += mbc.bytes as u32 - 1;
288                // We should never see a byte position in the middle of a
289                // character.
290                assert!(bpos.to_u32() >= mbc.pos.to_u32() + mbc.bytes as u32);
291            } else {
292                break;
293            }
294        }
295
296        assert!(total_extra_bytes <= bpos.to_u32());
297        CharPos(bpos.to_usize() - total_extra_bytes as usize)
298    }
299
300    /// Looks up the file's (1-based) line number and (0-based `CharPos`) column offset, for a
301    /// given `RelativeBytePos`.
302    fn lookup_file_pos(&self, pos: RelativeBytePos) -> (usize, CharPos) {
303        let chpos = self.bytepos_to_file_charpos(pos);
304        match self.lookup_line(pos) {
305            Some(a) => {
306                let line = a + 1; // Line numbers start at 1
307                let linebpos = self.lines()[a];
308                let linechpos = self.bytepos_to_file_charpos(linebpos);
309                let col = chpos - linechpos;
310                assert!(chpos >= linechpos);
311                (line, col)
312            }
313            None => (0, chpos),
314        }
315    }
316
317    /// Looks up the file's (1-based) line number, (0-based `CharPos`) column offset, and (0-based)
318    /// column offset when displayed, for a given `BytePos`.
319    pub fn lookup_file_pos_with_col_display(&self, pos: BytePos) -> (usize, CharPos, usize) {
320        let pos = self.relative_position(pos);
321        let (line, col_or_chpos) = self.lookup_file_pos(pos);
322        if line > 0 {
323            let Some(code) = self.get_line(line - 1) else {
324                // If we don't have the code available, it is ok as a fallback to return the bytepos
325                // instead of the "display" column, which is only used to properly show underlines
326                // in the terminal.
327                // FIXME: we'll want better handling of this in the future for the sake of tools
328                // that want to use the display col instead of byte offsets to modify code, but
329                // that is a problem for another day, the previous code was already incorrect for
330                // both displaying *and* third party tools using the json output naïvely.
331                debug!("couldn't find line {line} in {:?}", self.name);
332                return (line, col_or_chpos, col_or_chpos.0);
333            };
334            let display_col = code.chars().take(col_or_chpos.0).map(char_width).sum();
335            (line, col_or_chpos, display_col)
336        } else {
337            // This is never meant to happen?
338            (0, col_or_chpos, col_or_chpos.0)
339        }
340    }
341
342    /// Gets a line from the list of pre-computed line-beginnings.
343    /// The line number here is 0-based.
344    pub fn get_line(&self, line_number: usize) -> Option<&str> {
345        fn get_until_newline(src: &str, begin: usize) -> &str {
346            // We can't use `lines.get(line_number+1)` because we might
347            // be parsing when we call this function and thus the current
348            // line is the last one we have line info for.
349            let slice = &src[begin..];
350            match slice.find('\n') {
351                Some(e) => &slice[..e],
352                None => slice,
353            }
354        }
355
356        let start = self.lines().get(line_number)?.to_usize();
357        Some(get_until_newline(&self.src, start))
358    }
359
360    /// Gets a slice of the source text between two lines, including the
361    /// terminator of the second line (if any).
362    pub fn get_lines(&self, range: RangeInclusive<usize>) -> Option<&str> {
363        fn get_until_newline(src: &str, start: usize, end: usize) -> &str {
364            match src[end..].find('\n') {
365                Some(e) => &src[start..end + e + 1],
366                None => &src[start..],
367            }
368        }
369
370        let (start, end) = range.into_inner();
371        let lines = self.lines();
372        let start = lines.get(start)?.to_usize();
373        let end = lines.get(end)?.to_usize();
374        Some(get_until_newline(&self.src, start, end))
375    }
376
377    /// Returns whether or not the file contains the given `SourceMap` byte
378    /// position. The position one past the end of the file is considered to be
379    /// contained by the file. This implies that files for which `is_empty`
380    /// returns true still contain one byte position according to this function.
381    #[inline]
382    pub fn contains(&self, byte_pos: BytePos) -> bool {
383        byte_pos >= self.start_pos && byte_pos <= self.end_position()
384    }
385
386    #[inline]
387    pub fn is_empty(&self) -> bool {
388        self.source_len.to_u32() == 0
389    }
390
391    /// Calculates the original byte position relative to the start of the file
392    /// based on the given byte position.
393    pub fn original_relative_byte_pos(&self, pos: BytePos) -> RelativeBytePos {
394        let pos = self.relative_position(pos);
395        RelativeBytePos::from_u32(pos.0)
396    }
397}
398
399pub fn char_width(ch: char) -> usize {
400    match ch {
401        '\t' => 4,
402        _ => unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1),
403    }
404}