solar_interface/source_map/
file.rs

1use crate::{pos::RelativeBytePos, BytePos, CharPos};
2use std::{
3    fmt, io,
4    ops::RangeInclusive,
5    path::{Path, PathBuf},
6    sync::Arc,
7};
8
9/// Identifies an offset of a multi-byte character in a `SourceFile`.
10#[derive(Clone, Copy, Debug, PartialEq, Eq)]
11pub struct MultiByteChar {
12    /// The relative offset of the character in the `SourceFile`.
13    pub pos: RelativeBytePos,
14    /// The number of bytes, `>= 2`.
15    pub bytes: u8,
16}
17
/// The name under which a source is known.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum FileName {
    /// A file from the file system, identified by its path.
    Real(PathBuf),
    /// Input coming from the command line.
    Stdin,
    /// A custom source, used for explicit parser calls from plugins and drivers.
    Custom(String),
}
27
28impl PartialEq<Path> for FileName {
29    fn eq(&self, other: &Path) -> bool {
30        match self {
31            Self::Real(p) => p == other,
32            _ => false,
33        }
34    }
35}
36
37impl PartialEq<&Path> for FileName {
38    fn eq(&self, other: &&Path) -> bool {
39        match self {
40            Self::Real(p) => p == *other,
41            _ => false,
42        }
43    }
44}
45
46impl PartialEq<PathBuf> for FileName {
47    fn eq(&self, other: &PathBuf) -> bool {
48        match self {
49            Self::Real(p) => p == other,
50            _ => false,
51        }
52    }
53}
54
55impl From<PathBuf> for FileName {
56    fn from(p: PathBuf) -> Self {
57        Self::Real(p)
58    }
59}
60
61impl FileName {
62    /// Creates a new `FileName` from a path.
63    pub fn real(path: impl Into<PathBuf>) -> Self {
64        Self::Real(path.into())
65    }
66
67    /// Creates a new `FileName` from a string.
68    pub fn custom(s: impl Into<String>) -> Self {
69        Self::Custom(s.into())
70    }
71
72    /// Displays the filename.
73    #[inline]
74    pub fn display(&self) -> FileNameDisplay<'_> {
75        FileNameDisplay { inner: self }
76    }
77}
78
79pub struct FileNameDisplay<'a> {
80    inner: &'a FileName,
81}
82
83impl fmt::Display for FileNameDisplay<'_> {
84    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
85        match self.inner {
86            FileName::Real(path) => path.display().fmt(f),
87            FileName::Stdin => f.write_str("<stdin>"),
88            FileName::Custom(s) => write!(f, "<{s}>"),
89        }
90    }
91}
92
/// Identifier of a source file, stored as a single `u64`.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct StableSourceFileId(u64);
95
96impl StableSourceFileId {
97    pub(super) fn from_filename_in_current_crate(filename: &FileName) -> Self {
98        Self::new(
99            filename,
100            // None
101        )
102    }
103
104    // pub fn from_filename_for_export(
105    //     filename: &FileName,
106    //     local_crate_stable_crate_id: StableCrateId,
107    // ) -> Self {
108    //     Self::new(filename, Some(local_crate_stable_crate_id))
109    // }
110
111    fn new(
112        filename: &FileName,
113        // stable_crate_id: Option<StableCrateId>,
114    ) -> Self {
115        use std::hash::{Hash, Hasher};
116        let mut hasher = solar_data_structures::map::FxHasher::default();
117        filename.hash(&mut hasher);
118        // stable_crate_id.hash(&mut hasher);
119        Self(hasher.finish())
120    }
121}
122
/// The sum of all file lengths exceeds [`u32::MAX`].
#[derive(Debug)]
pub struct OffsetOverflowError(pub(crate) ());
126
127impl fmt::Display for OffsetOverflowError {
128    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
129        f.write_str("files larger than 4GiB are not supported")
130    }
131}
132
133impl std::error::Error for OffsetOverflowError {}
134
135impl From<OffsetOverflowError> for io::Error {
136    fn from(e: OffsetOverflowError) -> Self {
137        Self::new(io::ErrorKind::FileTooLarge, e)
138    }
139}
140
141/// A single source in the `SourceMap`.
142#[derive(Clone, derive_more::Debug)]
143pub struct SourceFile {
144    /// The name of the file that the source came from. Source that doesn't
145    /// originate from files has names between angle brackets by convention
146    /// (e.g., `<stdin>`).
147    pub name: FileName,
148    /// The complete source code.
149    #[debug(skip)]
150    pub src: Arc<String>,
151    /// The source code's hash.
152    #[debug(skip)]
153    pub src_hash: SourceFileHash,
154    /// The start position of this source in the `SourceMap`.
155    pub start_pos: BytePos,
156    /// The byte length of this source.
157    pub source_len: RelativeBytePos,
158    /// Locations of lines beginnings in the source code.
159    #[debug(skip)]
160    pub lines: Vec<RelativeBytePos>,
161    /// Locations of multi-byte characters in the source code.
162    #[debug(skip)]
163    pub multibyte_chars: Vec<MultiByteChar>,
164    /// A hash of the filename & crate-id, used for uniquely identifying source
165    /// files within the crate graph and for speeding up hashing in incremental
166    /// compilation.
167    #[debug(skip)]
168    pub stable_id: StableSourceFileId,
169}
170
171impl SourceFile {
172    pub fn new(
173        name: FileName,
174        mut src: String,
175        hash_kind: SourceFileHashAlgorithm,
176    ) -> Result<Self, OffsetOverflowError> {
177        // Compute the file hash before any normalization.
178        let src_hash = SourceFileHash::new(hash_kind, &src);
179        // let normalized_pos = normalize_src(&mut src);
180
181        let stable_id = StableSourceFileId::from_filename_in_current_crate(&name);
182        let source_len = src.len();
183        let source_len = u32::try_from(source_len).map_err(|_| OffsetOverflowError(()))?;
184
185        let (lines, multibyte_chars) = super::analyze::analyze_source_file(&src);
186
187        src.shrink_to_fit();
188        Ok(Self {
189            name,
190            src: Arc::new(src),
191            src_hash,
192            start_pos: BytePos::from_u32(0),
193            source_len: RelativeBytePos::from_u32(source_len),
194            lines,
195            multibyte_chars,
196            stable_id,
197        })
198    }
199
200    pub fn lines(&self) -> &[RelativeBytePos] {
201        &self.lines
202    }
203
204    pub fn count_lines(&self) -> usize {
205        self.lines().len()
206    }
207
208    #[inline]
209    pub fn absolute_position(&self, pos: RelativeBytePos) -> BytePos {
210        BytePos::from_u32(pos.to_u32() + self.start_pos.to_u32())
211    }
212
213    #[inline]
214    pub fn relative_position(&self, pos: BytePos) -> RelativeBytePos {
215        RelativeBytePos::from_u32(pos.to_u32() - self.start_pos.to_u32())
216    }
217
218    #[inline]
219    pub fn end_position(&self) -> BytePos {
220        self.absolute_position(self.source_len)
221    }
222
223    /// Finds the line containing the given position. The return value is the
224    /// index into the `lines` array of this `SourceFile`, not the 1-based line
225    /// number. If the source_file is empty or the position is located before the
226    /// first line, `None` is returned.
227    pub fn lookup_line(&self, pos: RelativeBytePos) -> Option<usize> {
228        self.lines().partition_point(|x| x <= &pos).checked_sub(1)
229    }
230
231    /// Returns the relative byte position of the start of the line at the given
232    /// 0-based line index.
233    pub fn line_position(&self, line_number: usize) -> Option<usize> {
234        self.lines().get(line_number).map(|x| x.to_usize())
235    }
236
237    /// Converts a `RelativeBytePos` to a `CharPos` relative to the `SourceFile`.
238    pub(crate) fn bytepos_to_file_charpos(&self, bpos: RelativeBytePos) -> CharPos {
239        // The number of extra bytes due to multibyte chars in the `SourceFile`.
240        let mut total_extra_bytes = 0;
241
242        for mbc in self.multibyte_chars.iter() {
243            if mbc.pos < bpos {
244                // Every character is at least one byte, so we only
245                // count the actual extra bytes.
246                total_extra_bytes += mbc.bytes as u32 - 1;
247                // We should never see a byte position in the middle of a
248                // character.
249                assert!(bpos.to_u32() >= mbc.pos.to_u32() + mbc.bytes as u32);
250            } else {
251                break;
252            }
253        }
254
255        assert!(total_extra_bytes <= bpos.to_u32());
256        CharPos(bpos.to_usize() - total_extra_bytes as usize)
257    }
258
259    /// Looks up the file's (1-based) line number and (0-based `CharPos`) column offset, for a
260    /// given `RelativeBytePos`.
261    fn lookup_file_pos(&self, pos: RelativeBytePos) -> (usize, CharPos) {
262        let chpos = self.bytepos_to_file_charpos(pos);
263        match self.lookup_line(pos) {
264            Some(a) => {
265                let line = a + 1; // Line numbers start at 1
266                let linebpos = self.lines()[a];
267                let linechpos = self.bytepos_to_file_charpos(linebpos);
268                let col = chpos - linechpos;
269                assert!(chpos >= linechpos);
270                (line, col)
271            }
272            None => (0, chpos),
273        }
274    }
275
276    /// Looks up the file's (1-based) line number, (0-based `CharPos`) column offset, and (0-based)
277    /// column offset when displayed, for a given `BytePos`.
278    pub fn lookup_file_pos_with_col_display(&self, pos: BytePos) -> (usize, CharPos, usize) {
279        let pos = self.relative_position(pos);
280        let (line, col_or_chpos) = self.lookup_file_pos(pos);
281        if line > 0 {
282            let Some(code) = self.get_line(line - 1) else {
283                // If we don't have the code available, it is ok as a fallback to return the bytepos
284                // instead of the "display" column, which is only used to properly show underlines
285                // in the terminal.
286                // FIXME: we'll want better handling of this in the future for the sake of tools
287                // that want to use the display col instead of byte offsets to modify code, but
288                // that is a problem for another day, the previous code was already incorrect for
289                // both displaying *and* third party tools using the json output naïvely.
290                debug!("couldn't find line {line} in {:?}", self.name);
291                return (line, col_or_chpos, col_or_chpos.0);
292            };
293            let display_col = code.chars().take(col_or_chpos.0).map(char_width).sum();
294            (line, col_or_chpos, display_col)
295        } else {
296            // This is never meant to happen?
297            (0, col_or_chpos, col_or_chpos.0)
298        }
299    }
300
301    /// Gets a line from the list of pre-computed line-beginnings.
302    /// The line number here is 0-based.
303    pub fn get_line(&self, line_number: usize) -> Option<&str> {
304        fn get_until_newline(src: &str, begin: usize) -> &str {
305            // We can't use `lines.get(line_number+1)` because we might
306            // be parsing when we call this function and thus the current
307            // line is the last one we have line info for.
308            let slice = &src[begin..];
309            match slice.find('\n') {
310                Some(e) => &slice[..e],
311                None => slice,
312            }
313        }
314
315        let start = self.lines().get(line_number)?.to_usize();
316        Some(get_until_newline(&self.src, start))
317    }
318
319    /// Gets a slice of the source text between two lines, including the
320    /// terminator of the second line (if any).
321    pub fn get_lines(&self, range: RangeInclusive<usize>) -> Option<&str> {
322        fn get_until_newline(src: &str, start: usize, end: usize) -> &str {
323            match src[end..].find('\n') {
324                Some(e) => &src[start..end + e + 1],
325                None => &src[start..],
326            }
327        }
328
329        let (start, end) = range.into_inner();
330        let lines = self.lines();
331        let start = lines.get(start)?.to_usize();
332        let end = lines.get(end)?.to_usize();
333        Some(get_until_newline(&self.src, start, end))
334    }
335
336    /// Returns whether or not the file contains the given `SourceMap` byte
337    /// position. The position one past the end of the file is considered to be
338    /// contained by the file. This implies that files for which `is_empty`
339    /// returns true still contain one byte position according to this function.
340    #[inline]
341    pub fn contains(&self, byte_pos: BytePos) -> bool {
342        byte_pos >= self.start_pos && byte_pos <= self.end_position()
343    }
344
345    #[inline]
346    pub fn is_empty(&self) -> bool {
347        self.source_len.to_u32() == 0
348    }
349
350    /// Calculates the original byte position relative to the start of the file
351    /// based on the given byte position.
352    pub fn original_relative_byte_pos(&self, pos: BytePos) -> RelativeBytePos {
353        let pos = self.relative_position(pos);
354        RelativeBytePos::from_u32(pos.0)
355    }
356}
357
358pub fn char_width(ch: char) -> usize {
359    match ch {
360        '\t' => 4,
361        _ => unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1),
362    }
363}
364
/// The algorithm used to hash the contents of a source file.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum SourceFileHashAlgorithm {
    /// No hash; this is the default.
    #[default]
    None,
    // The following variants are currently disabled:
    // Md5,
    // Sha1,
    // Sha256,
}
373
374impl std::str::FromStr for SourceFileHashAlgorithm {
375    type Err = ();
376
377    fn from_str(s: &str) -> Result<Self, Self::Err> {
378        // match s {
379        //     "md5" => Ok(Self::Md5),
380        //     "sha1" => Ok(Self::Sha1),
381        //     "sha256" => Ok(Self::Sha256),
382        //     _ => Err(()),
383        // }
384        let _ = s;
385        Err(())
386    }
387}
388
389impl SourceFileHashAlgorithm {
390    /// The length of the hash in bytes.
391    #[inline]
392    pub const fn hash_len(self) -> usize {
393        match self {
394            Self::None => 0,
395            // Self::Md5 => 16,
396            // Self::Sha1 => 20,
397            // Self::Sha256 => 32,
398        }
399    }
400}
401
/// Size in bytes of the buffer that holds a hash value.
const MAX_HASH_SIZE: usize = 32;
403
404/// The hash of the on-disk source file used for debug info.
405#[derive(Clone, Copy, PartialEq, Eq, Hash)]
406pub struct SourceFileHash {
407    kind: SourceFileHashAlgorithm,
408    value: [u8; MAX_HASH_SIZE],
409}
410
411impl fmt::Debug for SourceFileHash {
412    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
413        let mut dbg = f.debug_struct("SourceFileHash");
414        dbg.field("kind", &self.kind);
415        if self.kind != SourceFileHashAlgorithm::None {
416            dbg.field("value", &format_args!("{}", hex::encode(self.hash_bytes())));
417        }
418        dbg.finish()
419    }
420}
421
422impl SourceFileHash {
423    pub fn new(kind: SourceFileHashAlgorithm, src: &str) -> Self {
424        // use md5::digest::{typenum::Unsigned, Digest, OutputSizeUser};
425
426        // fn digest_into<D: Digest>(data: &[u8], out: &mut [u8; MAX_HASH_SIZE]) {
427        //     let mut hasher = D::new();
428        //     hasher.update(data);
429        //     hasher.finalize_into((&mut out[..<D as OutputSizeUser>::OutputSize::USIZE]).into());
430        // }
431
432        // let mut hash = Self { kind, value: Default::default() };
433        // let value = &mut hash.value;
434        // let data = src.as_bytes();
435        // match kind {
436        //     SourceFileHashAlgorithm::None => (),
437        //     SourceFileHashAlgorithm::Md5 => digest_into::<md5::Md5>(data, value),
438        //     SourceFileHashAlgorithm::Sha1 => digest_into::<sha1::Sha1>(data, value),
439        //     SourceFileHashAlgorithm::Sha256 => digest_into::<sha256::Sha256>(data, value),
440        // }
441        // hash
442        let _ = src;
443        Self { kind, value: Default::default() }
444    }
445
446    /// Check if the stored hash matches the hash of the string.
447    pub fn matches(&self, src: &str) -> bool {
448        Self::new(self.kind, src).hash_bytes() == self.hash_bytes()
449    }
450
451    /// The bytes of the hash.
452    pub fn hash_bytes(&self) -> &[u8] {
453        &self.value[..self.hash_len()]
454    }
455
456    /// The hash algorithm used.
457    pub const fn kind(&self) -> SourceFileHashAlgorithm {
458        self.kind
459    }
460
461    /// Returns the length of the hash in bytes.
462    #[inline]
463    pub const fn hash_len(&self) -> usize {
464        self.kind.hash_len()
465    }
466}