solar_interface/source_map/
file.rs

1use crate::{BytePos, CharPos, SourceMap, pos::RelativeBytePos};
2use std::{
3    fmt, io,
4    ops::RangeInclusive,
5    path::{Path, PathBuf},
6    sync::Arc,
7};
8
/// Identifies an offset of a multi-byte character in a `SourceFile`.
///
/// `SourceFile::bytepos_to_file_charpos` walks these records to translate byte offsets into
/// character offsets: each record contributes `bytes - 1` "extra" bytes.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct MultiByteChar {
    /// The relative offset of the character in the `SourceFile`.
    pub pos: RelativeBytePos,
    /// The number of bytes, `>= 2`.
    pub bytes: u8,
}
17
/// The name of a source file.
///
/// This is used as the key in the source map. See [`SourceMap::get_file`].
///
/// Comparisons against `Path`/`PathBuf` values can only succeed for the [`FileName::Real`]
/// variant; every other variant compares unequal to any path.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum FileName {
    /// Files from the file system.
    Real(PathBuf),
    /// Command line. Rendered as `<stdin>` by [`FileName::display`].
    Stdin,
    /// Custom sources for explicit parser calls from plugins and drivers.
    /// Rendered as the contained string wrapped in angle brackets.
    Custom(String),
}
30
31impl PartialEq<Path> for FileName {
32    fn eq(&self, other: &Path) -> bool {
33        match self {
34            Self::Real(p) => p == other,
35            _ => false,
36        }
37    }
38}
39
40impl PartialEq<&Path> for FileName {
41    fn eq(&self, other: &&Path) -> bool {
42        match self {
43            Self::Real(p) => p == *other,
44            _ => false,
45        }
46    }
47}
48
49impl PartialEq<PathBuf> for FileName {
50    fn eq(&self, other: &PathBuf) -> bool {
51        match self {
52            Self::Real(p) => p == other,
53            _ => false,
54        }
55    }
56}
57
58impl From<PathBuf> for FileName {
59    fn from(p: PathBuf) -> Self {
60        Self::Real(p)
61    }
62}
63
64impl From<&PathBuf> for FileName {
65    fn from(p: &PathBuf) -> Self {
66        Self::Real(p.clone())
67    }
68}
69
70impl From<&Path> for FileName {
71    fn from(p: &Path) -> Self {
72        Self::Real(p.to_path_buf())
73    }
74}
75
76impl From<String> for FileName {
77    fn from(s: String) -> Self {
78        Self::Custom(s)
79    }
80}
81
82impl From<&Self> for FileName {
83    fn from(s: &Self) -> Self {
84        s.clone()
85    }
86}
87
88impl FileName {
89    /// Creates a new `FileName` from a path.
90    pub fn real(path: impl Into<PathBuf>) -> Self {
91        Self::Real(path.into())
92    }
93
94    /// Creates a new `FileName` from a string.
95    pub fn custom(s: impl Into<String>) -> Self {
96        Self::Custom(s.into())
97    }
98
99    /// Displays the filename.
100    #[inline]
101    pub fn display(&self) -> FileNameDisplay<'_> {
102        let sm = crate::SessionGlobals::try_with(|g| g.map(|g| g.source_map.clone()));
103        FileNameDisplay { inner: self, sm }
104    }
105
106    /// Returns the path if the file name is a real file.
107    #[inline]
108    pub fn as_real(&self) -> Option<&Path> {
109        match self {
110            Self::Real(path) => Some(path),
111            _ => None,
112        }
113    }
114}
115
/// Helper returned by [`FileName::display`] that implements [`fmt::Display`] for a file name.
pub struct FileNameDisplay<'a> {
    /// The file name being rendered.
    inner: &'a FileName,
    /// Source map captured when the helper was created, if any. When it has a base path set,
    /// that prefix is stripped from real paths before they are printed.
    sm: Option<Arc<SourceMap>>,
}
120
121impl fmt::Display for FileNameDisplay<'_> {
122    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
123        match self.inner {
124            FileName::Real(path) => {
125                let path = if let Some(sm) = &self.sm
126                    && let Some(base_path) = sm.base_path.get()
127                    && let Ok(rpath) = path.strip_prefix(base_path)
128                {
129                    rpath
130                } else {
131                    path.as_path()
132                };
133                path.display().fmt(f)
134            }
135            FileName::Stdin => f.write_str("<stdin>"),
136            FileName::Custom(s) => write!(f, "<{s}>"),
137        }
138    }
139}
140
/// Identifier of a source file, computed by hashing its [`FileName`].
///
/// The wrapped value is the 64-bit output of the hasher used in `StableSourceFileId::new`.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct StableSourceFileId(u64);
143
144impl StableSourceFileId {
145    pub(super) fn from_filename_in_current_crate(filename: &FileName) -> Self {
146        Self::new(
147            filename,
148            // None
149        )
150    }
151
152    // pub fn from_filename_for_export(
153    //     filename: &FileName,
154    //     local_crate_stable_crate_id: StableCrateId,
155    // ) -> Self {
156    //     Self::new(filename, Some(local_crate_stable_crate_id))
157    // }
158
159    fn new(
160        filename: &FileName,
161        // stable_crate_id: Option<StableCrateId>,
162    ) -> Self {
163        use std::hash::{Hash, Hasher};
164        let mut hasher = solar_data_structures::map::FxHasher::default();
165        filename.hash(&mut hasher);
166        // stable_crate_id.hash(&mut hasher);
167        Self(hasher.finish())
168    }
169}
170
/// Sum of all file lengths is over [`u32::MAX`].
///
/// `SourceFile::new` returns this when a source's byte length does not fit in a `u32`.
/// The unit field is `pub(crate)`, so the error can only be constructed inside this crate.
#[derive(Debug)]
pub struct OffsetOverflowError(pub(crate) ());
174
175impl fmt::Display for OffsetOverflowError {
176    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
177        f.write_str("files larger than 4GiB are not supported")
178    }
179}
180
// Marker impl: the error carries no source/cause, so the trait's default methods suffice.
impl std::error::Error for OffsetOverflowError {}
182
183impl From<OffsetOverflowError> for io::Error {
184    fn from(e: OffsetOverflowError) -> Self {
185        Self::new(io::ErrorKind::FileTooLarge, e)
186    }
187}
188
/// A single source in the `SourceMap`.
///
/// `source_len`, `lines` and `multibyte_chars` are expressed relative to the start of the
/// file; `start_pos` is the offset used by `absolute_position`/`relative_position` to convert
/// between those and `SourceMap`-wide [`BytePos`] values.
#[derive(Clone, derive_more::Debug)]
pub struct SourceFile {
    /// The name of the file that the source came from. Source that doesn't
    /// originate from files has names between angle brackets by convention
    /// (e.g., `<stdin>`).
    pub name: FileName,
    /// The complete source code.
    #[debug(skip)]
    pub src: Arc<String>,
    /// The source code's hash.
    #[debug(skip)]
    pub src_hash: SourceFileHash,
    /// The start position of this source in the `SourceMap`.
    ///
    /// `SourceFile::new` initializes this to 0.
    /// NOTE(review): presumably the owning `SourceMap` assigns the real offset — confirm.
    pub start_pos: BytePos,
    /// The byte length of this source.
    pub source_len: RelativeBytePos,
    /// Locations of lines beginnings in the source code.
    #[debug(skip)]
    pub lines: Vec<RelativeBytePos>,
    /// Locations of multi-byte characters in the source code.
    #[debug(skip)]
    pub multibyte_chars: Vec<MultiByteChar>,
    /// A hash of the filename, used for identifying source files.
    ///
    /// The crate-id is not part of the hash at the moment: that input is commented out in
    /// `StableSourceFileId::new`.
    #[debug(skip)]
    pub stable_id: StableSourceFileId,
}
218
219impl SourceFile {
220    pub fn new(
221        name: FileName,
222        mut src: String,
223        hash_kind: SourceFileHashAlgorithm,
224    ) -> Result<Self, OffsetOverflowError> {
225        // Compute the file hash before any normalization.
226        let src_hash = SourceFileHash::new(hash_kind, &src);
227        // let normalized_pos = normalize_src(&mut src);
228
229        let stable_id = StableSourceFileId::from_filename_in_current_crate(&name);
230        let source_len = src.len();
231        let source_len = u32::try_from(source_len).map_err(|_| OffsetOverflowError(()))?;
232
233        let (lines, multibyte_chars) = super::analyze::analyze_source_file(&src);
234
235        src.shrink_to_fit();
236        Ok(Self {
237            name,
238            src: Arc::new(src),
239            src_hash,
240            start_pos: BytePos::from_u32(0),
241            source_len: RelativeBytePos::from_u32(source_len),
242            lines,
243            multibyte_chars,
244            stable_id,
245        })
246    }
247
248    pub fn lines(&self) -> &[RelativeBytePos] {
249        &self.lines
250    }
251
252    pub fn count_lines(&self) -> usize {
253        self.lines().len()
254    }
255
256    #[inline]
257    pub fn absolute_position(&self, pos: RelativeBytePos) -> BytePos {
258        BytePos::from_u32(pos.to_u32() + self.start_pos.to_u32())
259    }
260
261    #[inline]
262    pub fn relative_position(&self, pos: BytePos) -> RelativeBytePos {
263        RelativeBytePos::from_u32(pos.to_u32() - self.start_pos.to_u32())
264    }
265
266    #[inline]
267    pub fn end_position(&self) -> BytePos {
268        self.absolute_position(self.source_len)
269    }
270
271    /// Finds the line containing the given position. The return value is the
272    /// index into the `lines` array of this `SourceFile`, not the 1-based line
273    /// number. If the source_file is empty or the position is located before the
274    /// first line, `None` is returned.
275    pub fn lookup_line(&self, pos: RelativeBytePos) -> Option<usize> {
276        self.lines().partition_point(|x| x <= &pos).checked_sub(1)
277    }
278
279    /// Returns the relative byte position of the start of the line at the given
280    /// 0-based line index.
281    pub fn line_position(&self, line_number: usize) -> Option<usize> {
282        self.lines().get(line_number).map(|x| x.to_usize())
283    }
284
285    /// Converts a `RelativeBytePos` to a `CharPos` relative to the `SourceFile`.
286    pub(crate) fn bytepos_to_file_charpos(&self, bpos: RelativeBytePos) -> CharPos {
287        // The number of extra bytes due to multibyte chars in the `SourceFile`.
288        let mut total_extra_bytes = 0;
289
290        for mbc in self.multibyte_chars.iter() {
291            if mbc.pos < bpos {
292                // Every character is at least one byte, so we only
293                // count the actual extra bytes.
294                total_extra_bytes += mbc.bytes as u32 - 1;
295                // We should never see a byte position in the middle of a
296                // character.
297                assert!(bpos.to_u32() >= mbc.pos.to_u32() + mbc.bytes as u32);
298            } else {
299                break;
300            }
301        }
302
303        assert!(total_extra_bytes <= bpos.to_u32());
304        CharPos(bpos.to_usize() - total_extra_bytes as usize)
305    }
306
307    /// Looks up the file's (1-based) line number and (0-based `CharPos`) column offset, for a
308    /// given `RelativeBytePos`.
309    fn lookup_file_pos(&self, pos: RelativeBytePos) -> (usize, CharPos) {
310        let chpos = self.bytepos_to_file_charpos(pos);
311        match self.lookup_line(pos) {
312            Some(a) => {
313                let line = a + 1; // Line numbers start at 1
314                let linebpos = self.lines()[a];
315                let linechpos = self.bytepos_to_file_charpos(linebpos);
316                let col = chpos - linechpos;
317                assert!(chpos >= linechpos);
318                (line, col)
319            }
320            None => (0, chpos),
321        }
322    }
323
324    /// Looks up the file's (1-based) line number, (0-based `CharPos`) column offset, and (0-based)
325    /// column offset when displayed, for a given `BytePos`.
326    pub fn lookup_file_pos_with_col_display(&self, pos: BytePos) -> (usize, CharPos, usize) {
327        let pos = self.relative_position(pos);
328        let (line, col_or_chpos) = self.lookup_file_pos(pos);
329        if line > 0 {
330            let Some(code) = self.get_line(line - 1) else {
331                // If we don't have the code available, it is ok as a fallback to return the bytepos
332                // instead of the "display" column, which is only used to properly show underlines
333                // in the terminal.
334                // FIXME: we'll want better handling of this in the future for the sake of tools
335                // that want to use the display col instead of byte offsets to modify code, but
336                // that is a problem for another day, the previous code was already incorrect for
337                // both displaying *and* third party tools using the json output naïvely.
338                debug!("couldn't find line {line} in {:?}", self.name);
339                return (line, col_or_chpos, col_or_chpos.0);
340            };
341            let display_col = code.chars().take(col_or_chpos.0).map(char_width).sum();
342            (line, col_or_chpos, display_col)
343        } else {
344            // This is never meant to happen?
345            (0, col_or_chpos, col_or_chpos.0)
346        }
347    }
348
349    /// Gets a line from the list of pre-computed line-beginnings.
350    /// The line number here is 0-based.
351    pub fn get_line(&self, line_number: usize) -> Option<&str> {
352        fn get_until_newline(src: &str, begin: usize) -> &str {
353            // We can't use `lines.get(line_number+1)` because we might
354            // be parsing when we call this function and thus the current
355            // line is the last one we have line info for.
356            let slice = &src[begin..];
357            match slice.find('\n') {
358                Some(e) => &slice[..e],
359                None => slice,
360            }
361        }
362
363        let start = self.lines().get(line_number)?.to_usize();
364        Some(get_until_newline(&self.src, start))
365    }
366
367    /// Gets a slice of the source text between two lines, including the
368    /// terminator of the second line (if any).
369    pub fn get_lines(&self, range: RangeInclusive<usize>) -> Option<&str> {
370        fn get_until_newline(src: &str, start: usize, end: usize) -> &str {
371            match src[end..].find('\n') {
372                Some(e) => &src[start..end + e + 1],
373                None => &src[start..],
374            }
375        }
376
377        let (start, end) = range.into_inner();
378        let lines = self.lines();
379        let start = lines.get(start)?.to_usize();
380        let end = lines.get(end)?.to_usize();
381        Some(get_until_newline(&self.src, start, end))
382    }
383
384    /// Returns whether or not the file contains the given `SourceMap` byte
385    /// position. The position one past the end of the file is considered to be
386    /// contained by the file. This implies that files for which `is_empty`
387    /// returns true still contain one byte position according to this function.
388    #[inline]
389    pub fn contains(&self, byte_pos: BytePos) -> bool {
390        byte_pos >= self.start_pos && byte_pos <= self.end_position()
391    }
392
393    #[inline]
394    pub fn is_empty(&self) -> bool {
395        self.source_len.to_u32() == 0
396    }
397
398    /// Calculates the original byte position relative to the start of the file
399    /// based on the given byte position.
400    pub fn original_relative_byte_pos(&self, pos: BytePos) -> RelativeBytePos {
401        let pos = self.relative_position(pos);
402        RelativeBytePos::from_u32(pos.0)
403    }
404}
405
406pub fn char_width(ch: char) -> usize {
407    match ch {
408        '\t' => 4,
409        _ => unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1),
410    }
411}
412
/// The algorithm used to hash a source file's contents.
///
/// Only [`SourceFileHashAlgorithm::None`] is currently available; the remaining variants are
/// commented out.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum SourceFileHashAlgorithm {
    /// No hashing: the hash length is 0 bytes.
    #[default]
    None,
    // Md5,
    // Sha1,
    // Sha256,
}
421
422impl std::str::FromStr for SourceFileHashAlgorithm {
423    type Err = ();
424
425    fn from_str(s: &str) -> Result<Self, Self::Err> {
426        // match s {
427        //     "md5" => Ok(Self::Md5),
428        //     "sha1" => Ok(Self::Sha1),
429        //     "sha256" => Ok(Self::Sha256),
430        //     _ => Err(()),
431        // }
432        let _ = s;
433        Err(())
434    }
435}
436
437impl SourceFileHashAlgorithm {
438    /// The length of the hash in bytes.
439    #[inline]
440    pub const fn hash_len(self) -> usize {
441        match self {
442            Self::None => 0,
443            // Self::Md5 => 16,
444            // Self::Sha1 => 20,
445            // Self::Sha256 => 32,
446        }
447    }
448}
449
/// Size in bytes of the fixed buffer that stores a hash value inside [`SourceFileHash`].
const MAX_HASH_SIZE: usize = 32;
451
/// The hash of the on-disk source file used for debug info.
///
/// The value lives in a fixed-size buffer; only the first `kind.hash_len()` bytes are
/// meaningful (see `hash_bytes`).
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
pub struct SourceFileHash {
    /// The algorithm that produced `value`.
    kind: SourceFileHashAlgorithm,
    /// Storage for the hash bytes, `MAX_HASH_SIZE` bytes long.
    value: [u8; MAX_HASH_SIZE],
}
458
459impl fmt::Debug for SourceFileHash {
460    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
461        let mut dbg = f.debug_struct("SourceFileHash");
462        dbg.field("kind", &self.kind);
463        if self.kind != SourceFileHashAlgorithm::None {
464            dbg.field("value", &format_args!("{}", hex::encode(self.hash_bytes())));
465        }
466        dbg.finish()
467    }
468}
469
470impl SourceFileHash {
471    pub fn new(kind: SourceFileHashAlgorithm, src: &str) -> Self {
472        // use md5::digest::{typenum::Unsigned, Digest, OutputSizeUser};
473
474        // fn digest_into<D: Digest>(data: &[u8], out: &mut [u8; MAX_HASH_SIZE]) {
475        //     let mut hasher = D::new();
476        //     hasher.update(data);
477        //     hasher.finalize_into((&mut out[..<D as OutputSizeUser>::OutputSize::USIZE]).into());
478        // }
479
480        // let mut hash = Self { kind, value: Default::default() };
481        // let value = &mut hash.value;
482        // let data = src.as_bytes();
483        // match kind {
484        //     SourceFileHashAlgorithm::None => (),
485        //     SourceFileHashAlgorithm::Md5 => digest_into::<md5::Md5>(data, value),
486        //     SourceFileHashAlgorithm::Sha1 => digest_into::<sha1::Sha1>(data, value),
487        //     SourceFileHashAlgorithm::Sha256 => digest_into::<sha256::Sha256>(data, value),
488        // }
489        // hash
490        let _ = src;
491        Self { kind, value: Default::default() }
492    }
493
494    /// Check if the stored hash matches the hash of the string.
495    pub fn matches(&self, src: &str) -> bool {
496        Self::new(self.kind, src).hash_bytes() == self.hash_bytes()
497    }
498
499    /// The bytes of the hash.
500    pub fn hash_bytes(&self) -> &[u8] {
501        &self.value[..self.hash_len()]
502    }
503
504    /// The hash algorithm used.
505    pub const fn kind(&self) -> SourceFileHashAlgorithm {
506        self.kind
507    }
508
509    /// Returns the length of the hash in bytes.
510    #[inline]
511    pub const fn hash_len(&self) -> usize {
512        self.kind.hash_len()
513    }
514}