solar_interface/source_map/
file.rs

1use crate::{pos::RelativeBytePos, BytePos, CharPos};
2use std::{
3    fmt, io,
4    ops::RangeInclusive,
5    path::{Path, PathBuf},
6    sync::Arc,
7};
8
9/// Identifies an offset of a multi-byte character in a `SourceFile`.
10#[derive(Clone, Copy, Debug, PartialEq, Eq)]
11pub struct MultiByteChar {
12    /// The relative offset of the character in the `SourceFile`.
13    pub pos: RelativeBytePos,
14    /// The number of bytes, `>= 2`.
15    pub bytes: u8,
16}
17
18/// Identifies an offset of a non-narrow character in a `SourceFile`.
19#[derive(Clone, Copy, Debug, PartialEq, Eq)]
20pub enum NonNarrowChar {
21    /// Represents a zero-width character.
22    ZeroWidth(RelativeBytePos),
23    /// Represents a wide (full-width) character.
24    Wide(RelativeBytePos),
25    /// Represents a tab character, represented visually with a width of 4 characters.
26    Tab(RelativeBytePos),
27}
28
29impl NonNarrowChar {
30    pub(crate) fn new(pos: RelativeBytePos, width: usize) -> Self {
31        match width {
32            0 => Self::ZeroWidth(pos),
33            2 => Self::Wide(pos),
34            4 => Self::Tab(pos),
35            _ => panic!("width {width} given for non-narrow character"),
36        }
37    }
38
39    /// Returns the relative offset of the character in the `SourceFile`.
40    pub fn pos(&self) -> RelativeBytePos {
41        match *self {
42            Self::ZeroWidth(p) | Self::Wide(p) | Self::Tab(p) => p,
43        }
44    }
45
46    /// Returns the width of the character, 0 (zero-width) or 2 (wide).
47    pub fn width(&self) -> usize {
48        match *self {
49            Self::ZeroWidth(_) => 0,
50            Self::Wide(_) => 2,
51            Self::Tab(_) => 4,
52        }
53    }
54}
55
56impl std::ops::Add<RelativeBytePos> for NonNarrowChar {
57    type Output = Self;
58
59    fn add(self, rhs: RelativeBytePos) -> Self {
60        match self {
61            Self::ZeroWidth(pos) => Self::ZeroWidth(pos + rhs),
62            Self::Wide(pos) => Self::Wide(pos + rhs),
63            Self::Tab(pos) => Self::Tab(pos + rhs),
64        }
65    }
66}
67
68impl std::ops::Sub<RelativeBytePos> for NonNarrowChar {
69    type Output = Self;
70
71    fn sub(self, rhs: RelativeBytePos) -> Self {
72        match self {
73            Self::ZeroWidth(pos) => Self::ZeroWidth(pos - rhs),
74            Self::Wide(pos) => Self::Wide(pos - rhs),
75            Self::Tab(pos) => Self::Tab(pos - rhs),
76        }
77    }
78}
79
/// The name under which a source is known.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum FileName {
    /// A file on the file system, identified by its path.
    Real(PathBuf),
    /// Input given on the command line.
    Stdin,
    /// A custom source, used for explicit parser calls from plugins and drivers.
    Custom(String),
}
89
90impl PartialEq<Path> for FileName {
91    fn eq(&self, other: &Path) -> bool {
92        match self {
93            Self::Real(p) => p == other,
94            _ => false,
95        }
96    }
97}
98
99impl PartialEq<&Path> for FileName {
100    fn eq(&self, other: &&Path) -> bool {
101        match self {
102            Self::Real(p) => p == *other,
103            _ => false,
104        }
105    }
106}
107
108impl PartialEq<PathBuf> for FileName {
109    fn eq(&self, other: &PathBuf) -> bool {
110        match self {
111            Self::Real(p) => p == other,
112            _ => false,
113        }
114    }
115}
116
117impl From<PathBuf> for FileName {
118    fn from(p: PathBuf) -> Self {
119        Self::Real(p)
120    }
121}
122
123impl FileName {
124    /// Creates a new `FileName` from a path.
125    pub fn real(path: impl Into<PathBuf>) -> Self {
126        Self::Real(path.into())
127    }
128
129    /// Creates a new `FileName` from a string.
130    pub fn custom(s: impl Into<String>) -> Self {
131        Self::Custom(s.into())
132    }
133
134    /// Displays the filename.
135    #[inline]
136    pub fn display(&self) -> FileNameDisplay<'_> {
137        FileNameDisplay { inner: self }
138    }
139}
140
141pub struct FileNameDisplay<'a> {
142    inner: &'a FileName,
143}
144
145impl fmt::Display for FileNameDisplay<'_> {
146    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
147        match self.inner {
148            FileName::Real(path) => path.display().fmt(f),
149            FileName::Stdin => f.write_str("<stdin>"),
150            FileName::Custom(s) => write!(f, "<{s}>"),
151        }
152    }
153}
154
/// A 64-bit identifier for a source file.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct StableSourceFileId(u64);
157
158impl StableSourceFileId {
159    pub(super) fn from_filename_in_current_crate(filename: &FileName) -> Self {
160        Self::new(
161            filename,
162            // None
163        )
164    }
165
166    // pub fn from_filename_for_export(
167    //     filename: &FileName,
168    //     local_crate_stable_crate_id: StableCrateId,
169    // ) -> Self {
170    //     Self::new(filename, Some(local_crate_stable_crate_id))
171    // }
172
173    fn new(
174        filename: &FileName,
175        // stable_crate_id: Option<StableCrateId>,
176    ) -> Self {
177        use std::hash::{Hash, Hasher};
178        let mut hasher = solar_data_structures::map::FxHasher::default();
179        filename.hash(&mut hasher);
180        // stable_crate_id.hash(&mut hasher);
181        Self(hasher.finish())
182    }
183}
184
/// Error raised when the sum of all file lengths is over [`u32::MAX`].
#[derive(Debug)]
pub struct OffsetOverflowError(pub(crate) ());

impl fmt::Display for OffsetOverflowError {
    /// Writes a fixed, human-readable description of the size limit.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "files larger than 4GiB are not supported")
    }
}

impl std::error::Error for OffsetOverflowError {}
196
197impl From<OffsetOverflowError> for io::Error {
198    fn from(e: OffsetOverflowError) -> Self {
199        Self::new(io::ErrorKind::FileTooLarge, e)
200    }
201}
202
203/// A single source in the `SourceMap`.
204#[derive(Clone, Debug)]
205pub struct SourceFile {
206    /// The name of the file that the source came from. Source that doesn't
207    /// originate from files has names between angle brackets by convention
208    /// (e.g., `<stdin>`).
209    pub name: FileName,
210    /// The complete source code.
211    pub src: Arc<String>,
212    /// The source code's hash.
213    pub src_hash: SourceFileHash,
214    /// The start position of this source in the `SourceMap`.
215    pub start_pos: BytePos,
216    /// The byte length of this source.
217    pub source_len: RelativeBytePos,
218    /// Locations of lines beginnings in the source code.
219    pub lines: Vec<RelativeBytePos>,
220    /// Locations of multi-byte characters in the source code.
221    pub multibyte_chars: Vec<MultiByteChar>,
222    /// Width of characters that are not narrow in the source code.
223    pub non_narrow_chars: Vec<NonNarrowChar>,
224    /// A hash of the filename & crate-id, used for uniquely identifying source
225    /// files within the crate graph and for speeding up hashing in incremental
226    /// compilation.
227    pub stable_id: StableSourceFileId,
228}
229
230impl SourceFile {
231    pub fn new(
232        name: FileName,
233        mut src: String,
234        hash_kind: SourceFileHashAlgorithm,
235    ) -> Result<Self, OffsetOverflowError> {
236        // Compute the file hash before any normalization.
237        let src_hash = SourceFileHash::new(hash_kind, &src);
238        // let normalized_pos = normalize_src(&mut src);
239
240        let stable_id = StableSourceFileId::from_filename_in_current_crate(&name);
241        let source_len = src.len();
242        let source_len = u32::try_from(source_len).map_err(|_| OffsetOverflowError(()))?;
243
244        let (lines, multibyte_chars, non_narrow_chars) = super::analyze::analyze_source_file(&src);
245
246        src.shrink_to_fit();
247        Ok(Self {
248            name,
249            src: Arc::new(src),
250            src_hash,
251            start_pos: BytePos::from_u32(0),
252            source_len: RelativeBytePos::from_u32(source_len),
253            lines,
254            multibyte_chars,
255            non_narrow_chars,
256            stable_id,
257        })
258    }
259
260    pub fn lines(&self) -> &[RelativeBytePos] {
261        &self.lines
262    }
263
264    pub fn count_lines(&self) -> usize {
265        self.lines().len()
266    }
267
268    #[inline]
269    pub fn absolute_position(&self, pos: RelativeBytePos) -> BytePos {
270        BytePos::from_u32(pos.to_u32() + self.start_pos.to_u32())
271    }
272
273    #[inline]
274    pub fn relative_position(&self, pos: BytePos) -> RelativeBytePos {
275        RelativeBytePos::from_u32(pos.to_u32() - self.start_pos.to_u32())
276    }
277
278    #[inline]
279    pub fn end_position(&self) -> BytePos {
280        self.absolute_position(self.source_len)
281    }
282
283    /// Finds the line containing the given position. The return value is the
284    /// index into the `lines` array of this `SourceFile`, not the 1-based line
285    /// number. If the source_file is empty or the position is located before the
286    /// first line, `None` is returned.
287    pub fn lookup_line(&self, pos: RelativeBytePos) -> Option<usize> {
288        self.lines().partition_point(|x| x <= &pos).checked_sub(1)
289    }
290
291    /// Returns the relative byte position of the start of the line at the given
292    /// 0-based line index.
293    pub fn line_position(&self, line_number: usize) -> Option<usize> {
294        self.lines().get(line_number).map(|x| x.to_usize())
295    }
296
297    /// Converts a `RelativeBytePos` to a `CharPos` relative to the `SourceFile`.
298    pub(crate) fn bytepos_to_file_charpos(&self, bpos: RelativeBytePos) -> CharPos {
299        // The number of extra bytes due to multibyte chars in the `SourceFile`.
300        let mut total_extra_bytes = 0;
301
302        for mbc in self.multibyte_chars.iter() {
303            if mbc.pos < bpos {
304                // Every character is at least one byte, so we only
305                // count the actual extra bytes.
306                total_extra_bytes += mbc.bytes as u32 - 1;
307                // We should never see a byte position in the middle of a
308                // character.
309                assert!(bpos.to_u32() >= mbc.pos.to_u32() + mbc.bytes as u32);
310            } else {
311                break;
312            }
313        }
314
315        assert!(total_extra_bytes <= bpos.to_u32());
316        CharPos(bpos.to_usize() - total_extra_bytes as usize)
317    }
318
319    /// Looks up the file's (1-based) line number and (0-based `CharPos`) column offset, for a
320    /// given `RelativeBytePos`.
321    fn lookup_file_pos(&self, pos: RelativeBytePos) -> (usize, CharPos) {
322        let chpos = self.bytepos_to_file_charpos(pos);
323        match self.lookup_line(pos) {
324            Some(a) => {
325                let line = a + 1; // Line numbers start at 1
326                let linebpos = self.lines()[a];
327                let linechpos = self.bytepos_to_file_charpos(linebpos);
328                let col = chpos - linechpos;
329                assert!(chpos >= linechpos);
330                (line, col)
331            }
332            None => (0, chpos),
333        }
334    }
335
336    /// Looks up the file's (1-based) line number, (0-based `CharPos`) column offset, and (0-based)
337    /// column offset when displayed, for a given `BytePos`.
338    pub fn lookup_file_pos_with_col_display(&self, pos: BytePos) -> (usize, CharPos, usize) {
339        let pos = self.relative_position(pos);
340        let (line, col_or_chpos) = self.lookup_file_pos(pos);
341        if line > 0 {
342            let col = col_or_chpos;
343            let linebpos = self.lines()[line - 1];
344            let col_display = {
345                let start_width_idx = self
346                    .non_narrow_chars
347                    .binary_search_by_key(&linebpos, |x| x.pos())
348                    .unwrap_or_else(|x| x);
349                let end_width_idx = self
350                    .non_narrow_chars
351                    .binary_search_by_key(&pos, |x| x.pos())
352                    .unwrap_or_else(|x| x);
353                let special_chars = end_width_idx - start_width_idx;
354                let non_narrow: usize = self.non_narrow_chars[start_width_idx..end_width_idx]
355                    .iter()
356                    .map(|x| x.width())
357                    .sum();
358                col.0 - special_chars + non_narrow
359            };
360            (line, col, col_display)
361        } else {
362            let chpos = col_or_chpos;
363            let col_display = {
364                let end_width_idx = self
365                    .non_narrow_chars
366                    .binary_search_by_key(&pos, |x| x.pos())
367                    .unwrap_or_else(|x| x);
368                let non_narrow: usize =
369                    self.non_narrow_chars[0..end_width_idx].iter().map(|x| x.width()).sum();
370                chpos.0 - end_width_idx + non_narrow
371            };
372            (0, chpos, col_display)
373        }
374    }
375
376    /// Gets a line from the list of pre-computed line-beginnings.
377    /// The line number here is 0-based.
378    pub fn get_line(&self, line_number: usize) -> Option<&str> {
379        fn get_until_newline(src: &str, begin: usize) -> &str {
380            // We can't use `lines.get(line_number+1)` because we might
381            // be parsing when we call this function and thus the current
382            // line is the last one we have line info for.
383            let slice = &src[begin..];
384            match slice.find('\n') {
385                Some(e) => &slice[..e],
386                None => slice,
387            }
388        }
389
390        let start = self.lines().get(line_number)?.to_usize();
391        Some(get_until_newline(&self.src, start))
392    }
393
394    /// Gets a slice of the source text between two lines, including the
395    /// terminator of the second line (if any).
396    pub fn get_lines(&self, range: RangeInclusive<usize>) -> Option<&str> {
397        fn get_until_newline(src: &str, start: usize, end: usize) -> &str {
398            match src[end..].find('\n') {
399                Some(e) => &src[start..end + e + 1],
400                None => &src[start..],
401            }
402        }
403
404        let (start, end) = range.into_inner();
405        let lines = self.lines();
406        let start = lines.get(start)?.to_usize();
407        let end = lines.get(end)?.to_usize();
408        Some(get_until_newline(&self.src, start, end))
409    }
410
411    /// Returns whether or not the file contains the given `SourceMap` byte
412    /// position. The position one past the end of the file is considered to be
413    /// contained by the file. This implies that files for which `is_empty`
414    /// returns true still contain one byte position according to this function.
415    #[inline]
416    pub fn contains(&self, byte_pos: BytePos) -> bool {
417        byte_pos >= self.start_pos && byte_pos <= self.end_position()
418    }
419
420    #[inline]
421    pub fn is_empty(&self) -> bool {
422        self.source_len.to_u32() == 0
423    }
424
425    /// Calculates the original byte position relative to the start of the file
426    /// based on the given byte position.
427    pub fn original_relative_byte_pos(&self, pos: BytePos) -> RelativeBytePos {
428        let pos = self.relative_position(pos);
429        RelativeBytePos::from_u32(pos.0)
430    }
431}
432
/// The algorithm used to hash the contents of a source file.
///
/// Only `None` is currently available.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum SourceFileHashAlgorithm {
    /// No hash is computed.
    #[default]
    None,
    // Md5,
    // Sha1,
    // Sha256,
}
441
442impl std::str::FromStr for SourceFileHashAlgorithm {
443    type Err = ();
444
445    fn from_str(s: &str) -> Result<Self, Self::Err> {
446        // match s {
447        //     "md5" => Ok(Self::Md5),
448        //     "sha1" => Ok(Self::Sha1),
449        //     "sha256" => Ok(Self::Sha256),
450        //     _ => Err(()),
451        // }
452        let _ = s;
453        Err(())
454    }
455}
456
457impl SourceFileHashAlgorithm {
458    /// The length of the hash in bytes.
459    #[inline]
460    pub const fn hash_len(self) -> usize {
461        match self {
462            Self::None => 0,
463            // Self::Md5 => 16,
464            // Self::Sha1 => 20,
465            // Self::Sha256 => 32,
466        }
467    }
468}
469
/// Size in bytes of the buffer that stores a source file hash.
const MAX_HASH_SIZE: usize = 32;
471
472/// The hash of the on-disk source file used for debug info.
473#[derive(Clone, Copy, PartialEq, Eq, Hash)]
474pub struct SourceFileHash {
475    kind: SourceFileHashAlgorithm,
476    value: [u8; MAX_HASH_SIZE],
477}
478
479impl fmt::Debug for SourceFileHash {
480    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
481        let mut dbg = f.debug_struct("SourceFileHash");
482        dbg.field("kind", &self.kind);
483        if self.kind != SourceFileHashAlgorithm::None {
484            dbg.field("value", &format_args!("{}", hex::encode(self.hash_bytes())));
485        }
486        dbg.finish()
487    }
488}
489
490impl SourceFileHash {
491    pub fn new(kind: SourceFileHashAlgorithm, src: &str) -> Self {
492        // use md5::digest::{typenum::Unsigned, Digest, OutputSizeUser};
493
494        // fn digest_into<D: Digest>(data: &[u8], out: &mut [u8; MAX_HASH_SIZE]) {
495        //     let mut hasher = D::new();
496        //     hasher.update(data);
497        //     hasher.finalize_into((&mut out[..<D as OutputSizeUser>::OutputSize::USIZE]).into());
498        // }
499
500        // let mut hash = Self { kind, value: Default::default() };
501        // let value = &mut hash.value;
502        // let data = src.as_bytes();
503        // match kind {
504        //     SourceFileHashAlgorithm::None => (),
505        //     SourceFileHashAlgorithm::Md5 => digest_into::<md5::Md5>(data, value),
506        //     SourceFileHashAlgorithm::Sha1 => digest_into::<sha1::Sha1>(data, value),
507        //     SourceFileHashAlgorithm::Sha256 => digest_into::<sha256::Sha256>(data, value),
508        // }
509        // hash
510        let _ = src;
511        Self { kind, value: Default::default() }
512    }
513
514    /// Check if the stored hash matches the hash of the string.
515    pub fn matches(&self, src: &str) -> bool {
516        Self::new(self.kind, src).hash_bytes() == self.hash_bytes()
517    }
518
519    /// The bytes of the hash.
520    pub fn hash_bytes(&self) -> &[u8] {
521        &self.value[..self.hash_len()]
522    }
523
524    /// The hash algorithm used.
525    pub const fn kind(&self) -> SourceFileHashAlgorithm {
526        self.kind
527    }
528
529    /// Returns the length of the hash in bytes.
530    #[inline]
531    pub const fn hash_len(&self) -> usize {
532        self.kind.hash_len()
533    }
534}