solar_interface/source_map/
file.rs

1use crate::{pos::RelativeBytePos, BytePos, CharPos};
2use std::{
3    fmt, io,
4    ops::RangeInclusive,
5    path::{Path, PathBuf},
6    sync::Arc,
7};
8
9/// Identifies an offset of a multi-byte character in a `SourceFile`.
10#[derive(Clone, Copy, Debug, PartialEq, Eq)]
11pub struct MultiByteChar {
12    /// The relative offset of the character in the `SourceFile`.
13    pub pos: RelativeBytePos,
14    /// The number of bytes, `>= 2`.
15    pub bytes: u8,
16}
17
18/// Identifies an offset of a non-narrow character in a `SourceFile`.
19#[derive(Clone, Copy, Debug, PartialEq, Eq)]
20pub enum NonNarrowChar {
21    /// Represents a zero-width character.
22    ZeroWidth(RelativeBytePos),
23    /// Represents a wide (full-width) character.
24    Wide(RelativeBytePos),
25    /// Represents a tab character, represented visually with a width of 4 characters.
26    Tab(RelativeBytePos),
27}
28
29impl NonNarrowChar {
30    pub(crate) fn new(pos: RelativeBytePos, width: usize) -> Self {
31        match width {
32            0 => Self::ZeroWidth(pos),
33            2 => Self::Wide(pos),
34            4 => Self::Tab(pos),
35            _ => panic!("width {width} given for non-narrow character"),
36        }
37    }
38
39    /// Returns the relative offset of the character in the `SourceFile`.
40    pub fn pos(&self) -> RelativeBytePos {
41        match *self {
42            Self::ZeroWidth(p) | Self::Wide(p) | Self::Tab(p) => p,
43        }
44    }
45
46    /// Returns the width of the character, 0 (zero-width) or 2 (wide).
47    pub fn width(&self) -> usize {
48        match *self {
49            Self::ZeroWidth(_) => 0,
50            Self::Wide(_) => 2,
51            Self::Tab(_) => 4,
52        }
53    }
54}
55
56impl std::ops::Add<RelativeBytePos> for NonNarrowChar {
57    type Output = Self;
58
59    fn add(self, rhs: RelativeBytePos) -> Self {
60        match self {
61            Self::ZeroWidth(pos) => Self::ZeroWidth(pos + rhs),
62            Self::Wide(pos) => Self::Wide(pos + rhs),
63            Self::Tab(pos) => Self::Tab(pos + rhs),
64        }
65    }
66}
67
68impl std::ops::Sub<RelativeBytePos> for NonNarrowChar {
69    type Output = Self;
70
71    fn sub(self, rhs: RelativeBytePos) -> Self {
72        match self {
73            Self::ZeroWidth(pos) => Self::ZeroWidth(pos - rhs),
74            Self::Wide(pos) => Self::Wide(pos - rhs),
75            Self::Tab(pos) => Self::Tab(pos - rhs),
76        }
77    }
78}
79
/// The name attached to a source.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum FileName {
    /// Files from the file system.
    Real(PathBuf),
    /// Command line; displayed as `<stdin>`.
    Stdin,
    /// Custom sources for explicit parser calls from plugins and drivers.
    Custom(String),
}

/// Only a [`FileName::Real`] can be equal to a path.
impl PartialEq<Path> for FileName {
    fn eq(&self, other: &Path) -> bool {
        matches!(self, Self::Real(path) if path.as_path() == other)
    }
}

/// Only a [`FileName::Real`] can be equal to a path.
impl PartialEq<&Path> for FileName {
    fn eq(&self, other: &&Path) -> bool {
        matches!(self, Self::Real(path) if path.as_path() == *other)
    }
}

/// Only a [`FileName::Real`] can be equal to a path.
impl PartialEq<PathBuf> for FileName {
    fn eq(&self, other: &PathBuf) -> bool {
        matches!(self, Self::Real(path) if path == other)
    }
}

impl From<PathBuf> for FileName {
    fn from(p: PathBuf) -> Self {
        FileName::Real(p)
    }
}

impl FileName {
    /// Builds a [`FileName::Real`] from anything convertible into a `PathBuf`.
    pub fn real(path: impl Into<PathBuf>) -> Self {
        let path: PathBuf = path.into();
        Self::Real(path)
    }

    /// Builds a [`FileName::Custom`] from anything convertible into a `String`.
    pub fn custom(s: impl Into<String>) -> Self {
        let label: String = s.into();
        Self::Custom(label)
    }

    /// Returns a value that implements `Display` for this filename.
    #[inline]
    pub fn display(&self) -> FileNameDisplay<'_> {
        FileNameDisplay { inner: self }
    }
}

/// Borrowing helper returned by [`FileName::display`] that formats the filename.
pub struct FileNameDisplay<'a> {
    inner: &'a FileName,
}

impl fmt::Display for FileNameDisplay<'_> {
    /// Real files are printed as their path; the other variants are wrapped in angle brackets.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.inner {
            FileName::Real(path) => fmt::Display::fmt(&path.display(), f),
            FileName::Stdin => f.write_str("<stdin>"),
            FileName::Custom(s) => write!(f, "<{s}>"),
        }
    }
}
154
/// A 64-bit identifier for a source file.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct StableSourceFileId(u64);
157
158impl StableSourceFileId {
159    pub(super) fn from_filename_in_current_crate(filename: &FileName) -> Self {
160        Self::new(
161            filename,
162            // None
163        )
164    }
165
166    // pub fn from_filename_for_export(
167    //     filename: &FileName,
168    //     local_crate_stable_crate_id: StableCrateId,
169    // ) -> Self {
170    //     Self::new(filename, Some(local_crate_stable_crate_id))
171    // }
172
173    fn new(
174        filename: &FileName,
175        // stable_crate_id: Option<StableCrateId>,
176    ) -> Self {
177        use std::hash::{Hash, Hasher};
178        let mut hasher = solar_data_structures::map::FxHasher::default();
179        filename.hash(&mut hasher);
180        // stable_crate_id.hash(&mut hasher);
181        Self(hasher.finish())
182    }
183}
184
/// Error signalling that the sum of all file lengths is over [`u32::MAX`].
#[derive(Debug)]
pub struct OffsetOverflowError(pub(crate) ());

impl fmt::Display for OffsetOverflowError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "files larger than 4GiB are not supported")
    }
}

impl std::error::Error for OffsetOverflowError {}

/// Wraps the error in an `io::Error` of kind `FileTooLarge`.
impl From<OffsetOverflowError> for io::Error {
    fn from(e: OffsetOverflowError) -> Self {
        let kind = io::ErrorKind::FileTooLarge;
        io::Error::new(kind, e)
    }
}
202
203/// A single source in the `SourceMap`.
204#[derive(Clone, derive_more::Debug)]
205pub struct SourceFile {
206    /// The name of the file that the source came from. Source that doesn't
207    /// originate from files has names between angle brackets by convention
208    /// (e.g., `<stdin>`).
209    pub name: FileName,
210    /// The complete source code.
211    #[debug(skip)]
212    pub src: Arc<String>,
213    /// The source code's hash.
214    #[debug(skip)]
215    pub src_hash: SourceFileHash,
216    /// The start position of this source in the `SourceMap`.
217    pub start_pos: BytePos,
218    /// The byte length of this source.
219    pub source_len: RelativeBytePos,
220    /// Locations of lines beginnings in the source code.
221    #[debug(skip)]
222    pub lines: Vec<RelativeBytePos>,
223    /// Locations of multi-byte characters in the source code.
224    #[debug(skip)]
225    pub multibyte_chars: Vec<MultiByteChar>,
226    /// Width of characters that are not narrow in the source code.
227    #[debug(skip)]
228    pub non_narrow_chars: Vec<NonNarrowChar>,
229    /// A hash of the filename & crate-id, used for uniquely identifying source
230    /// files within the crate graph and for speeding up hashing in incremental
231    /// compilation.
232    #[debug(skip)]
233    pub stable_id: StableSourceFileId,
234}
235
236impl SourceFile {
237    pub fn new(
238        name: FileName,
239        mut src: String,
240        hash_kind: SourceFileHashAlgorithm,
241    ) -> Result<Self, OffsetOverflowError> {
242        // Compute the file hash before any normalization.
243        let src_hash = SourceFileHash::new(hash_kind, &src);
244        // let normalized_pos = normalize_src(&mut src);
245
246        let stable_id = StableSourceFileId::from_filename_in_current_crate(&name);
247        let source_len = src.len();
248        let source_len = u32::try_from(source_len).map_err(|_| OffsetOverflowError(()))?;
249
250        let (lines, multibyte_chars, non_narrow_chars) = super::analyze::analyze_source_file(&src);
251
252        src.shrink_to_fit();
253        Ok(Self {
254            name,
255            src: Arc::new(src),
256            src_hash,
257            start_pos: BytePos::from_u32(0),
258            source_len: RelativeBytePos::from_u32(source_len),
259            lines,
260            multibyte_chars,
261            non_narrow_chars,
262            stable_id,
263        })
264    }
265
266    pub fn lines(&self) -> &[RelativeBytePos] {
267        &self.lines
268    }
269
270    pub fn count_lines(&self) -> usize {
271        self.lines().len()
272    }
273
274    #[inline]
275    pub fn absolute_position(&self, pos: RelativeBytePos) -> BytePos {
276        BytePos::from_u32(pos.to_u32() + self.start_pos.to_u32())
277    }
278
279    #[inline]
280    pub fn relative_position(&self, pos: BytePos) -> RelativeBytePos {
281        RelativeBytePos::from_u32(pos.to_u32() - self.start_pos.to_u32())
282    }
283
284    #[inline]
285    pub fn end_position(&self) -> BytePos {
286        self.absolute_position(self.source_len)
287    }
288
289    /// Finds the line containing the given position. The return value is the
290    /// index into the `lines` array of this `SourceFile`, not the 1-based line
291    /// number. If the source_file is empty or the position is located before the
292    /// first line, `None` is returned.
293    pub fn lookup_line(&self, pos: RelativeBytePos) -> Option<usize> {
294        self.lines().partition_point(|x| x <= &pos).checked_sub(1)
295    }
296
297    /// Returns the relative byte position of the start of the line at the given
298    /// 0-based line index.
299    pub fn line_position(&self, line_number: usize) -> Option<usize> {
300        self.lines().get(line_number).map(|x| x.to_usize())
301    }
302
303    /// Converts a `RelativeBytePos` to a `CharPos` relative to the `SourceFile`.
304    pub(crate) fn bytepos_to_file_charpos(&self, bpos: RelativeBytePos) -> CharPos {
305        // The number of extra bytes due to multibyte chars in the `SourceFile`.
306        let mut total_extra_bytes = 0;
307
308        for mbc in self.multibyte_chars.iter() {
309            if mbc.pos < bpos {
310                // Every character is at least one byte, so we only
311                // count the actual extra bytes.
312                total_extra_bytes += mbc.bytes as u32 - 1;
313                // We should never see a byte position in the middle of a
314                // character.
315                assert!(bpos.to_u32() >= mbc.pos.to_u32() + mbc.bytes as u32);
316            } else {
317                break;
318            }
319        }
320
321        assert!(total_extra_bytes <= bpos.to_u32());
322        CharPos(bpos.to_usize() - total_extra_bytes as usize)
323    }
324
325    /// Looks up the file's (1-based) line number and (0-based `CharPos`) column offset, for a
326    /// given `RelativeBytePos`.
327    fn lookup_file_pos(&self, pos: RelativeBytePos) -> (usize, CharPos) {
328        let chpos = self.bytepos_to_file_charpos(pos);
329        match self.lookup_line(pos) {
330            Some(a) => {
331                let line = a + 1; // Line numbers start at 1
332                let linebpos = self.lines()[a];
333                let linechpos = self.bytepos_to_file_charpos(linebpos);
334                let col = chpos - linechpos;
335                assert!(chpos >= linechpos);
336                (line, col)
337            }
338            None => (0, chpos),
339        }
340    }
341
342    /// Looks up the file's (1-based) line number, (0-based `CharPos`) column offset, and (0-based)
343    /// column offset when displayed, for a given `BytePos`.
344    pub fn lookup_file_pos_with_col_display(&self, pos: BytePos) -> (usize, CharPos, usize) {
345        let pos = self.relative_position(pos);
346        let (line, col_or_chpos) = self.lookup_file_pos(pos);
347        if line > 0 {
348            let col = col_or_chpos;
349            let linebpos = self.lines()[line - 1];
350            let col_display = {
351                let start_width_idx = self
352                    .non_narrow_chars
353                    .binary_search_by_key(&linebpos, |x| x.pos())
354                    .unwrap_or_else(|x| x);
355                let end_width_idx = self
356                    .non_narrow_chars
357                    .binary_search_by_key(&pos, |x| x.pos())
358                    .unwrap_or_else(|x| x);
359                let special_chars = end_width_idx - start_width_idx;
360                let non_narrow: usize = self.non_narrow_chars[start_width_idx..end_width_idx]
361                    .iter()
362                    .map(|x| x.width())
363                    .sum();
364                col.0 - special_chars + non_narrow
365            };
366            (line, col, col_display)
367        } else {
368            let chpos = col_or_chpos;
369            let col_display = {
370                let end_width_idx = self
371                    .non_narrow_chars
372                    .binary_search_by_key(&pos, |x| x.pos())
373                    .unwrap_or_else(|x| x);
374                let non_narrow: usize =
375                    self.non_narrow_chars[0..end_width_idx].iter().map(|x| x.width()).sum();
376                chpos.0 - end_width_idx + non_narrow
377            };
378            (0, chpos, col_display)
379        }
380    }
381
382    /// Gets a line from the list of pre-computed line-beginnings.
383    /// The line number here is 0-based.
384    pub fn get_line(&self, line_number: usize) -> Option<&str> {
385        fn get_until_newline(src: &str, begin: usize) -> &str {
386            // We can't use `lines.get(line_number+1)` because we might
387            // be parsing when we call this function and thus the current
388            // line is the last one we have line info for.
389            let slice = &src[begin..];
390            match slice.find('\n') {
391                Some(e) => &slice[..e],
392                None => slice,
393            }
394        }
395
396        let start = self.lines().get(line_number)?.to_usize();
397        Some(get_until_newline(&self.src, start))
398    }
399
400    /// Gets a slice of the source text between two lines, including the
401    /// terminator of the second line (if any).
402    pub fn get_lines(&self, range: RangeInclusive<usize>) -> Option<&str> {
403        fn get_until_newline(src: &str, start: usize, end: usize) -> &str {
404            match src[end..].find('\n') {
405                Some(e) => &src[start..end + e + 1],
406                None => &src[start..],
407            }
408        }
409
410        let (start, end) = range.into_inner();
411        let lines = self.lines();
412        let start = lines.get(start)?.to_usize();
413        let end = lines.get(end)?.to_usize();
414        Some(get_until_newline(&self.src, start, end))
415    }
416
417    /// Returns whether or not the file contains the given `SourceMap` byte
418    /// position. The position one past the end of the file is considered to be
419    /// contained by the file. This implies that files for which `is_empty`
420    /// returns true still contain one byte position according to this function.
421    #[inline]
422    pub fn contains(&self, byte_pos: BytePos) -> bool {
423        byte_pos >= self.start_pos && byte_pos <= self.end_position()
424    }
425
426    #[inline]
427    pub fn is_empty(&self) -> bool {
428        self.source_len.to_u32() == 0
429    }
430
431    /// Calculates the original byte position relative to the start of the file
432    /// based on the given byte position.
433    pub fn original_relative_byte_pos(&self, pos: BytePos) -> RelativeBytePos {
434        let pos = self.relative_position(pos);
435        RelativeBytePos::from_u32(pos.0)
436    }
437}
438
/// The algorithm used to hash the contents of a source file.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum SourceFileHashAlgorithm {
    /// No hash; its length is zero bytes.
    #[default]
    None,
    // Md5,
    // Sha1,
    // Sha256,
}

impl std::str::FromStr for SourceFileHashAlgorithm {
    type Err = ();

    /// Always fails: no algorithm name is currently recognised.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // match s {
        //     "md5" => Ok(Self::Md5),
        //     "sha1" => Ok(Self::Sha1),
        //     "sha256" => Ok(Self::Sha256),
        //     _ => Err(()),
        // }
        _ = s;
        Err(())
    }
}

impl SourceFileHashAlgorithm {
    /// The length of the hash in bytes.
    #[inline]
    pub const fn hash_len(self) -> usize {
        // Irrefutable while `None` is the only variant; the commented-out variants would
        // use: Md5 => 16, Sha1 => 20, Sha256 => 32.
        let Self::None = self;
        0
    }
}
475
/// Size in bytes of the buffer that stores a hash value.
const MAX_HASH_SIZE: usize = 32;
477
478/// The hash of the on-disk source file used for debug info.
479#[derive(Clone, Copy, PartialEq, Eq, Hash)]
480pub struct SourceFileHash {
481    kind: SourceFileHashAlgorithm,
482    value: [u8; MAX_HASH_SIZE],
483}
484
485impl fmt::Debug for SourceFileHash {
486    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
487        let mut dbg = f.debug_struct("SourceFileHash");
488        dbg.field("kind", &self.kind);
489        if self.kind != SourceFileHashAlgorithm::None {
490            dbg.field("value", &format_args!("{}", hex::encode(self.hash_bytes())));
491        }
492        dbg.finish()
493    }
494}
495
496impl SourceFileHash {
497    pub fn new(kind: SourceFileHashAlgorithm, src: &str) -> Self {
498        // use md5::digest::{typenum::Unsigned, Digest, OutputSizeUser};
499
500        // fn digest_into<D: Digest>(data: &[u8], out: &mut [u8; MAX_HASH_SIZE]) {
501        //     let mut hasher = D::new();
502        //     hasher.update(data);
503        //     hasher.finalize_into((&mut out[..<D as OutputSizeUser>::OutputSize::USIZE]).into());
504        // }
505
506        // let mut hash = Self { kind, value: Default::default() };
507        // let value = &mut hash.value;
508        // let data = src.as_bytes();
509        // match kind {
510        //     SourceFileHashAlgorithm::None => (),
511        //     SourceFileHashAlgorithm::Md5 => digest_into::<md5::Md5>(data, value),
512        //     SourceFileHashAlgorithm::Sha1 => digest_into::<sha1::Sha1>(data, value),
513        //     SourceFileHashAlgorithm::Sha256 => digest_into::<sha256::Sha256>(data, value),
514        // }
515        // hash
516        let _ = src;
517        Self { kind, value: Default::default() }
518    }
519
520    /// Check if the stored hash matches the hash of the string.
521    pub fn matches(&self, src: &str) -> bool {
522        Self::new(self.kind, src).hash_bytes() == self.hash_bytes()
523    }
524
525    /// The bytes of the hash.
526    pub fn hash_bytes(&self) -> &[u8] {
527        &self.value[..self.hash_len()]
528    }
529
530    /// The hash algorithm used.
531    pub const fn kind(&self) -> SourceFileHashAlgorithm {
532        self.kind
533    }
534
535    /// Returns the length of the hash in bytes.
536    #[inline]
537    pub const fn hash_len(&self) -> usize {
538        self.kind.hash_len()
539    }
540}