source_cache/text/
mod.rs

1use crate::{SourceID, SourcePath};
2use std::{
3    borrow::Cow,
4    fmt::{Debug, Display, Formatter},
5    ops::Range,
6    path::Path,
7};
8use url::Url;
9
10mod display;
11
12/// A type representing a single identifier that may be referred to by [`Span`]s.
13///
14/// In most cases, an identifier is a single input file.
15#[derive(Clone, Debug, Hash, PartialEq, Eq)]
16pub struct SourceText {
17    /// The path of the identifier.
18    path: SourcePath,
19    /// The text
20    raw: String,
21    /// The lines of the identifier.
22    lines: Vec<SourceLine>,
23    /// bytes in identifier
24    length: u32,
25    /// Is the data dirty
26    dirty: bool,
27}
28
/// One line of a [`SourceText`].
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct SourceLine {
    /// Offset of this line within the owning [`SourceText`], i.e. how much text precedes it.
    pub offset: u32,
    /// Length of this line, measured in the same unit as [`offset`](Self::offset).
    pub length: u32,
    /// The text of this line, stored as an owned copy.
    pub text: String,
}
39
40/// A type representing a single line of a [`Source`].
41#[derive(Copy, Clone, Default, Eq, PartialEq, Hash)]
42#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
43pub struct SourceSpan {
44    /// The start offset of the span
45    pub start: u32,
46    /// The end offset of the span
47    pub end: u32,
48    /// The file id of the span
49    pub file: SourceID,
50}
51
52impl SourceText {
53    /// Create a snippet with given name
54    pub fn snippet<S, N>(text: S, name: N) -> Self
55    where
56        S: Into<String>,
57        N: Into<Cow<'static, str>>,
58    {
59        let mut src = Self::from(text);
60        src.path = SourcePath::Snippet(name.into());
61        src
62    }
63
64    /// Get the cache id
65    pub fn source_id(&self) -> SourceID {
66        self.path.source_id()
67    }
68    /// Get the length of the total number of characters in the identifier.
69    pub fn get_length(&self) -> usize {
70        self.length as usize
71    }
72    /// Get access to a specific, zero-indexed [`SourceLine`].
73    pub fn get_line(&self, idx: usize) -> Option<&SourceLine> {
74        self.lines.get(idx)
75    }
76    /// Get the length of the total number of characters in the identifier.
77    pub fn get_source(&self) -> &SourcePath {
78        &self.path
79    }
80    /// Set path name of identifier
81    pub fn set_source(&mut self, path: SourcePath) {
82        self.path = path;
83    }
84    /// Set path name of identifier
85    pub fn set_path(&mut self, path: &Path) {
86        self.path = SourcePath::Local(path.to_path_buf());
87    }
88    /// Get path name of identifier
89    pub fn with_path(self, path: &Path) -> Self {
90        Self { path: SourcePath::Local(path.to_path_buf()), ..self }
91    }
92    /// Set path name of identifier
93    pub fn set_remote(&mut self, url: Url) -> bool {
94        self.path = SourcePath::Remote(url);
95        true
96    }
97    /// Get path name of identifier
98    pub fn with_remote(self, url: Url) -> Self {
99        Self { path: SourcePath::Remote(url), ..self }
100    }
101
102    /// Return the raw text fetch from source
103    pub fn text(&self) -> &str {
104        self.raw.as_str()
105    }
106    /// Return an iterator over the [`SourceLine`]s in this identifier.
107    pub fn lines(&self) -> &[SourceLine] {
108        self.lines.as_slice()
109    }
110    /// Clear the cache cache
111    pub fn clear(&mut self) {
112        self.raw.clear();
113        self.lines.clear();
114        self.dirty = true;
115    }
116}
117impl SourceText {
118    /// Get the line that the given offset appears on, and the line/column numbers of the offset.
119    ///
120    /// Note that the line/column numbers are zero-indexed.
121    pub fn get_offset_line(&self, offset: u32) -> Option<(&SourceLine, usize, u32)> {
122        if offset <= self.length {
123            let idx = self.lines.binary_search_by_key(&offset, |line| line.offset).unwrap_or_else(|idx| idx.saturating_sub(1));
124            let line = &self.lines[idx];
125            assert!(offset >= line.offset, "offset = {}, line.offset = {}", offset, line.offset);
126            Some((line, idx, offset - line.offset))
127        }
128        else {
129            None
130        }
131    }
132    /// Get the range of lines that this source_text runs across.
133    ///
134    /// The resulting range is guaranteed to contain valid line indices (i.e: those that can be used for
135    /// [`SourceText::get_line`]).
136    pub fn get_line_range(&self, span: &Range<u32>) -> Range<usize> {
137        let start = self.get_offset_line(span.start).map_or(0, |(_, l, _)| l);
138        let end = self.get_offset_line(span.end.saturating_sub(1).max(span.start)).map_or(self.lines.len(), |(_, l, _)| l + 1);
139        start..end
140    }
141}
142
143impl SourceSpan {
144    /// Create a new source_text with the given start and end offsets, and the given file.
145    pub fn new(file: SourceID, start: u32, end: u32) -> Self {
146        Self { start, end, file }
147    }
148    /// Create a new source_text with the given start and end offsets, and the given file.
149    pub fn get_range(&self) -> Range<u32> {
150        self.start..self.end
151    }
152    /// Get the start offset of this source_text.
153    ///
154    /// Offsets are zero-indexed character offsets from the beginning of the identifier.
155    pub fn get_start(&self) -> u32 {
156        self.start
157    }
158
159    /// Get the (exclusive) end offset of this source_text.
160    ///
161    /// The end offset should *always* be greater than or equal to the start offset as given by [`Span::start`].
162    ///
163    /// Offsets are zero-indexed character offsets from the beginning of the identifier.
164    pub fn get_end(&self) -> u32 {
165        self.end
166    }
167    /// Create a new source_text with the given start and end offsets, and the given file.
168    pub fn set_range(&mut self, range: Range<u32>) {
169        self.start = range.start;
170        self.end = range.end;
171    }
172    /// Create a new source_text with the given start and end offsets, and the given file.
173    pub fn with_range(self, range: Range<u32>) -> Self {
174        Self { start: range.start, end: range.end, ..self }
175    }
176    /// Create a new source_text with the given start and end offsets, and the given file.
177    pub fn get_file(&self) -> SourceID {
178        self.file
179    }
180    /// Create a new source_text with the given start and end offsets, and the given file.
181    pub fn set_file(&mut self, file: SourceID) {
182        self.file = file;
183    }
184    /// Create a new source_text with the given start and end offsets, and the given file.
185    pub fn with_file(self, file: SourceID) -> Self {
186        Self { file, ..self }
187    }
188
189    /// Get the length of this source_text (difference between the start of the source_text and the end of the source_text).
190    pub fn length(&self) -> u32 {
191        self.end.saturating_sub(self.start)
192    }
193    /// Determine whether the source_text contains the given offset.
194    pub fn contains(&self, offset: u32) -> bool {
195        self.get_range().contains(&offset)
196    }
197}
198impl SourceLine {
199    /// Get the offset source_text of this line in the original [`SourceText`].
200    pub fn range(&self) -> Range<u32> {
201        self.offset..self.offset + self.length
202    }
203
204    /// Return an iterator over the characters in the line, excluding trailing whitespace.
205    pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
206        self.text.chars()
207    }
208}