Skip to main content

rustpython_ruff_source_file/
lib.rs

1use std::cmp::Ordering;
2use std::fmt::{Debug, Display, Formatter};
3use std::hash::Hash;
4use std::sync::{Arc, OnceLock};
5
6#[cfg(feature = "serde")]
7use serde::{Deserialize, Serialize};
8
9use ruff_text_size::{Ranged, TextRange, TextSize};
10
11pub use crate::line_index::{LineIndex, OneIndexed, PositionEncoding};
12pub use crate::line_ranges::LineRanges;
13pub use crate::newlines::{
14    Line, LineEnding, NewlineWithTrailingNewline, UniversalNewlineIterator, UniversalNewlines,
15    find_newline,
16};
17
18mod line_index;
19mod line_ranges;
20mod newlines;
21
22/// Gives access to the source code of a file and allows mapping between [`TextSize`] and [`LineColumn`].
23#[derive(Debug)]
24pub struct SourceCode<'src, 'index> {
25    text: &'src str,
26    index: &'index LineIndex,
27}
28
29impl<'src, 'index> SourceCode<'src, 'index> {
30    pub fn new(content: &'src str, index: &'index LineIndex) -> Self {
31        Self {
32            text: content,
33            index,
34        }
35    }
36
37    /// Computes the one indexed line and column numbers for `offset`, skipping any potential BOM.
38    #[inline]
39    pub fn line_column(&self, offset: TextSize) -> LineColumn {
40        self.index.line_column(offset, self.text)
41    }
42
43    #[inline]
44    pub fn source_location(
45        &self,
46        offset: TextSize,
47        position_encoding: PositionEncoding,
48    ) -> SourceLocation {
49        self.index
50            .source_location(offset, self.text, position_encoding)
51    }
52
53    #[inline]
54    pub fn line_index(&self, offset: TextSize) -> OneIndexed {
55        self.index.line_index(offset)
56    }
57
58    /// Take the source code up to the given [`TextSize`].
59    #[inline]
60    pub fn up_to(&self, offset: TextSize) -> &'src str {
61        &self.text[TextRange::up_to(offset)]
62    }
63
64    /// Take the source code after the given [`TextSize`].
65    #[inline]
66    pub fn after(&self, offset: TextSize) -> &'src str {
67        &self.text[usize::from(offset)..]
68    }
69
70    /// Take the source code between the given [`TextRange`].
71    pub fn slice<T: Ranged>(&self, ranged: T) -> &'src str {
72        &self.text[ranged.range()]
73    }
74
75    pub fn line_start(&self, line: OneIndexed) -> TextSize {
76        self.index.line_start(line, self.text)
77    }
78
79    pub fn line_end(&self, line: OneIndexed) -> TextSize {
80        self.index.line_end(line, self.text)
81    }
82
83    pub fn line_end_exclusive(&self, line: OneIndexed) -> TextSize {
84        self.index.line_end_exclusive(line, self.text)
85    }
86
87    pub fn line_range(&self, line: OneIndexed) -> TextRange {
88        self.index.line_range(line, self.text)
89    }
90
91    /// Returns the source text of the line with the given index
92    #[inline]
93    pub fn line_text(&self, index: OneIndexed) -> &'src str {
94        let range = self.index.line_range(index, self.text);
95        &self.text[range]
96    }
97
98    /// Returns the source text
99    pub fn text(&self) -> &'src str {
100        self.text
101    }
102
103    /// Returns the number of lines
104    #[inline]
105    pub fn line_count(&self) -> usize {
106        self.index.line_count()
107    }
108}
109
110impl PartialEq<Self> for SourceCode<'_, '_> {
111    fn eq(&self, other: &Self) -> bool {
112        self.text == other.text
113    }
114}
115
116impl Eq for SourceCode<'_, '_> {}
117
118/// A Builder for constructing a [`SourceFile`]
119pub struct SourceFileBuilder {
120    name: Box<str>,
121    code: Box<str>,
122    index: Option<LineIndex>,
123}
124
125impl SourceFileBuilder {
126    /// Creates a new builder for a file named `name`.
127    pub fn new<Name: Into<Box<str>>, Code: Into<Box<str>>>(name: Name, code: Code) -> Self {
128        Self {
129            name: name.into(),
130            code: code.into(),
131            index: None,
132        }
133    }
134
135    #[must_use]
136    pub fn line_index(mut self, index: LineIndex) -> Self {
137        self.index = Some(index);
138        self
139    }
140
141    pub fn set_line_index(&mut self, index: LineIndex) {
142        self.index = Some(index);
143    }
144
145    /// Consumes `self` and returns the [`SourceFile`].
146    pub fn finish(self) -> SourceFile {
147        let index = if let Some(index) = self.index {
148            OnceLock::from(index)
149        } else {
150            OnceLock::new()
151        };
152
153        SourceFile {
154            inner: Arc::new(SourceFileInner {
155                name: self.name,
156                code: self.code,
157                line_index: index,
158            }),
159        }
160    }
161}
162
163/// A source file that is identified by its name. Optionally stores the source code and [`LineIndex`].
164///
165/// Cloning a [`SourceFile`] is cheap, because it only requires bumping a reference count.
166#[derive(Clone, Eq, PartialEq, Hash)]
167#[cfg_attr(feature = "get-size", derive(get_size2::GetSize))]
168pub struct SourceFile {
169    inner: Arc<SourceFileInner>,
170}
171
172impl Debug for SourceFile {
173    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
174        f.debug_struct("SourceFile")
175            .field("name", &self.name())
176            .field("code", &self.source_text())
177            .finish()
178    }
179}
180
181impl SourceFile {
182    /// Returns the name of the source file (filename).
183    #[inline]
184    pub fn name(&self) -> &str {
185        &self.inner.name
186    }
187
188    #[inline]
189    pub fn slice(&self, range: TextRange) -> &str {
190        &self.source_text()[range]
191    }
192
193    pub fn to_source_code(&self) -> SourceCode<'_, '_> {
194        SourceCode {
195            text: self.source_text(),
196            index: self.index(),
197        }
198    }
199
200    pub fn index(&self) -> &LineIndex {
201        self.inner
202            .line_index
203            .get_or_init(|| LineIndex::from_source_text(self.source_text()))
204    }
205
206    /// Returns the source code.
207    #[inline]
208    pub fn source_text(&self) -> &str {
209        &self.inner.code
210    }
211}
212
213impl PartialOrd for SourceFile {
214    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
215        Some(self.cmp(other))
216    }
217}
218
219impl Ord for SourceFile {
220    fn cmp(&self, other: &Self) -> Ordering {
221        // Short circuit if these are the same source files
222        if Arc::ptr_eq(&self.inner, &other.inner) {
223            Ordering::Equal
224        } else {
225            self.inner.name.cmp(&other.inner.name)
226        }
227    }
228}
229
230#[cfg_attr(feature = "get-size", derive(get_size2::GetSize))]
231struct SourceFileInner {
232    name: Box<str>,
233    code: Box<str>,
234    line_index: OnceLock<LineIndex>,
235}
236
237impl PartialEq for SourceFileInner {
238    fn eq(&self, other: &Self) -> bool {
239        self.name == other.name && self.code == other.code
240    }
241}
242
243impl Eq for SourceFileInner {}
244
245impl Hash for SourceFileInner {
246    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
247        self.name.hash(state);
248        self.code.hash(state);
249    }
250}
251
252/// The line and column of an offset in a source file.
253///
254/// See [`LineIndex::line_column`] for more information.
255#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
256#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
257pub struct LineColumn {
258    /// The line in the source text.
259    pub line: OneIndexed,
260    /// The column (UTF scalar values) relative to the start of the line except any
261    /// potential BOM on the first line.
262    pub column: OneIndexed,
263}
264
265impl Default for LineColumn {
266    fn default() -> Self {
267        Self {
268            line: OneIndexed::MIN,
269            column: OneIndexed::MIN,
270        }
271    }
272}
273
274impl Debug for LineColumn {
275    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
276        f.debug_struct("LineColumn")
277            .field("line", &self.line.get())
278            .field("column", &self.column.get())
279            .finish()
280    }
281}
282
283impl std::fmt::Display for LineColumn {
284    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
285        write!(f, "{line}:{column}", line = self.line, column = self.column)
286    }
287}
288
289/// A position into a source file represented by the line number and the offset to that character relative to the start of that line.
290#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
291#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
292pub struct SourceLocation {
293    /// The line in the source text.
294    pub line: OneIndexed,
295    /// The offset from the start of the line to the character.
296    ///
297    /// This can be a byte offset, the number of UTF16 code points, or the UTF8 code units, depending on the
298    /// [`PositionEncoding`] used.
299    pub character_offset: OneIndexed,
300}
301
302impl Default for SourceLocation {
303    fn default() -> Self {
304        Self {
305            line: OneIndexed::MIN,
306            character_offset: OneIndexed::MIN,
307        }
308    }
309}
310
311#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
312pub enum SourceRow {
313    /// A row within a cell in a Jupyter Notebook.
314    Notebook { cell: OneIndexed, line: OneIndexed },
315    /// A row within a source file.
316    SourceFile { line: OneIndexed },
317}
318
319impl Display for SourceRow {
320    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
321        match self {
322            SourceRow::Notebook { cell, line } => write!(f, "cell {cell}, line {line}"),
323            SourceRow::SourceFile { line } => write!(f, "line {line}"),
324        }
325    }
326}