Skip to main content

pydocstring/
text.rs

1//! Source location types (offset-only).
2//!
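//! This module provides [`TextSize`] (a byte offset) and [`TextRange`]
//! (a half-open byte range) for tracking source positions, together with
//! [`LineColumn`] and [`LineIndex`] for converting byte offsets into
//! line/column positions.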
5//! Inspired by ruff / rust-analyzer's `text-size` crate.
6
7use core::fmt;
8use core::ops;
9
10// =============================================================================
11// TextSize
12// =============================================================================
13
/// A byte offset in the source text.
///
/// Newtype over `u32` for type safety (prevents mixing with line numbers, etc.).
/// Because the offset is stored as a `u32`, the largest representable
/// position is `u32::MAX` bytes.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
pub struct TextSize(u32);

impl TextSize {
    /// Creates a new text size from a raw byte offset.
    pub const fn new(raw: u32) -> Self {
        Self(raw)
    }

    /// Returns the raw byte offset.
    pub const fn raw(self) -> u32 {
        self.0
    }
}

impl From<u32> for TextSize {
    /// Wraps a raw `u32` byte offset.
    fn from(raw: u32) -> Self {
        Self(raw)
    }
}

impl From<TextSize> for u32 {
    /// Unwraps the raw `u32` byte offset.
    fn from(size: TextSize) -> Self {
        size.0
    }
}

impl From<TextSize> for usize {
    /// Converts the offset to a `usize`, e.g. for slicing a `str`.
    fn from(size: TextSize) -> Self {
        // Widening cast: lossless wherever `usize` is at least 32 bits wide.
        size.0 as usize
    }
}

impl From<usize> for TextSize {
    /// Converts a `usize` byte offset into a `TextSize`.
    ///
    /// Values that do not fit in a `u32` saturate to `u32::MAX` instead of
    /// silently wrapping around to a small, unrelated offset.
    fn from(raw: usize) -> Self {
        // After the clamp the value is at most `u32::MAX`, so the cast is lossless.
        Self(raw.min(u32::MAX as usize) as u32)
    }
}

impl ops::Add for TextSize {
    type Output = Self;

    /// Adds two offsets.
    ///
    /// Uses plain `u32` addition: overflow panics when overflow checks are
    /// enabled (the default for debug builds) and wraps otherwise.
    fn add(self, rhs: Self) -> Self {
        Self(self.0 + rhs.0)
    }
}

impl ops::Sub for TextSize {
    type Output = Self;

    /// Subtracts `rhs` from `self`.
    ///
    /// Uses plain `u32` subtraction: underflow (`rhs > self`) panics when
    /// overflow checks are enabled and wraps otherwise.
    fn sub(self, rhs: Self) -> Self {
        Self(self.0 - rhs.0)
    }
}

impl fmt::Display for TextSize {
    /// Formats the offset exactly like the underlying `u32`, forwarding any
    /// width/fill/alignment flags.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Fully qualified so the call does not depend on the `Display` trait
        // being imported into scope (only the `fmt` module is).
        fmt::Display::fmt(&self.0, f)
    }
}
75
76// =============================================================================
77// TextRange
78// =============================================================================
79
80/// A range in the source text `[start, end)`, represented as byte offsets.
81///
82/// Stores only offsets.
83#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
84pub struct TextRange {
85    start: TextSize,
86    end: TextSize,
87}
88
89impl TextRange {
90    /// Creates a new range from start (inclusive) and end (exclusive) offsets.
91    pub const fn new(start: TextSize, end: TextSize) -> Self {
92        Self { start, end }
93    }
94
95    /// Start offset (inclusive).
96    pub const fn start(self) -> TextSize {
97        self.start
98    }
99
100    /// End offset (exclusive).
101    pub const fn end(self) -> TextSize {
102        self.end
103    }
104
105    /// Length of the range in bytes.
106    pub const fn len(self) -> TextSize {
107        TextSize::new(self.end.0 - self.start.0)
108    }
109
110    /// Whether the range is empty.
111    pub const fn is_empty(self) -> bool {
112        self.start.0 == self.end.0
113    }
114
115    /// Whether `offset` is contained in this range.
116    pub const fn contains(self, offset: TextSize) -> bool {
117        self.start.0 <= offset.0 && offset.0 < self.end.0
118    }
119
120    /// Creates a range from an absolute byte offset and a length.
121    pub const fn from_offset_len(offset: usize, len: usize) -> Self {
122        Self {
123            start: TextSize::new(offset as u32),
124            end: TextSize::new((offset + len) as u32),
125        }
126    }
127
128    /// Extracts the corresponding slice from the source text.
129    ///
130    /// Returns an empty string if the range is empty or offsets are out of bounds.
131    pub fn source_text<'a>(&self, source: &'a str) -> &'a str {
132        let start = self.start.0 as usize;
133        let end = self.end.0 as usize;
134        if start <= end && end <= source.len() {
135            &source[start..end]
136        } else {
137            ""
138        }
139    }
140
141    /// Extend this range's end to include `other`.
142    pub fn extend(&mut self, other: TextRange) {
143        self.end = other.end;
144    }
145}
146
147impl fmt::Display for TextRange {
148    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
149        write!(f, "{}..{}", self.start, self.end)
150    }
151}
152
153// =============================================================================
154// LineColumn
155// =============================================================================
156
/// A position in the source text expressed as a line and a column.
///
/// Lines are counted from 1 (`lineno`), while `col` is a byte offset counted
/// from 0 at the start of that line.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct LineColumn {
    /// Line number, starting at 1.
    pub lineno: u32,
    /// Byte offset within the line, starting at 0.
    pub col: u32,
}

impl fmt::Display for LineColumn {
    /// Renders the position as `lineno:col`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { lineno, col } = *self;
        write!(f, "{}:{}", lineno, col)
    }
}
174
175// =============================================================================
176// LineIndex
177// =============================================================================
178
179/// A lookup table for converting byte offsets to [`LineColumn`] positions.
180///
181/// Build once from the source text with [`LineIndex::new`], then call
182/// [`LineIndex::line_col`] for any [`TextSize`] offset.
183#[derive(Debug, Clone, PartialEq, Eq)]
184pub struct LineIndex {
185    /// Byte offset of the first character of each line.
186    /// `line_starts[0]` is always 0 (start of the first line).
187    line_starts: Vec<u32>,
188}
189
190impl LineIndex {
191    /// Build a `LineIndex` from the source text.
192    pub fn new(source: &str) -> Self {
193        let mut line_starts = vec![0u32];
194        for (i, b) in source.bytes().enumerate() {
195            if b == b'\n' {
196                line_starts.push((i + 1) as u32);
197            }
198        }
199        Self { line_starts }
200    }
201
202    /// Convert a byte offset to a [`LineColumn`] position.
203    ///
204    /// `lineno` is 1-based; `col` is the 0-based byte offset within the line.
205    pub fn line_col(&self, offset: TextSize) -> LineColumn {
206        let offset = offset.raw();
207        // The index of the last line that starts at or before `offset`.
208        let line = self.line_starts.partition_point(|&s| s <= offset) - 1;
209        let col = offset - self.line_starts[line];
210        LineColumn {
211            lineno: line as u32 + 1,
212            col,
213        }
214    }
215}