pydocstring/text.rs
//! Source location types.
//!
//! This module provides [`TextSize`] (a byte offset) and [`TextRange`]
//! (a half-open byte range) for tracking source positions; both store
//! offsets only. [`LineColumn`] and [`LineIndex`] convert a byte offset
//! into a line/column position when one is needed.
//! Inspired by ruff / rust-analyzer's `text-size` crate.
6
7use core::fmt;
8use core::ops;
9
10// =============================================================================
11// TextSize
12// =============================================================================
13
/// A position in the source text, measured in bytes.
///
/// Wraps a `u32` in its own type so that byte offsets cannot be mixed up with
/// other integers (line numbers, etc.) by accident.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct TextSize(u32);

impl TextSize {
    /// Wraps a raw byte offset in a `TextSize`.
    pub const fn new(raw: u32) -> Self {
        TextSize(raw)
    }

    /// Unwraps the underlying raw byte offset.
    pub const fn raw(self) -> u32 {
        let TextSize(raw) = self;
        raw
    }
}
31
32impl From<u32> for TextSize {
33 fn from(raw: u32) -> Self {
34 Self(raw)
35 }
36}
37
38impl From<TextSize> for u32 {
39 fn from(size: TextSize) -> Self {
40 size.0
41 }
42}
43
44impl From<TextSize> for usize {
45 fn from(size: TextSize) -> Self {
46 size.0 as usize
47 }
48}
49
50impl From<usize> for TextSize {
51 fn from(raw: usize) -> Self {
52 Self(raw as u32)
53 }
54}
55
56impl ops::Add for TextSize {
57 type Output = Self;
58 fn add(self, rhs: Self) -> Self {
59 Self(self.0 + rhs.0)
60 }
61}
62
63impl ops::Sub for TextSize {
64 type Output = Self;
65 fn sub(self, rhs: Self) -> Self {
66 Self(self.0 - rhs.0)
67 }
68}
69
70impl fmt::Display for TextSize {
71 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
72 self.0.fmt(f)
73 }
74}
75
76// =============================================================================
77// TextRange
78// =============================================================================
79
80/// A range in the source text `[start, end)`, represented as byte offsets.
81///
82/// Stores only offsets.
83#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
84pub struct TextRange {
85 start: TextSize,
86 end: TextSize,
87}
88
89impl TextRange {
90 /// Creates a new range from start (inclusive) and end (exclusive) offsets.
91 pub const fn new(start: TextSize, end: TextSize) -> Self {
92 Self { start, end }
93 }
94
95 /// Start offset (inclusive).
96 pub const fn start(self) -> TextSize {
97 self.start
98 }
99
100 /// End offset (exclusive).
101 pub const fn end(self) -> TextSize {
102 self.end
103 }
104
105 /// Length of the range in bytes.
106 pub const fn len(self) -> TextSize {
107 TextSize::new(self.end.0 - self.start.0)
108 }
109
110 /// Whether the range is empty.
111 pub const fn is_empty(self) -> bool {
112 self.start.0 == self.end.0
113 }
114
115 /// Whether `offset` is contained in this range.
116 pub const fn contains(self, offset: TextSize) -> bool {
117 self.start.0 <= offset.0 && offset.0 < self.end.0
118 }
119
120 /// Creates a range from an absolute byte offset and a length.
121 pub const fn from_offset_len(offset: usize, len: usize) -> Self {
122 Self {
123 start: TextSize::new(offset as u32),
124 end: TextSize::new((offset + len) as u32),
125 }
126 }
127
128 /// Extracts the corresponding slice from the source text.
129 ///
130 /// Returns an empty string if the range is empty or offsets are out of bounds.
131 pub fn source_text<'a>(&self, source: &'a str) -> &'a str {
132 let start = self.start.0 as usize;
133 let end = self.end.0 as usize;
134 if start <= end && end <= source.len() {
135 &source[start..end]
136 } else {
137 ""
138 }
139 }
140
141 /// Extend this range's end to include `other`.
142 pub fn extend(&mut self, other: TextRange) {
143 self.end = other.end;
144 }
145}
146
147impl fmt::Display for TextRange {
148 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
149 write!(f, "{}..{}", self.start, self.end)
150 }
151}
152
153// =============================================================================
154// LineColumn
155// =============================================================================
156
/// A position in the source text given as a line and a column.
///
/// Lines are counted from 1 (`lineno`); columns are counted from 0 and are
/// measured in bytes from the beginning of the line (`col`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct LineColumn {
    /// Line number, starting at 1.
    pub lineno: u32,
    /// Byte offset from the start of the line, starting at 0.
    pub col: u32,
}
168
169impl fmt::Display for LineColumn {
170 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
171 write!(f, "{}:{}", self.lineno, self.col)
172 }
173}
174
175// =============================================================================
176// LineIndex
177// =============================================================================
178
179/// A lookup table for converting byte offsets to [`LineColumn`] positions.
180///
181/// Build once from the source text with [`LineIndex::new`], then call
182/// [`LineIndex::line_col`] for any [`TextSize`] offset.
183#[derive(Debug, Clone, PartialEq, Eq)]
184pub struct LineIndex {
185 /// Byte offset of the first character of each line.
186 /// `line_starts[0]` is always 0 (start of the first line).
187 line_starts: Vec<u32>,
188}
189
190impl LineIndex {
191 /// Build a `LineIndex` from the source text.
192 pub fn new(source: &str) -> Self {
193 let mut line_starts = vec![0u32];
194 for (i, b) in source.bytes().enumerate() {
195 if b == b'\n' {
196 line_starts.push((i + 1) as u32);
197 }
198 }
199 Self { line_starts }
200 }
201
202 /// Convert a byte offset to a [`LineColumn`] position.
203 ///
204 /// `lineno` is 1-based; `col` is the 0-based byte offset within the line.
205 pub fn line_col(&self, offset: TextSize) -> LineColumn {
206 let offset = offset.raw();
207 // The index of the last line that starts at or before `offset`.
208 let line = self.line_starts.partition_point(|&s| s <= offset) - 1;
209 let col = offset - self.line_starts[line];
210 LineColumn {
211 lineno: line as u32 + 1,
212 col,
213 }
214 }
215}