oak_core/source/mod.rs
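//! Source text management and location tracking for incremental parsing.
//!
//! This module provides structures for managing source code text and for
//! tracking locations within it: the [`Source`] trait, the [`TextChunk`] and
//! [`TextEdit`] types, and re-exports of the types defined in its submodules.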
5
6use core::range::Range;
7use std::borrow::Cow;
8mod buffer;
9mod cursor;
10mod rope;
11mod simd;
12mod streaming;
13mod text;
14
15pub use self::{
16 buffer::{SourceBuffer, ToSource},
17 cursor::SourceCursor,
18 rope::{RopeBuffer, RopeSource},
19 simd::SimdScanner,
20 streaming::{ChunkedBuffer, ChunkedSource},
21 text::SourceText,
22};
23use crate::OakError;
24
/// Unique numeric identifier assigned to a source file.
pub type SourceId = u32;
27
/// A contiguous piece of source text together with its position in the source.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct TextChunk<'a> {
    /// Byte offset, counted from the start of the source, at which `text` begins.
    pub start: usize,
    /// The text held by this chunk.
    pub text: &'a str,
}

impl<'a> TextChunk<'a> {
    /// Returns the absolute byte offset one past the last byte of this chunk.
    #[inline]
    pub fn end(&self) -> usize {
        let len = self.text.len();
        self.start + len
    }

    /// Returns the part of the chunk text that begins at the absolute byte `offset`.
    ///
    /// * An `offset` at or before [`start`](Self::start) yields the whole chunk text.
    /// * An `offset` at or past the end of the chunk, or one that does not fall on a
    ///   UTF-8 character boundary, yields the empty string.
    #[inline]
    pub fn slice_from(&self, offset: usize) -> &'a str {
        match offset.checked_sub(self.start) {
            // `offset` lies at or before the first byte of the chunk.
            None | Some(0) => self.text,
            // `get` (rather than indexing) turns out-of-range and mid-character
            // positions into `None` instead of a panic.
            Some(rel) => self.text.get(rel..).unwrap_or(""),
        }
    }
}
54
55/// Represents a text edit operation for incremental updates.
56///
57/// Text edits are used to apply changes to source text in an incremental
58/// manner, such as those received from LSP clients or other text editing
59/// operations. Each edit specifies a byte range to replace and the new text
60/// to insert in that range.
61///
62/// # Examples
63///
64/// ```
65/// # #![feature(new_range_api)]
66/// # use oak_core::source::TextEdit;
67/// use core::range::Range;
68/// let edit = TextEdit {
69/// span: Range { start: 4, end: 9 }, // Replace characters at positions 4-8
70/// text: "world".into(), // With the text "world"
71/// };
72/// ```
73pub struct TextEdit {
74 /// The byte range in the original text to be replaced (start..end)
75 pub span: Range<usize>,
76 /// The new text to insert in place of the specified range
77 pub text: Cow<'static, str>,
78}
79
80/// Trait for abstract text sources.
81///
82/// This trait provides a unified interface for different text sources that may have:
83/// - Different character representations (Unicode escapes, HTML entities)
84/// - Different internal storage formats
85/// - Different error handling requirements
86///
87/// All offsets exposed by this trait are simple text ranges from the start of this source.
88pub trait Source: Send + Sync {
89 /// Get the length of this source.
90 ///
91 /// This represents the total size of this source in bytes.
92 fn length(&self) -> usize;
93
94 /// Returns the ID of this source, if available.
95 fn source_id(&self) -> Option<SourceId> {
96 None
97 }
98
99 /// Returns a text chunk containing the specified offset.
100 fn chunk_at(&self, offset: usize) -> TextChunk<'_>;
101
102 /// Check if the source is empty.
103 fn is_empty(&self) -> bool {
104 self.length() == 0
105 }
106
107 /// Get a single character at the specified offset.
108 ///
109 /// This method should handle any character encoding transformations
110 /// and return the actual character that would be seen by the parser.
111 ///
112 /// # Arguments
113 ///
114 /// * `offset` - The byte offset from the start of this source
115 ///
116 /// # Returns
117 ///
118 /// The character at the specified offset, or `None` if the offset is invalid
119 fn get_char_at(&self, offset: usize) -> Option<char> {
120 self.chunk_at(offset).slice_from(offset).chars().next()
121 }
122
123 /// Get the text content at the specified range.
124 ///
125 /// The range is specified as simple offsets from the start of this source.
126 /// The returned text should have any character encoding transformations
127 /// already applied (e.g., Unicode escapes decoded, HTML entities resolved).
128 ///
129 /// # Arguments
130 ///
131 /// * `range` - The byte range to extract text from (relative to this source)
132 ///
133 /// # Returns
134 ///
135 /// The text content in the specified range.
136 fn get_text_in(&self, range: Range<usize>) -> Cow<'_, str>;
137
138 /// Get the text from the current position to the end of the source.
139 ///
140 /// # Arguments
141 ///
142 /// * `offset` - The byte offset to start from (relative to this source)
143 ///
144 /// # Returns
145 ///
146 /// The remaining text from the offset to the end.
147 fn get_text_from(&self, offset: usize) -> Cow<'_, str> {
148 if offset >= self.length() {
149 return Cow::Borrowed("");
150 }
151 self.get_text_in(core::range::Range { start: offset, end: self.length() })
152 }
153
154 /// Find the next occurrence of a character starting from an offset.
155 ///
156 /// # Arguments
157 ///
158 /// * `offset` - The byte offset to start searching from (relative to this source)
159 /// * `ch` - The character to search for
160 ///
161 /// # Returns
162 ///
163 /// The offset of the next occurrence, or `None` if not found
164 fn find_char_from(&self, offset: usize, ch: char) -> Option<usize> {
165 let mut cursor = SourceCursor::new_at(self, offset);
166 let mut base = offset;
167 loop {
168 let rest = cursor.rest();
169 if let Some(pos) = rest.find(ch) {
170 return Some(base + pos);
171 }
172 let next = cursor.chunk_end();
173 if next >= self.length() {
174 return None;
175 }
176 base = next;
177 cursor.set_position(next);
178 }
179 }
180
181 /// Find the next occurrence of a substring starting from an offset.
182 ///
183 /// # Arguments
184 ///
185 /// * `offset` - The byte offset to start searching from (relative to this source)
186 /// * `pattern` - The substring to search for
187 ///
188 /// # Returns
189 ///
190 /// The offset of the next occurrence, or `None` if not found
191 fn find_str_from(&self, offset: usize, pattern: &str) -> Option<usize> {
192 let mut cursor = SourceCursor::new_at(self, offset);
193 cursor.find_str(pattern)
194 }
195
196 /// Create a syntax error with location information.
197 ///
198 /// # Arguments
199 ///
200 /// * `message` - The error message
201 /// * `offset` - The byte offset where the error occurred
202 ///
203 /// # Returns
204 ///
205 /// An [`OakError`] with precise location information.
206 fn syntax_error(&self, message: String, offset: usize) -> OakError {
207 OakError::syntax_error(message, offset, self.source_id())
208 }
209}
210
211impl Source for str {
212 fn length(&self) -> usize {
213 self.len()
214 }
215
216 fn chunk_at(&self, offset: usize) -> TextChunk<'_> {
217 let len = self.len();
218 if offset >= len {
219 return TextChunk { start: len, text: "" };
220 }
221 TextChunk { start: offset, text: &self[offset..] }
222 }
223
224 fn get_text_in(&self, range: Range<usize>) -> Cow<'_, str> {
225 self.get(range.start..range.end).map(Cow::Borrowed).unwrap_or(Cow::Borrowed(""))
226 }
227}
228
229impl<S: Source + ?Sized> Source for &S {
230 fn length(&self) -> usize {
231 (**self).length()
232 }
233
234 fn chunk_at(&self, offset: usize) -> TextChunk<'_> {
235 (**self).chunk_at(offset)
236 }
237
238 fn get_text_in(&self, range: Range<usize>) -> Cow<'_, str> {
239 (**self).get_text_in(range)
240 }
241
242 fn source_id(&self) -> Option<SourceId> {
243 (**self).source_id()
244 }
245
246 fn get_char_at(&self, offset: usize) -> Option<char> {
247 (**self).get_char_at(offset)
248 }
249
250 fn get_text_from(&self, offset: usize) -> Cow<'_, str> {
251 (**self).get_text_from(offset)
252 }
253
254 fn find_char_from(&self, offset: usize, ch: char) -> Option<usize> {
255 (**self).find_char_from(offset, ch)
256 }
257
258 fn find_str_from(&self, offset: usize, pattern: &str) -> Option<usize> {
259 (**self).find_str_from(offset, pattern)
260 }
261
262 fn syntax_error(&self, message: String, offset: usize) -> OakError {
263 (**self).syntax_error(message, offset)
264 }
265}