oak_core/source/mod.rs
1//! Source text management and location tracking for incremental parsing.
2//!
3//! This module provides structures for managing source code text and tracking
4//! locations within it.
5
6use core::range::Range;
7use std::borrow::Cow;
8mod buffer;
9mod cursor;
10mod rope;
11mod simd;
12mod streaming;
13mod text;
14
15pub use self::{
16 buffer::{SourceBuffer, ToSource},
17 cursor::SourceCursor,
18 rope::{RopeBuffer, RopeSource},
19 simd::SimdScanner,
20 streaming::{ChunkedBuffer, ChunkedSource},
21 text::SourceText,
22};
23use crate::OakError;
24
25/// A unique identifier for a source file.
26pub type SourceId = u32;
27
/// A borrowed piece of source text paired with the byte offset at which it begins.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct TextChunk<'a> {
    /// Byte offset in the source at which this chunk begins.
    pub start: usize,
    /// The text held by this chunk.
    pub text: &'a str,
}

impl<'a> TextChunk<'a> {
    /// Byte offset just past the last byte of this chunk (`start` plus the text length).
    #[inline]
    pub fn end(&self) -> usize {
        let len = self.text.len();
        self.start + len
    }

    /// Returns the part of the chunk text that begins at the absolute byte `offset`.
    ///
    /// - An `offset` at or before the chunk start yields the whole chunk text.
    /// - An `offset` past the chunk end, or one that does not fall on a UTF-8
    ///   character boundary, yields the empty string.
    #[inline]
    pub fn slice_from(&self, offset: usize) -> &'a str {
        // `checked_sub` is `None` when `offset` lies before the chunk start.
        match offset.checked_sub(self.start) {
            None | Some(0) => self.text,
            Some(rel) => self.text.get(rel..).unwrap_or_default(),
        }
    }
}
54
/// Represents a text edit operation for incremental updates.
///
/// Text edits are used to apply changes to source text in an incremental
/// manner, such as those received from LSP clients or other text editing
/// operations. Each edit specifies a byte range to replace and the new text
/// to insert in that range.
///
/// # Examples
///
/// ```
/// # #![feature(new_range_api)]
/// # use oak_core::source::TextEdit;
/// use core::range::Range;
/// let edit = TextEdit {
///     span: Range { start: 4, end: 9 }, // Replace bytes 4..9 (the end is exclusive)
///     text: "world".into(),             // With the text "world"
/// };
/// assert_eq!(edit.span.end - edit.span.start, 5);
/// assert_eq!(edit.text, "world");
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TextEdit {
    /// The byte range in the original text to be replaced (`start..end`, end exclusive).
    pub span: Range<usize>,
    /// The new text to insert in place of the specified range.
    pub text: Cow<'static, str>,
}
79
80/// Trait for abstract text sources.
81///
82/// This trait provides a unified interface for different text sources that may have:
83/// - Different character representations (Unicode escapes, HTML entities)
84/// - Different internal storage formats (contiguous, chunked, ropes)
85/// - Different error handling requirements
86///
87/// All offsets exposed by this trait are simple byte ranges from the start of this source.
88pub trait Source: Send + Sync {
89 /// Returns the total length of this source in bytes.
90 fn length(&self) -> usize;
91
92 /// Returns a unique identifier for this source, if available.
93 ///
94 /// Useful for associating diagnostics with specific files.
95 fn source_id(&self) -> Option<SourceId> {
96 None
97 }
98
99 /// Returns a text chunk containing the specified byte offset.
100 ///
101 /// This allows for efficient traversal of large or non-contiguous sources.
102 fn chunk_at(&self, offset: usize) -> TextChunk<'_>;
103
104 /// Returns `true` if the source has no content.
105 fn is_empty(&self) -> bool {
106 self.length() == 0
107 }
108
109 /// Returns the character at the specified byte offset.
110 ///
111 /// This method should handle any character encoding transformations
112 /// and return the actual character that would be seen by the parser.
113 ///
114 /// # Arguments
115 ///
116 /// * `offset` - The byte offset from the start of this source.
117 ///
118 /// # Returns
119 ///
120 /// The character at the specified offset, or `None` if the offset is invalid.
121 fn get_char_at(&self, offset: usize) -> Option<char> {
122 self.chunk_at(offset).slice_from(offset).chars().next()
123 }
124
125 /// Returns the text content within the specified byte range.
126 ///
127 /// # Arguments
128 ///
129 /// * `range` - The byte range to extract text from.
130 ///
131 /// # Returns
132 ///
133 /// The text content in the specified range, potentially as a borrowed slice.
134 fn get_text_in(&self, range: Range<usize>) -> Cow<'_, str>;
135
136 /// Returns the text from the specified byte offset to the end of the source.
137 ///
138 /// # Arguments
139 ///
140 /// * `offset` - The byte offset to start from.
141 fn get_text_from(&self, offset: usize) -> Cow<'_, str> {
142 if offset >= self.length() {
143 return Cow::Borrowed("");
144 }
145 self.get_text_in(core::range::Range { start: offset, end: self.length() })
146 }
147
148 /// Finds the next occurrence of a character starting from an offset.
149 ///
150 /// # Arguments
151 ///
152 /// * `offset` - The byte offset to start searching from.
153 /// * `ch` - The character to search for.
154 ///
155 /// # Returns
156 ///
157 /// The absolute byte offset of the next occurrence, or `None` if not found.
158 fn find_char_from(&self, offset: usize, ch: char) -> Option<usize> {
159 let mut cursor = SourceCursor::new_at(self, offset);
160 let mut base = offset;
161 loop {
162 let rest = cursor.rest();
163 if let Some(pos) = rest.find(ch) {
164 return Some(base + pos);
165 }
166 let next = cursor.chunk_end();
167 if next >= self.length() {
168 return None;
169 }
170 base = next;
171 cursor.set_position(next);
172 }
173 }
174
175 /// Finds the next occurrence of a substring starting from an offset.
176 ///
177 /// # Arguments
178 ///
179 /// * `offset` - The byte offset to start searching from.
180 /// * `pattern` - The substring to search for.
181 ///
182 /// # Returns
183 ///
184 /// The absolute byte offset of the next occurrence, or `None` if not found.
185 fn find_str_from(&self, offset: usize, pattern: &str) -> Option<usize> {
186 let mut cursor = SourceCursor::new_at(self, offset);
187 cursor.find_str(pattern)
188 }
189
190 /// Creates a syntax error with location information associated with this source.
191 ///
192 /// # Arguments
193 ///
194 /// * `message` - The error message.
195 /// * `offset` - The byte offset where the error occurred.
196 fn syntax_error(&self, message: String, offset: usize) -> OakError {
197 OakError::syntax_error(message, offset, self.source_id())
198 }
199}
200
201impl Source for str {
202 fn length(&self) -> usize {
203 self.len()
204 }
205
206 fn chunk_at(&self, offset: usize) -> TextChunk<'_> {
207 let len = self.len();
208 if offset >= len {
209 return TextChunk { start: len, text: "" };
210 }
211 TextChunk { start: offset, text: &self[offset..] }
212 }
213
214 fn get_text_in(&self, range: Range<usize>) -> Cow<'_, str> {
215 self.get(range.start..range.end).map(Cow::Borrowed).unwrap_or(Cow::Borrowed(""))
216 }
217}
218
219impl<S: Source + ?Sized> Source for &S {
220 fn length(&self) -> usize {
221 (**self).length()
222 }
223
224 fn chunk_at(&self, offset: usize) -> TextChunk<'_> {
225 (**self).chunk_at(offset)
226 }
227
228 fn get_text_in(&self, range: Range<usize>) -> Cow<'_, str> {
229 (**self).get_text_in(range)
230 }
231
232 fn source_id(&self) -> Option<SourceId> {
233 (**self).source_id()
234 }
235
236 fn get_char_at(&self, offset: usize) -> Option<char> {
237 (**self).get_char_at(offset)
238 }
239
240 fn get_text_from(&self, offset: usize) -> Cow<'_, str> {
241 (**self).get_text_from(offset)
242 }
243
244 fn find_char_from(&self, offset: usize, ch: char) -> Option<usize> {
245 (**self).find_char_from(offset, ch)
246 }
247
248 fn find_str_from(&self, offset: usize, pattern: &str) -> Option<usize> {
249 (**self).find_str_from(offset, pattern)
250 }
251
252 fn syntax_error(&self, message: String, offset: usize) -> OakError {
253 (**self).syntax_error(message, offset)
254 }
255}