oak_core/source/mod.rs
1//! Source text management and location tracking for incremental parsing.
2//!
3//! This module provides structures for managing source code text and tracking
4//! locations within it.
5
6use core::range::Range;
7use std::borrow::Cow;
8mod cursor;
9mod rope;
10mod simd;
11mod streaming;
12mod text;
13
14pub use self::{
15 cursor::SourceCursor,
16 rope::{RopeBuffer, RopeSource},
17 simd::SimdScanner,
18 streaming::{ChunkedBuffer, ChunkedSource},
19 text::SourceText,
20};
21use crate::OakError;
22pub use url::Url;
23
/// A borrowed piece of source text paired with the absolute byte offset at
/// which it begins in the source.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct TextChunk<'a> {
    /// Absolute byte offset in the source at which this chunk begins.
    pub start: usize,
    /// The text held by this chunk.
    pub text: &'a str,
}

impl<'a> TextChunk<'a> {
    /// Returns the absolute byte offset one past the last byte of this chunk.
    #[inline]
    pub fn end(&self) -> usize {
        self.text.len() + self.start
    }

    /// Returns the tail of the chunk text beginning at the absolute `offset`.
    ///
    /// Offsets at or before the chunk start yield the whole text. Offsets
    /// past the end of the chunk, or offsets that do not fall on a UTF-8
    /// character boundary, yield an empty string.
    #[inline]
    pub fn slice_from(&self, offset: usize) -> &'a str {
        match offset.checked_sub(self.start) {
            // `offset` lies at or before the first byte of the chunk.
            None | Some(0) => self.text,
            // `str::get` rejects out-of-range and mid-character indices.
            Some(skip) => self.text.get(skip..).unwrap_or(""),
        }
    }
}
50
51/// Represents a text edit operation for incremental updates.
52///
53/// Text edits are used to apply changes to source text in an incremental
54/// manner, such as those received from LSP clients or other text editing
55/// operations. Each edit specifies a byte range to replace and the new text
56/// to insert in that range.
57///
58/// # Examples
59///
60/// ```
61/// # #![feature(new_range_api)]
62/// # use oak_core::source::TextEdit;
63/// use core::range::Range;
64/// let edit = TextEdit {
65/// span: Range { start: 4, end: 9 }, // Replace characters at positions 4-8
66/// text: "world".into(), // With the text "world"
67/// };
68/// ```
69pub struct TextEdit {
70 /// The byte range in the original text to be replaced (start..end)
71 pub span: Range<usize>,
72 /// The new text to insert in place of the specified range
73 pub text: String,
74}
75
76/// Trait for abstract text sources.
77///
78/// This trait provides a unified interface for different text sources that may have:
79/// - Different character representations (Unicode escapes, HTML entities)
80/// - Different internal storage formats
81/// - Different error handling requirements
82///
83/// All offsets exposed by this trait are simple text ranges from the start of this source.
84pub trait Source: Send + Sync {
85 /// Get the length of this source.
86 ///
87 /// This represents the total size of this source in bytes.
88 fn length(&self) -> usize;
89
90 /// Returns the URL of this source, if available.
91 fn url(&self) -> Option<Url> {
92 None
93 }
94
95 /// Returns a text chunk containing the specified offset.
96 fn chunk_at(&self, offset: usize) -> TextChunk<'_>;
97
98 /// Check if the source is empty.
99 fn is_empty(&self) -> bool {
100 self.length() == 0
101 }
102
103 /// Get a single character at the specified offset.
104 ///
105 /// This method should handle any character encoding transformations
106 /// and return the actual character that would be seen by the parser.
107 ///
108 /// # Arguments
109 ///
110 /// * `offset` - The byte offset from the start of this source
111 ///
112 /// # Returns
113 ///
114 /// The character at the specified offset, or `None` if the offset is invalid
115 fn get_char_at(&self, offset: usize) -> Option<char> {
116 self.chunk_at(offset).slice_from(offset).chars().next()
117 }
118
119 /// Get the text content at the specified range.
120 ///
121 /// The range is specified as simple offsets from the start of this source.
122 /// The returned text should have any character encoding transformations
123 /// already applied (e.g., Unicode escapes decoded, HTML entities resolved).
124 ///
125 /// # Arguments
126 ///
127 /// * `range` - The byte range to extract text from (relative to this source)
128 ///
129 /// # Returns
130 ///
131 /// The text content in the specified range.
132 fn get_text_in(&self, range: Range<usize>) -> Cow<'_, str>;
133
134 /// Get the text from the current position to the end of the source.
135 ///
136 /// # Arguments
137 ///
138 /// * `offset` - The byte offset to start from (relative to this source)
139 ///
140 /// # Returns
141 ///
142 /// The remaining text from the offset to the end.
143 fn get_text_from(&self, offset: usize) -> Cow<'_, str> {
144 if offset >= self.length() {
145 return Cow::Borrowed("");
146 }
147 self.get_text_in(core::range::Range { start: offset, end: self.length() })
148 }
149
150 /// Get the URL of this source, if available.
151 ///
152 /// This method returns a reference to the URL associated with this source,
153 /// typically used for file-based sources or remote resources.
154 ///
155 /// # Returns
156 ///
157 /// An optional reference to the source URL, or `None` if no URL is available
158 fn get_url(&self) -> Option<&Url> {
159 None
160 }
161
162 /// Find the next occurrence of a character starting from an offset.
163 ///
164 /// # Arguments
165 ///
166 /// * `offset` - The byte offset to start searching from (relative to this source)
167 /// * `ch` - The character to search for
168 ///
169 /// # Returns
170 ///
171 /// The offset of the next occurrence, or `None` if not found
172 fn find_char_from(&self, offset: usize, ch: char) -> Option<usize> {
173 let mut cursor = SourceCursor::new_at(self, offset);
174 let mut base = offset;
175 loop {
176 let rest = cursor.rest();
177 if let Some(pos) = rest.find(ch) {
178 return Some(base + pos);
179 }
180 let next = cursor.chunk_end();
181 if next >= self.length() {
182 return None;
183 }
184 base = next;
185 cursor.set_position(next);
186 }
187 }
188
189 /// Find the next occurrence of a substring starting from an offset.
190 ///
191 /// # Arguments
192 ///
193 /// * `offset` - The byte offset to start searching from (relative to this source)
194 /// * `pattern` - The substring to search for
195 ///
196 /// # Returns
197 ///
198 /// The offset of the next occurrence, or `None` if not found
199 fn find_str_from(&self, offset: usize, pattern: &str) -> Option<usize> {
200 let mut cursor = SourceCursor::new_at(self, offset);
201 cursor.find_str(pattern)
202 }
203
204 /// Create a syntax error with location information.
205 ///
206 /// # Arguments
207 ///
208 /// * `message` - The error message
209 /// * `offset` - The byte offset where the error occurred
210 ///
211 /// # Returns
212 ///
213 /// An [`OakError`] with precise location information.
214 fn syntax_error(&self, message: String, offset: usize) -> OakError {
215 OakError::syntax_error(message, offset, self.get_url().cloned())
216 }
217}
218
219impl<S: Source + ?Sized> Source for &S {
220 fn length(&self) -> usize {
221 (**self).length()
222 }
223
224 fn chunk_at(&self, offset: usize) -> TextChunk<'_> {
225 (**self).chunk_at(offset)
226 }
227
228 fn get_text_in(&self, range: Range<usize>) -> Cow<'_, str> {
229 (**self).get_text_in(range)
230 }
231
232 fn get_url(&self) -> Option<&Url> {
233 (**self).get_url()
234 }
235}
236
237impl Source for Box<dyn Source + Send + Sync> {
238 fn length(&self) -> usize {
239 (**self).length()
240 }
241
242 fn chunk_at(&self, offset: usize) -> TextChunk<'_> {
243 (**self).chunk_at(offset)
244 }
245
246 fn get_text_in(&self, range: Range<usize>) -> Cow<'_, str> {
247 (**self).get_text_in(range)
248 }
249
250 fn get_url(&self) -> Option<&Url> {
251 (**self).get_url()
252 }
253}