oak_core/source/
mod.rs

1//! Source text management and location tracking for incremental parsing.
2//!
3//! This module provides structures for managing source code text and tracking
4//! locations within it.
5
6use core::range::Range;
7use std::borrow::Cow;
8mod cursor;
9mod rope;
10mod simd;
11mod streaming;
12mod text;
13
14pub use self::{
15    cursor::SourceCursor,
16    rope::{RopeBuffer, RopeSource},
17    simd::SimdScanner,
18    streaming::{ChunkedBuffer, ChunkedSource},
19    text::SourceText,
20};
21use crate::OakError;
22pub use url::Url;
23
/// A borrowed piece of source text together with the position it starts at.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct TextChunk<'a> {
    /// Byte offset in the source at which `text` begins.
    pub start: usize,
    /// The text held by this chunk.
    pub text: &'a str,
}

impl<'a> TextChunk<'a> {
    /// Returns the byte offset just past the last byte of this chunk
    /// (`start` plus the byte length of `text`).
    #[inline]
    pub fn end(&self) -> usize {
        self.text.len() + self.start
    }

    /// Returns the tail of this chunk beginning at the absolute `offset`.
    ///
    /// * An `offset` at or before `start` yields the whole chunk.
    /// * An `offset` past the end of the chunk, or one that does not fall on a
    ///   UTF-8 character boundary, yields the empty string.
    #[inline]
    pub fn slice_from(&self, offset: usize) -> &'a str {
        match offset.checked_sub(self.start) {
            // `offset` lies before the chunk, so everything in it qualifies.
            None => self.text,
            // `get` rejects out-of-range and mid-character indices for us.
            Some(relative) => self.text.get(relative..).unwrap_or(""),
        }
    }
}
50
51/// Represents a text edit operation for incremental updates.
52///
53/// Text edits are used to apply changes to source text in an incremental
54/// manner, such as those received from LSP clients or other text editing
55/// operations. Each edit specifies a byte range to replace and the new text
56/// to insert in that range.
57///
58/// # Examples
59///
60/// ```
61/// # #![feature(new_range_api)]
62/// # use oak_core::source::TextEdit;
63/// use core::range::Range;
64/// let edit = TextEdit {
65///     span: Range { start: 4, end: 9 }, // Replace characters at positions 4-8
66///     text: "world".into(),             // With the text "world"
67/// };
68/// ```
69pub struct TextEdit {
70    /// The byte range in the original text to be replaced (start..end)
71    pub span: Range<usize>,
72    /// The new text to insert in place of the specified range
73    pub text: String,
74}
75
76/// Trait for abstract text sources.
77///
78/// This trait provides a unified interface for different text sources that may have:
79/// - Different character representations (Unicode escapes, HTML entities)
80/// - Different internal storage formats
81/// - Different error handling requirements
82///
83/// All offsets exposed by this trait are simple text ranges from the start of this source.
84pub trait Source: Send + Sync {
85    /// Get the length of this source.
86    ///
87    /// This represents the total size of this source in bytes.
88    fn length(&self) -> usize;
89
90    /// Returns the URL of this source, if available.
91    fn url(&self) -> Option<Url> {
92        None
93    }
94
95    /// Returns a text chunk containing the specified offset.
96    fn chunk_at(&self, offset: usize) -> TextChunk<'_>;
97
98    /// Check if the source is empty.
99    fn is_empty(&self) -> bool {
100        self.length() == 0
101    }
102
103    /// Get a single character at the specified offset.
104    ///
105    /// This method should handle any character encoding transformations
106    /// and return the actual character that would be seen by the parser.
107    ///
108    /// # Arguments
109    ///
110    /// * `offset` - The byte offset from the start of this source
111    ///
112    /// # Returns
113    ///
114    /// The character at the specified offset, or `None` if the offset is invalid
115    fn get_char_at(&self, offset: usize) -> Option<char> {
116        self.chunk_at(offset).slice_from(offset).chars().next()
117    }
118
119    /// Get the text content at the specified range.
120    ///
121    /// The range is specified as simple offsets from the start of this source.
122    /// The returned text should have any character encoding transformations
123    /// already applied (e.g., Unicode escapes decoded, HTML entities resolved).
124    ///
125    /// # Arguments
126    ///
127    /// * `range` - The byte range to extract text from (relative to this source)
128    ///
129    /// # Returns
130    ///
131    /// The text content in the specified range.
132    fn get_text_in(&self, range: Range<usize>) -> Cow<'_, str>;
133
134    /// Get the text from the current position to the end of the source.
135    ///
136    /// # Arguments
137    ///
138    /// * `offset` - The byte offset to start from (relative to this source)
139    ///
140    /// # Returns
141    ///
142    /// The remaining text from the offset to the end.
143    fn get_text_from(&self, offset: usize) -> Cow<'_, str> {
144        if offset >= self.length() {
145            return Cow::Borrowed("");
146        }
147        self.get_text_in(core::range::Range { start: offset, end: self.length() })
148    }
149
150    /// Get the URL of this source, if available.
151    ///
152    /// This method returns a reference to the URL associated with this source,
153    /// typically used for file-based sources or remote resources.
154    ///
155    /// # Returns
156    ///
157    /// An optional reference to the source URL, or `None` if no URL is available
158    fn get_url(&self) -> Option<&Url> {
159        None
160    }
161
162    /// Find the next occurrence of a character starting from an offset.
163    ///
164    /// # Arguments
165    ///
166    /// * `offset` - The byte offset to start searching from (relative to this source)
167    /// * `ch` - The character to search for
168    ///
169    /// # Returns
170    ///
171    /// The offset of the next occurrence, or `None` if not found
172    fn find_char_from(&self, offset: usize, ch: char) -> Option<usize> {
173        let mut cursor = SourceCursor::new_at(self, offset);
174        let mut base = offset;
175        loop {
176            let rest = cursor.rest();
177            if let Some(pos) = rest.find(ch) {
178                return Some(base + pos);
179            }
180            let next = cursor.chunk_end();
181            if next >= self.length() {
182                return None;
183            }
184            base = next;
185            cursor.set_position(next);
186        }
187    }
188
189    /// Find the next occurrence of a substring starting from an offset.
190    ///
191    /// # Arguments
192    ///
193    /// * `offset` - The byte offset to start searching from (relative to this source)
194    /// * `pattern` - The substring to search for
195    ///
196    /// # Returns
197    ///
198    /// The offset of the next occurrence, or `None` if not found
199    fn find_str_from(&self, offset: usize, pattern: &str) -> Option<usize> {
200        let mut cursor = SourceCursor::new_at(self, offset);
201        cursor.find_str(pattern)
202    }
203
204    /// Create a syntax error with location information.
205    ///
206    /// # Arguments
207    ///
208    /// * `message` - The error message
209    /// * `offset` - The byte offset where the error occurred
210    ///
211    /// # Returns
212    ///
213    /// An [`OakError`] with precise location information.
214    fn syntax_error(&self, message: String, offset: usize) -> OakError {
215        OakError::syntax_error(message, offset, self.get_url().cloned())
216    }
217}
218
219impl<S: Source + ?Sized> Source for &S {
220    fn length(&self) -> usize {
221        (**self).length()
222    }
223
224    fn chunk_at(&self, offset: usize) -> TextChunk<'_> {
225        (**self).chunk_at(offset)
226    }
227
228    fn get_text_in(&self, range: Range<usize>) -> Cow<'_, str> {
229        (**self).get_text_in(range)
230    }
231
232    fn get_url(&self) -> Option<&Url> {
233        (**self).get_url()
234    }
235}
236
237impl Source for Box<dyn Source + Send + Sync> {
238    fn length(&self) -> usize {
239        (**self).length()
240    }
241
242    fn chunk_at(&self, offset: usize) -> TextChunk<'_> {
243        (**self).chunk_at(offset)
244    }
245
246    fn get_text_in(&self, range: Range<usize>) -> Cow<'_, str> {
247        (**self).get_text_in(range)
248    }
249
250    fn get_url(&self) -> Option<&Url> {
251        (**self).get_url()
252    }
253}