Skip to main content

oak_core/source/
mod.rs

1//! Source text management and location tracking for incremental parsing.
2//!
3//! This module provides structures for managing source code text and tracking
4//! locations within it.
5
6use core::range::Range;
7use std::borrow::Cow;
8mod buffer;
9mod cursor;
10mod rope;
11mod simd;
12mod streaming;
13mod text;
14
15pub use self::{
16    buffer::{SourceBuffer, ToSource},
17    cursor::SourceCursor,
18    rope::{RopeBuffer, RopeSource},
19    simd::SimdScanner,
20    streaming::{ChunkedBuffer, ChunkedSource},
21    text::SourceText,
22};
23use crate::OakError;
24
/// A unique identifier for a source file.
///
/// This is a plain alias for `u32`; it is the value type returned by
/// [`Source::source_id`] and passed along when building syntax errors.
pub type SourceId = u32;
27
/// A borrowed piece of source text tagged with its absolute position.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct TextChunk<'a> {
    /// Byte offset in the source at which `text` begins.
    pub start: usize,
    /// The text covered by this chunk.
    pub text: &'a str,
}

impl<'a> TextChunk<'a> {
    /// Returns the byte offset one past the last byte of this chunk
    /// (`start` plus the byte length of `text`).
    #[inline]
    pub fn end(&self) -> usize {
        let Self { start, text } = *self;
        start + text.len()
    }

    /// Returns the part of the chunk text that begins at the absolute
    /// offset `offset`.
    ///
    /// * An offset at or before `start` yields the whole chunk text.
    /// * An offset past the end of the chunk, or one that does not fall on
    ///   a character boundary, yields the empty string.
    #[inline]
    pub fn slice_from(&self, offset: usize) -> &'a str {
        match offset.checked_sub(self.start) {
            // `None` means the offset lies before the chunk; `Some(0)` is
            // exactly its start. Either way nothing has to be skipped.
            None | Some(0) => self.text,
            Some(skip) => self.text.get(skip..).unwrap_or(""),
        }
    }
}
54
/// Represents a text edit operation for incremental updates.
///
/// Text edits are used to apply changes to source text in an incremental
/// manner, such as those received from LSP clients or other text editing
/// operations. Each edit specifies a byte range to replace and the new text
/// to insert in that range.
///
/// # Examples
///
/// ```
/// # #![feature(new_range_api)]
/// # use oak_core::source::TextEdit;
/// use core::range::Range;
/// let edit = TextEdit {
///     span: Range { start: 4, end: 9 }, // Replace bytes 4..9 (end is exclusive)
///     text: "world".into(),             // With the text "world"
/// };
/// ```
pub struct TextEdit {
    /// The half-open byte range (`start..end`) of the original text that is
    /// replaced by this edit.
    pub span: Range<usize>,
    /// The new text to insert in place of `span`; either a borrowed
    /// `'static` string or an owned `String`.
    pub text: Cow<'static, str>,
}
79
80/// Trait for abstract text sources.
81///
82/// This trait provides a unified interface for different text sources that may have:
83/// - Different character representations (Unicode escapes, HTML entities)
84/// - Different internal storage formats
85/// - Different error handling requirements
86///
87/// All offsets exposed by this trait are simple text ranges from the start of this source.
88pub trait Source: Send + Sync {
89    /// Get the length of this source.
90    ///
91    /// This represents the total size of this source in bytes.
92    fn length(&self) -> usize;
93
94    /// Returns the ID of this source, if available.
95    fn source_id(&self) -> Option<SourceId> {
96        None
97    }
98
99    /// Returns a text chunk containing the specified offset.
100    fn chunk_at(&self, offset: usize) -> TextChunk<'_>;
101
102    /// Check if the source is empty.
103    fn is_empty(&self) -> bool {
104        self.length() == 0
105    }
106
107    /// Get a single character at the specified offset.
108    ///
109    /// This method should handle any character encoding transformations
110    /// and return the actual character that would be seen by the parser.
111    ///
112    /// # Arguments
113    ///
114    /// * `offset` - The byte offset from the start of this source
115    ///
116    /// # Returns
117    ///
118    /// The character at the specified offset, or `None` if the offset is invalid
119    fn get_char_at(&self, offset: usize) -> Option<char> {
120        self.chunk_at(offset).slice_from(offset).chars().next()
121    }
122
123    /// Get the text content at the specified range.
124    ///
125    /// The range is specified as simple offsets from the start of this source.
126    /// The returned text should have any character encoding transformations
127    /// already applied (e.g., Unicode escapes decoded, HTML entities resolved).
128    ///
129    /// # Arguments
130    ///
131    /// * `range` - The byte range to extract text from (relative to this source)
132    ///
133    /// # Returns
134    ///
135    /// The text content in the specified range.
136    fn get_text_in(&self, range: Range<usize>) -> Cow<'_, str>;
137
138    /// Get the text from the current position to the end of the source.
139    ///
140    /// # Arguments
141    ///
142    /// * `offset` - The byte offset to start from (relative to this source)
143    ///
144    /// # Returns
145    ///
146    /// The remaining text from the offset to the end.
147    fn get_text_from(&self, offset: usize) -> Cow<'_, str> {
148        if offset >= self.length() {
149            return Cow::Borrowed("");
150        }
151        self.get_text_in(core::range::Range { start: offset, end: self.length() })
152    }
153
154    /// Find the next occurrence of a character starting from an offset.
155    ///
156    /// # Arguments
157    ///
158    /// * `offset` - The byte offset to start searching from (relative to this source)
159    /// * `ch` - The character to search for
160    ///
161    /// # Returns
162    ///
163    /// The offset of the next occurrence, or `None` if not found
164    fn find_char_from(&self, offset: usize, ch: char) -> Option<usize> {
165        let mut cursor = SourceCursor::new_at(self, offset);
166        let mut base = offset;
167        loop {
168            let rest = cursor.rest();
169            if let Some(pos) = rest.find(ch) {
170                return Some(base + pos);
171            }
172            let next = cursor.chunk_end();
173            if next >= self.length() {
174                return None;
175            }
176            base = next;
177            cursor.set_position(next);
178        }
179    }
180
181    /// Find the next occurrence of a substring starting from an offset.
182    ///
183    /// # Arguments
184    ///
185    /// * `offset` - The byte offset to start searching from (relative to this source)
186    /// * `pattern` - The substring to search for
187    ///
188    /// # Returns
189    ///
190    /// The offset of the next occurrence, or `None` if not found
191    fn find_str_from(&self, offset: usize, pattern: &str) -> Option<usize> {
192        let mut cursor = SourceCursor::new_at(self, offset);
193        cursor.find_str(pattern)
194    }
195
196    /// Create a syntax error with location information.
197    ///
198    /// # Arguments
199    ///
200    /// * `message` - The error message
201    /// * `offset` - The byte offset where the error occurred
202    ///
203    /// # Returns
204    ///
205    /// An [`OakError`] with precise location information.
206    fn syntax_error(&self, message: String, offset: usize) -> OakError {
207        OakError::syntax_error(message, offset, self.source_id())
208    }
209}
210
211impl Source for str {
212    fn length(&self) -> usize {
213        self.len()
214    }
215
216    fn chunk_at(&self, offset: usize) -> TextChunk<'_> {
217        let len = self.len();
218        if offset >= len {
219            return TextChunk { start: len, text: "" };
220        }
221        TextChunk { start: offset, text: &self[offset..] }
222    }
223
224    fn get_text_in(&self, range: Range<usize>) -> Cow<'_, str> {
225        self.get(range.start..range.end).map(Cow::Borrowed).unwrap_or(Cow::Borrowed(""))
226    }
227}
228
229impl<S: Source + ?Sized> Source for &S {
230    fn length(&self) -> usize {
231        (**self).length()
232    }
233
234    fn chunk_at(&self, offset: usize) -> TextChunk<'_> {
235        (**self).chunk_at(offset)
236    }
237
238    fn get_text_in(&self, range: Range<usize>) -> Cow<'_, str> {
239        (**self).get_text_in(range)
240    }
241
242    fn source_id(&self) -> Option<SourceId> {
243        (**self).source_id()
244    }
245
246    fn get_char_at(&self, offset: usize) -> Option<char> {
247        (**self).get_char_at(offset)
248    }
249
250    fn get_text_from(&self, offset: usize) -> Cow<'_, str> {
251        (**self).get_text_from(offset)
252    }
253
254    fn find_char_from(&self, offset: usize, ch: char) -> Option<usize> {
255        (**self).find_char_from(offset, ch)
256    }
257
258    fn find_str_from(&self, offset: usize, pattern: &str) -> Option<usize> {
259        (**self).find_str_from(offset, pattern)
260    }
261
262    fn syntax_error(&self, message: String, offset: usize) -> OakError {
263        (**self).syntax_error(message, offset)
264    }
265}