Skip to main content

oak_core/source/
mod.rs

1//! Source text management and location tracking for incremental parsing.
2//!
3//! This module provides structures for managing source code text and tracking
4//! locations within it.
5
6use core::range::Range;
7use std::borrow::Cow;
8mod buffer;
9mod cursor;
10mod rope;
11mod simd;
12mod streaming;
13mod text;
14
15pub use self::{
16    buffer::{SourceBuffer, ToSource},
17    cursor::SourceCursor,
18    rope::{RopeBuffer, RopeSource},
19    simd::SimdScanner,
20    streaming::{ChunkedBuffer, ChunkedSource},
21    text::SourceText,
22};
23use crate::OakError;
24
25/// A unique identifier for a source file.
26pub type SourceId = u32;
27
/// A borrowed piece of source text together with its position in the source.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct TextChunk<'a> {
    /// Byte offset, relative to the start of the source, where this chunk begins.
    pub start: usize,
    /// The text covered by this chunk.
    pub text: &'a str,
}

impl<'a> TextChunk<'a> {
    /// Byte offset one past the last byte of this chunk (`start` plus the
    /// byte length of `text`).
    #[inline]
    pub fn end(&self) -> usize {
        let byte_len = self.text.len();
        self.start + byte_len
    }

    /// Returns the tail of the chunk text beginning at the absolute byte
    /// offset `offset`.
    ///
    /// * An `offset` at or before the chunk start yields the whole text.
    /// * An `offset` past the end of the chunk, or one that does not fall on
    ///   a character boundary, yields the empty string.
    #[inline]
    pub fn slice_from(&self, offset: usize) -> &'a str {
        match offset.checked_sub(self.start) {
            // `offset` lies at or before the first byte of the chunk.
            None | Some(0) => self.text,
            // `get` rejects out-of-range and mid-character positions.
            Some(relative) => self.text.get(relative..).unwrap_or(""),
        }
    }
}
54
/// Represents a text edit operation for incremental updates.
///
/// Text edits are used to apply changes to source text in an incremental
/// manner, such as those received from LSP clients or other text editing
/// operations. Each edit specifies a byte range to replace and the new text
/// to insert in that range.
///
/// # Examples
///
/// ```
/// # #![feature(new_range_api)]
/// # use oak_core::source::TextEdit;
/// use core::range::Range;
/// let edit = TextEdit {
///     span: Range { start: 4, end: 9 }, // Replace bytes 4..9 (the end is exclusive)
///     text: "world".into(),             // With the text "world"
/// };
/// ```
pub struct TextEdit {
    /// The byte range in the original text to be replaced (`start..end`,
    /// with `end` exclusive).
    pub span: Range<usize>,
    /// The new text to insert in place of the specified range.
    pub text: Cow<'static, str>,
}
79
80/// Trait for abstract text sources.
81///
82/// This trait provides a unified interface for different text sources that may have:
83/// - Different character representations (Unicode escapes, HTML entities)
84/// - Different internal storage formats (contiguous, chunked, ropes)
85/// - Different error handling requirements
86///
87/// All offsets exposed by this trait are simple byte ranges from the start of this source.
88pub trait Source: Send + Sync {
89    /// Returns the total length of this source in bytes.
90    fn length(&self) -> usize;
91
92    /// Returns a unique identifier for this source, if available.
93    ///
94    /// Useful for associating diagnostics with specific files.
95    fn source_id(&self) -> Option<SourceId> {
96        None
97    }
98
99    /// Returns a text chunk containing the specified byte offset.
100    ///
101    /// This allows for efficient traversal of large or non-contiguous sources.
102    fn chunk_at(&self, offset: usize) -> TextChunk<'_>;
103
104    /// Returns `true` if the source has no content.
105    fn is_empty(&self) -> bool {
106        self.length() == 0
107    }
108
109    /// Returns the character at the specified byte offset.
110    ///
111    /// This method should handle any character encoding transformations
112    /// and return the actual character that would be seen by the parser.
113    ///
114    /// # Arguments
115    ///
116    /// * `offset` - The byte offset from the start of this source.
117    ///
118    /// # Returns
119    ///
120    /// The character at the specified offset, or `None` if the offset is invalid.
121    fn get_char_at(&self, offset: usize) -> Option<char> {
122        self.chunk_at(offset).slice_from(offset).chars().next()
123    }
124
125    /// Returns the text content within the specified byte range.
126    ///
127    /// # Arguments
128    ///
129    /// * `range` - The byte range to extract text from.
130    ///
131    /// # Returns
132    ///
133    /// The text content in the specified range, potentially as a borrowed slice.
134    fn get_text_in(&self, range: Range<usize>) -> Cow<'_, str>;
135
136    /// Returns the text from the specified byte offset to the end of the source.
137    ///
138    /// # Arguments
139    ///
140    /// * `offset` - The byte offset to start from.
141    fn get_text_from(&self, offset: usize) -> Cow<'_, str> {
142        if offset >= self.length() {
143            return Cow::Borrowed("");
144        }
145        self.get_text_in(core::range::Range { start: offset, end: self.length() })
146    }
147
148    /// Finds the next occurrence of a character starting from an offset.
149    ///
150    /// # Arguments
151    ///
152    /// * `offset` - The byte offset to start searching from.
153    /// * `ch` - The character to search for.
154    ///
155    /// # Returns
156    ///
157    /// The absolute byte offset of the next occurrence, or `None` if not found.
158    fn find_char_from(&self, offset: usize, ch: char) -> Option<usize> {
159        let mut cursor = SourceCursor::new_at(self, offset);
160        let mut base = offset;
161        loop {
162            let rest = cursor.rest();
163            if let Some(pos) = rest.find(ch) {
164                return Some(base + pos);
165            }
166            let next = cursor.chunk_end();
167            if next >= self.length() {
168                return None;
169            }
170            base = next;
171            cursor.set_position(next);
172        }
173    }
174
175    /// Finds the next occurrence of a substring starting from an offset.
176    ///
177    /// # Arguments
178    ///
179    /// * `offset` - The byte offset to start searching from.
180    /// * `pattern` - The substring to search for.
181    ///
182    /// # Returns
183    ///
184    /// The absolute byte offset of the next occurrence, or `None` if not found.
185    fn find_str_from(&self, offset: usize, pattern: &str) -> Option<usize> {
186        let mut cursor = SourceCursor::new_at(self, offset);
187        cursor.find_str(pattern)
188    }
189
190    /// Creates a syntax error with location information associated with this source.
191    ///
192    /// # Arguments
193    ///
194    /// * `message` - The error message.
195    /// * `offset` - The byte offset where the error occurred.
196    fn syntax_error(&self, message: String, offset: usize) -> OakError {
197        OakError::syntax_error(message, offset, self.source_id())
198    }
199}
200
201impl Source for str {
202    fn length(&self) -> usize {
203        self.len()
204    }
205
206    fn chunk_at(&self, offset: usize) -> TextChunk<'_> {
207        let len = self.len();
208        if offset >= len {
209            return TextChunk { start: len, text: "" };
210        }
211        TextChunk { start: offset, text: &self[offset..] }
212    }
213
214    fn get_text_in(&self, range: Range<usize>) -> Cow<'_, str> {
215        self.get(range.start..range.end).map(Cow::Borrowed).unwrap_or(Cow::Borrowed(""))
216    }
217}
218
219impl<S: Source + ?Sized> Source for &S {
220    fn length(&self) -> usize {
221        (**self).length()
222    }
223
224    fn chunk_at(&self, offset: usize) -> TextChunk<'_> {
225        (**self).chunk_at(offset)
226    }
227
228    fn get_text_in(&self, range: Range<usize>) -> Cow<'_, str> {
229        (**self).get_text_in(range)
230    }
231
232    fn source_id(&self) -> Option<SourceId> {
233        (**self).source_id()
234    }
235
236    fn get_char_at(&self, offset: usize) -> Option<char> {
237        (**self).get_char_at(offset)
238    }
239
240    fn get_text_from(&self, offset: usize) -> Cow<'_, str> {
241        (**self).get_text_from(offset)
242    }
243
244    fn find_char_from(&self, offset: usize, ch: char) -> Option<usize> {
245        (**self).find_char_from(offset, ch)
246    }
247
248    fn find_str_from(&self, offset: usize, pattern: &str) -> Option<usize> {
249        (**self).find_str_from(offset, pattern)
250    }
251
252    fn syntax_error(&self, message: String, offset: usize) -> OakError {
253        (**self).syntax_error(message, offset)
254    }
255}