oak_core/source/
mod.rs

1//! Source text management and location tracking for incremental parsing.
2//!
3//! This module provides structures for managing source code text and tracking
4//! locations within it, including support for LSP (Language Server Protocol) integration.
5
6mod text;
7mod view;
8
9pub use self::{text::SourceText, view::SourceView};
10use crate::OakError;
11use lsp_types::Position;
12use serde::{Deserialize, Serialize};
13use std::range::Range;
14pub use url::Url;
15
/// A single replacement applied to source text during an incremental update.
///
/// An edit names the byte range being replaced and the text that takes its
/// place. Edits of this shape typically originate from LSP clients or other
/// text editing operations.
///
/// # Examples
///
/// ```ignore
/// // Replace the five bytes at offsets 4..9 (end-exclusive) with "world".
/// // A `4..9` literal is a `std::ops::Range`, so it is converted into the
/// // `std::range::Range` stored in `span`.
/// let edit = TextEdit {
///     span: (4..9).into(),
///     text: "world".into(),
/// };
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TextEdit {
    /// Byte range in the original text to replace (`start..end`, end-exclusive).
    pub span: Range<usize>,
    /// Replacement text inserted in place of `span`.
    pub text: String,
}
37
38/// Represents a specific location within source code.
39///
40/// This struct provides line and column information for error reporting
41/// and debugging, optionally including a URL reference to the source file.
42#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, Serialize, Deserialize)]
43pub struct SourceLocation {
44    /// The 1-based line number in the source text
45    pub line: u32,
46    /// The 0-based column number within the line
47    pub column: u32,
48    /// Optional URL reference to the source file
49    pub url: Option<Url>,
50}
51
52/// Trait for abstract text sources with error position management.
53///
54/// This trait provides a unified interface for different text sources that may have:
55/// - Different character representations (Unicode escapes, HTML entities)
56/// - Different internal storage formats
57/// - Different error handling requirements
58///
59/// All offsets exposed by this trait are simple text ranges from the start of this source.
60/// Internal complexity like global offset mapping, character encoding transformations,
61/// and position tracking are handled internally.
62pub trait Source {
63    /// Get the length of this source.
64    ///
65    /// This represents the total size of this source in bytes.
66    fn length(&self) -> usize;
67
68    /// Check if the source is empty.
69    fn is_empty(&self) -> bool {
70        self.length() == 0
71    }
72
73    /// Get a single character at the specified offset.
74    ///
75    /// This method should handle any character encoding transformations
76    /// and return the actual character that would be seen by the parser.
77    ///
78    /// # Arguments
79    ///
80    /// * `offset` - The byte offset from the start of this source
81    ///
82    /// # Returns
83    ///
84    /// The character at the specified offset, or `None` if the offset is invalid
85    fn get_char_at(&self, offset: usize) -> Option<char> {
86        self.get_text_from(offset).chars().next()
87    }
88
89    /// Get the text content at the specified range.
90    ///
91    /// The range is specified as simple offsets from the start of this source.
92    /// The returned text should have any character encoding transformations
93    /// already applied (e.g., Unicode escapes decoded, HTML entities resolved).
94    ///
95    /// # Arguments
96    ///
97    /// * `range` - The byte range to extract text from (relative to this source)
98    ///
99    /// # Returns
100    ///
101    /// The text content in the specified range, or `None` if the range is invalid
102    fn get_text_in(&self, range: Range<usize>) -> &str;
103
104    /// Get the text from the current position to the end of the source.
105    ///
106    /// # Arguments
107    ///
108    /// * `offset` - The byte offset to start from (relative to this source)
109    ///
110    /// # Returns
111    ///
112    /// The remaining text from the offset to the end, or `None` if the offset is invalid
113    fn get_text_from(&self, offset: usize) -> &str {
114        if offset >= self.length() {
115            return "";
116        }
117        self.get_text_in((offset..self.length()).into())
118    }
119
120    /// Get the URL of this source, if available.
121    ///
122    /// This method returns a reference to the URL associated with this source,
123    /// typically used for file-based sources or remote resources.
124    ///
125    /// # Returns
126    ///
127    /// An optional reference to the source URL, or `None` if no URL is available
128    fn get_url(&self) -> Option<&Url> {
129        None
130    }
131
132    /// Convert an offset to position information for error reporting.
133    ///
134    /// This method handles the mapping from offsets to human-readable
135    /// line/column positions for error reporting.
136    ///
137    /// # Arguments
138    ///
139    /// * `offset` - The byte offset from the start of this source
140    ///
141    /// # Returns
142    ///
143    /// A [`SourcePosition`] with line and column information,
144    /// or `None` if the offset is invalid
145    fn offset_to_position(&self, offset: usize) -> Position;
146
147    /// Convert a position to an offset.
148    ///
149    /// # Arguments
150    ///
151    /// * `position` - The position to convert
152    ///
153    /// # Returns
154    ///
155    /// The offset corresponding to the position
156    fn position_to_offset(&self, position: Position) -> usize;
157
158    /// Converts a byte range to an LSP Range.
159    ///
160    /// # Arguments
161    ///
162    /// * `span` - The byte range to convert
163    ///
164    /// # Returns
165    ///
166    /// An `lsp_types::Range` with line/column positions.
167    ///
168    /// # Availability
169    ///
170    /// This method is only available when the `lsp-types` feature is enabled.
171    fn span_to_lsp_range(&self, span: Range<usize>) -> lsp_types::Range {
172        let start = self.offset_to_position(span.start);
173        let end = self.offset_to_position(span.end);
174        lsp_types::Range { start, end }
175    }
176
177    /// Converts an LSP Range to a byte-based source span.
178    ///
179    /// # Arguments
180    ///
181    /// * `range` - The LSP Range to convert
182    ///
183    /// # Returns
184    ///
185    /// A `Range<usize>` representing the byte offset range.
186    ///
187    /// # Availability
188    ///
189    /// This method is only available when the `lsp-types` feature is enabled.
190    fn lsp_range_to_span(&self, range: lsp_types::Range) -> Range<usize> {
191        Range { start: self.position_to_offset(range.start), end: self.position_to_offset(range.end) }
192    }
193
194    /// Find the next occurrence of a character starting from an offset.
195    ///
196    /// # Arguments
197    ///
198    /// * `offset` - The byte offset to start searching from (relative to this source)
199    /// * `ch` - The character to search for
200    ///
201    /// # Returns
202    ///
203    /// The offset of the next occurrence, or `None` if not found
204    fn find_char_from(&self, offset: usize, ch: char) -> Option<usize> {
205        let text = self.get_text_from(offset);
206        text.find(ch).map(|pos| offset + pos)
207    }
208
209    /// Find the next occurrence of a substring starting from an offset.
210    ///
211    /// # Arguments
212    ///
213    /// * `offset` - The byte offset to start searching from (relative to this source)
214    /// * `pattern` - The substring to search for
215    ///
216    /// # Returns
217    ///
218    /// The offset of the next occurrence, or `None` if not found
219    fn find_str_from(&self, offset: usize, pattern: &str) -> Option<usize> {
220        let text = self.get_text_from(offset);
221        text.find(pattern).map(|pos| offset + pos)
222    }
223
224    /// Create an error for an invalid range.
225    ///
226    /// # Arguments
227    ///
228    /// * `range` - The invalid range
229    /// * `message` - The error message
230    ///
231    /// # Returns
232    ///
233    /// An [`OakError`] with position information at the start of the range
234    fn syntax_error(&self, message: impl Into<String>, position: usize) -> OakError {
235        let position = self.offset_to_position(position);
236        OakError::syntax_error(
237            message.into(),
238            SourceLocation { line: position.line, column: position.character, url: self.get_url().cloned() },
239        )
240    }
241}