oak_core/source/mod.rs
1//! Source text management and location tracking for incremental parsing.
2//!
3//! This module provides structures for managing source code text and tracking
4//! locations within it, including support for LSP (Language Server Protocol) integration.
5
6mod text;
7mod view;
8
9pub use self::{text::SourceText, view::SourceView};
10use crate::OakError;
11use lsp_types::Position;
12use serde::{Deserialize, Serialize};
13use std::range::Range;
14pub use url::Url;
15
/// Represents a text edit operation for incremental updates.
///
/// Text edits are used to apply changes to source text in an incremental
/// manner, such as those received from LSP clients or other text editing
/// operations. Each edit specifies a byte range to replace and the new text
/// to insert in that range.
///
/// The range is half-open (`start..end`) and measured in bytes, not characters.
///
/// # Examples
///
/// ```
/// let edit = TextEdit {
///     span: (4..9).into(),  // Replace the five bytes at offsets 4 through 8
///     text: "world".into(), // With the text "world"
/// };
/// ```
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct TextEdit {
    /// The byte range in the original text to be replaced (start..end)
    pub span: Range<usize>,
    /// The new text to insert in place of the specified range
    pub text: String,
}
37
38/// Represents a specific location within source code.
39///
40/// This struct provides line and column information for error reporting
41/// and debugging, optionally including a URL reference to the source file.
42#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, Serialize, Deserialize)]
43pub struct SourceLocation {
44 /// The 1-based line number in the source text
45 pub line: u32,
46 /// The 0-based column number within the line
47 pub column: u32,
48 /// Optional URL reference to the source file
49 pub url: Option<Url>,
50}
51
52/// Trait for abstract text sources with error position management.
53///
54/// This trait provides a unified interface for different text sources that may have:
55/// - Different character representations (Unicode escapes, HTML entities)
56/// - Different internal storage formats
57/// - Different error handling requirements
58///
59/// All offsets exposed by this trait are simple text ranges from the start of this source.
60/// Internal complexity like global offset mapping, character encoding transformations,
61/// and position tracking are handled internally.
62pub trait Source {
63 /// Get the length of this source.
64 ///
65 /// This represents the total size of this source in bytes.
66 fn length(&self) -> usize;
67
68 /// Check if the source is empty.
69 fn is_empty(&self) -> bool {
70 self.length() == 0
71 }
72
73 /// Get a single character at the specified offset.
74 ///
75 /// This method should handle any character encoding transformations
76 /// and return the actual character that would be seen by the parser.
77 ///
78 /// # Arguments
79 ///
80 /// * `offset` - The byte offset from the start of this source
81 ///
82 /// # Returns
83 ///
84 /// The character at the specified offset, or `None` if the offset is invalid
85 fn get_char_at(&self, offset: usize) -> Option<char> {
86 self.get_text_from(offset).chars().next()
87 }
88
89 /// Get the text content at the specified range.
90 ///
91 /// The range is specified as simple offsets from the start of this source.
92 /// The returned text should have any character encoding transformations
93 /// already applied (e.g., Unicode escapes decoded, HTML entities resolved).
94 ///
95 /// # Arguments
96 ///
97 /// * `range` - The byte range to extract text from (relative to this source)
98 ///
99 /// # Returns
100 ///
101 /// The text content in the specified range, or `None` if the range is invalid
102 fn get_text_in(&self, range: Range<usize>) -> &str;
103
104 /// Get the text from the current position to the end of the source.
105 ///
106 /// # Arguments
107 ///
108 /// * `offset` - The byte offset to start from (relative to this source)
109 ///
110 /// # Returns
111 ///
112 /// The remaining text from the offset to the end, or `None` if the offset is invalid
113 fn get_text_from(&self, offset: usize) -> &str {
114 if offset >= self.length() {
115 return "";
116 }
117 self.get_text_in((offset..self.length()).into())
118 }
119
120 /// Get the URL of this source, if available.
121 ///
122 /// This method returns a reference to the URL associated with this source,
123 /// typically used for file-based sources or remote resources.
124 ///
125 /// # Returns
126 ///
127 /// An optional reference to the source URL, or `None` if no URL is available
128 fn get_url(&self) -> Option<&Url> {
129 None
130 }
131
132 /// Convert an offset to position information for error reporting.
133 ///
134 /// This method handles the mapping from offsets to human-readable
135 /// line/column positions for error reporting.
136 ///
137 /// # Arguments
138 ///
139 /// * `offset` - The byte offset from the start of this source
140 ///
141 /// # Returns
142 ///
143 /// A [`SourcePosition`] with line and column information,
144 /// or `None` if the offset is invalid
145 fn offset_to_position(&self, offset: usize) -> Position;
146
147 /// Convert a position to an offset.
148 ///
149 /// # Arguments
150 ///
151 /// * `position` - The position to convert
152 ///
153 /// # Returns
154 ///
155 /// The offset corresponding to the position
156 fn position_to_offset(&self, position: Position) -> usize;
157
158 /// Converts a byte range to an LSP Range.
159 ///
160 /// # Arguments
161 ///
162 /// * `span` - The byte range to convert
163 ///
164 /// # Returns
165 ///
166 /// An `lsp_types::Range` with line/column positions.
167 ///
168 /// # Availability
169 ///
170 /// This method is only available when the `lsp-types` feature is enabled.
171 fn span_to_lsp_range(&self, span: Range<usize>) -> lsp_types::Range {
172 let start = self.offset_to_position(span.start);
173 let end = self.offset_to_position(span.end);
174 lsp_types::Range { start, end }
175 }
176
177 /// Converts an LSP Range to a byte-based source span.
178 ///
179 /// # Arguments
180 ///
181 /// * `range` - The LSP Range to convert
182 ///
183 /// # Returns
184 ///
185 /// A `Range<usize>` representing the byte offset range.
186 ///
187 /// # Availability
188 ///
189 /// This method is only available when the `lsp-types` feature is enabled.
190 fn lsp_range_to_span(&self, range: lsp_types::Range) -> Range<usize> {
191 Range { start: self.position_to_offset(range.start), end: self.position_to_offset(range.end) }
192 }
193
194 /// Find the next occurrence of a character starting from an offset.
195 ///
196 /// # Arguments
197 ///
198 /// * `offset` - The byte offset to start searching from (relative to this source)
199 /// * `ch` - The character to search for
200 ///
201 /// # Returns
202 ///
203 /// The offset of the next occurrence, or `None` if not found
204 fn find_char_from(&self, offset: usize, ch: char) -> Option<usize> {
205 let text = self.get_text_from(offset);
206 text.find(ch).map(|pos| offset + pos)
207 }
208
209 /// Find the next occurrence of a substring starting from an offset.
210 ///
211 /// # Arguments
212 ///
213 /// * `offset` - The byte offset to start searching from (relative to this source)
214 /// * `pattern` - The substring to search for
215 ///
216 /// # Returns
217 ///
218 /// The offset of the next occurrence, or `None` if not found
219 fn find_str_from(&self, offset: usize, pattern: &str) -> Option<usize> {
220 let text = self.get_text_from(offset);
221 text.find(pattern).map(|pos| offset + pos)
222 }
223
224 /// Create an error for an invalid range.
225 ///
226 /// # Arguments
227 ///
228 /// * `range` - The invalid range
229 /// * `message` - The error message
230 ///
231 /// # Returns
232 ///
233 /// An [`OakError`] with position information at the start of the range
234 fn syntax_error(&self, message: impl Into<String>, position: usize) -> OakError {
235 let position = self.offset_to_position(position);
236 OakError::syntax_error(
237 message.into(),
238 SourceLocation { line: position.line, column: position.character, url: self.get_url().cloned() },
239 )
240 }
241}