oak_core/source/text.rs
1use crate::{
2 OakError, SourceLocation,
3 source::{Source, SourceView, TextEdit},
4};
5use lsp_types::Position;
6use serde::{Deserialize, Serialize};
7use std::range::Range;
8use url::Url;
9
/// Represents source code text with line mapping and optional URL reference.
///
/// This struct manages the raw source text and provides utilities for:
/// - Text extraction at specific offsets or ranges
/// - Character and line/column position tracking
/// - LSP position and range conversions (when `lsp-types` feature is enabled)
/// - Error reporting with precise location information
///
/// Note that the derived [`Default`] value has an empty `line_map`, whereas
/// the constructors in this file always produce a map that starts with `0`.
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct SourceText {
    /// URL this text was loaded from, if one was supplied at construction.
    pub(crate) url: Option<Url>,
    /// The complete source text.
    pub(crate) raw: String,
    /// Byte offsets into `raw` at which each line starts, in ascending order.
    pub(crate) line_map: Vec<usize>,
}
23
24impl<'input> Source for &'input SourceText {
25 fn length(&self) -> usize {
26 self.raw.len()
27 }
28
29 fn get_text_in(&self, range: Range<usize>) -> &str {
30 self.raw.get(range.start..range.end).unwrap_or("")
31 }
32
33 fn offset_to_position(&self, offset: usize) -> Position {
34 let total_len = self.raw.len();
35 // 超出文件范围,返回最后一行的末尾位置(0-based 行号)
36 if offset >= total_len {
37 let last_line_idx = self.line_map.len().saturating_sub(1);
38 let line_start = self.line_map.get(last_line_idx).copied().unwrap_or(0);
39 let line = last_line_idx as u32; // 0-based
40 let column = (total_len.saturating_sub(line_start)) as u32; // 以字符计数
41 return Position { line, character: column };
42 }
43
44 // 二分查找所在行的起始偏移
45 let line_idx = self.line_map.binary_search(&offset).unwrap_or_else(|idx| idx.saturating_sub(1));
46
47 let line_start = self.line_map[line_idx];
48 let line = line_idx as u32; // 0-based 行号
49 // 计算列:从行起始到目标偏移的字符数量
50 let column = self.raw[line_start..offset].chars().count() as u32;
51 Position { line, character: column }
52 }
53
54 fn position_to_offset(&self, position: Position) -> usize {
55 let line = position.line as usize;
56 let column = position.character as usize;
57
58 // 超出了文件范围
59 if line >= self.line_map.len() {
60 // 超出了最后一行,返回文件末尾
61 return self.raw.len();
62 }
63
64 let line_start = self.line_map[line];
65
66 // Find the end of this line
67 let line_end = if line + 1 < self.line_map.len() { self.line_map[line + 1] } else { self.raw.len() };
68
69 // Calculate the byte offset within the line, handling UTF-8 character boundaries
70 let mut current_column = 0;
71 let mut offset = line_start;
72
73 for ch in self.raw[line_start..line_end].chars() {
74 if current_column >= column {
75 break;
76 }
77 current_column += 1;
78 offset += ch.len_utf8();
79 }
80
81 offset
82 }
83}
84impl SourceText {
85 /// Applies multiple text edits to the source text and returns the minimum affected offset.
86 ///
87 /// This method is used for incremental updates to source code, such as those
88 /// received from LSP clients or other text editing operations.
89 ///
90 /// # Arguments
91 ///
92 /// * `edits` - A slice of [`TextEdit`] operations to apply
93 ///
94 /// # Returns
95 ///
96 /// The minimum byte offset that was affected by any of the edits. This is
97 /// useful for determining where to restart parsing after incremental changes.
98 ///
99 /// # Examples
100 ///
101 /// ```
102 /// let mut source = SourceText::new("let x = 5;");
103 /// let edits = vec![TextEdit { span: 4..5, text: "y".to_string() }];
104 /// let min_offset = source.apply_edits(&edits);
105 /// assert_eq!(min_offset, 4);
106 /// ```
107 pub fn apply_edits(&mut self, edits: &[TextEdit]) -> usize {
108 let mut min = self.raw.len();
109 for TextEdit { span, text } in edits {
110 min = min.min(span.start);
111 self.raw.replace_range(span.start..span.end, text);
112 }
113 min
114 }
115
116 /// Creates a new [`SourceText`] containing a slice of the original text.
117 ///
118 /// This method extracts a portion of the source text and creates a new
119 /// [`SourceText`] instance with the extracted content. The line map is
120 /// rebuilt for the new content.
121 ///
122 /// # Arguments
123 ///
124 /// * `range` - The byte range to extract from the original text
125 ///
126 /// # Returns
127 ///
128 /// A new [`SourceText`] instance containing the extracted text slice
129 ///
130 /// # Examples
131 ///
132 /// ```
133 /// let source = SourceText::new("fn main() {\n println!(\"Hello\");\n}");
134 /// let slice = source.slice(0..12); // "fn main() {"
135 /// ```
136 #[allow(mismatched_lifetime_syntaxes)]
137 pub fn view(&self, range: Range<usize>) -> SourceView {
138 SourceView { source: self, range }
139 }
140
141 /// Gets the URL associated with this source text, if any.
142 ///
143 /// # Returns
144 ///
145 /// An [`Option<&Url>`] containing the URL reference if one was set,
146 /// or `None` if no URL is associated with this source text.
147 ///
148 /// # Examples
149 ///
150 /// ```
151 /// let source = SourceText::new_with_url("code", Url::parse("file:///main.rs").unwrap());
152 /// assert!(source.get_url().is_some());
153 /// ```
154 pub fn get_url(&self) -> Option<&Url> {
155 self.url.as_ref()
156 }
157
158 /// Gets the length of the source text in bytes.
159 ///
160 /// # Returns
161 ///
162 /// The length of the source text in bytes.
163 ///
164 /// # Examples
165 ///
166 /// ```
167 /// let source = SourceText::new("Hello, world!");
168 /// assert_eq!(source.len(), 13);
169 /// ```
170 pub fn len(&self) -> usize {
171 self.raw.len()
172 }
173
174 /// Checks if the source text is empty.
175 ///
176 /// # Returns
177 ///
178 /// `true` if the source text is empty, `false` otherwise.
179 ///
180 /// # Examples
181 ///
182 /// ```
183 /// let source = SourceText::new("");
184 /// assert!(source.is_empty());
185 /// ```
186 pub fn is_empty(&self) -> bool {
187 self.raw.is_empty()
188 }
189}
190
191impl SourceText {
192 /// Creates a new SourceText from a string.
193 ///
194 /// # Arguments
195 ///
196 /// * `input` - The source code text
197 ///
198 /// # Examples
199 ///
200 /// ```
201 /// let source = SourceText::new("fn main() {}");
202 /// ```
203 pub fn new(input: impl ToString) -> Self {
204 let text = input.to_string();
205 let line_map = build_line_map(&text);
206 Self { url: None, raw: text, line_map }
207 }
208 /// Creates a new SourceText from a string with an optional URL.
209 ///
210 /// # Arguments
211 ///
212 /// * `input` - The source code text
213 /// * `url` - URL reference for the source file
214 ///
215 /// # Examples
216 ///
217 /// ```
218 /// let source = SourceText::new_with_url("fn main() {}", Url::parse("file:///main.rs").unwrap());
219 /// ```
220 pub fn new_with_url(input: impl ToString, url: Url) -> Self {
221 let text = input.to_string();
222 let line_map = build_line_map(&text);
223 Self { url: Some(url), raw: text, line_map }
224 }
225
226 /// Converts an LSP TextEdit to a TextEdit.
227 ///
228 /// # Arguments
229 ///
230 /// * `edit` - The LSP TextEdit to convert
231 ///
232 /// # Returns
233 ///
234 /// A `TextEdit` with byte-based span suitable for internal use.
235 ///
236 /// # Availability
237 ///
238 /// This method is only available when the `lsp-types` feature is enabled.
239 pub fn lsp_to_text_edit(&self, edit: lsp_types::TextEdit) -> TextEdit {
240 TextEdit { span: self.lsp_range_to_span(edit.range), text: edit.new_text }
241 }
242
243 /// Creates a kind error with location information.
244 ///
245 /// # Arguments
246 ///
247 /// * `message` - The error message
248 /// * `offset` - The byte offset where the error occurred
249 ///
250 /// # Returns
251 ///
252 /// A `PexError` with precise location information including line and column.
253 ///
254 /// # Examples
255 ///
256 /// ```
257 /// let source = SourceText::new("let x =");
258 /// let error = source.syntax_error("Unexpected end of input", 7);
259 /// ```
260 /// Creates a kind error with location information.
261 ///
262 /// # Arguments
263 ///
264 /// * `message` - The error message
265 /// * `offset` - The byte offset where the error occurred
266 ///
267 /// # Returns
268 ///
269 /// A `PexError` with precise location information including line and column.
270 ///
271 /// # Examples
272 ///
273 /// ```
274 /// let source = SourceText::new("let x =");
275 /// let error = source.syntax_error("Unexpected end of input", 7);
276 /// ```
277 pub fn syntax_error(&self, message: impl Into<String>, offset: usize) -> OakError {
278 OakError::syntax_error(message, self.get_location(offset))
279 }
280
281 /// Creates an error for an unexpected character with location information.
282 ///
283 /// # Arguments
284 ///
285 /// * `character` - The unexpected character
286 /// * `offset` - The byte offset where the unexpected character was found
287 ///
288 /// # Returns
289 ///
290 /// A `PexError` with precise location information including line and column.
291 ///
292 /// # Examples
293 ///
294 /// ```
295 /// let source = SourceText::new("let x@y = 5");
296 /// let error = source.unexpected_character('@', 6);
297 /// ```
298 pub fn unexpected_character(&self, character: char, offset: usize) -> OakError {
299 OakError::unexpected_character(character, self.get_location(offset))
300 }
301
302 /// Gets the source location for a given byte offset.
303 ///
304 /// # Arguments
305 ///
306 /// * `offset` - The byte offset to get location for
307 ///
308 /// # Returns
309 ///
310 /// A `SourceLocation` with line, column, and optional URL information.
311 ///
312 /// # Examples
313 ///
314 /// ```
315 /// let source = SourceText::new("line 1\nline 2\n");
316 /// let location = source.get_location(8); // Start of second line
317 /// assert_eq!(location.line, 2);
318 /// assert_eq!(location.column, 0);
319 /// ```
320 pub fn get_location(&self, offset: usize) -> SourceLocation {
321 let position = self.offset_to_position(offset);
322 // SourceLocation 的行号语义为 1-based
323 SourceLocation { line: position.line + 1, column: position.character, url: self.url.clone() }
324 }
325}
326
/// Computes the byte offset at which every line of `text` begins.
///
/// The first entry is always `0`. A new line starts immediately after each
/// line terminator, where a terminator is `\r\n` (treated as a single unit),
/// a lone `\r`, or a lone `\n`.
fn build_line_map(text: &str) -> Vec<usize> {
    // `\r` and `\n` are single-byte ASCII and never occur inside a multi-byte
    // UTF-8 sequence, so scanning raw bytes finds exactly the same terminators
    // as scanning characters would.
    let bytes = text.as_bytes();
    let mut starts = Vec::from([0usize]);
    let mut pos = 0;

    while pos < bytes.len() {
        // Width of the line terminator beginning at `pos`, or 0 if there is none.
        let terminator_len = match bytes[pos] {
            b'\r' if bytes.get(pos + 1) == Some(&b'\n') => 2,
            b'\r' | b'\n' => 1,
            _ => 0,
        };

        if terminator_len == 0 {
            pos += 1;
        }
        else {
            pos += terminator_len;
            starts.push(pos);
        }
    }

    starts
}