oak_core/source/
text.rs

1use crate::{
2    OakError, SourceLocation,
3    source::{Source, SourceView, TextEdit},
4};
5use lsp_types::Position;
6use serde::{Deserialize, Serialize};
7use std::range::Range;
8use url::Url;
9
10/// Represents source code text with line mapping and optional URL reference.
11///
12/// This struct manages the raw source text and provides utilities for:
13/// - Text extraction at specific offsets or ranges
14/// - Character and line/column position tracking
15/// - LSP position and range conversions (when `lsp-types` feature is enabled)
16/// - Error reporting with precise location information
17#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, Serialize, Deserialize)]
18pub struct SourceText {
19    pub(crate) url: Option<Url>,
20    pub(crate) raw: String,
21    pub(crate) line_map: Vec<usize>,
22}
23
24impl<'input> Source for &'input SourceText {
25    fn length(&self) -> usize {
26        self.raw.len()
27    }
28
29    fn get_text_in(&self, range: Range<usize>) -> &str {
30        self.raw.get(range.start..range.end).unwrap_or("")
31    }
32
33    fn offset_to_position(&self, offset: usize) -> Position {
34        let total_len = self.raw.len();
35        // 超出文件范围,返回最后一行的末尾位置(0-based 行号)
36        if offset >= total_len {
37            let last_line_idx = self.line_map.len().saturating_sub(1);
38            let line_start = self.line_map.get(last_line_idx).copied().unwrap_or(0);
39            let line = last_line_idx as u32; // 0-based
40            let column = (total_len.saturating_sub(line_start)) as u32; // 以字符计数
41            return Position { line, character: column };
42        }
43
44        // 二分查找所在行的起始偏移
45        let line_idx = self.line_map.binary_search(&offset).unwrap_or_else(|idx| idx.saturating_sub(1));
46
47        let line_start = self.line_map[line_idx];
48        let line = line_idx as u32; // 0-based 行号
49        // 计算列:从行起始到目标偏移的字符数量
50        let column = self.raw[line_start..offset].chars().count() as u32;
51        Position { line, character: column }
52    }
53
54    fn position_to_offset(&self, position: Position) -> usize {
55        let line = position.line as usize;
56        let column = position.character as usize;
57
58        // 超出了文件范围
59        if line >= self.line_map.len() {
60            // 超出了最后一行,返回文件末尾
61            return self.raw.len();
62        }
63
64        let line_start = self.line_map[line];
65
66        // Find the end of this line
67        let line_end = if line + 1 < self.line_map.len() { self.line_map[line + 1] } else { self.raw.len() };
68
69        // Calculate the byte offset within the line, handling UTF-8 character boundaries
70        let mut current_column = 0;
71        let mut offset = line_start;
72
73        for ch in self.raw[line_start..line_end].chars() {
74            if current_column >= column {
75                break;
76            }
77            current_column += 1;
78            offset += ch.len_utf8();
79        }
80
81        offset
82    }
83}
84impl SourceText {
85    /// Applies multiple text edits to the source text and returns the minimum affected offset.
86    ///
87    /// This method is used for incremental updates to source code, such as those
88    /// received from LSP clients or other text editing operations.
89    ///
90    /// # Arguments
91    ///
92    /// * `edits` - A slice of [`TextEdit`] operations to apply
93    ///
94    /// # Returns
95    ///
96    /// The minimum byte offset that was affected by any of the edits. This is
97    /// useful for determining where to restart parsing after incremental changes.
98    ///
99    /// # Examples
100    ///
101    /// ```
102    /// let mut source = SourceText::new("let x = 5;");
103    /// let edits = vec![TextEdit { span: 4..5, text: "y".to_string() }];
104    /// let min_offset = source.apply_edits(&edits);
105    /// assert_eq!(min_offset, 4);
106    /// ```
107    pub fn apply_edits(&mut self, edits: &[TextEdit]) -> usize {
108        let mut min = self.raw.len();
109        for TextEdit { span, text } in edits {
110            min = min.min(span.start);
111            self.raw.replace_range(span.start..span.end, text);
112        }
113        min
114    }
115
116    /// Creates a new [`SourceText`] containing a slice of the original text.
117    ///
118    /// This method extracts a portion of the source text and creates a new
119    /// [`SourceText`] instance with the extracted content. The line map is
120    /// rebuilt for the new content.
121    ///
122    /// # Arguments
123    ///
124    /// * `range` - The byte range to extract from the original text
125    ///
126    /// # Returns
127    ///
128    /// A new [`SourceText`] instance containing the extracted text slice
129    ///
130    /// # Examples
131    ///
132    /// ```
133    /// let source = SourceText::new("fn main() {\n    println!(\"Hello\");\n}");
134    /// let slice = source.slice(0..12); // "fn main() {"
135    /// ```
136    #[allow(mismatched_lifetime_syntaxes)]
137    pub fn view(&self, range: Range<usize>) -> SourceView {
138        SourceView { source: self, range }
139    }
140
141    /// Gets the URL associated with this source text, if any.
142    ///
143    /// # Returns
144    ///
145    /// An [`Option<&Url>`] containing the URL reference if one was set,
146    /// or `None` if no URL is associated with this source text.
147    ///
148    /// # Examples
149    ///
150    /// ```
151    /// let source = SourceText::new_with_url("code", Url::parse("file:///main.rs").unwrap());
152    /// assert!(source.get_url().is_some());
153    /// ```
154    pub fn get_url(&self) -> Option<&Url> {
155        self.url.as_ref()
156    }
157
158    /// Gets the length of the source text in bytes.
159    ///
160    /// # Returns
161    ///
162    /// The length of the source text in bytes.
163    ///
164    /// # Examples
165    ///
166    /// ```
167    /// let source = SourceText::new("Hello, world!");
168    /// assert_eq!(source.len(), 13);
169    /// ```
170    pub fn len(&self) -> usize {
171        self.raw.len()
172    }
173
174    /// Checks if the source text is empty.
175    ///
176    /// # Returns
177    ///
178    /// `true` if the source text is empty, `false` otherwise.
179    ///
180    /// # Examples
181    ///
182    /// ```
183    /// let source = SourceText::new("");
184    /// assert!(source.is_empty());
185    /// ```
186    pub fn is_empty(&self) -> bool {
187        self.raw.is_empty()
188    }
189}
190
191impl SourceText {
192    /// Creates a new SourceText from a string.
193    ///
194    /// # Arguments
195    ///
196    /// * `input` - The source code text
197    ///
198    /// # Examples
199    ///
200    /// ```
201    /// let source = SourceText::new("fn main() {}");
202    /// ```
203    pub fn new(input: impl ToString) -> Self {
204        let text = input.to_string();
205        let line_map = build_line_map(&text);
206        Self { url: None, raw: text, line_map }
207    }
208    /// Creates a new SourceText from a string with an optional URL.
209    ///
210    /// # Arguments
211    ///
212    /// * `input` - The source code text
213    /// * `url` - URL reference for the source file
214    ///
215    /// # Examples
216    ///
217    /// ```
218    /// let source = SourceText::new_with_url("fn main() {}", Url::parse("file:///main.rs").unwrap());
219    /// ```
220    pub fn new_with_url(input: impl ToString, url: Url) -> Self {
221        let text = input.to_string();
222        let line_map = build_line_map(&text);
223        Self { url: Some(url), raw: text, line_map }
224    }
225
226    /// Converts an LSP TextEdit to a TextEdit.
227    ///
228    /// # Arguments
229    ///
230    /// * `edit` - The LSP TextEdit to convert
231    ///
232    /// # Returns
233    ///
234    /// A `TextEdit` with byte-based span suitable for internal use.
235    ///
236    /// # Availability
237    ///
238    /// This method is only available when the `lsp-types` feature is enabled.
239    pub fn lsp_to_text_edit(&self, edit: lsp_types::TextEdit) -> TextEdit {
240        TextEdit { span: self.lsp_range_to_span(edit.range), text: edit.new_text }
241    }
242
243    /// Creates a kind error with location information.
244    ///
245    /// # Arguments
246    ///
247    /// * `message` - The error message
248    /// * `offset` - The byte offset where the error occurred
249    ///
250    /// # Returns
251    ///
252    /// A `PexError` with precise location information including line and column.
253    ///
254    /// # Examples
255    ///
256    /// ```
257    /// let source = SourceText::new("let x =");
258    /// let error = source.syntax_error("Unexpected end of input", 7);
259    /// ```
260    /// Creates a kind error with location information.
261    ///
262    /// # Arguments
263    ///
264    /// * `message` - The error message
265    /// * `offset` - The byte offset where the error occurred
266    ///
267    /// # Returns
268    ///
269    /// A `PexError` with precise location information including line and column.
270    ///
271    /// # Examples
272    ///
273    /// ```
274    /// let source = SourceText::new("let x =");
275    /// let error = source.syntax_error("Unexpected end of input", 7);
276    /// ```
277    pub fn syntax_error(&self, message: impl Into<String>, offset: usize) -> OakError {
278        OakError::syntax_error(message, self.get_location(offset))
279    }
280
281    /// Creates an error for an unexpected character with location information.
282    ///
283    /// # Arguments
284    ///
285    /// * `character` - The unexpected character
286    /// * `offset` - The byte offset where the unexpected character was found
287    ///
288    /// # Returns
289    ///
290    /// A `PexError` with precise location information including line and column.
291    ///
292    /// # Examples
293    ///
294    /// ```
295    /// let source = SourceText::new("let x@y = 5");
296    /// let error = source.unexpected_character('@', 6);
297    /// ```
298    pub fn unexpected_character(&self, character: char, offset: usize) -> OakError {
299        OakError::unexpected_character(character, self.get_location(offset))
300    }
301
302    /// Gets the source location for a given byte offset.
303    ///
304    /// # Arguments
305    ///
306    /// * `offset` - The byte offset to get location for
307    ///
308    /// # Returns
309    ///
310    /// A `SourceLocation` with line, column, and optional URL information.
311    ///
312    /// # Examples
313    ///
314    /// ```
315    /// let source = SourceText::new("line 1\nline 2\n");
316    /// let location = source.get_location(8); // Start of second line
317    /// assert_eq!(location.line, 2);
318    /// assert_eq!(location.column, 0);
319    /// ```
320    pub fn get_location(&self, offset: usize) -> SourceLocation {
321        let position = self.offset_to_position(offset);
322        // SourceLocation 的行号语义为 1-based
323        SourceLocation { line: position.line + 1, column: position.character, url: self.url.clone() }
324    }
325}
326
/// Computes the byte offset at which every line of `text` begins.
///
/// The first entry is always `0`. A new line starts after each LF (`\n`),
/// each CRLF pair (`\r\n`, counted as a single terminator), and each
/// standalone CR (`\r`).
fn build_line_map(text: &str) -> Vec<usize> {
    // `\r` and `\n` are single-byte ASCII, so scanning raw bytes finds exactly
    // the same terminators as scanning characters.
    let bytes = text.as_bytes();
    let mut starts = Vec::from([0usize]);
    let mut cursor = 0;

    while let Some(&byte) = bytes.get(cursor) {
        cursor += 1;
        match byte {
            b'\n' => starts.push(cursor),
            b'\r' => {
                // Swallow the LF of a CRLF pair so it is not counted twice.
                if bytes.get(cursor) == Some(&b'\n') {
                    cursor += 1;
                }
                starts.push(cursor);
            }
            _ => {}
        }
    }

    starts
}