Skip to main content

oak_core/source/
text.rs

1use crate::{
2    OakError,
3    source::{Source, SourceId, TextEdit},
4};
5use core::range::Range;
6use std::borrow::Cow;
7use triomphe::Arc;
8
9/// Represents source code text with optional source ID reference.
10///
11/// This struct manages the raw source text and provides utilities for:
12/// - Text extraction at specific offsets or ranges
13/// - Error reporting with precise location information
14#[derive(Clone, Debug, PartialEq, Eq, Hash)]
15pub struct SourceText {
16    pub(crate) source_id: Option<SourceId>,
17    pub(crate) raw: Arc<str>,
18}
19
20impl Default for SourceText {
21    fn default() -> Self {
22        Self { source_id: None, raw: Arc::from("") }
23    }
24}
25
26impl Source for SourceText {
27    fn length(&self) -> usize {
28        self.raw.len()
29    }
30
31    fn chunk_at(&self, offset: usize) -> crate::source::TextChunk<'_> {
32        let len = self.raw.len();
33        if offset >= len {
34            return crate::source::TextChunk { start: len, text: "" };
35        }
36        crate::source::TextChunk { start: offset, text: self.raw.get(offset..).unwrap_or("") }
37    }
38
39    fn get_text_in(&self, range: Range<usize>) -> Cow<'_, str> {
40        self.raw.get(range.start..range.end).map(Cow::Borrowed).unwrap_or(Cow::Borrowed(""))
41    }
42
43    fn source_id(&self) -> Option<SourceId> {
44        self.source_id
45    }
46}
47
48impl SourceText {
49    /// Creates a new source text from the given input.
50    pub fn new(input: impl Into<Arc<str>>) -> Self {
51        Self { source_id: None, raw: input.into() }
52    }
53
54    /// Creates a new source text from the given input and source ID.
55    pub fn new_with_id(input: impl Into<Arc<str>>, source_id: SourceId) -> Self {
56        Self { source_id: Some(source_id), raw: input.into() }
57    }
58
59    /// Returns the raw source text as a string slice.
60    pub fn text(&self) -> &str {
61        &self.raw
62    }
63
64    /// Returns a slice of the source text in the given range.
65    pub fn slice(&self, range: Range<usize>) -> Cow<'_, str> {
66        self.get_text_in(range)
67    }
68
69    /// Applies multiple text edits to the source text and returns the affected range.
70    pub fn apply_edits_range(&mut self, edits: &[TextEdit]) -> Range<usize> {
71        let old_len = self.raw.len();
72        if edits.is_empty() {
73            return Range { start: old_len, end: old_len };
74        }
75
76        let mut order: Vec<usize> = (0..edits.len()).collect();
77        order.sort_by_key(|&i| edits[i].span.start);
78
79        let mut reparse_from = old_len;
80        let mut reparse_to = 0;
81        let mut delta: isize = 0;
82
83        for &i in &order {
84            let TextEdit { span, text } = &edits[i];
85            reparse_from = reparse_from.min(span.start);
86            let start_new = (span.start as isize + delta) as usize;
87            let end_new = start_new + text.len();
88            reparse_to = reparse_to.max(end_new);
89            delta += text.len() as isize - (span.end - span.start) as isize
90        }
91
92        let mut raw = self.raw.to_string();
93        for &i in order.iter().rev() {
94            let TextEdit { span, text } = &edits[i];
95            raw.replace_range(span.start..span.end, text)
96        }
97        self.raw = Arc::from(raw);
98
99        Range { start: reparse_from, end: reparse_to }
100    }
101
102    /// Applies multiple text edits to the source text and returns the minimum affected offset.
103    ///
104    /// This method is used for incremental updates to source code, such as those
105    /// received from LSP clients or other text editing operations.
106    ///
107    /// # Arguments
108    ///
109    /// * `edits` - A slice of [`TextEdit`] operations to apply
110    ///
111    /// # Returns
112    ///
113    /// The minimum byte offset that was affected by any of the edits. This is
114    /// useful for determining where to restart parsing after incremental changes.
115    ///
116    /// # Examples
117    ///
118    /// ```
119    /// # #![feature(new_range_api)]
120    /// # use oak_core::source::SourceText;
121    /// # use oak_core::source::TextEdit;
122    /// # use core::range::Range;
123    /// let mut source = SourceText::new("let x = 5;");
124    /// let edits = vec![TextEdit { span: Range { start: 4, end: 5 }, text: "y".to_string().into() }];
125    /// let min_offset = source.apply_edits(&edits);
126    /// assert_eq!(min_offset, 4);
127    /// ```
128    pub fn apply_edits(&mut self, edits: &[TextEdit]) -> usize {
129        self.apply_edits_range(edits).start
130    }
131
132    /// Gets the source ID associated with this source text, if any.
133    ///
134    /// # Returns
135    ///
136    /// An [`Option<SourceId>`] containing the source ID if one was set,
137    /// or `None` if no ID is associated with this source text.
138    ///
139    /// # Examples
140    ///
141    /// ```
142    /// # use oak_core::SourceText;
143    /// let source = SourceText::new_with_id("code", 1);
144    /// assert!(source.source_id().is_some());
145    /// ```
146    pub fn source_id(&self) -> Option<SourceId> {
147        self.source_id
148    }
149
150    /// Gets the length of the source text in bytes.
151    ///
152    /// # Returns
153    ///
154    /// The length of the source text in bytes.
155    ///
156    /// # Examples
157    ///
158    /// ```
159    /// # use oak_core::SourceText;
160    /// let source = SourceText::new("Hello, world!");
161    /// assert_eq!(source.len(), 13);
162    /// ```
163    pub fn len(&self) -> usize {
164        self.raw.len()
165    }
166
167    /// Checks if the source text is empty.
168    ///
169    /// # Returns
170    ///
171    /// `true` if the source text is empty, `false` otherwise.
172    ///
173    /// # Examples
174    ///
175    /// ```
176    /// # use oak_core::SourceText;
177    /// let source = SourceText::new("");
178    /// assert!(source.is_empty());
179    /// ```
180    pub fn is_empty(&self) -> bool {
181        self.raw.is_empty()
182    }
183
184    /// Creates a syntax error with location information.
185    ///
186    /// # Arguments
187    ///
188    /// * `message` - The error message
189    /// * `offset` - The byte offset where the error occurred
190    ///
191    /// # Returns
192    ///
193    /// An [`OakError`] with precise location information including line and column.
194    ///
195    /// # Examples
196    ///
197    /// ```
198    /// # use oak_core::SourceText;
199    /// let source = SourceText::new("let x =");
200    /// let error = source.syntax_error("Unexpected end of input", 7);
201    /// ```
202    pub fn syntax_error(&self, message: impl Into<String>, offset: usize) -> OakError {
203        OakError::syntax_error(message, offset, self.source_id)
204    }
205
206    /// Creates an error for an unexpected character at the specified offset.
207    pub fn unexpected_character(&self, character: char, offset: usize) -> OakError {
208        OakError::unexpected_character(character, offset, self.source_id)
209    }
210
211    /// Creates an error for an expected token that was missing at the specified offset.
212    pub fn expected_token(&self, expected: impl Into<String>, offset: usize) -> OakError {
213        OakError::expected_token(expected, offset, self.source_id)
214    }
215
216    /// Creates an error for an expected name that was missing at the specified offset.
217    pub fn expected_name(&self, name_kind: impl Into<String>, offset: usize) -> OakError {
218        OakError::expected_name(name_kind, offset, self.source_id)
219    }
220
221    /// Creates an error for a trailing comma that is not allowed at the specified offset.
222    pub fn trailing_comma_not_allowed(&self, offset: usize) -> OakError {
223        OakError::trailing_comma_not_allowed(offset, self.source_id)
224    }
225}