Skip to main content

oak_core/source/
rope.rs

1use crate::source::{Source, SourceId, TextChunk, TextEdit};
2use core::range::Range;
3use std::borrow::Cow;
4use triomphe::Arc;
5
6const CHUNK_SIZE: usize = 4096;
7
8/// A read-only, rope-based source implementation for efficient handling of large files.
9#[derive(Clone, Debug)]
10pub struct RopeSource {
11    source_id: Option<SourceId>,
12    chunks: Arc<[Arc<str>]>,
13    starts: Arc<[usize]>,
14    len: usize,
15}
16
17/// A mutable buffer for rope-based source code, supporting efficient edits.
18#[derive(Clone, Debug)]
19pub struct RopeBuffer {
20    source_id: Option<SourceId>,
21    chunks: Vec<Arc<str>>,
22    starts: Vec<usize>,
23    len: usize,
24}
25
26impl RopeBuffer {
27    /// Creates a new empty `RopeBuffer`.
28    pub fn new(input: impl ToString) -> Self {
29        Self::new_with_id(input, None)
30    }
31
32    /// Creates a new `RopeBuffer` with the specified input and source ID.
33    pub fn new_with_id(input: impl ToString, source_id: impl Into<Option<SourceId>>) -> Self {
34        let source_id = source_id.into();
35        let text = input.to_string();
36        let chunks = chunkify(&text);
37        let (starts, len) = rebuild_starts(&chunks);
38        Self { source_id, chunks, starts, len }
39    }
40
41    /// Returns a read-only snapshot of the current buffer.
42    pub fn snapshot(&self) -> RopeSource {
43        RopeSource { source_id: self.source_id, chunks: Arc::<[Arc<str>]>::from(self.chunks.clone()), starts: Arc::<[usize]>::from(self.starts.clone()), len: self.len }
44    }
45
46    /// Applies multiple text edits to the buffer and returns the affected range.
47    pub fn apply_edits_range(&mut self, edits: &[TextEdit]) -> Range<usize> {
48        let old_len = self.len;
49        if edits.is_empty() {
50            return Range { start: old_len, end: old_len };
51        }
52
53        let mut order: Vec<usize> = (0..edits.len()).collect();
54        order.sort_by_key(|&i| edits[i].span.start);
55
56        let mut reparse_from = old_len;
57        let mut reparse_to = 0;
58        let mut delta: isize = 0;
59        for &i in &order {
60            let TextEdit { span, text } = &edits[i];
61            reparse_from = reparse_from.min(span.start);
62            let start_new = (span.start as isize + delta) as usize;
63            let end_new = start_new + text.len();
64            reparse_to = reparse_to.max(end_new);
65            delta += text.len() as isize - (span.end - span.start) as isize;
66        }
67
68        for &i in order.iter().rev() {
69            let TextEdit { span, text } = &edits[i];
70            self.replace_range(Range { start: span.start, end: span.end }, text);
71        }
72
73        Range { start: reparse_from, end: reparse_to }
74    }
75
76    /// Applies multiple text edits to the buffer and returns the minimum affected offset.
77    pub fn apply_edits(&mut self, edits: &[TextEdit]) -> usize {
78        self.apply_edits_range(edits).start
79    }
80
81    fn replace_range(&mut self, span: Range<usize>, replacement: &str) {
82        let start = span.start.min(self.len);
83        let end = span.end.min(self.len).max(start);
84
85        self.split_at(start);
86        self.split_at(end);
87
88        let start_idx = self.boundary_index(start);
89        let end_idx = self.boundary_index(end);
90        if start_idx < end_idx {
91            self.chunks.drain(start_idx..end_idx);
92        }
93
94        if !replacement.is_empty() {
95            let new_chunks = chunkify(replacement);
96            let insert_at = start_idx.min(self.chunks.len());
97            self.chunks.splice(insert_at..insert_at, new_chunks);
98        }
99
100        let (starts, len) = rebuild_starts(&self.chunks);
101        self.starts = starts;
102        self.len = len;
103    }
104
105    fn split_at(&mut self, offset: usize) {
106        if offset == 0 || offset >= self.len {
107            return;
108        }
109        let (idx, start) = self.chunk_index(offset);
110        let rel = offset - start;
111        let s = self.chunks[idx].clone();
112        let s = s.as_ref();
113        if rel == 0 || rel >= s.len() {
114            return;
115        }
116        let Some(left) = s.get(..rel)
117        else {
118            return;
119        };
120        let Some(right) = s.get(rel..)
121        else {
122            return;
123        };
124        if left.is_empty() || right.is_empty() {
125            return;
126        }
127        self.chunks[idx] = Arc::<str>::from(left.to_string());
128        self.chunks.insert(idx + 1, Arc::<str>::from(right.to_string()));
129        let (starts, len) = rebuild_starts(&self.chunks);
130        self.starts = starts;
131        self.len = len;
132    }
133
134    fn boundary_index(&self, offset: usize) -> usize {
135        if offset >= self.len {
136            return self.chunks.len();
137        }
138        match self.starts.binary_search(&offset) {
139            Ok(i) => i,
140            Err(i) => i,
141        }
142    }
143
144    fn chunk_index(&self, offset: usize) -> (usize, usize) {
145        let offset = offset.min(self.len.saturating_sub(1));
146        match self.starts.binary_search(&offset) {
147            Ok(i) => (i, self.starts[i]),
148            Err(0) => (0, 0),
149            Err(i) => (i - 1, self.starts[i - 1]),
150        }
151    }
152}
153
154impl Source for RopeBuffer {
155    fn length(&self) -> usize {
156        self.len
157    }
158
159    fn chunk_at(&self, offset: usize) -> TextChunk<'_> {
160        if offset >= self.len {
161            return TextChunk { start: self.len, text: "" };
162        }
163        let (idx, start) = self.chunk_index(offset);
164        TextChunk { start, text: self.chunks[idx].as_ref() }
165    }
166
167    fn get_text_in(&self, range: Range<usize>) -> Cow<'_, str> {
168        text_in_chunks(&self.chunks, &self.starts, self.len, range)
169    }
170
171    fn source_id(&self) -> Option<SourceId> {
172        self.source_id
173    }
174}
175
176impl Source for RopeSource {
177    fn length(&self) -> usize {
178        self.len
179    }
180
181    fn chunk_at(&self, offset: usize) -> TextChunk<'_> {
182        if offset >= self.len {
183            return TextChunk { start: self.len, text: "" };
184        }
185        let idx = match self.starts.binary_search(&offset) {
186            Ok(i) => i,
187            Err(0) => 0,
188            Err(i) => i - 1,
189        };
190        let start = self.starts[idx];
191        TextChunk { start, text: self.chunks[idx].as_ref() }
192    }
193
194    fn get_text_in(&self, range: Range<usize>) -> Cow<'_, str> {
195        text_in_chunks(&self.chunks, &self.starts, self.len, range)
196    }
197
198    fn source_id(&self) -> Option<SourceId> {
199        self.source_id
200    }
201}
202
203fn rebuild_starts(chunks: &[Arc<str>]) -> (Vec<usize>, usize) {
204    let mut starts = Vec::with_capacity(chunks.len());
205    let mut offset = 0usize;
206    for c in chunks {
207        starts.push(offset);
208        offset += c.len();
209    }
210    (starts, offset)
211}
212
213fn chunkify(text: &str) -> Vec<Arc<str>> {
214    if text.is_empty() {
215        return vec![];
216    }
217    let mut out = Vec::new();
218    let mut start = 0usize;
219    while start < text.len() {
220        let mut end = (start + CHUNK_SIZE).min(text.len());
221        while end > start && !text.is_char_boundary(end) {
222            end -= 1;
223        }
224        if end == start {
225            end = text.len();
226        }
227        let part = &text[start..end];
228        out.push(Arc::<str>::from(part.to_string()));
229        start = end;
230    }
231    out
232}
233
234fn text_in_chunks<'a>(chunks: &'a [Arc<str>], starts: &'a [usize], len: usize, range: Range<usize>) -> Cow<'a, str> {
235    if range.start >= range.end || range.start >= len {
236        return Cow::Borrowed("");
237    }
238    let start = range.start;
239    let end = range.end.min(len);
240
241    let start_idx = match starts.binary_search(&start) {
242        Ok(i) => i,
243        Err(0) => 0,
244        Err(i) => i - 1,
245    };
246    let end_idx = match starts.binary_search(&end) {
247        Ok(i) => i,
248        Err(0) => 0,
249        Err(i) => i - 1,
250    };
251
252    if start_idx == end_idx {
253        let base = starts[start_idx];
254        let rel_start = start - base;
255        let rel_end = end - base;
256        let s = chunks[start_idx].as_ref();
257        return s.get(rel_start..rel_end).map(Cow::Borrowed).unwrap_or(Cow::Borrowed(""));
258    }
259
260    let mut buf = String::new();
261    for (i, c) in chunks.iter().enumerate().skip(start_idx).take(end_idx - start_idx + 1) {
262        let base = starts[i];
263        let cs = c.as_ref();
264        let seg_start = if i == start_idx { start.saturating_sub(base) } else { 0 };
265        let seg_end = if i == end_idx { end.saturating_sub(base) } else { cs.len() };
266        if let Some(seg) = cs.get(seg_start..seg_end) {
267            buf.push_str(seg);
268        }
269    }
270    Cow::Owned(buf)
271}