Skip to main content

oak_core/source/
streaming.rs

1use crate::source::{Source, SourceId, TextChunk};
2use core::range::Range;
3use std::borrow::Cow;
4use triomphe::Arc;
5
/// Target upper bound, in bytes, for each chunk that `chunkify` cuts out of appended text.
const CHUNK_SIZE: usize = 4096;
7
/// A read-only, chunked source implementation for efficient handling of streamed or large files.
///
/// Values are produced by [`ChunkedBuffer::snapshot`]; the chunk and offset tables are held
/// behind `Arc`s.
#[derive(Clone, Debug)]
pub struct ChunkedSource {
    /// Optional identifier reported through `Source::source_id`.
    source_id: Option<SourceId>,
    /// The text pieces, in document order.
    chunks: Arc<[Arc<str>]>,
    /// Byte offset at which the chunk with the same index begins.
    starts: Arc<[usize]>,
    /// Total length of all chunks, in bytes.
    len: usize,
}
16
/// A mutable buffer for chunked source code, supporting efficient appending of text.
///
/// Text is appended with `push_str`, and `snapshot` produces a read-only [`ChunkedSource`]
/// holding the content accumulated so far.
#[derive(Clone, Debug, Default)]
pub struct ChunkedBuffer {
    /// Optional identifier reported through `Source::source_id`.
    source_id: Option<SourceId>,
    /// The text pieces appended so far, in document order.
    chunks: Vec<Arc<str>>,
    /// Byte offset at which the chunk with the same index begins.
    starts: Vec<usize>,
    /// Total length of all chunks, in bytes.
    len: usize,
}
25
26impl ChunkedBuffer {
27    /// Creates a new empty `ChunkedBuffer`.
28    pub fn new() -> Self {
29        Self::default()
30    }
31
32    /// Creates a new `ChunkedBuffer` with the specified source ID.
33    pub fn new_with_id(source_id: impl Into<Option<SourceId>>) -> Self {
34        Self { source_id: source_id.into(), ..Self::default() }
35    }
36
37    /// Appends the specified string to the end of the buffer.
38    pub fn push_str(&mut self, text: &str) {
39        if text.is_empty() {
40            return;
41        }
42        for chunk in chunkify(text) {
43            self.starts.push(self.len);
44            self.len += chunk.len();
45            self.chunks.push(chunk);
46        }
47    }
48
49    /// Returns a read-only snapshot of the current buffer.
50    pub fn snapshot(&self) -> ChunkedSource {
51        ChunkedSource { source_id: self.source_id, chunks: Arc::<[Arc<str>]>::from(self.chunks.clone()), starts: Arc::<[usize]>::from(self.starts.clone()), len: self.len }
52    }
53}
54
55impl Source for ChunkedBuffer {
56    fn length(&self) -> usize {
57        self.len
58    }
59
60    fn chunk_at(&self, offset: usize) -> TextChunk<'_> {
61        if offset >= self.len {
62            return TextChunk { start: self.len, text: "" };
63        }
64        let idx = match self.starts.binary_search(&offset) {
65            Ok(i) => i,
66            Err(0) => 0,
67            Err(i) => i - 1,
68        };
69        let start = self.starts[idx];
70        TextChunk { start, text: self.chunks[idx].as_ref() }
71    }
72
73    fn get_text_in(&self, range: Range<usize>) -> Cow<'_, str> {
74        text_in_chunks(&self.chunks, &self.starts, self.len, range)
75    }
76
77    fn source_id(&self) -> Option<SourceId> {
78        self.source_id
79    }
80}
81
82impl Source for ChunkedSource {
83    fn length(&self) -> usize {
84        self.len
85    }
86
87    fn chunk_at(&self, offset: usize) -> TextChunk<'_> {
88        if offset >= self.len {
89            return TextChunk { start: self.len, text: "" };
90        }
91        let idx = match self.starts.binary_search(&offset) {
92            Ok(i) => i,
93            Err(0) => 0,
94            Err(i) => i - 1,
95        };
96        let start = self.starts[idx];
97        TextChunk { start, text: self.chunks[idx].as_ref() }
98    }
99
100    fn get_text_in(&self, range: Range<usize>) -> Cow<'_, str> {
101        text_in_chunks(&self.chunks, &self.starts, self.len, range)
102    }
103
104    fn source_id(&self) -> Option<SourceId> {
105        self.source_id
106    }
107}
108
109fn chunkify(text: &str) -> Vec<Arc<str>> {
110    if text.is_empty() {
111        return vec![];
112    }
113    let mut out = Vec::new();
114    let mut start = 0usize;
115    while start < text.len() {
116        let mut end = (start + CHUNK_SIZE).min(text.len());
117        while end > start && !text.is_char_boundary(end) {
118            end -= 1;
119        }
120        if end == start {
121            end = text.len();
122        }
123        let part = &text[start..end];
124        out.push(Arc::<str>::from(part.to_string()));
125        start = end;
126    }
127    out
128}
129
/// Extracts the text covered by the byte `range` from a chunk table.
///
/// * `chunks` - the text pieces in document order.
/// * `starts` - for each chunk, the byte offset at which it begins (sorted ascending).
/// * `len` - total length of all chunks in bytes; `range.end` is clamped to it.
/// * `range` - the requested half-open byte range.
///
/// Returns a borrowed slice whenever the (clamped) range lies inside a single chunk, including
/// when it stops exactly on that chunk's end, and an owned `String` only when the range
/// really spans several chunks. An empty range, or one starting at or past `len`, yields `""`.
/// A segment whose bounds do not fall on UTF-8 character boundaries is skipped rather than
/// causing a panic.
fn text_in_chunks<'a>(chunks: &'a [Arc<str>], starts: &'a [usize], len: usize, range: Range<usize>) -> Cow<'a, str> {
    if range.start >= range.end || range.start >= len {
        return Cow::Borrowed("");
    }
    let start = range.start;
    let end = range.end.min(len);

    // Index of the chunk holding byte `offset`: the last chunk whose start is <= `offset`.
    let chunk_index = |offset: usize| starts.binary_search(&offset).unwrap_or_else(|insert_at| insert_at.saturating_sub(1));

    let first = chunk_index(start);
    // `end` is exclusive, so locate the chunk of the last byte actually included. Searching for
    // `end` itself would select the *following* chunk whenever the range stops on a chunk
    // boundary and force a needless copy. `end > start >= 0` holds here, so `end - 1` is safe.
    let last = chunk_index(end - 1);

    if first == last {
        let base = starts[first];
        let text: &'a str = &chunks[first];
        let rel_start = start - base;
        let rel_end = end - base;
        return text.get(rel_start..rel_end).map_or(Cow::Borrowed(""), Cow::Borrowed);
    }

    // The range crosses chunk boundaries: stitch the covered pieces into one owned string.
    let mut buf = String::with_capacity(end - start);
    for (idx, chunk) in chunks.iter().enumerate().take(last + 1).skip(first) {
        let base = starts[idx];
        let text: &str = chunk;
        // Only the first and last chunks are partial; everything in between is taken whole.
        let from = if idx == first { start.saturating_sub(base) } else { 0 };
        let to = if idx == last { end.saturating_sub(base) } else { text.len() };
        if let Some(segment) = text.get(from..to) {
            buf.push_str(segment);
        }
    }
    Cow::Owned(buf)
}