oak_core/source/
streaming.rs1use crate::source::{Source, SourceId, TextChunk};
2use core::range::Range;
3use std::borrow::Cow;
4use triomphe::Arc;
5
/// Upper bound, in bytes, that `chunkify` aims for when cutting text into chunks.
const CHUNK_SIZE: usize = 4 * 1024;
7
8#[derive(Clone, Debug)]
10pub struct ChunkedSource {
11 source_id: Option<SourceId>,
12 chunks: Arc<[Arc<str>]>,
13 starts: Arc<[usize]>,
14 len: usize,
15}
16
17#[derive(Clone, Debug, Default)]
19pub struct ChunkedBuffer {
20 source_id: Option<SourceId>,
21 chunks: Vec<Arc<str>>,
22 starts: Vec<usize>,
23 len: usize,
24}
25
26impl ChunkedBuffer {
27 pub fn new() -> Self {
29 Self::default()
30 }
31
32 pub fn new_with_id(source_id: impl Into<Option<SourceId>>) -> Self {
34 Self { source_id: source_id.into(), ..Self::default() }
35 }
36
37 pub fn push_str(&mut self, text: &str) {
39 if text.is_empty() {
40 return;
41 }
42 for chunk in chunkify(text) {
43 self.starts.push(self.len);
44 self.len += chunk.len();
45 self.chunks.push(chunk);
46 }
47 }
48
49 pub fn snapshot(&self) -> ChunkedSource {
51 ChunkedSource { source_id: self.source_id, chunks: Arc::<[Arc<str>]>::from(self.chunks.clone()), starts: Arc::<[usize]>::from(self.starts.clone()), len: self.len }
52 }
53}
54
55impl Source for ChunkedBuffer {
56 fn length(&self) -> usize {
57 self.len
58 }
59
60 fn chunk_at(&self, offset: usize) -> TextChunk<'_> {
61 if offset >= self.len {
62 return TextChunk { start: self.len, text: "" };
63 }
64 let idx = match self.starts.binary_search(&offset) {
65 Ok(i) => i,
66 Err(0) => 0,
67 Err(i) => i - 1,
68 };
69 let start = self.starts[idx];
70 TextChunk { start, text: self.chunks[idx].as_ref() }
71 }
72
73 fn get_text_in(&self, range: Range<usize>) -> Cow<'_, str> {
74 text_in_chunks(&self.chunks, &self.starts, self.len, range)
75 }
76
77 fn source_id(&self) -> Option<SourceId> {
78 self.source_id
79 }
80}
81
82impl Source for ChunkedSource {
83 fn length(&self) -> usize {
84 self.len
85 }
86
87 fn chunk_at(&self, offset: usize) -> TextChunk<'_> {
88 if offset >= self.len {
89 return TextChunk { start: self.len, text: "" };
90 }
91 let idx = match self.starts.binary_search(&offset) {
92 Ok(i) => i,
93 Err(0) => 0,
94 Err(i) => i - 1,
95 };
96 let start = self.starts[idx];
97 TextChunk { start, text: self.chunks[idx].as_ref() }
98 }
99
100 fn get_text_in(&self, range: Range<usize>) -> Cow<'_, str> {
101 text_in_chunks(&self.chunks, &self.starts, self.len, range)
102 }
103
104 fn source_id(&self) -> Option<SourceId> {
105 self.source_id
106 }
107}
108
109fn chunkify(text: &str) -> Vec<Arc<str>> {
110 if text.is_empty() {
111 return vec![];
112 }
113 let mut out = Vec::new();
114 let mut start = 0usize;
115 while start < text.len() {
116 let mut end = (start + CHUNK_SIZE).min(text.len());
117 while end > start && !text.is_char_boundary(end) {
118 end -= 1;
119 }
120 if end == start {
121 end = text.len();
122 }
123 let part = &text[start..end];
124 out.push(Arc::<str>::from(part.to_string()));
125 start = end;
126 }
127 out
128}
129
/// Extracts the text covered by the byte `range` from a chunk list.
///
/// * `chunks` - the text pieces, in order.
/// * `starts` - byte offset at which each chunk begins; must be sorted
///   ascending and index-aligned with `chunks`.
/// * `len` - total length of all chunks in bytes; `range.end` is clamped to it.
/// * `range` - half-open byte range to extract.
///
/// Returns a borrowed slice when the range lies inside a single chunk and an
/// owned string when it spans several. An empty or inverted range, or one that
/// starts at or past `len`, yields `""`.
///
/// Offsets that do not fall on a UTF-8 character boundary are not an error:
/// the affected segment is left out (so a single-chunk request yields `""`).
fn text_in_chunks<'a>(chunks: &'a [Arc<str>], starts: &'a [usize], len: usize, range: Range<usize>) -> Cow<'a, str> {
    if range.start >= range.end || range.start >= len {
        return Cow::Borrowed("");
    }
    let start = range.start;
    let end = range.end.min(len);

    // Index of the chunk containing `offset`: an exact hit is a chunk start,
    // otherwise the insertion point is one past the containing chunk.
    let chunk_index = |offset: usize| starts.binary_search(&offset).unwrap_or_else(|insert_at| insert_at.saturating_sub(1));

    let first = chunk_index(start);
    // `end` is exclusive and `start < end` holds here, so `end - 1` is the last
    // byte requested. Looking it up (rather than `end`) keeps a range that ends
    // exactly on a chunk boundary inside one chunk, so it can be borrowed.
    let last = chunk_index(end - 1);

    if first == last {
        let base = starts[first];
        let text: &'a str = chunks[first].as_ref();
        return text.get(start - base..end - base).map(Cow::Borrowed).unwrap_or(Cow::Borrowed(""));
    }

    let mut buf = String::with_capacity(end - start);
    for (i, (chunk, &base)) in chunks.iter().zip(starts).enumerate().take(last + 1).skip(first) {
        let text: &str = chunk.as_ref();
        // Only the outermost chunks are trimmed; inner chunks are taken whole.
        let from = if i == first { start.saturating_sub(base) } else { 0 };
        let to = if i == last { end.saturating_sub(base) } else { text.len() };
        if let Some(segment) = text.get(from..to) {
            buf.push_str(segment);
        }
    }
    Cow::Owned(buf)
}