oak_core/source/
streaming.rs1use crate::source::{Source, TextChunk};
2use core::range::Range;
3use std::borrow::Cow;
4use triomphe::Arc;
5use url::Url;
6
/// Target maximum size, in bytes, of each piece produced by `chunkify`.
const CHUNK_SIZE: usize = 4 * 1024;
8
9#[derive(Clone, Debug)]
11pub struct ChunkedSource {
12 url: Option<Url>,
13 chunks: Arc<[Arc<str>]>,
14 starts: Arc<[usize]>,
15 len: usize,
16}
17
18#[derive(Clone, Debug, Default)]
20pub struct ChunkedBuffer {
21 url: Option<Url>,
22 chunks: Vec<Arc<str>>,
23 starts: Vec<usize>,
24 len: usize,
25}
26
27impl ChunkedBuffer {
28 pub fn new() -> Self {
30 Self::default()
31 }
32
33 pub fn new_with_url(url: impl Into<Option<Url>>) -> Self {
35 Self { url: url.into(), ..Self::default() }
36 }
37
38 pub fn push_str(&mut self, text: &str) {
40 if text.is_empty() {
41 return;
42 }
43 for chunk in chunkify(text) {
44 self.starts.push(self.len);
45 self.len += chunk.len();
46 self.chunks.push(chunk);
47 }
48 }
49
50 pub fn snapshot(&self) -> ChunkedSource {
52 ChunkedSource { url: self.url.clone(), chunks: Arc::<[Arc<str>]>::from(self.chunks.clone()), starts: Arc::<[usize]>::from(self.starts.clone()), len: self.len }
53 }
54}
55
56impl Source for ChunkedBuffer {
57 fn length(&self) -> usize {
58 self.len
59 }
60
61 fn chunk_at(&self, offset: usize) -> TextChunk<'_> {
62 if offset >= self.len {
63 return TextChunk { start: self.len, text: "" };
64 }
65 let idx = match self.starts.binary_search(&offset) {
66 Ok(i) => i,
67 Err(0) => 0,
68 Err(i) => i - 1,
69 };
70 let start = self.starts[idx];
71 TextChunk { start, text: self.chunks[idx].as_ref() }
72 }
73
74 fn get_text_in(&self, range: Range<usize>) -> Cow<'_, str> {
75 text_in_chunks(&self.chunks, &self.starts, self.len, range)
76 }
77
78 fn get_url(&self) -> Option<&Url> {
79 self.url.as_ref()
80 }
81}
82
83impl Source for ChunkedSource {
84 fn length(&self) -> usize {
85 self.len
86 }
87
88 fn chunk_at(&self, offset: usize) -> TextChunk<'_> {
89 if offset >= self.len {
90 return TextChunk { start: self.len, text: "" };
91 }
92 let idx = match self.starts.binary_search(&offset) {
93 Ok(i) => i,
94 Err(0) => 0,
95 Err(i) => i - 1,
96 };
97 let start = self.starts[idx];
98 TextChunk { start, text: self.chunks[idx].as_ref() }
99 }
100
101 fn get_text_in(&self, range: Range<usize>) -> Cow<'_, str> {
102 text_in_chunks(&self.chunks, &self.starts, self.len, range)
103 }
104
105 fn get_url(&self) -> Option<&Url> {
106 self.url.as_ref()
107 }
108}
109
110fn chunkify(text: &str) -> Vec<Arc<str>> {
111 if text.is_empty() {
112 return vec![];
113 }
114 let mut out = Vec::new();
115 let mut start = 0usize;
116 while start < text.len() {
117 let mut end = (start + CHUNK_SIZE).min(text.len());
118 while end > start && !text.is_char_boundary(end) {
119 end -= 1;
120 }
121 if end == start {
122 end = text.len();
123 }
124 let part = &text[start..end];
125 out.push(Arc::<str>::from(part.to_string()));
126 start = end;
127 }
128 out
129}
130
/// Returns the text covered by the byte `range` of a chunked string.
///
/// * `chunks` - the text pieces, in order.
/// * `starts` - `starts[i]` is the byte offset of `chunks[i]`; expected to be
///   sorted ascending and the same length as `chunks`.
/// * `len` - total byte length of all chunks; `range.end` is clamped to it.
///
/// The result borrows from a chunk whenever the clamped range lies inside a
/// single chunk (including ranges that end exactly on a chunk boundary) and is
/// an owned concatenation only when the range really spans several chunks.
///
/// An empty or out-of-bounds range yields `""`. A sub-range of a chunk that
/// does not fall on UTF-8 character boundaries contributes no text rather than
/// panicking.
fn text_in_chunks<'a>(chunks: &'a [Arc<str>], starts: &'a [usize], len: usize, range: Range<usize>) -> Cow<'a, str> {
    if range.start >= range.end || range.start >= len {
        return Cow::Borrowed("");
    }
    let start = range.start;
    let end = range.end.min(len);

    // Index of the chunk containing `offset`: an exact hit on a chunk start is
    // that chunk, otherwise it is the chunk just before the insertion point.
    let locate = |offset: usize| match starts.binary_search(&offset) {
        Ok(i) => i,
        Err(i) => i.saturating_sub(1),
    };
    let first = locate(start);
    // `end` is exclusive, so locate the last byte actually included. Looking
    // up `end` itself would put a range that stops exactly on a chunk boundary
    // into the following chunk and force a needless allocation. The guards
    // above guarantee `end > start`, hence `end >= 1`.
    let last = locate(end - 1);

    if first == last {
        let base = starts[first];
        let piece: &'a str = chunks[first].as_ref();
        return Cow::Borrowed(piece.get(start - base..end - base).unwrap_or(""));
    }

    // The range spans several chunks: copy the tail of the first, every chunk
    // in between in full, and the head of the last.
    let mut buf = String::with_capacity(end - start);
    for (i, (chunk, &base)) in chunks.iter().zip(starts).enumerate().skip(first).take(last - first + 1) {
        let piece: &str = chunk.as_ref();
        let from = if i == first { start.saturating_sub(base) } else { 0 };
        let to = if i == last { end.saturating_sub(base) } else { piece.len() };
        if let Some(segment) = piece.get(from..to) {
            buf.push_str(segment);
        }
    }
    Cow::Owned(buf)
}