//! Disk-backed byte buffers: `LargeBuffer` (always file-backed) and
//! `AdaptiveBuffer` (in-memory until it spills past a threshold).

use std::fs::{File, OpenOptions};
use std::io::{self, Read, Seek, SeekFrom, Write};
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicU64, Ordering};

/// Size in bytes (10 MiB) above which callers should prefer file-backed
/// storage over keeping data in memory (see `AdaptiveBuffer::append`).
pub const MMAP_THRESHOLD: usize = 10 * 1024 * 1024;

/// A byte buffer backed by a file on disk, for output too large to hold
/// comfortably in memory.
pub struct LargeBuffer {
    // Open read/write handle to the backing file.
    file: File,
    // Location of the backing file, used for cleanup on drop.
    path: PathBuf,
    // Logical length in bytes (total appended minus consumed/cleared).
    size: usize,
    // When true (the default), Drop removes the backing file.
    cleanup: bool,
    // Read cursor; reset by `clear` but otherwise unused in this file —
    // NOTE(review): presumably reserved for incremental reads; confirm.
    read_pos: usize,
}
29
30impl LargeBuffer {
31 pub fn new() -> io::Result<Self> {
37 let path = std::env::temp_dir().join(format!(
38 "rust_expect_buffer_{}_{}",
39 std::process::id(),
40 std::time::SystemTime::now()
41 .duration_since(std::time::UNIX_EPOCH)
42 .map(|d| d.as_nanos())
43 .unwrap_or(0)
44 ));
45 Self::with_path(&path)
46 }
47
48 pub fn with_path(path: &Path) -> io::Result<Self> {
54 let file = OpenOptions::new()
55 .read(true)
56 .write(true)
57 .create(true)
58 .truncate(true)
59 .open(path)?;
60
61 Ok(Self {
62 file,
63 path: path.to_path_buf(),
64 size: 0,
65 cleanup: true,
66 read_pos: 0,
67 })
68 }
69
70 pub const fn set_cleanup(&mut self, cleanup: bool) {
72 self.cleanup = cleanup;
73 }
74
75 #[must_use]
77 pub fn path(&self) -> &Path {
78 &self.path
79 }
80
81 pub fn append(&mut self, data: &[u8]) -> io::Result<()> {
87 self.file.seek(SeekFrom::End(0))?;
88 self.file.write_all(data)?;
89 self.size += data.len();
90 Ok(())
91 }
92
93 #[must_use]
95 pub const fn len(&self) -> usize {
96 self.size
97 }
98
99 #[must_use]
101 pub const fn is_empty(&self) -> bool {
102 self.size == 0
103 }
104
105 pub fn read_range(&mut self, start: usize, len: usize) -> io::Result<Vec<u8>> {
111 if start >= self.size {
112 return Ok(Vec::new());
113 }
114
115 let actual_len = len.min(self.size - start);
116 let mut buf = vec![0u8; actual_len];
117
118 self.file.seek(SeekFrom::Start(start as u64))?;
119 self.file.read_exact(&mut buf)?;
120
121 Ok(buf)
122 }
123
124 pub fn read_all(&mut self) -> io::Result<Vec<u8>> {
130 self.read_range(0, self.size)
131 }
132
133 pub fn tail(&mut self, n: usize) -> io::Result<Vec<u8>> {
139 let start = self.size.saturating_sub(n);
140 self.read_range(start, n)
141 }
142
143 pub fn head(&mut self, n: usize) -> io::Result<Vec<u8>> {
149 self.read_range(0, n)
150 }
151
152 pub fn clear(&mut self) -> io::Result<()> {
158 self.file.set_len(0)?;
159 self.size = 0;
160 self.read_pos = 0;
161 Ok(())
162 }
163
164 pub fn find(&mut self, needle: &[u8]) -> io::Result<Option<usize>> {
172 const CHUNK_SIZE: usize = 64 * 1024;
174
175 if needle.is_empty() {
176 return Ok(Some(0));
177 }
178 if needle.len() > self.size {
179 return Ok(None);
180 }
181
182 let mut pos = 0;
183 let mut overlap = Vec::new();
184
185 self.file.seek(SeekFrom::Start(0))?;
186
187 while pos < self.size {
188 let read_size = CHUNK_SIZE.min(self.size - pos);
189 let mut chunk = vec![0u8; read_size];
190 self.file.read_exact(&mut chunk)?;
191
192 let search_data = if overlap.is_empty() {
194 chunk.clone()
195 } else {
196 let mut combined = overlap.clone();
197 combined.extend(&chunk);
198 combined
199 };
200
201 if let Some(idx) = find_subsequence(&search_data, needle) {
203 let actual_pos = if overlap.is_empty() {
204 pos + idx
205 } else {
206 pos - overlap.len() + idx
207 };
208 return Ok(Some(actual_pos));
209 }
210
211 overlap = if chunk.len() >= needle.len() - 1 {
213 chunk[chunk.len() - (needle.len() - 1)..].to_vec()
214 } else {
215 chunk
216 };
217
218 pos += read_size;
219 }
220
221 Ok(None)
222 }
223
224 pub fn find_str(&mut self, needle: &str) -> io::Result<Option<usize>> {
230 self.find(needle.as_bytes())
231 }
232
233 pub fn as_str_lossy(&mut self) -> io::Result<String> {
239 let data = self.read_all()?;
240 Ok(String::from_utf8_lossy(&data).into_owned())
241 }
242
243 pub fn consume(&mut self, len: usize) -> io::Result<Vec<u8>> {
251 if len == 0 {
252 return Ok(Vec::new());
253 }
254
255 let consume_len = len.min(self.size);
256
257 let consumed = self.read_range(0, consume_len)?;
259
260 let remaining = self.read_range(consume_len, self.size - consume_len)?;
262
263 self.file.seek(SeekFrom::Start(0))?;
265 self.file.set_len(0)?;
266 self.file.write_all(&remaining)?;
267 self.size = remaining.len();
268
269 Ok(consumed)
270 }
271
272 pub fn sync(&self) -> io::Result<()> {
278 self.file.sync_all()
279 }
280}
281
impl Drop for LargeBuffer {
    /// Removes the backing file when the buffer is dropped, unless
    /// cleanup was disabled via `set_cleanup(false)`. Removal errors are
    /// deliberately ignored: `drop` has no way to report them, and a
    /// leftover temp file is harmless.
    fn drop(&mut self) {
        if self.cleanup {
            let _ = std::fs::remove_file(&self.path);
        }
    }
}
289
// Manual Debug impl rather than #[derive]: the `file` handle and the
// `read_pos` cursor are omitted, showing only the fields useful for
// diagnostics (path, logical size, cleanup flag).
impl std::fmt::Debug for LargeBuffer {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("LargeBuffer")
            .field("path", &self.path)
            .field("size", &self.size)
            .field("cleanup", &self.cleanup)
            .finish()
    }
}
299
/// Returns the index of the first occurrence of `needle` in `haystack`,
/// or `None` if it does not occur.
///
/// An empty needle matches at index 0, consistent with the convention
/// in `LargeBuffer::find`. The guard is required because
/// `slice::windows` panics when given a window size of zero.
fn find_subsequence(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    if needle.is_empty() {
        return Some(0);
    }
    haystack
        .windows(needle.len())
        .position(|window| window == needle)
}
306
/// A buffer that starts in memory and spills to a temp-file-backed
/// `LargeBuffer` once an `append` would push it past the caller's
/// threshold.
pub enum AdaptiveBuffer {
    /// Contents held entirely in memory.
    Memory(Vec<u8>),
    /// Contents spilled to a file-backed buffer.
    File(LargeBuffer),
}
314
315impl AdaptiveBuffer {
316 #[must_use]
318 pub const fn new() -> Self {
319 Self::Memory(Vec::new())
320 }
321
322 #[must_use]
324 pub const fn with_threshold(_threshold: usize) -> Self {
325 Self::Memory(Vec::new())
327 }
328
329 pub fn append(&mut self, data: &[u8], threshold: usize) -> io::Result<()> {
335 match self {
336 Self::Memory(buf) => {
337 if buf.len() + data.len() > threshold {
338 let mut large = LargeBuffer::new()?;
340 large.append(buf)?;
341 large.append(data)?;
342 *self = Self::File(large);
343 } else {
344 buf.extend_from_slice(data);
345 }
346 }
347 Self::File(large) => {
348 large.append(data)?;
349 }
350 }
351 Ok(())
352 }
353
354 #[must_use]
356 pub const fn len(&self) -> usize {
357 match self {
358 Self::Memory(buf) => buf.len(),
359 Self::File(large) => large.len(),
360 }
361 }
362
363 #[must_use]
365 pub const fn is_empty(&self) -> bool {
366 self.len() == 0
367 }
368
369 #[must_use]
371 pub const fn is_file_backed(&self) -> bool {
372 matches!(self, Self::File(_))
373 }
374
375 pub fn read_all(&mut self) -> io::Result<Vec<u8>> {
381 match self {
382 Self::Memory(buf) => Ok(buf.clone()),
383 Self::File(large) => large.read_all(),
384 }
385 }
386
387 pub fn tail(&mut self, n: usize) -> io::Result<Vec<u8>> {
393 match self {
394 Self::Memory(buf) => {
395 let start = buf.len().saturating_sub(n);
396 Ok(buf[start..].to_vec())
397 }
398 Self::File(large) => large.tail(n),
399 }
400 }
401
402 pub fn clear(&mut self) -> io::Result<()> {
408 match self {
409 Self::Memory(buf) => {
410 buf.clear();
411 Ok(())
412 }
413 Self::File(large) => large.clear(),
414 }
415 }
416
417 pub fn as_str_lossy(&mut self) -> io::Result<String> {
423 match self {
424 Self::Memory(buf) => Ok(String::from_utf8_lossy(buf).into_owned()),
425 Self::File(large) => large.as_str_lossy(),
426 }
427 }
428}
429
impl Default for AdaptiveBuffer {
    /// Equivalent to [`AdaptiveBuffer::new`]: an empty in-memory buffer.
    fn default() -> Self {
        Self::new()
    }
}
435
// Manual Debug impl: the Memory variant prints only its length (the raw
// bytes would be noisy and potentially huge); the File variant defers to
// LargeBuffer's Debug, which already omits its file handle.
impl std::fmt::Debug for AdaptiveBuffer {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Memory(buf) => f.debug_tuple("Memory").field(&buf.len()).finish(),
            Self::File(large) => f.debug_tuple("File").field(large).finish(),
        }
    }
}
444
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn large_buffer_basic() {
        let mut buf = LargeBuffer::new().unwrap();
        buf.append(b"hello world").unwrap();

        assert_eq!(buf.len(), 11);
        assert_eq!(buf.read_all().unwrap(), b"hello world");
    }

    #[test]
    fn large_buffer_find() {
        let mut buf = LargeBuffer::new().unwrap();
        buf.append(b"the quick brown fox").unwrap();

        assert_eq!(buf.find(b"quick").unwrap(), Some(4));
        assert_eq!(buf.find(b"lazy").unwrap(), None);
    }

    #[test]
    fn large_buffer_find_across_chunk_boundary() {
        // `find` scans in 64 KiB chunks; place the needle straddling the
        // first boundary so the overlap-carrying path is exercised.
        let mut buf = LargeBuffer::new().unwrap();
        let mut data = vec![b'a'; 64 * 1024 - 3];
        data.extend_from_slice(b"needle");
        buf.append(&data).unwrap();

        assert_eq!(buf.find(b"needle").unwrap(), Some(64 * 1024 - 3));
    }

    #[test]
    fn large_buffer_tail() {
        let mut buf = LargeBuffer::new().unwrap();
        buf.append(b"hello world").unwrap();

        assert_eq!(buf.tail(5).unwrap(), b"world");
    }

    #[test]
    fn large_buffer_consume() {
        let mut buf = LargeBuffer::new().unwrap();
        buf.append(b"hello world").unwrap();

        let consumed = buf.consume(6).unwrap();
        assert_eq!(consumed, b"hello ");
        assert_eq!(buf.read_all().unwrap(), b"world");
    }

    #[test]
    fn large_buffer_clear_resets() {
        let mut buf = LargeBuffer::new().unwrap();
        buf.append(b"data").unwrap();
        buf.clear().unwrap();

        assert!(buf.is_empty());
        assert!(buf.read_all().unwrap().is_empty());
    }

    #[test]
    fn adaptive_buffer_stays_memory() {
        let mut buf = AdaptiveBuffer::new();
        buf.append(b"small data", MMAP_THRESHOLD).unwrap();

        assert!(!buf.is_file_backed());
    }

    #[test]
    fn adaptive_buffer_switches_to_file() {
        let mut buf = AdaptiveBuffer::new();
        let threshold = 100;

        let large_data = vec![b'x'; 150];
        buf.append(&large_data, threshold).unwrap();

        assert!(buf.is_file_backed());
        assert_eq!(buf.len(), 150);
    }

    #[test]
    fn adaptive_buffer_preserves_contents_across_switch() {
        // The spill path must carry the in-memory bytes into the file.
        let mut buf = AdaptiveBuffer::new();
        buf.append(b"0123456789", 4).unwrap();

        assert!(buf.is_file_backed());
        assert_eq!(buf.read_all().unwrap(), b"0123456789");
    }
}