1use std::collections::HashMap;
2use std::ops::Range;
3use std::sync::atomic::AtomicUsize;
4use std::sync::Mutex;
5
6use elsa::sync::FrozenVec;
7
8use crate::chunked_read_buffer_manager::{ChunkedReadBufferManager, RangeLocation, RangeSourcing};
9use crate::{FileAndPathHelperResult, FileContents};
10
/// Chunk size of 32 KiB, supplied as the const generic argument of the
/// `ChunkedReadBufferManager` that plans reads for the cache in this file.
// NOTE(review): presumably the unit is bytes and reads are aligned to this
// granularity — confirm against `ChunkedReadBufferManager`.
const CHUNK_SIZE: u64 = 32 * 1024;
12
13pub trait FileByteSource: Send + Sync {
14 fn read_bytes_into(
18 &self,
19 buffer: &mut Vec<u8>,
20 offset: u64,
21 size: usize,
22 ) -> FileAndPathHelperResult<()>;
23}
24
25pub struct FileContentsWithChunkedCaching<S: FileByteSource> {
26 source: S,
27 file_len: u64,
28 buffer_manager: Mutex<ChunkedReadBufferManager<CHUNK_SIZE>>,
29 string_cache: Mutex<HashMap<(u64, u8), RangeLocation>>,
30 buffers: FrozenVec<Box<[u8]>>,
31 buffer_count: AtomicUsize,
32}
33
34impl<S: FileByteSource> FileContentsWithChunkedCaching<S> {
35 pub fn new(file_len: u64, source: S) -> Self {
36 FileContentsWithChunkedCaching {
37 source,
38 buffers: FrozenVec::new(),
39 file_len,
40 buffer_manager: Mutex::new(ChunkedReadBufferManager::new_with_size(file_len)),
41 string_cache: Mutex::new(HashMap::new()),
42 buffer_count: AtomicUsize::new(0),
43 }
44 }
45
46 #[inline]
47 fn slice_from_location(&self, location: &RangeLocation) -> &[u8] {
48 let buffer = &self.buffers.get(location.buffer_handle).unwrap();
49 &buffer[location.offset_from_start..][..location.size]
50 }
51
52 #[inline]
54 fn get_range_location(&self, range: Range<u64>) -> FileAndPathHelperResult<RangeLocation> {
55 let mut buffer_manager = self.buffer_manager.lock().unwrap();
56 let read_range = match buffer_manager.determine_range_sourcing(range.clone()) {
57 RangeSourcing::InExistingBuffer(l) => return Ok(l),
58 RangeSourcing::NeedToReadNewBuffer(read_range) => read_range,
59 };
60 assert!(read_range.start <= read_range.end);
61
62 let read_len: usize = (read_range.end - read_range.start).try_into()?;
64 let mut buffer = Vec::new();
65 self.source
66 .read_bytes_into(&mut buffer, read_range.start, read_len)?;
67 assert!(buffer.len() == read_len);
68
69 let buffer_handle = self
70 .buffer_count
71 .fetch_add(1, std::sync::atomic::Ordering::SeqCst);
72 self.buffers.push(buffer.into_boxed_slice());
73 buffer_manager.insert_buffer_range(read_range.clone(), buffer_handle);
74
75 Ok(RangeLocation {
76 buffer_handle,
77 offset_from_start: (range.start - read_range.start) as usize,
78 size: (range.end - range.start) as usize,
79 })
80 }
81}
82
83impl<S: FileByteSource> FileContents for FileContentsWithChunkedCaching<S> {
84 #[inline]
85 fn len(&self) -> u64 {
86 self.file_len
87 }
88
89 #[inline]
90 fn read_bytes_at(&self, offset: u64, size: u64) -> FileAndPathHelperResult<&[u8]> {
91 if size == 0 {
92 return Ok(&[]);
93 }
94
95 let start = offset;
96 let end = offset.checked_add(size).ok_or_else(|| {
97 std::io::Error::new(
98 std::io::ErrorKind::InvalidInput,
99 "read_bytes_at with offset + size overflowing u64",
100 )
101 })?;
102 if end > self.file_len {
103 return Err(Box::new(std::io::Error::new(
104 std::io::ErrorKind::InvalidInput,
105 "read_bytes_at range out-of-bounds",
106 )));
107 }
108 let location = self.get_range_location(start..end)?;
109 Ok(self.slice_from_location(&location))
110 }
111
112 #[inline]
113 fn read_bytes_at_until(
114 &self,
115 range: Range<u64>,
116 delimiter: u8,
117 ) -> FileAndPathHelperResult<&[u8]> {
118 const MAX_LENGTH_INCLUDING_DELIMITER: u64 = 4096;
119
120 if range.end < range.start {
121 return Err(Box::new(std::io::Error::new(
122 std::io::ErrorKind::InvalidInput,
123 "read_bytes_at_until called with range.end < range.start",
124 )));
125 }
126 if range.end > self.file_len {
127 return Err(Box::new(std::io::Error::new(
128 std::io::ErrorKind::InvalidInput,
129 "read_bytes_at_until range out-of-bounds",
130 )));
131 }
132
133 let mut string_cache = self.string_cache.lock().unwrap();
134 if let Some(location) = string_cache.get(&(range.start, delimiter)) {
135 return Ok(self.slice_from_location(location));
136 }
137
138 let max_len = (range.end - range.start).min(MAX_LENGTH_INCLUDING_DELIMITER);
139 let mut location = self.get_range_location(range.start..(range.start + max_len))?;
140 let bytes = self.slice_from_location(&location);
141
142 let string_len = match memchr::memchr(delimiter, bytes) {
143 Some(len) => len,
144 None => {
145 return Err(Box::new(std::io::Error::new(
146 std::io::ErrorKind::InvalidInput,
147 "Could not find delimiter",
148 )));
149 }
150 };
151
152 location.size = string_len;
153 string_cache.insert((range.start, delimiter), location);
154 Ok(&bytes[..string_len])
155 }
156
157 fn read_bytes_into(
158 &self,
159 buffer: &mut Vec<u8>,
160 offset: u64,
161 size: usize,
162 ) -> FileAndPathHelperResult<()> {
163 self.source.read_bytes_into(buffer, offset, size)
164 }
165}