1use std::{
2 fs::File,
3 io::{Read, Seek, SeekFrom},
4 path::Path,
5};
6
7use fwob_core::{FrameRef, Key, KeyType, OwnedFrame, Schema};
8
9use crate::{
10 header::{read_header, Header},
11 Result, V1Error,
12};
13
/// Random-access reader over a framed data file.
///
/// The header is parsed once at construction time; frames, keys and the string table are then
/// fetched on demand by seeking within `inner`.
pub struct Reader<R> {
    /// Underlying byte source that every read and seek goes through.
    inner: R,
    /// Header parsed from the start of `inner` when the reader was created.
    header: Header,
    /// Frame schema obtained from the header for the caller-selected key field.
    schema: Schema,
    /// Key type derived from the schema's key field; used to decode keys.
    key_type: KeyType,
}
20
21impl Reader<File> {
22 pub fn open(path: impl AsRef<Path>, key_field_index: usize) -> Result<Self> {
23 let file = File::open(path.as_ref())?;
24 let len = file.metadata()?.len();
25 let reader = Self::new(file, key_field_index)?;
26 if reader.header.file_length() != len {
27 return Err(V1Error::CorruptedFileLength {
28 expected: reader.header.file_length(),
29 actual: len,
30 });
31 }
32 Ok(reader)
33 }
34}
35
36impl<R: Read + Seek> Reader<R> {
37 pub fn new(mut inner: R, key_field_index: usize) -> Result<Self> {
38 inner.seek(SeekFrom::Start(0))?;
39 let header = read_header(&mut inner)?;
40 let schema = header.schema(key_field_index)?;
41 let key_type = KeyType::from_field(schema.key_field())?;
42 Ok(Self {
43 inner,
44 header,
45 schema,
46 key_type,
47 })
48 }
49
50 pub fn header(&self) -> &Header {
51 &self.header
52 }
53
54 pub fn schema(&self) -> &Schema {
55 &self.schema
56 }
57
58 pub fn key_type(&self) -> KeyType {
59 self.key_type
60 }
61
62 pub fn frame_count(&self) -> u64 {
63 self.header.frame_count
64 }
65
66 pub fn read_string_table(&mut self) -> Result<Vec<String>> {
67 self.inner
68 .seek(SeekFrom::Start(self.header.string_table_position()))?;
69 let mut strings = Vec::with_capacity(self.header.string_count as usize);
70 for _ in 0..self.header.string_count {
71 strings.push(read_dotnet_string(&mut self.inner)?);
72 }
73 let pos = self.inner.stream_position()?;
74 if pos != self.header.string_table_ending() {
75 return Err(V1Error::CorruptedStringTableLength {
76 expected: self.header.string_table_length,
77 actual: pos - self.header.string_table_position(),
78 });
79 }
80 Ok(strings)
81 }
82
83 pub fn read_frame_at(&mut self, index: u64) -> Result<Option<OwnedFrame>> {
84 if index >= self.header.frame_count {
85 return Ok(None);
86 }
87 self.inner.seek(SeekFrom::Start(self.frame_offset(index)))?;
88 let mut bytes = vec![0u8; self.header.frame_length as usize];
89 self.inner.read_exact(&mut bytes)?;
90 Ok(Some(OwnedFrame::new(&self.schema, bytes)?))
91 }
92
93 pub fn read_key_at(&mut self, index: u64) -> Result<Option<Key>> {
94 if index >= self.header.frame_count {
95 return Ok(None);
96 }
97 let key_field = self.schema.key_field();
98 self.inner.seek(SeekFrom::Start(
99 self.frame_offset(index) + u64::from(key_field.offset),
100 ))?;
101 let mut bytes = vec![0u8; key_field.length as usize];
102 self.inner.read_exact(&mut bytes)?;
103 Ok(Some(Key::decode(self.key_type, &bytes)?))
104 }
105
106 pub fn lower_bound(&mut self, key: Key) -> Result<u64> {
107 let mut lo = 0;
108 let mut hi = self.header.frame_count;
109 while lo < hi {
110 let mid = lo + ((hi - lo) >> 1);
111 let mid_key = self.read_key_at(mid)?.expect("mid is in range");
112 if mid_key < key {
113 lo = mid + 1;
114 } else {
115 hi = mid;
116 }
117 }
118 Ok(lo)
119 }
120
121 pub fn upper_bound(&mut self, key: Key) -> Result<u64> {
122 let mut lo = 0;
123 let mut hi = self.header.frame_count;
124 while lo < hi {
125 let mid = lo + ((hi - lo) >> 1);
126 let mid_key = self.read_key_at(mid)?.expect("mid is in range");
127 if mid_key <= key {
128 lo = mid + 1;
129 } else {
130 hi = mid;
131 }
132 }
133 Ok(lo)
134 }
135
136 pub fn equal_range(&mut self, key: Key) -> Result<(u64, u64)> {
137 let mut lo = 0;
138 let mut hi = self.header.frame_count;
139 let mut upper_hi = hi;
140 while lo < hi {
141 let mid = lo + ((hi - lo) >> 1);
142 let mid_key = self.read_key_at(mid)?.expect("mid is in range");
143 if mid_key < key {
144 lo = mid + 1;
145 } else if mid_key > key {
146 hi = mid;
147 upper_hi = mid;
148 } else {
149 hi = mid;
150 }
151 }
152
153 let lower = lo;
154 hi = upper_hi;
155 while lo < hi {
156 let mid = lo + ((hi - lo) >> 1);
157 let mid_key = self.read_key_at(mid)?.expect("mid is in range");
158 if mid_key <= key {
159 lo = mid + 1;
160 } else {
161 hi = mid;
162 }
163 }
164 Ok((lower, hi))
165 }
166
167 pub fn frames_between(&mut self, first: Key, last: Key) -> Result<Vec<OwnedFrame>> {
168 if first > last {
169 return Ok(Vec::new());
170 }
171 let lb = self.lower_bound(first)?;
172 let ub = self.upper_bound(last)?;
173 self.read_frame_range(lb, ub)
174 }
175
176 pub fn read_all_frames(&mut self) -> Result<Vec<OwnedFrame>> {
177 self.read_frame_range(0, self.header.frame_count)
178 }
179
180 pub fn read_raw_frames_chunk(&mut self, start: u64, max_frames: usize) -> Result<Vec<u8>> {
181 if start >= self.header.frame_count || max_frames == 0 {
182 return Ok(Vec::new());
183 }
184 let count = max_frames.min((self.header.frame_count - start) as usize);
185 let mut bytes = vec![0u8; count * self.header.frame_length as usize];
186 self.inner.seek(SeekFrom::Start(self.frame_offset(start)))?;
187 self.inner.read_exact(&mut bytes)?;
188 Ok(bytes)
189 }
190
191 pub fn verify_key_order(&mut self) -> Result<()> {
192 if self.header.frame_count <= 1 {
193 return Ok(());
194 }
195 let mut last = self.read_key_at(0)?.expect("frame exists");
196 for index in 1..self.header.frame_count {
197 let key = self.read_key_at(index)?.expect("frame exists");
198 if key < last {
199 return Err(V1Error::KeyOrderViolation { index });
200 }
201 last = key;
202 }
203 Ok(())
204 }
205
206 pub fn frame_key(&self, frame: FrameRef<'_>) -> Result<Key> {
207 Ok(frame.key(&self.schema, self.key_type)?)
208 }
209
210 fn read_frame_range(&mut self, begin: u64, end: u64) -> Result<Vec<OwnedFrame>> {
211 let mut frames = Vec::with_capacity((end - begin) as usize);
212 self.inner.seek(SeekFrom::Start(self.frame_offset(begin)))?;
213 for _ in begin..end {
214 let mut bytes = vec![0u8; self.header.frame_length as usize];
215 self.inner.read_exact(&mut bytes)?;
216 frames.push(OwnedFrame::new(&self.schema, bytes)?);
217 }
218 Ok(frames)
219 }
220
221 fn frame_offset(&self, index: u64) -> u64 {
222 self.header.first_frame_position() + u64::from(self.header.frame_length) * index
223 }
224}
225
226pub(crate) fn read_dotnet_string<R: Read>(reader: &mut R) -> Result<String> {
227 let len = read_7bit_encoded_int(reader)?;
228 let mut bytes = vec![0u8; len as usize];
229 reader.read_exact(&mut bytes)?;
230 Ok(String::from_utf8_lossy(&bytes).into_owned())
231}
232
233pub(crate) fn read_7bit_encoded_int<R: Read>(reader: &mut R) -> Result<u32> {
234 let mut count = 0u32;
235 let mut shift = 0;
236 loop {
237 let mut b = [0u8; 1];
238 reader.read_exact(&mut b)?;
239 count |= u32::from(b[0] & 0x7f) << shift;
240 if b[0] & 0x80 == 0 {
241 return Ok(count);
242 }
243 shift += 7;
244 if shift >= 35 {
245 return Err(V1Error::CorruptedHeader);
246 }
247 }
248}