Skip to main content

fwob_v1/
reader.rs

1use std::{
2    fs::File,
3    io::{Read, Seek, SeekFrom},
4    path::Path,
5};
6
7use fwob_core::{FrameRef, Key, KeyType, OwnedFrame, Schema};
8
9use crate::{
10    header::{read_header, Header},
11    Result, V1Error,
12};
13
14pub struct Reader<R> {
15    inner: R,
16    header: Header,
17    schema: Schema,
18    key_type: KeyType,
19}
20
21impl Reader<File> {
22    pub fn open(path: impl AsRef<Path>, key_field_index: usize) -> Result<Self> {
23        let file = File::open(path.as_ref())?;
24        let len = file.metadata()?.len();
25        let reader = Self::new(file, key_field_index)?;
26        if reader.header.file_length() != len {
27            return Err(V1Error::CorruptedFileLength {
28                expected: reader.header.file_length(),
29                actual: len,
30            });
31        }
32        Ok(reader)
33    }
34}
35
36impl<R: Read + Seek> Reader<R> {
37    pub fn new(mut inner: R, key_field_index: usize) -> Result<Self> {
38        inner.seek(SeekFrom::Start(0))?;
39        let header = read_header(&mut inner)?;
40        let schema = header.schema(key_field_index)?;
41        let key_type = KeyType::from_field(schema.key_field())?;
42        Ok(Self {
43            inner,
44            header,
45            schema,
46            key_type,
47        })
48    }
49
50    pub fn header(&self) -> &Header {
51        &self.header
52    }
53
54    pub fn schema(&self) -> &Schema {
55        &self.schema
56    }
57
58    pub fn key_type(&self) -> KeyType {
59        self.key_type
60    }
61
62    pub fn frame_count(&self) -> u64 {
63        self.header.frame_count
64    }
65
66    pub fn read_string_table(&mut self) -> Result<Vec<String>> {
67        self.inner
68            .seek(SeekFrom::Start(self.header.string_table_position()))?;
69        let mut strings = Vec::with_capacity(self.header.string_count as usize);
70        for _ in 0..self.header.string_count {
71            strings.push(read_dotnet_string(&mut self.inner)?);
72        }
73        let pos = self.inner.stream_position()?;
74        if pos != self.header.string_table_ending() {
75            return Err(V1Error::CorruptedStringTableLength {
76                expected: self.header.string_table_length,
77                actual: pos - self.header.string_table_position(),
78            });
79        }
80        Ok(strings)
81    }
82
83    pub fn read_frame_at(&mut self, index: u64) -> Result<Option<OwnedFrame>> {
84        if index >= self.header.frame_count {
85            return Ok(None);
86        }
87        self.inner.seek(SeekFrom::Start(self.frame_offset(index)))?;
88        let mut bytes = vec![0u8; self.header.frame_length as usize];
89        self.inner.read_exact(&mut bytes)?;
90        Ok(Some(OwnedFrame::new(&self.schema, bytes)?))
91    }
92
93    pub fn read_key_at(&mut self, index: u64) -> Result<Option<Key>> {
94        if index >= self.header.frame_count {
95            return Ok(None);
96        }
97        let key_field = self.schema.key_field();
98        self.inner.seek(SeekFrom::Start(
99            self.frame_offset(index) + u64::from(key_field.offset),
100        ))?;
101        let mut bytes = vec![0u8; key_field.length as usize];
102        self.inner.read_exact(&mut bytes)?;
103        Ok(Some(Key::decode(self.key_type, &bytes)?))
104    }
105
106    pub fn lower_bound(&mut self, key: Key) -> Result<u64> {
107        let mut lo = 0;
108        let mut hi = self.header.frame_count;
109        while lo < hi {
110            let mid = lo + ((hi - lo) >> 1);
111            let mid_key = self.read_key_at(mid)?.expect("mid is in range");
112            if mid_key < key {
113                lo = mid + 1;
114            } else {
115                hi = mid;
116            }
117        }
118        Ok(lo)
119    }
120
121    pub fn upper_bound(&mut self, key: Key) -> Result<u64> {
122        let mut lo = 0;
123        let mut hi = self.header.frame_count;
124        while lo < hi {
125            let mid = lo + ((hi - lo) >> 1);
126            let mid_key = self.read_key_at(mid)?.expect("mid is in range");
127            if mid_key <= key {
128                lo = mid + 1;
129            } else {
130                hi = mid;
131            }
132        }
133        Ok(lo)
134    }
135
136    pub fn equal_range(&mut self, key: Key) -> Result<(u64, u64)> {
137        let mut lo = 0;
138        let mut hi = self.header.frame_count;
139        let mut upper_hi = hi;
140        while lo < hi {
141            let mid = lo + ((hi - lo) >> 1);
142            let mid_key = self.read_key_at(mid)?.expect("mid is in range");
143            if mid_key < key {
144                lo = mid + 1;
145            } else if mid_key > key {
146                hi = mid;
147                upper_hi = mid;
148            } else {
149                hi = mid;
150            }
151        }
152
153        let lower = lo;
154        hi = upper_hi;
155        while lo < hi {
156            let mid = lo + ((hi - lo) >> 1);
157            let mid_key = self.read_key_at(mid)?.expect("mid is in range");
158            if mid_key <= key {
159                lo = mid + 1;
160            } else {
161                hi = mid;
162            }
163        }
164        Ok((lower, hi))
165    }
166
167    pub fn frames_between(&mut self, first: Key, last: Key) -> Result<Vec<OwnedFrame>> {
168        if first > last {
169            return Ok(Vec::new());
170        }
171        let lb = self.lower_bound(first)?;
172        let ub = self.upper_bound(last)?;
173        self.read_frame_range(lb, ub)
174    }
175
176    pub fn read_all_frames(&mut self) -> Result<Vec<OwnedFrame>> {
177        self.read_frame_range(0, self.header.frame_count)
178    }
179
180    pub fn read_raw_frames_chunk(&mut self, start: u64, max_frames: usize) -> Result<Vec<u8>> {
181        if start >= self.header.frame_count || max_frames == 0 {
182            return Ok(Vec::new());
183        }
184        let count = max_frames.min((self.header.frame_count - start) as usize);
185        let mut bytes = vec![0u8; count * self.header.frame_length as usize];
186        self.inner.seek(SeekFrom::Start(self.frame_offset(start)))?;
187        self.inner.read_exact(&mut bytes)?;
188        Ok(bytes)
189    }
190
191    pub fn verify_key_order(&mut self) -> Result<()> {
192        if self.header.frame_count <= 1 {
193            return Ok(());
194        }
195        let mut last = self.read_key_at(0)?.expect("frame exists");
196        for index in 1..self.header.frame_count {
197            let key = self.read_key_at(index)?.expect("frame exists");
198            if key < last {
199                return Err(V1Error::KeyOrderViolation { index });
200            }
201            last = key;
202        }
203        Ok(())
204    }
205
206    pub fn frame_key(&self, frame: FrameRef<'_>) -> Result<Key> {
207        Ok(frame.key(&self.schema, self.key_type)?)
208    }
209
210    fn read_frame_range(&mut self, begin: u64, end: u64) -> Result<Vec<OwnedFrame>> {
211        let mut frames = Vec::with_capacity((end - begin) as usize);
212        self.inner.seek(SeekFrom::Start(self.frame_offset(begin)))?;
213        for _ in begin..end {
214            let mut bytes = vec![0u8; self.header.frame_length as usize];
215            self.inner.read_exact(&mut bytes)?;
216            frames.push(OwnedFrame::new(&self.schema, bytes)?);
217        }
218        Ok(frames)
219    }
220
221    fn frame_offset(&self, index: u64) -> u64 {
222        self.header.first_frame_position() + u64::from(self.header.frame_length) * index
223    }
224}
225
226pub(crate) fn read_dotnet_string<R: Read>(reader: &mut R) -> Result<String> {
227    let len = read_7bit_encoded_int(reader)?;
228    let mut bytes = vec![0u8; len as usize];
229    reader.read_exact(&mut bytes)?;
230    Ok(String::from_utf8_lossy(&bytes).into_owned())
231}
232
233pub(crate) fn read_7bit_encoded_int<R: Read>(reader: &mut R) -> Result<u32> {
234    let mut count = 0u32;
235    let mut shift = 0;
236    loop {
237        let mut b = [0u8; 1];
238        reader.read_exact(&mut b)?;
239        count |= u32::from(b[0] & 0x7f) << shift;
240        if b[0] & 0x80 == 0 {
241            return Ok(count);
242        }
243        shift += 7;
244        if shift >= 35 {
245            return Err(V1Error::CorruptedHeader);
246        }
247    }
248}