criware_utf_core/
reader.rs1use std::{
2 collections::HashMap,
3 io::{Cursor, Read},
4};
5
6use crate::{Error, Result, Value, ValueKind, value::sealed::Primitive};
7
/// Reports whether `half` is a recognised column value-type flag.
///
/// Accepted values are `0x0` through `0x8`, plus `0xa` and `0xb`. Callers in
/// this file pass the low nibble of a column's flag byte.
#[inline(always)]
pub(crate) fn is_valid_value_flag(half: u8) -> bool {
    matches!(half, 0x0..=0x8 | 0xa | 0xb)
}
/// Reports whether `half` is a recognised column storage flag.
///
/// Exactly `0x10`, `0x30` and `0x50` are accepted. Callers in this file pass
/// the high nibble of a column's flag byte (`flag & 0xf0`).
#[inline(always)]
pub(crate) fn is_valid_storage_flag(half: u8) -> bool {
    matches!(half, 0x10 | 0x30 | 0x50)
}
16
/// Returns early from the enclosing function when a column's type flag is not
/// the expected one.
///
/// `$type_flag` is the flag that was read; `$expected` is the flag the caller
/// requires and is cast to `u8` for the comparison. On a mismatch the macro
/// returns `Error::WrongColumnType(found, expected)` if the found flag is
/// still a recognised value flag, and `Error::InvalidColumnType(found)` if it
/// is not. When the flags match, the macro does nothing.
macro_rules! handle_type_flag {
    ($type_flag:path => $expected:path) => {
        if $type_flag != $expected as u8 {
            return Err(if is_valid_value_flag($type_flag) {
                Error::WrongColumnType($type_flag, $expected as u8)
            } else {
                Error::InvalidColumnType($type_flag)
            });
        }
    };
}
28
29pub(crate) trait IOErrorHelper<T> {
30 fn io(self, message: &str) -> Result<T>;
31}
32impl IOErrorHelper<()> for std::io::Result<()> {
33 fn io(self, message: &str) -> Result<()> {
34 match self {
35 Ok(value) => Ok(value),
36 Err(error) => match error.kind() {
37 std::io::ErrorKind::UnexpectedEof => {
38 return Err(Error::EOF(message.to_owned()));
39 }
40 _ => return Err(Error::IOError(error)),
41 },
42 }
43 }
44}
45
/// In-memory contents of a single `@UTF` table, as loaded by `Reader::new`.
pub struct Reader {
    /// Bytes between the 24-byte table header and the row section, wrapped in
    /// a cursor so column reads advance through them.
    column_buffer: Cursor<Vec<u8>>,
    /// Length in bytes of the data held by `column_buffer`.
    column_buffer_size: usize,
    /// Bytes of the row section, wrapped in a cursor so row reads advance
    /// through them.
    row_buffer: Cursor<Vec<u8>>,
    /// Length in bytes of the data held by `row_buffer`.
    row_buffer_size: usize,
    /// NUL-terminated strings of the string section, keyed by the byte offset
    /// at which each string starts within that section.
    strings: HashMap<u32, String>,
    /// Raw bytes of the blob section.
    blobs: Vec<u8>,
    /// Key into `strings` of the table's name.
    table_name_index: u32,
    /// Field count taken from the table header.
    field_count: u16,
}
58
59impl Reader {
60 pub fn new(reader: &mut dyn Read) -> Result<Reader> {
74 let table_size = {
75 let mut header = [0u8; 8];
76 reader.read_exact(&mut header).io("@UTF header")?;
77 if &header[0..4] != b"@UTF" {
78 return Err(Error::MalformedHeader);
79 }
80 u32::from_be_bytes(header[4..8].try_into().unwrap())
81 };
82 if table_size < 24 {
83 return Err(Error::EOF("@UTF header".to_string()));
84 }
85 let mut header = [0u8; 24];
86 reader.read_exact(&mut header).io("@UTF header")?;
87 let row_offset = u32::from_be_bytes(header[0..4].try_into().unwrap());
88 let string_offset = u32::from_be_bytes(header[4..8].try_into().unwrap());
89 let blob_offset = u32::from_be_bytes(header[8..12].try_into().unwrap());
90 let table_name = u32::from_be_bytes(header[12..16].try_into().unwrap());
91 let field_count = u16::from_be_bytes(header[16..18].try_into().unwrap());
92 let row_size = u16::from_be_bytes(header[18..20].try_into().unwrap());
93 let row_count = u32::from_be_bytes(header[20..24].try_into().unwrap());
94 if 24 > row_offset
95 || row_offset > string_offset
96 || string_offset > blob_offset
97 || blob_offset > table_size
98 || (row_size as u32 * row_count) != string_offset - row_offset
99 {
100 return Err(Error::MalformedHeader);
101 }
102 let (column_buffer, column_buffer_size) = {
103 let mut buffer = vec![0u8; row_offset as usize - 24];
104 reader.read_exact(&mut buffer).io("UTF column data")?;
105 let len = buffer.len();
106 (Cursor::new(buffer), len)
107 };
108 let (row_buffer, row_buffer_size) = {
109 let mut buffer = vec![0u8; (string_offset - row_offset) as usize];
110 reader.read_exact(&mut buffer).io("UTF row data")?;
111 let len = buffer.len();
112 (Cursor::new(buffer), len)
113 };
114 let strings = {
115 let mut buffer = vec![0u8; (blob_offset - string_offset) as usize];
116 reader.read_exact(&mut buffer).io("UTF string data")?;
117 let mut strings = HashMap::new();
118 let mut start = 0;
119 let mut index = 0;
120 while index < buffer.len() {
121 if buffer[index] == 0 {
122 match std::str::from_utf8(&buffer[(start as usize)..index]) {
123 Ok(value) => strings.insert(start, value.to_owned()),
124 Err(error) => return Err(Error::StringMalformed(error)),
125 };
126 start = (index + 1) as u32;
127 }
128 index += 1;
129 }
130 strings
131 };
132 if !strings.contains_key(&table_name) {
133 return Err(Error::MalformedHeader);
134 }
135 let mut blobs = vec![0u8; (table_size - blob_offset) as usize];
136 reader.read_exact(&mut blobs).io("UTF blob data")?;
137 Ok(Reader {
138 column_buffer,
139 column_buffer_size,
140 row_buffer,
141 row_buffer_size,
142 strings,
143 blobs,
144 table_name_index: table_name,
145 field_count,
146 })
147 }
148
149 pub fn field_count(&self) -> u16 {
162 self.field_count
163 }
164
165 pub fn table_name<'a>(&'a self) -> &'a str {
178 self.strings.get(&self.table_name_index).unwrap().as_str()
179 }
180
181 pub fn more_column_data(&self) -> bool {
198 (self.column_buffer.position() as usize) < self.column_buffer_size
199 }
200
201 pub fn more_row_data(&self) -> bool {
218 (self.row_buffer.position() as usize) < self.row_buffer_size
219 }
220
221 fn read_constant_column_private<T: Value>(
222 &mut self,
223 name: &'static str,
224 optional: bool,
225 ) -> Result<Option<T>> {
226 let flag = self.read_primitive::<u8>(false)?;
227 let column_name = self.read_primitive::<str>(false)?;
228 if column_name != name {
229 return Err(Error::WrongColumnName(column_name, name));
230 }
231 let type_flag = flag & 0x0f;
232 let storage_flag = flag & 0xf0;
233 handle_type_flag!(type_flag => T::Primitive::TYPE_FLAG);
234 if storage_flag == 0x30 {
235 Ok(Some(self.read_value(false)?))
236 } else if optional && storage_flag == 0x10 {
237 Ok(None)
238 } else if is_valid_storage_flag(storage_flag) {
239 return Err(Error::WrongColumnStorage(storage_flag, "0x30"));
240 } else {
241 return Err(Error::InvalidColumnStorage(storage_flag));
242 }
243 }
244
245 pub fn read_constant_column<T: Value>(&mut self, name: &'static str) -> Result<T> {
262 Ok(self.read_constant_column_private(name, false)?.unwrap())
263 }
264
265 pub fn read_constant_column_opt<T: Value>(&mut self, name: &'static str) -> Result<Option<T>> {
284 self.read_constant_column_private(name, true)
285 }
286
287 fn read_rowed_column_private(
288 &mut self,
289 name: &'static str,
290 kind: ValueKind,
291 optional: bool,
292 ) -> Result<bool> {
293 let flag = self.read_primitive::<u8>(false)?;
294 let column_name = self.read_primitive::<str>(false)?;
295 if column_name != name {
296 return Err(Error::WrongColumnName(column_name, name));
297 }
298 let type_flag = flag & 0x0f;
299 let storage_flag = flag & 0xf0;
300 handle_type_flag!(type_flag => kind);
301 if storage_flag == 0x50 {
302 Ok(true)
303 } else if optional && storage_flag == 0x10 {
304 Ok(false)
305 } else if is_valid_storage_flag(storage_flag) {
306 return Err(Error::WrongColumnStorage(storage_flag, "0x50"));
307 } else {
308 return Err(Error::InvalidColumnStorage(storage_flag));
309 }
310 }
311
312 pub fn read_rowed_column<T: Value>(&mut self, name: &'static str) -> Result<()> {
328 self.read_rowed_column_private(name, T::Primitive::TYPE_FLAG, false)?;
329 Ok(())
330 }
331
332 pub fn read_rowed_column_opt<T: Value>(&mut self, name: &'static str) -> Result<bool> {
355 self.read_rowed_column_private(name, T::Primitive::TYPE_FLAG, true)
356 }
357
358 fn read_primitive<T: Primitive + ?Sized>(&mut self, row: bool) -> Result<T::Owned> {
359 let mut buffer: T::Buffer = Default::default();
360 let reader = if row {
361 &mut self.row_buffer
362 } else {
363 &mut self.column_buffer
364 };
365 match reader.read_exact(buffer.as_mut()) {
366 Ok(()) => (),
367 Err(error) => match error.kind() {
368 std::io::ErrorKind::UnexpectedEof => {
369 return Err(Error::EOF(format!(
370 "reading {} value",
371 std::any::type_name::<T>()
372 )));
373 }
374 _ => return Err(Error::IOError(error)),
375 },
376 };
377 match <T as Primitive>::parse(buffer, &self.strings, &self.blobs) {
378 Some(prim) => Ok(prim),
379 None => Err(Error::DataNotFound),
380 }
381 }
382
383 pub fn read_value<T: Value>(&mut self, row: bool) -> Result<T> {
400 T::from_primitive(self.read_primitive::<T::Primitive>(row)?).map_err(|error| {
401 Error::ValueConversion(
402 std::any::type_name::<T::Primitive>(),
403 std::any::type_name::<T>(),
404 error,
405 )
406 })
407 }
408}