fwob-v1 1.5.1

Reader, writer, verifier, and editor for the FWOB v1 binary format
Documentation
use std::{
    fs::File,
    io::{Read, Seek, SeekFrom},
    path::Path,
};

use fwob_core::{FrameRef, Key, KeyType, OwnedFrame, Schema};

use crate::{
    header::{read_header, Header},
    Result, V1Error,
};

/// Reader over a FWOB v1 stream of type `R`.
///
/// Bundles the underlying stream with the header parsed from it and the
/// schema / key type derived from that header.
pub struct Reader<R> {
    /// Underlying stream that the header, string table and frames are read from.
    inner: R,
    /// Header parsed from the start of the stream.
    header: Header,
    /// Schema obtained from the header for the chosen key field index.
    schema: Schema,
    /// Key type derived from the schema's key field.
    key_type: KeyType,
}

impl Reader<File> {
    /// Opens the file at `path` and builds a reader over it.
    ///
    /// Once the header has been parsed, the length it reports through
    /// `file_length()` is compared with the size reported by the file's
    /// metadata.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened or inspected, if
    /// [`Reader::new`] fails, or [`V1Error::CorruptedFileLength`] when the
    /// header's length and the on-disk length disagree.
    pub fn open(path: impl AsRef<Path>, key_field_index: usize) -> Result<Self> {
        let file = File::open(path.as_ref())?;
        let actual = file.metadata()?.len();
        let reader = Self::new(file, key_field_index)?;
        let expected = reader.header.file_length();
        if expected == actual {
            Ok(reader)
        } else {
            Err(V1Error::CorruptedFileLength { expected, actual })
        }
    }
}

impl<R: Read + Seek> Reader<R> {
    /// Builds a reader over `inner`, parsing the header from offset 0.
    ///
    /// The stream is first positioned at its start; the header is then read,
    /// the schema is obtained from it for `key_field_index`, and the key type
    /// is derived from the schema's key field.
    ///
    /// # Errors
    ///
    /// Propagates any failure from seeking, reading the header, building the
    /// schema, or deriving the key type.
    pub fn new(mut inner: R, key_field_index: usize) -> Result<Self> {
        inner.seek(SeekFrom::Start(0))?;

        let header = read_header(&mut inner)?;
        let schema = header.schema(key_field_index)?;
        let key_type = KeyType::from_field(schema.key_field())?;

        let reader = Self {
            inner,
            header,
            schema,
            key_type,
        };
        Ok(reader)
    }

    /// Returns the header that was parsed when this reader was constructed.
    pub fn header(&self) -> &Header {
        &self.header
    }

    /// Returns the schema derived from the header at construction time.
    pub fn schema(&self) -> &Schema {
        &self.schema
    }

    /// Returns the key type derived from the schema's key field.
    pub fn key_type(&self) -> KeyType {
        self.key_type
    }

    /// Returns the number of frames recorded in the header.
    pub fn frame_count(&self) -> u64 {
        self.header.frame_count
    }

    /// Reads every entry of the string table.
    ///
    /// Seeks to the table's start position, decodes `string_count` strings
    /// back to back, and then checks that the stream stopped exactly at the
    /// position the header gives as the table's end.
    ///
    /// # Errors
    ///
    /// Propagates I/O and decoding failures, and returns
    /// [`V1Error::CorruptedStringTableLength`] when the number of bytes
    /// consumed does not land on the table end recorded in the header.
    pub fn read_string_table(&mut self) -> Result<Vec<String>> {
        let table_start = self.header.string_table_position();
        self.inner.seek(SeekFrom::Start(table_start))?;

        let mut table = Vec::with_capacity(self.header.string_count as usize);
        for _ in 0..self.header.string_count {
            let entry = read_dotnet_string(&mut self.inner)?;
            table.push(entry);
        }

        let table_end = self.inner.stream_position()?;
        if table_end == self.header.string_table_ending() {
            return Ok(table);
        }
        Err(V1Error::CorruptedStringTableLength {
            expected: self.header.string_table_length,
            actual: table_end - table_start,
        })
    }

    /// Reads the frame stored at `index`.
    ///
    /// Returns `Ok(None)` when `index` is not below the header's frame count;
    /// otherwise seeks to the frame, reads `frame_length` bytes and wraps them
    /// in an [`OwnedFrame`] using this reader's schema.
    ///
    /// # Errors
    ///
    /// Propagates I/O failures and any error from constructing the frame.
    pub fn read_frame_at(&mut self, index: u64) -> Result<Option<OwnedFrame>> {
        if index >= self.header.frame_count {
            return Ok(None);
        }

        let offset = self.frame_offset(index);
        self.inner.seek(SeekFrom::Start(offset))?;

        let mut buffer = vec![0u8; self.header.frame_length as usize];
        self.inner.read_exact(&mut buffer)?;

        let frame = OwnedFrame::new(&self.schema, buffer)?;
        Ok(Some(frame))
    }

    /// Reads and decodes only the key of the frame stored at `index`.
    ///
    /// Returns `Ok(None)` when `index` is not below the header's frame count.
    /// Otherwise the stream is positioned at the key field inside that frame
    /// (frame offset plus the field's offset) and exactly the field's length
    /// in bytes is read and decoded.
    ///
    /// # Errors
    ///
    /// Propagates I/O failures and any error from decoding the key.
    pub fn read_key_at(&mut self, index: u64) -> Result<Option<Key>> {
        if index >= self.header.frame_count {
            return Ok(None);
        }

        let key_field = self.schema.key_field();
        let key_position = self.frame_offset(index) + u64::from(key_field.offset);
        self.inner.seek(SeekFrom::Start(key_position))?;

        let mut raw = vec![0u8; key_field.length as usize];
        self.inner.read_exact(&mut raw)?;

        let key = Key::decode(self.key_type, &raw)?;
        Ok(Some(key))
    }

    /// Binary-searches for the first frame whose key is not less than `key`.
    ///
    /// The result is only meaningful when frame keys are stored in
    /// non-decreasing order. Returns the frame count when every key is less
    /// than `key`.
    ///
    /// # Errors
    ///
    /// Propagates any failure from reading a probed key.
    pub fn lower_bound(&mut self, key: Key) -> Result<u64> {
        let mut first = 0;
        let mut last = self.header.frame_count;
        while first < last {
            // Midpoint written as `first + half` so it cannot overflow.
            let probe = first + (last - first) / 2;
            let probe_key = self.read_key_at(probe)?.expect("mid is in range");
            if probe_key < key {
                first = probe + 1;
            } else {
                last = probe;
            }
        }
        Ok(first)
    }

    /// Binary-searches for the first frame whose key is greater than `key`.
    ///
    /// The result is only meaningful when frame keys are stored in
    /// non-decreasing order. Returns the frame count when no key is greater
    /// than `key`.
    ///
    /// # Errors
    ///
    /// Propagates any failure from reading a probed key.
    pub fn upper_bound(&mut self, key: Key) -> Result<u64> {
        let mut first = 0;
        let mut last = self.header.frame_count;
        while first < last {
            // Midpoint written as `first + half` so it cannot overflow.
            let probe = first + (last - first) / 2;
            let probe_key = self.read_key_at(probe)?.expect("mid is in range");
            if probe_key <= key {
                first = probe + 1;
            } else {
                last = probe;
            }
        }
        Ok(first)
    }

    /// Returns `(lower, upper)` such that frames in `lower..upper` have a key
    /// equal to `key`, assuming keys are stored in non-decreasing order.
    ///
    /// The first pass is a lower-bound search that also remembers the smallest
    /// probed index whose key was strictly greater than `key`; the second pass
    /// is an upper-bound search restricted to `lower..that index`, which saves
    /// re-probing the part of the file already known to be past the range.
    ///
    /// # Errors
    ///
    /// Propagates any failure from reading a probed key.
    pub fn equal_range(&mut self, key: Key) -> Result<(u64, u64)> {
        let mut low = 0;
        let mut high = self.header.frame_count;
        // Smallest index seen so far whose key is strictly greater than `key`.
        let mut ceiling = high;

        // Pass 1: lower bound.
        while low < high {
            let probe = low + (high - low) / 2;
            let probe_key = self.read_key_at(probe)?.expect("mid is in range");
            if probe_key < key {
                low = probe + 1;
                continue;
            }
            if probe_key > key {
                ceiling = probe;
            }
            high = probe;
        }
        let lower = low;

        // Pass 2: upper bound, searched only inside `lower..ceiling`.
        high = ceiling;
        while low < high {
            let probe = low + (high - low) / 2;
            let probe_key = self.read_key_at(probe)?.expect("mid is in range");
            if probe_key <= key {
                low = probe + 1;
            } else {
                high = probe;
            }
        }
        Ok((lower, high))
    }

    pub fn frames_between(&mut self, first: Key, last: Key) -> Result<Vec<OwnedFrame>> {
        if first > last {
            return Ok(Vec::new());
        }
        let lb = self.lower_bound(first)?;
        let ub = self.upper_bound(last)?;
        self.read_frame_range(lb, ub)
    }

    /// Reads every frame, from index 0 up to the header's frame count.
    ///
    /// # Errors
    ///
    /// Propagates any failure from reading the frames.
    pub fn read_all_frames(&mut self) -> Result<Vec<OwnedFrame>> {
        let total = self.header.frame_count;
        self.read_frame_range(0, total)
    }

    /// Reads up to `max_frames` consecutive frames starting at frame `start`
    /// and returns their bytes, undecoded, in one contiguous buffer.
    ///
    /// Returns an empty buffer when `start` is not below the header's frame
    /// count or when `max_frames` is zero. The number of frames read is the
    /// smaller of `max_frames` and the frames remaining after `start`.
    ///
    /// # Errors
    ///
    /// Propagates I/O failures. If the requested chunk's byte size does not
    /// fit in `usize`, an `InvalidInput` I/O error is returned instead of
    /// overflowing.
    pub fn read_raw_frames_chunk(&mut self, start: u64, max_frames: usize) -> Result<Vec<u8>> {
        if start >= self.header.frame_count || max_frames == 0 {
            return Ok(Vec::new());
        }

        let remaining = self.header.frame_count - start;
        // Clamp in `u64` first: narrowing `remaining` directly would truncate
        // on targets where `usize` is narrower than 64 bits. The clamped value
        // never exceeds `max_frames`, so the cast back to `usize` is lossless.
        let count = (max_frames as u64).min(remaining) as usize;

        let byte_len = count
            .checked_mul(self.header.frame_length as usize)
            .ok_or_else(|| {
                std::io::Error::new(
                    std::io::ErrorKind::InvalidInput,
                    "raw frame chunk size overflows usize",
                )
            })?;

        let mut bytes = vec![0u8; byte_len];
        self.inner.seek(SeekFrom::Start(self.frame_offset(start)))?;
        self.inner.read_exact(&mut bytes)?;
        Ok(bytes)
    }

    pub fn verify_key_order(&mut self) -> Result<()> {
        if self.header.frame_count <= 1 {
            return Ok(());
        }
        let mut last = self.read_key_at(0)?.expect("frame exists");
        for index in 1..self.header.frame_count {
            let key = self.read_key_at(index)?.expect("frame exists");
            if key < last {
                return Err(V1Error::KeyOrderViolation { index });
            }
            last = key;
        }
        Ok(())
    }

    /// Extracts the key of `frame` using this reader's schema and key type.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by the frame's key extraction.
    pub fn frame_key(&self, frame: FrameRef<'_>) -> Result<Key> {
        let key = frame.key(&self.schema, self.key_type)?;
        Ok(key)
    }

    /// Reads the frames with indices in `begin..end` sequentially.
    ///
    /// The stream is positioned once at frame `begin`; each frame is then read
    /// into its own buffer and wrapped in an [`OwnedFrame`]. An inverted range
    /// (`begin > end`) is treated as empty instead of underflowing. Such a
    /// range can reach this function when the bounds come from binary searches
    /// over a file whose keys are not actually sorted.
    ///
    /// # Errors
    ///
    /// Propagates I/O failures and any error from constructing a frame.
    fn read_frame_range(&mut self, begin: u64, end: u64) -> Result<Vec<OwnedFrame>> {
        let count = end.saturating_sub(begin);
        let frame_len = self.header.frame_length as usize;

        // The capacity is only a hint, so a narrowing cast is harmless here.
        let mut frames = Vec::with_capacity(count as usize);
        self.inner.seek(SeekFrom::Start(self.frame_offset(begin)))?;
        for _ in 0..count {
            let mut bytes = vec![0u8; frame_len];
            self.inner.read_exact(&mut bytes)?;
            frames.push(OwnedFrame::new(&self.schema, bytes)?);
        }
        Ok(frames)
    }

    /// Byte offset of frame `index`, measured from the start of the stream.
    ///
    /// Computed as `first_frame_position() + frame_length * index`. The
    /// arithmetic saturates rather than overflowing: with inconsistent header
    /// values, plain `u64` arithmetic would panic in debug builds and wrap to
    /// an unrelated offset in release builds. A saturated offset lies far past
    /// any real data, so the caller's subsequent seek or read reports an error.
    fn frame_offset(&self, index: u64) -> u64 {
        let relative = u64::from(self.header.frame_length).saturating_mul(index);
        self.header.first_frame_position().saturating_add(relative)
    }
}

/// Reads one length-prefixed string: a 7-bit encoded byte count followed by
/// that many bytes of text (the layout .NET's `BinaryWriter.Write(string)`
/// produces).
///
/// Bytes that are not valid UTF-8 are replaced with U+FFFD rather than
/// rejected.
///
/// # Errors
///
/// Propagates failures from decoding the length prefix and from the
/// underlying reader. If the stream ends before the announced number of bytes
/// has been read, an `UnexpectedEof` I/O error is returned.
pub(crate) fn read_dotnet_string<R: Read>(reader: &mut R) -> Result<String> {
    // Upper bound on the up-front allocation. The length prefix comes straight
    // from the file, so a corrupt value must not be able to reserve gigabytes
    // before a single payload byte has been seen.
    const MAX_PREALLOC: usize = 64 * 1024;

    let len = read_7bit_encoded_int(reader)?;
    let mut bytes = Vec::with_capacity((len as usize).min(MAX_PREALLOC));

    // `take` caps how much is consumed; `read_to_end` grows the buffer only as
    // data actually arrives.
    reader.by_ref().take(u64::from(len)).read_to_end(&mut bytes)?;
    if bytes.len() as u64 != u64::from(len) {
        return Err(std::io::Error::new(
            std::io::ErrorKind::UnexpectedEof,
            "failed to fill whole buffer",
        )
        .into());
    }
    Ok(String::from_utf8_lossy(&bytes).into_owned())
}

/// Decodes a 7-bit variable-length unsigned integer.
///
/// Each byte contributes its low seven bits, least-significant group first;
/// a set high bit means another byte follows. A 32-bit value occupies at most
/// five bytes, and the fifth can carry only the top four bits.
///
/// # Errors
///
/// Propagates I/O failures from `reader`. Returns [`V1Error::CorruptedHeader`]
/// when the encoding runs past five bytes, or when the fifth byte carries bits
/// that do not fit in 32 bits (previously those bits were silently dropped).
pub(crate) fn read_7bit_encoded_int<R: Read>(reader: &mut R) -> Result<u32> {
    // Shift applied to the fifth (last permitted) group.
    const LAST_SHIFT: u32 = 28;
    // Only the low four payload bits of the fifth group fit below bit 32.
    const LAST_GROUP_MAX: u32 = 0x0f;

    let mut value = 0u32;
    let mut shift = 0u32;
    loop {
        let mut byte = [0u8; 1];
        reader.read_exact(&mut byte)?;

        let payload = u32::from(byte[0] & 0x7f);
        if shift == LAST_SHIFT && payload > LAST_GROUP_MAX {
            return Err(V1Error::CorruptedHeader);
        }
        value |= payload << shift;

        if byte[0] & 0x80 == 0 {
            return Ok(value);
        }
        shift += 7;
        if shift > LAST_SHIFT {
            return Err(V1Error::CorruptedHeader);
        }
    }
}