// dictx-index 0.1.0
//
// Index builder and binary entry storage for DictX.
// Documentation
use dictx_core::{DictEntry, DictxError, Result};
use std::fs::File;
use std::io::{Read, Seek, SeekFrom, Write};
use std::path::Path;
use std::sync::Mutex;

/// Conventional file name of the entry pack file.
pub const ENTRY_PACK_FILE: &str = "entries.dxp";
/// 8-byte signature written at the very start of a pack file by the writer and
/// checked by the reader before any record is accessed.
const MAGIC: &[u8; 8] = b"DXPCK001";

/// Location of one serialized entry inside a pack file.
///
/// Returned by [`EntryPackWriter::append`] and passed back to
/// [`EntryPackReader::read`] to fetch the entry.
#[derive(Debug, Clone, Copy)]
pub struct EntryLocator {
    /// Absolute byte offset of the record, i.e. of its 4-byte length prefix.
    pub offset: u64,
    /// Size in bytes of the serialized payload, not counting the length prefix.
    pub len: u64,
}

/// Writer that creates a pack file and appends serialized entries to it.
pub struct EntryPackWriter {
    /// Open handle to the pack file being written.
    file: File,
}

impl EntryPackWriter {
    /// Creates the pack file at `path` (truncating any existing file) and
    /// writes the format signature at its start.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be created or the signature cannot
    /// be written.
    pub fn create(path: &Path) -> Result<Self> {
        let mut file = File::create(path)?;
        file.write_all(MAGIC)?;
        Ok(Self { file })
    }

    /// Serializes `entry` as MessagePack (with field names) and appends it as
    /// one record: a little-endian `u32` payload length followed by the payload.
    ///
    /// The returned locator holds the offset of the length prefix and the
    /// payload size, which is what [`EntryPackReader::read`] expects.
    ///
    /// # Errors
    ///
    /// Returns `DictxError::InvalidData` if serialization fails or if the
    /// serialized payload does not fit in the 4-byte length prefix, and an I/O
    /// error if querying the position or writing fails.
    pub fn append(&mut self, entry: &DictEntry) -> Result<EntryLocator> {
        let bytes = rmp_serde::to_vec_named(entry)
            .map_err(|err| DictxError::InvalidData(format!("词条二进制序列化失败: {err}")))?;

        // The on-disk length prefix is only 32 bits wide. A plain `as u32` cast
        // would silently truncate a larger payload and leave the pack
        // inconsistent with the locator, so reject it before writing anything.
        let payload_len = bytes.len() as u64;
        if payload_len > u64::from(u32::MAX) {
            return Err(DictxError::InvalidData(format!(
                "词条序列化后过大，超出 pack 长度上限: {payload_len} 字节"
            )));
        }
        // Lossless: the range was checked just above.
        let len_prefix = payload_len as u32;

        let offset = self.file.stream_position()?;
        self.file.write_all(&len_prefix.to_le_bytes())?;
        self.file.write_all(&bytes)?;
        Ok(EntryLocator {
            offset,
            len: payload_len,
        })
    }

    /// Flushes the underlying file and consumes the writer.
    ///
    /// # Errors
    ///
    /// Returns an error if flushing fails.
    pub fn finish(mut self) -> Result<()> {
        self.file.flush()?;
        Ok(())
    }
}

/// Random-access reader over a pack file.
pub struct EntryPackReader {
    /// Pack file handle. The mutex lets `read(&self)` seek and read through a
    /// shared reference.
    file: Mutex<File>,
}

impl EntryPackReader {
    /// Opens the pack file at `path` and validates its 8-byte signature.
    ///
    /// # Errors
    ///
    /// Returns an I/O error if the file cannot be opened or is shorter than
    /// the signature, and `DictxError::InvalidData` if the signature does not
    /// match.
    pub fn open(path: &Path) -> Result<Self> {
        let mut file = File::open(path)?;
        let mut magic = [0u8; 8];
        file.read_exact(&mut magic)?;
        if &magic != MAGIC {
            return Err(DictxError::InvalidData(format!(
                "词条 pack 文件格式不正确: {}",
                path.display()
            )));
        }
        Ok(Self {
            file: Mutex::new(file),
        })
    }

    /// Reads and deserializes the entry identified by `locator`.
    ///
    /// The record's stored length prefix must equal `locator.len`; a mismatch
    /// is reported instead of attempting to decode the bytes.
    ///
    /// # Errors
    ///
    /// Returns `DictxError::Message` if the file lock is poisoned, an I/O error
    /// if seeking or reading fails, and `DictxError::InvalidData` if the stored
    /// length disagrees with the locator or the payload cannot be deserialized.
    pub fn read(&self, locator: EntryLocator) -> Result<DictEntry> {
        // Hold the lock only for the seek + read. Decoding works on an owned
        // buffer, so it runs after the guard is dropped and does not block
        // other readers.
        let bytes = {
            let mut file = self
                .file
                .lock()
                .map_err(|_| DictxError::Message("词条 pack 读取锁已损坏".to_string()))?;
            file.seek(SeekFrom::Start(locator.offset))?;

            let mut len_buf = [0u8; 4];
            file.read_exact(&mut len_buf)?;
            let len = u64::from(u32::from_le_bytes(len_buf));
            // Checking against the index before allocating also keeps a corrupt
            // prefix from driving the buffer size.
            if len != locator.len {
                return Err(DictxError::InvalidData(format!(
                    "词条 pack 长度不一致: index={} pack={}",
                    locator.len, len
                )));
            }

            let mut bytes = vec![0u8; len as usize];
            file.read_exact(&mut bytes)?;
            bytes
        };

        rmp_serde::from_slice(&bytes)
            .map_err(|err| DictxError::InvalidData(format!("词条二进制反序列化失败: {err}")))
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use dictx_core::{Definition, DictSource};

    /// Writes a single entry to a fresh pack file and checks that reading it
    /// back through its locator yields the same word and definition.
    #[test]
    fn roundtrips_entry_pack() {
        let tmp = tempfile::tempdir().unwrap();
        let pack_path = tmp.path().join(ENTRY_PACK_FILE);

        let mut original = DictEntry::new(DictSource::Custom { name: "t".into() }, "apple");
        let definition = Definition::new("fruit", "苹果", Some("n".into()));
        original.definitions.push(definition);

        // Scope the writer so it is finished before the reader opens the file.
        let locator = {
            let mut writer = EntryPackWriter::create(&pack_path).unwrap();
            let written = writer.append(&original).unwrap();
            writer.finish().unwrap();
            written
        };

        let reader = EntryPackReader::open(&pack_path).unwrap();
        let restored = reader.read(locator).unwrap();
        assert_eq!(restored.word, "apple");
        assert_eq!(restored.definitions[0].zh, "苹果");
    }
}