pub(crate) const DB_MAGIC: &str = "# spam-db-v1";
pub(crate) const INDEX_BUCKETS: usize = 256;
pub(crate) const INDEX_ENTRY_SIZE: usize = 8;
pub(crate) const INDEX_SIZE: usize = INDEX_BUCKETS * INDEX_ENTRY_SIZE;
use crate::{Error, Result};
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DbKind {
Options,
Packages,
}
#[derive(Debug)]
pub(crate) struct DbFile {
pub(crate) kind: DbKind,
index: [u8; INDEX_SIZE],
data: Vec<u8>,
}
impl DbFile {
pub(crate) fn open(path: impl AsRef<std::path::Path>) -> Result<Self> {
let bytes = std::fs::read(path)?;
let nl = bytes
.iter()
.position(|&b| b == b'\n')
.ok_or_else(|| Error::InvalidDatabase("missing header newline".into()))?;
let header = std::str::from_utf8(&bytes[..nl])
.map_err(|_| Error::InvalidDatabase("non-UTF-8 header".into()))?;
let kind = parse_kind(header)?;
let index_start = nl + 1;
let data_start = index_start + INDEX_SIZE;
if bytes.len() < data_start {
return Err(Error::InvalidDatabase(
"file is too short to contain index".into(),
));
}
let mut index = [0u8; INDEX_SIZE];
index.copy_from_slice(&bytes[index_start..index_start + INDEX_SIZE]);
let data = bytes[data_start..].to_vec();
Ok(Self { kind, index, data })
}
pub(crate) fn bucket_lines(&self, bucket: usize) -> Result<Vec<String>> {
let entry = bucket * INDEX_ENTRY_SIZE;
let offset = read_u32le(&self.index, entry) as usize;
let length = read_u32le(&self.index, entry + 4) as usize;
if length == 0 {
return Ok(Vec::new());
}
let end = offset
.checked_add(length)
.filter(|&e| e <= self.data.len())
.ok_or_else(|| {
Error::InvalidDatabase("bucket slice out of bounds".into())
})?;
let compressed = &self.data[offset..end];
let decompressed = zstd::decode_all(compressed)
.map_err(|e| Error::InvalidDatabase(format!("zstd error: {e}")))?;
let text = String::from_utf8(decompressed).map_err(|_| {
Error::InvalidDatabase("non-UTF-8 database content".into())
})?;
Ok(
text
.lines()
.filter(|l| !l.is_empty())
.map(String::from)
.collect(),
)
}
pub(crate) fn query_bucket(query: &str) -> usize {
query.bytes().next().map(|b| b as usize).unwrap_or(0)
}
}
fn parse_kind(header: &str) -> Result<DbKind> {
let rest = header.strip_prefix(DB_MAGIC).ok_or_else(|| {
Error::InvalidDatabase("missing spam-db magic header".into())
})?;
let kind_str = rest.strip_prefix('\t').unwrap_or(rest);
match kind_str {
"options" => Ok(DbKind::Options),
"packages" => Ok(DbKind::Packages),
other => Err(Error::InvalidDatabase(format!(
"unknown database kind: {other}"
))),
}
}
fn read_u32le(data: &[u8], offset: usize) -> u32 {
u32::from_le_bytes(
data[offset..offset + 4]
.try_into()
.expect("slice length guaranteed by caller"),
)
}