/// Magic text that every database header line must start with.
pub(crate) const DB_MAGIC: &'static str = "# spam-db-v1";

/// Number of buckets in the index: one for each possible value of a query's
/// first byte.
pub(crate) const INDEX_BUCKETS: usize = 256;

/// Size in bytes of one index entry: two little-endian `u32` values, the
/// bucket's offset into the data section followed by its compressed length.
pub(crate) const INDEX_ENTRY_SIZE: usize = 2 * std::mem::size_of::<u32>();

/// Total size in bytes of the index that directly follows the header line.
pub(crate) const INDEX_SIZE: usize = INDEX_ENTRY_SIZE * INDEX_BUCKETS;
12
13use crate::{Error, Result};
14
/// The kind of database a file declares in its header line.
///
/// The header spells the kind out after the magic prefix: `options` selects
/// [`DbKind::Options`] and `packages` selects [`DbKind::Packages`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DbKind {
    /// Declared by the header kind string `options`.
    Options,
    /// Declared by the header kind string `packages`.
    Packages,
}
23
/// A database file that has been read fully into memory.
///
/// Built by `DbFile::open`, which splits the file into its header line, the
/// fixed-size bucket index, and the data section that follows the index.
#[derive(Debug)]
pub(crate) struct DbFile {
    /// Database kind parsed from the header line.
    pub(crate) kind: DbKind,
    /// Raw index bytes, copied from the file right after the header line.
    index: [u8; INDEX_SIZE],
    /// Every byte of the file after the index; bucket offsets read from the
    /// index are relative to the start of this buffer.
    data: Vec<u8>,
}
41
42impl DbFile {
43 pub(crate) fn open(path: impl AsRef<std::path::Path>) -> Result<Self> {
45 let bytes = std::fs::read(path)?;
46
47 let nl = bytes
48 .iter()
49 .position(|&b| b == b'\n')
50 .ok_or_else(|| Error::InvalidDatabase("missing header newline".into()))?;
51
52 let header = std::str::from_utf8(&bytes[..nl])
53 .map_err(|_| Error::InvalidDatabase("non-UTF-8 header".into()))?;
54
55 let kind = parse_kind(header)?;
56
57 let index_start = nl + 1;
58 let data_start = index_start + INDEX_SIZE;
59
60 if bytes.len() < data_start {
61 return Err(Error::InvalidDatabase(
62 "file is too short to contain index".into(),
63 ));
64 }
65
66 let mut index = [0u8; INDEX_SIZE];
67 index.copy_from_slice(&bytes[index_start..index_start + INDEX_SIZE]);
68
69 let data = bytes[data_start..].to_vec();
70
71 Ok(Self { kind, index, data })
72 }
73
74 pub(crate) fn bucket_lines(&self, bucket: usize) -> Result<Vec<String>> {
76 let entry = bucket * INDEX_ENTRY_SIZE;
77 let offset = read_u32le(&self.index, entry) as usize;
78 let length = read_u32le(&self.index, entry + 4) as usize;
79
80 if length == 0 {
81 return Ok(Vec::new());
82 }
83
84 let end = offset
85 .checked_add(length)
86 .filter(|&e| e <= self.data.len())
87 .ok_or_else(|| {
88 Error::InvalidDatabase("bucket slice out of bounds".into())
89 })?;
90
91 let compressed = &self.data[offset..end];
92 let decompressed = zstd::decode_all(compressed)
93 .map_err(|e| Error::InvalidDatabase(format!("zstd error: {e}")))?;
94
95 let text = String::from_utf8(decompressed).map_err(|_| {
96 Error::InvalidDatabase("non-UTF-8 database content".into())
97 })?;
98
99 Ok(
100 text
101 .lines()
102 .filter(|l| !l.is_empty())
103 .map(String::from)
104 .collect(),
105 )
106 }
107
108 pub(crate) fn query_bucket(query: &str) -> usize {
110 query.bytes().next().map(|b| b as usize).unwrap_or(0)
111 }
112}
113
114fn parse_kind(header: &str) -> Result<DbKind> {
116 let rest = header.strip_prefix(DB_MAGIC).ok_or_else(|| {
117 Error::InvalidDatabase("missing spam-db magic header".into())
118 })?;
119
120 let kind_str = rest.strip_prefix('\t').unwrap_or(rest);
121
122 match kind_str {
123 "options" => Ok(DbKind::Options),
124 "packages" => Ok(DbKind::Packages),
125 other => Err(Error::InvalidDatabase(format!(
126 "unknown database kind: {other}"
127 ))),
128 }
129}
130
/// Reads the little-endian `u32` stored in the four bytes of `data` that
/// start at `offset`.
///
/// # Panics
///
/// Panics if `data` does not contain four bytes starting at `offset`.
fn read_u32le(data: &[u8], offset: usize) -> u32 {
    let mut word = [0u8; 4];
    // Indexing the range panics on an out-of-bounds read; once it succeeds
    // the source slice is exactly four bytes long, so the copy cannot fail.
    word.copy_from_slice(&data[offset..offset + 4]);
    u32::from_le_bytes(word)
}