/// Magic prefix that the header line of every database file must start with.
pub(crate) const DB_MAGIC: &str = "# spam-db-v2";

/// Number of buckets described by the on-disk index.
pub(crate) const INDEX_BUCKETS: usize = 256;

/// Size in bytes of one index entry: a little-endian `u64` offset followed by
/// a little-endian `u64` length.
pub(crate) const INDEX_ENTRY_SIZE: usize = 16;

/// Total size in bytes of the index that follows the header line.
pub(crate) const INDEX_SIZE: usize = INDEX_BUCKETS * INDEX_ENTRY_SIZE;
12
13use std::{
14 io::{Read, Seek, SeekFrom},
15 path::{Path, PathBuf},
16};
17
18use crate::{Error, Result};
19
/// Kind of database, as declared by the kind string in the file's header line.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DbKind {
    /// Selected by the header kind string `options`.
    Options,
    /// Selected by the header kind string `packages`.
    Packages,
}
28
/// Handle to a database file whose header and index have already been read.
///
/// Only the index is kept in memory; bucket contents are read from `path`
/// on demand.
#[derive(Debug)]
pub(crate) struct DbFile {
    /// Kind parsed from the header line.
    pub(crate) kind: DbKind,
    /// Raw index bytes: one `INDEX_ENTRY_SIZE`-byte entry per bucket, each a
    /// little-endian `u64` offset followed by a little-endian `u64` length.
    index: [u8; INDEX_SIZE],
    /// Path the database was opened from; reopened for every bucket read.
    path: PathBuf,
    /// Absolute file offset of the first byte after the index. Bucket offsets
    /// stored in the index are relative to this position.
    data_start: u64,
}
47
48impl DbFile {
49 pub(crate) fn open(path: impl AsRef<Path>) -> Result<Self> {
51 let path = path.as_ref().to_owned();
52 let mut file = std::fs::File::open(&path)?;
53
54 let mut header_bytes = Vec::new();
55 loop {
56 let mut byte = [0u8; 1];
57 if file.read(&mut byte)? == 0 {
58 return Err(Error::InvalidDatabase("missing header newline".into()));
59 }
60 if byte[0] == b'\n' {
61 break;
62 }
63 header_bytes.push(byte[0]);
64 }
65
66 let header = std::str::from_utf8(&header_bytes)
67 .map_err(|_| Error::InvalidDatabase("non-UTF-8 header".into()))?;
68
69 let kind = parse_kind(header)?;
70
71 let data_start = u64::try_from(header_bytes.len() + 1 + INDEX_SIZE)
72 .map_err(|_| Error::InvalidDatabase("database header is too large".into()))?;
73
74 if file.metadata()?.len() < data_start {
75 return Err(Error::InvalidDatabase(
76 "file is too short to contain index".into(),
77 ));
78 }
79
80 let mut index = [0u8; INDEX_SIZE];
81 file.read_exact(&mut index)?;
82
83 Ok(Self {
84 kind,
85 index,
86 path,
87 data_start,
88 })
89 }
90
91 pub(crate) fn bucket_lines(&self, bucket: usize) -> Result<Vec<String>> {
93 let entry = bucket * INDEX_ENTRY_SIZE;
94 let offset = read_u64le(&self.index, entry).try_into().map_err(|_| {
95 Error::InvalidDatabase("bucket offset is too large for this platform".into())
96 })?;
97 let length = read_u64le(&self.index, entry + 8).try_into().map_err(|_| {
98 Error::InvalidDatabase("bucket length is too large for this platform".into())
99 })?;
100
101 if length == 0 {
102 return Ok(Vec::new());
103 }
104
105 let start = self
106 .data_start
107 .checked_add(offset)
108 .ok_or_else(|| Error::InvalidDatabase("bucket offset overflow".into()))?;
109 let end = start
110 .checked_add(length)
111 .ok_or_else(|| Error::InvalidDatabase("bucket length overflow".into()))?;
112
113 let mut file = std::fs::File::open(&self.path)?;
114 let file_len = file.metadata()?.len();
115 if end > file_len {
116 return Err(Error::InvalidDatabase(
117 "bucket slice out of bounds".into(),
118 ));
119 }
120
121 let length_usize = length.try_into().map_err(|_| {
122 Error::InvalidDatabase("bucket length is too large for this platform".into())
123 })?;
124 let mut compressed = vec![0u8; length_usize];
125 file.seek(SeekFrom::Start(start))?;
126 file.read_exact(&mut compressed)?;
127
128 let decompressed = zstd::decode_all(compressed.as_slice())
129 .map_err(|e| Error::InvalidDatabase(format!("zstd error: {e}")))?;
130
131 let text = String::from_utf8(decompressed).map_err(|_| {
132 Error::InvalidDatabase("non-UTF-8 database content".into())
133 })?;
134
135 Ok(
136 text
137 .lines()
138 .filter(|l| !l.is_empty())
139 .map(String::from)
140 .collect(),
141 )
142 }
143
144 pub(crate) fn query_bucket(query: &str) -> usize {
146 query.bytes().next().map(|b| b as usize).unwrap_or(0)
147 }
148}
149
150fn parse_kind(header: &str) -> Result<DbKind> {
152 let rest = header.strip_prefix(DB_MAGIC).ok_or_else(|| {
153 Error::InvalidDatabase("missing spam-db magic header".into())
154 })?;
155
156 let kind_str = rest.strip_prefix('\t').unwrap_or(rest);
157
158 match kind_str {
159 "options" => Ok(DbKind::Options),
160 "packages" => Ok(DbKind::Packages),
161 other => Err(Error::InvalidDatabase(format!(
162 "unknown database kind: {other}"
163 ))),
164 }
165}
166
/// Decodes the eight bytes at `data[offset..offset + 8]` as a little-endian
/// `u64`.
///
/// # Panics
///
/// Panics if that range does not lie within `data`.
fn read_u64le(data: &[u8], offset: usize) -> u64 {
    let mut raw = [0u8; 8];
    raw.copy_from_slice(&data[offset..offset + 8]);
    u64::from_le_bytes(raw)
}