/// Magic prefix that the header line of every database file must start with.
pub(crate) const DB_MAGIC: &str = "# spam-db-v3";

/// Number of entries in the bucket index table (one per possible value of a
/// query's first byte, which is how buckets are selected).
pub(crate) const INDEX_BUCKETS: usize = 256;

/// Size in bytes of one index entry: a little-endian `u64` offset followed by
/// a little-endian `u64` length.
pub(crate) const INDEX_ENTRY_SIZE: usize = 16;

/// Total size in bytes of the bucket index table stored after the header line.
pub(crate) const INDEX_SIZE: usize = INDEX_BUCKETS * INDEX_ENTRY_SIZE;
12
13use std::{
14 io::{Read, Seek, SeekFrom},
15 path::{Path, PathBuf},
16};
17
18use crate::{Error, Result};
19
/// The kind of database a file declares in its header line.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DbKind {
    /// Declared by the header kind string `options`.
    Options,
    /// Declared by the header kind string `packages`.
    Packages,
    /// Declared by the header kind string `index`. Files of this kind are
    /// opened without a bucket index table.
    Index,
}
30
/// An opened database file: the parsed header plus what is needed to read
/// the payload later.
#[derive(Debug)]
pub(crate) struct DbFile {
    /// Kind parsed from the header line.
    pub(crate) kind: DbKind,
    /// Raw bucket index table read right after the header line; `None` for
    /// `DbKind::Index` files, which carry no table.
    index: Option<[u8; INDEX_SIZE]>,
    /// Path the file was opened from; it is reopened for every payload read.
    path: PathBuf,
    /// Absolute byte offset at which the payload starts (after the header
    /// line, its newline, and the index table when present).
    data_start: u64,
}
49
50impl DbFile {
51 pub(crate) fn open(path: impl AsRef<Path>) -> Result<Self> {
53 let path = path.as_ref().to_owned();
54 let mut file = std::fs::File::open(&path)?;
55
56 let mut header_bytes = Vec::new();
57 loop {
58 let mut byte = [0u8; 1];
59 if file.read(&mut byte)? == 0 {
60 return Err(Error::InvalidDatabase("missing header newline".into()));
61 }
62 if byte[0] == b'\n' {
63 break;
64 }
65 header_bytes.push(byte[0]);
66 }
67
68 let header = std::str::from_utf8(&header_bytes)
69 .map_err(|_| Error::InvalidDatabase("non-UTF-8 header".into()))?;
70
71 let kind = parse_kind(header)?;
72
73 let has_bucket_index = kind != DbKind::Index;
74 let index_size = if has_bucket_index { INDEX_SIZE } else { 0 };
75
76 let data_start = u64::try_from(header_bytes.len() + 1 + index_size)
77 .map_err(|_| Error::InvalidDatabase("database header is too large".into()))?;
78
79 if file.metadata()?.len() < data_start {
80 return Err(Error::InvalidDatabase(
81 "file is too short to contain index".into(),
82 ));
83 }
84
85 let index = if has_bucket_index {
86 let mut index = [0u8; INDEX_SIZE];
87 file.read_exact(&mut index)?;
88 Some(index)
89 } else {
90 None
91 };
92
93 Ok(Self {
94 kind,
95 index,
96 path,
97 data_start,
98 })
99 }
100
101 pub(crate) fn bucket_lines(&self, bucket: usize) -> Result<Vec<String>> {
103 let index = self.index.as_ref().ok_or_else(|| {
104 Error::InvalidDatabase("database does not contain a bucket index".into())
105 })?;
106 let entry = bucket * INDEX_ENTRY_SIZE;
107 let offset = read_u64le(index, entry).try_into().map_err(|_| {
108 Error::InvalidDatabase("bucket offset is too large for this platform".into())
109 })?;
110 let length = read_u64le(index, entry + 8).try_into().map_err(|_| {
111 Error::InvalidDatabase("bucket length is too large for this platform".into())
112 })?;
113
114 if length == 0 {
115 return Ok(Vec::new());
116 }
117
118 let start = self
119 .data_start
120 .checked_add(offset)
121 .ok_or_else(|| Error::InvalidDatabase("bucket offset overflow".into()))?;
122 let end = start
123 .checked_add(length)
124 .ok_or_else(|| Error::InvalidDatabase("bucket length overflow".into()))?;
125
126 let mut file = std::fs::File::open(&self.path)?;
127 let file_len = file.metadata()?.len();
128 if end > file_len {
129 return Err(Error::InvalidDatabase("bucket slice out of bounds".into()));
130 }
131
132 let length_usize = length.try_into().map_err(|_| {
133 Error::InvalidDatabase("bucket length is too large for this platform".into())
134 })?;
135 let mut compressed = vec![0u8; length_usize];
136 file.seek(SeekFrom::Start(start))?;
137 file.read_exact(&mut compressed)?;
138
139 let decompressed = zstd::decode_all(compressed.as_slice())
140 .map_err(|e| Error::InvalidDatabase(format!("zstd error: {e}")))?;
141
142 let text = String::from_utf8(decompressed)
143 .map_err(|_| Error::InvalidDatabase("non-UTF-8 database content".into()))?;
144
145 Ok(text
146 .lines()
147 .filter(|l| !l.is_empty())
148 .map(String::from)
149 .collect())
150 }
151
152 pub(crate) fn query_bucket(query: &str) -> usize {
154 query.bytes().next().map(|b| b as usize).unwrap_or(0)
155 }
156
157 pub(crate) fn stream_lines(&self) -> Result<Vec<String>> {
159 let mut file = std::fs::File::open(&self.path)?;
160 let file_len = file.metadata()?.len();
161 if self.data_start > file_len {
162 return Err(Error::InvalidDatabase(
163 "stream payload starts past end of file".into(),
164 ));
165 }
166
167 let length = usize::try_from(file_len - self.data_start).map_err(|_| {
168 Error::InvalidDatabase("stream payload is too large for this platform".into())
169 })?;
170 let mut compressed = vec![0u8; length];
171 file.seek(SeekFrom::Start(self.data_start))?;
172 file.read_exact(&mut compressed)?;
173
174 let decompressed = zstd::decode_all(compressed.as_slice())
175 .map_err(|e| Error::InvalidDatabase(format!("zstd error: {e}")))?;
176 let text = String::from_utf8(decompressed)
177 .map_err(|_| Error::InvalidDatabase("non-UTF-8 database content".into()))?;
178
179 Ok(text
180 .lines()
181 .filter(|l| !l.is_empty())
182 .map(String::from)
183 .collect())
184 }
185}
186
187fn parse_kind(header: &str) -> Result<DbKind> {
189 let rest = header
190 .strip_prefix(DB_MAGIC)
191 .ok_or_else(|| Error::InvalidDatabase("missing spam-db magic header".into()))?;
192
193 let kind_str = rest.strip_prefix('\t').unwrap_or(rest);
194
195 match kind_str {
196 "options" => Ok(DbKind::Options),
197 "packages" => Ok(DbKind::Packages),
198 "index" => Ok(DbKind::Index),
199 other => Err(Error::InvalidDatabase(format!(
200 "unknown database kind: {other}"
201 ))),
202 }
203}
204
/// Decodes the little-endian `u64` stored in the 8 bytes of `data` that
/// start at `offset`.
///
/// # Panics
///
/// Panics if `data` does not contain 8 bytes starting at `offset`.
fn read_u64le(data: &[u8], offset: usize) -> u64 {
    let mut raw = [0u8; 8];
    // The range index enforces the bounds; the lengths then always match.
    raw.copy_from_slice(&data[offset..offset + 8]);
    u64::from_le_bytes(raw)
}