vyre_std/pattern/
cache.rs1use std::env;
13use std::fs;
14use std::io::Write;
15use std::path::{Path, PathBuf};
16
17use super::dfa_assemble::{AssembleOptions, Pattern};
18use super::types::{DfaPackFormat, PackedDfa, PatternError};
19
/// Version label mixed into every cache key before anything else is hashed.
///
/// Because `hash_key` feeds these bytes into the hash first, changing this
/// string changes every key, so entries written under an older label are
/// simply never looked up again.
const CACHE_VERSION: &str = "vyre-std.dfa.v2";
21
22#[must_use]
28#[inline]
29pub fn cache_path(patterns: &[Pattern<'_>], options: AssembleOptions) -> PathBuf {
30 let key = hash_key(patterns, options);
31 cache_dir().join(format!("{key}.vdfa"))
32}
33
34#[inline]
44pub fn load_or_compute(
45 patterns: &[Pattern<'_>],
46 options: AssembleOptions,
47) -> Result<PackedDfa, PatternError> {
48 if env::var_os("VYRE_NO_CACHE").is_some() {
49 return super::dfa_assemble::dfa_assemble(patterns, options);
50 }
51
52 let path = cache_path(patterns, options);
53 if let Ok(packed) = read_entry(&path) {
54 return Ok(packed);
55 }
56
57 let packed = super::dfa_assemble::dfa_assemble(patterns, options)?;
58 let _ = write_entry(&path, &packed);
59 Ok(packed)
60}
61
62#[inline]
69pub fn clear() -> std::io::Result<()> {
70 let dir = cache_dir();
71 if !dir.exists() {
72 return Ok(());
73 }
74 for entry in fs::read_dir(&dir)? {
75 let entry = entry?;
76 if let Some(name) = entry.file_name().to_str() {
77 if name.ends_with(".vdfa") {
78 let _ = fs::remove_file(entry.path());
79 }
80 }
81 }
82 Ok(())
83}
84
85#[must_use]
87#[inline]
88pub fn size() -> u64 {
89 let dir = cache_dir();
90 if !dir.exists() {
91 return 0;
92 }
93 let Ok(reader) = fs::read_dir(&dir) else {
94 return 0;
95 };
96 reader
97 .filter_map(Result::ok)
98 .filter_map(|entry| {
99 let name = entry.file_name();
100 let name = name.to_string_lossy();
101 if !name.ends_with(".vdfa") {
102 return None;
103 }
104 entry.metadata().ok().map(|m| m.len())
105 })
106 .sum()
107}
108
/// Resolves the directory that holds cached DFA files.
///
/// Lookup order:
/// 1. `$XDG_CACHE_HOME/vyre/dfa`
/// 2. `$HOME/.cache/vyre/dfa`
/// 3. `.vyre-cache/dfa` relative to the current working directory
///
/// A variable that is set but empty is treated as unset, as the XDG Base
/// Directory specification requires. Otherwise an empty `XDG_CACHE_HOME`
/// would silently place the cache at `vyre/dfa` under whatever directory the
/// process happens to be running in.
fn cache_dir() -> PathBuf {
    // An empty value carries no location; fall through to the next candidate.
    let non_empty_var = |name: &str| env::var_os(name).filter(|value| !value.is_empty());

    if let Some(xdg) = non_empty_var("XDG_CACHE_HOME") {
        return PathBuf::from(xdg).join("vyre").join("dfa");
    }
    if let Some(home) = non_empty_var("HOME") {
        return PathBuf::from(home).join(".cache").join("vyre").join("dfa");
    }
    PathBuf::from(".vyre-cache").join("dfa")
}
119
120fn hash_key(patterns: &[Pattern<'_>], options: AssembleOptions) -> String {
121 let mut hasher = Fnv1a::new();
125 hasher.update(CACHE_VERSION.as_bytes());
126 hasher.update(&[format_tag(options.format), options.minimize as u8]);
127 hasher.update(&(patterns.len() as u64).to_le_bytes());
128 for pattern in patterns {
129 match pattern {
130 Pattern::Literal(bytes) => {
131 hasher.update(b"lit");
132 hasher.update(&(bytes.len() as u64).to_le_bytes());
133 hasher.update(bytes);
134 }
135 Pattern::Regex(source) => {
136 hasher.update(b"rgx");
137 hasher.update(&(source.len() as u64).to_le_bytes());
138 hasher.update(source.as_bytes());
139 }
140 }
141 }
142 format!("{:016x}", hasher.finish())
143}
144
145fn format_tag(format: DfaPackFormat) -> u8 {
146 match format {
147 DfaPackFormat::Dense => 0,
148 DfaPackFormat::EquivClass => 1,
149 }
150}
151
/// Incremental 64-bit FNV-1a hasher.
///
/// Bytes may be fed in any number of `update` calls; only their overall
/// order affects the result.
struct Fnv1a(u64);

impl Fnv1a {
    /// Initial state of the 64-bit FNV-1a hash.
    const OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    /// Multiplier applied after each byte is mixed in.
    const PRIME: u64 = 0x0100_0000_01b3;

    /// Creates a hasher in its initial state.
    fn new() -> Self {
        Fnv1a(Self::OFFSET_BASIS)
    }

    /// Mixes `bytes` into the running hash: XOR each byte in, then multiply
    /// by the prime with wrapping arithmetic.
    fn update(&mut self, bytes: &[u8]) {
        self.0 = bytes.iter().fold(self.0, |state, &byte| {
            (state ^ u64::from(byte)).wrapping_mul(Self::PRIME)
        });
    }

    /// Returns the hash of everything fed so far without resetting the state.
    fn finish(&self) -> u64 {
        self.0
    }
}
170
171fn read_entry(path: &Path) -> std::io::Result<PackedDfa> {
172 let buf = fs::read(path)?;
173 if buf.len() < 17 {
175 return Err(std::io::Error::new(
176 std::io::ErrorKind::InvalidData,
177 "Fix: truncated cache entry",
178 ));
179 }
180 let format = match buf[0] {
181 0 => DfaPackFormat::Dense,
182 1 => DfaPackFormat::EquivClass,
183 _ => {
184 return Err(std::io::Error::new(
185 std::io::ErrorKind::InvalidData,
186 "Fix: unknown format tag in cache entry",
187 ))
188 }
189 };
190 let start = u32::from_le_bytes(buf[1..5].try_into().unwrap());
191 let state_count = u32::from_le_bytes(buf[5..9].try_into().unwrap());
192 let payload_len_u64 = u64::from_le_bytes(buf[9..17].try_into().unwrap());
193 let payload_len = usize::try_from(payload_len_u64).map_err(|_| {
194 std::io::Error::new(
195 std::io::ErrorKind::InvalidData,
196 "Fix: cache entry payload_len exceeds addressable memory",
197 )
198 })?;
199 if buf.len() < 17 + payload_len {
200 return Err(std::io::Error::new(
201 std::io::ErrorKind::InvalidData,
202 "Fix: cache entry payload length mismatch",
203 ));
204 }
205 Ok(PackedDfa {
206 format,
207 state_count,
208 start,
209 bytes: buf[17..17 + payload_len].to_vec(),
210 })
211}
212
213fn write_entry(path: &Path, packed: &PackedDfa) -> std::io::Result<()> {
214 if let Some(parent) = path.parent() {
215 fs::create_dir_all(parent)?;
216 }
217 let mut file = fs::File::create(path)?;
218 file.write_all(&[format_tag(packed.format)])?;
219 file.write_all(&packed.start.to_le_bytes())?;
220 file.write_all(&packed.state_count.to_le_bytes())?;
221 file.write_all(&(packed.bytes.len() as u64).to_le_bytes())?;
222 file.write_all(&packed.bytes)?;
223 Ok(())
224}
225
#[cfg(test)]
mod tests {
    use super::*;
    use crate::pattern::dfa_assemble::{AssembleOptions, Pattern};

    /// Builds a scratch directory path under the system temp dir whose name
    /// embeds `label` and the current time in nanoseconds since the Unix
    /// epoch (or `0` if the clock reads earlier than the epoch). The
    /// directory itself is not created here.
    fn unique_cache_dir(label: &str) -> PathBuf {
        let nanos = match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
            Ok(elapsed) => elapsed.as_nanos(),
            Err(_) => 0,
        };
        std::env::temp_dir().join(format!("vyre-cache-test-{label}-{}", nanos))
    }

    /// Hashing the same inputs twice yields the same key.
    #[test]
    fn hash_key_is_stable_across_runs() {
        let patterns = [Pattern::Literal(b"hello"), Pattern::Regex("[0-9]+")];
        let options = AssembleOptions::default();
        let first = hash_key(&patterns, options);
        let second = hash_key(&patterns, options);
        assert_eq!(first, second);
    }

    /// Two different literals must not share a key.
    #[test]
    fn hash_key_differs_for_different_patterns() {
        let options = AssembleOptions::default();
        let hello_key = hash_key(&[Pattern::Literal(b"hello")], options);
        let world_key = hash_key(&[Pattern::Literal(b"world")], options);
        assert_ne!(hello_key, world_key);
    }

    /// The pack format is part of the key: same patterns, different format,
    /// different key.
    #[test]
    fn hash_key_differs_for_different_options() {
        let patterns = [Pattern::Literal(b"hello")];
        let key_for = |format: DfaPackFormat| {
            hash_key(
                &patterns,
                AssembleOptions {
                    format,
                    minimize: true,
                },
            )
        };
        let dense = key_for(DfaPackFormat::Dense);
        let equiv = key_for(DfaPackFormat::EquivClass);
        assert_ne!(dense, equiv);
    }

    /// An entry written with `write_entry` decodes back to an equal value
    /// through `read_entry`.
    #[test]
    fn write_and_read_roundtrip() {
        let scratch = unique_cache_dir("roundtrip");
        fs::create_dir_all(&scratch).unwrap();
        let entry_path = scratch.join("sample.vdfa");

        let original = super::super::dfa_assemble::dfa_assemble(
            &[Pattern::Literal(b"hi")],
            AssembleOptions::default(),
        )
        .unwrap();

        write_entry(&entry_path, &original).unwrap();
        let reloaded = read_entry(&entry_path).unwrap();
        assert_eq!(reloaded, original);

        let _ = fs::remove_dir_all(&scratch);
    }
}