Skip to main content

file_parse_cache/
lib.rs

1use std::fmt;
2use std::hash::Hash;
3use std::io;
4use std::path::{Path, PathBuf};
5use std::sync::atomic::{AtomicBool, Ordering};
6use std::sync::Arc;
7use std::time::{Duration, SystemTime};
8
9use moka::sync::Cache;
10
11// ---------------------------------------------------------------------------
12// Fingerprint trait + impls
13// ---------------------------------------------------------------------------
14
/// Cheaply identifies whether a file's content has changed since the last parse.
///
/// A fingerprint strategy turns a path into a [`Fingerprint::Stamp`]; the
/// cache stores the stamp next to the parsed value and re-parses whenever a
/// freshly computed stamp differs from the stored one.
pub trait Fingerprint: Send + Sync + 'static {
    /// Opaque stamp that can be compared for equality and hashed.
    ///
    /// Two equal stamps mean "treat the file as unchanged".
    type Stamp: Eq + Hash + Clone + Send + Sync + fmt::Debug + 'static;

    /// Compute the current stamp for `path`.
    ///
    /// # Errors
    ///
    /// Returns `Err` if the file is unreadable (missing, permissions, etc.) —
    /// the cache treats this as a miss that produces the caller's error.
    fn stamp(&self, path: &Path) -> io::Result<Self::Stamp>;
}
25
26// ---------------------------------------------------------------------------
27// MtimeStamp — serialization-ready replacement for SystemTime
28// ---------------------------------------------------------------------------
29
/// Seconds + nanoseconds since UNIX epoch. Negative `secs` for pre-epoch times.
///
/// Uses floor semantics: `nanos` is always non-negative and `secs` is the
/// largest integer ≤ the true value. For example, 0.5 seconds before epoch
/// is `{ secs: -1, nanos: 500_000_000 }`, representing −1 + 0.5 = −0.5.
///
/// Serde support is derived only in test builds or with the `persist` feature.
#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
#[cfg_attr(any(test, feature = "persist"), derive(serde::Serialize, serde::Deserialize))]
#[non_exhaustive]
pub struct MtimeStamp {
    /// Whole seconds relative to the UNIX epoch, rounded toward negative infinity.
    pub secs: i64,
    /// Non-negative sub-second offset added on top of `secs`, in nanoseconds.
    pub nanos: u32,
}
42
43impl From<SystemTime> for MtimeStamp {
44    fn from(t: SystemTime) -> Self {
45        match t.duration_since(SystemTime::UNIX_EPOCH) {
46            Ok(d) => MtimeStamp {
47                secs: d.as_secs() as i64,
48                nanos: d.subsec_nanos(),
49            },
50            Err(e) => {
51                let d = e.duration();
52                let sub = d.subsec_nanos();
53                if sub == 0 {
54                    MtimeStamp {
55                        secs: -(d.as_secs() as i64),
56                        nanos: 0,
57                    }
58                } else {
59                    MtimeStamp {
60                        secs: -(d.as_secs() as i64) - 1,
61                        nanos: 1_000_000_000 - sub,
62                    }
63                }
64            }
65        }
66    }
67}
68
69impl MtimeStamp {
70    /// Convert back to `SystemTime`. Lossless roundtrip with `From<SystemTime>`.
71    pub fn to_system_time(self) -> SystemTime {
72        if self.secs >= 0 {
73            SystemTime::UNIX_EPOCH + Duration::new(self.secs as u64, self.nanos)
74        } else if self.nanos == 0 {
75            SystemTime::UNIX_EPOCH - Duration::new((-self.secs) as u64, 0)
76        } else {
77            SystemTime::UNIX_EPOCH
78                - Duration::new((-self.secs - 1) as u64, 1_000_000_000 - self.nanos)
79        }
80    }
81}
82
83/// Compares `mtime` from filesystem metadata. Cheap (one syscall), but can
84/// miss edits that land within the same second on coarse-grained filesystems,
85/// and reports false changes after `git clone` or `cargo` resets mtimes.
86#[derive(Debug, Clone, Copy, Default)]
87pub struct MtimeFingerprint;
88
89impl Fingerprint for MtimeFingerprint {
90    type Stamp = MtimeStamp;
91
92    fn stamp(&self, path: &Path) -> io::Result<Self::Stamp> {
93        let mtime = std::fs::metadata(path)?.modified()?;
94        Ok(MtimeStamp::from(mtime))
95    }
96}
97
98/// BLAKE3 hash of the full file content. Robust against mtime resets, but
99/// reads the entire file on every check.
100#[derive(Debug, Clone, Copy, Default)]
101pub struct ContentHashFingerprint;
102
103/// 32-byte BLAKE3 digest, wrapped for trait impls.
104#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
105#[cfg_attr(any(test, feature = "persist"), derive(serde::Serialize, serde::Deserialize))]
106#[non_exhaustive]
107pub struct Blake3Stamp(pub [u8; 32]);
108
109impl Fingerprint for ContentHashFingerprint {
110    type Stamp = Blake3Stamp;
111
112    fn stamp(&self, path: &Path) -> io::Result<Self::Stamp> {
113        let bytes = std::fs::read(path)?;
114        Ok(Blake3Stamp(*blake3::hash(&bytes).as_bytes()))
115    }
116}
117
118// ---------------------------------------------------------------------------
119// Cache entry (stored inside moka)
120// ---------------------------------------------------------------------------
121
/// Value stored in the underlying cache: the parsed result together with the
/// fingerprint stamp that was current when it was parsed.
#[derive(Clone, Debug)]
struct Entry<T, S> {
    /// Stamp computed just before the value was parsed.
    stamp: S,
    /// The parsed value handed back to callers on a hit.
    value: T,
}
127
128// ---------------------------------------------------------------------------
129// Core cache
130// ---------------------------------------------------------------------------
131
/// Mtime-gated (or content-hash-gated) file parse cache.
///
/// On `get`, the fingerprint of the file is checked. If it matches the cached
/// stamp, the cached `T` is returned without re-parsing. On mismatch or cache
/// miss the `parser` closure runs and the result is stored.
///
/// Backed by `moka::sync::Cache` with bounded-size LRU eviction.
///
/// NOTE(review): the cache is built with `Cache::new(max_entries)`, i.e.
/// moka's default eviction policy, which moka documents as TinyLFU rather
/// than strict LRU — confirm before relying on LRU ordering.
pub struct FileParseCache<T, F: Fingerprint = MtimeFingerprint> {
    /// Path → (stamp, parsed value) store with a bounded entry count.
    inner: Cache<PathBuf, Entry<T, F::Stamp>>,
    /// Strategy used to compute the current stamp of a file.
    fingerprint: Arc<F>,
    /// Set when entries are inserted or invalidated; cleared by `save`.
    dirty: AtomicBool,
}
144
145impl<T, F> fmt::Debug for FileParseCache<T, F>
146where
147    T: Clone + Send + Sync + 'static + fmt::Debug,
148    F: Fingerprint + fmt::Debug,
149{
150    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
151        self.inner.run_pending_tasks();
152        f.debug_struct("FileParseCache")
153            .field("entry_count", &self.inner.entry_count())
154            .field("fingerprint", &self.fingerprint)
155            .finish()
156    }
157}
158
159impl<T: Clone + Send + Sync + 'static> FileParseCache<T, MtimeFingerprint> {
160    /// Create a cache with mtime-based invalidation and room for `max_entries` files.
161    pub fn new(max_entries: u64) -> Self {
162        Self::with_fingerprint(max_entries, MtimeFingerprint)
163    }
164}
165
166impl<T, F> FileParseCache<T, F>
167where
168    T: Clone + Send + Sync + 'static,
169    F: Fingerprint,
170{
171    /// Create a cache with a custom fingerprint strategy.
172    pub fn with_fingerprint(max_entries: u64, fingerprint: F) -> Self {
173        Self {
174            inner: Cache::new(max_entries),
175            fingerprint: Arc::new(fingerprint),
176            dirty: AtomicBool::new(false),
177        }
178    }
179
180    /// Return the cached value for `path`, or parse it via `parser` on miss /
181    /// fingerprint change.
182    ///
183    /// If multiple threads call `get` for the same path concurrently and all
184    /// miss, each thread runs `parser` independently. The last writer wins in
185    /// moka; all callers receive a correct (freshly-parsed) value. This trades
186    /// a rare redundant parse for a simpler API — coalescing via
187    /// `try_get_with` would require wrapping the caller's error in `Arc`.
188    ///
189    /// On unreadable files (missing, permissions), the fingerprint stat fails
190    /// and returns `Err(E::from(io::Error))` without invoking `parser`.
191    pub fn get<E>(
192        &self,
193        path: &Path,
194        parser: impl FnOnce(&Path) -> Result<T, E>,
195    ) -> Result<T, E>
196    where
197        E: From<io::Error> + Send + Sync + 'static,
198    {
199        let key = path.to_path_buf();
200        let current_stamp = self.fingerprint.stamp(path).map_err(E::from)?;
201
202        if let Some(entry) = self.inner.get(&key) {
203            if entry.stamp == current_stamp {
204                return Ok(entry.value.clone());
205            }
206        }
207
208        let value = parser(path)?;
209        self.inner.insert(
210            key,
211            Entry {
212                stamp: current_stamp,
213                value: value.clone(),
214            },
215        );
216        self.dirty.store(true, Ordering::Release);
217        Ok(value)
218    }
219
220    /// Remove entries where `predicate` returns `true`.
221    ///
222    /// Not atomic: takes a snapshot via iteration, then invalidates matching
223    /// keys one by one. An entry inserted by a concurrent `get` between the
224    /// snapshot and the invalidation pass will be missed — it will be caught
225    /// on the next `purge_if` call. Invalidated entries become immediately
226    /// invisible to `get`, but `len()` reflects the removal only after its
227    /// own pending-task flush.
228    ///
229    /// Allocates O(cache size) for the key snapshot — every key is cloned
230    /// into a temporary `Vec` before invalidation begins. Fine for caches
231    /// under ~10K entries. For larger caches, prefer moka's built-in
232    /// TTL/TTI-based eviction over manual purging.
233    pub fn purge_if(&self, predicate: impl Fn(&Path) -> bool) {
234        let keys_to_remove: Vec<PathBuf> = self
235            .inner
236            .iter()
237            .filter(|(k, _)| predicate(k))
238            .map(|(k, _)| k.as_ref().clone())
239            .collect();
240        if !keys_to_remove.is_empty() {
241            for key in &keys_to_remove {
242                self.inner.invalidate(key);
243            }
244            self.dirty.store(true, Ordering::Release);
245        }
246    }
247
248    /// Remove all entries.
249    pub fn clear(&self) {
250        self.inner.invalidate_all();
251        self.dirty.store(true, Ordering::Release);
252    }
253
254    /// Number of entries currently in the cache.
255    ///
256    /// Flushes pending bookkeeping before reading the count so the value is
257    /// immediately consistent with preceding `get`, `purge_if`, and `clear`
258    /// calls. Cost is O(pending operations), not O(1) — typically microseconds
259    /// for caches with low write rates, but callers in tight loops should be
260    /// aware.
261    pub fn len(&self) -> u64 {
262        self.inner.run_pending_tasks();
263        self.inner.entry_count()
264    }
265
266    /// Whether the cache is empty.
267    ///
268    /// Same consistency and cost as [`len`](Self::len).
269    pub fn is_empty(&self) -> bool {
270        self.len() == 0
271    }
272}
273
274// ---------------------------------------------------------------------------
275// Persistence (feature = "persist")
276// ---------------------------------------------------------------------------
277
/// Serialization codec for disk persistence. Not object-safe due to generic
/// methods — pass as `&Fmt` where `Fmt: Format`, not `&dyn Format`.
///
/// Errors are type-erased into `Box<dyn Error + Send + Sync>` so each codec
/// can surface its own error type.
#[cfg(feature = "persist")]
pub trait Format: Send + Sync {
    /// Encode `value` into a freshly allocated byte buffer.
    fn serialize<T: serde::Serialize>(
        &self,
        value: &T,
    ) -> Result<Vec<u8>, Box<dyn std::error::Error + Send + Sync>>;

    /// Decode a `T` from `bytes`.
    fn deserialize<T: serde::de::DeserializeOwned>(
        &self,
        bytes: &[u8],
    ) -> Result<T, Box<dyn std::error::Error + Send + Sync>>;
}
292
/// [`Format`] implementation backed by `bincode`.
#[cfg(feature = "persist-bincode")]
#[derive(Debug, Clone, Copy, Default)]
pub struct BincodeFormat;

#[cfg(feature = "persist-bincode")]
impl Format for BincodeFormat {
    /// Encode `value` with `bincode::serialize`, erasing the error type.
    fn serialize<T: serde::Serialize>(
        &self,
        value: &T,
    ) -> Result<Vec<u8>, Box<dyn std::error::Error + Send + Sync>> {
        type Erased = Box<dyn std::error::Error + Send + Sync>;
        // bincode's error is already boxed; the cast only unsizes it.
        bincode::serialize(value).map_err(|failure| failure as Erased)
    }

    /// Decode a `T` with `bincode::deserialize`, erasing the error type.
    fn deserialize<T: serde::de::DeserializeOwned>(
        &self,
        bytes: &[u8],
    ) -> Result<T, Box<dyn std::error::Error + Send + Sync>> {
        type Erased = Box<dyn std::error::Error + Send + Sync>;
        bincode::deserialize(bytes).map_err(|failure| failure as Erased)
    }
}
313
/// [`Format`] implementation backed by `postcard`.
#[cfg(feature = "persist-postcard")]
#[derive(Debug, Clone, Copy, Default)]
pub struct PostcardFormat;

#[cfg(feature = "persist-postcard")]
impl Format for PostcardFormat {
    /// Encode `value` with `postcard::to_allocvec`, boxing any error.
    fn serialize<T: serde::Serialize>(
        &self,
        value: &T,
    ) -> Result<Vec<u8>, Box<dyn std::error::Error + Send + Sync>> {
        type Erased = Box<dyn std::error::Error + Send + Sync>;
        postcard::to_allocvec(value).map_err(|failure| Box::new(failure) as Erased)
    }

    /// Decode a `T` with `postcard::from_bytes`, boxing any error.
    fn deserialize<T: serde::de::DeserializeOwned>(
        &self,
        bytes: &[u8],
    ) -> Result<T, Box<dyn std::error::Error + Send + Sync>> {
        type Erased = Box<dyn std::error::Error + Send + Sync>;
        postcard::from_bytes(bytes).map_err(|failure| Box::new(failure) as Erased)
    }
}
336
/// Version tag written into every saved cache file; `load` rejects files
/// whose stored version differs from this value.
#[cfg(feature = "persist")]
const DISK_CACHE_VERSION: u32 = 1;
339
/// Top-level record serialized to the cache file.
#[cfg(feature = "persist")]
#[derive(serde::Serialize, serde::Deserialize)]
struct DiskCache<T, S> {
    /// Format version; compared against `DISK_CACHE_VERSION` on load.
    version: u32,
    /// One record per cached file at the time of the save.
    entries: Vec<DiskEntry<T, S>>,
}
346
/// Serialized form of a single cache entry.
#[cfg(feature = "persist")]
#[derive(serde::Serialize, serde::Deserialize)]
struct DiskEntry<T, S> {
    /// Cache key as text. `save` produces it with `to_string_lossy`, so a
    /// non-UTF-8 path is stored with replacement characters.
    path: String,
    /// Fingerprint stamp recorded with the value.
    stamp: S,
    /// The parsed value.
    value: T,
}
354
/// Failure modes of `FileParseCache::save`.
#[cfg(feature = "persist")]
#[derive(Debug)]
#[non_exhaustive]
pub enum SaveError {
    /// Creating the parent directory or writing the file failed.
    Io(io::Error),
    /// The `Format` codec could not encode the snapshot.
    Serialize(Box<dyn std::error::Error + Send + Sync>),
}

#[cfg(feature = "persist")]
impl fmt::Display for SaveError {
    /// Render a short category label followed by the underlying error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            SaveError::Io(cause) => write!(f, "IO error: {}", cause),
            SaveError::Serialize(cause) => write!(f, "serialization error: {}", cause),
        }
    }
}

#[cfg(feature = "persist")]
impl std::error::Error for SaveError {
    /// Every variant wraps an underlying error, which is exposed here.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        let cause: &(dyn std::error::Error + 'static) = match self {
            SaveError::Io(io_err) => io_err,
            SaveError::Serialize(boxed) => &**boxed,
        };
        Some(cause)
    }
}
382
/// Failure modes of `FileParseCache::load`.
#[cfg(feature = "persist")]
#[derive(Debug)]
#[non_exhaustive]
pub enum LoadError {
    /// Reading the cache file failed.
    Io(io::Error),
    /// The `Format` codec could not decode the file content.
    Deserialize(Box<dyn std::error::Error + Send + Sync>),
    /// The file was decoded but carries a different format version.
    VersionMismatch { disk: u32, expected: u32 },
}

#[cfg(feature = "persist")]
impl fmt::Display for LoadError {
    /// Render a short category label followed by the relevant details.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            LoadError::Io(cause) => write!(f, "IO error: {}", cause),
            LoadError::Deserialize(cause) => write!(f, "deserialization error: {}", cause),
            LoadError::VersionMismatch { disk, expected } => {
                write!(f, "version mismatch: disk={}, expected={}", disk, expected)
            }
        }
    }
}

#[cfg(feature = "persist")]
impl std::error::Error for LoadError {
    /// Expose the wrapped error where there is one; a version mismatch has
    /// no underlying cause.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            LoadError::Io(io_err) => Some(io_err),
            LoadError::Deserialize(boxed) => Some(&**boxed),
            LoadError::VersionMismatch { .. } => None,
        }
    }
}
415
/// Counters returned by `FileParseCache::load`.
#[cfg(feature = "persist")]
#[derive(Debug, Clone, Copy)]
pub struct LoadStats {
    /// Entries whose stored stamp still matched the file and were inserted.
    pub loaded: u64,
    /// Entries skipped because the file could not be stamped or its current
    /// stamp differed from the stored one.
    pub stale: u64,
}
422
#[cfg(feature = "persist")]
impl<T, F> FileParseCache<T, F>
where
    T: Clone + Send + Sync + 'static,
    F: Fingerprint,
{
    /// Persist the current cache to `path` using `format`.
    ///
    /// Returns `Ok(())` immediately if no entries have changed since the last
    /// save. On failure, the dirty flag is restored so the next `save()` retries.
    ///
    /// Missing parent directories are created. The file is written in place
    /// with `std::fs::write`; it is not replaced atomically.
    ///
    /// **Deferred-insert semantics:** A concurrent `get()` that parses and
    /// inserts during `save()` may or may not be included in this snapshot.
    /// If missed, the insert sets the dirty flag, ensuring the next `save()`
    /// captures it. No insert is ever lost as long as the caller saves again
    /// when entries have changed.
    ///
    /// # Errors
    ///
    /// `SaveError::Serialize` if `format` cannot encode the snapshot,
    /// `SaveError::Io` if the directory or file cannot be written.
    pub fn save<Fmt: Format>(&self, path: &Path, format: &Fmt) -> Result<(), SaveError>
    where
        T: serde::Serialize,
        F::Stamp: serde::Serialize,
    {
        // Claim the dirty flag up front so inserts racing with this save
        // re-set it and are picked up by the next call.
        if !self.dirty.swap(false, Ordering::AcqRel) {
            return Ok(());
        }

        // The iterator already yields owned entries, so their fields can be
        // moved into the disk records without another clone.
        let entries: Vec<DiskEntry<T, F::Stamp>> = self
            .inner
            .iter()
            .map(|(key, Entry { stamp, value })| DiskEntry {
                path: key.to_string_lossy().into_owned(),
                stamp,
                value,
            })
            .collect();

        let disk = DiskCache {
            version: DISK_CACHE_VERSION,
            entries,
        };

        let outcome = Self::write_snapshot(&disk, path, format);
        if outcome.is_err() {
            // Nothing usable reached disk: keep the cache marked as unsaved.
            self.dirty.store(true, Ordering::Release);
        }
        outcome
    }

    /// Serialize `disk` and write it to `path`, creating parent directories.
    fn write_snapshot<Fmt: Format>(
        disk: &DiskCache<T, F::Stamp>,
        path: &Path,
        format: &Fmt,
    ) -> Result<(), SaveError>
    where
        T: serde::Serialize,
        F::Stamp: serde::Serialize,
    {
        let bytes = format.serialize(disk).map_err(SaveError::Serialize)?;
        if let Some(parent) = path.parent() {
            std::fs::create_dir_all(parent).map_err(SaveError::Io)?;
        }
        std::fs::write(path, &bytes).map_err(SaveError::Io)
    }

    /// Load cached entries from `path`, dropping entries whose fingerprint no
    /// longer matches the file on disk.
    ///
    /// Stale validation is eager: every loaded entry is re-stamped via the
    /// `Fingerprint` impl. Entries for files that were deleted or modified
    /// since the cache was saved are silently skipped. Returns counts of
    /// loaded vs stale entries.
    ///
    /// Does not set the dirty flag — loaded data already matches disk.
    ///
    /// # Errors
    ///
    /// `LoadError::Io` if the file cannot be read, `LoadError::Deserialize`
    /// if `format` cannot decode it, and `LoadError::VersionMismatch` if the
    /// stored version differs from the one this build writes.
    pub fn load<Fmt: Format>(&self, path: &Path, format: &Fmt) -> Result<LoadStats, LoadError>
    where
        T: serde::de::DeserializeOwned,
        F::Stamp: serde::de::DeserializeOwned,
    {
        let bytes = std::fs::read(path).map_err(LoadError::Io)?;
        let disk: DiskCache<T, F::Stamp> =
            format.deserialize(&bytes).map_err(LoadError::Deserialize)?;

        if disk.version != DISK_CACHE_VERSION {
            return Err(LoadError::VersionMismatch {
                disk: disk.version,
                expected: DISK_CACHE_VERSION,
            });
        }

        let mut loaded = 0u64;
        let mut stale = 0u64;

        for DiskEntry { path: raw_path, stamp, value } in disk.entries {
            let file_path = PathBuf::from(raw_path);
            match self.fingerprint.stamp(&file_path) {
                // Still the same file content as when it was saved.
                Ok(current) if current == stamp => {
                    self.inner.insert(file_path, Entry { stamp, value });
                    loaded += 1;
                }
                // Unreadable now, or changed since the save.
                _ => stale += 1,
            }
        }

        Ok(LoadStats { loaded, stale })
    }
}
540
541// ===========================================================================
542// Tests
543// ===========================================================================
544
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::io::Write as IoWrite;
    use tempfile::TempDir;

    /// Create `name` inside `dir` holding `content`; returns the full path.
    fn write_file(dir: &TempDir, name: &str, content: &str) -> PathBuf {
        let target = dir.path().join(name);
        fs::File::create(&target)
            .unwrap()
            .write_all(content.as_bytes())
            .unwrap();
        target
    }

    /// Mtime-gated cache of parsed lines with room for 64 files.
    fn make_cache() -> FileParseCache<Vec<String>> {
        FileParseCache::new(64)
    }

    /// Parser used throughout: split the file into owned lines.
    fn line_parser(path: &Path) -> Result<Vec<String>, io::Error> {
        fs::read_to_string(path).map(|text| text.lines().map(String::from).collect())
    }

    // ----- MtimeStamp roundtrip -----

    #[test]
    fn mtime_stamp_post_epoch_roundtrip() {
        let original = SystemTime::now();
        let stamp: MtimeStamp = original.into();
        assert!(stamp.secs > 0);
        assert_eq!(stamp.to_system_time(), original);
    }

    #[test]
    fn mtime_stamp_pre_epoch_roundtrip() {
        // (distance before the epoch, expected floor seconds, expected nanos)
        let cases = [
            (Duration::new(1, 500_000_000), -2, 500_000_000),
            (Duration::new(0, 500_000_000), -1, 500_000_000),
            (Duration::from_secs(3), -3, 0),
            (Duration::ZERO, 0, 0),
        ];
        for (before_epoch, secs, nanos) in cases {
            let instant = SystemTime::UNIX_EPOCH - before_epoch;
            let stamp = MtimeStamp::from(instant);
            assert_eq!(stamp.secs, secs);
            assert_eq!(stamp.nanos, nanos);
            assert_eq!(stamp.to_system_time(), instant);
        }
    }

    #[test]
    fn mtime_stamp_serde_roundtrip() {
        let samples: [(i64, u32); 4] = [
            (1_700_000_000, 123_456_789),
            (-2, 500_000_000),
            (-1, 500_000_000),
            (0, 0),
        ];
        for (secs, nanos) in samples {
            let stamp = MtimeStamp { secs, nanos };
            let encoded = serde_json::to_string(&stamp).unwrap();
            let decoded: MtimeStamp = serde_json::from_str(&encoded).unwrap();
            assert_eq!(stamp, decoded, "failed roundtrip for {stamp:?}");
        }
    }

    #[test]
    fn blake3_stamp_serde_roundtrip() {
        let stamp = Blake3Stamp(*blake3::hash(b"hello").as_bytes());
        let encoded = serde_json::to_string(&stamp).unwrap();
        let decoded: Blake3Stamp = serde_json::from_str(&encoded).unwrap();
        assert_eq!(stamp, decoded);
    }

    // ----- basic cache behavior -----

    #[test]
    fn returns_parsed_value_and_caches_it() {
        let tmp = TempDir::new().unwrap();
        let file = write_file(&tmp, "a.txt", "hello\nworld");
        let cache = make_cache();

        let parsed = cache.get(&file, line_parser).unwrap();
        assert_eq!(parsed, vec!["hello", "world"]);

        let again = cache.get(&file, line_parser).unwrap();
        assert_eq!(again, parsed);
    }

    #[test]
    fn len_is_consistent_immediately_after_insert() {
        let tmp = TempDir::new().unwrap();
        let cache = make_cache();
        assert_eq!(cache.len(), 0);

        let files = [
            write_file(&tmp, "a.txt", "a"),
            write_file(&tmp, "b.txt", "b"),
        ];
        for (already_cached, file) in files.iter().enumerate() {
            cache.get(file, line_parser).unwrap();
            assert_eq!(cache.len(), already_cached as u64 + 1);
        }
    }

    #[test]
    fn missing_file_returns_error() {
        let cache = make_cache();
        let missing = Path::new("/no/such/file.txt");
        assert!(cache.get(missing, line_parser).is_err());
    }

    // ----- invalidation -----

    #[test]
    fn reparses_when_mtime_changes() {
        let tmp = TempDir::new().unwrap();
        let file = write_file(&tmp, "a.txt", "v1");
        let cache = make_cache();

        assert_eq!(cache.get(&file, line_parser).unwrap(), vec!["v1"]);

        // Sleep past one second so the rewrite gets a different mtime.
        std::thread::sleep(Duration::from_millis(1100));
        fs::write(&file, "v2\nv3").unwrap();

        assert_eq!(cache.get(&file, line_parser).unwrap(), vec!["v2", "v3"]);
    }

    // ----- content-hash fingerprint -----

    #[test]
    fn content_hash_detects_same_mtime_different_content() {
        let tmp = TempDir::new().unwrap();
        let file = write_file(&tmp, "a.txt", "original");
        let cache = FileParseCache::<Vec<String>, ContentHashFingerprint>::with_fingerprint(
            64,
            ContentHashFingerprint,
        );

        assert_eq!(cache.get(&file, line_parser).unwrap(), vec!["original"]);

        fs::write(&file, "changed").unwrap();

        assert_eq!(cache.get(&file, line_parser).unwrap(), vec!["changed"]);
    }

    #[test]
    fn content_hash_skips_reparse_on_identical_content() {
        use std::sync::atomic::{AtomicU32, Ordering};

        let tmp = TempDir::new().unwrap();
        let file = write_file(&tmp, "a.txt", "stable");
        let cache = FileParseCache::<Vec<String>, ContentHashFingerprint>::with_fingerprint(
            64,
            ContentHashFingerprint,
        );

        // Count how often the cache actually invokes the parser.
        let parse_count = AtomicU32::new(0);
        let counting_parser = |path: &Path| -> Result<Vec<String>, io::Error> {
            parse_count.fetch_add(1, Ordering::Relaxed);
            line_parser(path)
        };

        cache.get(&file, &counting_parser).unwrap();
        assert_eq!(parse_count.load(Ordering::Relaxed), 1);

        // Rewrite identical bytes later: mtime moves, the hash does not.
        std::thread::sleep(Duration::from_millis(1100));
        fs::write(&file, "stable").unwrap();

        cache.get(&file, &counting_parser).unwrap();
        assert_eq!(parse_count.load(Ordering::Relaxed), 1);
    }

    // ----- purge_if -----

    #[test]
    fn purge_if_removes_matching_entries() {
        let tmp = TempDir::new().unwrap();
        let kept = write_file(&tmp, "keep.txt", "a");
        let dropped = write_file(&tmp, "drop.txt", "b");
        let cache = make_cache();

        for file in [&kept, &dropped] {
            cache.get(file, line_parser).unwrap();
        }

        cache.purge_if(|p| p.file_name() == Some(std::ffi::OsStr::new("drop.txt")));

        // len() flushes pending tasks internally — no explicit drain needed.
        assert_eq!(cache.len(), 1);
    }

    // ----- clear -----

    #[test]
    fn clear_removes_all_entries() {
        let tmp = TempDir::new().unwrap();
        let cache = make_cache();

        for (name, content) in [("a.txt", "a"), ("b.txt", "b")] {
            let file = write_file(&tmp, name, content);
            cache.get(&file, line_parser).unwrap();
        }

        cache.clear();
        // len() flushes pending tasks internally — no explicit drain needed.
        assert_eq!(cache.len(), 0);
    }

    // ----- user-controlled error type -----

    /// Caller-defined error type proving `get` is generic over the error.
    #[derive(Debug)]
    #[allow(dead_code)]
    enum MyError {
        Io(io::Error),
        Parse(String),
    }

    impl From<io::Error> for MyError {
        fn from(e: io::Error) -> Self {
            Self::Io(e)
        }
    }

    #[test]
    fn parser_error_propagates_without_caching() {
        let tmp = TempDir::new().unwrap();
        let file = write_file(&tmp, "bad.txt", "not-a-number");
        let cache: FileParseCache<i32> = FileParseCache::new(64);

        let outcome = cache.get(&file, |path| {
            let text = fs::read_to_string(path).map_err(MyError::Io)?;
            let number = text.trim().parse::<i32>();
            number.map_err(|e| MyError::Parse(e.to_string()))
        });

        assert!(matches!(outcome, Err(MyError::Parse(_))));
        // len() flushes pending tasks internally — no explicit drain needed.
        assert_eq!(cache.len(), 0);
    }
}
795
796// ---------------------------------------------------------------------------
797// Persistence tests (bincode)
798// ---------------------------------------------------------------------------
799
800#[cfg(all(test, feature = "persist-bincode"))]
801mod persist_tests {
802    use super::*;
803    use std::fs;
804    use std::io::Write as IoWrite;
805    use tempfile::TempDir;
806
807    fn write_file(dir: &TempDir, name: &str, content: &str) -> PathBuf {
808        let p = dir.path().join(name);
809        let mut f = fs::File::create(&p).unwrap();
810        f.write_all(content.as_bytes()).unwrap();
811        p
812    }
813
814    fn make_cache() -> FileParseCache<Vec<String>> {
815        FileParseCache::new(64)
816    }
817
818    fn line_parser(path: &Path) -> Result<Vec<String>, io::Error> {
819        let text = fs::read_to_string(path)?;
820        Ok(text.lines().map(String::from).collect())
821    }
822
823    struct FailingFormat;
824    impl Format for FailingFormat {
825        fn serialize<T: serde::Serialize>(
826            &self,
827            _: &T,
828        ) -> Result<Vec<u8>, Box<dyn std::error::Error + Send + Sync>> {
829            Err("intentional failure".into())
830        }
831        fn deserialize<T: serde::de::DeserializeOwned>(
832            &self,
833            _: &[u8],
834        ) -> Result<T, Box<dyn std::error::Error + Send + Sync>> {
835            Err("intentional failure".into())
836        }
837    }
838
839    #[test]
840    fn save_and_load_roundtrip() {
841        let tmp = TempDir::new().unwrap();
842        let cache_path = tmp.path().join("cache.bin");
843        let p = write_file(&tmp, "a.txt", "hello\nworld");
844
845        let cache = make_cache();
846        cache.get(&p, line_parser).unwrap();
847        cache.save(&cache_path, &BincodeFormat).unwrap();
848        assert!(cache_path.exists());
849
850        let cache2 = make_cache();
851        let stats = cache2.load(&cache_path, &BincodeFormat).unwrap();
852        assert_eq!(stats.loaded, 1);
853        assert_eq!(stats.stale, 0);
854
855        let entries = cache2.get(&p, line_parser).unwrap();
856        assert_eq!(entries, vec!["hello", "world"]);
857    }
858
859    #[test]
860    fn load_drops_stale_entries() {
861        let tmp = TempDir::new().unwrap();
862        let cache_path = tmp.path().join("cache.bin");
863        let p = write_file(&tmp, "a.txt", "v1");
864
865        let cache = make_cache();
866        cache.get(&p, line_parser).unwrap();
867        cache.save(&cache_path, &BincodeFormat).unwrap();
868
869        // Modify the file so mtime changes — entry becomes stale.
870        std::thread::sleep(std::time::Duration::from_millis(1100));
871        fs::write(&p, "v2").unwrap();
872
873        let cache2 = make_cache();
874        let stats = cache2.load(&cache_path, &BincodeFormat).unwrap();
875        assert_eq!(stats.loaded, 0);
876        assert_eq!(stats.stale, 1);
877        assert_eq!(cache2.len(), 0);
878    }
879
880    #[test]
881    fn load_drops_missing_files() {
882        let tmp = TempDir::new().unwrap();
883        let cache_path = tmp.path().join("cache.bin");
884        let p = write_file(&tmp, "a.txt", "v1");
885
886        let cache = make_cache();
887        cache.get(&p, line_parser).unwrap();
888        cache.save(&cache_path, &BincodeFormat).unwrap();
889
890        fs::remove_file(&p).unwrap();
891
892        let cache2 = make_cache();
893        let stats = cache2.load(&cache_path, &BincodeFormat).unwrap();
894        assert_eq!(stats.loaded, 0);
895        assert_eq!(stats.stale, 1);
896    }
897
898    #[test]
899    fn save_noop_when_not_dirty() {
900        let tmp = TempDir::new().unwrap();
901        let cache_path = tmp.path().join("cache.bin");
902        let cache = make_cache();
903
904        cache.save(&cache_path, &BincodeFormat).unwrap();
905        assert!(!cache_path.exists());
906    }
907
908    #[test]
909    fn save_after_load_is_noop() {
910        let tmp = TempDir::new().unwrap();
911        let cache_path = tmp.path().join("cache.bin");
912        let p = write_file(&tmp, "a.txt", "hello");
913
914        let cache = make_cache();
915        cache.get(&p, line_parser).unwrap();
916        cache.save(&cache_path, &BincodeFormat).unwrap();
917
918        // Load into fresh cache — dirty should remain false.
919        let cache2 = make_cache();
920        cache2.load(&cache_path, &BincodeFormat).unwrap();
921
922        // Remove the file and try to save — should be noop (not dirty).
923        fs::remove_file(&cache_path).unwrap();
924        cache2.save(&cache_path, &BincodeFormat).unwrap();
925        assert!(!cache_path.exists());
926    }
927
928    #[test]
929    fn save_restores_dirty_on_failure() {
930        let tmp = TempDir::new().unwrap();
931        let cache_path = tmp.path().join("cache.bin");
932        let p = write_file(&tmp, "a.txt", "hello");
933        let cache = make_cache();
934
935        cache.get(&p, line_parser).unwrap();
936
937        let result = cache.save(&cache_path, &FailingFormat);
938        assert!(result.is_err());
939
940        // Dirty was restored — real save should now succeed.
941        cache.save(&cache_path, &BincodeFormat).unwrap();
942        assert!(cache_path.exists());
943    }
944
945    #[test]
946    fn version_mismatch_returns_error() {
947        let tmp = TempDir::new().unwrap();
948        let cache_path = tmp.path().join("cache.bin");
949
950        // Write a cache with a bogus version.
951        let disk: DiskCache<Vec<String>, MtimeStamp> = DiskCache {
952            version: 99,
953            entries: vec![],
954        };
955        let bytes = bincode::serialize(&disk).unwrap();
956        fs::write(&cache_path, &bytes).unwrap();
957
958        let cache = make_cache();
959        let result = cache.load(&cache_path, &BincodeFormat);
960        assert!(matches!(
961            result,
962            Err(LoadError::VersionMismatch { disk: 99, expected: 1 })
963        ));
964    }
965
966    #[test]
967    fn multiple_entries_roundtrip() {
968        let tmp = TempDir::new().unwrap();
969        let cache_path = tmp.path().join("cache.bin");
970        let a = write_file(&tmp, "a.txt", "alpha");
971        let b = write_file(&tmp, "b.txt", "beta\ngamma");
972
973        let cache = make_cache();
974        cache.get(&a, line_parser).unwrap();
975        cache.get(&b, line_parser).unwrap();
976        cache.save(&cache_path, &BincodeFormat).unwrap();
977
978        let cache2 = make_cache();
979        let stats = cache2.load(&cache_path, &BincodeFormat).unwrap();
980        assert_eq!(stats.loaded, 2);
981
982        assert_eq!(cache2.get(&a, line_parser).unwrap(), vec!["alpha"]);
983        assert_eq!(cache2.get(&b, line_parser).unwrap(), vec!["beta", "gamma"]);
984    }
985}
986
987// ---------------------------------------------------------------------------
988// Persistence tests (postcard)
989// ---------------------------------------------------------------------------
990
#[cfg(all(test, feature = "persist-postcard"))]
mod postcard_tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Creates `name` inside `dir` holding `content` and returns its full path.
    fn write_file(dir: &TempDir, name: &str, content: &str) -> PathBuf {
        let target = dir.path().join(name);
        fs::write(&target, content.as_bytes()).unwrap();
        target
    }

    /// Parser under test: reads the file as UTF-8 and yields one `String` per line.
    fn line_parser(path: &Path) -> Result<Vec<String>, io::Error> {
        fs::read_to_string(path).map(|text| text.lines().map(String::from).collect())
    }

    // A cache persisted with `PostcardFormat` must reload with the same entry
    // and serve the same parsed lines.
    #[test]
    fn postcard_save_and_load_roundtrip() {
        let dir = TempDir::new().unwrap();
        let snapshot = dir.path().join("cache.pc");
        let source = write_file(&dir, "a.txt", "hello\nworld");

        // Populate and persist.
        let writer = FileParseCache::<Vec<String>>::new(64);
        writer.get(&source, line_parser).unwrap();
        writer.save(&snapshot, &PostcardFormat).unwrap();

        // Restore into an independent cache and check the load statistics.
        let reader = FileParseCache::<Vec<String>>::new(64);
        let report = reader.load(&snapshot, &PostcardFormat).unwrap();
        assert_eq!(report.loaded, 1);
        assert_eq!(report.stale, 0);

        let lines = reader.get(&source, line_parser).unwrap();
        assert_eq!(lines, vec!["hello", "world"]);
    }
}