Skip to main content

aft/inspect/
cache.rs

1use std::collections::{BTreeSet, HashMap, VecDeque};
2use std::fmt;
3use std::fs;
4use std::path::{Path, PathBuf};
5use std::sync::{Mutex, RwLock};
6use std::time::{Duration, SystemTime, UNIX_EPOCH};
7
8use rusqlite::{params, Connection, OpenFlags, OptionalExtension};
9
10use crate::cache_freshness::{self, FileFreshness, FreshnessVerdict};
11
12use super::job::{
13    contribution_with_type_ref_names, type_ref_names_from_contribution, FileContribution,
14    InspectCategory, JobKey,
15};
16
/// Batch of pending changes to the stored Tier-2 contributions of one category.
///
/// Consumed by `InspectCache::apply_contribution_updates`, which applies the
/// deletes, then the metadata updates, then the upserts inside one transaction.
#[derive(Debug, Default)]
pub(crate) struct Tier2ContributionUpdates {
    /// Contributions to insert, or to overwrite when a row for the same
    /// category/project/file already exists.
    pub upserts: Vec<FileContribution>,
    /// Paths whose contribution rows are removed. The path text is matched
    /// as-is against the stored `file_path` column.
    pub deletes: Vec<PathBuf>,
    /// Paths whose stored mtime/size/hash are overwritten without touching the
    /// stored contribution payload.
    pub metadata_updates: Vec<(PathBuf, FileFreshness)>,
}
23
/// Errors surfaced by the inspect cache.
#[derive(Debug)]
pub enum InspectCacheError {
    /// Filesystem failure, e.g. while creating the cache directory.
    Io(std::io::Error),
    /// Failure reported by SQLite through `rusqlite`.
    Sql(rusqlite::Error),
    /// A JSON payload could not be serialized or deserialized.
    Json(serde_json::Error),
    /// An internal lock was poisoned; the payload names the lock
    /// (e.g. `"memory"` or `"connection"`).
    LockPoisoned(&'static str),
    /// A stored hash was not a valid blake3 hash.
    /// NOTE(review): payload is presumably the rejected text — confirm at the
    /// construction site, which is outside this view.
    InvalidHash(String),
}
32
33impl fmt::Display for InspectCacheError {
34    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
35        match self {
36            InspectCacheError::Io(error) => write!(formatter, "inspect cache io error: {error}"),
37            InspectCacheError::Sql(error) => {
38                write!(formatter, "inspect cache sqlite error: {error}")
39            }
40            InspectCacheError::Json(error) => {
41                write!(formatter, "inspect cache json error: {error}")
42            }
43            InspectCacheError::LockPoisoned(name) => {
44                write!(formatter, "inspect cache lock poisoned: {name}")
45            }
46            InspectCacheError::InvalidHash(hash) => {
47                write!(formatter, "inspect cache invalid blake3 hash: {hash}")
48            }
49        }
50    }
51}
52
// Empty impl: only the default `std::error::Error` methods are used, so no
// `source()` is exposed; the wrapped error text is part of the `Display` output.
impl std::error::Error for InspectCacheError {}
54
55impl From<std::io::Error> for InspectCacheError {
56    fn from(error: std::io::Error) -> Self {
57        Self::Io(error)
58    }
59}
60
61impl From<rusqlite::Error> for InspectCacheError {
62    fn from(error: rusqlite::Error) -> Self {
63        Self::Sql(error)
64    }
65}
66
67impl From<serde_json::Error> for InspectCacheError {
68    fn from(error: serde_json::Error) -> Self {
69        Self::Json(error)
70    }
71}
72
/// Persisted Tier-2 contribution/aggregate format version.
///
/// Bump this when either of the following changes:
///
/// - the `FileContribution.contribution` JSON changes in a way that requires
///   existing per-file contributions to be rebuilt before roll-up; or
/// - the roll-up/aggregation LOGIC changes (e.g. dead_code reachability).
///
/// Cached aggregates are keyed by a `contribution_set_hash` that folds in this
/// version, so a logic-only change is invisible to existing caches unless the
/// version moves.
///
/// Version history:
///
/// - v6: dead_code now propagates liveness through dispatch-only method bodies
///   (free fns reached only via `obj.method()` were false-dead).
/// - v7: duplicates now collapses nested/overlapping fragments (a duplicated
///   block no longer reports every nested subtree as its own group).
/// - v8: entry-point recognition seeds npm `scripts` source files as liveness
///   roots (baked into per-file liveness_roots), and dead_code/unused_exports
///   exclude test-support files (fixtures/corpora/mocks) from reporting.
/// - v9: unused_exports resolves NodeNext `./x.js` import specifiers to their
///   `.ts` source (alters resolved import edges), fixing false-unused on symbols
///   re-exported/imported with a `.js` extension in a `.ts` source tree.
/// - v10: public-API entry resolution remaps build-output entries
///   (dist/index.js) to their src/ source equivalent, so the source barrel is
///   recognized as a public-API file and its re-exports are suppressed (changes
///   public-API set).
/// - v11: dead_code/unused_exports drill-down is ranked by signal tier (product
///   findings before benchmark/tooling noise) before the cap, and a ranked `top`
///   preview is folded into all three Tier-2 aggregates — changes cached payload.
pub(crate) const TIER2_CONTRIBUTION_CACHE_VERSION: u32 = 11;
97
/// One stored per-file Tier-2 contribution, as returned by
/// `InspectCache::load_tier2_contributions`.
#[derive(Debug, Clone)]
pub struct ContributionRecord {
    /// Category the record was loaded for.
    pub category: InspectCategory,
    /// Path exactly as stored in the `file_path` column.
    /// NOTE(review): writers store `relative_string(project_root, path)`, so
    /// this is presumably project-relative — confirm against that helper.
    pub file_path: PathBuf,
    /// mtime/size/content hash recorded alongside the contribution.
    pub freshness: FileFreshness,
    /// Decoded contribution JSON blob.
    pub contribution: serde_json::Value,
    /// Names extracted from `contribution` by `type_ref_names_from_contribution`.
    pub type_ref_names: BTreeSet<String>,
}
106
/// Aggregated payload held in the in-memory map of `InspectCache`.
#[derive(Debug, Clone)]
struct MemoryAggregate {
    /// The aggregate JSON handed back to callers.
    payload: serde_json::Value,
    /// Value of `unix_seconds_now()` when the entry was stored.
    generated_at: i64,
    /// Contribution-set hash the payload was computed for; `None` for entries
    /// stored through `store_aggregated`, which carries no hash.
    contribution_set_hash: Option<String>,
}
113
/// Maximum number of per-file entries a `Tier1MemoState` retains; inserting
/// beyond this evicts the least recently used entries.
const TIER1_FILE_MEMO_MAX_ENTRIES: usize = 4_096;
115
/// Memoized value for one file together with the data needed to validate and
/// age it.
#[derive(Debug, Clone)]
struct Tier1MemoEntry<T> {
    /// Freshness snapshot the value was computed against.
    freshness: FileFreshness,
    /// The memoized value.
    value: T,
    /// Recency stamp assigned by `Tier1MemoState`; an `LruNode` refers to this
    /// entry only while its own `generation` equals this one.
    generation: u64,
}
122
/// Entry in the recency queue of `Tier1MemoState`.
///
/// A node is stale (and ignored on eviction) when no entry exists for `path`
/// or the entry's generation differs from `generation`.
#[derive(Debug, Clone)]
struct LruNode {
    /// Key of the memo entry this node was queued for.
    path: PathBuf,
    /// Generation the entry had when the node was queued.
    generation: u64,
}
128
/// Bounded, least-recently-used memo of per-file values.
#[derive(Debug)]
struct Tier1MemoState<T> {
    /// Live entries keyed by file path.
    entries: HashMap<PathBuf, Tier1MemoEntry<T>>,
    /// Recency queue, oldest at the front. May contain stale nodes, which are
    /// dropped lazily on eviction or when the queue is rebuilt.
    lru: VecDeque<LruNode>,
    /// Next generation stamp to hand out.
    next_generation: u64,
}
135
136impl<T> Default for Tier1MemoState<T> {
137    fn default() -> Self {
138        Self {
139            entries: HashMap::new(),
140            lru: VecDeque::new(),
141            next_generation: 0,
142        }
143    }
144}
145
146impl<T> Tier1MemoState<T> {
147    fn insert(&mut self, path: PathBuf, mut entry: Tier1MemoEntry<T>) {
148        let generation = self.allocate_generation();
149        entry.generation = generation;
150        self.entries.insert(path.clone(), entry);
151        self.lru.push_back(LruNode { path, generation });
152        self.compact_lru_if_needed();
153        self.evict_lru();
154    }
155
156    fn remove(&mut self, path: &Path) {
157        self.entries.remove(path);
158        self.compact_lru_if_needed();
159    }
160
161    fn touch(&mut self, path: &Path) {
162        if !self.entries.contains_key(path) {
163            return;
164        }
165
166        let generation = self.allocate_generation();
167        if let Some(entry) = self.entries.get_mut(path) {
168            entry.generation = generation;
169            self.lru.push_back(LruNode {
170                path: path.to_path_buf(),
171                generation,
172            });
173        }
174        self.compact_lru_if_needed();
175    }
176
177    fn allocate_generation(&mut self) -> u64 {
178        if self.next_generation == u64::MAX {
179            self.rebuild_lru();
180        }
181        let generation = self.next_generation;
182        self.next_generation += 1;
183        generation
184    }
185
186    fn compact_lru_if_needed(&mut self) {
187        let max_lru_nodes = TIER1_FILE_MEMO_MAX_ENTRIES
188            .saturating_mul(2)
189            .max(self.entries.len());
190        if self.lru.len() > max_lru_nodes {
191            self.rebuild_lru();
192        }
193    }
194
195    fn rebuild_lru(&mut self) {
196        let mut live_nodes = self
197            .entries
198            .iter()
199            .map(|(path, entry)| (entry.generation, path.clone()))
200            .collect::<Vec<_>>();
201        live_nodes.sort_by_key(|(generation, _)| *generation);
202
203        self.lru.clear();
204        for (generation, (_, path)) in live_nodes.into_iter().enumerate() {
205            let generation = generation as u64;
206            if let Some(entry) = self.entries.get_mut(&path) {
207                entry.generation = generation;
208            }
209            self.lru.push_back(LruNode { path, generation });
210        }
211        self.next_generation = self.lru.len() as u64;
212    }
213
214    fn evict_lru(&mut self) {
215        while self.entries.len() > TIER1_FILE_MEMO_MAX_ENTRIES {
216            let Some(node) = self.lru.pop_front() else {
217                break;
218            };
219            if self
220                .entries
221                .get(&node.path)
222                .is_some_and(|entry| entry.generation == node.generation)
223            {
224                self.entries.remove(&node.path);
225            }
226        }
227        self.compact_lru_if_needed();
228    }
229}
230
/// Per-file memo whose entries are re-validated against the file's freshness
/// on every lookup.
#[derive(Debug)]
pub(crate) struct Tier1FileMemo<T> {
    /// Memo contents and recency bookkeeping, guarded by a mutex.
    state: Mutex<Tier1MemoState<T>>,
}
235
236impl<T> Default for Tier1FileMemo<T> {
237    fn default() -> Self {
238        Self {
239            state: Mutex::new(Tier1MemoState::default()),
240        }
241    }
242}
243
244impl<T: Clone> Tier1FileMemo<T> {
245    pub(crate) fn get_or_insert_with<F>(&self, path: &Path, scan: F) -> T
246    where
247        F: FnOnce(&Path) -> (Option<FileFreshness>, T),
248    {
249        if let Some(cached) = self.cached_value(path) {
250            return cached;
251        }
252
253        let (freshness, value) = scan(path);
254        if let Ok(mut state) = self.state.lock() {
255            if let Some(freshness) = freshness {
256                state.insert(
257                    path.to_path_buf(),
258                    Tier1MemoEntry {
259                        freshness,
260                        value: value.clone(),
261                        generation: 0,
262                    },
263                );
264            } else {
265                state.remove(path);
266            }
267        }
268        value
269    }
270
271    fn cached_value(&self, path: &Path) -> Option<T> {
272        let mut cached = self
273            .state
274            .lock()
275            .ok()
276            .and_then(|state| state.entries.get(path).cloned())?;
277
278        match crate::cache_freshness::verify_file(path, &cached.freshness) {
279            FreshnessVerdict::HotFresh => {
280                if let Ok(mut state) = self.state.lock() {
281                    state.touch(path);
282                }
283                Some(cached.value)
284            }
285            FreshnessVerdict::ContentFresh {
286                new_mtime,
287                new_size,
288            } => {
289                cached.freshness.mtime = new_mtime;
290                cached.freshness.size = new_size;
291                let value = cached.value.clone();
292                if let Ok(mut state) = self.state.lock() {
293                    state.insert(path.to_path_buf(), cached);
294                }
295                Some(value)
296            }
297            FreshnessVerdict::Stale => None,
298            FreshnessVerdict::Deleted => {
299                if let Ok(mut state) = self.state.lock() {
300                    state.remove(path);
301                }
302                None
303            }
304        }
305    }
306}
307
/// Per-project inspect cache: a SQLite database plus an in-process map of
/// aggregated payloads.
#[derive(Debug)]
pub struct InspectCache {
    /// Project root the cache was opened for; passed to the path and hash
    /// helpers used by the Tier-2 methods.
    project_root: PathBuf,
    /// Key derived from `project_root` via `search_index::project_cache_key`;
    /// bound to the `project_key` column in every query.
    project_key: String,
    /// Database file location: `<inspect_dir>/<project_key>.sqlite`.
    sqlite_path: PathBuf,
    /// The SQLite connection; the mutex serializes all database access.
    conn: Mutex<Connection>,
    /// Aggregated payloads held in memory, keyed by job.
    memory: RwLock<HashMap<JobKey, MemoryAggregate>>,
}
316
317impl InspectCache {
318    pub fn open(inspect_dir: PathBuf, project_root: PathBuf) -> Result<Self, InspectCacheError> {
319        std::fs::create_dir_all(&inspect_dir)?;
320        let project_key = crate::search_index::project_cache_key(&project_root);
321        let sqlite_path = inspect_dir.join(format!("{project_key}.sqlite"));
322        let conn = Connection::open(&sqlite_path)?;
323        configure_connection(&conn)?;
324        initialize_schema(&conn)?;
325        Ok(Self::from_connection(
326            project_root,
327            project_key,
328            sqlite_path,
329            conn,
330        ))
331    }
332
333    pub fn open_readonly(
334        inspect_dir: PathBuf,
335        project_root: PathBuf,
336    ) -> Result<Option<Self>, InspectCacheError> {
337        let project_key = crate::search_index::project_cache_key(&project_root);
338        let sqlite_path = inspect_dir.join(format!("{project_key}.sqlite"));
339        if !sqlite_path.is_file() {
340            return Ok(None);
341        }
342        let conn = Connection::open_with_flags(&sqlite_path, OpenFlags::SQLITE_OPEN_READ_ONLY)?;
343        conn.busy_timeout(Duration::from_millis(5_000))?;
344        Ok(Some(Self::from_connection(
345            project_root,
346            project_key,
347            sqlite_path,
348            conn,
349        )))
350    }
351
352    fn from_connection(
353        project_root: PathBuf,
354        project_key: String,
355        sqlite_path: PathBuf,
356        conn: Connection,
357    ) -> Self {
358        Self {
359            project_root,
360            project_key,
361            sqlite_path,
362            conn: Mutex::new(conn),
363            memory: RwLock::new(HashMap::new()),
364        }
365    }
366
367    pub fn project_root(&self) -> &Path {
368        &self.project_root
369    }
370
371    pub fn project_key(&self) -> &str {
372        &self.project_key
373    }
374
375    pub fn sqlite_path(&self) -> &Path {
376        &self.sqlite_path
377    }
378
379    pub fn store_aggregated(
380        &self,
381        key: JobKey,
382        payload: serde_json::Value,
383    ) -> Result<(), InspectCacheError> {
384        self.store_memory_aggregate(key, payload, None)
385    }
386
387    fn store_memory_aggregate(
388        &self,
389        key: JobKey,
390        payload: serde_json::Value,
391        contribution_set_hash: Option<String>,
392    ) -> Result<(), InspectCacheError> {
393        self.memory
394            .write()
395            .map_err(|_| InspectCacheError::LockPoisoned("memory"))?
396            .insert(
397                key,
398                MemoryAggregate {
399                    payload,
400                    generated_at: unix_seconds_now(),
401                    contribution_set_hash,
402                },
403            );
404        Ok(())
405    }
406
407    pub fn get_aggregated(
408        &self,
409        key: &JobKey,
410    ) -> Result<Option<serde_json::Value>, InspectCacheError> {
411        if !key.category.is_tier2() {
412            return Ok(self
413                .memory
414                .read()
415                .map_err(|_| InspectCacheError::LockPoisoned("memory"))?
416                .get(key)
417                .map(|entry| entry.payload.clone()));
418        }
419
420        let current_hash = {
421            let conn = self
422                .conn
423                .lock()
424                .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
425            contribution_set_hash_with_conn(
426                &conn,
427                key.category,
428                &self.project_key,
429                &self.project_root,
430            )?
431        };
432
433        let memory_entry = {
434            self.memory
435                .read()
436                .map_err(|_| InspectCacheError::LockPoisoned("memory"))?
437                .get(key)
438                .cloned()
439        };
440        if let Some(entry) = memory_entry {
441            if entry.contribution_set_hash.as_deref() == Some(current_hash.as_str()) {
442                return Ok(Some(entry.payload));
443            }
444            self.memory
445                .write()
446                .map_err(|_| InspectCacheError::LockPoisoned("memory"))?
447                .remove(key);
448        }
449
450        let payload = {
451            let conn = self
452                .conn
453                .lock()
454                .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
455            conn.query_row(
456                "SELECT aggregate FROM tier2_aggregates \
457                 WHERE category = ?1 AND project_key = ?2 AND contribution_set_hash = ?3",
458                params![key.category.as_str(), self.project_key, current_hash],
459                |row| row.get::<_, Vec<u8>>(0),
460            )
461            .optional()?
462        };
463
464        match payload {
465            Some(bytes) => {
466                let value = serde_json::from_slice::<serde_json::Value>(&bytes)?;
467                self.store_memory_aggregate(key.clone(), value.clone(), Some(current_hash))?;
468                Ok(Some(value))
469            }
470            None => Ok(None),
471        }
472    }
473
474    pub fn store_tier2_result(
475        &self,
476        key: JobKey,
477        scanned_files: &[PathBuf],
478        contributions: &[FileContribution],
479        aggregate: serde_json::Value,
480    ) -> Result<(), InspectCacheError> {
481        if !key.category.is_tier2() {
482            self.store_aggregated(key, aggregate)?;
483            return Ok(());
484        }
485
486        let now = unix_seconds_now();
487        let mut conn = self
488            .conn
489            .lock()
490            .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
491        let tx = conn.transaction()?;
492
493        let scanned_relative = scanned_files
494            .iter()
495            .map(|path| relative_string(&self.project_root, path))
496            .collect::<BTreeSet<_>>();
497        let existing = existing_contribution_paths(&tx, key.category, &self.project_key)?;
498        for file_path in existing {
499            if !scanned_relative.contains(&file_path) {
500                tx.execute(
501                    "DELETE FROM tier2_contributions WHERE category = ?1 AND project_key = ?2 AND file_path = ?3",
502                    params![key.category.as_str(), self.project_key, file_path],
503                )?;
504            }
505        }
506
507        for contribution in contributions {
508            let file_path = relative_string(&self.project_root, &contribution.file_path);
509            let blob = serde_json::to_vec(&contribution_with_type_ref_names(
510                contribution.contribution.clone(),
511                &contribution.type_ref_names,
512            ))?;
513            tx.execute(
514                "INSERT INTO tier2_contributions \
515                 (category, project_key, file_path, file_mtime_ns, file_size, file_hash, contribution, generated_at) \
516                 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8) \
517                 ON CONFLICT(category, project_key, file_path) DO UPDATE SET \
518                 file_mtime_ns = excluded.file_mtime_ns, \
519                 file_size = excluded.file_size, \
520                 file_hash = excluded.file_hash, \
521                 contribution = excluded.contribution, \
522                 generated_at = excluded.generated_at",
523                params![
524                    contribution.category.as_str(),
525                    self.project_key,
526                    file_path,
527                    system_time_to_ns(contribution.freshness.mtime),
528                    contribution.freshness.size as i64,
529                    hash_to_hex(contribution.freshness.content_hash),
530                    blob,
531                    now,
532                ],
533            )?;
534        }
535
536        let contribution_set_hash = contribution_set_hash_with_conn(
537            &tx,
538            key.category,
539            &self.project_key,
540            &self.project_root,
541        )?;
542        let aggregate_blob = serde_json::to_vec(&aggregate)?;
543        tx.execute(
544            "INSERT INTO tier2_aggregates \
545             (category, project_key, contribution_set_hash, aggregate, generated_at) \
546             VALUES (?1, ?2, ?3, ?4, ?5) \
547             ON CONFLICT(category, project_key) DO UPDATE SET \
548             contribution_set_hash = excluded.contribution_set_hash, \
549             aggregate = excluded.aggregate, \
550             generated_at = excluded.generated_at",
551            params![
552                key.category.as_str(),
553                self.project_key,
554                contribution_set_hash,
555                aggregate_blob,
556                now,
557            ],
558        )?;
559        tx.execute(
560            "INSERT INTO tier2_meta (category, project_key, last_full_run) VALUES (?1, ?2, ?3) \
561             ON CONFLICT(category, project_key) DO UPDATE SET last_full_run = excluded.last_full_run",
562            params![key.category.as_str(), self.project_key, now],
563        )?;
564        tx.commit()?;
565
566        self.store_memory_aggregate(key, aggregate, Some(contribution_set_hash))
567    }
568
569    pub(crate) fn apply_contribution_updates(
570        &self,
571        category: InspectCategory,
572        updates: Tier2ContributionUpdates,
573    ) -> Result<String, InspectCacheError> {
574        let now = unix_seconds_now();
575        let mut conn = self
576            .conn
577            .lock()
578            .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
579        let tx = conn.transaction()?;
580
581        for relative_file in updates.deletes {
582            tx.execute(
583                "DELETE FROM tier2_contributions WHERE category = ?1 AND project_key = ?2 AND file_path = ?3",
584                params![
585                    category.as_str(),
586                    self.project_key,
587                    relative_file.to_string_lossy().to_string()
588                ],
589            )?;
590        }
591
592        for (relative_file, freshness) in updates.metadata_updates {
593            tx.execute(
594                "UPDATE tier2_contributions \
595                 SET file_mtime_ns = ?4, file_size = ?5, file_hash = ?6 \
596                 WHERE category = ?1 AND project_key = ?2 AND file_path = ?3",
597                params![
598                    category.as_str(),
599                    self.project_key,
600                    relative_file.to_string_lossy().to_string(),
601                    system_time_to_ns(freshness.mtime),
602                    freshness.size as i64,
603                    hash_to_hex(freshness.content_hash),
604                ],
605            )?;
606        }
607
608        for contribution in updates.upserts {
609            let file_path = relative_string(&self.project_root, &contribution.file_path);
610            let blob = serde_json::to_vec(&contribution_with_type_ref_names(
611                contribution.contribution.clone(),
612                &contribution.type_ref_names,
613            ))?;
614            tx.execute(
615                "INSERT INTO tier2_contributions \
616                 (category, project_key, file_path, file_mtime_ns, file_size, file_hash, contribution, generated_at) \
617                 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8) \
618                 ON CONFLICT(category, project_key, file_path) DO UPDATE SET \
619                 file_mtime_ns = excluded.file_mtime_ns, \
620                 file_size = excluded.file_size, \
621                 file_hash = excluded.file_hash, \
622                 contribution = excluded.contribution, \
623                 generated_at = excluded.generated_at",
624                params![
625                    contribution.category.as_str(),
626                    self.project_key,
627                    file_path,
628                    system_time_to_ns(contribution.freshness.mtime),
629                    contribution.freshness.size as i64,
630                    hash_to_hex(contribution.freshness.content_hash),
631                    blob,
632                    now,
633                ],
634            )?;
635        }
636
637        let contribution_set_hash =
638            contribution_set_hash_with_conn(&tx, category, &self.project_key, &self.project_root)?;
639        tx.commit()?;
640
641        self.memory
642            .write()
643            .map_err(|_| InspectCacheError::LockPoisoned("memory"))?
644            .remove(&JobKey::for_project_category(category));
645
646        Ok(contribution_set_hash)
647    }
648
649    pub(crate) fn load_aggregate_if_hash_matches(
650        &self,
651        category: InspectCategory,
652        contribution_set_hash: &str,
653    ) -> Result<Option<serde_json::Value>, InspectCacheError> {
654        let payload = {
655            let conn = self
656                .conn
657                .lock()
658                .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
659            conn.query_row(
660                "SELECT aggregate FROM tier2_aggregates \
661                 WHERE category = ?1 AND project_key = ?2 AND contribution_set_hash = ?3",
662                params![category.as_str(), self.project_key, contribution_set_hash],
663                |row| row.get::<_, Vec<u8>>(0),
664            )
665            .optional()?
666        };
667
668        match payload {
669            Some(bytes) => {
670                let value = serde_json::from_slice::<serde_json::Value>(&bytes)?;
671                self.store_memory_aggregate(
672                    JobKey::for_project_category(category),
673                    value.clone(),
674                    Some(contribution_set_hash.to_string()),
675                )?;
676                Ok(Some(value))
677            }
678            None => Ok(None),
679        }
680    }
681
682    pub(crate) fn latest_aggregate_any_hash(
683        &self,
684        category: InspectCategory,
685    ) -> Result<Option<serde_json::Value>, InspectCacheError> {
686        let payload = {
687            let conn = self
688                .conn
689                .lock()
690                .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
691            conn.query_row(
692                "SELECT aggregate FROM tier2_aggregates \
693                 WHERE category = ?1 AND project_key = ?2 \
694                 ORDER BY generated_at DESC LIMIT 1",
695                params![category.as_str(), self.project_key],
696                |row| row.get::<_, Vec<u8>>(0),
697            )
698            .optional()?
699        };
700
701        match payload {
702            Some(bytes) => serde_json::from_slice::<serde_json::Value>(&bytes)
703                .map(Some)
704                .map_err(InspectCacheError::from),
705            None => Ok(None),
706        }
707    }
708
709    pub(crate) fn touch_tier2_last_full_run(
710        &self,
711        category: InspectCategory,
712    ) -> Result<i64, InspectCacheError> {
713        let mut conn = self
714            .conn
715            .lock()
716            .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
717        let tx = conn.transaction()?;
718        let previous = tx
719            .query_row(
720                "SELECT last_full_run FROM tier2_meta WHERE category = ?1 AND project_key = ?2",
721                params![category.as_str(), self.project_key],
722                |row| row.get::<_, i64>(0),
723            )
724            .optional()?;
725        let now = unix_seconds_now();
726        let last_full_run = previous.map_or(now, |previous| now.max(previous.saturating_add(1)));
727        tx.execute(
728            "INSERT INTO tier2_meta (category, project_key, last_full_run) VALUES (?1, ?2, ?3)              ON CONFLICT(category, project_key) DO UPDATE SET last_full_run = excluded.last_full_run",
729            params![category.as_str(), self.project_key, last_full_run],
730        )?;
731        tx.commit()?;
732        Ok(last_full_run)
733    }
734
735    pub(crate) fn store_tier2_aggregate(
736        &self,
737        key: JobKey,
738        contribution_set_hash: &str,
739        aggregate: serde_json::Value,
740    ) -> Result<(), InspectCacheError> {
741        if !key.category.is_tier2() {
742            self.store_aggregated(key, aggregate)?;
743            return Ok(());
744        }
745
746        let now = unix_seconds_now();
747        let aggregate_blob = serde_json::to_vec(&aggregate)?;
748        let mut conn = self
749            .conn
750            .lock()
751            .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
752        let tx = conn.transaction()?;
753        tx.execute(
754            "INSERT INTO tier2_aggregates \
755             (category, project_key, contribution_set_hash, aggregate, generated_at) \
756             VALUES (?1, ?2, ?3, ?4, ?5) \
757             ON CONFLICT(category, project_key) DO UPDATE SET \
758             contribution_set_hash = excluded.contribution_set_hash, \
759             aggregate = excluded.aggregate, \
760             generated_at = excluded.generated_at",
761            params![
762                key.category.as_str(),
763                self.project_key,
764                contribution_set_hash,
765                aggregate_blob,
766                now,
767            ],
768        )?;
769        tx.execute(
770            "INSERT INTO tier2_meta (category, project_key, last_full_run) VALUES (?1, ?2, ?3) \
771             ON CONFLICT(category, project_key) DO UPDATE SET last_full_run = excluded.last_full_run",
772            params![key.category.as_str(), self.project_key, now],
773        )?;
774        tx.commit()?;
775
776        self.store_memory_aggregate(key, aggregate, Some(contribution_set_hash.to_string()))
777    }
778
779    pub fn load_tier2_contributions(
780        &self,
781        category: InspectCategory,
782    ) -> Result<Vec<ContributionRecord>, InspectCacheError> {
783        let conn = self
784            .conn
785            .lock()
786            .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
787        let mut stmt = conn.prepare(
788            "SELECT file_path, file_mtime_ns, file_size, file_hash, contribution \
789             FROM tier2_contributions \
790             WHERE category = ?1 AND project_key = ?2 \
791             ORDER BY file_path ASC",
792        )?;
793        let rows = stmt.query_map(params![category.as_str(), self.project_key], |row| {
794            let file_path: String = row.get(0)?;
795            let mtime_ns: i64 = row.get(1)?;
796            let file_size: i64 = row.get(2)?;
797            let file_hash: String = row.get(3)?;
798            let contribution: Vec<u8> = row.get(4)?;
799            Ok((file_path, mtime_ns, file_size, file_hash, contribution))
800        })?;
801
802        let mut records = Vec::new();
803        for row in rows {
804            let (file_path, mtime_ns, file_size, file_hash, contribution) = row?;
805            let contribution: serde_json::Value = serde_json::from_slice(&contribution)?;
806            let type_ref_names = type_ref_names_from_contribution(&contribution);
807            records.push(ContributionRecord {
808                category,
809                file_path: PathBuf::from(file_path),
810                freshness: FileFreshness {
811                    mtime: ns_to_system_time(mtime_ns),
812                    size: file_size.max(0) as u64,
813                    content_hash: hash_from_hex(&file_hash)?,
814                },
815                contribution,
816                type_ref_names,
817            });
818        }
819        Ok(records)
820    }
821
822    pub fn delete_tier2_contribution(
823        &self,
824        category: InspectCategory,
825        relative_file: &Path,
826    ) -> Result<(), InspectCacheError> {
827        let conn = self
828            .conn
829            .lock()
830            .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
831        conn.execute(
832            "DELETE FROM tier2_contributions WHERE category = ?1 AND project_key = ?2 AND file_path = ?3",
833            params![
834                category.as_str(),
835                self.project_key,
836                relative_file.to_string_lossy().to_string()
837            ],
838        )?;
839        Ok(())
840    }
841
842    pub fn update_content_fresh_metadata(
843        &self,
844        category: InspectCategory,
845        relative_file: &Path,
846        freshness: &FileFreshness,
847    ) -> Result<(), InspectCacheError> {
848        let conn = self
849            .conn
850            .lock()
851            .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
852        conn.execute(
853            "UPDATE tier2_contributions \
854             SET file_mtime_ns = ?4, file_size = ?5, file_hash = ?6 \
855             WHERE category = ?1 AND project_key = ?2 AND file_path = ?3",
856            params![
857                category.as_str(),
858                self.project_key,
859                relative_file.to_string_lossy().to_string(),
860                system_time_to_ns(freshness.mtime),
861                freshness.size as i64,
862                hash_to_hex(freshness.content_hash),
863            ],
864        )?;
865        Ok(())
866    }
867
868    pub(crate) fn contribution_fingerprint(
869        &self,
870        category: InspectCategory,
871    ) -> Result<(usize, String, bool), InspectCacheError> {
872        let conn = self
873            .conn
874            .lock()
875            .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
876        let mut stmt = conn.prepare(
877            "SELECT file_path, file_mtime_ns, file_size, file_hash \
878             FROM tier2_contributions \
879             WHERE category = ?1 AND project_key = ?2 \
880             ORDER BY file_path ASC",
881        )?;
882        let rows = stmt.query_map(params![category.as_str(), self.project_key], |row| {
883            Ok((
884                row.get::<_, String>(0)?,
885                row.get::<_, i64>(1)?,
886                row.get::<_, i64>(2)?,
887                row.get::<_, String>(3)?,
888            ))
889        })?;
890
891        let zero_hash = hash_to_hex(cache_freshness::zero_hash());
892        let mut count = 0usize;
893        let mut hash_complete = true;
894        let mut hasher = blake3::Hasher::new();
895        for row in rows {
896            let (file_path, mtime_ns, file_size, file_hash) = row?;
897            count += 1;
898            if file_hash == zero_hash {
899                hash_complete = false;
900            }
901            update_contribution_fingerprint_hash(
902                &mut hasher,
903                &file_path,
904                mtime_ns.max(0),
905                file_size.max(0) as u64,
906                &file_hash,
907            );
908        }
909
910        Ok((count, hasher.finalize().to_hex().to_string(), hash_complete))
911    }
912
913    pub(crate) fn contribution_freshness(
914        &self,
915        category: InspectCategory,
916    ) -> Result<Vec<(PathBuf, FileFreshness)>, InspectCacheError> {
917        let conn = self
918            .conn
919            .lock()
920            .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
921        let mut stmt = conn.prepare(
922            "SELECT file_path, file_mtime_ns, file_size, file_hash \
923             FROM tier2_contributions \
924             WHERE category = ?1 AND project_key = ?2 \
925             ORDER BY file_path ASC",
926        )?;
927        let rows = stmt.query_map(params![category.as_str(), self.project_key], |row| {
928            Ok((
929                row.get::<_, String>(0)?,
930                row.get::<_, i64>(1)?,
931                row.get::<_, i64>(2)?,
932                row.get::<_, String>(3)?,
933            ))
934        })?;
935
936        let mut records = Vec::new();
937        for row in rows {
938            let (file_path, mtime_ns, file_size, file_hash) = row?;
939            records.push((
940                PathBuf::from(file_path),
941                FileFreshness {
942                    mtime: ns_to_system_time(mtime_ns),
943                    size: file_size.max(0) as u64,
944                    content_hash: hash_from_hex(&file_hash)?,
945                },
946            ));
947        }
948        Ok(records)
949    }
950
951    pub fn contribution_set_hash(
952        &self,
953        category: InspectCategory,
954    ) -> Result<String, InspectCacheError> {
955        let conn = self
956            .conn
957            .lock()
958            .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
959        contribution_set_hash_with_conn(&conn, category, &self.project_key, &self.project_root)
960    }
961
962    pub fn last_full_run(
963        &self,
964        category: InspectCategory,
965    ) -> Result<Option<i64>, InspectCacheError> {
966        let conn = self
967            .conn
968            .lock()
969            .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
970        conn.query_row(
971            "SELECT last_full_run FROM tier2_meta WHERE category = ?1 AND project_key = ?2",
972            params![category.as_str(), self.project_key],
973            |row| row.get::<_, i64>(0),
974        )
975        .optional()
976        .map_err(InspectCacheError::from)
977    }
978
979    pub fn memory_generated_at(&self, key: &JobKey) -> Result<Option<i64>, InspectCacheError> {
980        Ok(self
981            .memory
982            .read()
983            .map_err(|_| InspectCacheError::LockPoisoned("memory"))?
984            .get(key)
985            .map(|entry| entry.generated_at))
986    }
987}
988
989fn configure_connection(conn: &Connection) -> Result<(), InspectCacheError> {
990    conn.pragma_update(None, "journal_mode", "WAL")?;
991    conn.pragma_update(None, "busy_timeout", 5_000)?;
992    Ok(())
993}
994
995fn initialize_schema(conn: &Connection) -> Result<(), InspectCacheError> {
996    conn.execute_batch(
997        "CREATE TABLE IF NOT EXISTS tier2_contributions (
998            category        TEXT NOT NULL,
999            project_key     TEXT NOT NULL,
1000            file_path       TEXT NOT NULL,
1001            file_mtime_ns   INTEGER NOT NULL,
1002            file_size       INTEGER NOT NULL,
1003            file_hash       TEXT NOT NULL,
1004            contribution    BLOB NOT NULL,
1005            generated_at    INTEGER NOT NULL,
1006            PRIMARY KEY (category, project_key, file_path)
1007        );
1008
1009        CREATE TABLE IF NOT EXISTS tier2_aggregates (
1010            category        TEXT NOT NULL,
1011            project_key     TEXT NOT NULL,
1012            contribution_set_hash TEXT NOT NULL,
1013            aggregate       BLOB NOT NULL,
1014            generated_at    INTEGER NOT NULL,
1015            PRIMARY KEY (category, project_key)
1016        );
1017
1018        CREATE TABLE IF NOT EXISTS tier2_meta (
1019            category        TEXT NOT NULL,
1020            project_key     TEXT NOT NULL,
1021            last_full_run   INTEGER NOT NULL,
1022            PRIMARY KEY (category, project_key)
1023        );",
1024    )?;
1025    Ok(())
1026}
1027
1028fn existing_contribution_paths(
1029    conn: &Connection,
1030    category: InspectCategory,
1031    project_key: &str,
1032) -> Result<Vec<String>, InspectCacheError> {
1033    let mut stmt = conn.prepare(
1034        "SELECT file_path FROM tier2_contributions WHERE category = ?1 AND project_key = ?2",
1035    )?;
1036    let rows = stmt.query_map(params![category.as_str(), project_key], |row| {
1037        row.get::<_, String>(0)
1038    })?;
1039    rows.collect::<Result<Vec<_>, _>>()
1040        .map_err(InspectCacheError::from)
1041}
1042
1043fn contribution_set_hash_with_conn(
1044    conn: &Connection,
1045    category: InspectCategory,
1046    project_key: &str,
1047    project_root: &Path,
1048) -> Result<String, InspectCacheError> {
1049    let mut stmt = conn.prepare(
1050        "SELECT file_path, file_hash FROM tier2_contributions \
1051         WHERE category = ?1 AND project_key = ?2 ORDER BY file_path ASC",
1052    )?;
1053    let rows = stmt.query_map(params![category.as_str(), project_key], |row| {
1054        Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
1055    })?;
1056
1057    let mut hasher = blake3::Hasher::new();
1058    hasher.update(b"tier2-contributions\0");
1059    hasher.update(&TIER2_CONTRIBUTION_CACHE_VERSION.to_le_bytes());
1060    hasher.update(b"\0");
1061    for row in rows {
1062        let (file_path, file_hash) = row?;
1063        hasher.update(file_path.as_bytes());
1064        hasher.update(b"\0");
1065        hasher.update(file_hash.as_bytes());
1066        hasher.update(b"\0");
1067    }
1068    update_manifest_fingerprint_hash(&mut hasher, project_root)?;
1069    Ok(hasher.finalize().to_hex().to_string())
1070}
1071
1072fn update_manifest_fingerprint_hash(
1073    hasher: &mut blake3::Hasher,
1074    project_root: &Path,
1075) -> Result<(), InspectCacheError> {
1076    let manifest_root =
1077        fs::canonicalize(project_root).unwrap_or_else(|_| project_root.to_path_buf());
1078    hasher.update(b"entry-point-manifests\0");
1079    for manifest in super::entry_points::collect_entry_point_manifests(project_root) {
1080        let relative_path = manifest
1081            .strip_prefix(&manifest_root)
1082            .unwrap_or(manifest.as_path())
1083            .to_string_lossy()
1084            .replace('\\', "/");
1085        let content_hash = blake3::hash(&fs::read(&manifest)?);
1086        hasher.update(relative_path.as_bytes());
1087        hasher.update(b"\0");
1088        hasher.update(content_hash.as_bytes());
1089        hasher.update(b"\0");
1090    }
1091    Ok(())
1092}
1093
1094fn update_contribution_fingerprint_hash(
1095    hasher: &mut blake3::Hasher,
1096    relative_path: &str,
1097    mtime_ns: i64,
1098    file_size: u64,
1099    file_hash: &str,
1100) {
1101    hasher.update(relative_path.as_bytes());
1102    hasher.update(&[0]);
1103    hasher.update(&mtime_ns.to_le_bytes());
1104    hasher.update(&file_size.to_le_bytes());
1105    hasher.update(&[0]);
1106    hasher.update(file_hash.as_bytes());
1107}
1108
/// Renders `path` relative to `project_root` as a (lossy UTF-8) string.
///
/// When `path` does not start with `project_root`, the whole path is rendered
/// unchanged instead.
fn relative_string(project_root: &Path, path: &Path) -> String {
    let shown = match path.strip_prefix(project_root) {
        Ok(relative) => relative,
        Err(_) => path,
    };
    shown.to_string_lossy().into_owned()
}
1115
/// Converts `time` to whole nanoseconds since the Unix epoch.
///
/// Times before the epoch map to `0`; values that do not fit in an `i64`
/// saturate at `i64::MAX`.
fn system_time_to_ns(time: SystemTime) -> i64 {
    match time.duration_since(UNIX_EPOCH) {
        Ok(elapsed) => i64::try_from(elapsed.as_nanos()).unwrap_or(i64::MAX),
        Err(_) => 0,
    }
}
1123
/// Converts nanoseconds since the Unix epoch back into a `SystemTime`.
///
/// Negative inputs are treated as `0`, i.e. the epoch itself.
fn ns_to_system_time(value: i64) -> SystemTime {
    let nanos = u64::try_from(value).unwrap_or(0);
    UNIX_EPOCH + Duration::from_nanos(nanos)
}
1127
1128fn hash_to_hex(hash: blake3::Hash) -> String {
1129    hash.to_hex().to_string()
1130}
1131
1132fn hash_from_hex(value: &str) -> Result<blake3::Hash, InspectCacheError> {
1133    if value.len() != 64 {
1134        return Err(InspectCacheError::InvalidHash(value.to_string()));
1135    }
1136    let mut bytes = [0u8; 32];
1137    for (index, chunk) in value.as_bytes().chunks(2).enumerate() {
1138        let hex = std::str::from_utf8(chunk)
1139            .map_err(|_| InspectCacheError::InvalidHash(value.to_string()))?;
1140        bytes[index] = u8::from_str_radix(hex, 16)
1141            .map_err(|_| InspectCacheError::InvalidHash(value.to_string()))?;
1142    }
1143    Ok(blake3::Hash::from_bytes(bytes))
1144}
1145
/// Returns the current wall-clock time as whole seconds since the Unix epoch.
///
/// A clock set before the epoch yields `0`; a value that does not fit in an
/// `i64` saturates at `i64::MAX`.
fn unix_seconds_now() -> i64 {
    let seconds = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map_or(0, |elapsed| elapsed.as_secs());
    i64::try_from(seconds).unwrap_or(i64::MAX)
}
1153
// Unit tests for the Tier-1 file memo (eviction, LRU queue bounds, staleness
// handling) and for the Tier-2 contribution round trip through the on-disk cache.
#[cfg(test)]
mod tests {
    use super::*;
    use std::cell::Cell;
    use std::fs;
    use std::path::{Path, PathBuf};

    /// Collects freshness metadata for `path`, panicking if collection fails.
    fn collect_freshness(path: &Path) -> FileFreshness {
        crate::cache_freshness::collect(path).unwrap()
    }

    /// Fills the memo to `TIER1_FILE_MEMO_MAX_ENTRIES`, touches the first entry,
    /// then inserts one more file. The touched entry and the new entry must be
    /// present afterwards, while the second-inserted entry must be gone.
    #[test]
    fn tier1_file_memo_evicts_lru_and_keeps_recent_hits() {
        let temp = tempfile::tempdir().unwrap();
        let memo = Tier1FileMemo::<usize>::default();
        let mut paths = Vec::with_capacity(TIER1_FILE_MEMO_MAX_ENTRIES);

        // Insert exactly as many distinct files as the memo is allowed to hold.
        for index in 0..TIER1_FILE_MEMO_MAX_ENTRIES {
            let path = temp.path().join(format!("file-{index}.txt"));
            fs::write(&path, index.to_string()).unwrap();
            let value =
                memo.get_or_insert_with(&path, |path| (Some(collect_freshness(path)), index));
            assert_eq!(value, index);
            paths.push(path);
        }

        // Touch the oldest entry; the loader panics, so this must be a cache hit.
        let recent_path = paths[0].clone();
        let recent_value = memo.get_or_insert_with(&recent_path, |_| {
            panic!("recently inserted entry should hit before eviction")
        });
        assert_eq!(recent_value, 0);

        // One more insertion pushes the memo past its capacity.
        let evicting_path = temp.path().join("new-file.txt");
        fs::write(&evicting_path, "new").unwrap();
        let evicting_value = memo.get_or_insert_with(&evicting_path, |path| {
            (Some(collect_freshness(path)), TIER1_FILE_MEMO_MAX_ENTRIES)
        });
        assert_eq!(evicting_value, TIER1_FILE_MEMO_MAX_ENTRIES);

        let state = memo.state.lock().unwrap();
        assert_eq!(state.entries.len(), TIER1_FILE_MEMO_MAX_ENTRIES);
        assert!(state.entries.contains_key(&recent_path));
        assert!(state.entries.contains_key(&evicting_path));
        assert!(!state.entries.contains_key(&paths[1]));
        // Release the lock before calling back into the memo below.
        drop(state);

        let recent_value = memo.get_or_insert_with(&recent_path, |_| {
            panic!("recently used entry should survive eviction")
        });
        assert_eq!(recent_value, 0);
    }

    /// Touches one entry many times before forcing an eviction, then checks
    /// that the hot entry survives and that the `lru` queue holds at most
    /// twice `TIER1_FILE_MEMO_MAX_ENTRIES` items.
    #[test]
    fn tier1_file_memo_repeated_touches_keep_lazy_lru_bounded() {
        let temp = tempfile::tempdir().unwrap();
        let memo = Tier1FileMemo::<usize>::default();
        let mut paths = Vec::with_capacity(TIER1_FILE_MEMO_MAX_ENTRIES);

        for index in 0..TIER1_FILE_MEMO_MAX_ENTRIES {
            let path = temp.path().join(format!("file-{index}.txt"));
            fs::write(&path, index.to_string()).unwrap();
            memo.get_or_insert_with(&path, |path| (Some(collect_freshness(path)), index));
            paths.push(path);
        }

        // Hit the same entry three times as often as the memo has slots.
        for _ in 0..(TIER1_FILE_MEMO_MAX_ENTRIES * 3) {
            let value = memo.get_or_insert_with(&paths[0], |_| {
                panic!("hot entry should stay cached while it is repeatedly touched")
            });
            assert_eq!(value, 0);
        }

        let evicting_path = temp.path().join("new-file.txt");
        fs::write(&evicting_path, "new").unwrap();
        memo.get_or_insert_with(&evicting_path, |path| {
            (Some(collect_freshness(path)), TIER1_FILE_MEMO_MAX_ENTRIES)
        });

        let state = memo.state.lock().unwrap();
        assert_eq!(state.entries.len(), TIER1_FILE_MEMO_MAX_ENTRIES);
        assert!(state.entries.contains_key(&paths[0]));
        assert!(state.entries.contains_key(&evicting_path));
        assert!(!state.entries.contains_key(&paths[1]));
        assert!(
            state.lru.len() <= TIER1_FILE_MEMO_MAX_ENTRIES * 2,
            "lazy LRU queue should be compacted instead of growing without bound"
        );
    }

    /// Verifies that an unchanged file is served from the memo without calling
    /// the loader, and that rewriting the file triggers exactly one rescan.
    #[test]
    fn tier1_file_memo_reuses_fresh_entries_and_rescans_stale_files() {
        let temp = tempfile::tempdir().unwrap();
        let path = temp.path().join("memo.txt");
        fs::write(&path, "first").unwrap();

        let memo = Tier1FileMemo::<String>::default();
        // Counts how many times a loader closure actually ran.
        let scans = Cell::new(0);

        let first = memo.get_or_insert_with(&path, |path| {
            scans.set(scans.get() + 1);
            (Some(collect_freshness(path)), "first scan".to_string())
        });
        assert_eq!(first, "first scan");
        assert_eq!(scans.get(), 1);

        let unchanged =
            memo.get_or_insert_with(&path, |_| panic!("unchanged file should reuse Tier-1 memo"));
        assert_eq!(unchanged, "first scan");
        assert_eq!(scans.get(), 1);

        // New contents have a different length than the original "first".
        fs::write(&path, "changed file contents").unwrap();
        let changed = memo.get_or_insert_with(&path, |path| {
            scans.set(scans.get() + 1);
            (Some(collect_freshness(path)), "second scan".to_string())
        });
        assert_eq!(changed, "second scan");
        assert_eq!(scans.get(), 2);

        let fresh_after_rescan = memo.get_or_insert_with(&path, |_| {
            panic!("rescanned file should reuse refreshed Tier-1 memo")
        });
        assert_eq!(fresh_after_rescan, "second scan");
        assert_eq!(scans.get(), 2);
    }

    /// Serde-serializable copy of the `ContributionRecord` fields checked by
    /// the round-trip test below; the record's freshness data is not copied.
    #[derive(serde::Deserialize, serde::Serialize)]
    struct RoundTripContributionRecord {
        category: String,
        file_path: PathBuf,
        contribution: serde_json::Value,
        type_ref_names: BTreeSet<String>,
    }

    impl From<&ContributionRecord> for RoundTripContributionRecord {
        /// Copies the category (as its string form), path, contribution JSON
        /// and type-reference names out of `record`.
        fn from(record: &ContributionRecord) -> Self {
            Self {
                category: record.category.as_str().to_string(),
                file_path: record.file_path.clone(),
                contribution: record.contribution.clone(),
                type_ref_names: record.type_ref_names.clone(),
            }
        }
    }

    /// Stores a dead-code contribution, reopens the cache from the same
    /// directory, reloads the contribution, and checks that the liveness
    /// metadata survives both the cache and a JSON serialization round trip.
    #[test]
    fn contribution_record_round_trip_preserves_dead_code_liveness_metadata() {
        let temp = tempfile::tempdir().unwrap();
        let project_root = temp.path().join("project");
        let inspect_dir = temp.path().join("inspect");
        let source = project_root.join("src/lib.ts");
        fs::create_dir_all(source.parent().unwrap()).unwrap();
        fs::write(&source, "export interface Widget { id: string }\n").unwrap();

        let cache = InspectCache::open(inspect_dir.clone(), project_root.clone()).unwrap();
        let contribution = FileContribution::new(
            InspectCategory::DeadCode,
            source.clone(),
            collect_freshness(&source),
            serde_json::json!({
                "file": "src/lib.ts",
                "exports": [{
                    "symbol": "Widget",
                    "kind": "interface",
                    "line": 1,
                    "is_type_like": true,
                    "is_entry_point": false,
                }],
                "internal_calls": [],
                "liveness_roots": [],
                "dispatched_method_names": ["render"],
                "type_ref_names": ["Widget"],
            }),
        )
        .with_type_ref_names(["Widget".to_string()]);
        cache
            .store_tier2_result(
                JobKey::for_project_category(InspectCategory::DeadCode),
                std::slice::from_ref(&source),
                &[contribution],
                serde_json::json!({ "count": 0, "items": [] }),
            )
            .unwrap();
        // Drop the first handle so the reload below goes through a fresh open.
        drop(cache);

        let cache = InspectCache::open(inspect_dir, project_root).unwrap();
        let records = cache
            .load_tier2_contributions(InspectCategory::DeadCode)
            .unwrap();
        assert_eq!(records.len(), 1);

        let serialized =
            serde_json::to_vec(&RoundTripContributionRecord::from(&records[0])).unwrap();
        let decoded: RoundTripContributionRecord = serde_json::from_slice(&serialized).unwrap();
        assert_eq!(decoded.category, InspectCategory::DeadCode.as_str());
        assert_eq!(decoded.contribution["dispatched_method_names"][0], "render");
        assert_eq!(decoded.contribution["type_ref_names"][0], "Widget");
        assert!(decoded.type_ref_names.contains("Widget"));
        assert_eq!(
            decoded.contribution["exports"][0]["is_type_like"].as_bool(),
            Some(true)
        );
        // Pins the cache version so that changing it requires touching this test.
        assert_eq!(TIER2_CONTRIBUTION_CACHE_VERSION, 11);
    }
}