Skip to main content

aft/inspect/
cache.rs

1use std::collections::{BTreeSet, HashMap, VecDeque};
2use std::fmt;
3use std::fs;
4use std::path::{Path, PathBuf};
5use std::sync::{Mutex, RwLock};
6use std::time::{Duration, SystemTime, UNIX_EPOCH};
7
8use rusqlite::{params, Connection, OpenFlags, OptionalExtension};
9
10use crate::cache_freshness::{self, FileFreshness, FreshnessVerdict};
11
12use super::job::{
13    contribution_with_type_ref_names, type_ref_names_from_contribution, FileContribution,
14    InspectCategory, JobKey,
15};
16
/// Batch of row-level changes for one category's persisted Tier-2
/// contributions, consumed by `InspectCache::apply_contribution_updates`.
#[derive(Debug, Default)]
pub(crate) struct Tier2ContributionUpdates {
    /// Contributions to insert, or to overwrite when a row for the same
    /// category/project/file already exists.
    pub upserts: Vec<FileContribution>,
    /// Paths whose contribution rows are removed. Each path is matched against
    /// the stored `file_path` column using its lossy string form as-is.
    pub deletes: Vec<PathBuf>,
    /// Paths whose stored mtime/size/hash columns are replaced with the paired
    /// freshness; the stored contribution payload is left untouched.
    pub metadata_updates: Vec<(PathBuf, FileFreshness)>,
}
23
/// Errors surfaced by the inspect cache.
#[derive(Debug)]
pub enum InspectCacheError {
    /// Filesystem failure (converted from `std::io::Error`).
    Io(std::io::Error),
    /// SQLite failure (converted from `rusqlite::Error`).
    Sql(rusqlite::Error),
    /// JSON (de)serialization failure (converted from `serde_json::Error`).
    Json(serde_json::Error),
    /// A lock was poisoned; the payload names which one (this file uses
    /// `"memory"` and `"connection"`).
    LockPoisoned(&'static str),
    /// A hash string that is not a valid blake3 hash; carries the offending text.
    // NOTE(review): presumably raised while decoding stored `file_hash` values
    // (see `hash_from_hex`) — confirm against that helper.
    InvalidHash(String),
}
32
33impl fmt::Display for InspectCacheError {
34    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
35        match self {
36            InspectCacheError::Io(error) => write!(formatter, "inspect cache io error: {error}"),
37            InspectCacheError::Sql(error) => {
38                write!(formatter, "inspect cache sqlite error: {error}")
39            }
40            InspectCacheError::Json(error) => {
41                write!(formatter, "inspect cache json error: {error}")
42            }
43            InspectCacheError::LockPoisoned(name) => {
44                write!(formatter, "inspect cache lock poisoned: {name}")
45            }
46            InspectCacheError::InvalidHash(hash) => {
47                write!(formatter, "inspect cache invalid blake3 hash: {hash}")
48            }
49        }
50    }
51}
52
// Marker impl only: `source()` keeps its default (`None`); the wrapped error's
// message is already embedded in the `Display` output.
impl std::error::Error for InspectCacheError {}
54
55impl From<std::io::Error> for InspectCacheError {
56    fn from(error: std::io::Error) -> Self {
57        Self::Io(error)
58    }
59}
60
61impl From<rusqlite::Error> for InspectCacheError {
62    fn from(error: rusqlite::Error) -> Self {
63        Self::Sql(error)
64    }
65}
66
67impl From<serde_json::Error> for InspectCacheError {
68    fn from(error: serde_json::Error) -> Self {
69        Self::Json(error)
70    }
71}
72
/// Persisted Tier-2 contribution/aggregate format version.
///
/// Bump this when either of the following changes:
///
/// - the `FileContribution.contribution` JSON, in a way that requires existing
///   per-file contributions to be rebuilt before roll-up; or
/// - the roll-up/aggregation LOGIC (e.g. dead_code reachability). Cached
///   aggregates are keyed by a `contribution_set_hash` that folds in this
///   version, so a logic-only change is invisible to existing caches unless
///   the version moves.
///
/// Version history:
///
/// - v6: dead_code now propagates liveness through dispatch-only method bodies
///   (free fns reached only via `obj.method()` were false-dead).
/// - v7: duplicates now collapses nested/overlapping fragments (a duplicated
///   block no longer reports every nested subtree as its own group).
/// - v8: entry-point recognition seeds npm `scripts` source files as liveness
///   roots (baked into per-file liveness_roots), and dead_code/unused_exports
///   exclude test-support files (fixtures/corpora/mocks) from reporting.
/// - v9: unused_exports resolves NodeNext `./x.js` import specifiers to their
///   `.ts` source (alters resolved import edges), fixing false-unused on
///   symbols re-exported/imported with a `.js` extension in a `.ts` source tree.
/// - v10: public-API entry resolution remaps build-output entries
///   (dist/index.js) to their src/ source equivalent, so the source barrel is
///   recognized as a public-API file and its re-exports are suppressed
///   (changes public-API set).
/// - v11: dead_code/unused_exports drill-down is ranked by signal tier (product
///   findings before benchmark/tooling noise) before the cap, and a ranked
///   `top` preview is folded into all three Tier-2 aggregates — changes cached
///   payload.
/// - v12: dead_code internal call rows include call-edge provenance, changing
///   cached per-file contribution payloads and aggregate roll-up inputs.
/// - v13: dead_code callgraph snapshots are projected from the persisted
///   CallgraphStore; per-row provenance now reflects store resolution tiers.
pub(crate) const TIER2_CONTRIBUTION_CACHE_VERSION: u32 = 13;
101
/// One persisted per-file Tier-2 contribution, as returned by
/// `InspectCache::load_tier2_contributions`.
#[derive(Debug, Clone)]
pub struct ContributionRecord {
    /// Category the row was loaded for.
    pub category: InspectCategory,
    /// The stored `file_path` column value, wrapped in a `PathBuf` unchanged.
    pub file_path: PathBuf,
    /// Freshness rebuilt from the stored mtime (ns), size, and hex hash columns.
    pub freshness: FileFreshness,
    /// The stored contribution blob parsed as JSON.
    pub contribution: serde_json::Value,
    /// Names extracted from `contribution` by `type_ref_names_from_contribution`.
    pub type_ref_names: BTreeSet<String>,
}
110
/// In-memory copy of an aggregate payload, keyed by `JobKey` in
/// `InspectCache::memory`.
#[derive(Debug, Clone)]
struct MemoryAggregate {
    /// The aggregate JSON handed back to callers.
    payload: serde_json::Value,
    /// Value of `unix_seconds_now()` when the entry was stored.
    generated_at: i64,
    /// Contribution-set hash the payload was stored under; `None` for entries
    /// stored through `store_aggregated`.
    contribution_set_hash: Option<String>,
}
117
/// Maximum number of per-file entries a `Tier1MemoState` retains; inserting
/// beyond this evicts the least recently used entries.
const TIER1_FILE_MEMO_MAX_ENTRIES: usize = 4_096;
119
/// A memoized per-file value together with the freshness it was computed for.
#[derive(Debug, Clone)]
struct Tier1MemoEntry<T> {
    /// File freshness recorded alongside `value`; re-verified on each lookup.
    freshness: FileFreshness,
    /// The memoized value.
    value: T,
    /// Recency stamp assigned by `Tier1MemoState`; only the LRU node carrying
    /// the same stamp is considered current for this entry.
    generation: u64,
}
126
/// One slot in the recency queue. A node is stale (and ignored on eviction)
/// once its `generation` no longer matches the entry stored for `path`.
#[derive(Debug, Clone)]
struct LruNode {
    /// Path of the entry this node refers to.
    path: PathBuf,
    /// Generation the entry had when this node was queued.
    generation: u64,
}
132
/// Mutable state of the Tier-1 per-file memo: the entries plus a recency queue
/// used for LRU eviction.
#[derive(Debug)]
struct Tier1MemoState<T> {
    /// Live entries keyed by file path.
    entries: HashMap<PathBuf, Tier1MemoEntry<T>>,
    /// Recency queue, oldest at the front; may contain stale nodes for entries
    /// that were touched, replaced, or removed since the node was pushed.
    lru: VecDeque<LruNode>,
    /// Next generation stamp to hand out.
    next_generation: u64,
}
139
140impl<T> Default for Tier1MemoState<T> {
141    fn default() -> Self {
142        Self {
143            entries: HashMap::new(),
144            lru: VecDeque::new(),
145            next_generation: 0,
146        }
147    }
148}
149
150impl<T> Tier1MemoState<T> {
151    fn insert(&mut self, path: PathBuf, mut entry: Tier1MemoEntry<T>) {
152        let generation = self.allocate_generation();
153        entry.generation = generation;
154        self.entries.insert(path.clone(), entry);
155        self.lru.push_back(LruNode { path, generation });
156        self.compact_lru_if_needed();
157        self.evict_lru();
158    }
159
160    fn remove(&mut self, path: &Path) {
161        self.entries.remove(path);
162        self.compact_lru_if_needed();
163    }
164
165    fn touch(&mut self, path: &Path) {
166        if !self.entries.contains_key(path) {
167            return;
168        }
169
170        let generation = self.allocate_generation();
171        if let Some(entry) = self.entries.get_mut(path) {
172            entry.generation = generation;
173            self.lru.push_back(LruNode {
174                path: path.to_path_buf(),
175                generation,
176            });
177        }
178        self.compact_lru_if_needed();
179    }
180
181    fn allocate_generation(&mut self) -> u64 {
182        if self.next_generation == u64::MAX {
183            self.rebuild_lru();
184        }
185        let generation = self.next_generation;
186        self.next_generation += 1;
187        generation
188    }
189
190    fn compact_lru_if_needed(&mut self) {
191        let max_lru_nodes = TIER1_FILE_MEMO_MAX_ENTRIES
192            .saturating_mul(2)
193            .max(self.entries.len());
194        if self.lru.len() > max_lru_nodes {
195            self.rebuild_lru();
196        }
197    }
198
199    fn rebuild_lru(&mut self) {
200        let mut live_nodes = self
201            .entries
202            .iter()
203            .map(|(path, entry)| (entry.generation, path.clone()))
204            .collect::<Vec<_>>();
205        live_nodes.sort_by_key(|(generation, _)| *generation);
206
207        self.lru.clear();
208        for (generation, (_, path)) in live_nodes.into_iter().enumerate() {
209            let generation = generation as u64;
210            if let Some(entry) = self.entries.get_mut(&path) {
211                entry.generation = generation;
212            }
213            self.lru.push_back(LruNode { path, generation });
214        }
215        self.next_generation = self.lru.len() as u64;
216    }
217
218    fn evict_lru(&mut self) {
219        while self.entries.len() > TIER1_FILE_MEMO_MAX_ENTRIES {
220            let Some(node) = self.lru.pop_front() else {
221                break;
222            };
223            if self
224                .entries
225                .get(&node.path)
226                .is_some_and(|entry| entry.generation == node.generation)
227            {
228                self.entries.remove(&node.path);
229            }
230        }
231        self.compact_lru_if_needed();
232    }
233}
234
/// Bounded per-file memo of Tier-1 values whose entries are re-verified
/// against the file's freshness on every lookup.
#[derive(Debug)]
pub(crate) struct Tier1FileMemo<T> {
    /// Entries and LRU bookkeeping, guarded by a mutex.
    state: Mutex<Tier1MemoState<T>>,
}
239
240impl<T> Default for Tier1FileMemo<T> {
241    fn default() -> Self {
242        Self {
243            state: Mutex::new(Tier1MemoState::default()),
244        }
245    }
246}
247
248impl<T: Clone> Tier1FileMemo<T> {
249    pub(crate) fn get_or_insert_with<F>(&self, path: &Path, scan: F) -> T
250    where
251        F: FnOnce(&Path) -> (Option<FileFreshness>, T),
252    {
253        if let Some(cached) = self.cached_value(path) {
254            return cached;
255        }
256
257        let (freshness, value) = scan(path);
258        if let Ok(mut state) = self.state.lock() {
259            if let Some(freshness) = freshness {
260                state.insert(
261                    path.to_path_buf(),
262                    Tier1MemoEntry {
263                        freshness,
264                        value: value.clone(),
265                        generation: 0,
266                    },
267                );
268            } else {
269                state.remove(path);
270            }
271        }
272        value
273    }
274
275    fn cached_value(&self, path: &Path) -> Option<T> {
276        let mut cached = self
277            .state
278            .lock()
279            .ok()
280            .and_then(|state| state.entries.get(path).cloned())?;
281
282        match crate::cache_freshness::verify_file(path, &cached.freshness) {
283            FreshnessVerdict::HotFresh => {
284                if let Ok(mut state) = self.state.lock() {
285                    state.touch(path);
286                }
287                Some(cached.value)
288            }
289            FreshnessVerdict::ContentFresh {
290                new_mtime,
291                new_size,
292            } => {
293                cached.freshness.mtime = new_mtime;
294                cached.freshness.size = new_size;
295                let value = cached.value.clone();
296                if let Ok(mut state) = self.state.lock() {
297                    state.insert(path.to_path_buf(), cached);
298                }
299                Some(value)
300            }
301            FreshnessVerdict::Stale => None,
302            FreshnessVerdict::Deleted => {
303                if let Ok(mut state) = self.state.lock() {
304                    state.remove(path);
305                }
306                None
307            }
308        }
309    }
310}
311
/// Inspect result cache for one project: a SQLite database for Tier-2
/// contributions/aggregates plus an in-memory map of aggregate payloads.
#[derive(Debug)]
pub struct InspectCache {
    /// Project root the cache was opened for.
    project_root: PathBuf,
    /// Key derived from `project_root` by `search_index::project_cache_key`;
    /// used in the database file name and as the `project_key` column value.
    project_key: String,
    /// Location of the database file (`<inspect_dir>/<project_key>.sqlite`).
    sqlite_path: PathBuf,
    /// The SQLite connection, serialized behind a mutex.
    conn: Mutex<Connection>,
    /// In-memory aggregates keyed by job.
    memory: RwLock<HashMap<JobKey, MemoryAggregate>>,
}
320
321impl InspectCache {
322    pub fn open(inspect_dir: PathBuf, project_root: PathBuf) -> Result<Self, InspectCacheError> {
323        std::fs::create_dir_all(&inspect_dir)?;
324        let project_key = crate::search_index::project_cache_key(&project_root);
325        let sqlite_path = inspect_dir.join(format!("{project_key}.sqlite"));
326        let conn = Connection::open(&sqlite_path)?;
327        configure_connection(&conn)?;
328        initialize_schema(&conn)?;
329        Ok(Self::from_connection(
330            project_root,
331            project_key,
332            sqlite_path,
333            conn,
334        ))
335    }
336
337    pub fn open_readonly(
338        inspect_dir: PathBuf,
339        project_root: PathBuf,
340    ) -> Result<Option<Self>, InspectCacheError> {
341        let project_key = crate::search_index::project_cache_key(&project_root);
342        let sqlite_path = inspect_dir.join(format!("{project_key}.sqlite"));
343        if !sqlite_path.is_file() {
344            return Ok(None);
345        }
346        let conn = Connection::open_with_flags(&sqlite_path, OpenFlags::SQLITE_OPEN_READ_ONLY)?;
347        conn.busy_timeout(Duration::from_millis(5_000))?;
348        Ok(Some(Self::from_connection(
349            project_root,
350            project_key,
351            sqlite_path,
352            conn,
353        )))
354    }
355
356    fn from_connection(
357        project_root: PathBuf,
358        project_key: String,
359        sqlite_path: PathBuf,
360        conn: Connection,
361    ) -> Self {
362        Self {
363            project_root,
364            project_key,
365            sqlite_path,
366            conn: Mutex::new(conn),
367            memory: RwLock::new(HashMap::new()),
368        }
369    }
370
371    pub fn project_root(&self) -> &Path {
372        &self.project_root
373    }
374
375    pub fn project_key(&self) -> &str {
376        &self.project_key
377    }
378
379    pub fn sqlite_path(&self) -> &Path {
380        &self.sqlite_path
381    }
382
383    pub fn store_aggregated(
384        &self,
385        key: JobKey,
386        payload: serde_json::Value,
387    ) -> Result<(), InspectCacheError> {
388        self.store_memory_aggregate(key, payload, None)
389    }
390
391    fn store_memory_aggregate(
392        &self,
393        key: JobKey,
394        payload: serde_json::Value,
395        contribution_set_hash: Option<String>,
396    ) -> Result<(), InspectCacheError> {
397        self.memory
398            .write()
399            .map_err(|_| InspectCacheError::LockPoisoned("memory"))?
400            .insert(
401                key,
402                MemoryAggregate {
403                    payload,
404                    generated_at: unix_seconds_now(),
405                    contribution_set_hash,
406                },
407            );
408        Ok(())
409    }
410
411    pub fn get_aggregated(
412        &self,
413        key: &JobKey,
414    ) -> Result<Option<serde_json::Value>, InspectCacheError> {
415        if !key.category.is_tier2() {
416            return Ok(self
417                .memory
418                .read()
419                .map_err(|_| InspectCacheError::LockPoisoned("memory"))?
420                .get(key)
421                .map(|entry| entry.payload.clone()));
422        }
423
424        let current_hash = {
425            let conn = self
426                .conn
427                .lock()
428                .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
429            contribution_set_hash_with_conn(
430                &conn,
431                key.category,
432                &self.project_key,
433                &self.project_root,
434            )?
435        };
436
437        let memory_entry = {
438            self.memory
439                .read()
440                .map_err(|_| InspectCacheError::LockPoisoned("memory"))?
441                .get(key)
442                .cloned()
443        };
444        if let Some(entry) = memory_entry {
445            if entry.contribution_set_hash.as_deref() == Some(current_hash.as_str()) {
446                return Ok(Some(entry.payload));
447            }
448            self.memory
449                .write()
450                .map_err(|_| InspectCacheError::LockPoisoned("memory"))?
451                .remove(key);
452        }
453
454        let payload = {
455            let conn = self
456                .conn
457                .lock()
458                .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
459            conn.query_row(
460                "SELECT aggregate FROM tier2_aggregates \
461                 WHERE category = ?1 AND project_key = ?2 AND contribution_set_hash = ?3",
462                params![key.category.as_str(), self.project_key, current_hash],
463                |row| row.get::<_, Vec<u8>>(0),
464            )
465            .optional()?
466        };
467
468        match payload {
469            Some(bytes) => {
470                let value = serde_json::from_slice::<serde_json::Value>(&bytes)?;
471                self.store_memory_aggregate(key.clone(), value.clone(), Some(current_hash))?;
472                Ok(Some(value))
473            }
474            None => Ok(None),
475        }
476    }
477
478    pub fn store_tier2_result(
479        &self,
480        key: JobKey,
481        scanned_files: &[PathBuf],
482        contributions: &[FileContribution],
483        aggregate: serde_json::Value,
484    ) -> Result<(), InspectCacheError> {
485        if !key.category.is_tier2() {
486            self.store_aggregated(key, aggregate)?;
487            return Ok(());
488        }
489
490        let now = unix_seconds_now();
491        let mut conn = self
492            .conn
493            .lock()
494            .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
495        let tx = conn.transaction()?;
496
497        let scanned_relative = scanned_files
498            .iter()
499            .map(|path| relative_string(&self.project_root, path))
500            .collect::<BTreeSet<_>>();
501        let existing = existing_contribution_paths(&tx, key.category, &self.project_key)?;
502        for file_path in existing {
503            if !scanned_relative.contains(&file_path) {
504                tx.execute(
505                    "DELETE FROM tier2_contributions WHERE category = ?1 AND project_key = ?2 AND file_path = ?3",
506                    params![key.category.as_str(), self.project_key, file_path],
507                )?;
508            }
509        }
510
511        for contribution in contributions {
512            let file_path = relative_string(&self.project_root, &contribution.file_path);
513            let blob = serde_json::to_vec(&contribution_with_type_ref_names(
514                contribution.contribution.clone(),
515                &contribution.type_ref_names,
516            ))?;
517            tx.execute(
518                "INSERT INTO tier2_contributions \
519                 (category, project_key, file_path, file_mtime_ns, file_size, file_hash, contribution, generated_at) \
520                 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8) \
521                 ON CONFLICT(category, project_key, file_path) DO UPDATE SET \
522                 file_mtime_ns = excluded.file_mtime_ns, \
523                 file_size = excluded.file_size, \
524                 file_hash = excluded.file_hash, \
525                 contribution = excluded.contribution, \
526                 generated_at = excluded.generated_at",
527                params![
528                    contribution.category.as_str(),
529                    self.project_key,
530                    file_path,
531                    system_time_to_ns(contribution.freshness.mtime),
532                    contribution.freshness.size as i64,
533                    hash_to_hex(contribution.freshness.content_hash),
534                    blob,
535                    now,
536                ],
537            )?;
538        }
539
540        let contribution_set_hash = contribution_set_hash_with_conn(
541            &tx,
542            key.category,
543            &self.project_key,
544            &self.project_root,
545        )?;
546        let aggregate_blob = serde_json::to_vec(&aggregate)?;
547        tx.execute(
548            "INSERT INTO tier2_aggregates \
549             (category, project_key, contribution_set_hash, aggregate, generated_at) \
550             VALUES (?1, ?2, ?3, ?4, ?5) \
551             ON CONFLICT(category, project_key) DO UPDATE SET \
552             contribution_set_hash = excluded.contribution_set_hash, \
553             aggregate = excluded.aggregate, \
554             generated_at = excluded.generated_at",
555            params![
556                key.category.as_str(),
557                self.project_key,
558                contribution_set_hash,
559                aggregate_blob,
560                now,
561            ],
562        )?;
563        tx.execute(
564            "INSERT INTO tier2_meta (category, project_key, last_full_run) VALUES (?1, ?2, ?3) \
565             ON CONFLICT(category, project_key) DO UPDATE SET last_full_run = excluded.last_full_run",
566            params![key.category.as_str(), self.project_key, now],
567        )?;
568        tx.commit()?;
569
570        self.store_memory_aggregate(key, aggregate, Some(contribution_set_hash))
571    }
572
573    pub(crate) fn apply_contribution_updates(
574        &self,
575        category: InspectCategory,
576        updates: Tier2ContributionUpdates,
577    ) -> Result<String, InspectCacheError> {
578        let now = unix_seconds_now();
579        let mut conn = self
580            .conn
581            .lock()
582            .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
583        let tx = conn.transaction()?;
584
585        for relative_file in updates.deletes {
586            tx.execute(
587                "DELETE FROM tier2_contributions WHERE category = ?1 AND project_key = ?2 AND file_path = ?3",
588                params![
589                    category.as_str(),
590                    self.project_key,
591                    relative_file.to_string_lossy().to_string()
592                ],
593            )?;
594        }
595
596        for (relative_file, freshness) in updates.metadata_updates {
597            tx.execute(
598                "UPDATE tier2_contributions \
599                 SET file_mtime_ns = ?4, file_size = ?5, file_hash = ?6 \
600                 WHERE category = ?1 AND project_key = ?2 AND file_path = ?3",
601                params![
602                    category.as_str(),
603                    self.project_key,
604                    relative_file.to_string_lossy().to_string(),
605                    system_time_to_ns(freshness.mtime),
606                    freshness.size as i64,
607                    hash_to_hex(freshness.content_hash),
608                ],
609            )?;
610        }
611
612        for contribution in updates.upserts {
613            let file_path = relative_string(&self.project_root, &contribution.file_path);
614            let blob = serde_json::to_vec(&contribution_with_type_ref_names(
615                contribution.contribution.clone(),
616                &contribution.type_ref_names,
617            ))?;
618            tx.execute(
619                "INSERT INTO tier2_contributions \
620                 (category, project_key, file_path, file_mtime_ns, file_size, file_hash, contribution, generated_at) \
621                 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8) \
622                 ON CONFLICT(category, project_key, file_path) DO UPDATE SET \
623                 file_mtime_ns = excluded.file_mtime_ns, \
624                 file_size = excluded.file_size, \
625                 file_hash = excluded.file_hash, \
626                 contribution = excluded.contribution, \
627                 generated_at = excluded.generated_at",
628                params![
629                    contribution.category.as_str(),
630                    self.project_key,
631                    file_path,
632                    system_time_to_ns(contribution.freshness.mtime),
633                    contribution.freshness.size as i64,
634                    hash_to_hex(contribution.freshness.content_hash),
635                    blob,
636                    now,
637                ],
638            )?;
639        }
640
641        let contribution_set_hash =
642            contribution_set_hash_with_conn(&tx, category, &self.project_key, &self.project_root)?;
643        tx.commit()?;
644
645        self.memory
646            .write()
647            .map_err(|_| InspectCacheError::LockPoisoned("memory"))?
648            .remove(&JobKey::for_project_category(category));
649
650        Ok(contribution_set_hash)
651    }
652
653    pub(crate) fn load_aggregate_if_hash_matches(
654        &self,
655        category: InspectCategory,
656        contribution_set_hash: &str,
657    ) -> Result<Option<serde_json::Value>, InspectCacheError> {
658        let payload = {
659            let conn = self
660                .conn
661                .lock()
662                .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
663            conn.query_row(
664                "SELECT aggregate FROM tier2_aggregates \
665                 WHERE category = ?1 AND project_key = ?2 AND contribution_set_hash = ?3",
666                params![category.as_str(), self.project_key, contribution_set_hash],
667                |row| row.get::<_, Vec<u8>>(0),
668            )
669            .optional()?
670        };
671
672        match payload {
673            Some(bytes) => {
674                let value = serde_json::from_slice::<serde_json::Value>(&bytes)?;
675                self.store_memory_aggregate(
676                    JobKey::for_project_category(category),
677                    value.clone(),
678                    Some(contribution_set_hash.to_string()),
679                )?;
680                Ok(Some(value))
681            }
682            None => Ok(None),
683        }
684    }
685
686    pub(crate) fn latest_aggregate_any_hash(
687        &self,
688        category: InspectCategory,
689    ) -> Result<Option<serde_json::Value>, InspectCacheError> {
690        let payload = {
691            let conn = self
692                .conn
693                .lock()
694                .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
695            conn.query_row(
696                "SELECT aggregate FROM tier2_aggregates \
697                 WHERE category = ?1 AND project_key = ?2 \
698                 ORDER BY generated_at DESC LIMIT 1",
699                params![category.as_str(), self.project_key],
700                |row| row.get::<_, Vec<u8>>(0),
701            )
702            .optional()?
703        };
704
705        match payload {
706            Some(bytes) => serde_json::from_slice::<serde_json::Value>(&bytes)
707                .map(Some)
708                .map_err(InspectCacheError::from),
709            None => Ok(None),
710        }
711    }
712
713    pub(crate) fn touch_tier2_last_full_run(
714        &self,
715        category: InspectCategory,
716    ) -> Result<i64, InspectCacheError> {
717        let mut conn = self
718            .conn
719            .lock()
720            .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
721        let tx = conn.transaction()?;
722        let previous = tx
723            .query_row(
724                "SELECT last_full_run FROM tier2_meta WHERE category = ?1 AND project_key = ?2",
725                params![category.as_str(), self.project_key],
726                |row| row.get::<_, i64>(0),
727            )
728            .optional()?;
729        let now = unix_seconds_now();
730        let last_full_run = previous.map_or(now, |previous| now.max(previous.saturating_add(1)));
731        tx.execute(
732            "INSERT INTO tier2_meta (category, project_key, last_full_run) VALUES (?1, ?2, ?3)              ON CONFLICT(category, project_key) DO UPDATE SET last_full_run = excluded.last_full_run",
733            params![category.as_str(), self.project_key, last_full_run],
734        )?;
735        tx.commit()?;
736        Ok(last_full_run)
737    }
738
739    pub(crate) fn store_tier2_aggregate(
740        &self,
741        key: JobKey,
742        contribution_set_hash: &str,
743        aggregate: serde_json::Value,
744    ) -> Result<(), InspectCacheError> {
745        if !key.category.is_tier2() {
746            self.store_aggregated(key, aggregate)?;
747            return Ok(());
748        }
749
750        let now = unix_seconds_now();
751        let aggregate_blob = serde_json::to_vec(&aggregate)?;
752        let mut conn = self
753            .conn
754            .lock()
755            .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
756        let tx = conn.transaction()?;
757        tx.execute(
758            "INSERT INTO tier2_aggregates \
759             (category, project_key, contribution_set_hash, aggregate, generated_at) \
760             VALUES (?1, ?2, ?3, ?4, ?5) \
761             ON CONFLICT(category, project_key) DO UPDATE SET \
762             contribution_set_hash = excluded.contribution_set_hash, \
763             aggregate = excluded.aggregate, \
764             generated_at = excluded.generated_at",
765            params![
766                key.category.as_str(),
767                self.project_key,
768                contribution_set_hash,
769                aggregate_blob,
770                now,
771            ],
772        )?;
773        tx.execute(
774            "INSERT INTO tier2_meta (category, project_key, last_full_run) VALUES (?1, ?2, ?3) \
775             ON CONFLICT(category, project_key) DO UPDATE SET last_full_run = excluded.last_full_run",
776            params![key.category.as_str(), self.project_key, now],
777        )?;
778        tx.commit()?;
779
780        self.store_memory_aggregate(key, aggregate, Some(contribution_set_hash.to_string()))
781    }
782
783    pub fn load_tier2_contributions(
784        &self,
785        category: InspectCategory,
786    ) -> Result<Vec<ContributionRecord>, InspectCacheError> {
787        let conn = self
788            .conn
789            .lock()
790            .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
791        let mut stmt = conn.prepare(
792            "SELECT file_path, file_mtime_ns, file_size, file_hash, contribution \
793             FROM tier2_contributions \
794             WHERE category = ?1 AND project_key = ?2 \
795             ORDER BY file_path ASC",
796        )?;
797        let rows = stmt.query_map(params![category.as_str(), self.project_key], |row| {
798            let file_path: String = row.get(0)?;
799            let mtime_ns: i64 = row.get(1)?;
800            let file_size: i64 = row.get(2)?;
801            let file_hash: String = row.get(3)?;
802            let contribution: Vec<u8> = row.get(4)?;
803            Ok((file_path, mtime_ns, file_size, file_hash, contribution))
804        })?;
805
806        let mut records = Vec::new();
807        for row in rows {
808            let (file_path, mtime_ns, file_size, file_hash, contribution) = row?;
809            let contribution: serde_json::Value = serde_json::from_slice(&contribution)?;
810            let type_ref_names = type_ref_names_from_contribution(&contribution);
811            records.push(ContributionRecord {
812                category,
813                file_path: PathBuf::from(file_path),
814                freshness: FileFreshness {
815                    mtime: ns_to_system_time(mtime_ns),
816                    size: file_size.max(0) as u64,
817                    content_hash: hash_from_hex(&file_hash)?,
818                },
819                contribution,
820                type_ref_names,
821            });
822        }
823        Ok(records)
824    }
825
826    pub fn delete_tier2_contribution(
827        &self,
828        category: InspectCategory,
829        relative_file: &Path,
830    ) -> Result<(), InspectCacheError> {
831        let conn = self
832            .conn
833            .lock()
834            .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
835        conn.execute(
836            "DELETE FROM tier2_contributions WHERE category = ?1 AND project_key = ?2 AND file_path = ?3",
837            params![
838                category.as_str(),
839                self.project_key,
840                relative_file.to_string_lossy().to_string()
841            ],
842        )?;
843        Ok(())
844    }
845
846    pub fn update_content_fresh_metadata(
847        &self,
848        category: InspectCategory,
849        relative_file: &Path,
850        freshness: &FileFreshness,
851    ) -> Result<(), InspectCacheError> {
852        let conn = self
853            .conn
854            .lock()
855            .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
856        conn.execute(
857            "UPDATE tier2_contributions \
858             SET file_mtime_ns = ?4, file_size = ?5, file_hash = ?6 \
859             WHERE category = ?1 AND project_key = ?2 AND file_path = ?3",
860            params![
861                category.as_str(),
862                self.project_key,
863                relative_file.to_string_lossy().to_string(),
864                system_time_to_ns(freshness.mtime),
865                freshness.size as i64,
866                hash_to_hex(freshness.content_hash),
867            ],
868        )?;
869        Ok(())
870    }
871
872    pub(crate) fn contribution_fingerprint(
873        &self,
874        category: InspectCategory,
875    ) -> Result<(usize, String, bool), InspectCacheError> {
876        let conn = self
877            .conn
878            .lock()
879            .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
880        let mut stmt = conn.prepare(
881            "SELECT file_path, file_mtime_ns, file_size, file_hash \
882             FROM tier2_contributions \
883             WHERE category = ?1 AND project_key = ?2 \
884             ORDER BY file_path ASC",
885        )?;
886        let rows = stmt.query_map(params![category.as_str(), self.project_key], |row| {
887            Ok((
888                row.get::<_, String>(0)?,
889                row.get::<_, i64>(1)?,
890                row.get::<_, i64>(2)?,
891                row.get::<_, String>(3)?,
892            ))
893        })?;
894
895        let zero_hash = hash_to_hex(cache_freshness::zero_hash());
896        let mut count = 0usize;
897        let mut hash_complete = true;
898        let mut hasher = blake3::Hasher::new();
899        for row in rows {
900            let (file_path, mtime_ns, file_size, file_hash) = row?;
901            count += 1;
902            if file_hash == zero_hash {
903                hash_complete = false;
904            }
905            update_contribution_fingerprint_hash(
906                &mut hasher,
907                &file_path,
908                mtime_ns.max(0),
909                file_size.max(0) as u64,
910                &file_hash,
911            );
912        }
913
914        Ok((count, hasher.finalize().to_hex().to_string(), hash_complete))
915    }
916
917    pub(crate) fn contribution_freshness(
918        &self,
919        category: InspectCategory,
920    ) -> Result<Vec<(PathBuf, FileFreshness)>, InspectCacheError> {
921        let conn = self
922            .conn
923            .lock()
924            .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
925        let mut stmt = conn.prepare(
926            "SELECT file_path, file_mtime_ns, file_size, file_hash \
927             FROM tier2_contributions \
928             WHERE category = ?1 AND project_key = ?2 \
929             ORDER BY file_path ASC",
930        )?;
931        let rows = stmt.query_map(params![category.as_str(), self.project_key], |row| {
932            Ok((
933                row.get::<_, String>(0)?,
934                row.get::<_, i64>(1)?,
935                row.get::<_, i64>(2)?,
936                row.get::<_, String>(3)?,
937            ))
938        })?;
939
940        let mut records = Vec::new();
941        for row in rows {
942            let (file_path, mtime_ns, file_size, file_hash) = row?;
943            records.push((
944                PathBuf::from(file_path),
945                FileFreshness {
946                    mtime: ns_to_system_time(mtime_ns),
947                    size: file_size.max(0) as u64,
948                    content_hash: hash_from_hex(&file_hash)?,
949                },
950            ));
951        }
952        Ok(records)
953    }
954
955    pub fn contribution_set_hash(
956        &self,
957        category: InspectCategory,
958    ) -> Result<String, InspectCacheError> {
959        let conn = self
960            .conn
961            .lock()
962            .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
963        contribution_set_hash_with_conn(&conn, category, &self.project_key, &self.project_root)
964    }
965
966    pub fn last_full_run(
967        &self,
968        category: InspectCategory,
969    ) -> Result<Option<i64>, InspectCacheError> {
970        let conn = self
971            .conn
972            .lock()
973            .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
974        conn.query_row(
975            "SELECT last_full_run FROM tier2_meta WHERE category = ?1 AND project_key = ?2",
976            params![category.as_str(), self.project_key],
977            |row| row.get::<_, i64>(0),
978        )
979        .optional()
980        .map_err(InspectCacheError::from)
981    }
982
983    pub fn memory_generated_at(&self, key: &JobKey) -> Result<Option<i64>, InspectCacheError> {
984        Ok(self
985            .memory
986            .read()
987            .map_err(|_| InspectCacheError::LockPoisoned("memory"))?
988            .get(key)
989            .map(|entry| entry.generated_at))
990    }
991}
992
993fn configure_connection(conn: &Connection) -> Result<(), InspectCacheError> {
994    conn.pragma_update(None, "journal_mode", "WAL")?;
995    conn.pragma_update(None, "busy_timeout", 5_000)?;
996    Ok(())
997}
998
999fn initialize_schema(conn: &Connection) -> Result<(), InspectCacheError> {
1000    conn.execute_batch(
1001        "CREATE TABLE IF NOT EXISTS tier2_contributions (
1002            category        TEXT NOT NULL,
1003            project_key     TEXT NOT NULL,
1004            file_path       TEXT NOT NULL,
1005            file_mtime_ns   INTEGER NOT NULL,
1006            file_size       INTEGER NOT NULL,
1007            file_hash       TEXT NOT NULL,
1008            contribution    BLOB NOT NULL,
1009            generated_at    INTEGER NOT NULL,
1010            PRIMARY KEY (category, project_key, file_path)
1011        );
1012
1013        CREATE TABLE IF NOT EXISTS tier2_aggregates (
1014            category        TEXT NOT NULL,
1015            project_key     TEXT NOT NULL,
1016            contribution_set_hash TEXT NOT NULL,
1017            aggregate       BLOB NOT NULL,
1018            generated_at    INTEGER NOT NULL,
1019            PRIMARY KEY (category, project_key)
1020        );
1021
1022        CREATE TABLE IF NOT EXISTS tier2_meta (
1023            category        TEXT NOT NULL,
1024            project_key     TEXT NOT NULL,
1025            last_full_run   INTEGER NOT NULL,
1026            PRIMARY KEY (category, project_key)
1027        );",
1028    )?;
1029    Ok(())
1030}
1031
1032fn existing_contribution_paths(
1033    conn: &Connection,
1034    category: InspectCategory,
1035    project_key: &str,
1036) -> Result<Vec<String>, InspectCacheError> {
1037    let mut stmt = conn.prepare(
1038        "SELECT file_path FROM tier2_contributions WHERE category = ?1 AND project_key = ?2",
1039    )?;
1040    let rows = stmt.query_map(params![category.as_str(), project_key], |row| {
1041        row.get::<_, String>(0)
1042    })?;
1043    rows.collect::<Result<Vec<_>, _>>()
1044        .map_err(InspectCacheError::from)
1045}
1046
1047fn contribution_set_hash_with_conn(
1048    conn: &Connection,
1049    category: InspectCategory,
1050    project_key: &str,
1051    project_root: &Path,
1052) -> Result<String, InspectCacheError> {
1053    let mut stmt = conn.prepare(
1054        "SELECT file_path, file_hash FROM tier2_contributions \
1055         WHERE category = ?1 AND project_key = ?2 ORDER BY file_path ASC",
1056    )?;
1057    let rows = stmt.query_map(params![category.as_str(), project_key], |row| {
1058        Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
1059    })?;
1060
1061    let mut hasher = blake3::Hasher::new();
1062    hasher.update(b"tier2-contributions\0");
1063    hasher.update(&TIER2_CONTRIBUTION_CACHE_VERSION.to_le_bytes());
1064    hasher.update(b"\0");
1065    for row in rows {
1066        let (file_path, file_hash) = row?;
1067        hasher.update(file_path.as_bytes());
1068        hasher.update(b"\0");
1069        hasher.update(file_hash.as_bytes());
1070        hasher.update(b"\0");
1071    }
1072    update_manifest_fingerprint_hash(&mut hasher, project_root)?;
1073    Ok(hasher.finalize().to_hex().to_string())
1074}
1075
1076fn update_manifest_fingerprint_hash(
1077    hasher: &mut blake3::Hasher,
1078    project_root: &Path,
1079) -> Result<(), InspectCacheError> {
1080    let manifest_root =
1081        fs::canonicalize(project_root).unwrap_or_else(|_| project_root.to_path_buf());
1082    hasher.update(b"entry-point-manifests\0");
1083    for manifest in super::entry_points::collect_entry_point_manifests(project_root) {
1084        let relative_path = manifest
1085            .strip_prefix(&manifest_root)
1086            .unwrap_or(manifest.as_path())
1087            .to_string_lossy()
1088            .replace('\\', "/");
1089        let content_hash = blake3::hash(&fs::read(&manifest)?);
1090        hasher.update(relative_path.as_bytes());
1091        hasher.update(b"\0");
1092        hasher.update(content_hash.as_bytes());
1093        hasher.update(b"\0");
1094    }
1095    Ok(())
1096}
1097
1098fn update_contribution_fingerprint_hash(
1099    hasher: &mut blake3::Hasher,
1100    relative_path: &str,
1101    mtime_ns: i64,
1102    file_size: u64,
1103    file_hash: &str,
1104) {
1105    hasher.update(relative_path.as_bytes());
1106    hasher.update(&[0]);
1107    hasher.update(&mtime_ns.to_le_bytes());
1108    hasher.update(&file_size.to_le_bytes());
1109    hasher.update(&[0]);
1110    hasher.update(file_hash.as_bytes());
1111}
1112
/// Renders `path` relative to `project_root` as a lossy string.
///
/// When `path` is not located under `project_root`, the full `path` is
/// rendered instead. Non-UTF-8 components are replaced lossily.
fn relative_string(project_root: &Path, path: &Path) -> String {
    let shown = match path.strip_prefix(project_root) {
        Ok(relative) => relative,
        Err(_) => path,
    };
    shown.to_string_lossy().into_owned()
}
1119
/// Converts a `SystemTime` to whole nanoseconds since the Unix epoch.
///
/// Times before the epoch map to `0`, and values that do not fit in an `i64`
/// saturate at `i64::MAX`.
fn system_time_to_ns(time: SystemTime) -> i64 {
    let since_epoch = time.duration_since(UNIX_EPOCH).unwrap_or(Duration::ZERO);
    i64::try_from(since_epoch.as_nanos()).unwrap_or(i64::MAX)
}
1127
/// Converts nanoseconds since the Unix epoch back into a `SystemTime`.
///
/// Negative inputs are treated as `0`, i.e. they map to the epoch itself.
fn ns_to_system_time(value: i64) -> SystemTime {
    let nanos = u64::try_from(value).unwrap_or(0);
    UNIX_EPOCH + Duration::from_nanos(nanos)
}
1131
1132fn hash_to_hex(hash: blake3::Hash) -> String {
1133    hash.to_hex().to_string()
1134}
1135
1136fn hash_from_hex(value: &str) -> Result<blake3::Hash, InspectCacheError> {
1137    if value.len() != 64 {
1138        return Err(InspectCacheError::InvalidHash(value.to_string()));
1139    }
1140    let mut bytes = [0u8; 32];
1141    for (index, chunk) in value.as_bytes().chunks(2).enumerate() {
1142        let hex = std::str::from_utf8(chunk)
1143            .map_err(|_| InspectCacheError::InvalidHash(value.to_string()))?;
1144        bytes[index] = u8::from_str_radix(hex, 16)
1145            .map_err(|_| InspectCacheError::InvalidHash(value.to_string()))?;
1146    }
1147    Ok(blake3::Hash::from_bytes(bytes))
1148}
1149
/// Returns the current wall-clock time as whole seconds since the Unix epoch.
///
/// A clock set before the epoch yields `0`, and a value that does not fit in
/// an `i64` saturates at `i64::MAX`.
fn unix_seconds_now() -> i64 {
    let since_epoch = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or(Duration::ZERO);
    i64::try_from(since_epoch.as_secs()).unwrap_or(i64::MAX)
}
1157
1158#[cfg(test)]
1159mod tests {
1160    use super::*;
1161    use std::cell::Cell;
1162    use std::fs;
1163    use std::path::{Path, PathBuf};
1164
1165    fn collect_freshness(path: &Path) -> FileFreshness {
1166        crate::cache_freshness::collect(path).unwrap()
1167    }
1168
1169    #[test]
1170    fn tier1_file_memo_evicts_lru_and_keeps_recent_hits() {
1171        let temp = tempfile::tempdir().unwrap();
1172        let memo = Tier1FileMemo::<usize>::default();
1173        let mut paths = Vec::with_capacity(TIER1_FILE_MEMO_MAX_ENTRIES);
1174
1175        for index in 0..TIER1_FILE_MEMO_MAX_ENTRIES {
1176            let path = temp.path().join(format!("file-{index}.txt"));
1177            fs::write(&path, index.to_string()).unwrap();
1178            let value =
1179                memo.get_or_insert_with(&path, |path| (Some(collect_freshness(path)), index));
1180            assert_eq!(value, index);
1181            paths.push(path);
1182        }
1183
1184        let recent_path = paths[0].clone();
1185        let recent_value = memo.get_or_insert_with(&recent_path, |_| {
1186            panic!("recently inserted entry should hit before eviction")
1187        });
1188        assert_eq!(recent_value, 0);
1189
1190        let evicting_path = temp.path().join("new-file.txt");
1191        fs::write(&evicting_path, "new").unwrap();
1192        let evicting_value = memo.get_or_insert_with(&evicting_path, |path| {
1193            (Some(collect_freshness(path)), TIER1_FILE_MEMO_MAX_ENTRIES)
1194        });
1195        assert_eq!(evicting_value, TIER1_FILE_MEMO_MAX_ENTRIES);
1196
1197        let state = memo.state.lock().unwrap();
1198        assert_eq!(state.entries.len(), TIER1_FILE_MEMO_MAX_ENTRIES);
1199        assert!(state.entries.contains_key(&recent_path));
1200        assert!(state.entries.contains_key(&evicting_path));
1201        assert!(!state.entries.contains_key(&paths[1]));
1202        drop(state);
1203
1204        let recent_value = memo.get_or_insert_with(&recent_path, |_| {
1205            panic!("recently used entry should survive eviction")
1206        });
1207        assert_eq!(recent_value, 0);
1208    }
1209
1210    #[test]
1211    fn tier1_file_memo_repeated_touches_keep_lazy_lru_bounded() {
1212        let temp = tempfile::tempdir().unwrap();
1213        let memo = Tier1FileMemo::<usize>::default();
1214        let mut paths = Vec::with_capacity(TIER1_FILE_MEMO_MAX_ENTRIES);
1215
1216        for index in 0..TIER1_FILE_MEMO_MAX_ENTRIES {
1217            let path = temp.path().join(format!("file-{index}.txt"));
1218            fs::write(&path, index.to_string()).unwrap();
1219            memo.get_or_insert_with(&path, |path| (Some(collect_freshness(path)), index));
1220            paths.push(path);
1221        }
1222
1223        for _ in 0..(TIER1_FILE_MEMO_MAX_ENTRIES * 3) {
1224            let value = memo.get_or_insert_with(&paths[0], |_| {
1225                panic!("hot entry should stay cached while it is repeatedly touched")
1226            });
1227            assert_eq!(value, 0);
1228        }
1229
1230        let evicting_path = temp.path().join("new-file.txt");
1231        fs::write(&evicting_path, "new").unwrap();
1232        memo.get_or_insert_with(&evicting_path, |path| {
1233            (Some(collect_freshness(path)), TIER1_FILE_MEMO_MAX_ENTRIES)
1234        });
1235
1236        let state = memo.state.lock().unwrap();
1237        assert_eq!(state.entries.len(), TIER1_FILE_MEMO_MAX_ENTRIES);
1238        assert!(state.entries.contains_key(&paths[0]));
1239        assert!(state.entries.contains_key(&evicting_path));
1240        assert!(!state.entries.contains_key(&paths[1]));
1241        assert!(
1242            state.lru.len() <= TIER1_FILE_MEMO_MAX_ENTRIES * 2,
1243            "lazy LRU queue should be compacted instead of growing without bound"
1244        );
1245    }
1246
1247    #[test]
1248    fn tier1_file_memo_reuses_fresh_entries_and_rescans_stale_files() {
1249        let temp = tempfile::tempdir().unwrap();
1250        let path = temp.path().join("memo.txt");
1251        fs::write(&path, "first").unwrap();
1252
1253        let memo = Tier1FileMemo::<String>::default();
1254        let scans = Cell::new(0);
1255
1256        let first = memo.get_or_insert_with(&path, |path| {
1257            scans.set(scans.get() + 1);
1258            (Some(collect_freshness(path)), "first scan".to_string())
1259        });
1260        assert_eq!(first, "first scan");
1261        assert_eq!(scans.get(), 1);
1262
1263        let unchanged =
1264            memo.get_or_insert_with(&path, |_| panic!("unchanged file should reuse Tier-1 memo"));
1265        assert_eq!(unchanged, "first scan");
1266        assert_eq!(scans.get(), 1);
1267
1268        fs::write(&path, "changed file contents").unwrap();
1269        let changed = memo.get_or_insert_with(&path, |path| {
1270            scans.set(scans.get() + 1);
1271            (Some(collect_freshness(path)), "second scan".to_string())
1272        });
1273        assert_eq!(changed, "second scan");
1274        assert_eq!(scans.get(), 2);
1275
1276        let fresh_after_rescan = memo.get_or_insert_with(&path, |_| {
1277            panic!("rescanned file should reuse refreshed Tier-1 memo")
1278        });
1279        assert_eq!(fresh_after_rescan, "second scan");
1280        assert_eq!(scans.get(), 2);
1281    }
1282
1283    #[derive(serde::Deserialize, serde::Serialize)]
1284    struct RoundTripContributionRecord {
1285        category: String,
1286        file_path: PathBuf,
1287        contribution: serde_json::Value,
1288        type_ref_names: BTreeSet<String>,
1289    }
1290
1291    impl From<&ContributionRecord> for RoundTripContributionRecord {
1292        fn from(record: &ContributionRecord) -> Self {
1293            Self {
1294                category: record.category.as_str().to_string(),
1295                file_path: record.file_path.clone(),
1296                contribution: record.contribution.clone(),
1297                type_ref_names: record.type_ref_names.clone(),
1298            }
1299        }
1300    }
1301
1302    #[test]
1303    fn contribution_record_round_trip_preserves_dead_code_liveness_metadata() {
1304        let temp = tempfile::tempdir().unwrap();
1305        let project_root = temp.path().join("project");
1306        let inspect_dir = temp.path().join("inspect");
1307        let source = project_root.join("src/lib.ts");
1308        fs::create_dir_all(source.parent().unwrap()).unwrap();
1309        fs::write(&source, "export interface Widget { id: string }\n").unwrap();
1310
1311        let cache = InspectCache::open(inspect_dir.clone(), project_root.clone()).unwrap();
1312        let contribution = FileContribution::new(
1313            InspectCategory::DeadCode,
1314            source.clone(),
1315            collect_freshness(&source),
1316            serde_json::json!({
1317                "file": "src/lib.ts",
1318                "exports": [{
1319                    "symbol": "Widget",
1320                    "kind": "interface",
1321                    "line": 1,
1322                    "is_type_like": true,
1323                    "is_entry_point": false,
1324                }],
1325                "internal_calls": [],
1326                "liveness_roots": [],
1327                "dispatched_method_names": ["render"],
1328                "type_ref_names": ["Widget"],
1329            }),
1330        )
1331        .with_type_ref_names(["Widget".to_string()]);
1332        cache
1333            .store_tier2_result(
1334                JobKey::for_project_category(InspectCategory::DeadCode),
1335                std::slice::from_ref(&source),
1336                &[contribution],
1337                serde_json::json!({ "count": 0, "items": [] }),
1338            )
1339            .unwrap();
1340        drop(cache);
1341
1342        let cache = InspectCache::open(inspect_dir, project_root).unwrap();
1343        let records = cache
1344            .load_tier2_contributions(InspectCategory::DeadCode)
1345            .unwrap();
1346        assert_eq!(records.len(), 1);
1347
1348        let serialized =
1349            serde_json::to_vec(&RoundTripContributionRecord::from(&records[0])).unwrap();
1350        let decoded: RoundTripContributionRecord = serde_json::from_slice(&serialized).unwrap();
1351        assert_eq!(decoded.category, InspectCategory::DeadCode.as_str());
1352        assert_eq!(decoded.contribution["dispatched_method_names"][0], "render");
1353        assert_eq!(decoded.contribution["type_ref_names"][0], "Widget");
1354        assert!(decoded.type_ref_names.contains("Widget"));
1355        assert_eq!(
1356            decoded.contribution["exports"][0]["is_type_like"].as_bool(),
1357            Some(true)
1358        );
1359        assert_eq!(TIER2_CONTRIBUTION_CACHE_VERSION, 13);
1360    }
1361}