1use std::collections::{BTreeSet, HashMap, VecDeque};
2use std::fmt;
3use std::fs;
4use std::path::{Path, PathBuf};
5use std::sync::{Mutex, RwLock};
6use std::time::{Duration, SystemTime, UNIX_EPOCH};
7
8use rusqlite::{params, Connection, OpenFlags, OptionalExtension};
9
10use crate::cache_freshness::{self, FileFreshness, FreshnessVerdict};
11
12use super::job::{
13 contribution_with_type_ref_names, type_ref_names_from_contribution, FileContribution,
14 InspectCategory, JobKey,
15};
16
/// Batch of pending changes to the stored tier-2 contribution rows of one category.
///
/// Consumed by `InspectCache::apply_contribution_updates`, which applies the deletes,
/// then the metadata updates, then the upserts inside a single transaction.
#[derive(Debug, Default)]
pub(crate) struct Tier2ContributionUpdates {
    /// Contributions to insert, or to overwrite when a row for the same file already exists.
    pub upserts: Vec<FileContribution>,
    /// Files whose contribution rows should be removed; each path is matched verbatim
    /// (after lossy string conversion) against the stored `file_path` column.
    pub deletes: Vec<PathBuf>,
    /// Files whose stored mtime/size/hash should be refreshed without touching the
    /// stored contribution payload.
    pub metadata_updates: Vec<(PathBuf, FileFreshness)>,
}
23
/// Errors produced by the inspect cache.
#[derive(Debug)]
pub enum InspectCacheError {
    /// A filesystem or other I/O operation failed.
    Io(std::io::Error),
    /// SQLite (via `rusqlite`) reported an error.
    Sql(rusqlite::Error),
    /// A JSON payload could not be serialized or deserialized.
    Json(serde_json::Error),
    /// One of the internal locks was poisoned; the payload names the lock
    /// (`"memory"` or `"connection"` in this file).
    LockPoisoned(&'static str),
    /// A stored file hash could not be parsed; the payload is the offending string.
    InvalidHash(String),
}
32
33impl fmt::Display for InspectCacheError {
34 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
35 match self {
36 InspectCacheError::Io(error) => write!(formatter, "inspect cache io error: {error}"),
37 InspectCacheError::Sql(error) => {
38 write!(formatter, "inspect cache sqlite error: {error}")
39 }
40 InspectCacheError::Json(error) => {
41 write!(formatter, "inspect cache json error: {error}")
42 }
43 InspectCacheError::LockPoisoned(name) => {
44 write!(formatter, "inspect cache lock poisoned: {name}")
45 }
46 InspectCacheError::InvalidHash(hash) => {
47 write!(formatter, "inspect cache invalid blake3 hash: {hash}")
48 }
49 }
50 }
51}
52
// Uses the trait's default methods (no `source()` override); the `Display` impl already
// embeds the wrapped error's message.
impl std::error::Error for InspectCacheError {}
54
55impl From<std::io::Error> for InspectCacheError {
56 fn from(error: std::io::Error) -> Self {
57 Self::Io(error)
58 }
59}
60
61impl From<rusqlite::Error> for InspectCacheError {
62 fn from(error: rusqlite::Error) -> Self {
63 Self::Sql(error)
64 }
65}
66
67impl From<serde_json::Error> for InspectCacheError {
68 fn from(error: serde_json::Error) -> Self {
69 Self::Json(error)
70 }
71}
72
/// Version tag mixed into every contribution-set hash by `contribution_set_hash_with_conn`.
/// Changing the value changes every computed hash, so aggregates stored under the old
/// hash no longer match a hash-based lookup.
pub(crate) const TIER2_CONTRIBUTION_CACHE_VERSION: u32 = 14;
103
/// One stored tier-2 contribution row, as decoded by
/// `InspectCache::load_tier2_contributions`.
#[derive(Debug, Clone)]
pub struct ContributionRecord {
    /// Category the row was loaded for.
    pub category: InspectCategory,
    /// Path exactly as stored in the `file_path` column.
    pub file_path: PathBuf,
    /// Modification time, size, and content hash recorded alongside the contribution.
    pub freshness: FileFreshness,
    /// JSON payload decoded from the stored `contribution` blob.
    pub contribution: serde_json::Value,
    /// Names extracted from `contribution` via `type_ref_names_from_contribution`.
    pub type_ref_names: BTreeSet<String>,
}
112
/// Aggregate payload held in the in-memory map of `InspectCache`.
#[derive(Debug, Clone)]
struct MemoryAggregate {
    /// The aggregated JSON result.
    payload: serde_json::Value,
    /// Unix time in seconds at which the entry was placed in memory.
    generated_at: i64,
    /// Contribution-set hash the payload corresponds to; `None` when stored through
    /// `InspectCache::store_aggregated`, which records no hash.
    contribution_set_hash: Option<String>,
}
119
/// Maximum number of entries a `Tier1MemoState` retains; `evict_lru` drops the least
/// recently used entries once this count is exceeded.
const TIER1_FILE_MEMO_MAX_ENTRIES: usize = 4_096;
121
/// Cached per-file value together with the freshness data used to validate it.
#[derive(Debug, Clone)]
struct Tier1MemoEntry<T> {
    /// File metadata/hash the value was computed against.
    freshness: FileFreshness,
    /// The memoized value.
    value: T,
    /// Recency stamp assigned by `Tier1MemoState`; only the LRU node carrying the same
    /// generation is considered current for this entry.
    generation: u64,
}
128
/// Node in the recency queue of `Tier1MemoState`.
///
/// A node is stale (and ignored during eviction) when its generation no longer equals
/// the generation stored on the entry for `path`, or when that entry is gone.
#[derive(Debug, Clone)]
struct LruNode {
    /// Key of the entry this node refers to.
    path: PathBuf,
    /// Generation the entry had when this node was queued.
    generation: u64,
}
134
/// Bounded per-file memo: a map of entries plus a recency queue used for LRU eviction.
#[derive(Debug)]
struct Tier1MemoState<T> {
    /// Live entries keyed by file path.
    entries: HashMap<PathBuf, Tier1MemoEntry<T>>,
    /// Recency queue, oldest first; may contain stale nodes that are skipped on eviction
    /// and dropped when the queue is rebuilt.
    lru: VecDeque<LruNode>,
    /// Next generation number to hand out.
    next_generation: u64,
}
141
142impl<T> Default for Tier1MemoState<T> {
143 fn default() -> Self {
144 Self {
145 entries: HashMap::new(),
146 lru: VecDeque::new(),
147 next_generation: 0,
148 }
149 }
150}
151
152impl<T> Tier1MemoState<T> {
153 fn insert(&mut self, path: PathBuf, mut entry: Tier1MemoEntry<T>) {
154 let generation = self.allocate_generation();
155 entry.generation = generation;
156 self.entries.insert(path.clone(), entry);
157 self.lru.push_back(LruNode { path, generation });
158 self.compact_lru_if_needed();
159 self.evict_lru();
160 }
161
162 fn remove(&mut self, path: &Path) {
163 self.entries.remove(path);
164 self.compact_lru_if_needed();
165 }
166
167 fn touch(&mut self, path: &Path) {
168 if !self.entries.contains_key(path) {
169 return;
170 }
171
172 let generation = self.allocate_generation();
173 if let Some(entry) = self.entries.get_mut(path) {
174 entry.generation = generation;
175 self.lru.push_back(LruNode {
176 path: path.to_path_buf(),
177 generation,
178 });
179 }
180 self.compact_lru_if_needed();
181 }
182
183 fn allocate_generation(&mut self) -> u64 {
184 if self.next_generation == u64::MAX {
185 self.rebuild_lru();
186 }
187 let generation = self.next_generation;
188 self.next_generation += 1;
189 generation
190 }
191
192 fn compact_lru_if_needed(&mut self) {
193 let max_lru_nodes = TIER1_FILE_MEMO_MAX_ENTRIES
194 .saturating_mul(2)
195 .max(self.entries.len());
196 if self.lru.len() > max_lru_nodes {
197 self.rebuild_lru();
198 }
199 }
200
201 fn rebuild_lru(&mut self) {
202 let mut live_nodes = self
203 .entries
204 .iter()
205 .map(|(path, entry)| (entry.generation, path.clone()))
206 .collect::<Vec<_>>();
207 live_nodes.sort_by_key(|(generation, _)| *generation);
208
209 self.lru.clear();
210 for (generation, (_, path)) in live_nodes.into_iter().enumerate() {
211 let generation = generation as u64;
212 if let Some(entry) = self.entries.get_mut(&path) {
213 entry.generation = generation;
214 }
215 self.lru.push_back(LruNode { path, generation });
216 }
217 self.next_generation = self.lru.len() as u64;
218 }
219
220 fn evict_lru(&mut self) {
221 while self.entries.len() > TIER1_FILE_MEMO_MAX_ENTRIES {
222 let Some(node) = self.lru.pop_front() else {
223 break;
224 };
225 if self
226 .entries
227 .get(&node.path)
228 .is_some_and(|entry| entry.generation == node.generation)
229 {
230 self.entries.remove(&node.path);
231 }
232 }
233 self.compact_lru_if_needed();
234 }
235}
236
/// Per-file memo whose state (`Tier1MemoState`) is guarded by a mutex, so its methods
/// take `&self`.
#[derive(Debug)]
pub(crate) struct Tier1FileMemo<T> {
    /// Entries and recency queue behind a mutex.
    state: Mutex<Tier1MemoState<T>>,
}
241
242impl<T> Default for Tier1FileMemo<T> {
243 fn default() -> Self {
244 Self {
245 state: Mutex::new(Tier1MemoState::default()),
246 }
247 }
248}
249
250impl<T: Clone> Tier1FileMemo<T> {
251 pub(crate) fn get_or_insert_with<F>(&self, path: &Path, scan: F) -> T
252 where
253 F: FnOnce(&Path) -> (Option<FileFreshness>, T),
254 {
255 if let Some(cached) = self.cached_value(path) {
256 return cached;
257 }
258
259 let (freshness, value) = scan(path);
260 if let Ok(mut state) = self.state.lock() {
261 if let Some(freshness) = freshness {
262 state.insert(
263 path.to_path_buf(),
264 Tier1MemoEntry {
265 freshness,
266 value: value.clone(),
267 generation: 0,
268 },
269 );
270 } else {
271 state.remove(path);
272 }
273 }
274 value
275 }
276
277 fn cached_value(&self, path: &Path) -> Option<T> {
278 let mut cached = self
279 .state
280 .lock()
281 .ok()
282 .and_then(|state| state.entries.get(path).cloned())?;
283
284 match crate::cache_freshness::verify_file(path, &cached.freshness) {
285 FreshnessVerdict::HotFresh => {
286 if let Ok(mut state) = self.state.lock() {
287 state.touch(path);
288 }
289 Some(cached.value)
290 }
291 FreshnessVerdict::ContentFresh {
292 new_mtime,
293 new_size,
294 } => {
295 cached.freshness.mtime = new_mtime;
296 cached.freshness.size = new_size;
297 let value = cached.value.clone();
298 if let Ok(mut state) = self.state.lock() {
299 state.insert(path.to_path_buf(), cached);
300 }
301 Some(value)
302 }
303 FreshnessVerdict::Stale => None,
304 FreshnessVerdict::Deleted => {
305 if let Ok(mut state) = self.state.lock() {
306 state.remove(path);
307 }
308 None
309 }
310 }
311 }
312}
313
/// Inspect-result cache backed by one SQLite database per project plus an in-memory map
/// of aggregated payloads.
#[derive(Debug)]
pub struct InspectCache {
    /// Project root; used to relativize file paths and passed to the hash helpers.
    project_root: PathBuf,
    /// Key derived from `project_root` via `search_index::project_cache_key`; names the
    /// database file and scopes every row.
    project_key: String,
    /// Location of the SQLite database file.
    sqlite_path: PathBuf,
    /// The SQLite connection, serialized behind a mutex.
    conn: Mutex<Connection>,
    /// In-memory aggregates keyed by job.
    memory: RwLock<HashMap<JobKey, MemoryAggregate>>,
}
322
323impl InspectCache {
324 pub fn open(inspect_dir: PathBuf, project_root: PathBuf) -> Result<Self, InspectCacheError> {
325 std::fs::create_dir_all(&inspect_dir)?;
326 let project_key = crate::search_index::project_cache_key(&project_root);
327 let sqlite_path = inspect_dir.join(format!("{project_key}.sqlite"));
328 let conn = Connection::open(&sqlite_path)?;
329 configure_connection(&conn)?;
330 initialize_schema(&conn)?;
331 Ok(Self::from_connection(
332 project_root,
333 project_key,
334 sqlite_path,
335 conn,
336 ))
337 }
338
339 pub fn open_readonly(
340 inspect_dir: PathBuf,
341 project_root: PathBuf,
342 ) -> Result<Option<Self>, InspectCacheError> {
343 let project_key = crate::search_index::project_cache_key(&project_root);
344 let sqlite_path = inspect_dir.join(format!("{project_key}.sqlite"));
345 if !sqlite_path.is_file() {
346 return Ok(None);
347 }
348 let conn = Connection::open_with_flags(&sqlite_path, OpenFlags::SQLITE_OPEN_READ_ONLY)?;
349 conn.busy_timeout(Duration::from_millis(5_000))?;
350 Ok(Some(Self::from_connection(
351 project_root,
352 project_key,
353 sqlite_path,
354 conn,
355 )))
356 }
357
358 fn from_connection(
359 project_root: PathBuf,
360 project_key: String,
361 sqlite_path: PathBuf,
362 conn: Connection,
363 ) -> Self {
364 Self {
365 project_root,
366 project_key,
367 sqlite_path,
368 conn: Mutex::new(conn),
369 memory: RwLock::new(HashMap::new()),
370 }
371 }
372
373 pub fn project_root(&self) -> &Path {
374 &self.project_root
375 }
376
377 pub fn project_key(&self) -> &str {
378 &self.project_key
379 }
380
381 pub fn sqlite_path(&self) -> &Path {
382 &self.sqlite_path
383 }
384
385 pub fn store_aggregated(
386 &self,
387 key: JobKey,
388 payload: serde_json::Value,
389 ) -> Result<(), InspectCacheError> {
390 self.store_memory_aggregate(key, payload, None)
391 }
392
393 fn store_memory_aggregate(
394 &self,
395 key: JobKey,
396 payload: serde_json::Value,
397 contribution_set_hash: Option<String>,
398 ) -> Result<(), InspectCacheError> {
399 self.memory
400 .write()
401 .map_err(|_| InspectCacheError::LockPoisoned("memory"))?
402 .insert(
403 key,
404 MemoryAggregate {
405 payload,
406 generated_at: unix_seconds_now(),
407 contribution_set_hash,
408 },
409 );
410 Ok(())
411 }
412
413 pub fn get_aggregated(
414 &self,
415 key: &JobKey,
416 ) -> Result<Option<serde_json::Value>, InspectCacheError> {
417 if !key.category.is_tier2() {
418 return Ok(self
419 .memory
420 .read()
421 .map_err(|_| InspectCacheError::LockPoisoned("memory"))?
422 .get(key)
423 .map(|entry| entry.payload.clone()));
424 }
425
426 let current_hash = {
427 let conn = self
428 .conn
429 .lock()
430 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
431 contribution_set_hash_with_conn(
432 &conn,
433 key.category,
434 &self.project_key,
435 &self.project_root,
436 )?
437 };
438
439 let memory_entry = {
440 self.memory
441 .read()
442 .map_err(|_| InspectCacheError::LockPoisoned("memory"))?
443 .get(key)
444 .cloned()
445 };
446 if let Some(entry) = memory_entry {
447 if entry.contribution_set_hash.as_deref() == Some(current_hash.as_str()) {
448 return Ok(Some(entry.payload));
449 }
450 self.memory
451 .write()
452 .map_err(|_| InspectCacheError::LockPoisoned("memory"))?
453 .remove(key);
454 }
455
456 let payload = {
457 let conn = self
458 .conn
459 .lock()
460 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
461 conn.query_row(
462 "SELECT aggregate FROM tier2_aggregates \
463 WHERE category = ?1 AND project_key = ?2 AND contribution_set_hash = ?3",
464 params![key.category.as_str(), self.project_key, current_hash],
465 |row| row.get::<_, Vec<u8>>(0),
466 )
467 .optional()?
468 };
469
470 match payload {
471 Some(bytes) => {
472 let value = serde_json::from_slice::<serde_json::Value>(&bytes)?;
473 self.store_memory_aggregate(key.clone(), value.clone(), Some(current_hash))?;
474 Ok(Some(value))
475 }
476 None => Ok(None),
477 }
478 }
479
    /// Persists the outcome of a full tier-2 run: per-file contributions, the aggregate,
    /// and the run timestamp, all in one transaction.
    ///
    /// For non-tier-2 categories this only stores `aggregate` in memory.
    ///
    /// * `scanned_files` - every file the run looked at; stored rows for files not in
    ///   this list are deleted.
    /// * `contributions` - rows to insert or overwrite.
    /// * `aggregate` - aggregated payload, stored under the contribution-set hash
    ///   computed after the row changes.
    ///
    /// # Errors
    /// Returns an error on a poisoned lock, a SQLite failure, a JSON serialization
    /// failure, or a failure while computing the contribution-set hash. The transaction
    /// is not committed in that case.
    pub fn store_tier2_result(
        &self,
        key: JobKey,
        scanned_files: &[PathBuf],
        contributions: &[FileContribution],
        aggregate: serde_json::Value,
    ) -> Result<(), InspectCacheError> {
        if !key.category.is_tier2() {
            self.store_aggregated(key, aggregate)?;
            return Ok(());
        }

        let now = unix_seconds_now();
        let mut conn = self
            .conn
            .lock()
            .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
        let tx = conn.transaction()?;

        // Drop stored rows whose file was not part of this scan.
        let scanned_relative = scanned_files
            .iter()
            .map(|path| relative_string(&self.project_root, path))
            .collect::<BTreeSet<_>>();
        let existing = existing_contribution_paths(&tx, key.category, &self.project_key)?;
        for file_path in existing {
            if !scanned_relative.contains(&file_path) {
                tx.execute(
                    "DELETE FROM tier2_contributions WHERE category = ?1 AND project_key = ?2 AND file_path = ?3",
                    params![key.category.as_str(), self.project_key, file_path],
                )?;
            }
        }

        // Upsert every contribution. Note the row's category comes from the
        // contribution itself, not from `key`.
        for contribution in contributions {
            let file_path = relative_string(&self.project_root, &contribution.file_path);
            let blob = serde_json::to_vec(&contribution_with_type_ref_names(
                contribution.contribution.clone(),
                &contribution.type_ref_names,
            ))?;
            tx.execute(
                "INSERT INTO tier2_contributions \
                 (category, project_key, file_path, file_mtime_ns, file_size, file_hash, contribution, generated_at) \
                 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8) \
                 ON CONFLICT(category, project_key, file_path) DO UPDATE SET \
                 file_mtime_ns = excluded.file_mtime_ns, \
                 file_size = excluded.file_size, \
                 file_hash = excluded.file_hash, \
                 contribution = excluded.contribution, \
                 generated_at = excluded.generated_at",
                params![
                    contribution.category.as_str(),
                    self.project_key,
                    file_path,
                    system_time_to_ns(contribution.freshness.mtime),
                    contribution.freshness.size as i64,
                    hash_to_hex(contribution.freshness.content_hash),
                    blob,
                    now,
                ],
            )?;
        }

        // Hash is computed through the transaction so it reflects the rows just written.
        let contribution_set_hash = contribution_set_hash_with_conn(
            &tx,
            key.category,
            &self.project_key,
            &self.project_root,
        )?;
        let aggregate_blob = serde_json::to_vec(&aggregate)?;
        tx.execute(
            "INSERT INTO tier2_aggregates \
             (category, project_key, contribution_set_hash, aggregate, generated_at) \
             VALUES (?1, ?2, ?3, ?4, ?5) \
             ON CONFLICT(category, project_key) DO UPDATE SET \
             contribution_set_hash = excluded.contribution_set_hash, \
             aggregate = excluded.aggregate, \
             generated_at = excluded.generated_at",
            params![
                key.category.as_str(),
                self.project_key,
                contribution_set_hash,
                aggregate_blob,
                now,
            ],
        )?;
        tx.execute(
            "INSERT INTO tier2_meta (category, project_key, last_full_run) VALUES (?1, ?2, ?3) \
             ON CONFLICT(category, project_key) DO UPDATE SET last_full_run = excluded.last_full_run",
            params![key.category.as_str(), self.project_key, now],
        )?;
        tx.commit()?;

        // Mirror the committed aggregate in memory under the same hash.
        self.store_memory_aggregate(key, aggregate, Some(contribution_set_hash))
    }
574
    /// Applies an incremental batch of contribution changes for `category` in one
    /// transaction and returns the resulting contribution-set hash.
    ///
    /// Order of application: deletes, then metadata-only updates, then upserts. After
    /// the commit the in-memory aggregate for
    /// `JobKey::for_project_category(category)` is removed. The stored aggregate row is
    /// not modified here.
    ///
    /// # Errors
    /// Returns an error on a poisoned lock, a SQLite failure, a JSON serialization
    /// failure, or a failure while computing the contribution-set hash.
    pub(crate) fn apply_contribution_updates(
        &self,
        category: InspectCategory,
        updates: Tier2ContributionUpdates,
    ) -> Result<String, InspectCacheError> {
        let now = unix_seconds_now();
        let mut conn = self
            .conn
            .lock()
            .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
        let tx = conn.transaction()?;

        // Delete paths are used as given (no relativization against the project root).
        for relative_file in updates.deletes {
            tx.execute(
                "DELETE FROM tier2_contributions WHERE category = ?1 AND project_key = ?2 AND file_path = ?3",
                params![
                    category.as_str(),
                    self.project_key,
                    relative_file.to_string_lossy().to_string()
                ],
            )?;
        }

        // Refresh only the freshness columns; contribution and generated_at are kept.
        for (relative_file, freshness) in updates.metadata_updates {
            tx.execute(
                "UPDATE tier2_contributions \
                 SET file_mtime_ns = ?4, file_size = ?5, file_hash = ?6 \
                 WHERE category = ?1 AND project_key = ?2 AND file_path = ?3",
                params![
                    category.as_str(),
                    self.project_key,
                    relative_file.to_string_lossy().to_string(),
                    system_time_to_ns(freshness.mtime),
                    freshness.size as i64,
                    hash_to_hex(freshness.content_hash),
                ],
            )?;
        }

        // Upsert paths are relativized against the project root. The row's category
        // comes from the contribution itself, not from the `category` argument.
        for contribution in updates.upserts {
            let file_path = relative_string(&self.project_root, &contribution.file_path);
            let blob = serde_json::to_vec(&contribution_with_type_ref_names(
                contribution.contribution.clone(),
                &contribution.type_ref_names,
            ))?;
            tx.execute(
                "INSERT INTO tier2_contributions \
                 (category, project_key, file_path, file_mtime_ns, file_size, file_hash, contribution, generated_at) \
                 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8) \
                 ON CONFLICT(category, project_key, file_path) DO UPDATE SET \
                 file_mtime_ns = excluded.file_mtime_ns, \
                 file_size = excluded.file_size, \
                 file_hash = excluded.file_hash, \
                 contribution = excluded.contribution, \
                 generated_at = excluded.generated_at",
                params![
                    contribution.category.as_str(),
                    self.project_key,
                    file_path,
                    system_time_to_ns(contribution.freshness.mtime),
                    contribution.freshness.size as i64,
                    hash_to_hex(contribution.freshness.content_hash),
                    blob,
                    now,
                ],
            )?;
        }

        // Computed through the transaction so it reflects the changes above.
        let contribution_set_hash =
            contribution_set_hash_with_conn(&tx, category, &self.project_key, &self.project_root)?;
        tx.commit()?;

        // Drop the in-memory aggregate for this category now that its rows changed.
        self.memory
            .write()
            .map_err(|_| InspectCacheError::LockPoisoned("memory"))?
            .remove(&JobKey::for_project_category(category));

        Ok(contribution_set_hash)
    }
654
655 pub(crate) fn load_aggregate_if_hash_matches(
656 &self,
657 category: InspectCategory,
658 contribution_set_hash: &str,
659 ) -> Result<Option<serde_json::Value>, InspectCacheError> {
660 let payload = {
661 let conn = self
662 .conn
663 .lock()
664 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
665 conn.query_row(
666 "SELECT aggregate FROM tier2_aggregates \
667 WHERE category = ?1 AND project_key = ?2 AND contribution_set_hash = ?3",
668 params![category.as_str(), self.project_key, contribution_set_hash],
669 |row| row.get::<_, Vec<u8>>(0),
670 )
671 .optional()?
672 };
673
674 match payload {
675 Some(bytes) => {
676 let value = serde_json::from_slice::<serde_json::Value>(&bytes)?;
677 self.store_memory_aggregate(
678 JobKey::for_project_category(category),
679 value.clone(),
680 Some(contribution_set_hash.to_string()),
681 )?;
682 Ok(Some(value))
683 }
684 None => Ok(None),
685 }
686 }
687
688 pub(crate) fn latest_aggregate_any_hash(
689 &self,
690 category: InspectCategory,
691 ) -> Result<Option<serde_json::Value>, InspectCacheError> {
692 let payload = {
693 let conn = self
694 .conn
695 .lock()
696 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
697 conn.query_row(
698 "SELECT aggregate FROM tier2_aggregates \
699 WHERE category = ?1 AND project_key = ?2 \
700 ORDER BY generated_at DESC LIMIT 1",
701 params![category.as_str(), self.project_key],
702 |row| row.get::<_, Vec<u8>>(0),
703 )
704 .optional()?
705 };
706
707 match payload {
708 Some(bytes) => serde_json::from_slice::<serde_json::Value>(&bytes)
709 .map(Some)
710 .map_err(InspectCacheError::from),
711 None => Ok(None),
712 }
713 }
714
715 pub(crate) fn touch_tier2_last_full_run(
716 &self,
717 category: InspectCategory,
718 ) -> Result<i64, InspectCacheError> {
719 let mut conn = self
720 .conn
721 .lock()
722 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
723 let tx = conn.transaction()?;
724 let previous = tx
725 .query_row(
726 "SELECT last_full_run FROM tier2_meta WHERE category = ?1 AND project_key = ?2",
727 params![category.as_str(), self.project_key],
728 |row| row.get::<_, i64>(0),
729 )
730 .optional()?;
731 let now = unix_seconds_now();
732 let last_full_run = previous.map_or(now, |previous| now.max(previous.saturating_add(1)));
733 tx.execute(
734 "INSERT INTO tier2_meta (category, project_key, last_full_run) VALUES (?1, ?2, ?3) ON CONFLICT(category, project_key) DO UPDATE SET last_full_run = excluded.last_full_run",
735 params![category.as_str(), self.project_key, last_full_run],
736 )?;
737 tx.commit()?;
738 Ok(last_full_run)
739 }
740
741 pub(crate) fn store_tier2_aggregate(
742 &self,
743 key: JobKey,
744 contribution_set_hash: &str,
745 aggregate: serde_json::Value,
746 ) -> Result<(), InspectCacheError> {
747 if !key.category.is_tier2() {
748 self.store_aggregated(key, aggregate)?;
749 return Ok(());
750 }
751
752 let now = unix_seconds_now();
753 let aggregate_blob = serde_json::to_vec(&aggregate)?;
754 let mut conn = self
755 .conn
756 .lock()
757 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
758 let tx = conn.transaction()?;
759 tx.execute(
760 "INSERT INTO tier2_aggregates \
761 (category, project_key, contribution_set_hash, aggregate, generated_at) \
762 VALUES (?1, ?2, ?3, ?4, ?5) \
763 ON CONFLICT(category, project_key) DO UPDATE SET \
764 contribution_set_hash = excluded.contribution_set_hash, \
765 aggregate = excluded.aggregate, \
766 generated_at = excluded.generated_at",
767 params![
768 key.category.as_str(),
769 self.project_key,
770 contribution_set_hash,
771 aggregate_blob,
772 now,
773 ],
774 )?;
775 tx.execute(
776 "INSERT INTO tier2_meta (category, project_key, last_full_run) VALUES (?1, ?2, ?3) \
777 ON CONFLICT(category, project_key) DO UPDATE SET last_full_run = excluded.last_full_run",
778 params![key.category.as_str(), self.project_key, now],
779 )?;
780 tx.commit()?;
781
782 self.store_memory_aggregate(key, aggregate, Some(contribution_set_hash.to_string()))
783 }
784
785 pub fn load_tier2_contributions(
786 &self,
787 category: InspectCategory,
788 ) -> Result<Vec<ContributionRecord>, InspectCacheError> {
789 let conn = self
790 .conn
791 .lock()
792 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
793 let mut stmt = conn.prepare(
794 "SELECT file_path, file_mtime_ns, file_size, file_hash, contribution \
795 FROM tier2_contributions \
796 WHERE category = ?1 AND project_key = ?2 \
797 ORDER BY file_path ASC",
798 )?;
799 let rows = stmt.query_map(params![category.as_str(), self.project_key], |row| {
800 let file_path: String = row.get(0)?;
801 let mtime_ns: i64 = row.get(1)?;
802 let file_size: i64 = row.get(2)?;
803 let file_hash: String = row.get(3)?;
804 let contribution: Vec<u8> = row.get(4)?;
805 Ok((file_path, mtime_ns, file_size, file_hash, contribution))
806 })?;
807
808 let mut records = Vec::new();
809 for row in rows {
810 let (file_path, mtime_ns, file_size, file_hash, contribution) = row?;
811 let contribution: serde_json::Value = serde_json::from_slice(&contribution)?;
812 let type_ref_names = type_ref_names_from_contribution(&contribution);
813 records.push(ContributionRecord {
814 category,
815 file_path: PathBuf::from(file_path),
816 freshness: FileFreshness {
817 mtime: ns_to_system_time(mtime_ns),
818 size: file_size.max(0) as u64,
819 content_hash: hash_from_hex(&file_hash)?,
820 },
821 contribution,
822 type_ref_names,
823 });
824 }
825 Ok(records)
826 }
827
828 pub fn delete_tier2_contribution(
829 &self,
830 category: InspectCategory,
831 relative_file: &Path,
832 ) -> Result<(), InspectCacheError> {
833 let conn = self
834 .conn
835 .lock()
836 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
837 conn.execute(
838 "DELETE FROM tier2_contributions WHERE category = ?1 AND project_key = ?2 AND file_path = ?3",
839 params![
840 category.as_str(),
841 self.project_key,
842 relative_file.to_string_lossy().to_string()
843 ],
844 )?;
845 Ok(())
846 }
847
848 pub fn update_content_fresh_metadata(
849 &self,
850 category: InspectCategory,
851 relative_file: &Path,
852 freshness: &FileFreshness,
853 ) -> Result<(), InspectCacheError> {
854 let conn = self
855 .conn
856 .lock()
857 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
858 conn.execute(
859 "UPDATE tier2_contributions \
860 SET file_mtime_ns = ?4, file_size = ?5, file_hash = ?6 \
861 WHERE category = ?1 AND project_key = ?2 AND file_path = ?3",
862 params![
863 category.as_str(),
864 self.project_key,
865 relative_file.to_string_lossy().to_string(),
866 system_time_to_ns(freshness.mtime),
867 freshness.size as i64,
868 hash_to_hex(freshness.content_hash),
869 ],
870 )?;
871 Ok(())
872 }
873
874 pub(crate) fn contribution_fingerprint(
875 &self,
876 category: InspectCategory,
877 ) -> Result<(usize, String, bool), InspectCacheError> {
878 let conn = self
879 .conn
880 .lock()
881 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
882 let mut stmt = conn.prepare(
883 "SELECT file_path, file_mtime_ns, file_size, file_hash \
884 FROM tier2_contributions \
885 WHERE category = ?1 AND project_key = ?2 \
886 ORDER BY file_path ASC",
887 )?;
888 let rows = stmt.query_map(params![category.as_str(), self.project_key], |row| {
889 Ok((
890 row.get::<_, String>(0)?,
891 row.get::<_, i64>(1)?,
892 row.get::<_, i64>(2)?,
893 row.get::<_, String>(3)?,
894 ))
895 })?;
896
897 let zero_hash = hash_to_hex(cache_freshness::zero_hash());
898 let mut count = 0usize;
899 let mut hash_complete = true;
900 let mut hasher = blake3::Hasher::new();
901 for row in rows {
902 let (file_path, mtime_ns, file_size, file_hash) = row?;
903 count += 1;
904 if file_hash == zero_hash {
905 hash_complete = false;
906 }
907 update_contribution_fingerprint_hash(
908 &mut hasher,
909 &file_path,
910 mtime_ns.max(0),
911 file_size.max(0) as u64,
912 &file_hash,
913 );
914 }
915
916 Ok((count, hasher.finalize().to_hex().to_string(), hash_complete))
917 }
918
919 pub(crate) fn contribution_freshness(
920 &self,
921 category: InspectCategory,
922 ) -> Result<Vec<(PathBuf, FileFreshness)>, InspectCacheError> {
923 let conn = self
924 .conn
925 .lock()
926 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
927 let mut stmt = conn.prepare(
928 "SELECT file_path, file_mtime_ns, file_size, file_hash \
929 FROM tier2_contributions \
930 WHERE category = ?1 AND project_key = ?2 \
931 ORDER BY file_path ASC",
932 )?;
933 let rows = stmt.query_map(params![category.as_str(), self.project_key], |row| {
934 Ok((
935 row.get::<_, String>(0)?,
936 row.get::<_, i64>(1)?,
937 row.get::<_, i64>(2)?,
938 row.get::<_, String>(3)?,
939 ))
940 })?;
941
942 let mut records = Vec::new();
943 for row in rows {
944 let (file_path, mtime_ns, file_size, file_hash) = row?;
945 records.push((
946 PathBuf::from(file_path),
947 FileFreshness {
948 mtime: ns_to_system_time(mtime_ns),
949 size: file_size.max(0) as u64,
950 content_hash: hash_from_hex(&file_hash)?,
951 },
952 ));
953 }
954 Ok(records)
955 }
956
957 pub fn contribution_set_hash(
958 &self,
959 category: InspectCategory,
960 ) -> Result<String, InspectCacheError> {
961 let conn = self
962 .conn
963 .lock()
964 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
965 contribution_set_hash_with_conn(&conn, category, &self.project_key, &self.project_root)
966 }
967
968 pub fn last_full_run(
969 &self,
970 category: InspectCategory,
971 ) -> Result<Option<i64>, InspectCacheError> {
972 let conn = self
973 .conn
974 .lock()
975 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
976 conn.query_row(
977 "SELECT last_full_run FROM tier2_meta WHERE category = ?1 AND project_key = ?2",
978 params![category.as_str(), self.project_key],
979 |row| row.get::<_, i64>(0),
980 )
981 .optional()
982 .map_err(InspectCacheError::from)
983 }
984
985 pub fn memory_generated_at(&self, key: &JobKey) -> Result<Option<i64>, InspectCacheError> {
986 Ok(self
987 .memory
988 .read()
989 .map_err(|_| InspectCacheError::LockPoisoned("memory"))?
990 .get(key)
991 .map(|entry| entry.generated_at))
992 }
993}
994
995fn configure_connection(conn: &Connection) -> Result<(), InspectCacheError> {
996 conn.pragma_update(None, "journal_mode", "WAL")?;
997 conn.pragma_update(None, "busy_timeout", 5_000)?;
998 Ok(())
999}
1000
/// Creates the three cache tables if they do not exist yet.
///
/// * `tier2_contributions` - one row per (category, project, file).
/// * `tier2_aggregates` - one row per (category, project).
/// * `tier2_meta` - one row per (category, project) holding `last_full_run`.
///
/// # Errors
/// Returns an error if SQLite rejects the batch.
fn initialize_schema(conn: &Connection) -> Result<(), InspectCacheError> {
    conn.execute_batch(
        "CREATE TABLE IF NOT EXISTS tier2_contributions (
            category TEXT NOT NULL,
            project_key TEXT NOT NULL,
            file_path TEXT NOT NULL,
            file_mtime_ns INTEGER NOT NULL,
            file_size INTEGER NOT NULL,
            file_hash TEXT NOT NULL,
            contribution BLOB NOT NULL,
            generated_at INTEGER NOT NULL,
            PRIMARY KEY (category, project_key, file_path)
        );

        CREATE TABLE IF NOT EXISTS tier2_aggregates (
            category TEXT NOT NULL,
            project_key TEXT NOT NULL,
            contribution_set_hash TEXT NOT NULL,
            aggregate BLOB NOT NULL,
            generated_at INTEGER NOT NULL,
            PRIMARY KEY (category, project_key)
        );

        CREATE TABLE IF NOT EXISTS tier2_meta (
            category TEXT NOT NULL,
            project_key TEXT NOT NULL,
            last_full_run INTEGER NOT NULL,
            PRIMARY KEY (category, project_key)
        );",
    )?;
    Ok(())
}
1033
1034fn existing_contribution_paths(
1035 conn: &Connection,
1036 category: InspectCategory,
1037 project_key: &str,
1038) -> Result<Vec<String>, InspectCacheError> {
1039 let mut stmt = conn.prepare(
1040 "SELECT file_path FROM tier2_contributions WHERE category = ?1 AND project_key = ?2",
1041 )?;
1042 let rows = stmt.query_map(params![category.as_str(), project_key], |row| {
1043 row.get::<_, String>(0)
1044 })?;
1045 rows.collect::<Result<Vec<_>, _>>()
1046 .map_err(InspectCacheError::from)
1047}
1048
1049fn contribution_set_hash_with_conn(
1050 conn: &Connection,
1051 category: InspectCategory,
1052 project_key: &str,
1053 project_root: &Path,
1054) -> Result<String, InspectCacheError> {
1055 let mut stmt = conn.prepare(
1056 "SELECT file_path, file_hash FROM tier2_contributions \
1057 WHERE category = ?1 AND project_key = ?2 ORDER BY file_path ASC",
1058 )?;
1059 let rows = stmt.query_map(params![category.as_str(), project_key], |row| {
1060 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
1061 })?;
1062
1063 let mut hasher = blake3::Hasher::new();
1064 hasher.update(b"tier2-contributions\0");
1065 hasher.update(&TIER2_CONTRIBUTION_CACHE_VERSION.to_le_bytes());
1066 hasher.update(b"\0");
1067 for row in rows {
1068 let (file_path, file_hash) = row?;
1069 hasher.update(file_path.as_bytes());
1070 hasher.update(b"\0");
1071 hasher.update(file_hash.as_bytes());
1072 hasher.update(b"\0");
1073 }
1074 update_manifest_fingerprint_hash(&mut hasher, project_root)?;
1075 if matches!(
1076 category,
1077 InspectCategory::DeadCode | InspectCategory::UnusedExports
1078 ) {
1079 update_resolver_config_fingerprint_hash(&mut hasher, project_root)?;
1080 }
1081 Ok(hasher.finalize().to_hex().to_string())
1082}
1083
1084fn update_resolver_config_fingerprint_hash(
1085 hasher: &mut blake3::Hasher,
1086 project_root: &Path,
1087) -> Result<(), InspectCacheError> {
1088 let manifest_root =
1089 fs::canonicalize(project_root).unwrap_or_else(|_| project_root.to_path_buf());
1090 hasher.update(b"ts-js-resolver-configs\0");
1091 let mut configs = crate::callgraph::walk_project_files(project_root)
1092 .filter(|path| {
1093 path.file_name()
1094 .and_then(|name| name.to_str())
1095 .is_some_and(|name| name == "tsconfig.json")
1096 })
1097 .collect::<Vec<_>>();
1098 configs.sort();
1099 configs.dedup();
1100 for config in configs {
1101 let relative_path = config
1102 .strip_prefix(&manifest_root)
1103 .unwrap_or(config.as_path())
1104 .to_string_lossy()
1105 .replace('\\', "/");
1106 let content_hash = blake3::hash(&fs::read(&config)?);
1107 hasher.update(relative_path.as_bytes());
1108 hasher.update(b"\0");
1109 hasher.update(content_hash.as_bytes());
1110 hasher.update(b"\0");
1111 }
1112 Ok(())
1113}
1114
1115fn update_manifest_fingerprint_hash(
1116 hasher: &mut blake3::Hasher,
1117 project_root: &Path,
1118) -> Result<(), InspectCacheError> {
1119 let manifest_root =
1120 fs::canonicalize(project_root).unwrap_or_else(|_| project_root.to_path_buf());
1121 hasher.update(b"entry-point-manifests\0");
1122 for manifest in super::entry_points::collect_entry_point_manifests(project_root) {
1123 let relative_path = manifest
1124 .strip_prefix(&manifest_root)
1125 .unwrap_or(manifest.as_path())
1126 .to_string_lossy()
1127 .replace('\\', "/");
1128 let content_hash = blake3::hash(&fs::read(&manifest)?);
1129 hasher.update(relative_path.as_bytes());
1130 hasher.update(b"\0");
1131 hasher.update(content_hash.as_bytes());
1132 hasher.update(b"\0");
1133 }
1134 Ok(())
1135}
1136
1137fn update_contribution_fingerprint_hash(
1138 hasher: &mut blake3::Hasher,
1139 relative_path: &str,
1140 mtime_ns: i64,
1141 file_size: u64,
1142 file_hash: &str,
1143) {
1144 hasher.update(relative_path.as_bytes());
1145 hasher.update(&[0]);
1146 hasher.update(&mtime_ns.to_le_bytes());
1147 hasher.update(&file_size.to_le_bytes());
1148 hasher.update(&[0]);
1149 hasher.update(file_hash.as_bytes());
1150}
1151
/// Renders `path` relative to `project_root` as a (lossily converted) string.
///
/// When `path` does not live under `project_root`, the path is rendered
/// unchanged. Path separators are left exactly as they appear in `path`.
fn relative_string(project_root: &Path, path: &Path) -> String {
    let shown = match path.strip_prefix(project_root) {
        Ok(relative) => relative,
        Err(_) => path,
    };
    shown.to_string_lossy().into_owned()
}
1158
/// Converts a `SystemTime` to nanoseconds since the Unix epoch.
///
/// Times before the epoch map to `0`, and values too large for an `i64`
/// saturate at `i64::MAX`.
fn system_time_to_ns(time: SystemTime) -> i64 {
    const MAX_NS: u128 = i64::MAX as u128;
    let since_epoch = match time.duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed,
        // `time` is earlier than the epoch: clamp to zero.
        Err(_) => Duration::from_secs(0),
    };
    let nanos = since_epoch.as_nanos();
    if nanos > MAX_NS {
        i64::MAX
    } else {
        nanos as i64
    }
}
1166
/// Converts nanoseconds since the Unix epoch back into a `SystemTime`.
///
/// Negative inputs are treated as `0`, i.e. they map to the epoch itself.
fn ns_to_system_time(value: i64) -> SystemTime {
    let nanos = if value < 0 { 0 } else { value as u64 };
    UNIX_EPOCH + Duration::from_nanos(nanos)
}
1170
1171fn hash_to_hex(hash: blake3::Hash) -> String {
1172 hash.to_hex().to_string()
1173}
1174
1175fn hash_from_hex(value: &str) -> Result<blake3::Hash, InspectCacheError> {
1176 if value.len() != 64 {
1177 return Err(InspectCacheError::InvalidHash(value.to_string()));
1178 }
1179 let mut bytes = [0u8; 32];
1180 for (index, chunk) in value.as_bytes().chunks(2).enumerate() {
1181 let hex = std::str::from_utf8(chunk)
1182 .map_err(|_| InspectCacheError::InvalidHash(value.to_string()))?;
1183 bytes[index] = u8::from_str_radix(hex, 16)
1184 .map_err(|_| InspectCacheError::InvalidHash(value.to_string()))?;
1185 }
1186 Ok(blake3::Hash::from_bytes(bytes))
1187}
1188
/// Returns the current wall-clock time as whole seconds since the Unix epoch.
///
/// A clock set before the epoch yields `0`; a value that does not fit in an
/// `i64` saturates at `i64::MAX`.
fn unix_seconds_now() -> i64 {
    let since_epoch = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed,
        Err(_) => Duration::from_secs(0),
    };
    let seconds = since_epoch.as_secs();
    if seconds > i64::MAX as u64 {
        i64::MAX
    } else {
        seconds as i64
    }
}
1196
#[cfg(test)]
mod tests {
    use super::*;
    use std::cell::Cell;
    use std::fs;
    use std::path::{Path, PathBuf};

    /// Collects freshness metadata for `path`, panicking if collection fails.
    fn collect_freshness(path: &Path) -> FileFreshness {
        crate::cache_freshness::collect(path).unwrap()
    }

    /// Creates `file-{index}.txt` inside `dir`, containing the index as text,
    /// and returns the path of the new file.
    fn write_indexed_file(dir: &Path, index: usize) -> PathBuf {
        let path = dir.join(format!("file-{index}.txt"));
        fs::write(&path, index.to_string()).unwrap();
        path
    }

    /// Filling the memo to capacity, touching the first entry, and inserting
    /// one more entry must evict the second entry rather than the touched one.
    #[test]
    fn tier1_file_memo_evicts_lru_and_keeps_recent_hits() {
        let scratch = tempfile::tempdir().unwrap();
        let memo = Tier1FileMemo::<usize>::default();

        // Fill the memo to capacity; each call must hand back the value its
        // scan closure produced.
        let paths: Vec<PathBuf> = (0..TIER1_FILE_MEMO_MAX_ENTRIES)
            .map(|index| {
                let path = write_indexed_file(scratch.path(), index);
                let cached =
                    memo.get_or_insert_with(&path, |file| (Some(collect_freshness(file)), index));
                assert_eq!(cached, index);
                path
            })
            .collect();

        // Touch the first entry so it is no longer the eviction candidate.
        let recent_path = paths[0].clone();
        let touched = memo.get_or_insert_with(&recent_path, |_| {
            panic!("recently inserted entry should hit before eviction")
        });
        assert_eq!(touched, 0);

        // One more distinct file pushes the memo over capacity.
        let evicting_path = scratch.path().join("new-file.txt");
        fs::write(&evicting_path, "new").unwrap();
        let inserted = memo.get_or_insert_with(&evicting_path, |file| {
            (Some(collect_freshness(file)), TIER1_FILE_MEMO_MAX_ENTRIES)
        });
        assert_eq!(inserted, TIER1_FILE_MEMO_MAX_ENTRIES);

        {
            // Scope the guard so the lock is released before the memo is used again.
            let state = memo.state.lock().unwrap();
            assert_eq!(state.entries.len(), TIER1_FILE_MEMO_MAX_ENTRIES);
            assert!(state.entries.contains_key(&recent_path));
            assert!(state.entries.contains_key(&evicting_path));
            assert!(!state.entries.contains_key(&paths[1]));
        }

        let survivor = memo.get_or_insert_with(&recent_path, |_| {
            panic!("recently used entry should survive eviction")
        });
        assert_eq!(survivor, 0);
    }

    /// Repeatedly hitting one entry must neither evict it nor let the LRU
    /// queue grow past twice the entry capacity.
    #[test]
    fn tier1_file_memo_repeated_touches_keep_lazy_lru_bounded() {
        let scratch = tempfile::tempdir().unwrap();
        let memo = Tier1FileMemo::<usize>::default();

        let paths: Vec<PathBuf> = (0..TIER1_FILE_MEMO_MAX_ENTRIES)
            .map(|index| {
                let path = write_indexed_file(scratch.path(), index);
                memo.get_or_insert_with(&path, |file| (Some(collect_freshness(file)), index));
                path
            })
            .collect();

        let hot_path = &paths[0];
        for _ in 0..(TIER1_FILE_MEMO_MAX_ENTRIES * 3) {
            let hit = memo.get_or_insert_with(hot_path, |_| {
                panic!("hot entry should stay cached while it is repeatedly touched")
            });
            assert_eq!(hit, 0);
        }

        let evicting_path = scratch.path().join("new-file.txt");
        fs::write(&evicting_path, "new").unwrap();
        memo.get_or_insert_with(&evicting_path, |file| {
            (Some(collect_freshness(file)), TIER1_FILE_MEMO_MAX_ENTRIES)
        });

        let state = memo.state.lock().unwrap();
        assert_eq!(state.entries.len(), TIER1_FILE_MEMO_MAX_ENTRIES);
        assert!(state.entries.contains_key(&paths[0]));
        assert!(state.entries.contains_key(&evicting_path));
        assert!(!state.entries.contains_key(&paths[1]));
        let lru_bound = TIER1_FILE_MEMO_MAX_ENTRIES * 2;
        assert!(
            state.lru.len() <= lru_bound,
            "lazy LRU queue should be compacted instead of growing without bound"
        );
    }

    /// An unchanged file must be served from the memo; a rewritten file must
    /// trigger exactly one rescan, after which the memo is reused again.
    #[test]
    fn tier1_file_memo_reuses_fresh_entries_and_rescans_stale_files() {
        let scratch = tempfile::tempdir().unwrap();
        let path = scratch.path().join("memo.txt");
        fs::write(&path, "first").unwrap();

        let memo = Tier1FileMemo::<String>::default();
        // Counts how many times a scan closure actually ran.
        let scan_count = Cell::new(0);

        let initial = memo.get_or_insert_with(&path, |file| {
            scan_count.set(scan_count.get() + 1);
            (Some(collect_freshness(file)), "first scan".to_string())
        });
        assert_eq!(initial, "first scan");
        assert_eq!(scan_count.get(), 1);

        let reused =
            memo.get_or_insert_with(&path, |_| panic!("unchanged file should reuse Tier-1 memo"));
        assert_eq!(reused, "first scan");
        assert_eq!(scan_count.get(), 1);

        // Rewrite the file with different contents so the cached entry is stale.
        fs::write(&path, "changed file contents").unwrap();
        let rescanned = memo.get_or_insert_with(&path, |file| {
            scan_count.set(scan_count.get() + 1);
            (Some(collect_freshness(file)), "second scan".to_string())
        });
        assert_eq!(rescanned, "second scan");
        assert_eq!(scan_count.get(), 2);

        let reused_after_rescan = memo.get_or_insert_with(&path, |_| {
            panic!("rescanned file should reuse refreshed Tier-1 memo")
        });
        assert_eq!(reused_after_rescan, "second scan");
        assert_eq!(scan_count.get(), 2);
    }

    /// Serde-friendly mirror of the `ContributionRecord` fields the round-trip
    /// test inspects.
    #[derive(serde::Deserialize, serde::Serialize)]
    struct RoundTripContributionRecord {
        category: String,
        file_path: PathBuf,
        contribution: serde_json::Value,
        type_ref_names: BTreeSet<String>,
    }

    impl From<&ContributionRecord> for RoundTripContributionRecord {
        fn from(record: &ContributionRecord) -> Self {
            let category = record.category.as_str().to_owned();
            Self {
                category,
                file_path: record.file_path.clone(),
                contribution: record.contribution.clone(),
                type_ref_names: record.type_ref_names.clone(),
            }
        }
    }

    /// A dead-code contribution stored through one cache handle must be
    /// loadable through a freshly opened handle with its liveness metadata
    /// intact.
    #[test]
    fn contribution_record_round_trip_preserves_dead_code_liveness_metadata() {
        let workspace = tempfile::tempdir().unwrap();
        let project_root = workspace.path().join("project");
        let inspect_dir = workspace.path().join("inspect");
        let source = project_root.join("src/lib.ts");
        fs::create_dir_all(source.parent().unwrap()).unwrap();
        fs::write(&source, "export interface Widget { id: string }\n").unwrap();

        let writer = InspectCache::open(inspect_dir.clone(), project_root.clone()).unwrap();
        let payload = serde_json::json!({
            "file": "src/lib.ts",
            "exports": [{
                "symbol": "Widget",
                "kind": "interface",
                "line": 1,
                "is_type_like": true,
                "is_entry_point": false,
            }],
            "internal_calls": [],
            "liveness_roots": [],
            "dispatched_method_names": ["render"],
            "type_ref_names": ["Widget"],
        });
        let contribution = FileContribution::new(
            InspectCategory::DeadCode,
            source.clone(),
            collect_freshness(&source),
            payload,
        )
        .with_type_ref_names(["Widget".to_string()]);
        writer
            .store_tier2_result(
                JobKey::for_project_category(InspectCategory::DeadCode),
                std::slice::from_ref(&source),
                &[contribution],
                serde_json::json!({ "count": 0, "items": [] }),
            )
            .unwrap();
        // Drop the first handle before reopening the cache from disk.
        drop(writer);

        let reader = InspectCache::open(inspect_dir, project_root).unwrap();
        let records = reader
            .load_tier2_contributions(InspectCategory::DeadCode)
            .unwrap();
        assert_eq!(records.len(), 1);

        let mirror = RoundTripContributionRecord::from(&records[0]);
        let encoded = serde_json::to_vec(&mirror).unwrap();
        let decoded: RoundTripContributionRecord = serde_json::from_slice(&encoded).unwrap();
        assert_eq!(decoded.category, InspectCategory::DeadCode.as_str());
        assert_eq!(decoded.contribution["dispatched_method_names"][0], "render");
        assert_eq!(decoded.contribution["type_ref_names"][0], "Widget");
        assert!(decoded.type_ref_names.contains("Widget"));
        assert_eq!(
            decoded.contribution["exports"][0]["is_type_like"].as_bool(),
            Some(true)
        );
        // Pin the cache version so a bump forces this test to be revisited.
        assert_eq!(TIER2_CONTRIBUTION_CACHE_VERSION, 14);
    }
}