1use std::collections::{BTreeSet, HashMap, VecDeque};
2use std::fmt;
3use std::fs;
4use std::path::{Path, PathBuf};
5use std::sync::{Mutex, RwLock};
6use std::time::{Duration, SystemTime, UNIX_EPOCH};
7
8use rusqlite::{params, Connection, OpenFlags, OptionalExtension};
9
10use crate::cache_freshness::{self, FileFreshness, FreshnessVerdict};
11
12use super::job::{
13 contribution_with_type_ref_names, type_ref_names_from_contribution, FileContribution,
14 InspectCategory, JobKey,
15};
16
/// Batch of contribution changes that `InspectCache::apply_contribution_updates`
/// applies inside a single SQLite transaction.
#[derive(Debug, Default)]
pub(crate) struct Tier2ContributionUpdates {
    /// Contributions to insert, or to overwrite when a row for the same file already exists.
    pub upserts: Vec<FileContribution>,
    /// Paths whose contribution rows are deleted; the path text is used as the stored
    /// `file_path` without further relativization.
    pub deletes: Vec<PathBuf>,
    /// Paths whose stored mtime/size/hash columns are rewritten while the contribution
    /// blob itself is left untouched.
    pub metadata_updates: Vec<(PathBuf, FileFreshness)>,
}
23
/// Errors surfaced by the inspect cache.
#[derive(Debug)]
pub enum InspectCacheError {
    /// Filesystem failure, converted from `std::io::Error`.
    Io(std::io::Error),
    /// SQLite failure, converted from `rusqlite::Error`.
    Sql(rusqlite::Error),
    /// JSON (de)serialization failure, converted from `serde_json::Error`.
    Json(serde_json::Error),
    /// A lock was poisoned; the payload names which lock.
    LockPoisoned(&'static str),
    /// A stored hash string could not be parsed as a blake3 hash; carries the offending text.
    InvalidHash(String),
}
32
33impl fmt::Display for InspectCacheError {
34 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
35 match self {
36 InspectCacheError::Io(error) => write!(formatter, "inspect cache io error: {error}"),
37 InspectCacheError::Sql(error) => {
38 write!(formatter, "inspect cache sqlite error: {error}")
39 }
40 InspectCacheError::Json(error) => {
41 write!(formatter, "inspect cache json error: {error}")
42 }
43 InspectCacheError::LockPoisoned(name) => {
44 write!(formatter, "inspect cache lock poisoned: {name}")
45 }
46 InspectCacheError::InvalidHash(hash) => {
47 write!(formatter, "inspect cache invalid blake3 hash: {hash}")
48 }
49 }
50 }
51}
52
// Empty impl: all `Error` methods keep their default implementations.
impl std::error::Error for InspectCacheError {}
54
55impl From<std::io::Error> for InspectCacheError {
56 fn from(error: std::io::Error) -> Self {
57 Self::Io(error)
58 }
59}
60
61impl From<rusqlite::Error> for InspectCacheError {
62 fn from(error: rusqlite::Error) -> Self {
63 Self::Sql(error)
64 }
65}
66
67impl From<serde_json::Error> for InspectCacheError {
68 fn from(error: serde_json::Error) -> Self {
69 Self::Json(error)
70 }
71}
72
/// Version tag fed into every tier-2 contribution-set hash (see
/// `contribution_set_hash_with_conn`), so changing it changes all such hashes.
pub(crate) const TIER2_CONTRIBUTION_CACHE_VERSION: u32 = 13;
101
/// One stored per-file contribution as returned by `InspectCache::load_tier2_contributions`.
#[derive(Debug, Clone)]
pub struct ContributionRecord {
    /// Category the record was loaded for.
    pub category: InspectCategory,
    /// File path exactly as stored in the `file_path` column.
    pub file_path: PathBuf,
    /// Stored mtime, size and content hash of the file.
    pub freshness: FileFreshness,
    /// Contribution payload decoded from the stored JSON blob.
    pub contribution: serde_json::Value,
    /// Names extracted from `contribution` via `type_ref_names_from_contribution`.
    pub type_ref_names: BTreeSet<String>,
}
110
/// In-memory copy of an aggregated payload.
#[derive(Debug, Clone)]
struct MemoryAggregate {
    /// The aggregated JSON payload.
    payload: serde_json::Value,
    /// Unix time in seconds at which this entry was put into memory.
    generated_at: i64,
    /// Contribution-set hash the payload belongs to; `None` for entries stored through
    /// `store_aggregated`.
    contribution_set_hash: Option<String>,
}
117
/// Maximum number of live entries a `Tier1MemoState` keeps; inserting beyond this evicts
/// the least recently used entries.
const TIER1_FILE_MEMO_MAX_ENTRIES: usize = 4_096;
119
/// A memoized per-file value together with the freshness data it was computed from.
#[derive(Debug, Clone)]
struct Tier1MemoEntry<T> {
    /// Freshness snapshot used to decide whether `value` is still valid.
    freshness: FileFreshness,
    /// The memoized value.
    value: T,
    /// Recency stamp; overwritten by `Tier1MemoState` on insert and touch.
    generation: u64,
}
126
/// Entry in the recency queue. A node only counts as live while its `generation`
/// equals the generation stored on the map entry for `path`.
#[derive(Debug, Clone)]
struct LruNode {
    /// Path of the memo entry this node refers to.
    path: PathBuf,
    /// Generation the entry had when this node was queued.
    generation: u64,
}
132
/// Bounded per-file memo: a map of entries plus a recency queue used for LRU eviction.
#[derive(Debug)]
struct Tier1MemoState<T> {
    /// Live entries keyed by file path.
    entries: HashMap<PathBuf, Tier1MemoEntry<T>>,
    /// Recency queue, oldest at the front; may contain stale nodes for entries that were
    /// touched again or removed.
    lru: VecDeque<LruNode>,
    /// Next generation number to hand out.
    next_generation: u64,
}
139
140impl<T> Default for Tier1MemoState<T> {
141 fn default() -> Self {
142 Self {
143 entries: HashMap::new(),
144 lru: VecDeque::new(),
145 next_generation: 0,
146 }
147 }
148}
149
150impl<T> Tier1MemoState<T> {
151 fn insert(&mut self, path: PathBuf, mut entry: Tier1MemoEntry<T>) {
152 let generation = self.allocate_generation();
153 entry.generation = generation;
154 self.entries.insert(path.clone(), entry);
155 self.lru.push_back(LruNode { path, generation });
156 self.compact_lru_if_needed();
157 self.evict_lru();
158 }
159
160 fn remove(&mut self, path: &Path) {
161 self.entries.remove(path);
162 self.compact_lru_if_needed();
163 }
164
165 fn touch(&mut self, path: &Path) {
166 if !self.entries.contains_key(path) {
167 return;
168 }
169
170 let generation = self.allocate_generation();
171 if let Some(entry) = self.entries.get_mut(path) {
172 entry.generation = generation;
173 self.lru.push_back(LruNode {
174 path: path.to_path_buf(),
175 generation,
176 });
177 }
178 self.compact_lru_if_needed();
179 }
180
181 fn allocate_generation(&mut self) -> u64 {
182 if self.next_generation == u64::MAX {
183 self.rebuild_lru();
184 }
185 let generation = self.next_generation;
186 self.next_generation += 1;
187 generation
188 }
189
190 fn compact_lru_if_needed(&mut self) {
191 let max_lru_nodes = TIER1_FILE_MEMO_MAX_ENTRIES
192 .saturating_mul(2)
193 .max(self.entries.len());
194 if self.lru.len() > max_lru_nodes {
195 self.rebuild_lru();
196 }
197 }
198
199 fn rebuild_lru(&mut self) {
200 let mut live_nodes = self
201 .entries
202 .iter()
203 .map(|(path, entry)| (entry.generation, path.clone()))
204 .collect::<Vec<_>>();
205 live_nodes.sort_by_key(|(generation, _)| *generation);
206
207 self.lru.clear();
208 for (generation, (_, path)) in live_nodes.into_iter().enumerate() {
209 let generation = generation as u64;
210 if let Some(entry) = self.entries.get_mut(&path) {
211 entry.generation = generation;
212 }
213 self.lru.push_back(LruNode { path, generation });
214 }
215 self.next_generation = self.lru.len() as u64;
216 }
217
218 fn evict_lru(&mut self) {
219 while self.entries.len() > TIER1_FILE_MEMO_MAX_ENTRIES {
220 let Some(node) = self.lru.pop_front() else {
221 break;
222 };
223 if self
224 .entries
225 .get(&node.path)
226 .is_some_and(|entry| entry.generation == node.generation)
227 {
228 self.entries.remove(&node.path);
229 }
230 }
231 self.compact_lru_if_needed();
232 }
233}
234
/// Mutex-guarded per-file memo of values of type `T`.
#[derive(Debug)]
pub(crate) struct Tier1FileMemo<T> {
    /// Entries and recency bookkeeping, behind a single mutex.
    state: Mutex<Tier1MemoState<T>>,
}
239
240impl<T> Default for Tier1FileMemo<T> {
241 fn default() -> Self {
242 Self {
243 state: Mutex::new(Tier1MemoState::default()),
244 }
245 }
246}
247
248impl<T: Clone> Tier1FileMemo<T> {
249 pub(crate) fn get_or_insert_with<F>(&self, path: &Path, scan: F) -> T
250 where
251 F: FnOnce(&Path) -> (Option<FileFreshness>, T),
252 {
253 if let Some(cached) = self.cached_value(path) {
254 return cached;
255 }
256
257 let (freshness, value) = scan(path);
258 if let Ok(mut state) = self.state.lock() {
259 if let Some(freshness) = freshness {
260 state.insert(
261 path.to_path_buf(),
262 Tier1MemoEntry {
263 freshness,
264 value: value.clone(),
265 generation: 0,
266 },
267 );
268 } else {
269 state.remove(path);
270 }
271 }
272 value
273 }
274
275 fn cached_value(&self, path: &Path) -> Option<T> {
276 let mut cached = self
277 .state
278 .lock()
279 .ok()
280 .and_then(|state| state.entries.get(path).cloned())?;
281
282 match crate::cache_freshness::verify_file(path, &cached.freshness) {
283 FreshnessVerdict::HotFresh => {
284 if let Ok(mut state) = self.state.lock() {
285 state.touch(path);
286 }
287 Some(cached.value)
288 }
289 FreshnessVerdict::ContentFresh {
290 new_mtime,
291 new_size,
292 } => {
293 cached.freshness.mtime = new_mtime;
294 cached.freshness.size = new_size;
295 let value = cached.value.clone();
296 if let Ok(mut state) = self.state.lock() {
297 state.insert(path.to_path_buf(), cached);
298 }
299 Some(value)
300 }
301 FreshnessVerdict::Stale => None,
302 FreshnessVerdict::Deleted => {
303 if let Ok(mut state) = self.state.lock() {
304 state.remove(path);
305 }
306 None
307 }
308 }
309 }
310}
311
/// Per-project inspect cache: a SQLite connection plus an in-memory map of aggregated
/// payloads.
#[derive(Debug)]
pub struct InspectCache {
    /// Project root; used to relativize contribution paths and when hashing entry-point
    /// manifests.
    project_root: PathBuf,
    /// Key from `crate::search_index::project_cache_key`; names the database file and is
    /// part of every row's key.
    project_key: String,
    /// Location of the SQLite file, `<inspect_dir>/<project_key>.sqlite`.
    sqlite_path: PathBuf,
    /// The single SQLite connection, behind a mutex.
    conn: Mutex<Connection>,
    /// Aggregated payloads held in memory, keyed by job.
    memory: RwLock<HashMap<JobKey, MemoryAggregate>>,
}
320
321impl InspectCache {
322 pub fn open(inspect_dir: PathBuf, project_root: PathBuf) -> Result<Self, InspectCacheError> {
323 std::fs::create_dir_all(&inspect_dir)?;
324 let project_key = crate::search_index::project_cache_key(&project_root);
325 let sqlite_path = inspect_dir.join(format!("{project_key}.sqlite"));
326 let conn = Connection::open(&sqlite_path)?;
327 configure_connection(&conn)?;
328 initialize_schema(&conn)?;
329 Ok(Self::from_connection(
330 project_root,
331 project_key,
332 sqlite_path,
333 conn,
334 ))
335 }
336
337 pub fn open_readonly(
338 inspect_dir: PathBuf,
339 project_root: PathBuf,
340 ) -> Result<Option<Self>, InspectCacheError> {
341 let project_key = crate::search_index::project_cache_key(&project_root);
342 let sqlite_path = inspect_dir.join(format!("{project_key}.sqlite"));
343 if !sqlite_path.is_file() {
344 return Ok(None);
345 }
346 let conn = Connection::open_with_flags(&sqlite_path, OpenFlags::SQLITE_OPEN_READ_ONLY)?;
347 conn.busy_timeout(Duration::from_millis(5_000))?;
348 Ok(Some(Self::from_connection(
349 project_root,
350 project_key,
351 sqlite_path,
352 conn,
353 )))
354 }
355
356 fn from_connection(
357 project_root: PathBuf,
358 project_key: String,
359 sqlite_path: PathBuf,
360 conn: Connection,
361 ) -> Self {
362 Self {
363 project_root,
364 project_key,
365 sqlite_path,
366 conn: Mutex::new(conn),
367 memory: RwLock::new(HashMap::new()),
368 }
369 }
370
371 pub fn project_root(&self) -> &Path {
372 &self.project_root
373 }
374
375 pub fn project_key(&self) -> &str {
376 &self.project_key
377 }
378
379 pub fn sqlite_path(&self) -> &Path {
380 &self.sqlite_path
381 }
382
383 pub fn store_aggregated(
384 &self,
385 key: JobKey,
386 payload: serde_json::Value,
387 ) -> Result<(), InspectCacheError> {
388 self.store_memory_aggregate(key, payload, None)
389 }
390
391 fn store_memory_aggregate(
392 &self,
393 key: JobKey,
394 payload: serde_json::Value,
395 contribution_set_hash: Option<String>,
396 ) -> Result<(), InspectCacheError> {
397 self.memory
398 .write()
399 .map_err(|_| InspectCacheError::LockPoisoned("memory"))?
400 .insert(
401 key,
402 MemoryAggregate {
403 payload,
404 generated_at: unix_seconds_now(),
405 contribution_set_hash,
406 },
407 );
408 Ok(())
409 }
410
411 pub fn get_aggregated(
412 &self,
413 key: &JobKey,
414 ) -> Result<Option<serde_json::Value>, InspectCacheError> {
415 if !key.category.is_tier2() {
416 return Ok(self
417 .memory
418 .read()
419 .map_err(|_| InspectCacheError::LockPoisoned("memory"))?
420 .get(key)
421 .map(|entry| entry.payload.clone()));
422 }
423
424 let current_hash = {
425 let conn = self
426 .conn
427 .lock()
428 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
429 contribution_set_hash_with_conn(
430 &conn,
431 key.category,
432 &self.project_key,
433 &self.project_root,
434 )?
435 };
436
437 let memory_entry = {
438 self.memory
439 .read()
440 .map_err(|_| InspectCacheError::LockPoisoned("memory"))?
441 .get(key)
442 .cloned()
443 };
444 if let Some(entry) = memory_entry {
445 if entry.contribution_set_hash.as_deref() == Some(current_hash.as_str()) {
446 return Ok(Some(entry.payload));
447 }
448 self.memory
449 .write()
450 .map_err(|_| InspectCacheError::LockPoisoned("memory"))?
451 .remove(key);
452 }
453
454 let payload = {
455 let conn = self
456 .conn
457 .lock()
458 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
459 conn.query_row(
460 "SELECT aggregate FROM tier2_aggregates \
461 WHERE category = ?1 AND project_key = ?2 AND contribution_set_hash = ?3",
462 params![key.category.as_str(), self.project_key, current_hash],
463 |row| row.get::<_, Vec<u8>>(0),
464 )
465 .optional()?
466 };
467
468 match payload {
469 Some(bytes) => {
470 let value = serde_json::from_slice::<serde_json::Value>(&bytes)?;
471 self.store_memory_aggregate(key.clone(), value.clone(), Some(current_hash))?;
472 Ok(Some(value))
473 }
474 None => Ok(None),
475 }
476 }
477
    /// Persists the outcome of a full tier-2 run in one transaction and mirrors the
    /// aggregate into memory.
    ///
    /// Within the transaction this removes stored contributions for files that are not in
    /// `scanned_files`, upserts every entry of `contributions`, stores `aggregate` under
    /// the resulting contribution-set hash, and records the current time as
    /// `last_full_run`. For non-tier-2 categories only the in-memory aggregate is stored.
    ///
    /// # Errors
    /// Fails on a poisoned lock, a SQLite error, a JSON serialization error, or an I/O
    /// error while hashing entry-point manifests.
    pub fn store_tier2_result(
        &self,
        key: JobKey,
        scanned_files: &[PathBuf],
        contributions: &[FileContribution],
        aggregate: serde_json::Value,
    ) -> Result<(), InspectCacheError> {
        // Non-tier-2 categories are kept in memory only.
        if !key.category.is_tier2() {
            self.store_aggregated(key, aggregate)?;
            return Ok(());
        }

        let now = unix_seconds_now();
        let mut conn = self
            .conn
            .lock()
            .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
        let tx = conn.transaction()?;

        // Remove rows whose file was not part of this scan.
        let scanned_relative = scanned_files
            .iter()
            .map(|path| relative_string(&self.project_root, path))
            .collect::<BTreeSet<_>>();
        let existing = existing_contribution_paths(&tx, key.category, &self.project_key)?;
        for file_path in existing {
            if !scanned_relative.contains(&file_path) {
                tx.execute(
                    "DELETE FROM tier2_contributions WHERE category = ?1 AND project_key = ?2 AND file_path = ?3",
                    params![key.category.as_str(), self.project_key, file_path],
                )?;
            }
        }

        // Upsert each contribution, keyed by (category, project_key, relative path).
        for contribution in contributions {
            let file_path = relative_string(&self.project_root, &contribution.file_path);
            let blob = serde_json::to_vec(&contribution_with_type_ref_names(
                contribution.contribution.clone(),
                &contribution.type_ref_names,
            ))?;
            tx.execute(
                "INSERT INTO tier2_contributions \
                 (category, project_key, file_path, file_mtime_ns, file_size, file_hash, contribution, generated_at) \
                 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8) \
                 ON CONFLICT(category, project_key, file_path) DO UPDATE SET \
                 file_mtime_ns = excluded.file_mtime_ns, \
                 file_size = excluded.file_size, \
                 file_hash = excluded.file_hash, \
                 contribution = excluded.contribution, \
                 generated_at = excluded.generated_at",
                params![
                    contribution.category.as_str(),
                    self.project_key,
                    file_path,
                    system_time_to_ns(contribution.freshness.mtime),
                    contribution.freshness.size as i64,
                    hash_to_hex(contribution.freshness.content_hash),
                    blob,
                    now,
                ],
            )?;
        }

        // Hash is computed through the transaction, so it reflects the writes above.
        let contribution_set_hash = contribution_set_hash_with_conn(
            &tx,
            key.category,
            &self.project_key,
            &self.project_root,
        )?;
        let aggregate_blob = serde_json::to_vec(&aggregate)?;
        tx.execute(
            "INSERT INTO tier2_aggregates \
             (category, project_key, contribution_set_hash, aggregate, generated_at) \
             VALUES (?1, ?2, ?3, ?4, ?5) \
             ON CONFLICT(category, project_key) DO UPDATE SET \
             contribution_set_hash = excluded.contribution_set_hash, \
             aggregate = excluded.aggregate, \
             generated_at = excluded.generated_at",
            params![
                key.category.as_str(),
                self.project_key,
                contribution_set_hash,
                aggregate_blob,
                now,
            ],
        )?;
        tx.execute(
            "INSERT INTO tier2_meta (category, project_key, last_full_run) VALUES (?1, ?2, ?3) \
             ON CONFLICT(category, project_key) DO UPDATE SET last_full_run = excluded.last_full_run",
            params![key.category.as_str(), self.project_key, now],
        )?;
        tx.commit()?;

        // Memory is updated only after the transaction committed.
        self.store_memory_aggregate(key, aggregate, Some(contribution_set_hash))
    }
572
    /// Applies a batch of deletes, metadata rewrites and upserts for `category` in one
    /// transaction and returns the contribution-set hash that results.
    ///
    /// The three groups run in that order. After the commit, the in-memory aggregate
    /// stored under `JobKey::for_project_category(category)` is removed.
    ///
    /// # Errors
    /// Fails on a poisoned lock, a SQLite error, a JSON serialization error, or an I/O
    /// error while hashing entry-point manifests.
    pub(crate) fn apply_contribution_updates(
        &self,
        category: InspectCategory,
        updates: Tier2ContributionUpdates,
    ) -> Result<String, InspectCacheError> {
        let now = unix_seconds_now();
        let mut conn = self
            .conn
            .lock()
            .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
        let tx = conn.transaction()?;

        // Delete paths are matched against `file_path` as given, without relativization.
        for relative_file in updates.deletes {
            tx.execute(
                "DELETE FROM tier2_contributions WHERE category = ?1 AND project_key = ?2 AND file_path = ?3",
                params![
                    category.as_str(),
                    self.project_key,
                    relative_file.to_string_lossy().to_string()
                ],
            )?;
        }

        // Rewrites only the freshness columns; the contribution blob and `generated_at` stay.
        for (relative_file, freshness) in updates.metadata_updates {
            tx.execute(
                "UPDATE tier2_contributions \
                 SET file_mtime_ns = ?4, file_size = ?5, file_hash = ?6 \
                 WHERE category = ?1 AND project_key = ?2 AND file_path = ?3",
                params![
                    category.as_str(),
                    self.project_key,
                    relative_file.to_string_lossy().to_string(),
                    system_time_to_ns(freshness.mtime),
                    freshness.size as i64,
                    hash_to_hex(freshness.content_hash),
                ],
            )?;
        }

        // Upsert paths are made relative to the project root before being stored.
        for contribution in updates.upserts {
            let file_path = relative_string(&self.project_root, &contribution.file_path);
            let blob = serde_json::to_vec(&contribution_with_type_ref_names(
                contribution.contribution.clone(),
                &contribution.type_ref_names,
            ))?;
            tx.execute(
                "INSERT INTO tier2_contributions \
                 (category, project_key, file_path, file_mtime_ns, file_size, file_hash, contribution, generated_at) \
                 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8) \
                 ON CONFLICT(category, project_key, file_path) DO UPDATE SET \
                 file_mtime_ns = excluded.file_mtime_ns, \
                 file_size = excluded.file_size, \
                 file_hash = excluded.file_hash, \
                 contribution = excluded.contribution, \
                 generated_at = excluded.generated_at",
                params![
                    contribution.category.as_str(),
                    self.project_key,
                    file_path,
                    system_time_to_ns(contribution.freshness.mtime),
                    contribution.freshness.size as i64,
                    hash_to_hex(contribution.freshness.content_hash),
                    blob,
                    now,
                ],
            )?;
        }

        // Hash is computed through the transaction, so it reflects the writes above.
        let contribution_set_hash =
            contribution_set_hash_with_conn(&tx, category, &self.project_key, &self.project_root)?;
        tx.commit()?;

        // Drop the in-memory aggregate for this category now that its inputs changed.
        self.memory
            .write()
            .map_err(|_| InspectCacheError::LockPoisoned("memory"))?
            .remove(&JobKey::for_project_category(category));

        Ok(contribution_set_hash)
    }
652
653 pub(crate) fn load_aggregate_if_hash_matches(
654 &self,
655 category: InspectCategory,
656 contribution_set_hash: &str,
657 ) -> Result<Option<serde_json::Value>, InspectCacheError> {
658 let payload = {
659 let conn = self
660 .conn
661 .lock()
662 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
663 conn.query_row(
664 "SELECT aggregate FROM tier2_aggregates \
665 WHERE category = ?1 AND project_key = ?2 AND contribution_set_hash = ?3",
666 params![category.as_str(), self.project_key, contribution_set_hash],
667 |row| row.get::<_, Vec<u8>>(0),
668 )
669 .optional()?
670 };
671
672 match payload {
673 Some(bytes) => {
674 let value = serde_json::from_slice::<serde_json::Value>(&bytes)?;
675 self.store_memory_aggregate(
676 JobKey::for_project_category(category),
677 value.clone(),
678 Some(contribution_set_hash.to_string()),
679 )?;
680 Ok(Some(value))
681 }
682 None => Ok(None),
683 }
684 }
685
686 pub(crate) fn latest_aggregate_any_hash(
687 &self,
688 category: InspectCategory,
689 ) -> Result<Option<serde_json::Value>, InspectCacheError> {
690 let payload = {
691 let conn = self
692 .conn
693 .lock()
694 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
695 conn.query_row(
696 "SELECT aggregate FROM tier2_aggregates \
697 WHERE category = ?1 AND project_key = ?2 \
698 ORDER BY generated_at DESC LIMIT 1",
699 params![category.as_str(), self.project_key],
700 |row| row.get::<_, Vec<u8>>(0),
701 )
702 .optional()?
703 };
704
705 match payload {
706 Some(bytes) => serde_json::from_slice::<serde_json::Value>(&bytes)
707 .map(Some)
708 .map_err(InspectCacheError::from),
709 None => Ok(None),
710 }
711 }
712
713 pub(crate) fn touch_tier2_last_full_run(
714 &self,
715 category: InspectCategory,
716 ) -> Result<i64, InspectCacheError> {
717 let mut conn = self
718 .conn
719 .lock()
720 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
721 let tx = conn.transaction()?;
722 let previous = tx
723 .query_row(
724 "SELECT last_full_run FROM tier2_meta WHERE category = ?1 AND project_key = ?2",
725 params![category.as_str(), self.project_key],
726 |row| row.get::<_, i64>(0),
727 )
728 .optional()?;
729 let now = unix_seconds_now();
730 let last_full_run = previous.map_or(now, |previous| now.max(previous.saturating_add(1)));
731 tx.execute(
732 "INSERT INTO tier2_meta (category, project_key, last_full_run) VALUES (?1, ?2, ?3) ON CONFLICT(category, project_key) DO UPDATE SET last_full_run = excluded.last_full_run",
733 params![category.as_str(), self.project_key, last_full_run],
734 )?;
735 tx.commit()?;
736 Ok(last_full_run)
737 }
738
739 pub(crate) fn store_tier2_aggregate(
740 &self,
741 key: JobKey,
742 contribution_set_hash: &str,
743 aggregate: serde_json::Value,
744 ) -> Result<(), InspectCacheError> {
745 if !key.category.is_tier2() {
746 self.store_aggregated(key, aggregate)?;
747 return Ok(());
748 }
749
750 let now = unix_seconds_now();
751 let aggregate_blob = serde_json::to_vec(&aggregate)?;
752 let mut conn = self
753 .conn
754 .lock()
755 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
756 let tx = conn.transaction()?;
757 tx.execute(
758 "INSERT INTO tier2_aggregates \
759 (category, project_key, contribution_set_hash, aggregate, generated_at) \
760 VALUES (?1, ?2, ?3, ?4, ?5) \
761 ON CONFLICT(category, project_key) DO UPDATE SET \
762 contribution_set_hash = excluded.contribution_set_hash, \
763 aggregate = excluded.aggregate, \
764 generated_at = excluded.generated_at",
765 params![
766 key.category.as_str(),
767 self.project_key,
768 contribution_set_hash,
769 aggregate_blob,
770 now,
771 ],
772 )?;
773 tx.execute(
774 "INSERT INTO tier2_meta (category, project_key, last_full_run) VALUES (?1, ?2, ?3) \
775 ON CONFLICT(category, project_key) DO UPDATE SET last_full_run = excluded.last_full_run",
776 params![key.category.as_str(), self.project_key, now],
777 )?;
778 tx.commit()?;
779
780 self.store_memory_aggregate(key, aggregate, Some(contribution_set_hash.to_string()))
781 }
782
783 pub fn load_tier2_contributions(
784 &self,
785 category: InspectCategory,
786 ) -> Result<Vec<ContributionRecord>, InspectCacheError> {
787 let conn = self
788 .conn
789 .lock()
790 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
791 let mut stmt = conn.prepare(
792 "SELECT file_path, file_mtime_ns, file_size, file_hash, contribution \
793 FROM tier2_contributions \
794 WHERE category = ?1 AND project_key = ?2 \
795 ORDER BY file_path ASC",
796 )?;
797 let rows = stmt.query_map(params![category.as_str(), self.project_key], |row| {
798 let file_path: String = row.get(0)?;
799 let mtime_ns: i64 = row.get(1)?;
800 let file_size: i64 = row.get(2)?;
801 let file_hash: String = row.get(3)?;
802 let contribution: Vec<u8> = row.get(4)?;
803 Ok((file_path, mtime_ns, file_size, file_hash, contribution))
804 })?;
805
806 let mut records = Vec::new();
807 for row in rows {
808 let (file_path, mtime_ns, file_size, file_hash, contribution) = row?;
809 let contribution: serde_json::Value = serde_json::from_slice(&contribution)?;
810 let type_ref_names = type_ref_names_from_contribution(&contribution);
811 records.push(ContributionRecord {
812 category,
813 file_path: PathBuf::from(file_path),
814 freshness: FileFreshness {
815 mtime: ns_to_system_time(mtime_ns),
816 size: file_size.max(0) as u64,
817 content_hash: hash_from_hex(&file_hash)?,
818 },
819 contribution,
820 type_ref_names,
821 });
822 }
823 Ok(records)
824 }
825
826 pub fn delete_tier2_contribution(
827 &self,
828 category: InspectCategory,
829 relative_file: &Path,
830 ) -> Result<(), InspectCacheError> {
831 let conn = self
832 .conn
833 .lock()
834 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
835 conn.execute(
836 "DELETE FROM tier2_contributions WHERE category = ?1 AND project_key = ?2 AND file_path = ?3",
837 params![
838 category.as_str(),
839 self.project_key,
840 relative_file.to_string_lossy().to_string()
841 ],
842 )?;
843 Ok(())
844 }
845
846 pub fn update_content_fresh_metadata(
847 &self,
848 category: InspectCategory,
849 relative_file: &Path,
850 freshness: &FileFreshness,
851 ) -> Result<(), InspectCacheError> {
852 let conn = self
853 .conn
854 .lock()
855 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
856 conn.execute(
857 "UPDATE tier2_contributions \
858 SET file_mtime_ns = ?4, file_size = ?5, file_hash = ?6 \
859 WHERE category = ?1 AND project_key = ?2 AND file_path = ?3",
860 params![
861 category.as_str(),
862 self.project_key,
863 relative_file.to_string_lossy().to_string(),
864 system_time_to_ns(freshness.mtime),
865 freshness.size as i64,
866 hash_to_hex(freshness.content_hash),
867 ],
868 )?;
869 Ok(())
870 }
871
872 pub(crate) fn contribution_fingerprint(
873 &self,
874 category: InspectCategory,
875 ) -> Result<(usize, String, bool), InspectCacheError> {
876 let conn = self
877 .conn
878 .lock()
879 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
880 let mut stmt = conn.prepare(
881 "SELECT file_path, file_mtime_ns, file_size, file_hash \
882 FROM tier2_contributions \
883 WHERE category = ?1 AND project_key = ?2 \
884 ORDER BY file_path ASC",
885 )?;
886 let rows = stmt.query_map(params![category.as_str(), self.project_key], |row| {
887 Ok((
888 row.get::<_, String>(0)?,
889 row.get::<_, i64>(1)?,
890 row.get::<_, i64>(2)?,
891 row.get::<_, String>(3)?,
892 ))
893 })?;
894
895 let zero_hash = hash_to_hex(cache_freshness::zero_hash());
896 let mut count = 0usize;
897 let mut hash_complete = true;
898 let mut hasher = blake3::Hasher::new();
899 for row in rows {
900 let (file_path, mtime_ns, file_size, file_hash) = row?;
901 count += 1;
902 if file_hash == zero_hash {
903 hash_complete = false;
904 }
905 update_contribution_fingerprint_hash(
906 &mut hasher,
907 &file_path,
908 mtime_ns.max(0),
909 file_size.max(0) as u64,
910 &file_hash,
911 );
912 }
913
914 Ok((count, hasher.finalize().to_hex().to_string(), hash_complete))
915 }
916
917 pub(crate) fn contribution_freshness(
918 &self,
919 category: InspectCategory,
920 ) -> Result<Vec<(PathBuf, FileFreshness)>, InspectCacheError> {
921 let conn = self
922 .conn
923 .lock()
924 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
925 let mut stmt = conn.prepare(
926 "SELECT file_path, file_mtime_ns, file_size, file_hash \
927 FROM tier2_contributions \
928 WHERE category = ?1 AND project_key = ?2 \
929 ORDER BY file_path ASC",
930 )?;
931 let rows = stmt.query_map(params![category.as_str(), self.project_key], |row| {
932 Ok((
933 row.get::<_, String>(0)?,
934 row.get::<_, i64>(1)?,
935 row.get::<_, i64>(2)?,
936 row.get::<_, String>(3)?,
937 ))
938 })?;
939
940 let mut records = Vec::new();
941 for row in rows {
942 let (file_path, mtime_ns, file_size, file_hash) = row?;
943 records.push((
944 PathBuf::from(file_path),
945 FileFreshness {
946 mtime: ns_to_system_time(mtime_ns),
947 size: file_size.max(0) as u64,
948 content_hash: hash_from_hex(&file_hash)?,
949 },
950 ));
951 }
952 Ok(records)
953 }
954
955 pub fn contribution_set_hash(
956 &self,
957 category: InspectCategory,
958 ) -> Result<String, InspectCacheError> {
959 let conn = self
960 .conn
961 .lock()
962 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
963 contribution_set_hash_with_conn(&conn, category, &self.project_key, &self.project_root)
964 }
965
966 pub fn last_full_run(
967 &self,
968 category: InspectCategory,
969 ) -> Result<Option<i64>, InspectCacheError> {
970 let conn = self
971 .conn
972 .lock()
973 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
974 conn.query_row(
975 "SELECT last_full_run FROM tier2_meta WHERE category = ?1 AND project_key = ?2",
976 params![category.as_str(), self.project_key],
977 |row| row.get::<_, i64>(0),
978 )
979 .optional()
980 .map_err(InspectCacheError::from)
981 }
982
983 pub fn memory_generated_at(&self, key: &JobKey) -> Result<Option<i64>, InspectCacheError> {
984 Ok(self
985 .memory
986 .read()
987 .map_err(|_| InspectCacheError::LockPoisoned("memory"))?
988 .get(key)
989 .map(|entry| entry.generated_at))
990 }
991}
992
993fn configure_connection(conn: &Connection) -> Result<(), InspectCacheError> {
994 conn.pragma_update(None, "journal_mode", "WAL")?;
995 conn.pragma_update(None, "busy_timeout", 5_000)?;
996 Ok(())
997}
998
/// Creates the three cache tables when they do not exist yet.
///
/// * `tier2_contributions`: one row per (category, project_key, file_path).
/// * `tier2_aggregates`: one row per (category, project_key).
/// * `tier2_meta`: one row per (category, project_key), holding `last_full_run`.
///
/// Existing tables are left as they are (`CREATE TABLE IF NOT EXISTS`).
fn initialize_schema(conn: &Connection) -> Result<(), InspectCacheError> {
    conn.execute_batch(
        "CREATE TABLE IF NOT EXISTS tier2_contributions (
            category TEXT NOT NULL,
            project_key TEXT NOT NULL,
            file_path TEXT NOT NULL,
            file_mtime_ns INTEGER NOT NULL,
            file_size INTEGER NOT NULL,
            file_hash TEXT NOT NULL,
            contribution BLOB NOT NULL,
            generated_at INTEGER NOT NULL,
            PRIMARY KEY (category, project_key, file_path)
        );

        CREATE TABLE IF NOT EXISTS tier2_aggregates (
            category TEXT NOT NULL,
            project_key TEXT NOT NULL,
            contribution_set_hash TEXT NOT NULL,
            aggregate BLOB NOT NULL,
            generated_at INTEGER NOT NULL,
            PRIMARY KEY (category, project_key)
        );

        CREATE TABLE IF NOT EXISTS tier2_meta (
            category TEXT NOT NULL,
            project_key TEXT NOT NULL,
            last_full_run INTEGER NOT NULL,
            PRIMARY KEY (category, project_key)
        );",
    )?;
    Ok(())
}
1031
1032fn existing_contribution_paths(
1033 conn: &Connection,
1034 category: InspectCategory,
1035 project_key: &str,
1036) -> Result<Vec<String>, InspectCacheError> {
1037 let mut stmt = conn.prepare(
1038 "SELECT file_path FROM tier2_contributions WHERE category = ?1 AND project_key = ?2",
1039 )?;
1040 let rows = stmt.query_map(params![category.as_str(), project_key], |row| {
1041 row.get::<_, String>(0)
1042 })?;
1043 rows.collect::<Result<Vec<_>, _>>()
1044 .map_err(InspectCacheError::from)
1045}
1046
1047fn contribution_set_hash_with_conn(
1048 conn: &Connection,
1049 category: InspectCategory,
1050 project_key: &str,
1051 project_root: &Path,
1052) -> Result<String, InspectCacheError> {
1053 let mut stmt = conn.prepare(
1054 "SELECT file_path, file_hash FROM tier2_contributions \
1055 WHERE category = ?1 AND project_key = ?2 ORDER BY file_path ASC",
1056 )?;
1057 let rows = stmt.query_map(params![category.as_str(), project_key], |row| {
1058 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
1059 })?;
1060
1061 let mut hasher = blake3::Hasher::new();
1062 hasher.update(b"tier2-contributions\0");
1063 hasher.update(&TIER2_CONTRIBUTION_CACHE_VERSION.to_le_bytes());
1064 hasher.update(b"\0");
1065 for row in rows {
1066 let (file_path, file_hash) = row?;
1067 hasher.update(file_path.as_bytes());
1068 hasher.update(b"\0");
1069 hasher.update(file_hash.as_bytes());
1070 hasher.update(b"\0");
1071 }
1072 update_manifest_fingerprint_hash(&mut hasher, project_root)?;
1073 Ok(hasher.finalize().to_hex().to_string())
1074}
1075
1076fn update_manifest_fingerprint_hash(
1077 hasher: &mut blake3::Hasher,
1078 project_root: &Path,
1079) -> Result<(), InspectCacheError> {
1080 let manifest_root =
1081 fs::canonicalize(project_root).unwrap_or_else(|_| project_root.to_path_buf());
1082 hasher.update(b"entry-point-manifests\0");
1083 for manifest in super::entry_points::collect_entry_point_manifests(project_root) {
1084 let relative_path = manifest
1085 .strip_prefix(&manifest_root)
1086 .unwrap_or(manifest.as_path())
1087 .to_string_lossy()
1088 .replace('\\', "/");
1089 let content_hash = blake3::hash(&fs::read(&manifest)?);
1090 hasher.update(relative_path.as_bytes());
1091 hasher.update(b"\0");
1092 hasher.update(content_hash.as_bytes());
1093 hasher.update(b"\0");
1094 }
1095 Ok(())
1096}
1097
1098fn update_contribution_fingerprint_hash(
1099 hasher: &mut blake3::Hasher,
1100 relative_path: &str,
1101 mtime_ns: i64,
1102 file_size: u64,
1103 file_hash: &str,
1104) {
1105 hasher.update(relative_path.as_bytes());
1106 hasher.update(&[0]);
1107 hasher.update(&mtime_ns.to_le_bytes());
1108 hasher.update(&file_size.to_le_bytes());
1109 hasher.update(&[0]);
1110 hasher.update(file_hash.as_bytes());
1111}
1112
/// Renders `path` relative to `project_root` as a (lossily converted) string; paths
/// outside the root are rendered unchanged.
fn relative_string(project_root: &Path, path: &Path) -> String {
    let relative = match path.strip_prefix(project_root) {
        Ok(stripped) => stripped,
        Err(_) => path,
    };
    relative.to_string_lossy().into_owned()
}
1119
/// Converts `time` to nanoseconds since the Unix epoch, clamping pre-epoch times to `0`
/// and values beyond `i64::MAX` to `i64::MAX`.
fn system_time_to_ns(time: SystemTime) -> i64 {
    match time.duration_since(UNIX_EPOCH) {
        Ok(elapsed) => i64::try_from(elapsed.as_nanos()).unwrap_or(i64::MAX),
        Err(_) => 0,
    }
}
1127
/// Converts nanoseconds since the Unix epoch back to a `SystemTime`; negative values map
/// to the epoch itself.
fn ns_to_system_time(value: i64) -> SystemTime {
    let nanos = u64::try_from(value).unwrap_or(0);
    UNIX_EPOCH + Duration::from_nanos(nanos)
}
1131
1132fn hash_to_hex(hash: blake3::Hash) -> String {
1133 hash.to_hex().to_string()
1134}
1135
1136fn hash_from_hex(value: &str) -> Result<blake3::Hash, InspectCacheError> {
1137 if value.len() != 64 {
1138 return Err(InspectCacheError::InvalidHash(value.to_string()));
1139 }
1140 let mut bytes = [0u8; 32];
1141 for (index, chunk) in value.as_bytes().chunks(2).enumerate() {
1142 let hex = std::str::from_utf8(chunk)
1143 .map_err(|_| InspectCacheError::InvalidHash(value.to_string()))?;
1144 bytes[index] = u8::from_str_radix(hex, 16)
1145 .map_err(|_| InspectCacheError::InvalidHash(value.to_string()))?;
1146 }
1147 Ok(blake3::Hash::from_bytes(bytes))
1148}
1149
/// Returns the current wall-clock time as whole seconds since the Unix epoch.
///
/// A clock set before the epoch yields `0`; a second count that does not fit
/// in an `i64` is clamped to `i64::MAX`.
fn unix_seconds_now() -> i64 {
    let elapsed_secs = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => Duration::from_secs(0).as_secs(),
    };
    i64::try_from(elapsed_secs).unwrap_or(i64::MAX)
}
1157
#[cfg(test)]
mod tests {
    use super::*;
    use std::cell::Cell;
    use std::fs;
    use std::path::{Path, PathBuf};

    /// Collects the freshness record for `path`, panicking when collection fails.
    fn collect_freshness(path: &Path) -> FileFreshness {
        let collected = crate::cache_freshness::collect(path);
        collected.unwrap()
    }

    /// Writes `contents` into `dir/name` and hands back the resulting path.
    fn write_fixture(dir: &Path, name: &str, contents: &str) -> PathBuf {
        let target = dir.join(name);
        fs::write(&target, contents).unwrap();
        target
    }

    #[test]
    fn tier1_file_memo_evicts_lru_and_keeps_recent_hits() {
        let scratch = tempfile::tempdir().unwrap();
        let memo = Tier1FileMemo::<usize>::default();

        // Seed one distinct file per slot until the memo holds the maximum entry count.
        let paths: Vec<PathBuf> = (0..TIER1_FILE_MEMO_MAX_ENTRIES)
            .map(|index| {
                let path = write_fixture(
                    scratch.path(),
                    &format!("file-{index}.txt"),
                    &index.to_string(),
                );
                let cached =
                    memo.get_or_insert_with(&path, |file| (Some(collect_freshness(file)), index));
                assert_eq!(cached, index);
                path
            })
            .collect();

        // Re-read the first entry; it must be served from the memo.
        let recent_path = paths[0].clone();
        let touched = memo.get_or_insert_with(&recent_path, |_| {
            panic!("recently inserted entry should hit before eviction")
        });
        assert_eq!(touched, 0);

        // One insert beyond the maximum is expected to evict exactly one entry.
        let evicting_path = write_fixture(scratch.path(), "new-file.txt", "new");
        let inserted = memo.get_or_insert_with(&evicting_path, |file| {
            (Some(collect_freshness(file)), TIER1_FILE_MEMO_MAX_ENTRIES)
        });
        assert_eq!(inserted, TIER1_FILE_MEMO_MAX_ENTRIES);

        {
            // Scope the guard so the lock is released before the memo is queried again.
            let state = memo.state.lock().unwrap();
            assert_eq!(state.entries.len(), TIER1_FILE_MEMO_MAX_ENTRIES);
            assert!(state.entries.contains_key(&recent_path));
            assert!(state.entries.contains_key(&evicting_path));
            assert!(!state.entries.contains_key(&paths[1]));
        }

        let survivor = memo.get_or_insert_with(&recent_path, |_| {
            panic!("recently used entry should survive eviction")
        });
        assert_eq!(survivor, 0);
    }

    #[test]
    fn tier1_file_memo_repeated_touches_keep_lazy_lru_bounded() {
        let scratch = tempfile::tempdir().unwrap();
        let memo = Tier1FileMemo::<usize>::default();

        let paths: Vec<PathBuf> = (0..TIER1_FILE_MEMO_MAX_ENTRIES)
            .map(|index| {
                let path = write_fixture(
                    scratch.path(),
                    &format!("file-{index}.txt"),
                    &index.to_string(),
                );
                memo.get_or_insert_with(&path, |file| (Some(collect_freshness(file)), index));
                path
            })
            .collect();

        // Hit the same entry many times over; every call must be a memo hit.
        let hot_path = &paths[0];
        for _ in 0..(TIER1_FILE_MEMO_MAX_ENTRIES * 3) {
            let hit = memo.get_or_insert_with(hot_path, |_| {
                panic!("hot entry should stay cached while it is repeatedly touched")
            });
            assert_eq!(hit, 0);
        }

        let evicting_path = write_fixture(scratch.path(), "new-file.txt", "new");
        memo.get_or_insert_with(&evicting_path, |file| {
            (Some(collect_freshness(file)), TIER1_FILE_MEMO_MAX_ENTRIES)
        });

        let state = memo.state.lock().unwrap();
        assert_eq!(state.entries.len(), TIER1_FILE_MEMO_MAX_ENTRIES);
        assert!(state.entries.contains_key(hot_path));
        assert!(state.entries.contains_key(&evicting_path));
        assert!(!state.entries.contains_key(&paths[1]));
        assert!(
            state.lru.len() <= TIER1_FILE_MEMO_MAX_ENTRIES * 2,
            "lazy LRU queue should be compacted instead of growing without bound"
        );
    }

    #[test]
    fn tier1_file_memo_reuses_fresh_entries_and_rescans_stale_files() {
        let scratch = tempfile::tempdir().unwrap();
        let path = write_fixture(scratch.path(), "memo.txt", "first");

        let memo = Tier1FileMemo::<String>::default();
        // Counts how many times the scan closure actually ran.
        let scans = Cell::new(0);
        let record_scan = || scans.set(scans.get() + 1);

        let initial = memo.get_or_insert_with(&path, |file| {
            record_scan();
            (Some(collect_freshness(file)), "first scan".to_string())
        });
        assert_eq!(initial, "first scan");
        assert_eq!(scans.get(), 1);

        let reused =
            memo.get_or_insert_with(&path, |_| panic!("unchanged file should reuse Tier-1 memo"));
        assert_eq!(reused, "first scan");
        assert_eq!(scans.get(), 1);

        // Rewriting the file with different contents must trigger a rescan.
        fs::write(&path, "changed file contents").unwrap();
        let rescanned = memo.get_or_insert_with(&path, |file| {
            record_scan();
            (Some(collect_freshness(file)), "second scan".to_string())
        });
        assert_eq!(rescanned, "second scan");
        assert_eq!(scans.get(), 2);

        let reused_after_rescan = memo.get_or_insert_with(&path, |_| {
            panic!("rescanned file should reuse refreshed Tier-1 memo")
        });
        assert_eq!(reused_after_rescan, "second scan");
        assert_eq!(scans.get(), 2);
    }

    /// Serde-friendly mirror of the `ContributionRecord` fields checked by the
    /// round-trip test below.
    #[derive(serde::Deserialize, serde::Serialize)]
    struct RoundTripContributionRecord {
        category: String,
        file_path: PathBuf,
        contribution: serde_json::Value,
        type_ref_names: BTreeSet<String>,
    }

    impl From<&ContributionRecord> for RoundTripContributionRecord {
        fn from(record: &ContributionRecord) -> Self {
            let category = record.category.as_str().to_owned();
            let file_path = record.file_path.clone();
            let contribution = record.contribution.clone();
            let type_ref_names = record.type_ref_names.clone();
            Self {
                category,
                file_path,
                contribution,
                type_ref_names,
            }
        }
    }

    #[test]
    fn contribution_record_round_trip_preserves_dead_code_liveness_metadata() {
        let scratch = tempfile::tempdir().unwrap();
        let project_root = scratch.path().join("project");
        let inspect_dir = scratch.path().join("inspect");
        let source = project_root.join("src/lib.ts");
        fs::create_dir_all(source.parent().unwrap()).unwrap();
        fs::write(&source, "export interface Widget { id: string }\n").unwrap();

        {
            // Store through one cache handle, then let it go out of scope so the
            // load below goes through a freshly opened handle.
            let writer = InspectCache::open(inspect_dir.clone(), project_root.clone()).unwrap();
            let payload = serde_json::json!({
                "file": "src/lib.ts",
                "exports": [{
                    "symbol": "Widget",
                    "kind": "interface",
                    "line": 1,
                    "is_type_like": true,
                    "is_entry_point": false,
                }],
                "internal_calls": [],
                "liveness_roots": [],
                "dispatched_method_names": ["render"],
                "type_ref_names": ["Widget"],
            });
            let contribution = FileContribution::new(
                InspectCategory::DeadCode,
                source.clone(),
                collect_freshness(&source),
                payload,
            )
            .with_type_ref_names(["Widget".to_string()]);
            writer
                .store_tier2_result(
                    JobKey::for_project_category(InspectCategory::DeadCode),
                    std::slice::from_ref(&source),
                    &[contribution],
                    serde_json::json!({ "count": 0, "items": [] }),
                )
                .unwrap();
        }

        let reader = InspectCache::open(inspect_dir, project_root).unwrap();
        let records = reader
            .load_tier2_contributions(InspectCategory::DeadCode)
            .unwrap();
        assert_eq!(records.len(), 1);

        let mirror = RoundTripContributionRecord::from(&records[0]);
        let serialized = serde_json::to_vec(&mirror).unwrap();
        let decoded: RoundTripContributionRecord = serde_json::from_slice(&serialized).unwrap();
        assert_eq!(decoded.category, InspectCategory::DeadCode.as_str());
        assert_eq!(decoded.contribution["dispatched_method_names"][0], "render");
        assert_eq!(decoded.contribution["type_ref_names"][0], "Widget");
        assert!(decoded.type_ref_names.contains("Widget"));
        assert_eq!(
            decoded.contribution["exports"][0]["is_type_like"].as_bool(),
            Some(true)
        );
        assert_eq!(TIER2_CONTRIBUTION_CACHE_VERSION, 13);
    }
}