1use std::collections::{BTreeSet, HashMap, VecDeque};
2use std::fmt;
3use std::fs;
4use std::path::{Path, PathBuf};
5use std::sync::{Mutex, RwLock};
6use std::time::{Duration, SystemTime, UNIX_EPOCH};
7
8use rusqlite::{params, Connection, OpenFlags, OptionalExtension};
9
10use crate::cache_freshness::{self, FileFreshness, FreshnessVerdict};
11
12use super::job::{
13 contribution_with_type_ref_names, type_ref_names_from_contribution, FileContribution,
14 InspectCategory, JobKey,
15};
16
17#[derive(Debug, Default)]
18pub(crate) struct Tier2ContributionUpdates {
19 pub upserts: Vec<FileContribution>,
20 pub deletes: Vec<PathBuf>,
21 pub metadata_updates: Vec<(PathBuf, FileFreshness)>,
22}
23
24#[derive(Debug)]
25pub enum InspectCacheError {
26 Io(std::io::Error),
27 Sql(rusqlite::Error),
28 Json(serde_json::Error),
29 LockPoisoned(&'static str),
30 InvalidHash(String),
31}
32
33impl fmt::Display for InspectCacheError {
34 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
35 match self {
36 InspectCacheError::Io(error) => write!(formatter, "inspect cache io error: {error}"),
37 InspectCacheError::Sql(error) => {
38 write!(formatter, "inspect cache sqlite error: {error}")
39 }
40 InspectCacheError::Json(error) => {
41 write!(formatter, "inspect cache json error: {error}")
42 }
43 InspectCacheError::LockPoisoned(name) => {
44 write!(formatter, "inspect cache lock poisoned: {name}")
45 }
46 InspectCacheError::InvalidHash(hash) => {
47 write!(formatter, "inspect cache invalid blake3 hash: {hash}")
48 }
49 }
50 }
51}
52
53impl std::error::Error for InspectCacheError {}
54
55impl From<std::io::Error> for InspectCacheError {
56 fn from(error: std::io::Error) -> Self {
57 Self::Io(error)
58 }
59}
60
61impl From<rusqlite::Error> for InspectCacheError {
62 fn from(error: rusqlite::Error) -> Self {
63 Self::Sql(error)
64 }
65}
66
67impl From<serde_json::Error> for InspectCacheError {
68 fn from(error: serde_json::Error) -> Self {
69 Self::Json(error)
70 }
71}
72
/// Version tag mixed into every contribution-set hash (see
/// `contribution_set_hash_with_conn`); a different value yields different
/// hashes, so aggregates stored under the old value no longer match.
pub(crate) const TIER2_CONTRIBUTION_CACHE_VERSION: u32 = 11;
97
98#[derive(Debug, Clone)]
99pub struct ContributionRecord {
100 pub category: InspectCategory,
101 pub file_path: PathBuf,
102 pub freshness: FileFreshness,
103 pub contribution: serde_json::Value,
104 pub type_ref_names: BTreeSet<String>,
105}
106
107#[derive(Debug, Clone)]
108struct MemoryAggregate {
109 payload: serde_json::Value,
110 generated_at: i64,
111 contribution_set_hash: Option<String>,
112}
113
/// Upper bound on live entries in a `Tier1MemoState`; inserting beyond it
/// evicts the least recently used entries.
const TIER1_FILE_MEMO_MAX_ENTRIES: usize = 4_096;
115
116#[derive(Debug, Clone)]
117struct Tier1MemoEntry<T> {
118 freshness: FileFreshness,
119 value: T,
120 generation: u64,
121}
122
/// One position in the recency queue of a `Tier1MemoState`.
#[derive(Debug, Clone)]
struct LruNode {
    /// Key of the entry this node refers to.
    path: PathBuf,
    /// Stamp the entry had when the node was queued; a node whose stamp no
    /// longer equals the entry's is stale and is skipped during eviction.
    generation: u64,
}
128
129#[derive(Debug)]
130struct Tier1MemoState<T> {
131 entries: HashMap<PathBuf, Tier1MemoEntry<T>>,
132 lru: VecDeque<LruNode>,
133 next_generation: u64,
134}
135
136impl<T> Default for Tier1MemoState<T> {
137 fn default() -> Self {
138 Self {
139 entries: HashMap::new(),
140 lru: VecDeque::new(),
141 next_generation: 0,
142 }
143 }
144}
145
146impl<T> Tier1MemoState<T> {
147 fn insert(&mut self, path: PathBuf, mut entry: Tier1MemoEntry<T>) {
148 let generation = self.allocate_generation();
149 entry.generation = generation;
150 self.entries.insert(path.clone(), entry);
151 self.lru.push_back(LruNode { path, generation });
152 self.compact_lru_if_needed();
153 self.evict_lru();
154 }
155
156 fn remove(&mut self, path: &Path) {
157 self.entries.remove(path);
158 self.compact_lru_if_needed();
159 }
160
161 fn touch(&mut self, path: &Path) {
162 if !self.entries.contains_key(path) {
163 return;
164 }
165
166 let generation = self.allocate_generation();
167 if let Some(entry) = self.entries.get_mut(path) {
168 entry.generation = generation;
169 self.lru.push_back(LruNode {
170 path: path.to_path_buf(),
171 generation,
172 });
173 }
174 self.compact_lru_if_needed();
175 }
176
177 fn allocate_generation(&mut self) -> u64 {
178 if self.next_generation == u64::MAX {
179 self.rebuild_lru();
180 }
181 let generation = self.next_generation;
182 self.next_generation += 1;
183 generation
184 }
185
186 fn compact_lru_if_needed(&mut self) {
187 let max_lru_nodes = TIER1_FILE_MEMO_MAX_ENTRIES
188 .saturating_mul(2)
189 .max(self.entries.len());
190 if self.lru.len() > max_lru_nodes {
191 self.rebuild_lru();
192 }
193 }
194
195 fn rebuild_lru(&mut self) {
196 let mut live_nodes = self
197 .entries
198 .iter()
199 .map(|(path, entry)| (entry.generation, path.clone()))
200 .collect::<Vec<_>>();
201 live_nodes.sort_by_key(|(generation, _)| *generation);
202
203 self.lru.clear();
204 for (generation, (_, path)) in live_nodes.into_iter().enumerate() {
205 let generation = generation as u64;
206 if let Some(entry) = self.entries.get_mut(&path) {
207 entry.generation = generation;
208 }
209 self.lru.push_back(LruNode { path, generation });
210 }
211 self.next_generation = self.lru.len() as u64;
212 }
213
214 fn evict_lru(&mut self) {
215 while self.entries.len() > TIER1_FILE_MEMO_MAX_ENTRIES {
216 let Some(node) = self.lru.pop_front() else {
217 break;
218 };
219 if self
220 .entries
221 .get(&node.path)
222 .is_some_and(|entry| entry.generation == node.generation)
223 {
224 self.entries.remove(&node.path);
225 }
226 }
227 self.compact_lru_if_needed();
228 }
229}
230
231#[derive(Debug)]
232pub(crate) struct Tier1FileMemo<T> {
233 state: Mutex<Tier1MemoState<T>>,
234}
235
236impl<T> Default for Tier1FileMemo<T> {
237 fn default() -> Self {
238 Self {
239 state: Mutex::new(Tier1MemoState::default()),
240 }
241 }
242}
243
244impl<T: Clone> Tier1FileMemo<T> {
245 pub(crate) fn get_or_insert_with<F>(&self, path: &Path, scan: F) -> T
246 where
247 F: FnOnce(&Path) -> (Option<FileFreshness>, T),
248 {
249 if let Some(cached) = self.cached_value(path) {
250 return cached;
251 }
252
253 let (freshness, value) = scan(path);
254 if let Ok(mut state) = self.state.lock() {
255 if let Some(freshness) = freshness {
256 state.insert(
257 path.to_path_buf(),
258 Tier1MemoEntry {
259 freshness,
260 value: value.clone(),
261 generation: 0,
262 },
263 );
264 } else {
265 state.remove(path);
266 }
267 }
268 value
269 }
270
271 fn cached_value(&self, path: &Path) -> Option<T> {
272 let mut cached = self
273 .state
274 .lock()
275 .ok()
276 .and_then(|state| state.entries.get(path).cloned())?;
277
278 match crate::cache_freshness::verify_file(path, &cached.freshness) {
279 FreshnessVerdict::HotFresh => {
280 if let Ok(mut state) = self.state.lock() {
281 state.touch(path);
282 }
283 Some(cached.value)
284 }
285 FreshnessVerdict::ContentFresh {
286 new_mtime,
287 new_size,
288 } => {
289 cached.freshness.mtime = new_mtime;
290 cached.freshness.size = new_size;
291 let value = cached.value.clone();
292 if let Ok(mut state) = self.state.lock() {
293 state.insert(path.to_path_buf(), cached);
294 }
295 Some(value)
296 }
297 FreshnessVerdict::Stale => None,
298 FreshnessVerdict::Deleted => {
299 if let Ok(mut state) = self.state.lock() {
300 state.remove(path);
301 }
302 None
303 }
304 }
305 }
306}
307
308#[derive(Debug)]
309pub struct InspectCache {
310 project_root: PathBuf,
311 project_key: String,
312 sqlite_path: PathBuf,
313 conn: Mutex<Connection>,
314 memory: RwLock<HashMap<JobKey, MemoryAggregate>>,
315}
316
317impl InspectCache {
318 pub fn open(inspect_dir: PathBuf, project_root: PathBuf) -> Result<Self, InspectCacheError> {
319 std::fs::create_dir_all(&inspect_dir)?;
320 let project_key = crate::search_index::project_cache_key(&project_root);
321 let sqlite_path = inspect_dir.join(format!("{project_key}.sqlite"));
322 let conn = Connection::open(&sqlite_path)?;
323 configure_connection(&conn)?;
324 initialize_schema(&conn)?;
325 Ok(Self::from_connection(
326 project_root,
327 project_key,
328 sqlite_path,
329 conn,
330 ))
331 }
332
333 pub fn open_readonly(
334 inspect_dir: PathBuf,
335 project_root: PathBuf,
336 ) -> Result<Option<Self>, InspectCacheError> {
337 let project_key = crate::search_index::project_cache_key(&project_root);
338 let sqlite_path = inspect_dir.join(format!("{project_key}.sqlite"));
339 if !sqlite_path.is_file() {
340 return Ok(None);
341 }
342 let conn = Connection::open_with_flags(&sqlite_path, OpenFlags::SQLITE_OPEN_READ_ONLY)?;
343 conn.busy_timeout(Duration::from_millis(5_000))?;
344 Ok(Some(Self::from_connection(
345 project_root,
346 project_key,
347 sqlite_path,
348 conn,
349 )))
350 }
351
352 fn from_connection(
353 project_root: PathBuf,
354 project_key: String,
355 sqlite_path: PathBuf,
356 conn: Connection,
357 ) -> Self {
358 Self {
359 project_root,
360 project_key,
361 sqlite_path,
362 conn: Mutex::new(conn),
363 memory: RwLock::new(HashMap::new()),
364 }
365 }
366
367 pub fn project_root(&self) -> &Path {
368 &self.project_root
369 }
370
371 pub fn project_key(&self) -> &str {
372 &self.project_key
373 }
374
375 pub fn sqlite_path(&self) -> &Path {
376 &self.sqlite_path
377 }
378
379 pub fn store_aggregated(
380 &self,
381 key: JobKey,
382 payload: serde_json::Value,
383 ) -> Result<(), InspectCacheError> {
384 self.store_memory_aggregate(key, payload, None)
385 }
386
387 fn store_memory_aggregate(
388 &self,
389 key: JobKey,
390 payload: serde_json::Value,
391 contribution_set_hash: Option<String>,
392 ) -> Result<(), InspectCacheError> {
393 self.memory
394 .write()
395 .map_err(|_| InspectCacheError::LockPoisoned("memory"))?
396 .insert(
397 key,
398 MemoryAggregate {
399 payload,
400 generated_at: unix_seconds_now(),
401 contribution_set_hash,
402 },
403 );
404 Ok(())
405 }
406
407 pub fn get_aggregated(
408 &self,
409 key: &JobKey,
410 ) -> Result<Option<serde_json::Value>, InspectCacheError> {
411 if !key.category.is_tier2() {
412 return Ok(self
413 .memory
414 .read()
415 .map_err(|_| InspectCacheError::LockPoisoned("memory"))?
416 .get(key)
417 .map(|entry| entry.payload.clone()));
418 }
419
420 let current_hash = {
421 let conn = self
422 .conn
423 .lock()
424 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
425 contribution_set_hash_with_conn(
426 &conn,
427 key.category,
428 &self.project_key,
429 &self.project_root,
430 )?
431 };
432
433 let memory_entry = {
434 self.memory
435 .read()
436 .map_err(|_| InspectCacheError::LockPoisoned("memory"))?
437 .get(key)
438 .cloned()
439 };
440 if let Some(entry) = memory_entry {
441 if entry.contribution_set_hash.as_deref() == Some(current_hash.as_str()) {
442 return Ok(Some(entry.payload));
443 }
444 self.memory
445 .write()
446 .map_err(|_| InspectCacheError::LockPoisoned("memory"))?
447 .remove(key);
448 }
449
450 let payload = {
451 let conn = self
452 .conn
453 .lock()
454 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
455 conn.query_row(
456 "SELECT aggregate FROM tier2_aggregates \
457 WHERE category = ?1 AND project_key = ?2 AND contribution_set_hash = ?3",
458 params![key.category.as_str(), self.project_key, current_hash],
459 |row| row.get::<_, Vec<u8>>(0),
460 )
461 .optional()?
462 };
463
464 match payload {
465 Some(bytes) => {
466 let value = serde_json::from_slice::<serde_json::Value>(&bytes)?;
467 self.store_memory_aggregate(key.clone(), value.clone(), Some(current_hash))?;
468 Ok(Some(value))
469 }
470 None => Ok(None),
471 }
472 }
473
474 pub fn store_tier2_result(
475 &self,
476 key: JobKey,
477 scanned_files: &[PathBuf],
478 contributions: &[FileContribution],
479 aggregate: serde_json::Value,
480 ) -> Result<(), InspectCacheError> {
481 if !key.category.is_tier2() {
482 self.store_aggregated(key, aggregate)?;
483 return Ok(());
484 }
485
486 let now = unix_seconds_now();
487 let mut conn = self
488 .conn
489 .lock()
490 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
491 let tx = conn.transaction()?;
492
493 let scanned_relative = scanned_files
494 .iter()
495 .map(|path| relative_string(&self.project_root, path))
496 .collect::<BTreeSet<_>>();
497 let existing = existing_contribution_paths(&tx, key.category, &self.project_key)?;
498 for file_path in existing {
499 if !scanned_relative.contains(&file_path) {
500 tx.execute(
501 "DELETE FROM tier2_contributions WHERE category = ?1 AND project_key = ?2 AND file_path = ?3",
502 params![key.category.as_str(), self.project_key, file_path],
503 )?;
504 }
505 }
506
507 for contribution in contributions {
508 let file_path = relative_string(&self.project_root, &contribution.file_path);
509 let blob = serde_json::to_vec(&contribution_with_type_ref_names(
510 contribution.contribution.clone(),
511 &contribution.type_ref_names,
512 ))?;
513 tx.execute(
514 "INSERT INTO tier2_contributions \
515 (category, project_key, file_path, file_mtime_ns, file_size, file_hash, contribution, generated_at) \
516 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8) \
517 ON CONFLICT(category, project_key, file_path) DO UPDATE SET \
518 file_mtime_ns = excluded.file_mtime_ns, \
519 file_size = excluded.file_size, \
520 file_hash = excluded.file_hash, \
521 contribution = excluded.contribution, \
522 generated_at = excluded.generated_at",
523 params![
524 contribution.category.as_str(),
525 self.project_key,
526 file_path,
527 system_time_to_ns(contribution.freshness.mtime),
528 contribution.freshness.size as i64,
529 hash_to_hex(contribution.freshness.content_hash),
530 blob,
531 now,
532 ],
533 )?;
534 }
535
536 let contribution_set_hash = contribution_set_hash_with_conn(
537 &tx,
538 key.category,
539 &self.project_key,
540 &self.project_root,
541 )?;
542 let aggregate_blob = serde_json::to_vec(&aggregate)?;
543 tx.execute(
544 "INSERT INTO tier2_aggregates \
545 (category, project_key, contribution_set_hash, aggregate, generated_at) \
546 VALUES (?1, ?2, ?3, ?4, ?5) \
547 ON CONFLICT(category, project_key) DO UPDATE SET \
548 contribution_set_hash = excluded.contribution_set_hash, \
549 aggregate = excluded.aggregate, \
550 generated_at = excluded.generated_at",
551 params![
552 key.category.as_str(),
553 self.project_key,
554 contribution_set_hash,
555 aggregate_blob,
556 now,
557 ],
558 )?;
559 tx.execute(
560 "INSERT INTO tier2_meta (category, project_key, last_full_run) VALUES (?1, ?2, ?3) \
561 ON CONFLICT(category, project_key) DO UPDATE SET last_full_run = excluded.last_full_run",
562 params![key.category.as_str(), self.project_key, now],
563 )?;
564 tx.commit()?;
565
566 self.store_memory_aggregate(key, aggregate, Some(contribution_set_hash))
567 }
568
569 pub(crate) fn apply_contribution_updates(
570 &self,
571 category: InspectCategory,
572 updates: Tier2ContributionUpdates,
573 ) -> Result<String, InspectCacheError> {
574 let now = unix_seconds_now();
575 let mut conn = self
576 .conn
577 .lock()
578 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
579 let tx = conn.transaction()?;
580
581 for relative_file in updates.deletes {
582 tx.execute(
583 "DELETE FROM tier2_contributions WHERE category = ?1 AND project_key = ?2 AND file_path = ?3",
584 params![
585 category.as_str(),
586 self.project_key,
587 relative_file.to_string_lossy().to_string()
588 ],
589 )?;
590 }
591
592 for (relative_file, freshness) in updates.metadata_updates {
593 tx.execute(
594 "UPDATE tier2_contributions \
595 SET file_mtime_ns = ?4, file_size = ?5, file_hash = ?6 \
596 WHERE category = ?1 AND project_key = ?2 AND file_path = ?3",
597 params![
598 category.as_str(),
599 self.project_key,
600 relative_file.to_string_lossy().to_string(),
601 system_time_to_ns(freshness.mtime),
602 freshness.size as i64,
603 hash_to_hex(freshness.content_hash),
604 ],
605 )?;
606 }
607
608 for contribution in updates.upserts {
609 let file_path = relative_string(&self.project_root, &contribution.file_path);
610 let blob = serde_json::to_vec(&contribution_with_type_ref_names(
611 contribution.contribution.clone(),
612 &contribution.type_ref_names,
613 ))?;
614 tx.execute(
615 "INSERT INTO tier2_contributions \
616 (category, project_key, file_path, file_mtime_ns, file_size, file_hash, contribution, generated_at) \
617 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8) \
618 ON CONFLICT(category, project_key, file_path) DO UPDATE SET \
619 file_mtime_ns = excluded.file_mtime_ns, \
620 file_size = excluded.file_size, \
621 file_hash = excluded.file_hash, \
622 contribution = excluded.contribution, \
623 generated_at = excluded.generated_at",
624 params![
625 contribution.category.as_str(),
626 self.project_key,
627 file_path,
628 system_time_to_ns(contribution.freshness.mtime),
629 contribution.freshness.size as i64,
630 hash_to_hex(contribution.freshness.content_hash),
631 blob,
632 now,
633 ],
634 )?;
635 }
636
637 let contribution_set_hash =
638 contribution_set_hash_with_conn(&tx, category, &self.project_key, &self.project_root)?;
639 tx.commit()?;
640
641 self.memory
642 .write()
643 .map_err(|_| InspectCacheError::LockPoisoned("memory"))?
644 .remove(&JobKey::for_project_category(category));
645
646 Ok(contribution_set_hash)
647 }
648
649 pub(crate) fn load_aggregate_if_hash_matches(
650 &self,
651 category: InspectCategory,
652 contribution_set_hash: &str,
653 ) -> Result<Option<serde_json::Value>, InspectCacheError> {
654 let payload = {
655 let conn = self
656 .conn
657 .lock()
658 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
659 conn.query_row(
660 "SELECT aggregate FROM tier2_aggregates \
661 WHERE category = ?1 AND project_key = ?2 AND contribution_set_hash = ?3",
662 params![category.as_str(), self.project_key, contribution_set_hash],
663 |row| row.get::<_, Vec<u8>>(0),
664 )
665 .optional()?
666 };
667
668 match payload {
669 Some(bytes) => {
670 let value = serde_json::from_slice::<serde_json::Value>(&bytes)?;
671 self.store_memory_aggregate(
672 JobKey::for_project_category(category),
673 value.clone(),
674 Some(contribution_set_hash.to_string()),
675 )?;
676 Ok(Some(value))
677 }
678 None => Ok(None),
679 }
680 }
681
682 pub(crate) fn latest_aggregate_any_hash(
683 &self,
684 category: InspectCategory,
685 ) -> Result<Option<serde_json::Value>, InspectCacheError> {
686 let payload = {
687 let conn = self
688 .conn
689 .lock()
690 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
691 conn.query_row(
692 "SELECT aggregate FROM tier2_aggregates \
693 WHERE category = ?1 AND project_key = ?2 \
694 ORDER BY generated_at DESC LIMIT 1",
695 params![category.as_str(), self.project_key],
696 |row| row.get::<_, Vec<u8>>(0),
697 )
698 .optional()?
699 };
700
701 match payload {
702 Some(bytes) => serde_json::from_slice::<serde_json::Value>(&bytes)
703 .map(Some)
704 .map_err(InspectCacheError::from),
705 None => Ok(None),
706 }
707 }
708
709 pub(crate) fn touch_tier2_last_full_run(
710 &self,
711 category: InspectCategory,
712 ) -> Result<i64, InspectCacheError> {
713 let mut conn = self
714 .conn
715 .lock()
716 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
717 let tx = conn.transaction()?;
718 let previous = tx
719 .query_row(
720 "SELECT last_full_run FROM tier2_meta WHERE category = ?1 AND project_key = ?2",
721 params![category.as_str(), self.project_key],
722 |row| row.get::<_, i64>(0),
723 )
724 .optional()?;
725 let now = unix_seconds_now();
726 let last_full_run = previous.map_or(now, |previous| now.max(previous.saturating_add(1)));
727 tx.execute(
728 "INSERT INTO tier2_meta (category, project_key, last_full_run) VALUES (?1, ?2, ?3) ON CONFLICT(category, project_key) DO UPDATE SET last_full_run = excluded.last_full_run",
729 params![category.as_str(), self.project_key, last_full_run],
730 )?;
731 tx.commit()?;
732 Ok(last_full_run)
733 }
734
735 pub(crate) fn store_tier2_aggregate(
736 &self,
737 key: JobKey,
738 contribution_set_hash: &str,
739 aggregate: serde_json::Value,
740 ) -> Result<(), InspectCacheError> {
741 if !key.category.is_tier2() {
742 self.store_aggregated(key, aggregate)?;
743 return Ok(());
744 }
745
746 let now = unix_seconds_now();
747 let aggregate_blob = serde_json::to_vec(&aggregate)?;
748 let mut conn = self
749 .conn
750 .lock()
751 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
752 let tx = conn.transaction()?;
753 tx.execute(
754 "INSERT INTO tier2_aggregates \
755 (category, project_key, contribution_set_hash, aggregate, generated_at) \
756 VALUES (?1, ?2, ?3, ?4, ?5) \
757 ON CONFLICT(category, project_key) DO UPDATE SET \
758 contribution_set_hash = excluded.contribution_set_hash, \
759 aggregate = excluded.aggregate, \
760 generated_at = excluded.generated_at",
761 params![
762 key.category.as_str(),
763 self.project_key,
764 contribution_set_hash,
765 aggregate_blob,
766 now,
767 ],
768 )?;
769 tx.execute(
770 "INSERT INTO tier2_meta (category, project_key, last_full_run) VALUES (?1, ?2, ?3) \
771 ON CONFLICT(category, project_key) DO UPDATE SET last_full_run = excluded.last_full_run",
772 params![key.category.as_str(), self.project_key, now],
773 )?;
774 tx.commit()?;
775
776 self.store_memory_aggregate(key, aggregate, Some(contribution_set_hash.to_string()))
777 }
778
779 pub fn load_tier2_contributions(
780 &self,
781 category: InspectCategory,
782 ) -> Result<Vec<ContributionRecord>, InspectCacheError> {
783 let conn = self
784 .conn
785 .lock()
786 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
787 let mut stmt = conn.prepare(
788 "SELECT file_path, file_mtime_ns, file_size, file_hash, contribution \
789 FROM tier2_contributions \
790 WHERE category = ?1 AND project_key = ?2 \
791 ORDER BY file_path ASC",
792 )?;
793 let rows = stmt.query_map(params![category.as_str(), self.project_key], |row| {
794 let file_path: String = row.get(0)?;
795 let mtime_ns: i64 = row.get(1)?;
796 let file_size: i64 = row.get(2)?;
797 let file_hash: String = row.get(3)?;
798 let contribution: Vec<u8> = row.get(4)?;
799 Ok((file_path, mtime_ns, file_size, file_hash, contribution))
800 })?;
801
802 let mut records = Vec::new();
803 for row in rows {
804 let (file_path, mtime_ns, file_size, file_hash, contribution) = row?;
805 let contribution: serde_json::Value = serde_json::from_slice(&contribution)?;
806 let type_ref_names = type_ref_names_from_contribution(&contribution);
807 records.push(ContributionRecord {
808 category,
809 file_path: PathBuf::from(file_path),
810 freshness: FileFreshness {
811 mtime: ns_to_system_time(mtime_ns),
812 size: file_size.max(0) as u64,
813 content_hash: hash_from_hex(&file_hash)?,
814 },
815 contribution,
816 type_ref_names,
817 });
818 }
819 Ok(records)
820 }
821
822 pub fn delete_tier2_contribution(
823 &self,
824 category: InspectCategory,
825 relative_file: &Path,
826 ) -> Result<(), InspectCacheError> {
827 let conn = self
828 .conn
829 .lock()
830 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
831 conn.execute(
832 "DELETE FROM tier2_contributions WHERE category = ?1 AND project_key = ?2 AND file_path = ?3",
833 params![
834 category.as_str(),
835 self.project_key,
836 relative_file.to_string_lossy().to_string()
837 ],
838 )?;
839 Ok(())
840 }
841
842 pub fn update_content_fresh_metadata(
843 &self,
844 category: InspectCategory,
845 relative_file: &Path,
846 freshness: &FileFreshness,
847 ) -> Result<(), InspectCacheError> {
848 let conn = self
849 .conn
850 .lock()
851 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
852 conn.execute(
853 "UPDATE tier2_contributions \
854 SET file_mtime_ns = ?4, file_size = ?5, file_hash = ?6 \
855 WHERE category = ?1 AND project_key = ?2 AND file_path = ?3",
856 params![
857 category.as_str(),
858 self.project_key,
859 relative_file.to_string_lossy().to_string(),
860 system_time_to_ns(freshness.mtime),
861 freshness.size as i64,
862 hash_to_hex(freshness.content_hash),
863 ],
864 )?;
865 Ok(())
866 }
867
868 pub(crate) fn contribution_fingerprint(
869 &self,
870 category: InspectCategory,
871 ) -> Result<(usize, String, bool), InspectCacheError> {
872 let conn = self
873 .conn
874 .lock()
875 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
876 let mut stmt = conn.prepare(
877 "SELECT file_path, file_mtime_ns, file_size, file_hash \
878 FROM tier2_contributions \
879 WHERE category = ?1 AND project_key = ?2 \
880 ORDER BY file_path ASC",
881 )?;
882 let rows = stmt.query_map(params![category.as_str(), self.project_key], |row| {
883 Ok((
884 row.get::<_, String>(0)?,
885 row.get::<_, i64>(1)?,
886 row.get::<_, i64>(2)?,
887 row.get::<_, String>(3)?,
888 ))
889 })?;
890
891 let zero_hash = hash_to_hex(cache_freshness::zero_hash());
892 let mut count = 0usize;
893 let mut hash_complete = true;
894 let mut hasher = blake3::Hasher::new();
895 for row in rows {
896 let (file_path, mtime_ns, file_size, file_hash) = row?;
897 count += 1;
898 if file_hash == zero_hash {
899 hash_complete = false;
900 }
901 update_contribution_fingerprint_hash(
902 &mut hasher,
903 &file_path,
904 mtime_ns.max(0),
905 file_size.max(0) as u64,
906 &file_hash,
907 );
908 }
909
910 Ok((count, hasher.finalize().to_hex().to_string(), hash_complete))
911 }
912
913 pub(crate) fn contribution_freshness(
914 &self,
915 category: InspectCategory,
916 ) -> Result<Vec<(PathBuf, FileFreshness)>, InspectCacheError> {
917 let conn = self
918 .conn
919 .lock()
920 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
921 let mut stmt = conn.prepare(
922 "SELECT file_path, file_mtime_ns, file_size, file_hash \
923 FROM tier2_contributions \
924 WHERE category = ?1 AND project_key = ?2 \
925 ORDER BY file_path ASC",
926 )?;
927 let rows = stmt.query_map(params![category.as_str(), self.project_key], |row| {
928 Ok((
929 row.get::<_, String>(0)?,
930 row.get::<_, i64>(1)?,
931 row.get::<_, i64>(2)?,
932 row.get::<_, String>(3)?,
933 ))
934 })?;
935
936 let mut records = Vec::new();
937 for row in rows {
938 let (file_path, mtime_ns, file_size, file_hash) = row?;
939 records.push((
940 PathBuf::from(file_path),
941 FileFreshness {
942 mtime: ns_to_system_time(mtime_ns),
943 size: file_size.max(0) as u64,
944 content_hash: hash_from_hex(&file_hash)?,
945 },
946 ));
947 }
948 Ok(records)
949 }
950
951 pub fn contribution_set_hash(
952 &self,
953 category: InspectCategory,
954 ) -> Result<String, InspectCacheError> {
955 let conn = self
956 .conn
957 .lock()
958 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
959 contribution_set_hash_with_conn(&conn, category, &self.project_key, &self.project_root)
960 }
961
962 pub fn last_full_run(
963 &self,
964 category: InspectCategory,
965 ) -> Result<Option<i64>, InspectCacheError> {
966 let conn = self
967 .conn
968 .lock()
969 .map_err(|_| InspectCacheError::LockPoisoned("connection"))?;
970 conn.query_row(
971 "SELECT last_full_run FROM tier2_meta WHERE category = ?1 AND project_key = ?2",
972 params![category.as_str(), self.project_key],
973 |row| row.get::<_, i64>(0),
974 )
975 .optional()
976 .map_err(InspectCacheError::from)
977 }
978
979 pub fn memory_generated_at(&self, key: &JobKey) -> Result<Option<i64>, InspectCacheError> {
980 Ok(self
981 .memory
982 .read()
983 .map_err(|_| InspectCacheError::LockPoisoned("memory"))?
984 .get(key)
985 .map(|entry| entry.generated_at))
986 }
987}
988
989fn configure_connection(conn: &Connection) -> Result<(), InspectCacheError> {
990 conn.pragma_update(None, "journal_mode", "WAL")?;
991 conn.pragma_update(None, "busy_timeout", 5_000)?;
992 Ok(())
993}
994
995fn initialize_schema(conn: &Connection) -> Result<(), InspectCacheError> {
996 conn.execute_batch(
997 "CREATE TABLE IF NOT EXISTS tier2_contributions (
998 category TEXT NOT NULL,
999 project_key TEXT NOT NULL,
1000 file_path TEXT NOT NULL,
1001 file_mtime_ns INTEGER NOT NULL,
1002 file_size INTEGER NOT NULL,
1003 file_hash TEXT NOT NULL,
1004 contribution BLOB NOT NULL,
1005 generated_at INTEGER NOT NULL,
1006 PRIMARY KEY (category, project_key, file_path)
1007 );
1008
1009 CREATE TABLE IF NOT EXISTS tier2_aggregates (
1010 category TEXT NOT NULL,
1011 project_key TEXT NOT NULL,
1012 contribution_set_hash TEXT NOT NULL,
1013 aggregate BLOB NOT NULL,
1014 generated_at INTEGER NOT NULL,
1015 PRIMARY KEY (category, project_key)
1016 );
1017
1018 CREATE TABLE IF NOT EXISTS tier2_meta (
1019 category TEXT NOT NULL,
1020 project_key TEXT NOT NULL,
1021 last_full_run INTEGER NOT NULL,
1022 PRIMARY KEY (category, project_key)
1023 );",
1024 )?;
1025 Ok(())
1026}
1027
1028fn existing_contribution_paths(
1029 conn: &Connection,
1030 category: InspectCategory,
1031 project_key: &str,
1032) -> Result<Vec<String>, InspectCacheError> {
1033 let mut stmt = conn.prepare(
1034 "SELECT file_path FROM tier2_contributions WHERE category = ?1 AND project_key = ?2",
1035 )?;
1036 let rows = stmt.query_map(params![category.as_str(), project_key], |row| {
1037 row.get::<_, String>(0)
1038 })?;
1039 rows.collect::<Result<Vec<_>, _>>()
1040 .map_err(InspectCacheError::from)
1041}
1042
1043fn contribution_set_hash_with_conn(
1044 conn: &Connection,
1045 category: InspectCategory,
1046 project_key: &str,
1047 project_root: &Path,
1048) -> Result<String, InspectCacheError> {
1049 let mut stmt = conn.prepare(
1050 "SELECT file_path, file_hash FROM tier2_contributions \
1051 WHERE category = ?1 AND project_key = ?2 ORDER BY file_path ASC",
1052 )?;
1053 let rows = stmt.query_map(params![category.as_str(), project_key], |row| {
1054 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
1055 })?;
1056
1057 let mut hasher = blake3::Hasher::new();
1058 hasher.update(b"tier2-contributions\0");
1059 hasher.update(&TIER2_CONTRIBUTION_CACHE_VERSION.to_le_bytes());
1060 hasher.update(b"\0");
1061 for row in rows {
1062 let (file_path, file_hash) = row?;
1063 hasher.update(file_path.as_bytes());
1064 hasher.update(b"\0");
1065 hasher.update(file_hash.as_bytes());
1066 hasher.update(b"\0");
1067 }
1068 update_manifest_fingerprint_hash(&mut hasher, project_root)?;
1069 Ok(hasher.finalize().to_hex().to_string())
1070}
1071
1072fn update_manifest_fingerprint_hash(
1073 hasher: &mut blake3::Hasher,
1074 project_root: &Path,
1075) -> Result<(), InspectCacheError> {
1076 let manifest_root =
1077 fs::canonicalize(project_root).unwrap_or_else(|_| project_root.to_path_buf());
1078 hasher.update(b"entry-point-manifests\0");
1079 for manifest in super::entry_points::collect_entry_point_manifests(project_root) {
1080 let relative_path = manifest
1081 .strip_prefix(&manifest_root)
1082 .unwrap_or(manifest.as_path())
1083 .to_string_lossy()
1084 .replace('\\', "/");
1085 let content_hash = blake3::hash(&fs::read(&manifest)?);
1086 hasher.update(relative_path.as_bytes());
1087 hasher.update(b"\0");
1088 hasher.update(content_hash.as_bytes());
1089 hasher.update(b"\0");
1090 }
1091 Ok(())
1092}
1093
1094fn update_contribution_fingerprint_hash(
1095 hasher: &mut blake3::Hasher,
1096 relative_path: &str,
1097 mtime_ns: i64,
1098 file_size: u64,
1099 file_hash: &str,
1100) {
1101 hasher.update(relative_path.as_bytes());
1102 hasher.update(&[0]);
1103 hasher.update(&mtime_ns.to_le_bytes());
1104 hasher.update(&file_size.to_le_bytes());
1105 hasher.update(&[0]);
1106 hasher.update(file_hash.as_bytes());
1107}
1108
/// Renders `path` relative to `project_root` as a (lossy) UTF-8 string.
///
/// When `path` is not under `project_root` it is rendered unchanged.
fn relative_string(project_root: &Path, path: &Path) -> String {
    let relative = match path.strip_prefix(project_root) {
        Ok(stripped) => stripped,
        Err(_) => path,
    };
    relative.to_string_lossy().into_owned()
}
1115
/// Converts `time` to nanoseconds since the Unix epoch.
///
/// Times before the epoch map to `0`; values beyond `i64::MAX` saturate.
fn system_time_to_ns(time: SystemTime) -> i64 {
    match time.duration_since(UNIX_EPOCH) {
        Ok(elapsed) => i64::try_from(elapsed.as_nanos()).unwrap_or(i64::MAX),
        Err(_) => 0,
    }
}
1123
/// Converts nanoseconds since the Unix epoch to a `SystemTime`; negative
/// inputs map to the epoch itself.
fn ns_to_system_time(value: i64) -> SystemTime {
    let nanos = u64::try_from(value).unwrap_or(0);
    UNIX_EPOCH + Duration::from_nanos(nanos)
}
1127
1128fn hash_to_hex(hash: blake3::Hash) -> String {
1129 hash.to_hex().to_string()
1130}
1131
1132fn hash_from_hex(value: &str) -> Result<blake3::Hash, InspectCacheError> {
1133 if value.len() != 64 {
1134 return Err(InspectCacheError::InvalidHash(value.to_string()));
1135 }
1136 let mut bytes = [0u8; 32];
1137 for (index, chunk) in value.as_bytes().chunks(2).enumerate() {
1138 let hex = std::str::from_utf8(chunk)
1139 .map_err(|_| InspectCacheError::InvalidHash(value.to_string()))?;
1140 bytes[index] = u8::from_str_radix(hex, 16)
1141 .map_err(|_| InspectCacheError::InvalidHash(value.to_string()))?;
1142 }
1143 Ok(blake3::Hash::from_bytes(bytes))
1144}
1145
/// Returns the current wall-clock time as whole seconds since the Unix epoch.
///
/// A clock set before the epoch yields `0`; a value that does not fit in
/// `i64` is clamped to `i64::MAX`.
fn unix_seconds_now() -> i64 {
    let elapsed_secs = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    };
    if elapsed_secs > i64::MAX as u64 {
        i64::MAX
    } else {
        elapsed_secs as i64
    }
}
1153
// Unit tests for the Tier-1 file memo (eviction, lazy LRU compaction,
// freshness-based reuse) and for the Tier-2 contribution store/load round trip.
#[cfg(test)]
mod tests {
    use super::*;
    use std::cell::Cell;
    use std::fs;
    use std::path::{Path, PathBuf};

    /// Collects freshness metadata for `path`, unwrapping the result so a
    /// failed collection aborts the test.
    fn collect_freshness(path: &Path) -> FileFreshness {
        crate::cache_freshness::collect(path).unwrap()
    }

    /// Fills the memo to `TIER1_FILE_MEMO_MAX_ENTRIES`, touches the first
    /// entry, inserts one more path, and expects the touched entry to survive
    /// while the second-inserted entry is gone.
    #[test]
    fn tier1_file_memo_evicts_lru_and_keeps_recent_hits() {
        let temp = tempfile::tempdir().unwrap();
        let memo = Tier1FileMemo::<usize>::default();
        let mut paths = Vec::with_capacity(TIER1_FILE_MEMO_MAX_ENTRIES);

        // Populate the memo up to its capacity; every lookup here is a miss.
        for index in 0..TIER1_FILE_MEMO_MAX_ENTRIES {
            let path = temp.path().join(format!("file-{index}.txt"));
            fs::write(&path, index.to_string()).unwrap();
            let value =
                memo.get_or_insert_with(&path, |path| (Some(collect_freshness(path)), index));
            assert_eq!(value, index);
            paths.push(path);
        }

        // Touch the oldest entry; the closure panics if the lookup misses.
        let recent_path = paths[0].clone();
        let recent_value = memo.get_or_insert_with(&recent_path, |_| {
            panic!("recently inserted entry should hit before eviction")
        });
        assert_eq!(recent_value, 0);

        // One insert beyond capacity.
        let evicting_path = temp.path().join("new-file.txt");
        fs::write(&evicting_path, "new").unwrap();
        let evicting_value = memo.get_or_insert_with(&evicting_path, |path| {
            (Some(collect_freshness(path)), TIER1_FILE_MEMO_MAX_ENTRIES)
        });
        assert_eq!(evicting_value, TIER1_FILE_MEMO_MAX_ENTRIES);

        // The entry count stays at capacity; `paths[1]` is the one expected to
        // have been dropped because `paths[0]` was touched after insertion.
        let state = memo.state.lock().unwrap();
        assert_eq!(state.entries.len(), TIER1_FILE_MEMO_MAX_ENTRIES);
        assert!(state.entries.contains_key(&recent_path));
        assert!(state.entries.contains_key(&evicting_path));
        assert!(!state.entries.contains_key(&paths[1]));
        // Release the guard before calling back into the memo below.
        drop(state);

        let recent_value = memo.get_or_insert_with(&recent_path, |_| {
            panic!("recently used entry should survive eviction")
        });
        assert_eq!(recent_value, 0);
    }

    /// Touches one entry `TIER1_FILE_MEMO_MAX_ENTRIES * 3` times and checks
    /// that the `lru` queue length stays within twice the capacity after the
    /// next insert.
    #[test]
    fn tier1_file_memo_repeated_touches_keep_lazy_lru_bounded() {
        let temp = tempfile::tempdir().unwrap();
        let memo = Tier1FileMemo::<usize>::default();
        let mut paths = Vec::with_capacity(TIER1_FILE_MEMO_MAX_ENTRIES);

        // Populate the memo up to its capacity.
        for index in 0..TIER1_FILE_MEMO_MAX_ENTRIES {
            let path = temp.path().join(format!("file-{index}.txt"));
            fs::write(&path, index.to_string()).unwrap();
            memo.get_or_insert_with(&path, |path| (Some(collect_freshness(path)), index));
            paths.push(path);
        }

        // Repeatedly hit the same entry; every lookup must be served from the memo.
        for _ in 0..(TIER1_FILE_MEMO_MAX_ENTRIES * 3) {
            let value = memo.get_or_insert_with(&paths[0], |_| {
                panic!("hot entry should stay cached while it is repeatedly touched")
            });
            assert_eq!(value, 0);
        }

        // One insert beyond capacity.
        let evicting_path = temp.path().join("new-file.txt");
        fs::write(&evicting_path, "new").unwrap();
        memo.get_or_insert_with(&evicting_path, |path| {
            (Some(collect_freshness(path)), TIER1_FILE_MEMO_MAX_ENTRIES)
        });

        let state = memo.state.lock().unwrap();
        assert_eq!(state.entries.len(), TIER1_FILE_MEMO_MAX_ENTRIES);
        assert!(state.entries.contains_key(&paths[0]));
        assert!(state.entries.contains_key(&evicting_path));
        assert!(!state.entries.contains_key(&paths[1]));
        assert!(
            state.lru.len() <= TIER1_FILE_MEMO_MAX_ENTRIES * 2,
            "lazy LRU queue should be compacted instead of growing without bound"
        );
    }

    /// Checks that an unchanged file is served from the memo without invoking
    /// the scan closure, and that rewriting the file triggers exactly one new scan.
    #[test]
    fn tier1_file_memo_reuses_fresh_entries_and_rescans_stale_files() {
        let temp = tempfile::tempdir().unwrap();
        let path = temp.path().join("memo.txt");
        fs::write(&path, "first").unwrap();

        let memo = Tier1FileMemo::<String>::default();
        // Counts how many times a scan closure actually ran.
        let scans = Cell::new(0);

        let first = memo.get_or_insert_with(&path, |path| {
            scans.set(scans.get() + 1);
            (Some(collect_freshness(path)), "first scan".to_string())
        });
        assert_eq!(first, "first scan");
        assert_eq!(scans.get(), 1);

        let unchanged =
            memo.get_or_insert_with(&path, |_| panic!("unchanged file should reuse Tier-1 memo"));
        assert_eq!(unchanged, "first scan");
        assert_eq!(scans.get(), 1);

        // Rewrite the file with different, longer contents; the memo is
        // expected to treat the entry as stale and run the closure again.
        fs::write(&path, "changed file contents").unwrap();
        let changed = memo.get_or_insert_with(&path, |path| {
            scans.set(scans.get() + 1);
            (Some(collect_freshness(path)), "second scan".to_string())
        });
        assert_eq!(changed, "second scan");
        assert_eq!(scans.get(), 2);

        let fresh_after_rescan = memo.get_or_insert_with(&path, |_| {
            panic!("rescanned file should reuse refreshed Tier-1 memo")
        });
        assert_eq!(fresh_after_rescan, "second scan");
        assert_eq!(scans.get(), 2);
    }

    /// Serde-serializable copy of the `ContributionRecord` fields that the
    /// round-trip test inspects.
    #[derive(serde::Deserialize, serde::Serialize)]
    struct RoundTripContributionRecord {
        category: String,
        file_path: PathBuf,
        contribution: serde_json::Value,
        type_ref_names: BTreeSet<String>,
    }

    impl From<&ContributionRecord> for RoundTripContributionRecord {
        /// Copies the inspected fields, storing the category as its `as_str` form.
        fn from(record: &ContributionRecord) -> Self {
            Self {
                category: record.category.as_str().to_string(),
                file_path: record.file_path.clone(),
                contribution: record.contribution.clone(),
                type_ref_names: record.type_ref_names.clone(),
            }
        }
    }

    /// Stores a dead-code contribution, reopens the cache, and checks that
    /// the loaded record still carries the dispatched method names, type
    /// reference names, and `is_type_like` export flag after a JSON round trip.
    #[test]
    fn contribution_record_round_trip_preserves_dead_code_liveness_metadata() {
        let temp = tempfile::tempdir().unwrap();
        let project_root = temp.path().join("project");
        let inspect_dir = temp.path().join("inspect");
        let source = project_root.join("src/lib.ts");
        fs::create_dir_all(source.parent().unwrap()).unwrap();
        fs::write(&source, "export interface Widget { id: string }\n").unwrap();

        let cache = InspectCache::open(inspect_dir.clone(), project_root.clone()).unwrap();
        let contribution = FileContribution::new(
            InspectCategory::DeadCode,
            source.clone(),
            collect_freshness(&source),
            serde_json::json!({
                "file": "src/lib.ts",
                "exports": [{
                    "symbol": "Widget",
                    "kind": "interface",
                    "line": 1,
                    "is_type_like": true,
                    "is_entry_point": false,
                }],
                "internal_calls": [],
                "liveness_roots": [],
                "dispatched_method_names": ["render"],
                "type_ref_names": ["Widget"],
            }),
        )
        .with_type_ref_names(["Widget".to_string()]);
        cache
            .store_tier2_result(
                JobKey::for_project_category(InspectCategory::DeadCode),
                std::slice::from_ref(&source),
                &[contribution],
                serde_json::json!({ "count": 0, "items": [] }),
            )
            .unwrap();
        // Drop the first handle before reopening the same cache directory.
        drop(cache);

        let cache = InspectCache::open(inspect_dir, project_root).unwrap();
        let records = cache
            .load_tier2_contributions(InspectCategory::DeadCode)
            .unwrap();
        assert_eq!(records.len(), 1);

        // Round-trip the loaded record through JSON bytes before asserting on it.
        let serialized =
            serde_json::to_vec(&RoundTripContributionRecord::from(&records[0])).unwrap();
        let decoded: RoundTripContributionRecord = serde_json::from_slice(&serialized).unwrap();
        assert_eq!(decoded.category, InspectCategory::DeadCode.as_str());
        assert_eq!(decoded.contribution["dispatched_method_names"][0], "render");
        assert_eq!(decoded.contribution["type_ref_names"][0], "Widget");
        assert!(decoded.type_ref_names.contains("Widget"));
        assert_eq!(
            decoded.contribution["exports"][0]["is_type_like"].as_bool(),
            Some(true)
        );
        // Pins the cache version so that changing it requires revisiting this test.
        assert_eq!(TIER2_CONTRIBUTION_CACHE_VERSION, 11);
    }
}