normalize_native_rules/
cache.rs

//! SQLite-backed per-file findings cache.
//!
//! Stored at `<project_root>/.normalize/findings-cache.sqlite`.
//! Keyed by `(path, engine)`: each engine stores its own findings per file.
//! A `config_hash` column invalidates the entry when the rule config changes.
//!
//! Backed by libsql. To keep the public API synchronous (the `FileRule` trait
//! is sync because rule implementations parse files with tree-sitter and run
//! pure analysis), the cache drives libsql by blocking on its futures. When it
//! is opened outside of any tokio runtime it owns a dedicated current-thread
//! runtime; otherwise it blocks through the ambient multi-thread runtime or,
//! for other runtime flavors, on a short-lived worker thread.
11
12use libsql::{Builder, Connection, Database, params};
13use std::future::Future;
14use std::path::Path;
15use tokio::runtime::{Handle, Runtime};
16
17/// SQLite-backed per-file findings cache.
18///
19/// Stored at `<project_root>/.normalize/findings-cache.sqlite`.
20/// Keyed by `(path, engine)`: each engine stores its own findings per file.
21/// A `config_hash` column invalidates the entry when rule config changes.
22pub struct FindingsCache {
23    conn: Connection,
24    /// Keep the Database alive for the lifetime of the connection.
25    #[allow(dead_code)]
26    db: Database,
27    /// Owned runtime — only present when we are not running inside an existing
28    /// tokio runtime. If `None`, calls use `Handle::current()` + `block_in_place`.
29    runtime: Option<Runtime>,
30}
31
32impl FindingsCache {
33    fn block_on<F: Future + Send>(&self, fut: F) -> F::Output
34    where
35        F::Output: Send,
36    {
37        block_on_helper(&self.runtime, fut)
38    }
39}
40
41/// Drive `fut` to completion, choosing a strategy based on the *current* thread's
42/// tokio context — not the context at cache-construction time. The cached `runtime`
43/// (set when the cache was opened from a sync context) is only used as a fallback
44/// when we are not currently inside any runtime; calling `cached_rt.block_on()` from
45/// inside another runtime panics with "Cannot start a runtime from within a runtime".
46fn block_on_helper<F: Future + Send>(runtime: &Option<Runtime>, fut: F) -> F::Output
47where
48    F::Output: Send,
49{
50    if let Ok(handle) = Handle::try_current() {
51        return match handle.runtime_flavor() {
52            tokio::runtime::RuntimeFlavor::MultiThread => {
53                tokio::task::block_in_place(|| handle.block_on(fut))
54            }
55            _ => spawn_scoped_runtime(fut),
56        };
57    }
58    if let Some(rt) = runtime {
59        return rt.block_on(fut);
60    }
61    spawn_scoped_runtime(fut)
62}
63
64fn spawn_scoped_runtime<F: Future + Send>(fut: F) -> F::Output
65where
66    F::Output: Send,
67{
68    std::thread::scope(|s| {
69        s.spawn(|| {
70            let rt = tokio::runtime::Builder::new_current_thread()
71                .enable_all()
72                .build()
73                .expect("failed to build tokio runtime worker thread");
74            rt.block_on(fut)
75        })
76        .join()
77        .expect("libsql worker thread panicked")
78    })
79}
80
81impl FindingsCache {
82    /// Open (or create) the cache database at `<project_root>/.normalize/findings-cache.sqlite`.
83    ///
84    /// Returns an in-memory fallback if the database cannot be opened (e.g. permission error),
85    /// so callers never need to handle failure — the cost is just a cold run.
86    pub fn open(project_root: &Path) -> Self {
87        let dir = project_root.join(".normalize");
88        let _ = std::fs::create_dir_all(&dir);
89        let db_path = dir.join("findings-cache.sqlite");
90
91        let runtime: Option<Runtime> = if Handle::try_current().is_ok() {
92            None
93        } else {
94            Some(
95                tokio::runtime::Builder::new_current_thread()
96                    .enable_all()
97                    .build()
98                    .expect("failed to build tokio runtime for findings cache"),
99            )
100        };
101        let init = async {
102            // Try opening the on-disk DB; fall back to in-memory on any error.
103            let db = match Builder::new_local(&db_path).build().await {
104                Ok(db) => db,
105                Err(_) => Builder::new_local(":memory:")
106                    .build()
107                    .await
108                    .expect("failed to open in-memory libsql database"),
109            };
110            let conn = db.connect().expect("failed to connect to libsql database");
111            // Best-effort schema setup.
112            let _ = conn
113                .execute_batch(
114                    "PRAGMA journal_mode=WAL;
115                     PRAGMA synchronous=NORMAL;
116                     CREATE TABLE IF NOT EXISTS findings_cache (
117                        path TEXT NOT NULL,
118                        engine TEXT NOT NULL,
119                        mtime_nanos INTEGER NOT NULL,
120                        config_hash TEXT NOT NULL,
121                        findings_json TEXT NOT NULL,
122                        PRIMARY KEY (path, engine)
123                    );",
124                )
125                .await;
126            (db, conn)
127        };
128        let (db, conn) = block_on_helper(&runtime, init);
129
130        Self { conn, db, runtime }
131    }
132
133    /// Return cached findings JSON blob if `(path, mtime_nanos, config_hash, engine)` all match.
134    pub fn get(
135        &self,
136        path: &str,
137        mtime_nanos: u64,
138        config_hash: &str,
139        engine: &str,
140    ) -> Option<String> {
141        let conn = &self.conn;
142        self.block_on(async {
143            let mut rows = conn
144                .query(
145                    "SELECT findings_json FROM findings_cache
146                     WHERE path = ?1 AND engine = ?2 AND mtime_nanos = ?3 AND config_hash = ?4",
147                    params![path, engine, mtime_nanos as i64, config_hash],
148                )
149                .await
150                .ok()?;
151            let row = rows.next().await.ok()??;
152            row.get::<String>(0).ok()
153        })
154    }
155
156    /// Store findings for a file. Called after a fresh analysis.
157    pub fn put(
158        &self,
159        path: &str,
160        mtime_nanos: u64,
161        config_hash: &str,
162        engine: &str,
163        findings_json: &str,
164    ) {
165        let conn = &self.conn;
166        let _ = self.block_on(async {
167            conn.execute(
168                "INSERT OR REPLACE INTO findings_cache (path, engine, mtime_nanos, config_hash, findings_json)
169                 VALUES (?1, ?2, ?3, ?4, ?5)",
170                params![path, engine, mtime_nanos as i64, config_hash, findings_json],
171            )
172            .await
173        });
174    }
175
176    pub fn begin(&self) {
177        let conn = &self.conn;
178        let _ = self.block_on(async { conn.execute_batch("BEGIN;").await });
179    }
180
181    pub fn commit(&self) {
182        let conn = &self.conn;
183        let _ = self.block_on(async { conn.execute_batch("COMMIT;").await });
184    }
185
186    /// No-op — retained for API symmetry; callers should use begin/commit.
187    pub fn flush(&self) {}
188}
189
/// Returns the modification time of `path` as nanoseconds since the UNIX epoch.
///
/// The result is 0 when the metadata or modification time cannot be read, or
/// when the modification time lies before the epoch.
///
/// The value is narrowed from `u128` to `u64` so it fits SQLite's 64-bit
/// `INTEGER` column. Nanosecond precision avoids false cache hits when a file
/// is modified twice within the same second.
pub fn file_mtime_nanos(path: &Path) -> u64 {
    let modified = match path.metadata().and_then(|meta| meta.modified()) {
        Ok(timestamp) => timestamp,
        Err(_) => return 0,
    };
    match modified.duration_since(std::time::UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_nanos() as u64,
        Err(_) => 0,
    }
}
204
205/// Trait for native rules that check individual files.
206///
207/// Implementing this trait gives automatic SQLite caching and parallel execution.
208/// Rule authors implement `check_file()` and `to_diagnostics()` — the framework handles the rest.
209pub trait FileRule: Send + Sync {
210    /// Serializable per-file finding type.
211    type Finding: serde::Serialize + serde::de::DeserializeOwned + Send;
212
213    /// Unique engine name for cache keying (e.g. "long-function", "high-complexity").
214    fn engine_name(&self) -> &str;
215
216    /// Config hash for cache invalidation (e.g. threshold.to_string()).
217    fn config_hash(&self) -> String;
218
219    /// Check a single file. Returns findings for that file.
220    /// `path` is absolute, `root` is the project root for computing relative paths.
221    fn check_file(&self, path: &Path, root: &Path) -> Vec<Self::Finding>;
222
223    /// Convert collected findings into a DiagnosticsReport.
224    /// `findings` maps file path to that file's findings.
225    /// `files_checked` is the total number of files examined (cached + fresh).
226    fn to_diagnostics(
227        &self,
228        findings: Vec<(std::path::PathBuf, Vec<Self::Finding>)>,
229        root: &Path,
230        files_checked: usize,
231    ) -> normalize_output::diagnostics::DiagnosticsReport;
232}
233
234/// Run a `FileRule` against a set of files with automatic caching and parallel execution.
235///
236/// 1. Walk files (or use `explicit_files`)
237/// 2. Check cache for each file (sequential — fast DB lookups)
238/// 3. Compute cache misses in parallel (rayon `par_iter`)
239/// 4. Store new results in cache
240/// 5. Merge cached + fresh findings and call `to_diagnostics()`
241pub fn run_file_rule<R: FileRule>(
242    rule: &R,
243    root: &Path,
244    explicit_files: Option<&[std::path::PathBuf]>,
245    walk_config: &normalize_rules_config::WalkConfig,
246) -> normalize_output::diagnostics::DiagnosticsReport {
247    let files: Vec<std::path::PathBuf> = if let Some(ef) = explicit_files {
248        ef.iter()
249            .filter(|p| p.is_file())
250            .filter(|p| normalize_languages::support_for_path(p).is_some())
251            .cloned()
252            .collect()
253    } else {
254        super::walk::gitignore_walk(root, walk_config)
255            .filter(|e| e.path().is_file())
256            .filter(|e| normalize_languages::support_for_path(e.path()).is_some())
257            .map(|e| e.path().to_path_buf())
258            .collect()
259    };
260
261    let files_checked = files.len();
262    let cache = FindingsCache::open(root);
263    let config_hash = rule.config_hash();
264    let engine = rule.engine_name();
265
266    // Phase 1: separate cache hits from misses (sequential, fast DB lookups).
267    let mut cached_findings: Vec<(std::path::PathBuf, Vec<R::Finding>)> = Vec::new();
268    let mut cache_misses: Vec<std::path::PathBuf> = Vec::new();
269
270    for file in &files {
271        let path_key = file.to_string_lossy().to_string();
272        let mtime = file_mtime_nanos(file);
273        if mtime > 0
274            && let Some(json) = cache.get(&path_key, mtime, &config_hash, engine)
275            && let Ok(findings) = serde_json::from_str::<Vec<R::Finding>>(&json)
276        {
277            cached_findings.push((file.clone(), findings));
278            continue;
279        }
280        cache_misses.push(file.clone());
281    }
282
283    // Phase 2: compute misses in parallel.
284    use rayon::prelude::*;
285    let fresh_findings: Vec<(std::path::PathBuf, Vec<R::Finding>)> = cache_misses
286        .par_iter()
287        .map(|path| {
288            let findings = rule.check_file(path, root);
289            (path.clone(), findings)
290        })
291        .collect();
292
293    // Phase 3: store fresh results in cache (single transaction).
294    cache.begin();
295    for (path, findings) in &fresh_findings {
296        let path_key = path.to_string_lossy().to_string();
297        let mtime = file_mtime_nanos(path);
298        if mtime > 0
299            && let Ok(json) = serde_json::to_string(findings)
300        {
301            cache.put(&path_key, mtime, &config_hash, engine, &json);
302        }
303    }
304    cache.commit();
305
306    // Phase 4: merge and build report.
307    let mut all_findings: Vec<(std::path::PathBuf, Vec<R::Finding>)> = cached_findings;
308    all_findings.extend(fresh_findings);
309
310    rule.to_diagnostics(all_findings, root, files_checked)
311}
normalize_native_rules/cache.rs

normalize_native_rules/
cache.rs