1#![allow(unsafe_code)]
4
5use sha1::{Digest, Sha1};
6use sqlx::sqlite::{SqliteConnectOptions, SqliteJournalMode, SqlitePool, SqlitePoolOptions};
7use std::path::{Path, PathBuf};
8
9#[cfg(test)]
18pub(crate) fn shared_test_home() -> &'static Path {
19 use std::sync::OnceLock;
20 use tempfile::TempDir;
21 static HOME: OnceLock<TempDir> = OnceLock::new();
22 HOME.get_or_init(|| {
23 let dir = TempDir::new().expect("create shared test home tempdir");
24 unsafe {
28 std::env::set_var("DIFFLORE_HOME", dir.path());
29 }
30 dir
31 })
32 .path()
33}
34
35#[cfg_attr(test, allow(clippy::unnecessary_wraps))]
36pub(crate) fn difflore_dir() -> Result<PathBuf, String> {
37 if let Some(custom) = crate::env::difflore_home() {
42 return Ok(PathBuf::from(custom));
43 }
44 #[cfg(test)]
52 {
53 Ok(shared_test_home().to_path_buf())
54 }
55 #[cfg(not(test))]
56 Ok(dirs::home_dir()
57 .ok_or_else(|| "cannot resolve home directory".to_owned())?
58 .join(".difflore"))
59}
60
61pub fn data_db_path() -> Result<PathBuf, String> {
65 Ok(difflore_dir()?.join("data.db"))
66}
67
68pub fn project_hash_from_root(root: &Path) -> String {
79 let as_str = stable_project_identity(root);
80 let mut hasher = Sha1::new();
81 hasher.update(as_str.as_bytes());
82 let digest = hasher.finalize();
83 let mut hex = String::with_capacity(12);
84 for byte in digest.iter().take(6) {
85 hex.push_str(&format!("{byte:02x}"));
86 }
87 hex
88}
89
90fn stable_project_identity(root: &Path) -> String {
91 let raw = root.to_string_lossy().replace('\\', "/");
92 let raw = strip_windows_extended_prefix(raw.trim());
93 let absolute = if is_absolute_like(&raw) {
94 raw
95 } else if let Ok(cwd) = std::env::current_dir() {
96 format!("{}/{}", cwd.to_string_lossy().replace('\\', "/"), raw)
97 } else {
98 raw
99 };
100 lexical_normalize_path(&absolute)
101}
102
103fn strip_windows_extended_prefix(path: &str) -> String {
104 if let Some(rest) = path.strip_prefix("//?/UNC/") {
105 format!("//{rest}")
106 } else if let Some(rest) = path.strip_prefix("//?/") {
107 rest.to_owned()
108 } else if let Some(rest) = path.strip_prefix("//./") {
109 rest.to_owned()
110 } else {
111 path.to_owned()
112 }
113}
114
115fn is_absolute_like(path: &str) -> bool {
116 path.starts_with('/')
117 || (path.len() >= 3
118 && path.as_bytes()[1] == b':'
119 && path.as_bytes()[2] == b'/'
120 && path.as_bytes()[0].is_ascii_alphabetic())
121}
122
123fn lexical_normalize_path(path: &str) -> String {
124 let path = strip_windows_extended_prefix(path).replace('\\', "/");
125 let (prefix, rest, absolute) = if path.len() >= 3
126 && path.as_bytes()[1] == b':'
127 && path.as_bytes()[2] == b'/'
128 && path.as_bytes()[0].is_ascii_alphabetic()
129 {
130 (
131 format!("{}:/", char::from(path.as_bytes()[0]).to_ascii_lowercase()),
132 &path[3..],
133 true,
134 )
135 } else if path.starts_with("//") {
136 ("//".to_owned(), path.trim_start_matches('/'), true)
137 } else if let Some(rest) = path.strip_prefix('/') {
138 ("/".to_owned(), rest, true)
139 } else {
140 (String::new(), path.as_str(), false)
141 };
142
143 let mut parts: Vec<&str> = Vec::new();
144 for part in rest.split('/') {
145 match part {
146 "" | "." => {}
147 ".." => {
148 if parts.last().is_some_and(|last| *last != "..") {
149 parts.pop();
150 } else if !absolute {
151 parts.push(part);
152 }
153 }
154 _ => parts.push(part),
155 }
156 }
157
158 if parts.is_empty() {
159 match prefix.as_str() {
160 "" => ".".to_owned(),
161 "/" => "/".to_owned(),
162 "//" => "//".to_owned(),
163 _ if prefix.ends_with(":/") => prefix.trim_end_matches('/').to_owned(),
164 _ => prefix,
165 }
166 } else if prefix == "/" || prefix == "//" || prefix.ends_with(":/") {
167 format!("{prefix}{}", parts.join("/"))
168 } else {
169 parts.join("/")
170 }
171}
172
173pub fn current_project_root() -> PathBuf {
178 let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
179 let output = std::process::Command::new("git")
180 .args(["rev-parse", "--show-toplevel"])
181 .current_dir(&cwd)
182 .output();
183 if let Ok(out) = output
184 && out.status.success()
185 {
186 let s = String::from_utf8_lossy(&out.stdout).trim().to_owned();
187 if !s.is_empty() {
188 return PathBuf::from(s);
189 }
190 }
191 cwd
192}
193
194pub fn project_index_dir(project_hash: &str) -> PathBuf {
198 let mut p = difflore_dir().unwrap_or_else(|_| PathBuf::from(".difflore"));
199 p.push("projects");
200 p.push(project_hash);
201 p
202}
203
204static MIGRATION_LOCK: tokio::sync::Mutex<()> = tokio::sync::Mutex::const_new(());
213
214pub async fn run_migrations(pool: &SqlitePool) -> Result<(), String> {
219 let _guard = MIGRATION_LOCK.lock().await;
220 sqlx::migrate!("./migrations")
227 .run(pool)
228 .await
229 .map_err(|e| format!("migration failed: {e}"))
230}
231
232static POOL_CACHE: tokio::sync::Mutex<Option<std::collections::HashMap<PathBuf, SqlitePool>>> =
242 tokio::sync::Mutex::const_new(None);
243
244#[cfg(unix)]
251pub(crate) fn restrict_to_owner(path: &Path, is_dir: bool) {
252 use std::os::unix::fs::PermissionsExt;
253 let mode = if is_dir { 0o700 } else { 0o600 };
254 let _ = std::fs::set_permissions(path, std::fs::Permissions::from_mode(mode));
255}
256
257#[cfg(not(unix))]
258pub(crate) const fn restrict_to_owner(_path: &Path, _is_dir: bool) {}
259
260#[cfg(unix)]
263pub(crate) fn restrict_sqlite_files(db_path: &Path) {
264 restrict_to_owner(db_path, false);
265 for suffix in ["-wal", "-shm", "-journal"] {
266 let mut sidecar = db_path.as_os_str().to_owned();
267 sidecar.push(suffix);
268 restrict_to_owner(Path::new(&sidecar), false);
269 }
270}
271
272#[cfg(not(unix))]
273pub(crate) const fn restrict_sqlite_files(_db_path: &Path) {}
274
275pub async fn init_db() -> Result<SqlitePool, String> {
276 let dir = difflore_dir()?;
277 std::fs::create_dir_all(&dir).map_err(|e| format!("failed to create ~/.difflore: {e}"))?;
278 restrict_to_owner(&dir, true);
279 let db_path = dir.join("data.db");
280
281 let mut guard = POOL_CACHE.lock().await;
286 let cache = guard.get_or_insert_with(std::collections::HashMap::new);
287
288 if let Some(pool) = cache.get(&db_path) {
289 return Ok(pool.clone());
290 }
291
292 let opts = SqliteConnectOptions::new()
293 .filename(&db_path)
294 .create_if_missing(true)
295 .journal_mode(SqliteJournalMode::Wal)
296 .busy_timeout(std::time::Duration::from_secs(5))
297 .foreign_keys(true);
298
299 let pool = SqlitePoolOptions::new()
300 .max_connections(5)
301 .connect_with(opts)
302 .await
303 .map_err(|e| {
304 if is_readonly_home_open_error(&e) {
312 format!(
313 "failed to open data.db: {e}\n\
314 hint: ~/.difflore appears read-only. A sandboxed agent (e.g. codex with a \
315 restrictive --sandbox) blocks DiffLore's writes. Run DiffLore unsandboxed for \
316 that client, or set DIFFLORE_HOME to a writable path."
317 )
318 } else {
319 format!("failed to open data.db: {e}")
320 }
321 })?;
322
323 restrict_sqlite_files(&db_path);
325
326 run_migrations(&pool).await?;
327
328 cache.insert(db_path.clone(), pool.clone());
329 Ok(pool)
330}
331
332fn is_readonly_home_open_error(err: &sqlx::Error) -> bool {
336 let s = err.to_string().to_ascii_lowercase();
337 s.contains("code: 14") || s.contains("code: 8") || s.contains("unable to open database file")
340 || s.contains("readonly")
341 || s.contains("read-only")
342}
343
344pub async fn table_counts(
350 pool: &SqlitePool,
351 tables: &[&str],
352) -> Vec<(String, Result<i64, String>)> {
353 let mut out = Vec::with_capacity(tables.len());
354 for t in tables {
355 let count: Result<i64, String> = match *t {
356 "skills" => sqlx::query_scalar!(r#"SELECT COUNT(*) AS "n!: i64" FROM skills"#)
357 .fetch_one(pool)
358 .await
359 .map_err(|e| e.to_string()),
360 "review_items" => {
361 sqlx::query_scalar!(r#"SELECT COUNT(*) AS "n!: i64" FROM review_items"#)
362 .fetch_one(pool)
363 .await
364 .map_err(|e| e.to_string())
365 }
366 "review_comments" => {
367 sqlx::query_scalar!(r#"SELECT COUNT(*) AS "n!: i64" FROM review_comments"#)
368 .fetch_one(pool)
369 .await
370 .map_err(|e| e.to_string())
371 }
372 "providers" => sqlx::query_scalar!(r#"SELECT COUNT(*) AS "n!: i64" FROM providers"#)
373 .fetch_one(pool)
374 .await
375 .map_err(|e| e.to_string()),
376 "cloud_outbox" => {
377 sqlx::query_scalar!(r#"SELECT COUNT(*) AS "n!: i64" FROM cloud_outbox"#)
378 .fetch_one(pool)
379 .await
380 .map_err(|e| e.to_string())
381 }
382 "projects" => sqlx::query_scalar!(r#"SELECT COUNT(*) AS "n!: i64" FROM projects"#)
383 .fetch_one(pool)
384 .await
385 .map_err(|e| e.to_string()),
386 other => Err(format!("unknown table: {other}")),
387 };
388 out.push((t.to_string(), count));
389 }
390 out
391}
392
393#[derive(Debug, Default)]
400pub struct CorpusHealth {
401 pub total: i64,
402 pub by_origin: Vec<(String, i64)>,
403 pub by_source_repo: Vec<(String, i64)>,
404 pub empty_file_patterns: i64,
405}
406
407pub async fn corpus_health(pool: &SqlitePool) -> Result<CorpusHealth, String> {
408 let total =
409 sqlx::query_scalar!("SELECT COUNT(*) as \"n!: i64\" FROM skills WHERE status = 'active'")
410 .fetch_one(pool)
411 .await
412 .map_err(|e| e.to_string())?;
413
414 let by_origin_rows = sqlx::query!(
415 "SELECT COALESCE(origin, '<unknown>') AS \"origin!: String\", COUNT(*) AS \"n!: i64\" FROM skills \
416 WHERE status = 'active' GROUP BY origin ORDER BY COUNT(*) DESC"
417 )
418 .fetch_all(pool)
419 .await
420 .map_err(|e| e.to_string())?;
421 let by_origin: Vec<(String, i64)> = by_origin_rows
422 .into_iter()
423 .map(|r| (r.origin, r.n))
424 .collect();
425
426 let by_source_repo_rows = sqlx::query_as::<_, (Option<String>, i64)>(
430 "WITH skill_repos AS ( \
431 SELECT source_repo AS repo \
432 FROM skills WHERE status = 'active' \
433 ) \
434 SELECT repo, COUNT(*) AS n \
435 FROM skill_repos \
436 GROUP BY repo \
437 ORDER BY n DESC, COALESCE(repo, '') ASC \
438 LIMIT 10",
439 )
440 .fetch_all(pool)
441 .await
442 .map_err(|e| e.to_string())?;
443 let by_source_repo: Vec<(String, i64)> = by_source_repo_rows
444 .into_iter()
445 .map(|(repo, n)| (repo.unwrap_or_else(|| "<unset>".to_owned()), n))
446 .collect();
447
448 let empty = sqlx::query_scalar!(
449 "SELECT COUNT(*) as \"n!: i64\" FROM skills WHERE status = 'active' \
450 AND (file_patterns IS NULL OR file_patterns = '' OR file_patterns = '[]')"
451 )
452 .fetch_one(pool)
453 .await
454 .map_err(|e| e.to_string())?;
455
456 Ok(CorpusHealth {
457 total,
458 by_origin,
459 by_source_repo,
460 empty_file_patterns: empty,
461 })
462}
463
464#[cfg(test)]
465mod tests {
466 use super::*;
467
468 #[test]
469 fn project_hash_is_stable_across_calls() {
470 let p = PathBuf::from("/some/path/to/project");
471 let h1 = project_hash_from_root(&p);
472 let h2 = project_hash_from_root(&p);
473 assert_eq!(h1, h2, "same path must hash to same value");
474 assert_eq!(h1.len(), 12, "hash length must be 12 hex chars");
475 assert!(
476 h1.chars().all(|c| c.is_ascii_hexdigit()),
477 "hash must be hex only: {h1}"
478 );
479 }
480
481 #[test]
482 fn restrict_to_owner_tightens_perms_without_erroring() {
483 let tmp = tempfile::tempdir().unwrap();
484 let dir = tmp.path().join("secret_dir");
485 std::fs::create_dir(&dir).unwrap();
486 let file = tmp.path().join("data.db");
487 std::fs::write(&file, b"token").unwrap();
488
489 restrict_to_owner(&dir, true);
492 restrict_to_owner(&file, false);
493 restrict_sqlite_files(&file);
494
495 #[cfg(unix)]
496 {
497 use std::os::unix::fs::PermissionsExt;
498 let mode = |p: &Path| std::fs::metadata(p).unwrap().permissions().mode() & 0o777;
499 assert_eq!(mode(&dir), 0o700, "~/.difflore should be 0700");
500 assert_eq!(mode(&file), 0o600, "the token/data DB should be 0600");
501 }
502 }
503
504 #[test]
505 fn project_hash_differs_for_different_roots() {
506 let a = project_hash_from_root(&PathBuf::from("/projects/alpha"));
507 let b = project_hash_from_root(&PathBuf::from("/projects/beta"));
508 assert_ne!(a, b, "different roots should hash differently");
509 }
510
511 #[test]
512 fn project_hash_normalises_windows_separator_variants() {
513 let posix = project_hash_from_root(&PathBuf::from("C:/Users/alice/repo"));
517 let forward = project_hash_from_root(Path::new("C:/Users/alice/repo"));
518 let backward = project_hash_from_root(Path::new("C:\\Users\\alice\\repo"));
519 assert_eq!(posix, forward);
520 assert_eq!(forward, backward);
521 }
522
523 #[test]
524 fn project_hash_does_not_change_when_directory_is_created() {
525 let tmp = tempfile::TempDir::new().expect("tempdir");
526 let root = tmp.path().join("repo-that-does-not-exist-yet");
527
528 let before = project_hash_from_root(&root);
529 std::fs::create_dir(&root).expect("create project root");
530 let after = project_hash_from_root(&root);
531
532 assert_eq!(
533 before, after,
534 "same path must not re-hash after it starts existing"
535 );
536 }
537
538 #[test]
539 fn project_hash_normalises_dot_segments_without_filesystem_lookup() {
540 let tmp = tempfile::TempDir::new().expect("tempdir");
541 let root = tmp.path().join("missing-repo");
542 let dotted = root.join(".").join("nested").join("..");
543
544 assert_eq!(
545 project_hash_from_root(&root),
546 project_hash_from_root(&dotted),
547 "lexically equivalent paths should share a project identity"
548 );
549 }
550
551 #[test]
552 fn project_hash_strips_windows_extended_prefix() {
553 let normal = project_hash_from_root(Path::new("C:\\Users\\alice\\repo"));
554 let extended = project_hash_from_root(Path::new("\\\\?\\C:\\Users\\alice\\repo"));
555
556 assert_eq!(
557 normal, extended,
558 "Windows extended path prefix must not fork project identity"
559 );
560 }
561
562 #[tokio::test]
567 async fn table_counts_returns_per_table_results() {
568 use sqlx::sqlite::SqlitePoolOptions;
569
570 let pool = SqlitePoolOptions::new()
571 .max_connections(1)
572 .connect("sqlite::memory:")
573 .await
574 .expect("open pool");
575 sqlx::migrate!("./migrations")
576 .run(&pool)
577 .await
578 .expect("apply migrations");
579 sqlx::query!("INSERT INTO projects (id, name, path) VALUES ('p1', 'demo', '/tmp/demo')")
580 .execute(&pool)
581 .await
582 .expect("seed projects");
583
584 let results = table_counts(&pool, &["projects", "skills", "no_such_table"]).await;
585 assert_eq!(results.len(), 3);
586 assert_eq!(results[0].0, "projects");
587 assert_eq!(results[0].1.as_ref().copied().ok(), Some(1));
588 assert_eq!(results[1].0, "skills");
589 assert_eq!(results[1].1.as_ref().copied().ok(), Some(0));
590 assert_eq!(results[2].0, "no_such_table");
591 assert!(
592 results[2].1.is_err(),
593 "missing table must surface as Err, got {:?}",
594 results[2].1
595 );
596 }
597
598 #[test]
599 fn project_index_dir_uses_difflore_home() {
600 let home = shared_test_home();
607 let dir = project_index_dir("abc123def456");
608 assert!(
609 dir.starts_with(home),
610 "project_index_dir should live under DIFFLORE_HOME, got {dir:?}"
611 );
612 assert!(dir.ends_with(PathBuf::from("projects").join("abc123def456")));
613 }
614}