1use std::fs::{File, OpenOptions};
24use std::path::{Path, PathBuf};
25use std::thread;
26use std::time::{Duration, Instant};
27
28use directories::ProjectDirs;
29use fs4::fs_std::FileExt;
30
31use crate::constants::{
32 CLI_LOCK_POLL_INTERVAL_MS, EMBEDDING_LOAD_EXPECTED_RSS_MB, JOB_SINGLETON_POLL_INTERVAL_MS,
33 LLM_WORKER_RSS_MB, MAX_CONCURRENT_CLI_INSTANCES,
34};
35use crate::errors::AppError;
36
/// Kind of background job that can hold a singleton lock.
///
/// The variant selects the tag embedded in the singleton lock file name, so
/// each job kind gets its own lock file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum JobType {
    /// Enrichment job (tag `enrich`).
    Enrich,
    /// Claude Code ingestion job (tag `ingest-claude-code`).
    IngestClaudeCode,
    /// Codex ingestion job (tag `ingest-codex`).
    IngestCodex,
}

impl JobType {
    /// Returns the short, file-name-safe tag for this job kind.
    ///
    /// The tag is used in lock file names and in `JobSingletonLocked` errors.
    fn tag(self) -> &'static str {
        match self {
            Self::IngestCodex => "ingest-codex",
            Self::IngestClaudeCode => "ingest-claude-code",
            Self::Enrich => "enrich",
        }
    }
}
62
63fn slot_path(slot: usize) -> Result<PathBuf, AppError> {
69 let cache = cache_dir()?;
70 std::fs::create_dir_all(&cache)?;
71 Ok(cache.join(format!("cli-slot-{slot}.lock")))
72}
73
74fn cache_dir() -> Result<PathBuf, AppError> {
76 if let Some(override_dir) = std::env::var_os("SQLITE_GRAPHRAG_CACHE_DIR") {
77 Ok(PathBuf::from(override_dir))
78 } else {
79 let dirs = ProjectDirs::from("", "", "sqlite-graphrag").ok_or_else(|| {
80 AppError::Io(std::io::Error::new(
81 std::io::ErrorKind::NotFound,
82 "could not determine cache directory for sqlite-graphrag lock files",
83 ))
84 })?;
85 Ok(dirs.cache_dir().to_path_buf())
86 }
87}
88
89pub fn db_path_hash(db_path: &Path) -> String {
94 let canonical = db_path
95 .canonicalize()
96 .unwrap_or_else(|_| db_path.to_path_buf());
97 let hash = blake3::hash(canonical.to_string_lossy().as_bytes());
98 hash.to_hex().to_string()[..12].to_string()
99}
100
101pub fn job_singleton_path(
114 job_type: JobType,
115 namespace: &str,
116 db_hash: &str,
117) -> Result<PathBuf, AppError> {
118 let cache = cache_dir()?;
119 std::fs::create_dir_all(&cache)?;
120 let slug = if namespace.is_empty() {
121 "default".to_string()
122 } else {
123 namespace
124 .chars()
125 .map(|c| {
126 if c.is_ascii_alphanumeric() || c == '-' || c == '_' {
127 c.to_ascii_lowercase()
128 } else {
129 '-'
130 }
131 })
132 .collect::<String>()
133 };
134 let safe_hash: String = db_hash
135 .chars()
136 .filter(|c| c.is_ascii_alphanumeric())
137 .take(16)
138 .collect();
139 Ok(cache.join(format!(
140 "job-singleton-{}-{slug}-{safe_hash}.lock",
141 job_type.tag()
142 )))
143}
144
145fn try_acquire_slot(slot: usize) -> Result<File, AppError> {
150 let path = slot_path(slot)?;
151 let file = OpenOptions::new()
152 .read(true)
153 .write(true)
154 .create(true)
155 .truncate(false)
156 .open(&path)?;
157 file.try_lock_exclusive().map_err(AppError::Io)?;
158 Ok(file)
159}
160
161pub fn calculate_safe_concurrency() -> usize {
186 use sysinfo::System;
187 let mut sys = System::new();
188 sys.refresh_memory();
189 let available_mb = sys.available_memory() / 1_048_576;
190 let cpus = std::thread::available_parallelism()
191 .map(|n| n.get())
192 .unwrap_or(2);
193
194 let per_worker_mb = if cfg!(feature = "llm-only") && !cfg!(feature = "embedding-legacy") {
195 LLM_WORKER_RSS_MB
196 } else if cfg!(feature = "embedding-legacy") && !cfg!(feature = "llm-only") {
197 EMBEDDING_LOAD_EXPECTED_RSS_MB
198 } else {
199 LLM_WORKER_RSS_MB.min(EMBEDDING_LOAD_EXPECTED_RSS_MB)
200 };
201
202 let memory_bound = if available_mb == 0 {
203 cpus
204 } else {
205 (available_mb / per_worker_mb.max(1)) as usize
206 };
207 let raw = cpus.min(memory_bound).max(1);
208 raw.min(MAX_CONCURRENT_CLI_INSTANCES)
209}
210
211pub fn worker_cost_mb() -> u64 {
214 if cfg!(feature = "llm-only") && !cfg!(feature = "embedding-legacy") {
215 LLM_WORKER_RSS_MB
216 } else if cfg!(feature = "embedding-legacy") && !cfg!(feature = "llm-only") {
217 EMBEDDING_LOAD_EXPECTED_RSS_MB
218 } else {
219 LLM_WORKER_RSS_MB.min(EMBEDDING_LOAD_EXPECTED_RSS_MB)
220 }
221}
222
223pub fn acquire_cli_slot(
228 max_concurrency: usize,
229 wait_seconds: Option<u64>,
230) -> Result<(File, usize), AppError> {
231 let ncpus = std::thread::available_parallelism()
233 .map(|n| n.get())
234 .unwrap_or(4);
235 let ceiling = std::env::var("SQLITE_GRAPHRAG_MAX_CLI_INSTANCES")
236 .ok()
237 .and_then(|v| v.parse::<usize>().ok())
238 .unwrap_or_else(|| (2 * ncpus).max(MAX_CONCURRENT_CLI_INSTANCES));
239 let max = max_concurrency.clamp(1, ceiling);
240 let wait_secs = wait_seconds.unwrap_or(0);
241
242 if let Some((file, slot)) = try_any_slot(max)? {
244 return Ok((file, slot));
245 }
246
247 if wait_secs == 0 {
248 return Err(AppError::AllSlotsFull {
249 max,
250 waited_secs: 0,
251 });
252 }
253
254 let deadline = Instant::now() + Duration::from_secs(wait_secs);
256 let mut polls: u64 = 0;
257 loop {
258 let poll_delay = CLI_LOCK_POLL_INTERVAL_MS
259 .saturating_mul(1 + polls / 4)
260 .min(CLI_LOCK_POLL_INTERVAL_MS * 4);
261 thread::sleep(Duration::from_millis(poll_delay));
262 polls += 1;
263 if let Some((file, slot)) = try_any_slot(max)? {
264 return Ok((file, slot));
265 }
266 if Instant::now() >= deadline {
267 return Err(AppError::AllSlotsFull {
268 max,
269 waited_secs: wait_secs,
270 });
271 }
272 }
273}
274
275pub fn acquire_job_singleton(
289 job_type: JobType,
290 namespace: &str,
291 db_path: &Path,
292 wait_seconds: Option<u64>,
293 force: bool,
294) -> Result<File, AppError> {
295 let db_hash = db_path_hash(db_path);
296 let path = job_singleton_path(job_type, namespace, &db_hash)?;
297
298 if force && path.exists() {
304 tracing::warn!(target: "lock",
305 path = %path.display(),
306 "force=true; removing pre-existing singleton lock file"
307 );
308 let _ = std::fs::remove_file(&path);
309 }
310
311 let file = OpenOptions::new()
312 .read(true)
313 .write(true)
314 .create(true)
315 .truncate(false)
316 .open(&path)?;
317 if let Err(e) = file.try_lock_exclusive() {
318 if !is_lock_contended(&e) {
319 return Err(AppError::Io(e));
320 }
321 let wait_secs = wait_seconds.unwrap_or(0);
323 if wait_secs == 0 {
324 return Err(AppError::JobSingletonLocked {
325 job_type: job_type.tag().to_string(),
326 namespace: namespace.to_string(),
327 });
328 }
329 let deadline = Instant::now() + Duration::from_secs(wait_secs);
330 drop(file);
333 loop {
334 thread::sleep(Duration::from_millis(JOB_SINGLETON_POLL_INTERVAL_MS));
335 let file = OpenOptions::new()
336 .read(true)
337 .write(true)
338 .create(true)
339 .truncate(false)
340 .open(&path)?;
341 if file.try_lock_exclusive().is_ok() {
342 return Ok(file);
343 }
344 if Instant::now() >= deadline {
345 return Err(AppError::JobSingletonLocked {
346 job_type: job_type.tag().to_string(),
347 namespace: namespace.to_string(),
348 });
349 }
350 }
351 }
352 Ok(file)
353}
354
355fn try_any_slot(max: usize) -> Result<Option<(File, usize)>, AppError> {
360 for slot in 1..=max {
361 match try_acquire_slot(slot) {
362 Ok(file) => return Ok(Some((file, slot))),
363 Err(AppError::Io(e)) if is_lock_contended(&e) => continue,
364 Err(e) => return Err(e),
365 }
366 }
367 Ok(None)
368}
369
/// Tells whether `error` means "the lock is held by someone else".
///
/// `WouldBlock` counts as contention on every platform. On Windows the raw
/// OS error codes 32 and 33 count as well.
// NOTE(review): 32/33 look like ERROR_SHARING_VIOLATION and
// ERROR_LOCK_VIOLATION — confirm against the Windows error code table.
fn is_lock_contended(error: &std::io::Error) -> bool {
    let windows_lock_code = cfg!(windows) && matches!(error.raw_os_error(), Some(32 | 33));
    windows_lock_code || error.kind() == std::io::ErrorKind::WouldBlock
}
385
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicUsize, Ordering};

    // Process-wide counter that keeps the generated namespaces distinct.
    static SEQ: AtomicUsize = AtomicUsize::new(0);

    /// Returns a namespace unique to this process and call.
    fn unique_ns() -> String {
        let pid = std::process::id();
        let n = SEQ.fetch_add(1, Ordering::SeqCst);
        format!("test-{pid}-{n}")
    }

    /// The namespace is lower-cased and unsafe characters become `-`.
    #[test]
    fn job_singleton_path_sanitises_namespace() {
        let lock_path = job_singleton_path(JobType::Enrich, "Foo Bar/Baz", "abc123def456")
            .expect("path should resolve");
        let name = lock_path
            .file_name()
            .unwrap()
            .to_string_lossy()
            .into_owned();
        assert!(name.contains("enrich"), "got {name}");
        assert!(name.contains("foo-bar-baz"), "got {name}");
        assert!(
            name.contains("abc123def456"),
            "must embed db_hash: got {name}"
        );
    }

    /// A second acquire on the same job, namespace and database is refused.
    #[test]
    fn job_singleton_blocks_second_invocation_same_namespace() {
        let ns = unique_ns();
        let db = std::env::temp_dir().join(format!("test-{}.sqlite", unique_ns()));
        let holder = acquire_job_singleton(JobType::Enrich, &ns, &db, Some(0), false)
            .expect("first acquire should succeed");
        let second = acquire_job_singleton(JobType::Enrich, &ns, &db, Some(0), false);
        assert!(
            matches!(second, Err(AppError::JobSingletonLocked { .. })),
            "expected JobSingletonLocked, got {second:?}"
        );
        drop(holder);
    }

    /// Different namespaces can hold their singletons at the same time.
    #[test]
    fn job_singleton_allows_different_namespaces() {
        let ns_a = unique_ns();
        let ns_b = unique_ns();
        let tmp = std::env::temp_dir();
        let db_a = tmp.join(format!("test-a-{}.sqlite", unique_ns()));
        let db_b = tmp.join(format!("test-b-{}.sqlite", unique_ns()));
        let lock_a = acquire_job_singleton(JobType::IngestClaudeCode, &ns_a, &db_a, Some(0), false)
            .expect("ns_a should acquire");
        let lock_b = acquire_job_singleton(JobType::IngestClaudeCode, &ns_b, &db_b, Some(0), false)
            .expect("ns_b should acquire in parallel");
        drop(lock_a);
        drop(lock_b);
    }

    /// The same namespace on two databases yields two independent locks.
    #[test]
    fn job_singleton_scoped_by_db_hash() {
        let ns = unique_ns();
        let tmp = std::env::temp_dir();
        let db_a = tmp.join(format!("test-x-{}.sqlite", unique_ns()));
        let db_b = tmp.join(format!("test-y-{}.sqlite", unique_ns()));
        let lock_a = acquire_job_singleton(JobType::Enrich, &ns, &db_a, Some(0), false)
            .expect("db_a should acquire");
        let lock_b = acquire_job_singleton(JobType::Enrich, &ns, &db_b, Some(0), false)
            .expect("db_b should acquire independently (G30 fix)");
        drop(lock_a);
        drop(lock_b);
    }

    /// Hashing the same path twice gives the same 12-character result.
    #[test]
    fn db_path_hash_is_stable_for_same_path() {
        let db = std::env::temp_dir().join("hashing-test.sqlite");
        let (h1, h2) = (db_path_hash(&db), db_path_hash(&db));
        assert_eq!(h1, h2, "same path must produce same hash");
        assert_eq!(h1.len(), 12, "BLAKE3 prefix must be 12 hex chars");
    }

    /// Two different paths must not share a hash.
    #[test]
    fn db_path_hash_differs_for_different_paths() {
        let tmp = std::env::temp_dir();
        let hash_a = db_path_hash(&tmp.join("hash-a.sqlite"));
        let hash_b = db_path_hash(&tmp.join("hash-b.sqlite"));
        assert_ne!(hash_a, hash_b);
    }
}
469}