1use serde::{Deserialize, Serialize};
14use std::path::{Path, PathBuf};
15use std::process::Command;
16use uuid::Uuid;
17
18pub mod access;
19pub mod chunker;
20pub mod consolidation;
21pub mod db;
22pub mod document;
23pub mod embedder;
24pub mod error;
25pub mod graph;
26pub mod item;
27pub mod mcp;
28pub mod retry;
29
30pub use chunker::{ChunkResult, ChunkingConfig, chunk_content};
31pub use db::Database;
32pub use document::ContentType;
33pub use embedder::{EMBEDDING_DIM, Embedder};
34pub use error::{Result, SedimentError};
35pub use item::{Chunk, ConflictInfo, Item, ItemFilters, SearchResult, StoreResult};
36pub use retry::{RetryConfig, with_retry};
37
/// Scope selector used when storing (as the name suggests).
///
/// Serialized and deserialized by serde using lowercase variant names
/// (`"project"` / `"global"`). The [`Default`] value is [`StoreScope::Project`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum StoreScope {
    /// Project scope; this is the default variant.
    #[default]
    Project,
    /// Global scope.
    Global,
}
48
49impl std::str::FromStr for StoreScope {
50 type Err = String;
51
52 fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
53 match s.to_lowercase().as_str() {
54 "project" => Ok(StoreScope::Project),
55 "global" => Ok(StoreScope::Global),
56 _ => Err(format!(
57 "Invalid store scope: {}. Use 'project' or 'global'",
58 s
59 )),
60 }
61 }
62}
63
64impl std::fmt::Display for StoreScope {
65 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
66 match self {
67 StoreScope::Project => write!(f, "project"),
68 StoreScope::Global => write!(f, "global"),
69 }
70 }
71}
72
/// Scope selector used when listing (as the name suggests).
///
/// Serialized and deserialized by serde using lowercase variant names
/// (`"project"` / `"global"` / `"all"`). The [`Default`] value is
/// [`ListScope::Project`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ListScope {
    /// Project scope; this is the default variant.
    #[default]
    Project,
    /// Global scope.
    Global,
    /// Presumably both project and global scope combined — confirm against callers.
    All,
}
85
86impl std::str::FromStr for ListScope {
87 type Err = String;
88
89 fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
90 match s.to_lowercase().as_str() {
91 "project" => Ok(ListScope::Project),
92 "global" => Ok(ListScope::Global),
93 "all" => Ok(ListScope::All),
94 _ => Err(format!(
95 "Invalid list scope: {}. Use 'project', 'global', or 'all'",
96 s
97 )),
98 }
99 }
100}
101
102impl std::fmt::Display for ListScope {
103 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
104 match self {
105 ListScope::Project => write!(f, "project"),
106 ListScope::Global => write!(f, "global"),
107 ListScope::All => write!(f, "all"),
108 }
109 }
110}
111
112pub fn central_db_path() -> PathBuf {
117 if let Ok(path) = std::env::var("SEDIMENT_DB") {
118 return PathBuf::from(path);
119 }
120
121 dirs::home_dir()
122 .unwrap_or_else(|| PathBuf::from("."))
123 .join(".sediment")
124 .join("data")
125}
126
/// Returns the default database path.
///
/// This simply delegates to [`central_db_path`].
pub fn default_db_path() -> PathBuf {
    central_db_path()
}
131
/// Per-project configuration persisted as JSON in `<project>/.sediment/config`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProjectConfig {
    /// Identifier of the project: either a git root commit hash or a UUID
    /// string, depending on `source`.
    pub project_id: String,
    /// How `project_id` was derived. This file writes `"git-root-commit"` or
    /// `"uuid"`; when the field is absent from the JSON it defaults to `"uuid"`.
    #[serde(default = "default_source")]
    pub source: String,
    /// Previous project ID, recorded when an existing config is switched to a
    /// git-derived ID. Omitted from the JSON when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub migrated_from: Option<String>,
}
144
/// Serde default for [`ProjectConfig::source`]: configs that lack the field
/// are treated as UUID-based.
fn default_source() -> String {
    String::from("uuid")
}
148
/// Derives a project identifier from the git history rooted at `project_root`.
///
/// Runs `git rev-list --max-parents=0 HEAD` in `project_root` and returns the
/// first line of its output when it looks like a commit hash (non-empty, at
/// most 64 characters, all ASCII hex digits).
///
/// Returns `Ok(None)` when:
/// - the `git` process cannot be started because something was not found
///   (`ErrorKind::NotFound`),
/// - `git rev-parse --is-shallow-repository` succeeds and prints `true`
///   (presumably because a shallow clone does not expose the real root
///   commit — confirm),
/// - `rev-list` exits unsuccessfully (e.g. no commits, or not a repository),
/// - the first output line does not look like a hash.
///
/// # Errors
/// Any other I/O error raised while spawning `git` is propagated.
pub fn derive_git_root_commit(project_root: &Path) -> std::io::Result<Option<String>> {
    let Some(shallow_probe) =
        run_git_quiet(project_root, &["rev-parse", "--is-shallow-repository"])?
    else {
        return Ok(None);
    };

    // A failing probe is tolerated; only an explicit "true" answer bails out.
    let is_shallow = shallow_probe.status.success()
        && String::from_utf8_lossy(&shallow_probe.stdout).trim() == "true";
    if is_shallow {
        return Ok(None);
    }

    let Some(roots) = run_git_quiet(project_root, &["rev-list", "--max-parents=0", "HEAD"])?
    else {
        return Ok(None);
    };
    if !roots.status.success() {
        return Ok(None);
    }

    let listing = String::from_utf8_lossy(&roots.stdout);
    let candidate = listing.lines().next().map(str::trim).unwrap_or("");

    let looks_like_hash = !candidate.is_empty()
        && candidate.len() <= 64
        && candidate.chars().all(|c| c.is_ascii_hexdigit());
    Ok(looks_like_hash.then(|| candidate.to_string()))
}

/// Runs `git <args>` in `project_root`, capturing stdout and discarding stderr.
///
/// A spawn failure of kind `NotFound` is mapped to `Ok(None)`; every other
/// spawn failure is returned as an error.
fn run_git_quiet(
    project_root: &Path,
    args: &[&str],
) -> std::io::Result<Option<std::process::Output>> {
    let spawned = Command::new("git")
        .args(args)
        .current_dir(project_root)
        .stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::null())
        .output();

    match spawned {
        Ok(output) => Ok(Some(output)),
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
        Err(err) => Err(err),
    }
}
200
201pub fn get_or_create_project_id(project_root: &Path) -> std::io::Result<String> {
207 let sediment_dir = project_root.join(".sediment");
208 let config_path = sediment_dir.join("config");
209
210 if config_path.exists() {
212 let content = std::fs::read_to_string(&config_path)?;
213 if let Ok(config) = serde_json::from_str::<ProjectConfig>(&content) {
214 if config.source == "git-root-commit" {
215 return Ok(config.project_id);
217 }
218
219 if let Ok(Some(git_hash)) = derive_git_root_commit(project_root) {
221 let new_config = ProjectConfig {
222 project_id: git_hash.clone(),
223 source: "git-root-commit".to_string(),
224 migrated_from: Some(config.project_id),
225 };
226 write_config_atomic(&sediment_dir, &config_path, &new_config)?;
227 return Ok(git_hash);
228 }
229
230 return Ok(config.project_id);
232 }
233 }
234
235 std::fs::create_dir_all(&sediment_dir)?;
237
238 let config = if let Ok(Some(git_hash)) = derive_git_root_commit(project_root) {
239 ProjectConfig {
240 project_id: git_hash,
241 source: "git-root-commit".to_string(),
242 migrated_from: None,
243 }
244 } else {
245 ProjectConfig {
246 project_id: Uuid::new_v4().to_string(),
247 source: "uuid".to_string(),
248 migrated_from: None,
249 }
250 };
251
252 write_config_atomic(&sediment_dir, &config_path, &config)?;
253
254 let final_content = std::fs::read_to_string(&config_path)?;
256 if let Ok(final_config) = serde_json::from_str::<ProjectConfig>(&final_content) {
257 Ok(final_config.project_id)
258 } else {
259 Ok(config.project_id)
260 }
261}
262
263fn write_config_atomic(
265 sediment_dir: &Path,
266 config_path: &Path,
267 config: &ProjectConfig,
268) -> std::io::Result<()> {
269 let content =
270 serde_json::to_string_pretty(config).map_err(|e| std::io::Error::other(e.to_string()))?;
271 let tmp_path = sediment_dir.join(format!("config.tmp.{}", std::process::id()));
272 std::fs::write(&tmp_path, &content)?;
273
274 if let Err(e) = std::fs::rename(&tmp_path, config_path) {
275 let _ = std::fs::remove_file(&tmp_path);
276 return Err(e);
277 }
278 Ok(())
279}
280
281pub fn pending_migration(project_root: &Path) -> Option<String> {
286 let config_path = project_root.join(".sediment").join("config");
287 let content = std::fs::read_to_string(&config_path).ok()?;
288 let config: ProjectConfig = serde_json::from_str(&content).ok()?;
289 config.migrated_from
290}
291
292pub fn clear_migration_marker(project_root: &Path) -> std::io::Result<()> {
294 let sediment_dir = project_root.join(".sediment");
295 let config_path = sediment_dir.join("config");
296
297 let content = std::fs::read_to_string(&config_path)?;
298 if let Ok(mut config) = serde_json::from_str::<ProjectConfig>(&content)
299 && config.migrated_from.is_some()
300 {
301 config.migrated_from = None;
302 write_config_atomic(&sediment_dir, &config_path, &config)?;
303 }
304 Ok(())
305}
306
/// Adjusts a similarity score according to whether it belongs to the current
/// project.
///
/// - Both project IDs known and equal: `base * 1.15`, capped at `1.0`.
/// - Both project IDs known but different: `base * 0.95`.
/// - Either project ID missing: `base` unchanged.
pub fn boost_similarity(
    base: f32,
    mem_project: Option<&str>,
    current_project: Option<&str>,
) -> f32 {
    let (Some(mem), Some(current)) = (mem_project, current_project) else {
        return base;
    };

    if mem == current {
        (base * 1.15).min(1.0)
    } else {
        base * 0.95
    }
}
323
/// Walks upward from `start` looking for a project root.
///
/// If `start` is a file, the search begins at its parent directory. A
/// directory is considered a project root when it contains a `.sediment`
/// directory or a `.git` entry (file or directory). The nearest such
/// ancestor wins. At most 100 directories are inspected; `None` is returned
/// when none of them qualifies.
///
/// NOTE(review): `central_db_path` places its data under `<home>/.sediment`,
/// so a search started below the home directory may stop there if that
/// directory exists — confirm this is intended.
pub fn find_project_root(start: &Path) -> Option<PathBuf> {
    let origin = if start.is_file() {
        start.parent()?
    } else {
        start
    };

    // `ancestors` yields `origin` first, then each successive parent.
    origin
        .ancestors()
        .take(100)
        .find(|dir| dir.join(".sediment").is_dir() || dir.join(".git").exists())
        .map(Path::to_path_buf)
}
361
362pub fn init_project(project_root: &Path) -> std::io::Result<PathBuf> {
366 let sediment_dir = project_root.join(".sediment");
367 std::fs::create_dir_all(&sediment_dir)?;
368
369 get_or_create_project_id(project_root)?;
371
372 Ok(sediment_dir)
373}
374
#[cfg(test)]
mod tests {
    use super::*;

    // NOTE(review): the tests marked `#[ignore]` all shell out to a real `git`
    // binary; presumably that is why they are opt-in — confirm. Run them with
    // `cargo test -- --ignored`.

    /// Runs `git <args>` in `cwd` and panics, including git's stderr, if the
    /// command cannot be spawned or exits unsuccessfully. Failing here gives a
    /// clearer message than a later assertion on a half-initialized repo.
    fn git(cwd: &Path, args: &[&str]) {
        let output = Command::new("git")
            .args(args)
            .current_dir(cwd)
            .output()
            .expect("failed to spawn git");
        assert!(
            output.status.success(),
            "git {:?} failed: {}",
            args,
            String::from_utf8_lossy(&output.stderr)
        );
    }

    /// Initializes a repository in `dir` with a test identity and a single
    /// empty commit.
    fn init_repo_with_commit(dir: &Path) {
        git(dir, &["init"]);
        git(dir, &["config", "user.email", "test@test.com"]);
        git(dir, &["config", "user.name", "Test"]);
        git(dir, &["commit", "--allow-empty", "-m", "init"]);
    }

    /// Reads and parses `<dir>/.sediment/config`.
    fn read_project_config(dir: &Path) -> ProjectConfig {
        let raw = std::fs::read_to_string(dir.join(".sediment").join("config")).unwrap();
        serde_json::from_str(&raw).unwrap()
    }

    #[test]
    fn test_list_scope_default_is_project() {
        assert_eq!(ListScope::default(), ListScope::Project);
    }

    #[test]
    fn test_store_scope_default_is_project() {
        assert_eq!(StoreScope::default(), StoreScope::Project);
    }

    #[test]
    fn test_project_config_idempotent() {
        let tmp = tempfile::TempDir::new().unwrap();
        let id1 = get_or_create_project_id(tmp.path()).unwrap();
        let id2 = get_or_create_project_id(tmp.path()).unwrap();
        assert_eq!(id1, id2, "Repeated calls should return the same project ID");
    }

    #[test]
    fn test_boost_similarity() {
        assert!((boost_similarity(0.5, Some("p1"), Some("p1")) - 0.575).abs() < 0.001);
        assert!((boost_similarity(0.5, Some("p1"), Some("p2")) - 0.475).abs() < 0.001);
        assert!((boost_similarity(0.5, None, Some("p1")) - 0.5).abs() < 0.001);
    }

    #[test]
    fn test_project_config_backward_compat() {
        // A config without a `source` field must default to "uuid".
        let json = r#"{"project_id": "550e8400-e29b-41d4-a716-446655440000"}"#;
        let config: ProjectConfig = serde_json::from_str(json).unwrap();
        assert_eq!(config.source, "uuid");
    }

    #[test]
    #[ignore]
    fn test_derive_git_root_commit_in_repo() {
        let tmp = tempfile::TempDir::new().unwrap();
        let dir = tmp.path();
        init_repo_with_commit(dir);

        let result = derive_git_root_commit(dir).unwrap();
        assert!(result.is_some(), "Should return root commit hash");
        let hash = result.unwrap();
        assert_eq!(hash.len(), 40, "SHA-1 hash should be 40 chars");
        assert!(hash.chars().all(|c| c.is_ascii_hexdigit()), "Should be hex");
    }

    #[test]
    #[ignore]
    fn test_derive_git_root_commit_no_commits() {
        let tmp = tempfile::TempDir::new().unwrap();
        let dir = tmp.path();
        git(dir, &["init"]);

        let result = derive_git_root_commit(dir).unwrap();
        assert!(result.is_none(), "Repo with no commits should return None");
    }

    #[test]
    fn test_derive_git_root_commit_no_git() {
        let tmp = tempfile::TempDir::new().unwrap();
        let result = derive_git_root_commit(tmp.path()).unwrap();
        assert!(result.is_none(), "Non-git directory should return None");
    }

    #[test]
    #[ignore]
    fn test_project_id_from_git_root_commit() {
        let tmp = tempfile::TempDir::new().unwrap();
        let dir = tmp.path();
        init_repo_with_commit(dir);

        let project_id = get_or_create_project_id(dir).unwrap();
        let expected = derive_git_root_commit(dir).unwrap().unwrap();
        assert_eq!(
            project_id, expected,
            "Project ID should be the git root commit hash"
        );

        let config = read_project_config(dir);
        assert_eq!(config.source, "git-root-commit");
    }

    #[test]
    #[ignore]
    fn test_project_id_migration_uuid_to_git() {
        let tmp = tempfile::TempDir::new().unwrap();
        let dir = tmp.path();

        // Start from a legacy UUID-only config, written before the repo exists.
        let sediment_dir = dir.join(".sediment");
        std::fs::create_dir_all(&sediment_dir).unwrap();
        let old_uuid = "550e8400-e29b-41d4-a716-446655440000";
        let old_config = format!(r#"{{"project_id": "{}"}}"#, old_uuid);
        std::fs::write(sediment_dir.join("config"), &old_config).unwrap();

        init_repo_with_commit(dir);

        let project_id = get_or_create_project_id(dir).unwrap();
        let git_hash = derive_git_root_commit(dir).unwrap().unwrap();
        assert_eq!(project_id, git_hash, "Should migrate to git hash");

        let config = read_project_config(dir);
        assert_eq!(config.source, "git-root-commit");
        assert_eq!(config.migrated_from.as_deref(), Some(old_uuid));

        assert_eq!(pending_migration(dir), Some(old_uuid.to_string()));

        clear_migration_marker(dir).unwrap();
        assert_eq!(pending_migration(dir), None);
    }

    #[test]
    #[ignore]
    fn test_git_root_commit_fast_path() {
        let tmp = tempfile::TempDir::new().unwrap();
        let dir = tmp.path();
        init_repo_with_commit(dir);

        let id1 = get_or_create_project_id(dir).unwrap();
        let id2 = get_or_create_project_id(dir).unwrap();
        assert_eq!(id1, id2, "Fast path should return same ID");

        let config = read_project_config(dir);
        assert_eq!(config.source, "git-root-commit");
        assert!(
            config.migrated_from.is_none(),
            "No migration on fresh git config"
        );
    }

    #[test]
    fn test_uuid_retained_when_git_unavailable() {
        let tmp = tempfile::TempDir::new().unwrap();
        let dir = tmp.path();

        let id1 = get_or_create_project_id(dir).unwrap();

        let config = read_project_config(dir);
        assert_eq!(config.source, "uuid");
        assert!(config.migrated_from.is_none());

        let id2 = get_or_create_project_id(dir).unwrap();
        assert_eq!(id1, id2, "UUID should be retained on repeated calls");
    }

    #[test]
    #[ignore]
    fn test_shallow_clone_falls_back_to_uuid() {
        let tmp = tempfile::TempDir::new().unwrap();
        let origin_dir = tmp.path().join("origin");
        let shallow_dir = tmp.path().join("shallow");
        std::fs::create_dir_all(&origin_dir).unwrap();

        // Two commits, so that a depth-1 clone is genuinely shallow.
        init_repo_with_commit(&origin_dir);
        git(&origin_dir, &["commit", "--allow-empty", "-m", "second"]);

        // Both paths are absolute, so the working directory used for the
        // clone command does not matter.
        let origin_url = format!("file://{}", origin_dir.display());
        git(
            tmp.path(),
            &[
                "clone",
                "--depth=1",
                &origin_url,
                shallow_dir.to_str().unwrap(),
            ],
        );

        let result = derive_git_root_commit(&shallow_dir).unwrap();
        assert!(result.is_none(), "Shallow clone should return None");
    }
}