1use serde::{Deserialize, Serialize};
14use std::path::{Path, PathBuf};
15use std::process::Command;
16use uuid::Uuid;
17
18pub mod access;
19pub mod chunker;
20pub mod consolidation;
21pub mod db;
22pub mod document;
23pub mod embedder;
24pub mod error;
25pub mod graph;
26pub mod item;
27pub mod mcp;
28pub mod retry;
29
30pub use chunker::{ChunkResult, ChunkingConfig, chunk_content};
31pub use db::Database;
32pub use document::ContentType;
33pub use embedder::{EMBEDDING_DIM, Embedder, EmbeddingModel};
34pub use error::{Result, SedimentError};
35pub use item::{Chunk, ConflictInfo, Item, ItemFilters, SearchResult, StoreResult};
36pub use retry::{RetryConfig, with_retry};
37
38#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
40#[serde(rename_all = "lowercase")]
41pub enum StoreScope {
42 #[default]
44 Project,
45 Global,
47}
48
49impl std::str::FromStr for StoreScope {
50 type Err = String;
51
52 fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
53 match s.to_lowercase().as_str() {
54 "project" => Ok(StoreScope::Project),
55 "global" => Ok(StoreScope::Global),
56 _ => Err(format!(
57 "Invalid store scope: {}. Use 'project' or 'global'",
58 s
59 )),
60 }
61 }
62}
63
64impl std::fmt::Display for StoreScope {
65 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
66 match self {
67 StoreScope::Project => write!(f, "project"),
68 StoreScope::Global => write!(f, "global"),
69 }
70 }
71}
72
73#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
75#[serde(rename_all = "lowercase")]
76pub enum ListScope {
77 #[default]
79 Project,
80 Global,
82 All,
84}
85
86impl std::str::FromStr for ListScope {
87 type Err = String;
88
89 fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
90 match s.to_lowercase().as_str() {
91 "project" => Ok(ListScope::Project),
92 "global" => Ok(ListScope::Global),
93 "all" => Ok(ListScope::All),
94 _ => Err(format!(
95 "Invalid list scope: {}. Use 'project', 'global', or 'all'",
96 s
97 )),
98 }
99 }
100}
101
102impl std::fmt::Display for ListScope {
103 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
104 match self {
105 ListScope::Project => write!(f, "project"),
106 ListScope::Global => write!(f, "global"),
107 ListScope::All => write!(f, "all"),
108 }
109 }
110}
111
112pub fn central_db_path() -> PathBuf {
117 if let Ok(path) = std::env::var("SEDIMENT_DB") {
118 return PathBuf::from(path);
119 }
120
121 dirs::home_dir()
122 .unwrap_or_else(|| PathBuf::from("."))
123 .join(".sediment")
124 .join("data")
125}
126
127#[derive(Debug, Clone, Serialize, Deserialize)]
129pub struct ProjectConfig {
130 pub project_id: String,
132 #[serde(default = "default_source")]
134 pub source: String,
135 #[serde(default, skip_serializing_if = "Option::is_none")]
137 pub migrated_from: Option<String>,
138}
139
/// Serde default for `ProjectConfig::source`: configs that lack the field are
/// treated as UUID-based.
fn default_source() -> String {
    String::from("uuid")
}
143
/// Derives a project identifier from the repository's root commit.
///
/// Runs `git rev-parse --is-shallow-repository` followed by
/// `git rev-list --max-parents=0 HEAD` inside `project_root` and returns the
/// first hash that `rev-list` prints.
///
/// Returns `Ok(None)` when:
/// - spawning `git` fails with `ErrorKind::NotFound` (e.g. the executable is
///   not available),
/// - the repository reports itself as shallow,
/// - `rev-list` exits unsuccessfully (as it does outside a repository or in a
///   repository without commits), or
/// - the first output line is empty, longer than 64 characters, or not purely
///   hexadecimal.
///
/// # Errors
///
/// Any spawn failure other than `NotFound` is returned to the caller.
pub fn derive_git_root_commit(project_root: &Path) -> std::io::Result<Option<String>> {
    use std::process::{Output, Stdio};

    // Runs `git <args>` in `project_root`, capturing stdout and discarding
    // stderr. A `NotFound` spawn error becomes `Ok(None)`; others propagate.
    let run_git = |args: &[&str]| -> std::io::Result<Option<Output>> {
        let spawned = Command::new("git")
            .args(args)
            .current_dir(project_root)
            .stdout(Stdio::piped())
            .stderr(Stdio::null())
            .output();
        match spawned {
            Ok(output) => Ok(Some(output)),
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
            Err(err) => Err(err),
        }
    };

    // A shallow clone does not contain the real root commit, so bail out early.
    // A failing probe is not fatal: the `rev-list` call below decides.
    let Some(shallow_probe) = run_git(&["rev-parse", "--is-shallow-repository"])? else {
        return Ok(None);
    };
    let is_shallow = shallow_probe.status.success()
        && String::from_utf8_lossy(&shallow_probe.stdout).trim() == "true";
    if is_shallow {
        return Ok(None);
    }

    let Some(roots) = run_git(&["rev-list", "--max-parents=0", "HEAD"])? else {
        return Ok(None);
    };
    if !roots.status.success() {
        return Ok(None);
    }

    // Histories with several roots print several lines; only the first is used.
    let listing = String::from_utf8_lossy(&roots.stdout);
    let candidate = listing.lines().next().map(str::trim).unwrap_or("");

    // 64 hex digits is enough for a SHA-256 object name.
    let looks_like_hash = !candidate.is_empty()
        && candidate.len() <= 64
        && candidate.chars().all(|c| c.is_ascii_hexdigit());

    Ok(looks_like_hash.then(|| candidate.to_string()))
}
195
196pub fn get_or_create_project_id(project_root: &Path) -> std::io::Result<String> {
202 let sediment_dir = project_root.join(".sediment");
203 let config_path = sediment_dir.join("config");
204
205 if config_path.exists() {
207 let content = std::fs::read_to_string(&config_path)?;
208 if let Ok(config) = serde_json::from_str::<ProjectConfig>(&content) {
209 if config.source == "git-root-commit" {
210 return Ok(config.project_id);
212 }
213
214 if let Ok(Some(git_hash)) = derive_git_root_commit(project_root) {
216 let new_config = ProjectConfig {
217 project_id: git_hash.clone(),
218 source: "git-root-commit".to_string(),
219 migrated_from: Some(config.project_id),
220 };
221 write_config_atomic(&sediment_dir, &config_path, &new_config)?;
222 return Ok(git_hash);
223 }
224
225 return Ok(config.project_id);
227 }
228 }
229
230 std::fs::create_dir_all(&sediment_dir)?;
232
233 let config = if let Ok(Some(git_hash)) = derive_git_root_commit(project_root) {
234 ProjectConfig {
235 project_id: git_hash,
236 source: "git-root-commit".to_string(),
237 migrated_from: None,
238 }
239 } else {
240 ProjectConfig {
241 project_id: Uuid::new_v4().to_string(),
242 source: "uuid".to_string(),
243 migrated_from: None,
244 }
245 };
246
247 write_config_atomic(&sediment_dir, &config_path, &config)?;
248
249 let final_content = std::fs::read_to_string(&config_path)?;
251 if let Ok(final_config) = serde_json::from_str::<ProjectConfig>(&final_content) {
252 Ok(final_config.project_id)
253 } else {
254 Ok(config.project_id)
255 }
256}
257
258fn write_config_atomic(
260 sediment_dir: &Path,
261 config_path: &Path,
262 config: &ProjectConfig,
263) -> std::io::Result<()> {
264 let content =
265 serde_json::to_string_pretty(config).map_err(|e| std::io::Error::other(e.to_string()))?;
266 let tmp_path = sediment_dir.join(format!("config.tmp.{}", std::process::id()));
267 std::fs::write(&tmp_path, &content)?;
268
269 if let Err(e) = std::fs::rename(&tmp_path, config_path) {
270 let _ = std::fs::remove_file(&tmp_path);
271 return Err(e);
272 }
273 Ok(())
274}
275
276pub fn pending_migration(project_root: &Path) -> Option<String> {
281 let config_path = project_root.join(".sediment").join("config");
282 let content = std::fs::read_to_string(&config_path).ok()?;
283 let config: ProjectConfig = serde_json::from_str(&content).ok()?;
284 config.migrated_from
285}
286
287pub fn clear_migration_marker(project_root: &Path) -> std::io::Result<()> {
289 let sediment_dir = project_root.join(".sediment");
290 let config_path = sediment_dir.join("config");
291
292 let content = std::fs::read_to_string(&config_path)?;
293 if let Ok(mut config) = serde_json::from_str::<ProjectConfig>(&content)
294 && config.migrated_from.is_some()
295 {
296 config.migrated_from = None;
297 write_config_atomic(&sediment_dir, &config_path, &config)?;
298 }
299 Ok(())
300}
301
/// Adjusts a similarity score according to project affinity.
///
/// - Both project IDs known and equal: `base` is returned unchanged.
/// - Both project IDs known but different: `base` is scaled by `0.875`.
/// - Either ID missing: `base` is returned unchanged.
///
/// Despite the name, the score is never increased; results from a different
/// project are only scaled down.
pub fn boost_similarity(
    base: f32,
    mem_project: Option<&str>,
    current_project: Option<&str>,
) -> f32 {
    const CROSS_PROJECT_FACTOR: f32 = 0.875;

    let crosses_projects = matches!(
        (mem_project, current_project),
        (Some(mem), Some(current)) if mem != current
    );

    if crosses_projects {
        base * CROSS_PROJECT_FACTOR
    } else {
        base
    }
}
318
/// Finds the nearest enclosing project root for `start`.
///
/// Begins at `start` (or at its parent directory when `start` is an existing
/// file) and walks upwards through `Path::parent`. The first directory that
/// contains a `.sediment` directory, or a `.git` entry of any kind, is
/// returned. At most 100 directories are examined.
///
/// The walk is purely lexical: a relative `start` is not resolved against the
/// current directory, so the search ends once the relative path's components
/// are exhausted. Pass an absolute path to search all the way to the
/// filesystem root.
///
/// Returns `None` when no marker is found.
pub fn find_project_root(start: &Path) -> Option<PathBuf> {
    const MAX_LEVELS: usize = 100;

    let origin = if start.is_file() {
        start.parent()?
    } else {
        start
    };

    origin
        .ancestors()
        .take(MAX_LEVELS)
        .find(|dir| dir.join(".sediment").is_dir() || dir.join(".git").exists())
        .map(Path::to_path_buf)
}
356
357pub fn init_project(project_root: &Path) -> std::io::Result<PathBuf> {
361 let sediment_dir = project_root.join(".sediment");
362 std::fs::create_dir_all(&sediment_dir)?;
363
364 get_or_create_project_id(project_root)?;
366
367 Ok(sediment_dir)
368}
369
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `git <args>` in `dir`, panicking with git's stderr when the command
    /// cannot be spawned or exits unsuccessfully, so that a broken fixture is
    /// reported at the point of failure rather than by a later assertion.
    fn git(dir: &Path, args: &[&str]) {
        let output = Command::new("git")
            .args(args)
            .current_dir(dir)
            .output()
            .expect("failed to spawn git");
        assert!(
            output.status.success(),
            "git {:?} failed: {}",
            args,
            String::from_utf8_lossy(&output.stderr)
        );
    }

    /// Creates a repository in `dir` with a local identity and one empty commit.
    fn init_repo_with_commit(dir: &Path) {
        git(dir, &["init"]);
        git(dir, &["config", "user.email", "test@test.com"]);
        git(dir, &["config", "user.name", "Test"]);
        git(dir, &["commit", "--allow-empty", "-m", "init"]);
    }

    /// Reads and parses `<dir>/.sediment/config`.
    fn read_config(dir: &Path) -> ProjectConfig {
        let raw = std::fs::read_to_string(dir.join(".sediment/config")).unwrap();
        serde_json::from_str(&raw).unwrap()
    }

    #[test]
    fn test_list_scope_default_is_project() {
        assert_eq!(ListScope::default(), ListScope::Project);
    }

    #[test]
    fn test_store_scope_default_is_project() {
        assert_eq!(StoreScope::default(), StoreScope::Project);
    }

    #[test]
    fn test_project_config_idempotent() {
        let tmp = tempfile::TempDir::new().unwrap();
        let id1 = get_or_create_project_id(tmp.path()).unwrap();
        let id2 = get_or_create_project_id(tmp.path()).unwrap();
        assert_eq!(id1, id2, "Repeated calls should return the same project ID");
    }

    #[test]
    fn test_boost_similarity() {
        // Same project: unchanged.
        assert!((boost_similarity(0.5, Some("p1"), Some("p1")) - 0.5).abs() < 0.001);
        // Different project: scaled by 0.875.
        assert!((boost_similarity(0.5, Some("p1"), Some("p2")) - 0.4375).abs() < 0.001);
        // Missing project on one side: unchanged.
        assert!((boost_similarity(0.5, None, Some("p1")) - 0.5).abs() < 0.001);
    }

    #[test]
    fn test_project_config_backward_compat() {
        // A config written without `source` must deserialize as UUID-based.
        let json = r#"{"project_id": "550e8400-e29b-41d4-a716-446655440000"}"#;
        let config: ProjectConfig = serde_json::from_str(json).unwrap();
        assert_eq!(config.source, "uuid");
    }

    #[test]
    #[ignore = "requires git on PATH"]
    fn test_derive_git_root_commit_in_repo() {
        let tmp = tempfile::TempDir::new().unwrap();
        let dir = tmp.path();
        init_repo_with_commit(dir);

        let result = derive_git_root_commit(dir).unwrap();
        assert!(result.is_some(), "Should return root commit hash");
        let hash = result.unwrap();
        assert_eq!(hash.len(), 40, "SHA-1 hash should be 40 chars");
        assert!(hash.chars().all(|c| c.is_ascii_hexdigit()), "Should be hex");
    }

    #[test]
    #[ignore = "requires git on PATH"]
    fn test_derive_git_root_commit_no_commits() {
        let tmp = tempfile::TempDir::new().unwrap();
        let dir = tmp.path();
        git(dir, &["init"]);

        let result = derive_git_root_commit(dir).unwrap();
        assert!(result.is_none(), "Repo with no commits should return None");
    }

    #[test]
    fn test_derive_git_root_commit_no_git() {
        let tmp = tempfile::TempDir::new().unwrap();
        let result = derive_git_root_commit(tmp.path()).unwrap();
        assert!(result.is_none(), "Non-git directory should return None");
    }

    #[test]
    #[ignore = "requires git on PATH"]
    fn test_project_id_from_git_root_commit() {
        let tmp = tempfile::TempDir::new().unwrap();
        let dir = tmp.path();
        init_repo_with_commit(dir);

        let project_id = get_or_create_project_id(dir).unwrap();
        let expected = derive_git_root_commit(dir).unwrap().unwrap();
        assert_eq!(
            project_id, expected,
            "Project ID should be the git root commit hash"
        );

        let config = read_config(dir);
        assert_eq!(config.source, "git-root-commit");
    }

    #[test]
    #[ignore = "requires git on PATH"]
    fn test_project_id_migration_uuid_to_git() {
        let tmp = tempfile::TempDir::new().unwrap();
        let dir = tmp.path();

        // Start from a legacy UUID-only config written before the repo exists.
        let sediment_dir = dir.join(".sediment");
        std::fs::create_dir_all(&sediment_dir).unwrap();
        let old_uuid = "550e8400-e29b-41d4-a716-446655440000";
        let old_config = format!(r#"{{"project_id": "{}"}}"#, old_uuid);
        std::fs::write(sediment_dir.join("config"), &old_config).unwrap();

        init_repo_with_commit(dir);

        let project_id = get_or_create_project_id(dir).unwrap();
        let git_hash = derive_git_root_commit(dir).unwrap().unwrap();
        assert_eq!(project_id, git_hash, "Should migrate to git hash");

        let config = read_config(dir);
        assert_eq!(config.source, "git-root-commit");
        assert_eq!(config.migrated_from.as_deref(), Some(old_uuid));

        assert_eq!(pending_migration(dir), Some(old_uuid.to_string()));

        clear_migration_marker(dir).unwrap();
        assert_eq!(pending_migration(dir), None);
    }

    #[test]
    #[ignore = "requires git on PATH"]
    fn test_git_root_commit_fast_path() {
        let tmp = tempfile::TempDir::new().unwrap();
        let dir = tmp.path();
        init_repo_with_commit(dir);

        // First call creates the config; second call reads it back.
        let id1 = get_or_create_project_id(dir).unwrap();
        let id2 = get_or_create_project_id(dir).unwrap();
        assert_eq!(id1, id2, "Fast path should return same ID");

        let config = read_config(dir);
        assert_eq!(config.source, "git-root-commit");
        assert!(
            config.migrated_from.is_none(),
            "No migration on fresh git config"
        );
    }

    #[test]
    fn test_uuid_retained_when_git_unavailable() {
        let tmp = tempfile::TempDir::new().unwrap();
        let dir = tmp.path();

        let id1 = get_or_create_project_id(dir).unwrap();

        let config = read_config(dir);
        assert_eq!(config.source, "uuid");
        assert!(config.migrated_from.is_none());

        let id2 = get_or_create_project_id(dir).unwrap();
        assert_eq!(id1, id2, "UUID should be retained on repeated calls");
    }

    #[test]
    #[ignore = "requires git on PATH"]
    fn test_shallow_clone_falls_back_to_uuid() {
        let tmp = tempfile::TempDir::new().unwrap();
        let origin_dir = tmp.path().join("origin");
        let shallow_dir = tmp.path().join("shallow");
        std::fs::create_dir_all(&origin_dir).unwrap();

        // Two commits, so that a depth-1 clone really is shallow.
        init_repo_with_commit(&origin_dir);
        git(&origin_dir, &["commit", "--allow-empty", "-m", "second"]);

        let origin_url = format!("file://{}", origin_dir.display());
        git(
            tmp.path(),
            &[
                "clone",
                "--depth=1",
                &origin_url,
                shallow_dir.to_str().unwrap(),
            ],
        );

        let result = derive_git_root_commit(&shallow_dir).unwrap();
        assert!(result.is_none(), "Shallow clone should return None");
    }
}