Skip to main content

sediment/
lib.rs

1//! Sediment: Semantic memory for AI agents
2//!
3//! A local-first, MCP-native vector database for AI agent memory.
4//!
5//! ## Features
6//!
7//! - **Embedded storage** - LanceDB-powered, directory-based, no server required
8//! - **Local embeddings** - Uses `all-MiniLM-L6-v2` locally, no API keys needed
9//! - **MCP-native** - 4 tools for seamless LLM integration
10//! - **Project-aware** - Scoped memories with automatic project detection
11//! - **Auto-chunking** - Long content is automatically chunked for better search
12
13use serde::{Deserialize, Serialize};
14use std::path::{Path, PathBuf};
15use std::process::Command;
16use uuid::Uuid;
17
18pub mod access;
19pub mod chunker;
20pub mod consolidation;
21pub mod db;
22pub mod document;
23pub mod embedder;
24pub mod error;
25pub mod graph;
26pub mod item;
27pub mod mcp;
28pub mod retry;
29
30pub use chunker::{ChunkResult, ChunkingConfig, chunk_content};
31pub use db::Database;
32pub use document::ContentType;
33pub use embedder::{EMBEDDING_DIM, Embedder, EmbeddingModel};
34pub use error::{Result, SedimentError};
35pub use item::{Chunk, ConflictInfo, Item, ItemFilters, SearchResult, StoreResult};
36pub use retry::{RetryConfig, with_retry};
37
/// Scope for storing items.
///
/// Serialized and deserialized by serde using lowercase variant names
/// (`"project"`, `"global"`). [`StoreScope::Project`] is the `Default`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum StoreScope {
    /// Store in project-local scope (with project_id)
    #[default]
    Project,
    /// Store in global scope (no project_id)
    Global,
}
48
49impl std::str::FromStr for StoreScope {
50    type Err = String;
51
52    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
53        match s.to_lowercase().as_str() {
54            "project" => Ok(StoreScope::Project),
55            "global" => Ok(StoreScope::Global),
56            _ => Err(format!(
57                "Invalid store scope: {}. Use 'project' or 'global'",
58                s
59            )),
60        }
61    }
62}
63
64impl std::fmt::Display for StoreScope {
65    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
66        match self {
67            StoreScope::Project => write!(f, "project"),
68            StoreScope::Global => write!(f, "global"),
69        }
70    }
71}
72
/// Scope for listing items (recall always searches all with boosting).
///
/// Serialized and deserialized by serde using lowercase variant names
/// (`"project"`, `"global"`, `"all"`). [`ListScope::Project`] is the `Default`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ListScope {
    /// List only project-local items
    #[default]
    Project,
    /// List only global items
    Global,
    /// List all items
    All,
}
85
86impl std::str::FromStr for ListScope {
87    type Err = String;
88
89    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
90        match s.to_lowercase().as_str() {
91            "project" => Ok(ListScope::Project),
92            "global" => Ok(ListScope::Global),
93            "all" => Ok(ListScope::All),
94            _ => Err(format!(
95                "Invalid list scope: {}. Use 'project', 'global', or 'all'",
96                s
97            )),
98        }
99    }
100}
101
102impl std::fmt::Display for ListScope {
103    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
104        match self {
105            ListScope::Project => write!(f, "project"),
106            ListScope::Global => write!(f, "global"),
107            ListScope::All => write!(f, "all"),
108        }
109    }
110}
111
112/// Get the central database path.
113///
114/// Returns `~/.sediment/data` or the path specified in `SEDIMENT_DB` environment variable.
115/// Note: LanceDB uses a directory, not a single file.
116pub fn central_db_path() -> PathBuf {
117    if let Ok(path) = std::env::var("SEDIMENT_DB") {
118        return PathBuf::from(path);
119    }
120
121    dirs::home_dir()
122        .unwrap_or_else(|| PathBuf::from("."))
123        .join(".sediment")
124        .join("data")
125}
126
/// Project configuration stored in `.sediment/config`
///
/// The file is read and written as JSON (via `serde_json`) by the functions in
/// this module.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProjectConfig {
    /// Unique project identifier (git root commit hash or UUID)
    pub project_id: String,
    /// How the project ID was derived: "git-root-commit" or "uuid".
    /// When the field is absent from the file, `default_source` supplies "uuid".
    #[serde(default = "default_source")]
    pub source: String,
    /// Set during UUID→git migration; cleared after LanceDB items are updated.
    /// Defaults to `None` when absent and is omitted from the output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub migrated_from: Option<String>,
}
139
/// Serde default for `ProjectConfig::source`: configs without the field are
/// treated as UUID-derived.
fn default_source() -> String {
    String::from("uuid")
}
143
/// Run `git` with `args` in `project_root`, capturing stdout and discarding stderr.
///
/// Returns `Ok(None)` when spawning fails with `ErrorKind::NotFound` (for example,
/// git is not installed); any other spawn error is propagated.
fn run_git(project_root: &Path, args: &[&str]) -> std::io::Result<Option<std::process::Output>> {
    match Command::new("git")
        .args(args)
        .current_dir(project_root)
        .stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::null())
        .output()
    {
        Ok(output) => Ok(Some(output)),
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(None),
        Err(e) => Err(e),
    }
}

/// Derive a stable project ID from the git repository's initial (root) commit hash.
///
/// Returns `Ok(Some(hash))` if the project root is inside a git repo with at least one commit.
/// Returns `Ok(None)` if git is not installed, the directory is not a git repo, there are
/// no commits, the repository is a shallow clone, or git's output is not a plausible hash.
///
/// # Errors
///
/// Returns the underlying I/O error if spawning `git` fails for any reason other than
/// `ErrorKind::NotFound`.
pub fn derive_git_root_commit(project_root: &Path) -> std::io::Result<Option<String>> {
    // Check for shallow clone — root commit in shallow history is not the true root
    let shallow_check = match run_git(project_root, &["rev-parse", "--is-shallow-repository"])? {
        Some(output) => output,
        None => return Ok(None),
    };

    // A failed check (e.g. not a repository) is not fatal here: the rev-list call
    // below fails in the same situation and yields `None`.
    if shallow_check.status.success()
        && String::from_utf8_lossy(&shallow_check.stdout).trim() == "true"
    {
        return Ok(None);
    }

    let output = match run_git(project_root, &["rev-list", "--max-parents=0", "HEAD"])? {
        Some(output) => output,
        None => return Ok(None),
    };

    if !output.status.success() {
        return Ok(None);
    }

    // Only the first line of output is used, even if git prints several.
    let stdout = String::from_utf8_lossy(&output.stdout);
    let hash = stdout.lines().next().unwrap_or("").trim();

    // Validate: must be non-empty hex, at most 64 chars (covers SHA-1 40 and SHA-256 64)
    let plausible =
        !hash.is_empty() && hash.len() <= 64 && hash.chars().all(|c| c.is_ascii_hexdigit());
    Ok(plausible.then(|| hash.to_string()))
}
195
196/// Get or create the project ID for a given project root.
197///
198/// The project ID is stored in `<project_root>/.sediment/config`.
199/// If the project is inside a git repo, the ID is derived from the root commit hash
200/// for stability across clones. Falls back to a random UUID for non-git directories.
201pub fn get_or_create_project_id(project_root: &Path) -> std::io::Result<String> {
202    let sediment_dir = project_root.join(".sediment");
203    let config_path = sediment_dir.join("config");
204
205    // Try to read existing config
206    if config_path.exists() {
207        let content = std::fs::read_to_string(&config_path)?;
208        if let Ok(config) = serde_json::from_str::<ProjectConfig>(&content) {
209            if config.source == "git-root-commit" {
210                // Already derived from git — trust it
211                return Ok(config.project_id);
212            }
213
214            // Source is "uuid" (or missing field from old config) — try to upgrade to git
215            if let Ok(Some(git_hash)) = derive_git_root_commit(project_root) {
216                let new_config = ProjectConfig {
217                    project_id: git_hash.clone(),
218                    source: "git-root-commit".to_string(),
219                    migrated_from: Some(config.project_id),
220                };
221                write_config_atomic(&sediment_dir, &config_path, &new_config)?;
222                return Ok(git_hash);
223            }
224
225            // Git derivation failed — keep existing UUID
226            return Ok(config.project_id);
227        }
228    }
229
230    // No existing config — create new one
231    std::fs::create_dir_all(&sediment_dir)?;
232
233    let config = if let Ok(Some(git_hash)) = derive_git_root_commit(project_root) {
234        ProjectConfig {
235            project_id: git_hash,
236            source: "git-root-commit".to_string(),
237            migrated_from: None,
238        }
239    } else {
240        ProjectConfig {
241            project_id: Uuid::new_v4().to_string(),
242            source: "uuid".to_string(),
243            migrated_from: None,
244        }
245    };
246
247    write_config_atomic(&sediment_dir, &config_path, &config)?;
248
249    // Re-read to return the ID that actually persisted (could be from another process)
250    let final_content = std::fs::read_to_string(&config_path)?;
251    if let Ok(final_config) = serde_json::from_str::<ProjectConfig>(&final_content) {
252        Ok(final_config.project_id)
253    } else {
254        Ok(config.project_id)
255    }
256}
257
258/// Write a ProjectConfig atomically via temp file + rename.
259fn write_config_atomic(
260    sediment_dir: &Path,
261    config_path: &Path,
262    config: &ProjectConfig,
263) -> std::io::Result<()> {
264    let content =
265        serde_json::to_string_pretty(config).map_err(|e| std::io::Error::other(e.to_string()))?;
266    let tmp_path = sediment_dir.join(format!("config.tmp.{}", std::process::id()));
267    std::fs::write(&tmp_path, &content)?;
268
269    if let Err(e) = std::fs::rename(&tmp_path, config_path) {
270        let _ = std::fs::remove_file(&tmp_path);
271        return Err(e);
272    }
273    Ok(())
274}
275
276/// Check if a project ID migration is pending (UUID→git hash).
277///
278/// Returns the old project ID if a migration was started but LanceDB items
279/// have not yet been updated.
280pub fn pending_migration(project_root: &Path) -> Option<String> {
281    let config_path = project_root.join(".sediment").join("config");
282    let content = std::fs::read_to_string(&config_path).ok()?;
283    let config: ProjectConfig = serde_json::from_str(&content).ok()?;
284    config.migrated_from
285}
286
287/// Clear the migration marker after LanceDB items have been updated.
288pub fn clear_migration_marker(project_root: &Path) -> std::io::Result<()> {
289    let sediment_dir = project_root.join(".sediment");
290    let config_path = sediment_dir.join("config");
291
292    let content = std::fs::read_to_string(&config_path)?;
293    if let Ok(mut config) = serde_json::from_str::<ProjectConfig>(&content)
294        && config.migrated_from.is_some()
295    {
296        config.migrated_from = None;
297        write_config_atomic(&sediment_dir, &config_path, &config)?;
298    }
299    Ok(())
300}
301
/// Adjust a similarity score according to project context.
///
/// - Memory and current project are both known and equal: score unchanged
/// - Both known but different: score multiplied by 0.875
/// - Either side unknown (global memory or no current project): score unchanged
pub fn boost_similarity(
    base: f32,
    mem_project: Option<&str>,
    current_project: Option<&str>,
) -> f32 {
    // Only a memory that belongs to a *different* known project is penalised.
    let cross_project = matches!(
        (mem_project, current_project),
        (Some(mem), Some(current)) if mem != current
    );

    if cross_project { base * 0.875 } else { base }
}
318
/// Find the project root by walking up from the given path.
///
/// Starting at `start` (or its parent directory when `start` is a file), each
/// directory is checked for a `.sediment/` directory or a `.git` entry, and the
/// first directory containing either marker is returned. At most 100 directories
/// are examined. Returns `None` if no project root is found.
///
/// NOTE(review): the walk follows the components of `start` itself, so a relative
/// `start` is never followed above its first component — confirm callers pass
/// absolute paths.
pub fn find_project_root(start: &Path) -> Option<PathBuf> {
    // If start is a file, begin the search at its parent directory
    let origin = if start.is_file() {
        start.parent()?
    } else {
        start
    };

    origin
        .ancestors()
        .take(100)
        .find(|dir| {
            // `.sediment` is the explicit project marker; `.git` is the fallback
            dir.join(".sediment").is_dir() || dir.join(".git").exists()
        })
        .map(Path::to_path_buf)
}
356
357/// Initialize a project directory for Sediment.
358///
359/// Creates the `.sediment/` directory in the specified path and generates a project ID.
360pub fn init_project(project_root: &Path) -> std::io::Result<PathBuf> {
361    let sediment_dir = project_root.join(".sediment");
362    std::fs::create_dir_all(&sediment_dir)?;
363
364    // Generate project ID
365    get_or_create_project_id(project_root)?;
366
367    Ok(sediment_dir)
368}
369
#[cfg(test)]
mod tests {
    use super::*;

    /// Run `git` with `args` inside `dir`, panicking if it cannot be spawned or
    /// exits unsuccessfully, so that setup failures are reported where they happen.
    fn git(dir: &Path, args: &[&str]) {
        let output = Command::new("git")
            .args(args)
            .current_dir(dir)
            .output()
            .expect("failed to spawn git");
        assert!(
            output.status.success(),
            "git {:?} failed: {}",
            args,
            String::from_utf8_lossy(&output.stderr)
        );
    }

    /// Create a repository in `dir` with a committer identity configured.
    fn init_repo(dir: &Path) {
        git(dir, &["init"]);
        git(dir, &["config", "user.email", "test@test.com"]);
        git(dir, &["config", "user.name", "Test"]);
    }

    /// Add an empty commit with the given message to the repository in `dir`.
    fn empty_commit(dir: &Path, message: &str) {
        git(dir, &["commit", "--allow-empty", "-m", message]);
    }

    /// Read and parse `<dir>/.sediment/config`.
    fn read_config(dir: &Path) -> ProjectConfig {
        let content = std::fs::read_to_string(dir.join(".sediment/config")).unwrap();
        serde_json::from_str(&content).unwrap()
    }

    #[test]
    fn test_list_scope_default_is_project() {
        // Fix #17: ListScope::default() should be Project, matching the tool schema default
        assert_eq!(ListScope::default(), ListScope::Project);
    }

    #[test]
    fn test_store_scope_default_is_project() {
        assert_eq!(StoreScope::default(), StoreScope::Project);
    }

    #[test]
    fn test_project_config_idempotent() {
        // Fix #18: get_or_create_project_id should return the same ID on repeated calls
        let tmp = tempfile::TempDir::new().unwrap();
        let id1 = get_or_create_project_id(tmp.path()).unwrap();
        let id2 = get_or_create_project_id(tmp.path()).unwrap();
        assert_eq!(id1, id2, "Repeated calls should return the same project ID");
    }

    #[test]
    fn test_boost_similarity() {
        assert!((boost_similarity(0.5, Some("p1"), Some("p1")) - 0.5).abs() < 0.001);
        assert!((boost_similarity(0.5, Some("p1"), Some("p2")) - 0.4375).abs() < 0.001);
        assert!((boost_similarity(0.5, None, Some("p1")) - 0.5).abs() < 0.001);
    }

    #[test]
    fn test_project_config_backward_compat() {
        // Config JSON without 'source' field should deserialize with source="uuid"
        let json = r#"{"project_id": "550e8400-e29b-41d4-a716-446655440000"}"#;
        let config: ProjectConfig = serde_json::from_str(json).unwrap();
        assert_eq!(config.source, "uuid");
    }

    #[test]
    #[ignore] // requires git
    fn test_derive_git_root_commit_in_repo() {
        let tmp = tempfile::TempDir::new().unwrap();
        let dir = tmp.path();

        init_repo(dir);
        empty_commit(dir, "init");

        let result = derive_git_root_commit(dir).unwrap();
        assert!(result.is_some(), "Should return root commit hash");
        let hash = result.unwrap();
        assert_eq!(hash.len(), 40, "SHA-1 hash should be 40 chars");
        assert!(hash.chars().all(|c| c.is_ascii_hexdigit()), "Should be hex");
    }

    #[test]
    #[ignore] // requires git
    fn test_derive_git_root_commit_no_commits() {
        let tmp = tempfile::TempDir::new().unwrap();
        let dir = tmp.path();

        git(dir, &["init"]);

        let result = derive_git_root_commit(dir).unwrap();
        assert!(result.is_none(), "Repo with no commits should return None");
    }

    #[test]
    fn test_derive_git_root_commit_no_git() {
        let tmp = tempfile::TempDir::new().unwrap();
        let result = derive_git_root_commit(tmp.path()).unwrap();
        assert!(result.is_none(), "Non-git directory should return None");
    }

    #[test]
    #[ignore] // requires git
    fn test_project_id_from_git_root_commit() {
        let tmp = tempfile::TempDir::new().unwrap();
        let dir = tmp.path();

        init_repo(dir);
        empty_commit(dir, "init");

        let project_id = get_or_create_project_id(dir).unwrap();
        let expected = derive_git_root_commit(dir).unwrap().unwrap();
        assert_eq!(
            project_id, expected,
            "Project ID should be the git root commit hash"
        );

        // Verify config source
        assert_eq!(read_config(dir).source, "git-root-commit");
    }

    #[test]
    #[ignore] // requires git
    fn test_project_id_migration_uuid_to_git() {
        let tmp = tempfile::TempDir::new().unwrap();
        let dir = tmp.path();

        // Write a UUID-based config first
        let sediment_dir = dir.join(".sediment");
        std::fs::create_dir_all(&sediment_dir).unwrap();
        let old_uuid = "550e8400-e29b-41d4-a716-446655440000";
        let old_config = format!(r#"{{"project_id": "{}"}}"#, old_uuid);
        std::fs::write(sediment_dir.join("config"), &old_config).unwrap();

        // Now create a git repo with a commit
        init_repo(dir);
        empty_commit(dir, "init");

        // Calling get_or_create_project_id should migrate to git hash
        let project_id = get_or_create_project_id(dir).unwrap();
        let git_hash = derive_git_root_commit(dir).unwrap().unwrap();
        assert_eq!(project_id, git_hash, "Should migrate to git hash");

        // Config should now have git-root-commit source with migrated_from
        let config = read_config(dir);
        assert_eq!(config.source, "git-root-commit");
        assert_eq!(config.migrated_from.as_deref(), Some(old_uuid));

        // pending_migration should return the old UUID
        assert_eq!(pending_migration(dir), Some(old_uuid.to_string()));

        // clear_migration_marker should remove it
        clear_migration_marker(dir).unwrap();
        assert_eq!(pending_migration(dir), None);
    }

    #[test]
    #[ignore] // requires git
    fn test_git_root_commit_fast_path() {
        let tmp = tempfile::TempDir::new().unwrap();
        let dir = tmp.path();

        // Create git repo with commit
        init_repo(dir);
        empty_commit(dir, "init");

        // First call creates config with git-root-commit source
        let id1 = get_or_create_project_id(dir).unwrap();

        // Second call should return immediately (fast path) without re-deriving
        let id2 = get_or_create_project_id(dir).unwrap();
        assert_eq!(id1, id2, "Fast path should return same ID");

        // Verify config has git-root-commit source
        let config = read_config(dir);
        assert_eq!(config.source, "git-root-commit");
        assert!(
            config.migrated_from.is_none(),
            "No migration on fresh git config"
        );
    }

    #[test]
    fn test_uuid_retained_when_git_unavailable() {
        // Non-git directory: UUID config should be created and retained
        let tmp = tempfile::TempDir::new().unwrap();
        let dir = tmp.path();

        let id1 = get_or_create_project_id(dir).unwrap();

        // Verify it's a UUID with source "uuid"
        let config = read_config(dir);
        assert_eq!(config.source, "uuid");
        assert!(config.migrated_from.is_none());

        // Second call should return the same UUID
        let id2 = get_or_create_project_id(dir).unwrap();
        assert_eq!(id1, id2, "UUID should be retained on repeated calls");
    }

    #[test]
    #[ignore] // requires git
    fn test_shallow_clone_falls_back_to_uuid() {
        let tmp = tempfile::TempDir::new().unwrap();
        let origin_dir = tmp.path().join("origin");
        let shallow_dir = tmp.path().join("shallow");
        std::fs::create_dir_all(&origin_dir).unwrap();

        // Create origin repo with two commits so a depth-1 clone is truly shallow
        init_repo(&origin_dir);
        empty_commit(&origin_dir, "init");
        empty_commit(&origin_dir, "second");

        // Shallow clone (file:// protocol required for local shallow clones)
        let origin_url = format!("file://{}", origin_dir.display());
        git(
            tmp.path(),
            &[
                "clone",
                "--depth=1",
                &origin_url,
                shallow_dir.to_str().unwrap(),
            ],
        );

        // derive_git_root_commit should return None for shallow clone
        let result = derive_git_root_commit(&shallow_dir).unwrap();
        assert!(result.is_none(), "Shallow clone should return None");
    }
}