// sediment/lib.rs

//! Sediment: Semantic memory for AI agents
//!
//! A local-first, MCP-native vector database for AI agent memory.
//!
//! ## Features
//!
//! - **Embedded storage** - LanceDB-powered, directory-based, no server required
//! - **Local embeddings** - Uses `all-MiniLM-L6-v2` locally, no API keys needed
//! - **MCP-native** - 4 tools for seamless LLM integration
//! - **Project-aware** - Scoped memories with automatic project detection
//! - **Auto-chunking** - Long content is automatically chunked for better search

13use serde::{Deserialize, Serialize};
14use std::path::{Path, PathBuf};
15use std::process::Command;
16use uuid::Uuid;
17
18pub mod access;
19pub mod chunker;
20pub mod consolidation;
21pub mod db;
22pub mod document;
23pub mod embedder;
24pub mod error;
25pub mod graph;
26pub mod item;
27pub mod mcp;
28pub mod retry;
29
30pub use chunker::{ChunkResult, ChunkingConfig, chunk_content};
31pub use db::Database;
32pub use document::ContentType;
33pub use embedder::{EMBEDDING_DIM, Embedder};
34pub use error::{Result, SedimentError};
35pub use item::{Chunk, ConflictInfo, Item, ItemFilters, SearchResult, StoreResult};
36pub use retry::{RetryConfig, with_retry};
37
38/// Scope for storing items
39#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
40#[serde(rename_all = "lowercase")]
41pub enum StoreScope {
42    /// Store in project-local scope (with project_id)
43    #[default]
44    Project,
45    /// Store in global scope (no project_id)
46    Global,
47}
48
49impl std::str::FromStr for StoreScope {
50    type Err = String;
51
52    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
53        match s.to_lowercase().as_str() {
54            "project" => Ok(StoreScope::Project),
55            "global" => Ok(StoreScope::Global),
56            _ => Err(format!(
57                "Invalid store scope: {}. Use 'project' or 'global'",
58                s
59            )),
60        }
61    }
62}
63
64impl std::fmt::Display for StoreScope {
65    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
66        match self {
67            StoreScope::Project => write!(f, "project"),
68            StoreScope::Global => write!(f, "global"),
69        }
70    }
71}
72
73/// Scope for listing items (recall always searches all with boosting)
74#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
75#[serde(rename_all = "lowercase")]
76pub enum ListScope {
77    /// List only project-local items
78    #[default]
79    Project,
80    /// List only global items
81    Global,
82    /// List all items
83    All,
84}
85
86impl std::str::FromStr for ListScope {
87    type Err = String;
88
89    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
90        match s.to_lowercase().as_str() {
91            "project" => Ok(ListScope::Project),
92            "global" => Ok(ListScope::Global),
93            "all" => Ok(ListScope::All),
94            _ => Err(format!(
95                "Invalid list scope: {}. Use 'project', 'global', or 'all'",
96                s
97            )),
98        }
99    }
100}
101
102impl std::fmt::Display for ListScope {
103    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
104        match self {
105            ListScope::Project => write!(f, "project"),
106            ListScope::Global => write!(f, "global"),
107            ListScope::All => write!(f, "all"),
108        }
109    }
110}
111
112/// Get the central database path.
113///
114/// Returns `~/.sediment/data` or the path specified in `SEDIMENT_DB` environment variable.
115/// Note: LanceDB uses a directory, not a single file.
116pub fn central_db_path() -> PathBuf {
117    if let Ok(path) = std::env::var("SEDIMENT_DB") {
118        return PathBuf::from(path);
119    }
120
121    dirs::home_dir()
122        .unwrap_or_else(|| PathBuf::from("."))
123        .join(".sediment")
124        .join("data")
125}
126
127/// Get the default global database path (alias for central_db_path for backwards compatibility)
128pub fn default_db_path() -> PathBuf {
129    central_db_path()
130}
131
132/// Project configuration stored in `.sediment/config`
133#[derive(Debug, Clone, Serialize, Deserialize)]
134pub struct ProjectConfig {
135    /// Unique project identifier (git root commit hash or UUID)
136    pub project_id: String,
137    /// How the project ID was derived: "git-root-commit" or "uuid"
138    #[serde(default = "default_source")]
139    pub source: String,
140    /// Set during UUID→git migration; cleared after LanceDB items are updated
141    #[serde(default, skip_serializing_if = "Option::is_none")]
142    pub migrated_from: Option<String>,
143}
144
/// Serde default for `ProjectConfig::source`: configs that lack the field are
/// treated as UUID-based.
fn default_source() -> String {
    String::from("uuid")
}

/// Derive a stable project ID from the git repository's initial (root) commit hash.
///
/// Runs `git rev-list --max-parents=0 HEAD` in `project_root` and uses the
/// first line of its output.
///
/// Returns `Ok(Some(hash))` when that line is a full hex object ID
/// (40 characters for SHA-1, 64 for SHA-256).
///
/// Returns `Ok(None)` when:
/// - spawning `git` fails with `NotFound` (e.g. git is not installed),
/// - the repository reports itself as a shallow clone (the root commit of a
///   shallow history is not the true root),
/// - `rev-list` exits unsuccessfully (e.g. not a git repo, or no commits yet),
/// - the output is not a full hex object ID.
///
/// # Errors
///
/// Returns any other I/O error raised while spawning `git`.
pub fn derive_git_root_commit(project_root: &Path) -> std::io::Result<Option<String>> {
    // Check for shallow clone — root commit in shallow history is not the true root
    let Some(shallow_check) =
        run_git_capture(project_root, &["rev-parse", "--is-shallow-repository"])?
    else {
        return Ok(None);
    };
    // A failed check is not conclusive; fall through and let `rev-list` decide.
    if shallow_check.status.success()
        && String::from_utf8_lossy(&shallow_check.stdout).trim() == "true"
    {
        return Ok(None);
    }

    let Some(output) = run_git_capture(project_root, &["rev-list", "--max-parents=0", "HEAD"])?
    else {
        return Ok(None);
    };
    if !output.status.success() {
        return Ok(None);
    }

    let stdout = String::from_utf8_lossy(&output.stdout);
    let hash = stdout.lines().next().unwrap_or("").trim();

    // `rev-list` prints full object IDs, so anything that is not exactly a
    // SHA-1 (40) or SHA-256 (64) hex string is unexpected output, not an ID.
    let is_full_hash =
        matches!(hash.len(), 40 | 64) && hash.bytes().all(|b| b.is_ascii_hexdigit());
    Ok(is_full_hash.then(|| hash.to_string()))
}

/// Run `git <args>` in `dir`, capturing stdout and discarding stderr.
///
/// Returns `Ok(None)` when the process cannot be spawned because of a
/// `NotFound` error; other spawn errors are propagated.
fn run_git_capture(dir: &Path, args: &[&str]) -> std::io::Result<Option<std::process::Output>> {
    let spawned = Command::new("git")
        .args(args)
        .current_dir(dir)
        .stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::null())
        .output();

    match spawned {
        Ok(output) => Ok(Some(output)),
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(None),
        Err(e) => Err(e),
    }
}

201/// Get or create the project ID for a given project root.
202///
203/// The project ID is stored in `<project_root>/.sediment/config`.
204/// If the project is inside a git repo, the ID is derived from the root commit hash
205/// for stability across clones. Falls back to a random UUID for non-git directories.
206pub fn get_or_create_project_id(project_root: &Path) -> std::io::Result<String> {
207    let sediment_dir = project_root.join(".sediment");
208    let config_path = sediment_dir.join("config");
209
210    // Try to read existing config
211    if config_path.exists() {
212        let content = std::fs::read_to_string(&config_path)?;
213        if let Ok(config) = serde_json::from_str::<ProjectConfig>(&content) {
214            if config.source == "git-root-commit" {
215                // Already derived from git — trust it
216                return Ok(config.project_id);
217            }
218
219            // Source is "uuid" (or missing field from old config) — try to upgrade to git
220            if let Ok(Some(git_hash)) = derive_git_root_commit(project_root) {
221                let new_config = ProjectConfig {
222                    project_id: git_hash.clone(),
223                    source: "git-root-commit".to_string(),
224                    migrated_from: Some(config.project_id),
225                };
226                write_config_atomic(&sediment_dir, &config_path, &new_config)?;
227                return Ok(git_hash);
228            }
229
230            // Git derivation failed — keep existing UUID
231            return Ok(config.project_id);
232        }
233    }
234
235    // No existing config — create new one
236    std::fs::create_dir_all(&sediment_dir)?;
237
238    let config = if let Ok(Some(git_hash)) = derive_git_root_commit(project_root) {
239        ProjectConfig {
240            project_id: git_hash,
241            source: "git-root-commit".to_string(),
242            migrated_from: None,
243        }
244    } else {
245        ProjectConfig {
246            project_id: Uuid::new_v4().to_string(),
247            source: "uuid".to_string(),
248            migrated_from: None,
249        }
250    };
251
252    write_config_atomic(&sediment_dir, &config_path, &config)?;
253
254    // Re-read to return the ID that actually persisted (could be from another process)
255    let final_content = std::fs::read_to_string(&config_path)?;
256    if let Ok(final_config) = serde_json::from_str::<ProjectConfig>(&final_content) {
257        Ok(final_config.project_id)
258    } else {
259        Ok(config.project_id)
260    }
261}
262
263/// Write a ProjectConfig atomically via temp file + rename.
264fn write_config_atomic(
265    sediment_dir: &Path,
266    config_path: &Path,
267    config: &ProjectConfig,
268) -> std::io::Result<()> {
269    let content =
270        serde_json::to_string_pretty(config).map_err(|e| std::io::Error::other(e.to_string()))?;
271    let tmp_path = sediment_dir.join(format!("config.tmp.{}", std::process::id()));
272    std::fs::write(&tmp_path, &content)?;
273
274    if let Err(e) = std::fs::rename(&tmp_path, config_path) {
275        let _ = std::fs::remove_file(&tmp_path);
276        return Err(e);
277    }
278    Ok(())
279}
280
281/// Check if a project ID migration is pending (UUID→git hash).
282///
283/// Returns the old project ID if a migration was started but LanceDB items
284/// have not yet been updated.
285pub fn pending_migration(project_root: &Path) -> Option<String> {
286    let config_path = project_root.join(".sediment").join("config");
287    let content = std::fs::read_to_string(&config_path).ok()?;
288    let config: ProjectConfig = serde_json::from_str(&content).ok()?;
289    config.migrated_from
290}
291
292/// Clear the migration marker after LanceDB items have been updated.
293pub fn clear_migration_marker(project_root: &Path) -> std::io::Result<()> {
294    let sediment_dir = project_root.join(".sediment");
295    let config_path = sediment_dir.join("config");
296
297    let content = std::fs::read_to_string(&config_path)?;
298    if let Ok(mut config) = serde_json::from_str::<ProjectConfig>(&content)
299        && config.migrated_from.is_some()
300    {
301        config.migrated_from = None;
302        write_config_atomic(&sediment_dir, &config_path, &config)?;
303    }
304    Ok(())
305}
306
/// Apply similarity boosting based on project context.
///
/// - Same project: 1.15x boost (capped at 1.0)
/// - Different project: 0.95x penalty
/// - Global or no context: no change
///
/// A NaN `base` is returned unchanged in every case.
pub fn boost_similarity(
    base: f32,
    mem_project: Option<&str>,
    current_project: Option<&str>,
) -> f32 {
    const SAME_PROJECT_BOOST: f32 = 1.15;
    const OTHER_PROJECT_PENALTY: f32 = 0.95;

    // `f32::min` returns the non-NaN operand, so without this guard a NaN
    // score from the same project would be promoted to a perfect 1.0.
    if base.is_nan() {
        return base;
    }

    match (mem_project, current_project) {
        // Same project: boost
        (Some(mem), Some(current)) if mem == current => (base * SAME_PROJECT_BOOST).min(1.0),
        // Different project: slight penalty
        (Some(_), Some(_)) => base * OTHER_PROJECT_PENALTY,
        // Global or no context
        _ => base,
    }
}

/// Find the project root by walking up from the given path.
///
/// Looks for directories containing `.sediment/` or `.git/` markers
/// (`.git` may also be a file). If `start` is a file, the search begins at its
/// parent directory. A relative `start` is resolved against the current
/// working directory first, so the ancestors of the working directory are
/// searched too and the returned root is absolute.
///
/// At most 100 directories are inspected.
/// Returns `None` if no project root is found.
pub fn find_project_root(start: &Path) -> Option<PathBuf> {
    const MAX_DEPTH: usize = 100;

    // Anchor relative paths: walking up from e.g. `.` or `src` lexically ends
    // at the empty path and would never reach the working directory's parents.
    // Rebuilding from components also drops redundant `.` segments.
    let anchored: PathBuf = if start.is_relative() {
        match std::env::current_dir() {
            Ok(cwd) => cwd.join(start).components().collect(),
            // Working directory unavailable: fall back to the path as given.
            Err(_) => start.to_path_buf(),
        }
    } else {
        start.to_path_buf()
    };

    // If start is a file, use its parent directory
    let first_dir = if anchored.is_file() {
        anchored.parent()?
    } else {
        anchored.as_path()
    };

    first_dir
        .ancestors()
        .take(MAX_DEPTH)
        // `.sediment` is the explicit project marker; `.git` is the fallback.
        .find(|dir| dir.join(".sediment").is_dir() || dir.join(".git").exists())
        .map(Path::to_path_buf)
}

362/// Initialize a project directory for Sediment.
363///
364/// Creates the `.sediment/` directory in the specified path and generates a project ID.
365pub fn init_project(project_root: &Path) -> std::io::Result<PathBuf> {
366    let sediment_dir = project_root.join(".sediment");
367    std::fs::create_dir_all(&sediment_dir)?;
368
369    // Generate project ID
370    get_or_create_project_id(project_root)?;
371
372    Ok(sediment_dir)
373}
374
#[cfg(test)]
mod tests {
    use super::*;

    /// Run `git <args>` in `dir`, panicking (with git's stderr) if the command
    /// cannot be spawned or exits unsuccessfully, so a broken fixture fails at
    /// the setup step instead of in a later, unrelated assertion.
    fn git(dir: &Path, args: &[&str]) {
        let output = Command::new("git")
            .args(args)
            .current_dir(dir)
            .output()
            .expect("failed to spawn git");
        assert!(
            output.status.success(),
            "git {:?} failed: {}",
            args,
            String::from_utf8_lossy(&output.stderr)
        );
    }

    /// Initialize a repository in `dir` with a test identity and one empty commit.
    fn init_repo_with_commit(dir: &Path) {
        git(dir, &["init"]);
        git(dir, &["config", "user.email", "test@test.com"]);
        git(dir, &["config", "user.name", "Test"]);
        git(dir, &["commit", "--allow-empty", "-m", "init"]);
    }

    /// Read and parse `<dir>/.sediment/config`.
    fn read_config(dir: &Path) -> ProjectConfig {
        let content = std::fs::read_to_string(dir.join(".sediment").join("config")).unwrap();
        serde_json::from_str(&content).unwrap()
    }

    #[test]
    fn test_list_scope_default_is_project() {
        // Fix #17: ListScope::default() should be Project, matching the tool schema default
        assert_eq!(ListScope::default(), ListScope::Project);
    }

    #[test]
    fn test_store_scope_default_is_project() {
        assert_eq!(StoreScope::default(), StoreScope::Project);
    }

    #[test]
    fn test_project_config_idempotent() {
        // Fix #18: get_or_create_project_id should return the same ID on repeated calls
        let tmp = tempfile::TempDir::new().unwrap();
        let id1 = get_or_create_project_id(tmp.path()).unwrap();
        let id2 = get_or_create_project_id(tmp.path()).unwrap();
        assert_eq!(id1, id2, "Repeated calls should return the same project ID");
    }

    #[test]
    fn test_boost_similarity() {
        assert!((boost_similarity(0.5, Some("p1"), Some("p1")) - 0.575).abs() < 0.001);
        assert!((boost_similarity(0.5, Some("p1"), Some("p2")) - 0.475).abs() < 0.001);
        assert!((boost_similarity(0.5, None, Some("p1")) - 0.5).abs() < 0.001);
        // Same-project boost is capped at 1.0
        assert_eq!(boost_similarity(0.95, Some("p1"), Some("p1")), 1.0);
    }

    #[test]
    fn test_project_config_backward_compat() {
        // Config JSON without 'source' field should deserialize with source="uuid"
        let json = r#"{"project_id": "550e8400-e29b-41d4-a716-446655440000"}"#;
        let config: ProjectConfig = serde_json::from_str(json).unwrap();
        assert_eq!(config.source, "uuid");
    }

    #[test]
    #[ignore] // requires git
    fn test_derive_git_root_commit_in_repo() {
        let tmp = tempfile::TempDir::new().unwrap();
        let dir = tmp.path();

        init_repo_with_commit(dir);

        let result = derive_git_root_commit(dir).unwrap();
        assert!(result.is_some(), "Should return root commit hash");
        let hash = result.unwrap();
        assert_eq!(hash.len(), 40, "SHA-1 hash should be 40 chars");
        assert!(hash.chars().all(|c| c.is_ascii_hexdigit()), "Should be hex");
    }

    #[test]
    #[ignore] // requires git
    fn test_derive_git_root_commit_no_commits() {
        let tmp = tempfile::TempDir::new().unwrap();
        let dir = tmp.path();

        git(dir, &["init"]);

        let result = derive_git_root_commit(dir).unwrap();
        assert!(result.is_none(), "Repo with no commits should return None");
    }

    #[test]
    fn test_derive_git_root_commit_no_git() {
        let tmp = tempfile::TempDir::new().unwrap();
        let result = derive_git_root_commit(tmp.path()).unwrap();
        assert!(result.is_none(), "Non-git directory should return None");
    }

    #[test]
    #[ignore] // requires git
    fn test_project_id_from_git_root_commit() {
        let tmp = tempfile::TempDir::new().unwrap();
        let dir = tmp.path();

        init_repo_with_commit(dir);

        let project_id = get_or_create_project_id(dir).unwrap();
        let expected = derive_git_root_commit(dir).unwrap().unwrap();
        assert_eq!(
            project_id, expected,
            "Project ID should be the git root commit hash"
        );

        // Verify config source
        assert_eq!(read_config(dir).source, "git-root-commit");
    }

    #[test]
    #[ignore] // requires git
    fn test_project_id_migration_uuid_to_git() {
        let tmp = tempfile::TempDir::new().unwrap();
        let dir = tmp.path();

        // Write a UUID-based config first
        let sediment_dir = dir.join(".sediment");
        std::fs::create_dir_all(&sediment_dir).unwrap();
        let old_uuid = "550e8400-e29b-41d4-a716-446655440000";
        let old_config = format!(r#"{{"project_id": "{}"}}"#, old_uuid);
        std::fs::write(sediment_dir.join("config"), &old_config).unwrap();

        // Now create a git repo with a commit
        init_repo_with_commit(dir);

        // Calling get_or_create_project_id should migrate to git hash
        let project_id = get_or_create_project_id(dir).unwrap();
        let git_hash = derive_git_root_commit(dir).unwrap().unwrap();
        assert_eq!(project_id, git_hash, "Should migrate to git hash");

        // Config should now have git-root-commit source with migrated_from
        let config = read_config(dir);
        assert_eq!(config.source, "git-root-commit");
        assert_eq!(config.migrated_from.as_deref(), Some(old_uuid));

        // pending_migration should return the old UUID
        assert_eq!(pending_migration(dir), Some(old_uuid.to_string()));

        // clear_migration_marker should remove it
        clear_migration_marker(dir).unwrap();
        assert_eq!(pending_migration(dir), None);
    }

    #[test]
    #[ignore] // requires git
    fn test_git_root_commit_fast_path() {
        let tmp = tempfile::TempDir::new().unwrap();
        let dir = tmp.path();

        // Create git repo with commit
        init_repo_with_commit(dir);

        // First call creates config with git-root-commit source
        let id1 = get_or_create_project_id(dir).unwrap();

        // Second call should return immediately (fast path) without re-deriving
        let id2 = get_or_create_project_id(dir).unwrap();
        assert_eq!(id1, id2, "Fast path should return same ID");

        // Verify config has git-root-commit source
        let config = read_config(dir);
        assert_eq!(config.source, "git-root-commit");
        assert!(
            config.migrated_from.is_none(),
            "No migration on fresh git config"
        );
    }

    #[test]
    fn test_uuid_retained_when_git_unavailable() {
        // Non-git directory: UUID config should be created and retained
        let tmp = tempfile::TempDir::new().unwrap();
        let dir = tmp.path();

        let id1 = get_or_create_project_id(dir).unwrap();

        // Verify it's a UUID with source "uuid"
        let config = read_config(dir);
        assert_eq!(config.source, "uuid");
        assert!(config.migrated_from.is_none());

        // Second call should return the same UUID
        let id2 = get_or_create_project_id(dir).unwrap();
        assert_eq!(id1, id2, "UUID should be retained on repeated calls");
    }

    #[test]
    #[ignore] // requires git
    fn test_shallow_clone_falls_back_to_uuid() {
        let tmp = tempfile::TempDir::new().unwrap();
        let origin_dir = tmp.path().join("origin");
        let shallow_dir = tmp.path().join("shallow");
        std::fs::create_dir_all(&origin_dir).unwrap();

        // Create origin repo with two commits so a depth-1 clone is truly shallow
        init_repo_with_commit(&origin_dir);
        git(&origin_dir, &["commit", "--allow-empty", "-m", "second"]);

        // Shallow clone (file:// protocol required for local shallow clones)
        let origin_url = format!("file://{}", origin_dir.display());
        git(
            tmp.path(),
            &[
                "clone",
                "--depth=1",
                &origin_url,
                shallow_dir.to_str().unwrap(),
            ],
        );

        // derive_git_root_commit should return None for shallow clone
        let result = derive_git_root_commit(&shallow_dir).unwrap();
        assert!(result.is_none(), "Shallow clone should return None");
    }
}