Skip to main content

sediment/
lib.rs

1//! Sediment: Semantic memory for AI agents
2//!
3//! A local-first, MCP-native vector database for AI agent memory.
4//!
5//! ## Features
6//!
7//! - **Embedded storage** - LanceDB-powered, directory-based, no server required
8//! - **Local embeddings** - Uses `all-MiniLM-L6-v2` locally, no API keys needed
9//! - **MCP-native** - 4 tools for seamless LLM integration
10//! - **Project-aware** - Scoped memories with automatic project detection
11//! - **Auto-chunking** - Long content is automatically chunked for better search
12
13use serde::{Deserialize, Serialize};
14use std::path::{Path, PathBuf};
15use uuid::Uuid;
16
17pub mod access;
18pub mod chunker;
19pub mod consolidation;
20pub mod db;
21pub mod document;
22pub mod embedder;
23pub mod error;
24pub mod graph;
25pub mod item;
26pub mod mcp;
27pub mod retry;
28
29pub use chunker::{ChunkResult, ChunkingConfig, chunk_content};
30pub use db::Database;
31pub use document::ContentType;
32pub use embedder::{EMBEDDING_DIM, Embedder};
33pub use error::{Result, SedimentError};
34pub use item::{Chunk, ConflictInfo, Item, ItemFilters, SearchResult, StoreResult};
35pub use retry::{RetryConfig, with_retry};
36
37/// Scope for storing items
38#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
39#[serde(rename_all = "lowercase")]
40pub enum StoreScope {
41    /// Store in project-local scope (with project_id)
42    #[default]
43    Project,
44    /// Store in global scope (no project_id)
45    Global,
46}
47
48impl std::str::FromStr for StoreScope {
49    type Err = String;
50
51    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
52        match s.to_lowercase().as_str() {
53            "project" => Ok(StoreScope::Project),
54            "global" => Ok(StoreScope::Global),
55            _ => Err(format!(
56                "Invalid store scope: {}. Use 'project' or 'global'",
57                s
58            )),
59        }
60    }
61}
62
63impl std::fmt::Display for StoreScope {
64    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
65        match self {
66            StoreScope::Project => write!(f, "project"),
67            StoreScope::Global => write!(f, "global"),
68        }
69    }
70}
71
72/// Scope for listing items (recall always searches all with boosting)
73#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
74#[serde(rename_all = "lowercase")]
75pub enum ListScope {
76    /// List only project-local items
77    #[default]
78    Project,
79    /// List only global items
80    Global,
81    /// List all items
82    All,
83}
84
85impl std::str::FromStr for ListScope {
86    type Err = String;
87
88    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
89        match s.to_lowercase().as_str() {
90            "project" => Ok(ListScope::Project),
91            "global" => Ok(ListScope::Global),
92            "all" => Ok(ListScope::All),
93            _ => Err(format!(
94                "Invalid list scope: {}. Use 'project', 'global', or 'all'",
95                s
96            )),
97        }
98    }
99}
100
101impl std::fmt::Display for ListScope {
102    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
103        match self {
104            ListScope::Project => write!(f, "project"),
105            ListScope::Global => write!(f, "global"),
106            ListScope::All => write!(f, "all"),
107        }
108    }
109}
110
111/// Get the central database path.
112///
113/// Returns `~/.sediment/data` or the path specified in `SEDIMENT_DB` environment variable.
114/// Note: LanceDB uses a directory, not a single file.
115pub fn central_db_path() -> PathBuf {
116    if let Ok(path) = std::env::var("SEDIMENT_DB") {
117        return PathBuf::from(path);
118    }
119
120    dirs::home_dir()
121        .unwrap_or_else(|| PathBuf::from("."))
122        .join(".sediment")
123        .join("data")
124}
125
126/// Get the default global database path (alias for central_db_path for backwards compatibility)
127pub fn default_db_path() -> PathBuf {
128    central_db_path()
129}
130
131/// Project configuration stored in `.sediment/config`
132#[derive(Debug, Clone, Serialize, Deserialize)]
133pub struct ProjectConfig {
134    /// Unique project identifier (UUID)
135    pub project_id: String,
136}
137
138impl Default for ProjectConfig {
139    fn default() -> Self {
140        Self {
141            project_id: Uuid::new_v4().to_string(),
142        }
143    }
144}
145
146/// Get or create the project ID for a given project root.
147///
148/// The project ID is stored in `<project_root>/.sediment/config`.
149/// If no config exists, a new UUID is generated and saved.
150pub fn get_or_create_project_id(project_root: &Path) -> std::io::Result<String> {
151    let sediment_dir = project_root.join(".sediment");
152    let config_path = sediment_dir.join("config");
153
154    // Try to read existing config
155    if config_path.exists() {
156        let content = std::fs::read_to_string(&config_path)?;
157        if let Ok(config) = serde_json::from_str::<ProjectConfig>(&content) {
158            return Ok(config.project_id);
159        }
160    }
161
162    // Create new config with generated UUID
163    let config = ProjectConfig::default();
164
165    // Ensure .sediment directory exists
166    std::fs::create_dir_all(&sediment_dir)?;
167
168    // Write to a temp file first, then atomically rename to prevent TOCTOU races
169    // where two concurrent processes both see the file as missing and write different UUIDs.
170    let content =
171        serde_json::to_string_pretty(&config).map_err(|e| std::io::Error::other(e.to_string()))?;
172    let tmp_path = sediment_dir.join(format!("config.tmp.{}", std::process::id()));
173    std::fs::write(&tmp_path, &content)?;
174
175    // Atomic rename: on Unix this is atomic. The first writer wins; subsequent renames
176    // just overwrite with a different UUID but that's acceptable since no data existed yet.
177    // After rename, re-read to get whichever UUID actually persisted.
178    if let Err(e) = std::fs::rename(&tmp_path, &config_path) {
179        // Clean up temp file on failure
180        let _ = std::fs::remove_file(&tmp_path);
181        return Err(e);
182    }
183
184    // Re-read to return the UUID that actually persisted (could be from another process)
185    let final_content = std::fs::read_to_string(&config_path)?;
186    if let Ok(final_config) = serde_json::from_str::<ProjectConfig>(&final_content) {
187        Ok(final_config.project_id)
188    } else {
189        Ok(config.project_id)
190    }
191}
192
/// Apply similarity boosting based on project context.
///
/// - Same project: 1.15x boost (capped at 1.0)
/// - Different project: 0.95x penalty
/// - Global or no context (either side is `None`): no change
///
/// A NaN `base` is returned as NaN in every case; it is never turned into a
/// valid score.
pub fn boost_similarity(
    base: f32,
    mem_project: Option<&str>,
    current_project: Option<&str>,
) -> f32 {
    match (mem_project, current_project) {
        // Same project: boost, then cap.
        (Some(m), Some(c)) if m == c => {
            let boosted = base * 1.15;
            // Explicit comparison instead of `f32::min`: `min` returns the
            // non-NaN operand, so `NaN.min(1.0)` would yield a perfect score.
            if boosted > 1.0 { 1.0 } else { boosted }
        }
        // Different project: slight penalty.
        (Some(_), Some(_)) => base * 0.95,
        // Global or no context.
        _ => base,
    }
}
209
/// Locate the project root at or above `start`.
///
/// Begins at `start` (or at its parent directory when `start` is a file) and
/// inspects each ancestor in turn. The first directory that contains a
/// `.sediment` directory or any `.git` entry is returned. At most 100
/// directories are inspected.
///
/// Returns `None` when no marker is found within that bound.
pub fn find_project_root(start: &Path) -> Option<PathBuf> {
    // Upper bound on the number of directories examined while walking up.
    const MAX_DEPTH: usize = 100;

    // A file is not a candidate itself; begin with the directory holding it.
    let origin = if start.is_file() {
        start.parent()?
    } else {
        start
    };

    origin
        .ancestors()
        .take(MAX_DEPTH)
        // `.sediment` must be a directory (explicit marker); `.git` only has to exist.
        .find(|dir| dir.join(".sediment").is_dir() || dir.join(".git").exists())
        .map(Path::to_path_buf)
}
247
248/// Initialize a project directory for Sediment.
249///
250/// Creates the `.sediment/` directory in the specified path and generates a project ID.
251pub fn init_project(project_root: &Path) -> std::io::Result<PathBuf> {
252    let sediment_dir = project_root.join(".sediment");
253    std::fs::create_dir_all(&sediment_dir)?;
254
255    // Generate project ID
256    get_or_create_project_id(project_root)?;
257
258    Ok(sediment_dir)
259}
260
#[cfg(test)]
mod tests {
    use super::*;

    /// Fix #17: the default list scope must be `Project`, matching the tool schema default.
    #[test]
    fn test_list_scope_default_is_project() {
        let scope = ListScope::default();
        assert_eq!(scope, ListScope::Project);
    }

    /// The default store scope is `Project`.
    #[test]
    fn test_store_scope_default_is_project() {
        let scope = StoreScope::default();
        assert_eq!(scope, StoreScope::Project);
    }

    /// Fix #18: asking twice for the same root must yield the same project ID.
    #[test]
    fn test_project_config_idempotent() {
        let tmp = tempfile::TempDir::new().unwrap();
        let root = tmp.path();

        let first = get_or_create_project_id(root).unwrap();
        let second = get_or_create_project_id(root).unwrap();

        assert_eq!(
            first, second,
            "Repeated calls should return the same project ID"
        );
    }

    /// Same project boosts, different project penalizes, missing context is neutral.
    #[test]
    fn test_boost_similarity() {
        let cases: [(Option<&str>, Option<&str>, f32); 3] = [
            (Some("p1"), Some("p1"), 0.575),
            (Some("p1"), Some("p2"), 0.475),
            (None, Some("p1"), 0.5),
        ];

        for (mem_project, current_project, expected) in cases.iter().copied() {
            let actual = boost_similarity(0.5, mem_project, current_project);
            assert!((actual - expected).abs() < 0.001);
        }
    }
}