Skip to main content

sediment/
item.rs

//! Unified Item type for semantic storage
//!
//! Items unify memories and documents into a single concept with automatic chunking.
4
5use chrono::{DateTime, Utc};
6use serde::{Deserialize, Serialize};
7
8/// A unified item stored in Sediment
9#[derive(Debug, Clone, Serialize, Deserialize)]
10pub struct Item {
11    /// Unique identifier (UUID)
12    pub id: String,
13    /// The actual content
14    pub content: String,
15    /// Vector embedding (not serialized to JSON output)
16    #[serde(skip)]
17    pub embedding: Vec<f32>,
18    /// Project ID (None for global items)
19    #[serde(skip_serializing_if = "Option::is_none")]
20    pub project_id: Option<String>,
21    /// Whether this item was chunked (internal)
22    pub is_chunked: bool,
23    /// When this item was created
24    pub created_at: DateTime<Utc>,
25}
26
27impl Item {
28    /// Create a new item with content
29    pub fn new(content: impl Into<String>) -> Self {
30        Self {
31            id: uuid::Uuid::new_v4().to_string(),
32            content: content.into(),
33            embedding: Vec::new(),
34            project_id: None,
35            is_chunked: false,
36            created_at: Utc::now(),
37        }
38    }
39
40    /// Set the project ID
41    pub fn with_project_id(mut self, project_id: impl Into<String>) -> Self {
42        self.project_id = Some(project_id.into());
43        self
44    }
45
46    /// Set the embedding
47    pub fn with_embedding(mut self, embedding: Vec<f32>) -> Self {
48        self.embedding = embedding;
49        self
50    }
51
52    /// Override the creation timestamp (benchmark builds only)
53    #[cfg(feature = "bench")]
54    pub fn with_created_at(mut self, created_at: DateTime<Utc>) -> Self {
55        self.created_at = created_at;
56        self
57    }
58
59    /// Mark as chunked
60    pub fn with_chunked(mut self, is_chunked: bool) -> Self {
61        self.is_chunked = is_chunked;
62        self
63    }
64
65    /// Get the text to embed for this item
66    /// For chunked items: first ~500 chars
67    /// For non-chunked items: full content
68    pub fn embedding_text(&self) -> String {
69        if self.is_chunked {
70            self.content.chars().take(500).collect()
71        } else {
72            self.content.clone()
73        }
74    }
75}
76
77/// A chunk of an item (internal, not exposed to MCP)
78#[derive(Debug, Clone, Serialize, Deserialize)]
79pub struct Chunk {
80    /// Unique identifier (UUID)
81    pub id: String,
82    /// Parent item ID
83    pub item_id: String,
84    /// Index of this chunk within the item (0-based)
85    pub chunk_index: usize,
86    /// The chunk content
87    pub content: String,
88    /// Vector embedding of the chunk (not serialized)
89    #[serde(skip)]
90    pub embedding: Vec<f32>,
91    /// Optional context (e.g., header path, function name)
92    #[serde(skip_serializing_if = "Option::is_none")]
93    pub context: Option<String>,
94}
95
96impl Chunk {
97    /// Create a new chunk
98    pub fn new(item_id: impl Into<String>, chunk_index: usize, content: impl Into<String>) -> Self {
99        Self {
100            id: uuid::Uuid::new_v4().to_string(),
101            item_id: item_id.into(),
102            chunk_index,
103            content: content.into(),
104            embedding: Vec::new(),
105            context: None,
106        }
107    }
108
109    /// Set context
110    pub fn with_context(mut self, context: impl Into<String>) -> Self {
111        self.context = Some(context.into());
112        self
113    }
114
115    /// Set the embedding
116    pub fn with_embedding(mut self, embedding: Vec<f32>) -> Self {
117        self.embedding = embedding;
118        self
119    }
120}
121
122/// Result from storing an item
123#[derive(Debug, Clone, Serialize)]
124pub struct StoreResult {
125    /// The ID of the newly stored item
126    pub id: String,
127    /// Potentially conflicting items (high similarity)
128    #[serde(skip_serializing_if = "Vec::is_empty")]
129    pub potential_conflicts: Vec<ConflictInfo>,
130}
131
132/// Information about a potential conflict
133#[derive(Debug, Clone, Serialize)]
134pub struct ConflictInfo {
135    /// The ID of the conflicting item
136    pub id: String,
137    /// The content of the conflicting item
138    pub content: String,
139    /// Similarity score (0.0-1.0)
140    pub similarity: f32,
141}
142
143/// Result from a search query
144#[derive(Debug, Clone, Serialize)]
145pub struct SearchResult {
146    /// The matching item's id
147    pub id: String,
148    /// Content (full if short, or preview if chunked)
149    pub content: String,
150    /// Most relevant chunk content (if chunked)
151    #[serde(skip_serializing_if = "Option::is_none")]
152    pub relevant_excerpt: Option<String>,
153    /// Similarity score (0.0-1.0, higher is more similar)
154    pub similarity: f32,
155    /// When created
156    pub created_at: DateTime<Utc>,
157    /// Project ID (not serialized, used internally for cross-project checks and graph backfill)
158    #[serde(skip)]
159    pub project_id: Option<String>,
160}
161
162impl SearchResult {
163    /// Create from an item (non-chunked)
164    pub fn from_item(item: &Item, similarity: f32) -> Self {
165        Self {
166            id: item.id.clone(),
167            content: item.content.clone(),
168            relevant_excerpt: None,
169            similarity,
170            created_at: item.created_at,
171            project_id: item.project_id.clone(),
172        }
173    }
174
175    /// Create from an item with chunk excerpt
176    pub fn from_item_with_excerpt(item: &Item, similarity: f32, excerpt: String) -> Self {
177        // For chunked items, show a preview of the content
178        let content: String = item.content.chars().take(100).collect();
179        Self {
180            id: item.id.clone(),
181            content,
182            relevant_excerpt: Some(excerpt),
183            similarity,
184            created_at: item.created_at,
185            project_id: item.project_id.clone(),
186        }
187    }
188}
189
/// Filters for search/list queries
#[derive(Debug, Default, Clone)]
pub struct ItemFilters {
    /// Minimum similarity threshold (0.0-1.0); `None` when no threshold has
    /// been set.
    pub min_similarity: Option<f32>,
}

impl ItemFilters {
    /// Creates an empty filter set; equivalent to [`ItemFilters::default`].
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns the filters with the minimum similarity threshold set to
    /// `min_similarity`.
    ///
    /// The value is stored as given; no range check is performed here.
    #[must_use]
    pub fn with_min_similarity(mut self, min_similarity: f32) -> Self {
        self.min_similarity = Some(min_similarity);
        self
    }
}
207
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_item_creation() {
        let item = Item::new("Test content").with_project_id("project-123");

        assert_eq!(item.content, "Test content");
        assert_eq!(item.project_id, Some("project-123".to_string()));
        assert!(!item.is_chunked);
        assert!(item.embedding.is_empty());
    }

    #[test]
    fn test_embedding_text_short() {
        let item = Item::new("Short content");
        assert_eq!(item.embedding_text(), "Short content");
    }

    #[test]
    fn test_embedding_text_chunked() {
        let item = Item::new("a".repeat(1000)).with_chunked(true);
        let text = item.embedding_text();
        // The limit is expressed in characters, so count characters.
        assert_eq!(text.chars().count(), 500);
    }

    #[test]
    fn test_embedding_text_chunked_multibyte() {
        // U+00E9 takes two bytes in UTF-8; truncation must count characters
        // and must not split a code point.
        let item = Item::new("\u{e9}".repeat(1000)).with_chunked(true);
        let text = item.embedding_text();
        assert_eq!(text.chars().count(), 500);
        assert_eq!(text.len(), 1000);
    }

    #[test]
    fn test_embedding_text_chunked_shorter_than_limit() {
        let item = Item::new("tiny").with_chunked(true);
        assert_eq!(item.embedding_text(), "tiny");
    }

    #[test]
    fn test_chunk_creation() {
        let chunk = Chunk::new("item-123", 0, "Chunk content").with_context("## Header");

        assert_eq!(chunk.item_id, "item-123");
        assert_eq!(chunk.chunk_index, 0);
        assert_eq!(chunk.content, "Chunk content");
        assert_eq!(chunk.context, Some("## Header".to_string()));
    }

    #[test]
    fn test_search_result_from_item() {
        let item = Item::new("Test content");

        let result = SearchResult::from_item(&item, 0.95);
        assert_eq!(result.content, "Test content");
        assert_eq!(result.similarity, 0.95);
        assert!(result.relevant_excerpt.is_none());
    }

    #[test]
    fn test_search_result_with_excerpt() {
        let item = Item::new("Long content here").with_chunked(true);

        let result = SearchResult::from_item_with_excerpt(&item, 0.85, "relevant part".to_string());
        assert_eq!(result.content, "Long content here");
        assert_eq!(result.relevant_excerpt, Some("relevant part".to_string()));
    }

    #[test]
    fn test_search_result_with_excerpt_truncates_preview() {
        let item = Item::new("x".repeat(250)).with_chunked(true);

        let result = SearchResult::from_item_with_excerpt(&item, 0.5, "part".to_string());
        assert_eq!(result.content, "x".repeat(100));
        assert_eq!(result.relevant_excerpt, Some("part".to_string()));
    }

    #[test]
    fn test_item_filters_builder() {
        assert!(ItemFilters::new().min_similarity.is_none());

        let filters = ItemFilters::new().with_min_similarity(0.5);
        assert_eq!(filters.min_similarity, Some(0.5));
    }

    #[test]
    fn test_store_result_serialization() {
        let result = StoreResult {
            id: "abc123".to_string(),
            potential_conflicts: vec![],
        };

        let json = serde_json::to_string(&result).unwrap();
        assert!(json.contains("abc123"));
        // Empty conflicts should not be serialized
        assert!(!json.contains("potential_conflicts"));
    }

    #[test]
    fn test_store_result_with_conflicts() {
        let result = StoreResult {
            id: "new-id".to_string(),
            potential_conflicts: vec![ConflictInfo {
                id: "old-id".to_string(),
                content: "Old content".to_string(),
                similarity: 0.92,
            }],
        };

        let json = serde_json::to_string(&result).unwrap();
        assert!(json.contains("new-id"));
        assert!(json.contains("potential_conflicts"));
        assert!(json.contains("old-id"));
        assert!(json.contains("0.92"));
    }

    #[test]
    fn test_conflict_info_serialization() {
        let conflict = ConflictInfo {
            id: "conflict-123".to_string(),
            content: "Conflicting content".to_string(),
            similarity: 0.87,
        };

        let json = serde_json::to_string(&conflict).unwrap();
        assert!(json.contains("conflict-123"));
        assert!(json.contains("Conflicting content"));
        assert!(json.contains("0.87"));
    }
}