Skip to main content

sediment/
item.rs

//! Unified Item type for semantic storage
//!
//! Items unify memories and documents into a single concept with automatic chunking.
4
5use chrono::{DateTime, Utc};
6use serde::{Deserialize, Serialize};
7use serde_json::Value;
8
9/// A unified item stored in Sediment
10#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct Item {
12    /// Unique identifier (UUID)
13    pub id: String,
14    /// The actual content
15    pub content: String,
16    /// Vector embedding (not serialized to JSON output)
17    #[serde(skip)]
18    pub embedding: Vec<f32>,
19    /// Optional title (recommended for long content)
20    #[serde(skip_serializing_if = "Option::is_none")]
21    pub title: Option<String>,
22    /// Tags for categorization
23    #[serde(default, skip_serializing_if = "Vec::is_empty")]
24    pub tags: Vec<String>,
25    /// Source attribution
26    #[serde(skip_serializing_if = "Option::is_none")]
27    pub source: Option<String>,
28    /// Custom JSON metadata
29    #[serde(skip_serializing_if = "Option::is_none")]
30    pub metadata: Option<Value>,
31    /// Project ID (None for global items)
32    #[serde(skip_serializing_if = "Option::is_none")]
33    pub project_id: Option<String>,
34    /// Whether this item was chunked (internal)
35    pub is_chunked: bool,
36    /// When this item expires (optional)
37    #[serde(skip_serializing_if = "Option::is_none")]
38    pub expires_at: Option<DateTime<Utc>>,
39    /// When this item was created
40    pub created_at: DateTime<Utc>,
41}
42
43impl Item {
44    /// Create a new item with content
45    pub fn new(content: impl Into<String>) -> Self {
46        Self {
47            id: uuid::Uuid::new_v4().to_string(),
48            content: content.into(),
49            embedding: Vec::new(),
50            title: None,
51            tags: Vec::new(),
52            source: None,
53            metadata: None,
54            project_id: None,
55            is_chunked: false,
56            expires_at: None,
57            created_at: Utc::now(),
58        }
59    }
60
61    /// Set the title
62    pub fn with_title(mut self, title: impl Into<String>) -> Self {
63        self.title = Some(title.into());
64        self
65    }
66
67    /// Set tags
68    pub fn with_tags(mut self, tags: Vec<String>) -> Self {
69        self.tags = tags;
70        self
71    }
72
73    /// Set the source
74    pub fn with_source(mut self, source: impl Into<String>) -> Self {
75        self.source = Some(source.into());
76        self
77    }
78
79    /// Set custom metadata
80    pub fn with_metadata(mut self, metadata: Value) -> Self {
81        self.metadata = Some(metadata);
82        self
83    }
84
85    /// Set the project ID
86    pub fn with_project_id(mut self, project_id: impl Into<String>) -> Self {
87        self.project_id = Some(project_id.into());
88        self
89    }
90
91    /// Set expiration time
92    pub fn with_expires_at(mut self, expires_at: DateTime<Utc>) -> Self {
93        self.expires_at = Some(expires_at);
94        self
95    }
96
97    /// Set the embedding
98    pub fn with_embedding(mut self, embedding: Vec<f32>) -> Self {
99        self.embedding = embedding;
100        self
101    }
102
103    /// Mark as chunked
104    pub fn with_chunked(mut self, is_chunked: bool) -> Self {
105        self.is_chunked = is_chunked;
106        self
107    }
108
109    /// Get the text to embed for this item
110    /// For chunked items: title + first ~500 chars
111    /// For non-chunked items: full content
112    pub fn embedding_text(&self) -> String {
113        if self.is_chunked {
114            let preview: String = self.content.chars().take(500).collect();
115            match &self.title {
116                Some(title) => format!("{} {}", title, preview),
117                None => preview,
118            }
119        } else {
120            self.content.clone()
121        }
122    }
123
124    /// Check if this item has expired
125    pub fn is_expired(&self) -> bool {
126        if let Some(expires_at) = self.expires_at {
127            Utc::now() > expires_at
128        } else {
129            false
130        }
131    }
132}
133
134/// A chunk of an item (internal, not exposed to MCP)
135#[derive(Debug, Clone, Serialize, Deserialize)]
136pub struct Chunk {
137    /// Unique identifier (UUID)
138    pub id: String,
139    /// Parent item ID
140    pub item_id: String,
141    /// Index of this chunk within the item (0-based)
142    pub chunk_index: usize,
143    /// The chunk content
144    pub content: String,
145    /// Vector embedding of the chunk (not serialized)
146    #[serde(skip)]
147    pub embedding: Vec<f32>,
148    /// Optional context (e.g., header path, function name)
149    #[serde(skip_serializing_if = "Option::is_none")]
150    pub context: Option<String>,
151}
152
153impl Chunk {
154    /// Create a new chunk
155    pub fn new(item_id: impl Into<String>, chunk_index: usize, content: impl Into<String>) -> Self {
156        Self {
157            id: uuid::Uuid::new_v4().to_string(),
158            item_id: item_id.into(),
159            chunk_index,
160            content: content.into(),
161            embedding: Vec::new(),
162            context: None,
163        }
164    }
165
166    /// Set context
167    pub fn with_context(mut self, context: impl Into<String>) -> Self {
168        self.context = Some(context.into());
169        self
170    }
171
172    /// Set the embedding
173    pub fn with_embedding(mut self, embedding: Vec<f32>) -> Self {
174        self.embedding = embedding;
175        self
176    }
177}
178
179/// Result from storing an item
180#[derive(Debug, Clone, Serialize)]
181pub struct StoreResult {
182    /// The ID of the newly stored item
183    pub id: String,
184    /// Potentially conflicting items (high similarity)
185    #[serde(skip_serializing_if = "Vec::is_empty")]
186    pub potential_conflicts: Vec<ConflictInfo>,
187}
188
189/// Information about a potential conflict
190#[derive(Debug, Clone, Serialize)]
191pub struct ConflictInfo {
192    /// The ID of the conflicting item
193    pub id: String,
194    /// The content of the conflicting item
195    pub content: String,
196    /// Similarity score (0.0-1.0)
197    pub similarity: f32,
198}
199
200/// Result from a search query
201#[derive(Debug, Clone, Serialize)]
202pub struct SearchResult {
203    /// The matching item's id
204    pub id: String,
205    /// Content (full if short, or title if chunked)
206    pub content: String,
207    /// Most relevant chunk content (if chunked)
208    #[serde(skip_serializing_if = "Option::is_none")]
209    pub relevant_excerpt: Option<String>,
210    /// Similarity score (0.0-1.0, higher is more similar)
211    pub similarity: f32,
212    /// Tags
213    #[serde(default, skip_serializing_if = "Vec::is_empty")]
214    pub tags: Vec<String>,
215    /// Source attribution
216    #[serde(skip_serializing_if = "Option::is_none")]
217    pub source: Option<String>,
218    /// When created
219    pub created_at: DateTime<Utc>,
220    /// Project ID (not serialized, used internally for cross-project checks and graph backfill)
221    #[serde(skip)]
222    pub project_id: Option<String>,
223    /// Metadata (not serialized, used internally for cross-project provenance)
224    #[serde(skip)]
225    pub metadata: Option<Value>,
226}
227
228impl SearchResult {
229    /// Create from an item (non-chunked)
230    pub fn from_item(item: &Item, similarity: f32) -> Self {
231        Self {
232            id: item.id.clone(),
233            content: item.content.clone(),
234            relevant_excerpt: None,
235            similarity,
236            tags: item.tags.clone(),
237            source: item.source.clone(),
238            created_at: item.created_at,
239            project_id: item.project_id.clone(),
240            metadata: item.metadata.clone(),
241        }
242    }
243
244    /// Create from an item with chunk excerpt
245    pub fn from_item_with_excerpt(item: &Item, similarity: f32, excerpt: String) -> Self {
246        let content = item.title.clone().unwrap_or_else(|| {
247            // For chunked items without title, show a preview
248            item.content.chars().take(100).collect()
249        });
250        Self {
251            id: item.id.clone(),
252            content,
253            relevant_excerpt: Some(excerpt),
254            similarity,
255            tags: item.tags.clone(),
256            source: item.source.clone(),
257            created_at: item.created_at,
258            project_id: item.project_id.clone(),
259            metadata: item.metadata.clone(),
260        }
261    }
262}
263
/// Criteria applied to search and list queries.
///
/// The default value sets no tag filter and no similarity threshold, and
/// leaves `include_expired` as `false`.
#[derive(Debug, Default, Clone)]
pub struct ItemFilters {
    /// Tags to filter by (any match)
    pub tags: Option<Vec<String>>,
    /// Lowest acceptable similarity (0.0-1.0)
    pub min_similarity: Option<f32>,
    /// Whether expired items should be included
    pub include_expired: bool,
}

impl ItemFilters {
    /// Create filters with every criterion at its default.
    pub fn new() -> Self {
        Default::default()
    }

    /// Builder: filter by the given tags.
    pub fn with_tags(self, tags: Vec<String>) -> Self {
        Self {
            tags: Some(tags),
            ..self
        }
    }

    /// Builder: set the minimum similarity threshold.
    pub fn with_min_similarity(self, min_similarity: f32) -> Self {
        Self {
            min_similarity: Some(min_similarity),
            ..self
        }
    }

    /// Builder: choose whether expired items are included.
    pub fn include_expired(self, include: bool) -> Self {
        Self {
            include_expired: include,
            ..self
        }
    }
}
295
#[cfg(test)]
mod tests {
    use super::*;

    /// Builder methods populate exactly the fields they name.
    #[test]
    fn test_item_creation() {
        let tags = vec![String::from("tag1"), String::from("tag2")];
        let item = Item::new("Test content")
            .with_title("Test Title")
            .with_tags(tags)
            .with_source("test-source")
            .with_project_id("project-123");

        assert_eq!(item.content, "Test content");
        assert_eq!(item.title.as_deref(), Some("Test Title"));
        assert_eq!(item.tags, ["tag1", "tag2"]);
        assert_eq!(item.source.as_deref(), Some("test-source"));
        assert_eq!(item.project_id.as_deref(), Some("project-123"));
        assert!(!item.is_chunked);
    }

    /// Non-chunked items embed their full content.
    #[test]
    fn test_embedding_text_short() {
        let text = Item::new("Short content").embedding_text();
        assert_eq!(text, "Short content");
    }

    /// Chunked items embed the title followed by a bounded content preview.
    #[test]
    fn test_embedding_text_chunked() {
        let long_body = "a".repeat(1000);
        let text = Item::new(long_body)
            .with_title("My Title")
            .with_chunked(true)
            .embedding_text();

        assert!(text.starts_with("My Title "));
        assert!(text.len() < 600);
    }

    /// Expiry is reported only once the deadline has passed.
    #[test]
    fn test_item_expiration() {
        let one_hour = chrono::Duration::hours(1);

        let expired = Item::new("Expired").with_expires_at(Utc::now() - one_hour);
        assert!(expired.is_expired());

        let valid = Item::new("Valid").with_expires_at(Utc::now() + one_hour);
        assert!(!valid.is_expired());
    }

    /// `Chunk::new` plus `with_context` fills in the expected fields.
    #[test]
    fn test_chunk_creation() {
        let chunk = Chunk::new("item-123", 0, "Chunk content").with_context("## Header");

        assert_eq!(chunk.item_id, "item-123");
        assert_eq!(chunk.chunk_index, 0);
        assert_eq!(chunk.content, "Chunk content");
        assert_eq!(chunk.context.as_deref(), Some("## Header"));
    }

    /// A whole-item result carries the full content and no excerpt.
    #[test]
    fn test_search_result_from_item() {
        let item = Item::new("Test content")
            .with_tags(vec![String::from("test")])
            .with_source("test");

        let result = SearchResult::from_item(&item, 0.95);

        assert_eq!(result.content, "Test content");
        assert_eq!(result.similarity, 0.95);
        assert!(result.relevant_excerpt.is_none());
    }

    /// An excerpt result shows the title as content and keeps the excerpt.
    #[test]
    fn test_search_result_with_excerpt() {
        let item = Item::new("Long content here")
            .with_title("Document Title")
            .with_chunked(true);

        let result = SearchResult::from_item_with_excerpt(&item, 0.85, String::from("relevant part"));

        assert_eq!(result.content, "Document Title");
        assert_eq!(result.relevant_excerpt.as_deref(), Some("relevant part"));
    }

    /// An empty conflict list is left out of the JSON entirely.
    #[test]
    fn test_store_result_serialization() {
        let result = StoreResult {
            id: String::from("abc123"),
            potential_conflicts: Vec::new(),
        };

        let json = serde_json::to_string(&result).unwrap();

        assert!(json.contains("abc123"));
        assert!(!json.contains("potential_conflicts"));
    }

    /// Conflicts, when present, are serialized with all their fields.
    #[test]
    fn test_store_result_with_conflicts() {
        let conflict = ConflictInfo {
            id: String::from("old-id"),
            content: String::from("Old content"),
            similarity: 0.92,
        };
        let result = StoreResult {
            id: String::from("new-id"),
            potential_conflicts: vec![conflict],
        };

        let json = serde_json::to_string(&result).unwrap();

        for needle in ["new-id", "potential_conflicts", "old-id", "0.92"].iter() {
            assert!(json.contains(needle), "expected {:?} in {}", needle, json);
        }
    }

    /// A standalone `ConflictInfo` serializes id, content and similarity.
    #[test]
    fn test_conflict_info_serialization() {
        let conflict = ConflictInfo {
            id: String::from("conflict-123"),
            content: String::from("Conflicting content"),
            similarity: 0.87,
        };

        let json = serde_json::to_string(&conflict).unwrap();

        for needle in ["conflict-123", "Conflicting content", "0.87"].iter() {
            assert!(json.contains(needle), "expected {:?} in {}", needle, json);
        }
    }
}