Skip to main content

sediment/
item.rs

//! Unified Item type for semantic storage.
//!
//! Items unify memories and documents into a single concept with automatic chunking.
4
5use chrono::{DateTime, Utc};
6use serde::{Deserialize, Serialize};
7
8/// A unified item stored in Sediment
9#[derive(Debug, Clone, Serialize, Deserialize)]
10pub struct Item {
11    /// Unique identifier (UUID)
12    pub id: String,
13    /// The actual content
14    pub content: String,
15    /// Vector embedding (not serialized to JSON output)
16    #[serde(skip)]
17    pub embedding: Vec<f32>,
18    /// Project ID (None for global items)
19    #[serde(skip_serializing_if = "Option::is_none")]
20    pub project_id: Option<String>,
21    /// Whether this item was chunked (internal)
22    pub is_chunked: bool,
23    /// When this item was created
24    pub created_at: DateTime<Utc>,
25}
26
27impl Item {
28    /// Create a new item with content
29    pub fn new(content: impl Into<String>) -> Self {
30        Self {
31            id: uuid::Uuid::new_v4().to_string(),
32            content: content.into(),
33            embedding: Vec::new(),
34            project_id: None,
35            is_chunked: false,
36            created_at: Utc::now(),
37        }
38    }
39
40    /// Set the project ID
41    pub fn with_project_id(mut self, project_id: impl Into<String>) -> Self {
42        self.project_id = Some(project_id.into());
43        self
44    }
45
46    /// Set the embedding
47    pub fn with_embedding(mut self, embedding: Vec<f32>) -> Self {
48        self.embedding = embedding;
49        self
50    }
51
52    /// Mark as chunked
53    pub fn with_chunked(mut self, is_chunked: bool) -> Self {
54        self.is_chunked = is_chunked;
55        self
56    }
57
58    /// Get the text to embed for this item
59    /// For chunked items: first ~500 chars
60    /// For non-chunked items: full content
61    pub fn embedding_text(&self) -> String {
62        if self.is_chunked {
63            self.content.chars().take(500).collect()
64        } else {
65            self.content.clone()
66        }
67    }
68}
69
70/// A chunk of an item (internal, not exposed to MCP)
71#[derive(Debug, Clone, Serialize, Deserialize)]
72pub struct Chunk {
73    /// Unique identifier (UUID)
74    pub id: String,
75    /// Parent item ID
76    pub item_id: String,
77    /// Index of this chunk within the item (0-based)
78    pub chunk_index: usize,
79    /// The chunk content
80    pub content: String,
81    /// Vector embedding of the chunk (not serialized)
82    #[serde(skip)]
83    pub embedding: Vec<f32>,
84    /// Optional context (e.g., header path, function name)
85    #[serde(skip_serializing_if = "Option::is_none")]
86    pub context: Option<String>,
87}
88
89impl Chunk {
90    /// Create a new chunk
91    pub fn new(item_id: impl Into<String>, chunk_index: usize, content: impl Into<String>) -> Self {
92        Self {
93            id: uuid::Uuid::new_v4().to_string(),
94            item_id: item_id.into(),
95            chunk_index,
96            content: content.into(),
97            embedding: Vec::new(),
98            context: None,
99        }
100    }
101
102    /// Set context
103    pub fn with_context(mut self, context: impl Into<String>) -> Self {
104        self.context = Some(context.into());
105        self
106    }
107
108    /// Set the embedding
109    pub fn with_embedding(mut self, embedding: Vec<f32>) -> Self {
110        self.embedding = embedding;
111        self
112    }
113}
114
115/// Result from storing an item
116#[derive(Debug, Clone, Serialize)]
117pub struct StoreResult {
118    /// The ID of the newly stored item
119    pub id: String,
120    /// Potentially conflicting items (high similarity)
121    #[serde(skip_serializing_if = "Vec::is_empty")]
122    pub potential_conflicts: Vec<ConflictInfo>,
123}
124
125/// Information about a potential conflict
126#[derive(Debug, Clone, Serialize)]
127pub struct ConflictInfo {
128    /// The ID of the conflicting item
129    pub id: String,
130    /// The content of the conflicting item
131    pub content: String,
132    /// Similarity score (0.0-1.0)
133    pub similarity: f32,
134}
135
136/// Result from a search query
137#[derive(Debug, Clone, Serialize)]
138pub struct SearchResult {
139    /// The matching item's id
140    pub id: String,
141    /// Content (full if short, or preview if chunked)
142    pub content: String,
143    /// Most relevant chunk content (if chunked)
144    #[serde(skip_serializing_if = "Option::is_none")]
145    pub relevant_excerpt: Option<String>,
146    /// Similarity score (0.0-1.0, higher is more similar)
147    pub similarity: f32,
148    /// When created
149    pub created_at: DateTime<Utc>,
150    /// Project ID (not serialized, used internally for cross-project checks and graph backfill)
151    #[serde(skip)]
152    pub project_id: Option<String>,
153}
154
155impl SearchResult {
156    /// Create from an item (non-chunked)
157    pub fn from_item(item: &Item, similarity: f32) -> Self {
158        Self {
159            id: item.id.clone(),
160            content: item.content.clone(),
161            relevant_excerpt: None,
162            similarity,
163            created_at: item.created_at,
164            project_id: item.project_id.clone(),
165        }
166    }
167
168    /// Create from an item with chunk excerpt
169    pub fn from_item_with_excerpt(item: &Item, similarity: f32, excerpt: String) -> Self {
170        // For chunked items, show a preview of the content
171        let content: String = item.content.chars().take(100).collect();
172        Self {
173            id: item.id.clone(),
174            content,
175            relevant_excerpt: Some(excerpt),
176            similarity,
177            created_at: item.created_at,
178            project_id: item.project_id.clone(),
179        }
180    }
181}
182
/// Filters for search/list queries.
#[derive(Debug, Default, Clone)]
pub struct ItemFilters {
    /// Minimum similarity threshold (0.0-1.0); `None` means no threshold is set.
    pub min_similarity: Option<f32>,
}

impl ItemFilters {
    /// Create an empty filter set (equivalent to [`ItemFilters::default`]).
    pub fn new() -> Self {
        Self::default()
    }

    /// Set the minimum similarity threshold, consuming and returning the
    /// filters (builder style). A previously set threshold is replaced.
    #[must_use]
    pub fn with_min_similarity(mut self, min_similarity: f32) -> Self {
        self.min_similarity = Some(min_similarity);
        self
    }
}
200
#[cfg(test)]
mod tests {
    //! Unit tests for the item, chunk, and result types in this file.

    use super::*;

    #[test]
    fn test_item_creation() {
        let item = Item::new("Test content").with_project_id("project-123");

        assert_eq!(item.content, "Test content");
        assert_eq!(item.project_id, Some("project-123".to_string()));
        assert!(!item.is_chunked);
    }

    #[test]
    fn test_embedding_text_short() {
        let item = Item::new("Short content");
        assert_eq!(item.embedding_text(), "Short content");
    }

    #[test]
    fn test_embedding_text_chunked() {
        let item = Item::new("a".repeat(1000)).with_chunked(true);
        let text = item.embedding_text();
        assert_eq!(text.len(), 500);
    }

    #[test]
    fn test_embedding_text_chunked_multibyte() {
        // The limit is counted in characters, so 2-byte characters still yield 500 of them.
        let item = Item::new("\u{e9}".repeat(1000)).with_chunked(true);
        let text = item.embedding_text();
        assert_eq!(text.chars().count(), 500);
    }

    #[test]
    fn test_chunk_creation() {
        let chunk = Chunk::new("item-123", 0, "Chunk content").with_context("## Header");

        assert_eq!(chunk.item_id, "item-123");
        assert_eq!(chunk.chunk_index, 0);
        assert_eq!(chunk.content, "Chunk content");
        assert_eq!(chunk.context, Some("## Header".to_string()));
    }

    #[test]
    fn test_search_result_from_item() {
        let item = Item::new("Test content");

        let result = SearchResult::from_item(&item, 0.95);
        assert_eq!(result.content, "Test content");
        assert_eq!(result.similarity, 0.95);
        assert!(result.relevant_excerpt.is_none());
    }

    #[test]
    fn test_search_result_with_excerpt() {
        let item = Item::new("Long content here").with_chunked(true);

        let result = SearchResult::from_item_with_excerpt(&item, 0.85, "relevant part".to_string());
        // Content shorter than the preview limit is kept whole.
        assert_eq!(result.content, "Long content here");
        assert_eq!(result.relevant_excerpt, Some("relevant part".to_string()));
    }

    #[test]
    fn test_search_result_with_excerpt_truncates_preview() {
        let item = Item::new("b".repeat(250)).with_chunked(true);

        let result = SearchResult::from_item_with_excerpt(&item, 0.5, "excerpt".to_string());
        // Content longer than the preview limit is cut to its first 100 characters.
        assert_eq!(result.content, "b".repeat(100));
    }

    #[test]
    fn test_store_result_serialization() {
        let result = StoreResult {
            id: "abc123".to_string(),
            potential_conflicts: vec![],
        };

        let json = serde_json::to_string(&result).unwrap();
        assert!(json.contains("abc123"));
        // Empty conflicts should not be serialized
        assert!(!json.contains("potential_conflicts"));
    }

    #[test]
    fn test_store_result_with_conflicts() {
        let result = StoreResult {
            id: "new-id".to_string(),
            potential_conflicts: vec![ConflictInfo {
                id: "old-id".to_string(),
                content: "Old content".to_string(),
                similarity: 0.92,
            }],
        };

        let json = serde_json::to_string(&result).unwrap();
        assert!(json.contains("new-id"));
        assert!(json.contains("potential_conflicts"));
        assert!(json.contains("old-id"));
        assert!(json.contains("0.92"));
    }

    #[test]
    fn test_conflict_info_serialization() {
        let conflict = ConflictInfo {
            id: "conflict-123".to_string(),
            content: "Conflicting content".to_string(),
            similarity: 0.87,
        };

        let json = serde_json::to_string(&conflict).unwrap();
        assert!(json.contains("conflict-123"));
        assert!(json.contains("Conflicting content"));
        assert!(json.contains("0.87"));
    }
}