Skip to main content

sediment/
item.rs

//! Unified Item type for semantic storage
//!
//! Items unify memories and documents into a single concept with automatic chunking.
4
5use chrono::{DateTime, Utc};
6use serde::{Deserialize, Serialize};
7
8/// A unified item stored in Sediment
9#[derive(Debug, Clone, Serialize, Deserialize)]
10pub struct Item {
11    /// Unique identifier (UUID)
12    pub id: String,
13    /// The actual content
14    pub content: String,
15    /// Vector embedding (not serialized to JSON output)
16    #[serde(skip)]
17    pub embedding: Vec<f32>,
18    /// Project ID (None for global items)
19    #[serde(skip_serializing_if = "Option::is_none")]
20    pub project_id: Option<String>,
21    /// Whether this item was chunked (internal)
22    pub is_chunked: bool,
23    /// When this item was created
24    pub created_at: DateTime<Utc>,
25}
26
27impl Item {
28    /// Create a new item with content
29    pub fn new(content: impl Into<String>) -> Self {
30        Self {
31            id: uuid::Uuid::new_v4().to_string(),
32            content: content.into(),
33            embedding: Vec::new(),
34            project_id: None,
35            is_chunked: false,
36            created_at: Utc::now(),
37        }
38    }
39
40    /// Set the project ID
41    pub fn with_project_id(mut self, project_id: impl Into<String>) -> Self {
42        self.project_id = Some(project_id.into());
43        self
44    }
45
46    /// Override the creation timestamp (benchmark builds only)
47    #[cfg(feature = "bench")]
48    pub fn with_created_at(mut self, created_at: DateTime<Utc>) -> Self {
49        self.created_at = created_at;
50        self
51    }
52
53    /// Get the text to embed for this item
54    /// For chunked items: first ~500 chars
55    /// For non-chunked items: full content
56    pub fn embedding_text(&self) -> String {
57        if self.is_chunked {
58            self.content.chars().take(500).collect()
59        } else {
60            self.content.clone()
61        }
62    }
63}
64
65/// A chunk of an item (internal, not exposed to MCP)
66#[derive(Debug, Clone, Serialize, Deserialize)]
67pub struct Chunk {
68    /// Unique identifier (UUID)
69    pub id: String,
70    /// Parent item ID
71    pub item_id: String,
72    /// Index of this chunk within the item (0-based)
73    pub chunk_index: usize,
74    /// The chunk content
75    pub content: String,
76    /// Vector embedding of the chunk (not serialized)
77    #[serde(skip)]
78    pub embedding: Vec<f32>,
79    /// Optional context (e.g., header path, function name)
80    #[serde(skip_serializing_if = "Option::is_none")]
81    pub context: Option<String>,
82}
83
84impl Chunk {
85    /// Create a new chunk
86    pub fn new(item_id: impl Into<String>, chunk_index: usize, content: impl Into<String>) -> Self {
87        Self {
88            id: uuid::Uuid::new_v4().to_string(),
89            item_id: item_id.into(),
90            chunk_index,
91            content: content.into(),
92            embedding: Vec::new(),
93            context: None,
94        }
95    }
96
97    /// Set context
98    pub fn with_context(mut self, context: impl Into<String>) -> Self {
99        self.context = Some(context.into());
100        self
101    }
102}
103
104/// Result from storing an item
105#[derive(Debug, Clone, Serialize)]
106pub struct StoreResult {
107    /// The ID of the newly stored item
108    pub id: String,
109    /// Potentially conflicting items (high similarity)
110    #[serde(skip_serializing_if = "Vec::is_empty")]
111    pub potential_conflicts: Vec<ConflictInfo>,
112}
113
114/// Information about a potential conflict
115#[derive(Debug, Clone, Serialize)]
116pub struct ConflictInfo {
117    /// The ID of the conflicting item
118    pub id: String,
119    /// The content of the conflicting item
120    pub content: String,
121    /// Similarity score (0.0-1.0)
122    pub similarity: f32,
123}
124
125/// Result from a search query
126#[derive(Debug, Clone, Serialize)]
127pub struct SearchResult {
128    /// The matching item's id
129    pub id: String,
130    /// Content (full if short, or preview if chunked)
131    pub content: String,
132    /// Most relevant chunk content (if chunked)
133    #[serde(skip_serializing_if = "Option::is_none")]
134    pub relevant_excerpt: Option<String>,
135    /// Similarity score (0.0-1.0, higher is more similar)
136    pub similarity: f32,
137    /// When created
138    pub created_at: DateTime<Utc>,
139    /// Project ID (not serialized, used internally for cross-project checks and graph backfill)
140    #[serde(skip)]
141    pub project_id: Option<String>,
142}
143
144impl SearchResult {
145    /// Create from an item (non-chunked)
146    pub fn from_item(item: &Item, similarity: f32) -> Self {
147        Self {
148            id: item.id.clone(),
149            content: item.content.clone(),
150            relevant_excerpt: None,
151            similarity,
152            created_at: item.created_at,
153            project_id: item.project_id.clone(),
154        }
155    }
156
157    /// Create from an item with chunk excerpt
158    pub fn from_item_with_excerpt(item: &Item, similarity: f32, excerpt: String) -> Self {
159        // For chunked items, show a preview of the content
160        let content: String = item.content.chars().take(100).collect();
161        Self {
162            id: item.id.clone(),
163            content,
164            relevant_excerpt: Some(excerpt),
165            similarity,
166            created_at: item.created_at,
167            project_id: item.project_id.clone(),
168        }
169    }
170}
171
/// Optional constraints applied to search/list queries.
///
/// The `Default` value leaves every filter unset.
#[derive(Debug, Default, Clone)]
pub struct ItemFilters {
    /// Minimum similarity threshold (0.0-1.0); `None` means no threshold is set.
    pub min_similarity: Option<f32>,
}

impl ItemFilters {
    /// Create a filter set with nothing configured.
    ///
    /// Produces the same value as `ItemFilters::default()`.
    pub fn new() -> Self {
        Self {
            min_similarity: None,
        }
    }
}
184
#[cfg(test)]
impl Item {
    /// Test-only builder step that overrides the `is_chunked` flag.
    pub fn with_chunked(self, is_chunked: bool) -> Self {
        Self { is_chunked, ..self }
    }
}
193
#[cfg(test)]
mod tests {
    //! Unit tests for the item, chunk, and result types.
    //!
    //! Besides the basic construction and serialization checks, these tests
    //! pin both truncation paths (`Item::embedding_text` and
    //! `SearchResult::from_item_with_excerpt`) with inputs that are actually
    //! longer than the limit, including multi-byte text, so a regression from
    //! char-based to byte-based truncation is caught.

    use super::*;

    #[test]
    fn test_item_creation() {
        let item = Item::new("Test content").with_project_id("project-123");

        assert_eq!(item.content, "Test content");
        assert_eq!(item.project_id, Some("project-123".to_string()));
        assert!(!item.is_chunked);
    }

    #[test]
    fn test_embedding_text_short() {
        let item = Item::new("Short content");
        assert_eq!(item.embedding_text(), "Short content");
    }

    #[test]
    fn test_embedding_text_chunked() {
        let item = Item::new("a".repeat(1000)).with_chunked(true);
        let text = item.embedding_text();
        assert_eq!(text.len(), 500);
    }

    #[test]
    fn test_embedding_text_chunked_multibyte() {
        // U+00E9 is two bytes in UTF-8, so the 500-char prefix is 1000 bytes
        // long. A byte-based cut would yield a different length.
        let item = Item::new("\u{e9}".repeat(1000)).with_chunked(true);
        let text = item.embedding_text();
        assert_eq!(text.chars().count(), 500);
        assert_eq!(text.len(), 1000);
    }

    #[test]
    fn test_chunk_creation() {
        let chunk = Chunk::new("item-123", 0, "Chunk content").with_context("## Header");

        assert_eq!(chunk.item_id, "item-123");
        assert_eq!(chunk.chunk_index, 0);
        assert_eq!(chunk.content, "Chunk content");
        assert_eq!(chunk.context, Some("## Header".to_string()));
    }

    #[test]
    fn test_search_result_from_item() {
        let item = Item::new("Test content");

        let result = SearchResult::from_item(&item, 0.95);
        assert_eq!(result.content, "Test content");
        assert_eq!(result.similarity, 0.95);
        assert!(result.relevant_excerpt.is_none());
    }

    #[test]
    fn test_search_result_with_excerpt() {
        let item = Item::new("Long content here").with_chunked(true);

        let result = SearchResult::from_item_with_excerpt(&item, 0.85, "relevant part".to_string());
        assert_eq!(result.content, "Long content here");
        assert_eq!(result.relevant_excerpt, Some("relevant part".to_string()));
    }

    #[test]
    fn test_search_result_with_excerpt_truncates_preview() {
        // Content longer than the preview limit must be cut to 100 chars,
        // while the excerpt is passed through untouched.
        let item = Item::new("b".repeat(250)).with_chunked(true);

        let result = SearchResult::from_item_with_excerpt(&item, 0.5, "part".to_string());
        assert_eq!(result.content, "b".repeat(100));
        assert_eq!(result.relevant_excerpt.as_deref(), Some("part"));
        assert_eq!(result.id, item.id);
    }

    #[test]
    fn test_store_result_serialization() {
        let result = StoreResult {
            id: "abc123".to_string(),
            potential_conflicts: vec![],
        };

        let json = serde_json::to_string(&result).unwrap();
        assert!(json.contains("abc123"));
        // Empty conflicts should not be serialized
        assert!(!json.contains("potential_conflicts"));
    }

    #[test]
    fn test_store_result_with_conflicts() {
        let result = StoreResult {
            id: "new-id".to_string(),
            potential_conflicts: vec![ConflictInfo {
                id: "old-id".to_string(),
                content: "Old content".to_string(),
                similarity: 0.92,
            }],
        };

        let json = serde_json::to_string(&result).unwrap();
        assert!(json.contains("new-id"));
        assert!(json.contains("potential_conflicts"));
        assert!(json.contains("old-id"));
        assert!(json.contains("0.92"));
    }

    #[test]
    fn test_conflict_info_serialization() {
        let conflict = ConflictInfo {
            id: "conflict-123".to_string(),
            content: "Conflicting content".to_string(),
            similarity: 0.87,
        };

        let json = serde_json::to_string(&conflict).unwrap();
        assert!(json.contains("conflict-123"));
        assert!(json.contains("Conflicting content"));
        assert!(json.contains("0.87"));
    }
}