1use chrono::{DateTime, Utc};
6use serde::{Deserialize, Serialize};
7use serde_json::Value;
8
/// A content record: text plus optional descriptive fields and an embedding vector.
///
/// Create one with [`Item::new`] and refine it with the chained `with_*` methods.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Item {
    /// Identifier string; [`Item::new`] fills it with a freshly generated UUID v4.
    pub id: String,
    /// Full text of the item.
    pub content: String,
    /// Embedding vector. Because of `#[serde(skip)]` it is never serialized, and it is
    /// left at its `Default` value (an empty vector) when an `Item` is deserialized.
    #[serde(skip)]
    pub embedding: Vec<f32>,
    /// Optional title; left out of the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub title: Option<String>,
    /// Tags attached to the item. Defaults to an empty list when missing on
    /// deserialization and is left out of the serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tags: Vec<String>,
    /// Optional source string; left out of the serialized output when `None`.
    /// NOTE(review): what counts as a "source" is decided by callers outside this file.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source: Option<String>,
    /// Optional arbitrary JSON value; left out of the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Value>,
    /// Optional project identifier; left out of the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub project_id: Option<String>,
    /// Flag read by [`Item::embedding_text`]: when `true`, the embedding text is the
    /// title (if any) plus a preview of the first 500 characters instead of the full content.
    /// Presumably marks items whose content is split into [`Chunk`]s — confirm against callers.
    pub is_chunked: bool,
    /// Optional expiry instant, checked by [`Item::is_expired`]; left out of the
    /// serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub expires_at: Option<DateTime<Utc>>,
    /// Creation timestamp; [`Item::new`] sets it to the current UTC time.
    pub created_at: DateTime<Utc>,
}
42
43impl Item {
44 pub fn new(content: impl Into<String>) -> Self {
46 Self {
47 id: uuid::Uuid::new_v4().to_string(),
48 content: content.into(),
49 embedding: Vec::new(),
50 title: None,
51 tags: Vec::new(),
52 source: None,
53 metadata: None,
54 project_id: None,
55 is_chunked: false,
56 expires_at: None,
57 created_at: Utc::now(),
58 }
59 }
60
61 pub fn with_title(mut self, title: impl Into<String>) -> Self {
63 self.title = Some(title.into());
64 self
65 }
66
67 pub fn with_tags(mut self, tags: Vec<String>) -> Self {
69 self.tags = tags;
70 self
71 }
72
73 pub fn with_source(mut self, source: impl Into<String>) -> Self {
75 self.source = Some(source.into());
76 self
77 }
78
79 pub fn with_metadata(mut self, metadata: Value) -> Self {
81 self.metadata = Some(metadata);
82 self
83 }
84
85 pub fn with_project_id(mut self, project_id: impl Into<String>) -> Self {
87 self.project_id = Some(project_id.into());
88 self
89 }
90
91 pub fn with_expires_at(mut self, expires_at: DateTime<Utc>) -> Self {
93 self.expires_at = Some(expires_at);
94 self
95 }
96
97 pub fn with_embedding(mut self, embedding: Vec<f32>) -> Self {
99 self.embedding = embedding;
100 self
101 }
102
103 pub fn with_chunked(mut self, is_chunked: bool) -> Self {
105 self.is_chunked = is_chunked;
106 self
107 }
108
109 pub fn embedding_text(&self) -> String {
113 if self.is_chunked {
114 let preview: String = self.content.chars().take(500).collect();
115 match &self.title {
116 Some(title) => format!("{} {}", title, preview),
117 None => preview,
118 }
119 } else {
120 self.content.clone()
121 }
122 }
123
124 pub fn is_expired(&self) -> bool {
126 if let Some(expires_at) = self.expires_at {
127 Utc::now() > expires_at
128 } else {
129 false
130 }
131 }
132}
133
/// One piece of content that refers to an item by id and carries its own embedding.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Chunk {
    /// Identifier string; [`Chunk::new`] fills it with a freshly generated UUID v4.
    pub id: String,
    /// Identifier of the item this chunk refers to.
    /// Presumably an [`Item::id`] value — confirm against callers.
    pub item_id: String,
    /// Index of this chunk.
    /// Presumably its zero-based position within the parent item — TODO confirm.
    pub chunk_index: usize,
    /// Text of the chunk.
    pub content: String,
    /// Embedding vector. Because of `#[serde(skip)]` it is never serialized, and it is
    /// left at its `Default` value (an empty vector) when a `Chunk` is deserialized.
    #[serde(skip)]
    pub embedding: Vec<f32>,
    /// Optional context string; left out of the serialized output when `None`.
    /// The tests in this file pass a Markdown heading (`## Header`) here.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub context: Option<String>,
}
152
153impl Chunk {
154 pub fn new(item_id: impl Into<String>, chunk_index: usize, content: impl Into<String>) -> Self {
156 Self {
157 id: uuid::Uuid::new_v4().to_string(),
158 item_id: item_id.into(),
159 chunk_index,
160 content: content.into(),
161 embedding: Vec::new(),
162 context: None,
163 }
164 }
165
166 pub fn with_context(mut self, context: impl Into<String>) -> Self {
168 self.context = Some(context.into());
169 self
170 }
171
172 pub fn with_embedding(mut self, embedding: Vec<f32>) -> Self {
174 self.embedding = embedding;
175 self
176 }
177}
178
/// Serializable result carrying an id and a list of potential conflicts.
///
/// NOTE(review): presumably returned by a store operation defined outside this file.
#[derive(Debug, Clone, Serialize)]
pub struct StoreResult {
    /// Identifier string (presumably of the item that was stored — confirm against callers).
    pub id: String,
    /// Entries flagged as potential conflicts; left out of the serialized output when empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub potential_conflicts: Vec<ConflictInfo>,
}
188
/// Serializable description of one entry listed in [`StoreResult::potential_conflicts`].
#[derive(Debug, Clone, Serialize)]
pub struct ConflictInfo {
    /// Identifier string of the conflicting entry.
    pub id: String,
    /// Text content of the conflicting entry.
    pub content: String,
    /// Similarity score. NOTE(review): the metric and its range are not defined in this file.
    pub similarity: f32,
}
199
/// Serializable search hit built from an [`Item`] and a similarity score.
///
/// `project_id` and `metadata` are carried on the value but never serialized.
#[derive(Debug, Clone, Serialize)]
pub struct SearchResult {
    /// Identifier copied from the source item.
    pub id: String,
    /// Display text: the item's full content when built with [`SearchResult::from_item`];
    /// the item's title, or the first 100 characters of its content when it has no title,
    /// when built with [`SearchResult::from_item_with_excerpt`].
    pub content: String,
    /// Excerpt supplied by the caller; left out of the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub relevant_excerpt: Option<String>,
    /// Similarity score supplied by the caller.
    /// NOTE(review): the metric and its range are not defined in this file.
    pub similarity: f32,
    /// Tags copied from the source item; left out of the serialized output when empty.
    /// (`default` only matters for deserialization, which this type does not derive.)
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tags: Vec<String>,
    /// Source string copied from the item; left out of the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source: Option<String>,
    /// Creation timestamp copied from the source item.
    pub created_at: DateTime<Utc>,
    /// Project identifier copied from the item; never serialized.
    #[serde(skip)]
    pub project_id: Option<String>,
    /// Metadata copied from the item; never serialized.
    #[serde(skip)]
    pub metadata: Option<Value>,
}
227
228impl SearchResult {
229 pub fn from_item(item: &Item, similarity: f32) -> Self {
231 Self {
232 id: item.id.clone(),
233 content: item.content.clone(),
234 relevant_excerpt: None,
235 similarity,
236 tags: item.tags.clone(),
237 source: item.source.clone(),
238 created_at: item.created_at,
239 project_id: item.project_id.clone(),
240 metadata: item.metadata.clone(),
241 }
242 }
243
244 pub fn from_item_with_excerpt(item: &Item, similarity: f32, excerpt: String) -> Self {
246 let content = item.title.clone().unwrap_or_else(|| {
247 item.content.chars().take(100).collect()
249 });
250 Self {
251 id: item.id.clone(),
252 content,
253 relevant_excerpt: Some(excerpt),
254 similarity,
255 tags: item.tags.clone(),
256 source: item.source.clone(),
257 created_at: item.created_at,
258 project_id: item.project_id.clone(),
259 metadata: item.metadata.clone(),
260 }
261 }
262}
263
/// Optional criteria bundle; the derived `Default` sets no tag filter, no similarity
/// threshold, and `include_expired = false`.
///
/// NOTE(review): the code that applies these filters lives outside this file.
#[derive(Debug, Default, Clone)]
pub struct ItemFilters {
    /// Tag list to filter by, if any.
    /// NOTE(review): any-vs-all matching semantics are not visible here.
    pub tags: Option<Vec<String>>,
    /// Similarity threshold, if any (presumably a lower bound — confirm against callers).
    pub min_similarity: Option<f32>,
    /// Whether expired items should be included; `false` by default.
    pub include_expired: bool,
}
274
275impl ItemFilters {
276 pub fn new() -> Self {
277 Self::default()
278 }
279
280 pub fn with_tags(mut self, tags: Vec<String>) -> Self {
281 self.tags = Some(tags);
282 self
283 }
284
285 pub fn with_min_similarity(mut self, min_similarity: f32) -> Self {
286 self.min_similarity = Some(min_similarity);
287 self
288 }
289
290 pub fn include_expired(mut self, include: bool) -> Self {
291 self.include_expired = include;
292 self
293 }
294}
295
#[cfg(test)]
mod tests {
    use super::*;

    /// Builder calls land in the matching fields; `is_chunked` stays off by default.
    #[test]
    fn test_item_creation() {
        let tags = vec![String::from("tag1"), String::from("tag2")];
        let item = Item::new("Test content")
            .with_title("Test Title")
            .with_tags(tags)
            .with_source("test-source")
            .with_project_id("project-123");

        assert_eq!(item.content, "Test content");
        assert_eq!(item.title.as_deref(), Some("Test Title"));
        assert_eq!(item.tags, ["tag1", "tag2"]);
        assert_eq!(item.source.as_deref(), Some("test-source"));
        assert_eq!(item.project_id.as_deref(), Some("project-123"));
        assert!(!item.is_chunked);
    }

    /// A non-chunked item embeds its full content unchanged.
    #[test]
    fn test_embedding_text_short() {
        let text = Item::new("Short content").embedding_text();
        assert_eq!(text, "Short content");
    }

    /// A chunked item embeds its title followed by a truncated content preview.
    #[test]
    fn test_embedding_text_chunked() {
        let body = "a".repeat(1000);
        let text = Item::new(body)
            .with_title("My Title")
            .with_chunked(true)
            .embedding_text();

        assert!(text.starts_with("My Title "));
        assert!(text.len() < 600);
    }

    /// Expiry is judged against the current time, in both directions.
    #[test]
    fn test_item_expiration() {
        let an_hour_ago = Utc::now() - chrono::Duration::hours(1);
        let expired = Item::new("Expired").with_expires_at(an_hour_ago);
        assert!(expired.is_expired());

        let in_an_hour = Utc::now() + chrono::Duration::hours(1);
        let valid = Item::new("Valid").with_expires_at(in_an_hour);
        assert!(!valid.is_expired());
    }

    /// `Chunk::new` plus `with_context` populate the expected fields.
    #[test]
    fn test_chunk_creation() {
        let chunk = Chunk::new("item-123", 0, "Chunk content").with_context("## Header");

        assert_eq!(chunk.item_id, "item-123");
        assert_eq!(chunk.chunk_index, 0);
        assert_eq!(chunk.content, "Chunk content");
        assert_eq!(chunk.context.as_deref(), Some("## Header"));
    }

    /// `from_item` copies the full content and leaves the excerpt unset.
    #[test]
    fn test_search_result_from_item() {
        let item = Item::new("Test content")
            .with_tags(vec![String::from("test")])
            .with_source("test");

        let result = SearchResult::from_item(&item, 0.95);

        assert_eq!(result.content, "Test content");
        assert_eq!(result.similarity, 0.95);
        assert!(result.relevant_excerpt.is_none());
    }

    /// `from_item_with_excerpt` shows the title as content and keeps the excerpt.
    #[test]
    fn test_search_result_with_excerpt() {
        let item = Item::new("Long content here")
            .with_title("Document Title")
            .with_chunked(true);
        let excerpt = String::from("relevant part");

        let result = SearchResult::from_item_with_excerpt(&item, 0.85, excerpt);

        assert_eq!(result.content, "Document Title");
        assert_eq!(result.relevant_excerpt.as_deref(), Some("relevant part"));
    }

    /// An empty conflict list is omitted from the JSON output entirely.
    #[test]
    fn test_store_result_serialization() {
        let result = StoreResult {
            id: String::from("abc123"),
            potential_conflicts: Vec::new(),
        };

        let json = serde_json::to_string(&result).unwrap();

        assert!(json.contains("abc123"));
        assert!(!json.contains("potential_conflicts"));
    }

    /// A non-empty conflict list is serialized together with its entries.
    #[test]
    fn test_store_result_with_conflicts() {
        let conflict = ConflictInfo {
            id: String::from("old-id"),
            content: String::from("Old content"),
            similarity: 0.92,
        };
        let result = StoreResult {
            id: String::from("new-id"),
            potential_conflicts: vec![conflict],
        };

        let json = serde_json::to_string(&result).unwrap();

        assert!(json.contains("new-id"));
        assert!(json.contains("potential_conflicts"));
        assert!(json.contains("old-id"));
        assert!(json.contains("0.92"));
    }

    /// Every `ConflictInfo` field shows up in its JSON form.
    #[test]
    fn test_conflict_info_serialization() {
        let conflict = ConflictInfo {
            id: String::from("conflict-123"),
            content: String::from("Conflicting content"),
            similarity: 0.87,
        };

        let json = serde_json::to_string(&conflict).unwrap();

        assert!(json.contains("conflict-123"));
        assert!(json.contains("Conflicting content"));
        assert!(json.contains("0.87"));
    }
}