1use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::path::PathBuf;
9
10#[derive(Debug, Clone, Serialize, Deserialize)]
18pub struct VectorConfig {
19 #[serde(default = "default_backend")]
21 pub backend: String,
22
23 #[serde(default = "default_provider")]
25 pub provider: String,
26
27 #[serde(default = "default_model")]
29 pub model: String,
30
31 #[serde(default)]
33 pub dimension: usize,
34
35 pub db_path: Option<String>,
37
38 pub content_path: Option<String>,
40
41 pub cache_path: Option<String>,
43
44 #[serde(default = "default_true")]
46 pub enabled: bool,
47
48 #[serde(default = "default_limit")]
50 pub default_limit: usize,
51
52 #[serde(default = "default_threshold")]
54 pub similarity_threshold: f32,
55
56 #[serde(default = "default_batch_size")]
58 pub batch_size: usize,
59}
60
/// Serde fallback for `VectorConfig::backend`: the `"lancedb"` backend.
fn default_backend() -> String {
    String::from("lancedb")
}
64
/// Serde fallback for `VectorConfig::provider`: the `"fastembed"` provider.
fn default_provider() -> String {
    String::from("fastembed")
}
68
/// Serde fallback for `VectorConfig::model`: `"bge-small-en-v1.5"`.
fn default_model() -> String {
    String::from("bge-small-en-v1.5")
}
72
/// Serde fallback for boolean fields that should be on unless stated
/// otherwise (used by `VectorConfig::enabled`).
fn default_true() -> bool {
    const FALLBACK: bool = true;
    FALLBACK
}
76
/// Serde fallback for `VectorConfig::default_limit`: ten results.
fn default_limit() -> usize {
    const FALLBACK_LIMIT: usize = 10;
    FALLBACK_LIMIT
}
80
/// Serde fallback for `VectorConfig::similarity_threshold`: `0.0`.
fn default_threshold() -> f32 {
    const FALLBACK_THRESHOLD: f32 = 0.0;
    FALLBACK_THRESHOLD
}
84
/// Serde fallback for `VectorConfig::batch_size`: 64.
fn default_batch_size() -> usize {
    const FALLBACK_BATCH_SIZE: usize = 64;
    FALLBACK_BATCH_SIZE
}
88
89impl Default for VectorConfig {
90 fn default() -> Self {
91 Self {
92 backend: default_backend(),
93 provider: default_provider(),
94 model: default_model(),
95 dimension: 0,
96 db_path: None,
97 content_path: None,
98 cache_path: None,
99 enabled: default_true(),
100 default_limit: default_limit(),
101 similarity_threshold: default_threshold(),
102 batch_size: default_batch_size(),
103 }
104 }
105}
106
107#[derive(Debug, Clone, Serialize, Deserialize)]
116pub struct VectorDocument {
117 pub id: String,
119
120 pub text: String,
122
123 #[serde(skip_serializing_if = "Option::is_none")]
125 pub category: Option<String>,
126
127 #[serde(default, skip_serializing_if = "HashMap::is_empty")]
129 pub metadata: HashMap<String, String>,
130}
131
132impl VectorDocument {
133 pub fn new(id: impl Into<String>, text: impl Into<String>) -> Self {
135 Self {
136 id: id.into(),
137 text: text.into(),
138 category: None,
139 metadata: HashMap::new(),
140 }
141 }
142
143 pub fn with_category(mut self, category: impl Into<String>) -> Self {
145 self.category = Some(category.into());
146 self
147 }
148
149 pub fn with_metadata(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
151 self.metadata.insert(key.into(), value.into());
152 self
153 }
154}
155
156#[derive(Debug, Clone, Serialize, Deserialize)]
158pub struct EmbeddedDocument {
159 pub document: VectorDocument,
161
162 pub embedding: Vec<f32>,
164}
165
166impl EmbeddedDocument {
167 pub fn new(document: VectorDocument, embedding: Vec<f32>) -> Self {
169 Self {
170 document,
171 embedding,
172 }
173 }
174
175 pub fn dimension(&self) -> usize {
177 self.embedding.len()
178 }
179}
180
181#[derive(Debug, Clone, Default, Serialize, Deserialize)]
187pub struct VectorSearchParams {
188 pub query: String,
190
191 #[serde(skip_serializing_if = "Option::is_none")]
193 pub limit: Option<usize>,
194
195 #[serde(skip_serializing_if = "Option::is_none")]
197 pub similarity_threshold: Option<f32>,
198
199 #[serde(skip_serializing_if = "Option::is_none")]
201 pub category: Option<String>,
202
203 #[serde(default, skip_serializing_if = "HashMap::is_empty")]
205 pub metadata_filters: HashMap<String, String>,
206}
207
208impl VectorSearchParams {
209 pub fn new(query: impl Into<String>) -> Self {
211 Self {
212 query: query.into(),
213 ..Default::default()
214 }
215 }
216
217 pub fn with_limit(mut self, limit: usize) -> Self {
219 self.limit = Some(limit);
220 self
221 }
222
223 pub fn with_threshold(mut self, threshold: f32) -> Self {
225 self.similarity_threshold = Some(threshold);
226 self
227 }
228
229 pub fn with_category(mut self, category: impl Into<String>) -> Self {
231 self.category = Some(category.into());
232 self
233 }
234
235 pub fn with_filter(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
237 self.metadata_filters.insert(key.into(), value.into());
238 self
239 }
240}
241
242#[derive(Debug, Clone, Serialize, Deserialize)]
244pub struct VectorSearchResult {
245 pub id: String,
247
248 pub score: f32,
250
251 pub distance: f32,
253
254 #[serde(default, skip_serializing_if = "HashMap::is_empty")]
256 pub metadata: HashMap<String, String>,
257}
258
259#[derive(Debug, Clone, Serialize, Deserialize)]
261pub struct VectorSearchResults {
262 pub items: Vec<VectorSearchResult>,
264
265 pub total: usize,
267
268 pub backend: String,
270}
271
272impl VectorSearchResults {
273 pub fn empty(backend: &str) -> Self {
275 Self {
276 items: Vec::new(),
277 total: 0,
278 backend: backend.to_string(),
279 }
280 }
281}
282
283#[derive(Debug, Clone, Serialize, Deserialize)]
289pub struct VectorIndexStats {
290 pub documents_indexed: usize,
292
293 pub files_processed: usize,
295
296 pub files_skipped: usize,
298
299 pub embedding_dimension: usize,
301
302 pub content_hash: String,
304
305 pub build_duration_ms: u64,
307
308 #[serde(default, skip_serializing_if = "Vec::is_empty")]
310 pub errors: Vec<BuildError>,
311
312 #[serde(default)]
314 pub from_cache: bool,
315}
316
317#[derive(Debug, Clone, Serialize, Deserialize)]
319pub struct BuildError {
320 pub file: PathBuf,
322 pub message: String,
324}
325
/// Unit tests for the vector types: defaults, builder helpers, and the serde
/// attributes (default values and skipped-when-empty fields).
#[cfg(test)]
mod tests {
    use super::*;

    // ---- VectorConfig ----

    /// `Default` yields the documented fallback for every field.
    #[test]
    fn test_vector_config_default() {
        let config = VectorConfig::default();
        assert_eq!(config.backend, "lancedb");
        assert_eq!(config.provider, "fastembed");
        assert_eq!(config.model, "bge-small-en-v1.5");
        assert_eq!(config.dimension, 0);
        assert!(config.db_path.is_none());
        assert!(config.content_path.is_none());
        assert!(config.cache_path.is_none());
        assert!(config.enabled);
        assert_eq!(config.default_limit, 10);
        assert_eq!(config.similarity_threshold, 0.0);
        assert_eq!(config.batch_size, 64);
    }

    #[test]
    fn test_vector_config_serialization() {
        let config = VectorConfig {
            backend: "lancedb".to_string(),
            db_path: Some("/tmp/vectors".to_string()),
            ..Default::default()
        };

        let json = serde_json::to_string(&config).unwrap();
        assert!(json.contains("\"backend\":\"lancedb\""));
        assert!(json.contains("\"/tmp/vectors\""));
    }

    /// Fields missing from the input pick up their serde defaults.
    #[test]
    fn test_vector_config_deserialization_with_defaults() {
        let json = r#"{"backend": "lancedb"}"#;
        let config: VectorConfig = serde_json::from_str(json).unwrap();

        assert_eq!(config.backend, "lancedb");
        assert_eq!(config.default_limit, 10);
        assert!(config.enabled);
        assert_eq!(config.batch_size, 64);
    }

    // ---- VectorDocument ----

    #[test]
    fn test_vector_document_new() {
        let doc = VectorDocument::new("doc-1", "Hello world");
        assert_eq!(doc.id, "doc-1");
        assert_eq!(doc.text, "Hello world");
        assert!(doc.category.is_none());
        assert!(doc.metadata.is_empty());
    }

    #[test]
    fn test_vector_document_with_category() {
        let doc = VectorDocument::new("doc-1", "text").with_category("harmony");
        assert_eq!(doc.category, Some("harmony".to_string()));
    }

    #[test]
    fn test_vector_document_with_metadata() {
        let doc = VectorDocument::new("doc-1", "text")
            .with_metadata("author", "test")
            .with_metadata("tier", "beginner");

        assert_eq!(doc.metadata.len(), 2);
        assert_eq!(doc.metadata.get("author").unwrap(), "test");
        assert_eq!(doc.metadata.get("tier").unwrap(), "beginner");
    }

    /// Round-trips a fully populated document through JSON.
    #[test]
    fn test_vector_document_serialization() {
        let doc = VectorDocument::new("doc-1", "text content")
            .with_category("test")
            .with_metadata("key", "value");

        let json = serde_json::to_string(&doc).unwrap();
        assert!(json.contains("doc-1"));
        assert!(json.contains("text content"));
        assert!(json.contains("test"));

        let deserialized: VectorDocument = serde_json::from_str(&json).unwrap();
        assert_eq!(deserialized.id, "doc-1");
        assert_eq!(deserialized.text, "text content");
        assert_eq!(deserialized.category, Some("test".to_string()));
    }

    /// `None` category and empty metadata are omitted from the output.
    #[test]
    fn test_vector_document_serialization_skips_empty() {
        let doc = VectorDocument::new("doc-1", "text");
        let json = serde_json::to_string(&doc).unwrap();

        assert!(!json.contains("category"));
        assert!(!json.contains("metadata"));
    }

    // ---- EmbeddedDocument ----

    #[test]
    fn test_embedded_document_new() {
        let doc = VectorDocument::new("doc-1", "text");
        let embedding = vec![0.1, 0.2, 0.3];
        let embedded = EmbeddedDocument::new(doc, embedding);

        assert_eq!(embedded.document.id, "doc-1");
        assert_eq!(embedded.embedding.len(), 3);
        assert_eq!(embedded.dimension(), 3);
    }

    // ---- VectorSearchParams ----

    #[test]
    fn test_search_params_default() {
        let params = VectorSearchParams::default();
        assert!(params.query.is_empty());
        assert!(params.limit.is_none());
        assert!(params.similarity_threshold.is_none());
        assert!(params.category.is_none());
        assert!(params.metadata_filters.is_empty());
    }

    #[test]
    fn test_search_params_builder() {
        let params = VectorSearchParams::new("semantic query")
            .with_limit(5)
            .with_threshold(0.5)
            .with_category("harmony")
            .with_filter("tier", "advanced");

        assert_eq!(params.query, "semantic query");
        assert_eq!(params.limit, Some(5));
        assert_eq!(params.similarity_threshold, Some(0.5));
        assert_eq!(params.category, Some("harmony".to_string()));
        assert_eq!(params.metadata_filters.get("tier").unwrap(), "advanced");
    }

    #[test]
    fn test_search_params_serialization() {
        let params = VectorSearchParams::new("test query").with_limit(10);

        let json = serde_json::to_string(&params).unwrap();
        assert!(json.contains("test query"));
        assert!(json.contains("10"));

        // Unset optional fields must not appear in the output at all.
        let minimal = VectorSearchParams::new("q");
        let json = serde_json::to_string(&minimal).unwrap();
        assert!(!json.contains("limit"));
        assert!(!json.contains("similarity_threshold"));
    }

    // ---- VectorSearchResult / VectorSearchResults ----

    #[test]
    fn test_search_result_serialization() {
        let result = VectorSearchResult {
            id: "doc-1".to_string(),
            score: 0.85,
            distance: 0.176,
            metadata: HashMap::from([("category".to_string(), "harmony".to_string())]),
        };

        let json = serde_json::to_string(&result).unwrap();
        assert!(json.contains("doc-1"));
        assert!(json.contains("0.85"));
    }

    #[test]
    fn test_search_result_empty_metadata_skipped() {
        let result = VectorSearchResult {
            id: "doc-1".to_string(),
            score: 0.5,
            distance: 1.0,
            metadata: HashMap::new(),
        };

        let json = serde_json::to_string(&result).unwrap();
        assert!(!json.contains("metadata"));
    }

    #[test]
    fn test_search_results_empty() {
        let results = VectorSearchResults::empty("lancedb");
        assert!(results.items.is_empty());
        assert_eq!(results.total, 0);
        assert_eq!(results.backend, "lancedb");
    }

    // ---- VectorIndexStats ----

    #[test]
    fn test_index_stats_serialization() {
        let stats = VectorIndexStats {
            documents_indexed: 100,
            files_processed: 50,
            files_skipped: 2,
            embedding_dimension: 384,
            content_hash: "abc123".to_string(),
            build_duration_ms: 1500,
            errors: vec![],
            from_cache: false,
        };

        let json = serde_json::to_string(&stats).unwrap();
        assert!(json.contains("100"));
        assert!(json.contains("384"));
        assert!(json.contains("abc123"));

        // An empty error list is skipped during serialization.
        assert!(!json.contains("errors"));
    }

    #[test]
    fn test_index_stats_with_errors() {
        let stats = VectorIndexStats {
            documents_indexed: 10,
            files_processed: 12,
            files_skipped: 2,
            embedding_dimension: 384,
            content_hash: "hash".to_string(),
            build_duration_ms: 500,
            errors: vec![BuildError {
                file: PathBuf::from("/test/bad.md"),
                message: "parse error".to_string(),
            }],
            from_cache: false,
        };

        let json = serde_json::to_string(&stats).unwrap();
        assert!(json.contains("errors"));
        assert!(json.contains("parse error"));
    }
}