vectorizer_sdk/
models.rs

1//! Data models for the Vectorizer SDK
2
3use chrono::{DateTime, Utc};
4use serde::{Deserialize, Serialize};
5use std::collections::HashMap;
6
7/// Vector similarity metrics
8#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
9#[serde(rename_all = "snake_case")]
10pub enum SimilarityMetric {
11    /// Cosine similarity
12    Cosine,
13    /// Euclidean distance
14    Euclidean,
15    /// Dot product
16    DotProduct,
17}
18
19impl Default for SimilarityMetric {
20    fn default() -> Self {
21        Self::Cosine
22    }
23}
24
/// Vector representation.
///
/// Bundles an identifier, the numeric components, and optional metadata.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Vector {
    /// Unique identifier for the vector
    pub id: String,
    /// Vector components, stored as 32-bit floats
    pub data: Vec<f32>,
    /// Optional metadata: string keys mapped to arbitrary JSON values
    pub metadata: Option<HashMap<String, serde_json::Value>>,
}
35
/// Collection representation.
///
/// Describes a named collection with a typed similarity metric and optional
/// description and timestamps. Compare with [`CollectionInfo`], which carries
/// the metric and timestamps as plain strings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Collection {
    /// Collection name
    pub name: String,
    /// Vector dimension
    pub dimension: usize,
    /// Similarity metric used for search, as a [`SimilarityMetric`] value
    pub similarity_metric: SimilarityMetric,
    /// Optional human-readable description
    pub description: Option<String>,
    /// Creation timestamp in UTC, if known
    pub created_at: Option<DateTime<Utc>>,
    /// Last update timestamp in UTC, if known
    pub updated_at: Option<DateTime<Utc>>,
}
52
/// Collection information.
///
/// Summary of a collection including counts and indexing state. Unlike
/// [`Collection`], the metric and the timestamps are plain strings here.
/// No `#[serde(default)]` is applied, so every field must be present when
/// deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollectionInfo {
    /// Collection name
    pub name: String,
    /// Vector dimension
    pub dimension: usize,
    /// Similarity metric used for search, as a free-form string
    // NOTE(review): presumably matches the `SimilarityMetric` wire names — confirm.
    pub metric: String,
    /// Number of vectors in the collection
    pub vector_count: usize,
    /// Number of documents in the collection
    pub document_count: usize,
    /// Creation timestamp as a string (format not specified in this file)
    pub created_at: String,
    /// Last update timestamp as a string (format not specified in this file)
    pub updated_at: String,
    /// Indexing status of the collection
    pub indexing_status: IndexingStatus,
}
73
/// Indexing status.
///
/// Progress snapshot of an indexing run; embedded in [`CollectionInfo`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexingStatus {
    /// Status label as a free-form string (the set of values is not defined here)
    pub status: String,
    /// Progress percentage
    // NOTE(review): scale (0–100 vs 0.0–1.0) is not visible here — confirm.
    pub progress: f32,
    /// Total number of documents
    pub total_documents: usize,
    /// Number of documents processed so far
    pub processed_documents: usize,
    /// Vector count
    pub vector_count: usize,
    /// Estimated time remaining, if available (string; format not specified here)
    pub estimated_time_remaining: Option<String>,
    /// Last updated timestamp as a string (format not specified in this file)
    pub last_updated: String,
}
92
/// Search result.
///
/// One hit returned by a search: the matching vector's ID and score, plus
/// optional content and metadata.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
    /// ID of the matching vector
    pub id: String,
    /// Similarity score
    // NOTE(review): range and ordering presumably depend on the metric — confirm.
    pub score: f32,
    /// Vector content (if available)
    pub content: Option<String>,
    /// Optional metadata: string keys mapped to arbitrary JSON values
    pub metadata: Option<HashMap<String, serde_json::Value>>,
}
105
/// Search response.
///
/// Wraps the list of hits together with the reported query time.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResponse {
    /// Search results
    pub results: Vec<SearchResult>,
    /// Query time in milliseconds (an `f64`, so fractional values are representable)
    pub query_time_ms: f64,
}
114
/// Embedding request.
///
/// Text to embed plus an optional model selection and optional tuning
/// parameters.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EmbeddingRequest {
    /// Text to embed
    pub text: String,
    /// Optional model to use for embedding
    // NOTE(review): behavior when `None` is decided by the consumer of this request.
    pub model: Option<String>,
    /// Optional parameters for embedding generation
    pub parameters: Option<EmbeddingParameters>,
}
125
/// Embedding parameters.
///
/// All fields are optional; used via [`EmbeddingRequest::parameters`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EmbeddingParameters {
    /// Maximum sequence length
    // NOTE(review): unit (tokens vs characters) is not visible here — confirm.
    pub max_length: Option<usize>,
    /// Whether to normalize the embedding
    pub normalize: Option<bool>,
    /// Optional prefix for the text
    pub prefix: Option<String>,
}
136
/// Embedding response.
///
/// The generated embedding together with the model, input text, dimension, and
/// provider. No `#[serde(default)]` is applied, so every field must be present
/// when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EmbeddingResponse {
    /// Generated embedding vector as 32-bit floats
    pub embedding: Vec<f32>,
    /// Model used for embedding
    pub model: String,
    /// Text that was embedded
    pub text: String,
    /// Embedding dimension
    // NOTE(review): presumably equals `embedding.len()`; nothing here enforces it.
    pub dimension: usize,
    /// Provider used
    pub provider: String,
}
151
/// Health status.
///
/// Service status, version, and timestamp are always present; uptime and the
/// aggregate counts are optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthStatus {
    /// Service status as a free-form string
    pub status: String,
    /// Service version
    pub version: String,
    /// Timestamp as a string (format not specified in this file)
    pub timestamp: String,
    /// Uptime in seconds, if reported
    pub uptime: Option<u64>,
    /// Number of collections, if reported
    pub collections: Option<usize>,
    /// Total number of vectors, if reported
    pub total_vectors: Option<usize>,
}
168
/// Collections list response.
///
/// Wrapper around a list of [`CollectionInfo`] entries under the
/// `collections` key.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollectionsResponse {
    /// List of collections
    pub collections: Vec<CollectionInfo>,
}
175
/// Create collection response.
///
/// Carries a message and the name of the collection it refers to.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CreateCollectionResponse {
    /// Success message
    pub message: String,
    /// Collection name
    pub collection: String,
}
184
/// Database statistics.
///
/// Aggregate totals plus a per-collection breakdown as [`CollectionStats`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DatabaseStats {
    /// Total number of collections
    pub total_collections: usize,
    /// Total number of vectors
    pub total_vectors: usize,
    /// Total memory estimate in bytes
    pub total_memory_estimate_bytes: usize,
    /// Per-collection statistics
    pub collections: Vec<CollectionStats>,
}
197
/// Collection statistics.
///
/// Size figures for a single collection; used as the element type of
/// [`DatabaseStats::collections`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollectionStats {
    /// Collection name
    pub name: String,
    /// Number of vectors
    pub vector_count: usize,
    /// Vector dimension
    pub dimension: usize,
    /// Memory estimate in bytes
    pub memory_estimate_bytes: usize,
}
210
/// Batch text request.
///
/// One text item of a [`BatchInsertRequest`]. Metadata values here are plain
/// strings, unlike the JSON-valued metadata on [`Vector`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BatchTextRequest {
    /// Text ID
    pub id: String,
    /// Text content
    pub text: String,
    /// Optional metadata as string-to-string pairs
    pub metadata: Option<HashMap<String, String>>,
}
221
/// Batch configuration.
///
/// Optional tuning knobs shared by the batch request types in this file.
/// Every field is optional.
// NOTE(review): defaults applied when a field is `None` are not visible here.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BatchConfig {
    /// Maximum batch size
    pub max_batch_size: Option<usize>,
    /// Number of parallel workers
    pub parallel_workers: Option<usize>,
    /// Whether operations should be atomic
    pub atomic: Option<bool>,
}
232
/// Batch insert request.
///
/// A list of texts to insert plus an optional [`BatchConfig`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BatchInsertRequest {
    /// Texts to insert
    pub texts: Vec<BatchTextRequest>,
    /// Optional batch configuration
    pub config: Option<BatchConfig>,
}
241
/// Batch response.
///
/// Outcome of a batch operation: overall success flag, operation counts,
/// duration, and error messages. No `#[serde(default)]` is applied, so every
/// field (including `errors`) must be present when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BatchResponse {
    /// Whether the operation was successful
    pub success: bool,
    /// Collection name
    pub collection: String,
    /// Operation type as a free-form string
    pub operation: String,
    /// Total number of operations
    pub total_operations: usize,
    /// Number of successful operations
    pub successful_operations: usize,
    /// Number of failed operations
    pub failed_operations: usize,
    /// Duration in whole milliseconds
    pub duration_ms: u64,
    /// Error messages
    pub errors: Vec<String>,
}
262
/// Batch search query.
///
/// A single query inside a [`BatchSearchRequest`], with an optional result
/// limit and score threshold.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BatchSearchQuery {
    /// Query text
    pub query: String,
    /// Maximum number of results
    pub limit: Option<usize>,
    /// Minimum score threshold
    pub score_threshold: Option<f32>,
}
273
/// Batch search request.
///
/// A list of [`BatchSearchQuery`] items plus an optional [`BatchConfig`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BatchSearchRequest {
    /// Search queries
    pub queries: Vec<BatchSearchQuery>,
    /// Optional batch configuration
    pub config: Option<BatchConfig>,
}
282
/// Batch search response.
///
/// Outcome of a batch search: success flag, query counts, duration, nested
/// results, and error messages. No `#[serde(default)]` is applied, so every
/// field must be present when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BatchSearchResponse {
    /// Whether the operation was successful
    pub success: bool,
    /// Collection name
    pub collection: String,
    /// Total number of queries
    pub total_queries: usize,
    /// Number of successful queries
    pub successful_queries: usize,
    /// Number of failed queries
    pub failed_queries: usize,
    /// Duration in whole milliseconds
    pub duration_ms: u64,
    /// Search results as a list of result lists
    // NOTE(review): presumably one inner list per query, in request order — confirm.
    pub results: Vec<Vec<SearchResult>>,
    /// Error messages
    pub errors: Vec<String>,
}
303
/// Batch vector update.
///
/// One update entry inside a [`BatchUpdateRequest`]: the target vector ID plus
/// optional new data and optional new metadata.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BatchVectorUpdate {
    /// ID of the vector to update
    pub id: String,
    /// New vector data (optional)
    pub data: Option<Vec<f32>>,
    /// New metadata (optional): string keys mapped to arbitrary JSON values
    pub metadata: Option<HashMap<String, serde_json::Value>>,
}
314
/// Batch update request.
///
/// A list of [`BatchVectorUpdate`] entries plus an optional [`BatchConfig`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BatchUpdateRequest {
    /// Vector updates
    pub updates: Vec<BatchVectorUpdate>,
    /// Optional batch configuration
    pub config: Option<BatchConfig>,
}
323
/// Batch delete request.
///
/// A list of vector IDs to delete plus an optional [`BatchConfig`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BatchDeleteRequest {
    /// Vector IDs to delete
    pub vector_ids: Vec<String>,
    /// Optional batch configuration
    pub config: Option<BatchConfig>,
}
332
333/// Summarization methods
334#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
335#[serde(rename_all = "snake_case")]
336pub enum SummarizationMethod {
337    /// Extractive summarization
338    Extractive,
339    /// Keyword summarization
340    Keyword,
341    /// Sentence summarization
342    Sentence,
343    /// Abstractive summarization
344    Abstractive,
345}
346
347impl Default for SummarizationMethod {
348    fn default() -> Self {
349        Self::Extractive
350    }
351}
352
/// Summarize text request.
///
/// The text to summarize plus optional method, length, ratio, and language
/// settings. Has the same optional fields as [`SummarizeContextRequest`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SummarizeTextRequest {
    /// Text to summarize
    pub text: String,
    /// Summarization method, as a [`SummarizationMethod`] value
    pub method: Option<SummarizationMethod>,
    /// Maximum summary length
    // NOTE(review): unit (characters, words, tokens) is not visible here — confirm.
    pub max_length: Option<usize>,
    /// Compression ratio
    pub compression_ratio: Option<f32>,
    /// Language code
    pub language: Option<String>,
}
367
/// Summarize text response.
///
/// The generated summary together with the original text and bookkeeping
/// fields. The method is a plain string here rather than a
/// [`SummarizationMethod`]. No `#[serde(default)]` is applied, so every field
/// must be present when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SummarizeTextResponse {
    /// Summary ID
    pub summary_id: String,
    /// Original text
    pub original_text: String,
    /// Generated summary
    pub summary: String,
    /// Method used, as a free-form string
    pub method: String,
    /// Original text length
    pub original_length: usize,
    /// Summary length
    pub summary_length: usize,
    /// Compression ratio
    // NOTE(review): direction (summary/original vs original/summary) not shown — confirm.
    pub compression_ratio: f32,
    /// Language
    pub language: String,
    /// Status as a free-form string
    pub status: String,
    /// Message
    pub message: String,
    /// Metadata as string-to-string pairs
    pub metadata: HashMap<String, String>,
}
394
/// Summarize context request.
///
/// The context to summarize plus optional method, length, ratio, and language
/// settings. Has the same optional fields as [`SummarizeTextRequest`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SummarizeContextRequest {
    /// Context to summarize
    pub context: String,
    /// Summarization method, as a [`SummarizationMethod`] value
    pub method: Option<SummarizationMethod>,
    /// Maximum summary length
    // NOTE(review): unit (characters, words, tokens) is not visible here — confirm.
    pub max_length: Option<usize>,
    /// Compression ratio
    pub compression_ratio: Option<f32>,
    /// Language code
    pub language: Option<String>,
}
409
/// Summarize context response.
///
/// The generated summary together with the original context and bookkeeping
/// fields. The method is a plain string here rather than a
/// [`SummarizationMethod`]. No `#[serde(default)]` is applied, so every field
/// must be present when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SummarizeContextResponse {
    /// Summary ID
    pub summary_id: String,
    /// Original context
    pub original_context: String,
    /// Generated summary
    pub summary: String,
    /// Method used, as a free-form string
    pub method: String,
    /// Original context length
    pub original_length: usize,
    /// Summary length
    pub summary_length: usize,
    /// Compression ratio
    // NOTE(review): direction (summary/original vs original/summary) not shown — confirm.
    pub compression_ratio: f32,
    /// Language
    pub language: String,
    /// Status as a free-form string
    pub status: String,
    /// Message
    pub message: String,
    /// Metadata as string-to-string pairs
    pub metadata: HashMap<String, String>,
}
436
/// Get summary response.
///
/// A stored summary with its original text, bookkeeping fields, and creation
/// timestamp. No `#[serde(default)]` is applied, so every field must be present
/// when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GetSummaryResponse {
    /// Summary ID
    pub summary_id: String,
    /// Original text
    pub original_text: String,
    /// Generated summary
    pub summary: String,
    /// Method used, as a free-form string
    pub method: String,
    /// Original text length
    pub original_length: usize,
    /// Summary length
    pub summary_length: usize,
    /// Compression ratio
    pub compression_ratio: f32,
    /// Language
    pub language: String,
    /// Creation timestamp as a string (format not specified in this file)
    pub created_at: String,
    /// Metadata as string-to-string pairs
    pub metadata: HashMap<String, String>,
    /// Status as a free-form string
    pub status: String,
}
463
/// Summary info.
///
/// Listing entry for a summary; used as the element type of
/// [`ListSummariesResponse::summaries`]. It carries no original-text or
/// summary-text field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SummaryInfo {
    /// Summary ID
    pub summary_id: String,
    /// Method used, as a free-form string
    pub method: String,
    /// Language
    pub language: String,
    /// Original text length
    pub original_length: usize,
    /// Summary length
    pub summary_length: usize,
    /// Compression ratio
    pub compression_ratio: f32,
    /// Creation timestamp as a string (format not specified in this file)
    pub created_at: String,
    /// Metadata as string-to-string pairs
    pub metadata: HashMap<String, String>,
}
484
/// List summaries response.
///
/// A list of [`SummaryInfo`] entries with a total count and a status string.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ListSummariesResponse {
    /// List of summaries
    pub summaries: Vec<SummaryInfo>,
    /// Total count
    // NOTE(review): may differ from `summaries.len()` if listing is paginated — confirm.
    pub total_count: usize,
    /// Status as a free-form string
    pub status: String,
}
495
/// Indexing progress.
///
/// Overall indexing state plus a per-collection breakdown as
/// [`CollectionProgress`] entries.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexingProgress {
    /// Whether indexing is in progress
    pub is_indexing: bool,
    /// Overall status as a free-form string
    pub overall_status: String,
    /// Collections being indexed
    pub collections: Vec<CollectionProgress>,
}
506
/// Collection progress.
///
/// Indexing progress of a single collection; used as the element type of
/// [`IndexingProgress::collections`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollectionProgress {
    /// Collection name
    pub collection_name: String,
    /// Status as a free-form string
    pub status: String,
    /// Progress percentage
    // NOTE(review): scale (0–100 vs 0.0–1.0) is not visible here — confirm.
    pub progress: f32,
    /// Vector count
    pub vector_count: usize,
    /// Error message if any
    pub error_message: Option<String>,
    /// Last updated timestamp as a string (format not specified in this file)
    pub last_updated: String,
}
523
524// ===== INTELLIGENT SEARCH MODELS =====
525
/// Intelligent search request.
///
/// A query with optional collection scoping and optional feature toggles.
/// Only `query` is required.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IntelligentSearchRequest {
    /// Search query
    pub query: String,
    /// Collections to search (optional - searches all if not specified)
    pub collections: Option<Vec<String>>,
    /// Maximum number of results
    pub max_results: Option<usize>,
    /// Enable domain expansion
    pub domain_expansion: Option<bool>,
    /// Enable technical focus
    pub technical_focus: Option<bool>,
    /// Enable MMR diversification
    pub mmr_enabled: Option<bool>,
    /// MMR balance parameter (0.0-1.0); the range is not validated by this type
    pub mmr_lambda: Option<f32>,
}
544
/// Semantic search request.
///
/// A query against a single named collection with optional reranking toggles
/// and an optional similarity threshold.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SemanticSearchRequest {
    /// Search query
    pub query: String,
    /// Collection to search
    pub collection: String,
    /// Maximum number of results
    pub max_results: Option<usize>,
    /// Enable semantic reranking
    pub semantic_reranking: Option<bool>,
    /// Enable cross-encoder reranking
    pub cross_encoder_reranking: Option<bool>,
    /// Minimum similarity threshold
    pub similarity_threshold: Option<f32>,
}
561
/// Contextual search request.
///
/// A query against a single named collection with optional metadata-based
/// context filters and context reranking settings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContextualSearchRequest {
    /// Search query
    pub query: String,
    /// Collection to search
    pub collection: String,
    /// Metadata-based context filters: string keys mapped to arbitrary JSON values
    pub context_filters: Option<HashMap<String, serde_json::Value>>,
    /// Maximum number of results
    pub max_results: Option<usize>,
    /// Enable context-aware reranking
    pub context_reranking: Option<bool>,
    /// Weight of context factors (0.0-1.0); the range is not validated by this type
    pub context_weight: Option<f32>,
}
578
/// Multi-collection search request.
///
/// A query against an explicit list of collections. Unlike
/// [`IntelligentSearchRequest`], the collection list is required here.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MultiCollectionSearchRequest {
    /// Search query
    pub query: String,
    /// Collections to search
    pub collections: Vec<String>,
    /// Maximum results per collection
    pub max_per_collection: Option<usize>,
    /// Maximum total results
    pub max_total_results: Option<usize>,
    /// Enable cross-collection reranking
    pub cross_collection_reranking: Option<bool>,
}
593
/// Intelligent search result.
///
/// One hit shared by the intelligent, semantic, contextual, and
/// multi-collection search responses in this file. Unlike [`SearchResult`],
/// `content` is required here.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IntelligentSearchResult {
    /// Result ID
    pub id: String,
    /// Similarity score
    pub score: f32,
    /// Result content (required, not optional)
    pub content: String,
    /// Optional metadata: string keys mapped to arbitrary JSON values
    pub metadata: Option<HashMap<String, serde_json::Value>>,
    /// Collection name, if provided
    pub collection: Option<String>,
    /// Query used for this result, if provided
    pub query_used: Option<String>,
}
610
/// Intelligent search response.
///
/// Hits plus the total count and duration. The generated queries, searched
/// collections, and metadata are optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IntelligentSearchResponse {
    /// Search results
    pub results: Vec<IntelligentSearchResult>,
    /// Total number of results found
    // NOTE(review): may exceed `results.len()` if results are truncated — confirm.
    pub total_results: usize,
    /// Search duration in whole milliseconds
    pub duration_ms: u64,
    /// Queries generated, if reported
    pub queries_generated: Option<Vec<String>>,
    /// Collections searched, if reported
    pub collections_searched: Option<Vec<String>>,
    /// Search metadata: string keys mapped to arbitrary JSON values
    pub metadata: Option<HashMap<String, serde_json::Value>>,
}
627
/// Semantic search response.
///
/// Hits plus the total count, duration, and name of the searched collection.
/// Metadata is optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SemanticSearchResponse {
    /// Search results
    pub results: Vec<IntelligentSearchResult>,
    /// Total number of results found
    pub total_results: usize,
    /// Search duration in whole milliseconds
    pub duration_ms: u64,
    /// Collection searched
    pub collection: String,
    /// Search metadata: string keys mapped to arbitrary JSON values
    pub metadata: Option<HashMap<String, serde_json::Value>>,
}
642
/// Contextual search response.
///
/// Hits plus the total count, duration, and name of the searched collection.
/// The applied context filters and the metadata are optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContextualSearchResponse {
    /// Search results
    pub results: Vec<IntelligentSearchResult>,
    /// Total number of results found
    pub total_results: usize,
    /// Search duration in whole milliseconds
    pub duration_ms: u64,
    /// Collection searched
    pub collection: String,
    /// Context filters applied, if reported
    pub context_filters: Option<HashMap<String, serde_json::Value>>,
    /// Search metadata: string keys mapped to arbitrary JSON values
    pub metadata: Option<HashMap<String, serde_json::Value>>,
}
659
/// Multi-collection search response.
///
/// Hits plus the total count, duration, and the list of searched collections.
/// The per-collection counts and the metadata are optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MultiCollectionSearchResponse {
    /// Search results
    pub results: Vec<IntelligentSearchResult>,
    /// Total number of results found
    pub total_results: usize,
    /// Search duration in whole milliseconds
    pub duration_ms: u64,
    /// Collections searched
    pub collections_searched: Vec<String>,
    /// Results per collection: collection name mapped to a result count
    pub results_per_collection: Option<HashMap<String, usize>>,
    /// Search metadata: string keys mapped to arbitrary JSON values
    pub metadata: Option<HashMap<String, serde_json::Value>>,
}