vectorizer_sdk/
models.rs

1//! Data models for the Vectorizer SDK
2
3use chrono::{DateTime, Utc};
4use serde::{Deserialize, Serialize};
5use std::collections::HashMap;
6
7// Re-export hybrid search models
8pub mod hybrid_search;
9pub use hybrid_search::*;
10
11/// Vector similarity metrics
12#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
13#[serde(rename_all = "snake_case")]
14pub enum SimilarityMetric {
15    /// Cosine similarity
16    Cosine,
17    /// Euclidean distance
18    Euclidean,
19    /// Dot product
20    DotProduct,
21}
22
23impl Default for SimilarityMetric {
24    fn default() -> Self {
25        Self::Cosine
26    }
27}
28
29/// Vector representation
30#[derive(Debug, Clone, Serialize, Deserialize)]
31pub struct Vector {
32    /// Unique identifier for the vector
33    pub id: String,
34    /// Vector data as an array of numbers
35    pub data: Vec<f32>,
36    /// Optional metadata associated with the vector
37    pub metadata: Option<HashMap<String, serde_json::Value>>,
38}
39
40/// Collection representation
41#[derive(Debug, Clone, Serialize, Deserialize)]
42pub struct Collection {
43    /// Collection name
44    pub name: String,
45    /// Vector dimension
46    pub dimension: usize,
47    /// Similarity metric used for search
48    pub similarity_metric: SimilarityMetric,
49    /// Optional description
50    pub description: Option<String>,
51    /// Creation timestamp
52    pub created_at: Option<DateTime<Utc>>,
53    /// Last update timestamp
54    pub updated_at: Option<DateTime<Utc>>,
55}
56
57/// Collection information
58#[derive(Debug, Clone, Serialize, Deserialize)]
59pub struct CollectionInfo {
60    /// Collection name
61    pub name: String,
62    /// Vector dimension
63    pub dimension: usize,
64    /// Similarity metric used for search
65    pub metric: String,
66    /// Number of vectors in the collection
67    pub vector_count: usize,
68    /// Number of documents in the collection
69    pub document_count: usize,
70    /// Creation timestamp
71    pub created_at: String,
72    /// Last update timestamp
73    pub updated_at: String,
74    /// Indexing status
75    pub indexing_status: IndexingStatus,
76}
77
78/// Indexing status
79#[derive(Debug, Clone, Serialize, Deserialize)]
80pub struct IndexingStatus {
81    /// Status
82    pub status: String,
83    /// Progress percentage
84    pub progress: f32,
85    /// Total documents
86    pub total_documents: usize,
87    /// Processed documents
88    pub processed_documents: usize,
89    /// Vector count
90    pub vector_count: usize,
91    /// Estimated time remaining
92    pub estimated_time_remaining: Option<String>,
93    /// Last updated timestamp
94    pub last_updated: String,
95}
96
97/// Search result
98#[derive(Debug, Clone, Serialize, Deserialize)]
99pub struct SearchResult {
100    /// Vector ID
101    pub id: String,
102    /// Similarity score
103    pub score: f32,
104    /// Vector content (if available)
105    pub content: Option<String>,
106    /// Optional metadata
107    pub metadata: Option<HashMap<String, serde_json::Value>>,
108}
109
110/// Search response
111#[derive(Debug, Clone, Serialize, Deserialize)]
112pub struct SearchResponse {
113    /// Search results
114    pub results: Vec<SearchResult>,
115    /// Query time in milliseconds
116    pub query_time_ms: f64,
117}
118
119/// Embedding request
120#[derive(Debug, Clone, Serialize, Deserialize)]
121pub struct EmbeddingRequest {
122    /// Text to embed
123    pub text: String,
124    /// Optional model to use for embedding
125    pub model: Option<String>,
126    /// Optional parameters for embedding generation
127    pub parameters: Option<EmbeddingParameters>,
128}
129
130/// Embedding parameters
131#[derive(Debug, Clone, Serialize, Deserialize)]
132pub struct EmbeddingParameters {
133    /// Maximum sequence length
134    pub max_length: Option<usize>,
135    /// Whether to normalize the embedding
136    pub normalize: Option<bool>,
137    /// Optional prefix for the text
138    pub prefix: Option<String>,
139}
140
141/// Embedding response
142#[derive(Debug, Clone, Serialize, Deserialize)]
143pub struct EmbeddingResponse {
144    /// Generated embedding vector
145    pub embedding: Vec<f32>,
146    /// Model used for embedding
147    pub model: String,
148    /// Text that was embedded
149    pub text: String,
150    /// Embedding dimension
151    pub dimension: usize,
152    /// Provider used
153    pub provider: String,
154}
155
156/// Health status
157#[derive(Debug, Clone, Serialize, Deserialize)]
158pub struct HealthStatus {
159    /// Service status
160    pub status: String,
161    /// Service version
162    pub version: String,
163    /// Timestamp
164    pub timestamp: String,
165    /// Uptime in seconds
166    pub uptime: Option<u64>,
167    /// Number of collections
168    pub collections: Option<usize>,
169    /// Total number of vectors
170    pub total_vectors: Option<usize>,
171}
172
173/// Collections list response
174#[derive(Debug, Clone, Serialize, Deserialize)]
175pub struct CollectionsResponse {
176    /// List of collections
177    pub collections: Vec<CollectionInfo>,
178}
179
180/// Create collection response
181#[derive(Debug, Clone, Serialize, Deserialize)]
182pub struct CreateCollectionResponse {
183    /// Success message
184    pub message: String,
185    /// Collection name
186    pub collection: String,
187}
188
189/// Database statistics
190#[derive(Debug, Clone, Serialize, Deserialize)]
191pub struct DatabaseStats {
192    /// Total number of collections
193    pub total_collections: usize,
194    /// Total number of vectors
195    pub total_vectors: usize,
196    /// Total memory estimate in bytes
197    pub total_memory_estimate_bytes: usize,
198    /// Collections information
199    pub collections: Vec<CollectionStats>,
200}
201
202/// Collection statistics
203#[derive(Debug, Clone, Serialize, Deserialize)]
204pub struct CollectionStats {
205    /// Collection name
206    pub name: String,
207    /// Number of vectors
208    pub vector_count: usize,
209    /// Vector dimension
210    pub dimension: usize,
211    /// Memory estimate in bytes
212    pub memory_estimate_bytes: usize,
213}
214
215/// Batch text request
216#[derive(Debug, Clone, Serialize, Deserialize)]
217pub struct BatchTextRequest {
218    /// Text ID
219    pub id: String,
220    /// Text content
221    pub text: String,
222    /// Optional metadata
223    pub metadata: Option<HashMap<String, String>>,
224}
225
226/// Batch configuration
227#[derive(Debug, Clone, Serialize, Deserialize)]
228pub struct BatchConfig {
229    /// Maximum batch size
230    pub max_batch_size: Option<usize>,
231    /// Number of parallel workers
232    pub parallel_workers: Option<usize>,
233    /// Whether operations should be atomic
234    pub atomic: Option<bool>,
235}
236
237/// Batch insert request
238#[derive(Debug, Clone, Serialize, Deserialize)]
239pub struct BatchInsertRequest {
240    /// Texts to insert
241    pub texts: Vec<BatchTextRequest>,
242    /// Batch configuration
243    pub config: Option<BatchConfig>,
244}
245
246/// Batch response
247#[derive(Debug, Clone, Serialize, Deserialize)]
248pub struct BatchResponse {
249    /// Whether the operation was successful
250    pub success: bool,
251    /// Collection name
252    pub collection: String,
253    /// Operation type
254    pub operation: String,
255    /// Total number of operations
256    pub total_operations: usize,
257    /// Number of successful operations
258    pub successful_operations: usize,
259    /// Number of failed operations
260    pub failed_operations: usize,
261    /// Duration in milliseconds
262    pub duration_ms: u64,
263    /// Error messages
264    pub errors: Vec<String>,
265}
266
267/// Batch search query
268#[derive(Debug, Clone, Serialize, Deserialize)]
269pub struct BatchSearchQuery {
270    /// Query text
271    pub query: String,
272    /// Maximum number of results
273    pub limit: Option<usize>,
274    /// Minimum score threshold
275    pub score_threshold: Option<f32>,
276}
277
278/// Batch search request
279#[derive(Debug, Clone, Serialize, Deserialize)]
280pub struct BatchSearchRequest {
281    /// Search queries
282    pub queries: Vec<BatchSearchQuery>,
283    /// Batch configuration
284    pub config: Option<BatchConfig>,
285}
286
287/// Batch search response
288#[derive(Debug, Clone, Serialize, Deserialize)]
289pub struct BatchSearchResponse {
290    /// Whether the operation was successful
291    pub success: bool,
292    /// Collection name
293    pub collection: String,
294    /// Total number of queries
295    pub total_queries: usize,
296    /// Number of successful queries
297    pub successful_queries: usize,
298    /// Number of failed queries
299    pub failed_queries: usize,
300    /// Duration in milliseconds
301    pub duration_ms: u64,
302    /// Search results
303    pub results: Vec<Vec<SearchResult>>,
304    /// Error messages
305    pub errors: Vec<String>,
306}
307
308/// Batch vector update
309#[derive(Debug, Clone, Serialize, Deserialize)]
310pub struct BatchVectorUpdate {
311    /// Vector ID
312    pub id: String,
313    /// New vector data (optional)
314    pub data: Option<Vec<f32>>,
315    /// New metadata (optional)
316    pub metadata: Option<HashMap<String, serde_json::Value>>,
317}
318
319/// Batch update request
320#[derive(Debug, Clone, Serialize, Deserialize)]
321pub struct BatchUpdateRequest {
322    /// Vector updates
323    pub updates: Vec<BatchVectorUpdate>,
324    /// Batch configuration
325    pub config: Option<BatchConfig>,
326}
327
328/// Batch delete request
329#[derive(Debug, Clone, Serialize, Deserialize)]
330pub struct BatchDeleteRequest {
331    /// Vector IDs to delete
332    pub vector_ids: Vec<String>,
333    /// Batch configuration
334    pub config: Option<BatchConfig>,
335}
336
337/// Summarization methods
338#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
339#[serde(rename_all = "snake_case")]
340pub enum SummarizationMethod {
341    /// Extractive summarization
342    Extractive,
343    /// Keyword summarization
344    Keyword,
345    /// Sentence summarization
346    Sentence,
347    /// Abstractive summarization
348    Abstractive,
349}
350
351impl Default for SummarizationMethod {
352    fn default() -> Self {
353        Self::Extractive
354    }
355}
356
357/// Summarize text request
358#[derive(Debug, Clone, Serialize, Deserialize)]
359pub struct SummarizeTextRequest {
360    /// Text to summarize
361    pub text: String,
362    /// Summarization method
363    pub method: Option<SummarizationMethod>,
364    /// Maximum summary length
365    pub max_length: Option<usize>,
366    /// Compression ratio
367    pub compression_ratio: Option<f32>,
368    /// Language code
369    pub language: Option<String>,
370}
371
372/// Summarize text response
373#[derive(Debug, Clone, Serialize, Deserialize)]
374pub struct SummarizeTextResponse {
375    /// Summary ID
376    pub summary_id: String,
377    /// Original text
378    pub original_text: String,
379    /// Generated summary
380    pub summary: String,
381    /// Method used
382    pub method: String,
383    /// Original text length
384    pub original_length: usize,
385    /// Summary length
386    pub summary_length: usize,
387    /// Compression ratio
388    pub compression_ratio: f32,
389    /// Language
390    pub language: String,
391    /// Status
392    pub status: String,
393    /// Message
394    pub message: String,
395    /// Metadata
396    pub metadata: HashMap<String, String>,
397}
398
399/// Summarize context request
400#[derive(Debug, Clone, Serialize, Deserialize)]
401pub struct SummarizeContextRequest {
402    /// Context to summarize
403    pub context: String,
404    /// Summarization method
405    pub method: Option<SummarizationMethod>,
406    /// Maximum summary length
407    pub max_length: Option<usize>,
408    /// Compression ratio
409    pub compression_ratio: Option<f32>,
410    /// Language code
411    pub language: Option<String>,
412}
413
414/// Summarize context response
415#[derive(Debug, Clone, Serialize, Deserialize)]
416pub struct SummarizeContextResponse {
417    /// Summary ID
418    pub summary_id: String,
419    /// Original context
420    pub original_context: String,
421    /// Generated summary
422    pub summary: String,
423    /// Method used
424    pub method: String,
425    /// Original context length
426    pub original_length: usize,
427    /// Summary length
428    pub summary_length: usize,
429    /// Compression ratio
430    pub compression_ratio: f32,
431    /// Language
432    pub language: String,
433    /// Status
434    pub status: String,
435    /// Message
436    pub message: String,
437    /// Metadata
438    pub metadata: HashMap<String, String>,
439}
440
441/// Get summary response
442#[derive(Debug, Clone, Serialize, Deserialize)]
443pub struct GetSummaryResponse {
444    /// Summary ID
445    pub summary_id: String,
446    /// Original text
447    pub original_text: String,
448    /// Generated summary
449    pub summary: String,
450    /// Method used
451    pub method: String,
452    /// Original text length
453    pub original_length: usize,
454    /// Summary length
455    pub summary_length: usize,
456    /// Compression ratio
457    pub compression_ratio: f32,
458    /// Language
459    pub language: String,
460    /// Creation timestamp
461    pub created_at: String,
462    /// Metadata
463    pub metadata: HashMap<String, String>,
464    /// Status
465    pub status: String,
466}
467
468/// Summary info
469#[derive(Debug, Clone, Serialize, Deserialize)]
470pub struct SummaryInfo {
471    /// Summary ID
472    pub summary_id: String,
473    /// Method used
474    pub method: String,
475    /// Language
476    pub language: String,
477    /// Original text length
478    pub original_length: usize,
479    /// Summary length
480    pub summary_length: usize,
481    /// Compression ratio
482    pub compression_ratio: f32,
483    /// Creation timestamp
484    pub created_at: String,
485    /// Metadata
486    pub metadata: HashMap<String, String>,
487}
488
489/// List summaries response
490#[derive(Debug, Clone, Serialize, Deserialize)]
491pub struct ListSummariesResponse {
492    /// List of summaries
493    pub summaries: Vec<SummaryInfo>,
494    /// Total count
495    pub total_count: usize,
496    /// Status
497    pub status: String,
498}
499
500/// Indexing progress
501#[derive(Debug, Clone, Serialize, Deserialize)]
502pub struct IndexingProgress {
503    /// Whether indexing is in progress
504    pub is_indexing: bool,
505    /// Overall status
506    pub overall_status: String,
507    /// Collections being indexed
508    pub collections: Vec<CollectionProgress>,
509}
510
511/// Collection progress
512#[derive(Debug, Clone, Serialize, Deserialize)]
513pub struct CollectionProgress {
514    /// Collection name
515    pub collection_name: String,
516    /// Status
517    pub status: String,
518    /// Progress percentage
519    pub progress: f32,
520    /// Vector count
521    pub vector_count: usize,
522    /// Error message if any
523    pub error_message: Option<String>,
524    /// Last updated timestamp
525    pub last_updated: String,
526}
527
528// ===== INTELLIGENT SEARCH MODELS =====
529
530/// Intelligent search request
531#[derive(Debug, Clone, Serialize, Deserialize)]
532pub struct IntelligentSearchRequest {
533    /// Search query
534    pub query: String,
535    /// Collections to search (optional - searches all if not specified)
536    pub collections: Option<Vec<String>>,
537    /// Maximum number of results
538    pub max_results: Option<usize>,
539    /// Enable domain expansion
540    pub domain_expansion: Option<bool>,
541    /// Enable technical focus
542    pub technical_focus: Option<bool>,
543    /// Enable MMR diversification
544    pub mmr_enabled: Option<bool>,
545    /// MMR balance parameter (0.0-1.0)
546    pub mmr_lambda: Option<f32>,
547}
548
549/// Semantic search request
550#[derive(Debug, Clone, Serialize, Deserialize)]
551pub struct SemanticSearchRequest {
552    /// Search query
553    pub query: String,
554    /// Collection to search
555    pub collection: String,
556    /// Maximum number of results
557    pub max_results: Option<usize>,
558    /// Enable semantic reranking
559    pub semantic_reranking: Option<bool>,
560    /// Enable cross-encoder reranking
561    pub cross_encoder_reranking: Option<bool>,
562    /// Minimum similarity threshold
563    pub similarity_threshold: Option<f32>,
564}
565
566/// Contextual search request
567#[derive(Debug, Clone, Serialize, Deserialize)]
568pub struct ContextualSearchRequest {
569    /// Search query
570    pub query: String,
571    /// Collection to search
572    pub collection: String,
573    /// Metadata-based context filters
574    pub context_filters: Option<HashMap<String, serde_json::Value>>,
575    /// Maximum number of results
576    pub max_results: Option<usize>,
577    /// Enable context-aware reranking
578    pub context_reranking: Option<bool>,
579    /// Weight of context factors (0.0-1.0)
580    pub context_weight: Option<f32>,
581}
582
583/// Multi-collection search request
584#[derive(Debug, Clone, Serialize, Deserialize)]
585pub struct MultiCollectionSearchRequest {
586    /// Search query
587    pub query: String,
588    /// Collections to search
589    pub collections: Vec<String>,
590    /// Maximum results per collection
591    pub max_per_collection: Option<usize>,
592    /// Maximum total results
593    pub max_total_results: Option<usize>,
594    /// Enable cross-collection reranking
595    pub cross_collection_reranking: Option<bool>,
596}
597
598/// Intelligent search result
599#[derive(Debug, Clone, Serialize, Deserialize)]
600pub struct IntelligentSearchResult {
601    /// Result ID
602    pub id: String,
603    /// Similarity score
604    pub score: f32,
605    /// Result content
606    pub content: String,
607    /// Metadata
608    pub metadata: Option<HashMap<String, serde_json::Value>>,
609    /// Collection name
610    pub collection: Option<String>,
611    /// Query used for this result
612    pub query_used: Option<String>,
613}
614
615/// Intelligent search response
616#[derive(Debug, Clone, Serialize, Deserialize)]
617pub struct IntelligentSearchResponse {
618    /// Search results
619    pub results: Vec<IntelligentSearchResult>,
620    /// Total number of results found
621    pub total_results: usize,
622    /// Search duration in milliseconds
623    pub duration_ms: u64,
624    /// Queries generated
625    pub queries_generated: Option<Vec<String>>,
626    /// Collections searched
627    pub collections_searched: Option<Vec<String>>,
628    /// Search metadata
629    pub metadata: Option<HashMap<String, serde_json::Value>>,
630}
631
632/// Semantic search response
633#[derive(Debug, Clone, Serialize, Deserialize)]
634pub struct SemanticSearchResponse {
635    /// Search results
636    pub results: Vec<IntelligentSearchResult>,
637    /// Total number of results found
638    pub total_results: usize,
639    /// Search duration in milliseconds
640    pub duration_ms: u64,
641    /// Collection searched
642    pub collection: String,
643    /// Search metadata
644    pub metadata: Option<HashMap<String, serde_json::Value>>,
645}
646
647/// Contextual search response
648#[derive(Debug, Clone, Serialize, Deserialize)]
649pub struct ContextualSearchResponse {
650    /// Search results
651    pub results: Vec<IntelligentSearchResult>,
652    /// Total number of results found
653    pub total_results: usize,
654    /// Search duration in milliseconds
655    pub duration_ms: u64,
656    /// Collection searched
657    pub collection: String,
658    /// Context filters applied
659    pub context_filters: Option<HashMap<String, serde_json::Value>>,
660    /// Search metadata
661    pub metadata: Option<HashMap<String, serde_json::Value>>,
662}
663
664/// Multi-collection search response
665#[derive(Debug, Clone, Serialize, Deserialize)]
666pub struct MultiCollectionSearchResponse {
667    /// Search results
668    pub results: Vec<IntelligentSearchResult>,
669    /// Total number of results found
670    pub total_results: usize,
671    /// Search duration in milliseconds
672    pub duration_ms: u64,
673    /// Collections searched
674    pub collections_searched: Vec<String>,
675    /// Results per collection
676    pub results_per_collection: Option<HashMap<String, usize>>,
677    /// Search metadata
678    pub metadata: Option<HashMap<String, serde_json::Value>>,
679}
680
681// ==================== REPLICATION MODELS ====================
682
683/// Status of a replica node
684#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
685#[serde(rename_all = "PascalCase")]
686pub enum ReplicaStatus {
687    /// Replica is connected and healthy
688    Connected,
689    /// Replica is syncing data
690    Syncing,
691    /// Replica is lagging behind master
692    Lagging,
693    /// Replica is disconnected
694    Disconnected,
695}
696
697/// Information about a replica node
698#[derive(Debug, Clone, Serialize, Deserialize)]
699pub struct ReplicaInfo {
700    /// Unique identifier for the replica
701    pub replica_id: String,
702    /// Hostname or IP address of the replica
703    pub host: String,
704    /// Port number of the replica
705    pub port: u16,
706    /// Current status of the replica
707    pub status: String,
708    /// Timestamp of last heartbeat
709    pub last_heartbeat: DateTime<Utc>,
710    /// Number of operations successfully synced
711    pub operations_synced: u64,
712    
713    // Legacy fields (backwards compatible)
714    /// Legacy: Current offset on replica (deprecated, use operations_synced)
715    #[serde(skip_serializing_if = "Option::is_none")]
716    pub offset: Option<u64>,
717    /// Legacy: Lag in operations (deprecated, use status)
718    #[serde(skip_serializing_if = "Option::is_none")]
719    pub lag: Option<u64>,
720}
721
722/// Statistics for replication status
723#[derive(Debug, Clone, Serialize, Deserialize)]
724pub struct ReplicationStats {
725    // New fields (v1.2.0+)
726    /// Role of the node: Master or Replica
727    #[serde(skip_serializing_if = "Option::is_none")]
728    pub role: Option<String>,
729    /// Total bytes sent to replicas (Master only)
730    #[serde(skip_serializing_if = "Option::is_none")]
731    pub bytes_sent: Option<u64>,
732    /// Total bytes received from master (Replica only)
733    #[serde(skip_serializing_if = "Option::is_none")]
734    pub bytes_received: Option<u64>,
735    /// Timestamp of last synchronization
736    #[serde(skip_serializing_if = "Option::is_none")]
737    pub last_sync: Option<DateTime<Utc>>,
738    /// Number of operations pending replication
739    #[serde(skip_serializing_if = "Option::is_none")]
740    pub operations_pending: Option<usize>,
741    /// Size of snapshot data in bytes
742    #[serde(skip_serializing_if = "Option::is_none")]
743    pub snapshot_size: Option<usize>,
744    /// Number of connected replicas (Master only)
745    #[serde(skip_serializing_if = "Option::is_none")]
746    pub connected_replicas: Option<usize>,
747    
748    // Legacy fields (backwards compatible - always present)
749    /// Current offset on master node
750    pub master_offset: u64,
751    /// Current offset on replica node
752    pub replica_offset: u64,
753    /// Number of operations behind
754    pub lag_operations: u64,
755    /// Total operations replicated
756    pub total_replicated: u64,
757}
758
759/// Response for replication status endpoint
760#[derive(Debug, Clone, Serialize, Deserialize)]
761pub struct ReplicationStatusResponse {
762    /// Overall status message
763    pub status: String,
764    /// Detailed replication statistics
765    pub stats: ReplicationStats,
766    /// Optional message with additional information
767    #[serde(skip_serializing_if = "Option::is_none")]
768    pub message: Option<String>,
769}
770
771/// Response for listing replicas
772#[derive(Debug, Clone, Serialize, Deserialize)]
773pub struct ReplicaListResponse {
774    /// List of replica nodes
775    pub replicas: Vec<ReplicaInfo>,
776    /// Total count of replicas
777    pub count: usize,
778    /// Status message
779    pub message: String,
780}