vectorizer_rust_sdk/
models.rs

1//! Data models for the Vectorizer SDK
2
3use chrono::{DateTime, Utc};
4use serde::{Deserialize, Serialize};
5use std::collections::HashMap;
6
7/// Vector similarity metrics
8#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
9#[serde(rename_all = "snake_case")]
10pub enum SimilarityMetric {
11    /// Cosine similarity
12    Cosine,
13    /// Euclidean distance
14    Euclidean,
15    /// Dot product
16    DotProduct,
17}
18
19impl Default for SimilarityMetric {
20    fn default() -> Self {
21        Self::Cosine
22    }
23}
24
/// A single stored vector: an identifier, its numeric components and
/// optional free-form metadata.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Vector {
    /// Unique identifier for the vector
    pub id: String,
    /// Vector components as 32-bit floats
    pub data: Vec<f32>,
    /// Optional metadata associated with the vector; values are arbitrary
    /// JSON (`serde_json::Value`) keyed by string
    pub metadata: Option<HashMap<String, serde_json::Value>>,
}
35
/// Collection representation with a typed similarity metric and typed
/// (UTC) timestamps.
///
/// Compare with [`CollectionInfo`], which carries the metric and timestamps
/// as plain strings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Collection {
    /// Collection name
    pub name: String,
    /// Vector dimension
    pub dimension: usize,
    /// Similarity metric used for search
    pub similarity_metric: SimilarityMetric,
    /// Optional human-readable description
    pub description: Option<String>,
    /// Creation timestamp (UTC), if known
    pub created_at: Option<DateTime<Utc>>,
    /// Last update timestamp (UTC), if known
    pub updated_at: Option<DateTime<Utc>>,
}
52
/// Collection information, including size counters and the current
/// indexing status.
///
/// Unlike [`Collection`], the metric and the timestamps are untyped strings
/// here, and every field is non-optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollectionInfo {
    /// Collection name
    pub name: String,
    /// Vector dimension
    pub dimension: usize,
    /// Similarity metric used for search, as a string
    /// (NOTE(review): presumably one of the `SimilarityMetric` names — confirm)
    pub metric: String,
    /// Number of vectors in the collection
    pub vector_count: usize,
    /// Number of documents in the collection
    pub document_count: usize,
    /// Creation timestamp as a string (format not specified in this file)
    pub created_at: String,
    /// Last update timestamp as a string (format not specified in this file)
    pub updated_at: String,
    /// Indexing status of this collection
    pub indexing_status: IndexingStatus,
}
73
/// Indexing status of a collection, embedded in [`CollectionInfo`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexingStatus {
    /// Status label (free-form string; the set of values is not defined here)
    pub status: String,
    /// Progress percentage
    /// (NOTE(review): scale — 0–1 vs 0–100 — is not established here; confirm)
    pub progress: f32,
    /// Total documents
    pub total_documents: usize,
    /// Processed documents
    pub processed_documents: usize,
    /// Vector count
    pub vector_count: usize,
    /// Estimated time remaining, if available (string; format not specified here)
    pub estimated_time_remaining: Option<String>,
    /// Last updated timestamp as a string (format not specified in this file)
    pub last_updated: String,
}
92
/// A single search hit: the matched vector's ID, its score and any
/// content/metadata attached to it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
    /// Vector ID
    pub id: String,
    /// Similarity score
    pub score: f32,
    /// Vector content (if available)
    pub content: Option<String>,
    /// Optional metadata; values are arbitrary JSON keyed by string
    pub metadata: Option<HashMap<String, serde_json::Value>>,
}
105
/// Response of a single search: the list of hits plus timing information.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResponse {
    /// Search results, one [`SearchResult`] per hit
    pub results: Vec<SearchResult>,
    /// Query time in milliseconds (fractional values allowed)
    pub query_time_ms: f64,
}
114
/// Request to embed a piece of text, optionally selecting a model and
/// tuning parameters.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EmbeddingRequest {
    /// Text to embed
    pub text: String,
    /// Optional model to use for embedding
    pub model: Option<String>,
    /// Optional parameters for embedding generation
    /// (see [`EmbeddingParameters`])
    pub parameters: Option<EmbeddingParameters>,
}
125
/// Tuning parameters for embedding generation; every field is optional.
///
/// Used by [`EmbeddingRequest::parameters`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EmbeddingParameters {
    /// Maximum sequence length
    /// (NOTE(review): unit — tokens vs characters — is not stated here; confirm)
    pub max_length: Option<usize>,
    /// Whether to normalize the embedding
    pub normalize: Option<bool>,
    /// Optional prefix for the text
    pub prefix: Option<String>,
}
136
/// Result of an embedding request: the generated vector together with a
/// description of how it was produced.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EmbeddingResponse {
    /// Generated embedding vector
    pub embedding: Vec<f32>,
    /// Model used for embedding
    pub model: String,
    /// Text that was embedded
    pub text: String,
    /// Embedding dimension
    /// (NOTE(review): presumably equals `embedding.len()` — not enforced here)
    pub dimension: usize,
    /// Provider used
    pub provider: String,
}
151
/// Health report of the service.
///
/// `status`, `version` and `timestamp` are always present; the remaining
/// counters are optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthStatus {
    /// Service status (free-form string)
    pub status: String,
    /// Service version
    pub version: String,
    /// Timestamp as a string (format not specified in this file)
    pub timestamp: String,
    /// Uptime in seconds, if reported
    pub uptime: Option<u64>,
    /// Number of collections, if reported
    pub collections: Option<usize>,
    /// Total number of vectors, if reported
    pub total_vectors: Option<usize>,
}
168
/// Collections list response: a wrapper around the list of
/// [`CollectionInfo`] entries.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollectionsResponse {
    /// List of collections
    pub collections: Vec<CollectionInfo>,
}
175
/// Response returned after creating a collection.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CreateCollectionResponse {
    /// Success message
    pub message: String,
    /// Collection name (a plain string, not a [`Collection`] object)
    pub collection: String,
}
184
/// Database-wide statistics, with a per-collection breakdown.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DatabaseStats {
    /// Total number of collections
    pub total_collections: usize,
    /// Total number of vectors
    pub total_vectors: usize,
    /// Total memory estimate in bytes
    pub total_memory_estimate_bytes: usize,
    /// Per-collection statistics (see [`CollectionStats`])
    pub collections: Vec<CollectionStats>,
}
197
/// Statistics for a single collection, as listed in
/// [`DatabaseStats::collections`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollectionStats {
    /// Collection name
    pub name: String,
    /// Number of vectors
    pub vector_count: usize,
    /// Vector dimension
    pub dimension: usize,
    /// Memory estimate in bytes
    pub memory_estimate_bytes: usize,
}
210
/// One text item of a batch insert (see [`BatchInsertRequest::texts`]).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BatchTextRequest {
    /// Text ID
    pub id: String,
    /// Text content
    pub text: String,
    /// Optional metadata; note that values are plain strings here, not
    /// arbitrary JSON as in [`Vector::metadata`]
    pub metadata: Option<HashMap<String, String>>,
}
221
/// Batch configuration shared by the batch insert/search/update/delete
/// requests; every field is optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BatchConfig {
    /// Maximum batch size
    pub max_batch_size: Option<usize>,
    /// Number of parallel workers
    pub parallel_workers: Option<usize>,
    /// Whether operations should be atomic
    pub atomic: Option<bool>,
}
232
/// Batch insert request: a list of texts plus an optional batch
/// configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BatchInsertRequest {
    /// Texts to insert
    pub texts: Vec<BatchTextRequest>,
    /// Optional batch configuration (see [`BatchConfig`])
    pub config: Option<BatchConfig>,
}
241
/// Outcome of a batch operation: overall success flag, per-operation
/// counters, timing and collected error messages.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BatchResponse {
    /// Whether the operation was successful
    pub success: bool,
    /// Collection name
    pub collection: String,
    /// Operation type (free-form string; the set of values is not defined here)
    pub operation: String,
    /// Total number of operations
    pub total_operations: usize,
    /// Number of successful operations
    pub successful_operations: usize,
    /// Number of failed operations
    pub failed_operations: usize,
    /// Duration in milliseconds
    pub duration_ms: u64,
    /// Error messages
    pub errors: Vec<String>,
}
262
/// One query of a batch search (see [`BatchSearchRequest::queries`]).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BatchSearchQuery {
    /// Query text
    pub query: String,
    /// Maximum number of results, if limited
    pub limit: Option<usize>,
    /// Minimum score threshold, if any
    pub score_threshold: Option<f32>,
}
273
/// Batch search request: a list of queries plus an optional batch
/// configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BatchSearchRequest {
    /// Search queries
    pub queries: Vec<BatchSearchQuery>,
    /// Optional batch configuration (see [`BatchConfig`])
    pub config: Option<BatchConfig>,
}
282
/// Outcome of a batch search: overall success flag, per-query counters,
/// timing, the nested result lists and collected error messages.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BatchSearchResponse {
    /// Whether the operation was successful
    pub success: bool,
    /// Collection name
    pub collection: String,
    /// Total number of queries
    pub total_queries: usize,
    /// Number of successful queries
    pub successful_queries: usize,
    /// Number of failed queries
    pub failed_queries: usize,
    /// Duration in milliseconds
    pub duration_ms: u64,
    /// Search results as a list of result lists
    /// (NOTE(review): presumably one inner list per query, in request
    /// order — confirm against the server)
    pub results: Vec<Vec<SearchResult>>,
    /// Error messages
    pub errors: Vec<String>,
}
303
/// One update of a batch update (see [`BatchUpdateRequest::updates`]).
///
/// Only `id` is mandatory; the new data and the new metadata are each
/// optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BatchVectorUpdate {
    /// Vector ID
    pub id: String,
    /// New vector data (optional)
    pub data: Option<Vec<f32>>,
    /// New metadata (optional); values are arbitrary JSON keyed by string
    pub metadata: Option<HashMap<String, serde_json::Value>>,
}
314
/// Batch update request: a list of vector updates plus an optional batch
/// configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BatchUpdateRequest {
    /// Vector updates
    pub updates: Vec<BatchVectorUpdate>,
    /// Optional batch configuration (see [`BatchConfig`])
    pub config: Option<BatchConfig>,
}
323
/// Batch delete request: the IDs to delete plus an optional batch
/// configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BatchDeleteRequest {
    /// Vector IDs to delete
    pub vector_ids: Vec<String>,
    /// Optional batch configuration (see [`BatchConfig`])
    pub config: Option<BatchConfig>,
}
332
333/// Summarization methods
334#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
335#[serde(rename_all = "snake_case")]
336pub enum SummarizationMethod {
337    /// Extractive summarization
338    Extractive,
339    /// Keyword summarization
340    Keyword,
341    /// Sentence summarization
342    Sentence,
343    /// Abstractive summarization
344    Abstractive,
345}
346
347impl Default for SummarizationMethod {
348    fn default() -> Self {
349        Self::Extractive
350    }
351}
352
/// Request to summarize a piece of text; everything except `text` is
/// optional.
///
/// Has the same optional fields as [`SummarizeContextRequest`], which takes
/// a `context` instead of `text`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SummarizeTextRequest {
    /// Text to summarize
    pub text: String,
    /// Summarization method (typed; see [`SummarizationMethod`])
    pub method: Option<SummarizationMethod>,
    /// Maximum summary length
    /// (NOTE(review): unit is not stated here — confirm)
    pub max_length: Option<usize>,
    /// Compression ratio
    pub compression_ratio: Option<f32>,
    /// Language code
    pub language: Option<String>,
}
367
/// Response to a text summarization: the original text, the generated
/// summary and descriptive statistics. All fields are required.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SummarizeTextResponse {
    /// Summary ID
    pub summary_id: String,
    /// Original text
    pub original_text: String,
    /// Generated summary
    pub summary: String,
    /// Method used, as a string (the request side uses the typed
    /// [`SummarizationMethod`])
    pub method: String,
    /// Original text length
    pub original_length: usize,
    /// Summary length
    pub summary_length: usize,
    /// Compression ratio
    pub compression_ratio: f32,
    /// Language
    pub language: String,
    /// Status (free-form string)
    pub status: String,
    /// Message
    pub message: String,
    /// Metadata as string key/value pairs
    pub metadata: HashMap<String, String>,
}
394
/// Request to summarize a context; everything except `context` is
/// optional.
///
/// Has the same optional fields as [`SummarizeTextRequest`], which takes a
/// `text` instead of `context`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SummarizeContextRequest {
    /// Context to summarize
    pub context: String,
    /// Summarization method (typed; see [`SummarizationMethod`])
    pub method: Option<SummarizationMethod>,
    /// Maximum summary length
    /// (NOTE(review): unit is not stated here — confirm)
    pub max_length: Option<usize>,
    /// Compression ratio
    pub compression_ratio: Option<f32>,
    /// Language code
    pub language: Option<String>,
}
409
/// Response to a context summarization: the original context, the
/// generated summary and descriptive statistics. All fields are required.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SummarizeContextResponse {
    /// Summary ID
    pub summary_id: String,
    /// Original context
    pub original_context: String,
    /// Generated summary
    pub summary: String,
    /// Method used, as a string (the request side uses the typed
    /// [`SummarizationMethod`])
    pub method: String,
    /// Original context length
    pub original_length: usize,
    /// Summary length
    pub summary_length: usize,
    /// Compression ratio
    pub compression_ratio: f32,
    /// Language
    pub language: String,
    /// Status (free-form string)
    pub status: String,
    /// Message
    pub message: String,
    /// Metadata as string key/value pairs
    pub metadata: HashMap<String, String>,
}
436
/// Response when fetching a stored summary.
///
/// Compared with [`SummarizeTextResponse`] it adds `created_at` and has no
/// `message` field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GetSummaryResponse {
    /// Summary ID
    pub summary_id: String,
    /// Original text
    pub original_text: String,
    /// Generated summary
    pub summary: String,
    /// Method used, as a string
    pub method: String,
    /// Original text length
    pub original_length: usize,
    /// Summary length
    pub summary_length: usize,
    /// Compression ratio
    pub compression_ratio: f32,
    /// Language
    pub language: String,
    /// Creation timestamp as a string (format not specified in this file)
    pub created_at: String,
    /// Metadata as string key/value pairs
    pub metadata: HashMap<String, String>,
    /// Status (free-form string)
    pub status: String,
}
463
/// Summary info: descriptive data about a stored summary, without the
/// original text or the summary text itself.
///
/// Listed in [`ListSummariesResponse::summaries`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SummaryInfo {
    /// Summary ID
    pub summary_id: String,
    /// Method used, as a string
    pub method: String,
    /// Language
    pub language: String,
    /// Original text length
    pub original_length: usize,
    /// Summary length
    pub summary_length: usize,
    /// Compression ratio
    pub compression_ratio: f32,
    /// Creation timestamp as a string (format not specified in this file)
    pub created_at: String,
    /// Metadata as string key/value pairs
    pub metadata: HashMap<String, String>,
}
484
/// Response when listing stored summaries.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ListSummariesResponse {
    /// List of summaries (see [`SummaryInfo`])
    pub summaries: Vec<SummaryInfo>,
    /// Total count
    /// (NOTE(review): may differ from `summaries.len()` if the listing is
    /// paginated — confirm)
    pub total_count: usize,
    /// Status (free-form string)
    pub status: String,
}
495
/// Overall indexing progress, with a per-collection breakdown.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexingProgress {
    /// Whether indexing is in progress
    pub is_indexing: bool,
    /// Overall status (free-form string)
    pub overall_status: String,
    /// Collections being indexed (see [`CollectionProgress`])
    pub collections: Vec<CollectionProgress>,
}
506
/// Indexing progress of a single collection, as listed in
/// [`IndexingProgress::collections`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollectionProgress {
    /// Collection name
    pub collection_name: String,
    /// Status (free-form string; the set of values is not defined here)
    pub status: String,
    /// Progress percentage
    /// (NOTE(review): scale — 0–1 vs 0–100 — is not established here; confirm)
    pub progress: f32,
    /// Vector count
    pub vector_count: usize,
    /// Error message if any
    pub error_message: Option<String>,
    /// Last updated timestamp as a string (format not specified in this file)
    pub last_updated: String,
}