oxirs_vec/real_time_embedding_pipeline/
traits.rs

//! Core traits for the real-time embedding pipeline
2
3use anyhow::Result;
4use serde::{Deserialize, Serialize};
5use std::collections::HashMap;
6use std::time::{Duration, SystemTime};
7use uuid::Uuid;
8
9use crate::Vector;
10
11/// Content to be processed for embedding generation
12#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct ContentItem {
14    /// Unique identifier for the content
15    pub id: String,
16    /// Content type identifier
17    pub content_type: String,
18    /// Raw content data
19    pub content: String,
20    /// Optional metadata
21    pub metadata: HashMap<String, String>,
22    /// Content priority
23    pub priority: ProcessingPriority,
24    /// Creation timestamp
25    pub created_at: SystemTime,
26    /// Optional expiration time
27    pub expires_at: Option<SystemTime>,
28}
29
30/// Processing priority levels
31#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
32pub enum ProcessingPriority {
33    /// Low priority - batch processing
34    Low,
35    /// Normal priority - standard processing
36    Normal,
37    /// High priority - expedited processing
38    High,
39    /// Critical priority - immediate processing
40    Critical,
41}
42
43/// Processing status for content items
44#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
45pub enum ProcessingStatus {
46    /// Pending processing
47    Pending,
48    /// Currently being processed
49    Processing,
50    /// Successfully processed
51    Completed,
52    /// Processing failed
53    Failed { reason: String },
54    /// Processing was retried
55    Retried { attempt: usize },
56}
57
58/// Result of processing a content item
59#[derive(Debug, Clone)]
60pub struct ProcessingResult {
61    /// Item that was processed
62    pub item: ContentItem,
63    /// Generated vector (if successful)
64    pub vector: Option<Vector>,
65    /// Processing status
66    pub status: ProcessingStatus,
67    /// Processing duration
68    pub duration: Duration,
69    /// Any error that occurred
70    pub error: Option<String>,
71    /// Processing metadata
72    pub metadata: HashMap<String, String>,
73}
74
75/// Trait for generating embeddings from content
76pub trait EmbeddingGenerator: Send + Sync {
77    /// Generate embedding vector from content
78    fn generate_embedding(&self, content: &ContentItem) -> Result<Vector>;
79
80    /// Generate embeddings for a batch of content items
81    fn generate_batch_embeddings(&self, content: &[ContentItem]) -> Result<Vec<ProcessingResult>>;
82
83    /// Get the embedding dimensions
84    fn embedding_dimensions(&self) -> usize;
85
86    /// Get generator configuration
87    fn get_config(&self) -> serde_json::Value;
88
89    /// Check if the generator is ready
90    fn is_ready(&self) -> bool;
91
92    /// Get generator statistics
93    fn get_statistics(&self) -> GeneratorStatistics;
94}
95
96/// Statistics for embedding generators
97#[derive(Debug, Clone, Serialize, Deserialize)]
98pub struct GeneratorStatistics {
99    /// Total embeddings generated
100    pub total_embeddings: u64,
101    /// Total processing time
102    pub total_processing_time: Duration,
103    /// Average processing time per embedding
104    pub average_processing_time: Duration,
105    /// Error count
106    pub error_count: u64,
107    /// Last error message
108    pub last_error: Option<String>,
109}
110
111/// Trait for incremental vector indices
112pub trait IncrementalVectorIndex: Send + Sync {
113    /// Insert or update a vector
114    fn upsert_vector(&mut self, id: String, vector: Vector) -> Result<()>;
115
116    /// Remove a vector
117    fn remove_vector(&mut self, id: &str) -> Result<bool>;
118
119    /// Batch upsert vectors
120    fn batch_upsert(&mut self, vectors: Vec<(String, Vector)>) -> Result<Vec<Result<()>>>;
121
122    /// Get index statistics
123    fn get_statistics(&self) -> IndexStatistics;
124
125    /// Optimize index structure
126    fn optimize(&mut self) -> Result<()>;
127
128    /// Check index health
129    fn health_check(&self) -> Result<HealthStatus>;
130}
131
132/// Statistics for vector indices
133#[derive(Debug, Clone, Serialize, Deserialize)]
134pub struct IndexStatistics {
135    /// Total vectors in index
136    pub total_vectors: usize,
137    /// Index memory usage in bytes
138    pub memory_usage: u64,
139    /// Last optimization time
140    pub last_optimization: Option<SystemTime>,
141    /// Total operations performed
142    pub total_operations: u64,
143    /// Error count
144    pub error_count: u64,
145}
146
147/// Health status for components
148#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
149pub enum HealthStatus {
150    /// Component is healthy
151    Healthy,
152    /// Component has warnings but is functional
153    Warning { message: String },
154    /// Component is unhealthy
155    Unhealthy { message: String },
156    /// Component status is unknown
157    Unknown,
158}
159
160/// Trait for handling alerts
161pub trait AlertHandler: Send + Sync {
162    /// Handle an alert
163    fn handle_alert(&self, alert: &Alert) -> Result<()>;
164
165    /// Get alert configuration
166    fn get_config(&self) -> AlertConfig;
167
168    /// Check if handler is enabled
169    fn is_enabled(&self) -> bool;
170}
171
172/// Alert information
173#[derive(Debug, Clone, Serialize, Deserialize)]
174pub struct Alert {
175    /// Alert identifier
176    pub id: Uuid,
177    /// Alert severity level
178    pub severity: AlertSeverity,
179    /// Alert category
180    pub category: AlertCategory,
181    /// Alert message
182    pub message: String,
183    /// Alert details
184    pub details: HashMap<String, String>,
185    /// Alert timestamp
186    pub timestamp: SystemTime,
187    /// Source component
188    pub source: String,
189}
190
191/// Alert severity levels
192#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
193pub enum AlertSeverity {
194    /// Information level
195    Info,
196    /// Warning level
197    Warning,
198    /// Error level
199    Error,
200    /// Critical level
201    Critical,
202}
203
204/// Alert categories
205#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
206pub enum AlertCategory {
207    /// Performance related
208    Performance,
209    /// Quality related
210    Quality,
211    /// System health related
212    Health,
213    /// Security related
214    Security,
215    /// Configuration related
216    Configuration,
217}
218
219/// Alert handler configuration
220#[derive(Debug, Clone, Serialize, Deserialize)]
221pub struct AlertConfig {
222    /// Minimum severity level to handle
223    pub min_severity: AlertSeverity,
224    /// Alert throttling settings
225    pub throttling: AlertThrottling,
226    /// Enable notifications
227    pub enable_notifications: bool,
228}
229
230/// Alert throttling configuration
231#[derive(Debug, Clone, Serialize, Deserialize)]
232pub struct AlertThrottling {
233    /// Enable throttling
234    pub enabled: bool,
235    /// Throttling window duration
236    pub window_duration: Duration,
237    /// Maximum alerts per window
238    pub max_alerts_per_window: usize,
239}
240
241/// Trait for storing metrics
242pub trait MetricsStorage: Send + Sync {
243    /// Store a metric value
244    fn store_metric(
245        &mut self,
246        name: &str,
247        value: f64,
248        timestamp: SystemTime,
249        tags: HashMap<String, String>,
250    ) -> Result<()>;
251
252    /// Get metric values within a time range
253    fn get_metrics(
254        &self,
255        name: &str,
256        start: SystemTime,
257        end: SystemTime,
258    ) -> Result<Vec<MetricPoint>>;
259
260    /// Get available metric names
261    fn get_metric_names(&self) -> Result<Vec<String>>;
262
263    /// Delete old metrics
264    fn cleanup_old_metrics(&mut self, cutoff: SystemTime) -> Result<usize>;
265}
266
267/// A single metric data point
268#[derive(Debug, Clone, Serialize, Deserialize)]
269pub struct MetricPoint {
270    /// Metric value
271    pub value: f64,
272    /// Timestamp
273    pub timestamp: SystemTime,
274    /// Associated tags
275    pub tags: HashMap<String, String>,
276}
277
278/// Trait for version storage
279pub trait VersionStorage: Send + Sync {
280    /// Store a new version
281    fn store_version(&mut self, id: &str, version: &Version) -> Result<()>;
282
283    /// Get a specific version
284    fn get_version(&self, id: &str, version_number: u64) -> Result<Option<Version>>;
285
286    /// Get all versions for an ID
287    fn get_all_versions(&self, id: &str) -> Result<Vec<Version>>;
288
289    /// Delete old versions
290    fn cleanup_old_versions(&mut self, id: &str, keep_count: usize) -> Result<usize>;
291}
292
293/// Version information
294#[derive(Debug, Clone, Serialize, Deserialize)]
295pub struct Version {
296    /// Version number
297    pub version: u64,
298    /// Vector data
299    pub vector: Vector,
300    /// Creation timestamp
301    pub created_at: SystemTime,
302    /// Metadata
303    pub metadata: HashMap<String, String>,
304    /// Checksum for integrity
305    pub checksum: String,
306}
307
308/// Trait for conflict resolution functions
309pub trait ConflictResolutionFunction: Send + Sync {
310    /// Resolve conflicts between versions
311    fn resolve_conflict(&self, versions: &[Version]) -> Result<Vector>;
312
313    /// Get resolution strategy name
314    fn get_strategy_name(&self) -> &str;
315}
316
317/// Trait for transaction logging
318pub trait TransactionLog: Send + Sync {
319    /// Log a transaction
320    fn log_transaction(&mut self, transaction: &Transaction) -> Result<()>;
321
322    /// Get transactions within a time range
323    fn get_transactions(&self, start: SystemTime, end: SystemTime) -> Result<Vec<Transaction>>;
324
325    /// Replay transactions from a specific point
326    fn replay_from(&self, checkpoint: SystemTime) -> Result<Vec<Transaction>>;
327}
328
329/// Transaction record
330#[derive(Debug, Clone, Serialize, Deserialize)]
331pub struct Transaction {
332    /// Transaction ID
333    pub id: Uuid,
334    /// Transaction type
335    pub transaction_type: TransactionType,
336    /// Affected resource ID
337    pub resource_id: String,
338    /// Transaction timestamp
339    pub timestamp: SystemTime,
340    /// Transaction data
341    pub data: serde_json::Value,
342    /// Transaction status
343    pub status: TransactionStatus,
344}
345
346/// Transaction types
347#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
348pub enum TransactionType {
349    /// Insert operation
350    Insert,
351    /// Update operation
352    Update,
353    /// Delete operation
354    Delete,
355    /// Batch operation
356    Batch,
357}
358
359/// Transaction status
360#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
361pub enum TransactionStatus {
362    /// Transaction is pending
363    Pending,
364    /// Transaction is committed
365    Committed,
366    /// Transaction was rolled back
367    RolledBack,
368    /// Transaction failed
369    Failed { reason: String },
370}
371
372/// Trait for inconsistency detection
373pub trait InconsistencyDetectionAlgorithm: Send + Sync {
374    /// Detect inconsistencies in the system
375    fn detect_inconsistencies(&self) -> Result<Vec<Inconsistency>>;
376
377    /// Get detection algorithm name
378    fn get_algorithm_name(&self) -> &str;
379}
380
381/// Inconsistency information
382#[derive(Debug, Clone, Serialize, Deserialize)]
383pub struct Inconsistency {
384    /// Inconsistency type
385    pub inconsistency_type: InconsistencyType,
386    /// Affected resources
387    pub affected_resources: Vec<String>,
388    /// Inconsistency description
389    pub description: String,
390    /// Severity level
391    pub severity: InconsistencySeverity,
392    /// Detection timestamp
393    pub detected_at: SystemTime,
394}
395
396/// Types of inconsistencies
397#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
398pub enum InconsistencyType {
399    /// Data mismatch
400    DataMismatch,
401    /// Missing data
402    MissingData,
403    /// Duplicate data
404    DuplicateData,
405    /// Stale data
406    StaleData,
407    /// Corrupted data
408    CorruptedData,
409}
410
411/// Inconsistency severity levels
412#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
413pub enum InconsistencySeverity {
414    /// Low severity
415    Low,
416    /// Medium severity
417    Medium,
418    /// High severity
419    High,
420    /// Critical severity
421    Critical,
422}
423
424/// Trait for consistency repair strategies
425pub trait ConsistencyRepairStrategy: Send + Sync {
426    /// Repair inconsistencies
427    fn repair_inconsistencies(
428        &self,
429        inconsistencies: &[Inconsistency],
430    ) -> Result<Vec<RepairResult>>;
431
432    /// Get repair strategy name
433    fn get_strategy_name(&self) -> &str;
434}
435
436/// Result of a repair operation
437#[derive(Debug, Clone, Serialize, Deserialize)]
438pub struct RepairResult {
439    /// Inconsistency that was repaired
440    pub inconsistency: Inconsistency,
441    /// Repair status
442    pub status: RepairStatus,
443    /// Repair actions taken
444    pub actions: Vec<String>,
445    /// Repair timestamp
446    pub repaired_at: SystemTime,
447}
448
449/// Status of repair operations
450#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
451pub enum RepairStatus {
452    /// Repair was successful
453    Success,
454    /// Repair was partially successful
455    PartialSuccess,
456    /// Repair failed
457    Failed { reason: String },
458    /// Repair was skipped
459    Skipped { reason: String },
460}