// cvx_core/traits/mod.rs

//! Core trait definitions for ChronosVector subsystems.
//!
//! These traits define the contracts between subsystems. Each crate implements
//! the relevant traits, enabling loose coupling and testability via mock implementations.

6pub mod quantizer;
7
8use crate::error::{AnalyticsError, IndexError, QueryError, StorageError};
9use crate::types::{ChangePoint, CpdMethod, ScoredResult, TemporalFilter, TemporalPoint};
10
/// Operations on a vector space.
///
/// Defines the algebraic structure that embedding vectors inhabit.
/// Implementations are not required for Layer 0 — only signatures.
///
/// The `Clone + Send + Sync` supertraits mean every implementor can be
/// duplicated and shared across threads. Because `Clone` requires `Sized`,
/// this trait is meant to be used through generics (`V: VectorSpace`) and
/// cannot be turned into a `dyn VectorSpace` trait object.
pub trait VectorSpace: Clone + Send + Sync {
    /// Dimensionality of vectors in this space.
    fn dim(&self) -> usize;

    /// The zero vector with `dim` components.
    fn zero(dim: usize) -> Self;

    /// Component-wise addition of `self` and `other`, returned as a new value.
    ///
    /// The trait does not specify what happens when the two operands differ
    /// in dimensionality; that is left to the implementor.
    fn add(&self, other: &Self) -> Self;

    /// Scalar multiplication by `factor`, returned as a new value.
    fn scale(&self, factor: f32) -> Self;

    /// View as a float slice.
    fn as_slice(&self) -> &[f32];
}

/// A distance metric over vectors.
///
/// Implementations must satisfy metric properties:
/// - Non-negativity: $d(a, b) \geq 0$
/// - Identity: $d(a, a) = 0$
/// - Symmetry: $d(a, b) = d(b, a)$
///
/// Triangle inequality is desired but not required (cosine distance violates it).
///
/// The trait has no generic methods and no `Self`-returning functions, so it
/// can be used as a trait object (`Box<dyn DistanceMetric>`).
pub trait DistanceMetric: Send + Sync {
    /// Compute the distance between two vectors given as float slices.
    ///
    /// # Panics
    ///
    /// Implementations should panic if `a.len() != b.len()`.
    fn distance(&self, a: &[f32], b: &[f32]) -> f32;

    /// Human-readable name of this metric (e.g., `"cosine"`, `"l2"`).
    fn name(&self) -> &str;
}

52/// Persistent storage backend for temporal points.
53///
54/// Abstracts over the underlying storage engine (in-memory, RocksDB, etc.).
55pub trait StorageBackend: Send + Sync {
56    /// Retrieve a single point by entity, space, and timestamp.
57    fn get(
58        &self,
59        entity_id: u64,
60        space_id: u32,
61        timestamp: i64,
62    ) -> Result<Option<TemporalPoint>, StorageError>;
63
64    /// Store a temporal point.
65    fn put(&self, space_id: u32, point: &TemporalPoint) -> Result<(), StorageError>;
66
67    /// Retrieve all points for an entity in a time range, ordered by timestamp.
68    fn range(
69        &self,
70        entity_id: u64,
71        space_id: u32,
72        start: i64,
73        end: i64,
74    ) -> Result<Vec<TemporalPoint>, StorageError>;
75
76    /// Delete a specific point.
77    fn delete(&self, entity_id: u64, space_id: u32, timestamp: i64) -> Result<(), StorageError>;
78}
79
80/// Low-level temporal index access for query engine orchestration.
81///
82/// Provides the methods that `QueryEngine` needs from a temporal index.
83/// Implemented by both `TemporalHnsw` (single-threaded) and
84/// `ConcurrentTemporalHnsw` (thread-safe).
85pub trait TemporalIndexAccess: Send + Sync {
86    /// Search with temporal filtering, returning (node_id, score) pairs.
87    fn search_raw(
88        &self,
89        query: &[f32],
90        k: usize,
91        filter: TemporalFilter,
92        alpha: f32,
93        query_timestamp: i64,
94    ) -> Vec<(u32, f32)>;
95
96    /// Retrieve trajectory for an entity: (timestamp, node_id) pairs.
97    fn trajectory(&self, entity_id: u64, filter: TemporalFilter) -> Vec<(i64, u32)>;
98
99    /// Get the vector for a node. Returns owned vec for thread safety.
100    fn vector(&self, node_id: u32) -> Vec<f32>;
101
102    /// Get the entity_id for a node.
103    fn entity_id(&self, node_id: u32) -> u64;
104
105    /// Get the timestamp for a node.
106    fn timestamp(&self, node_id: u32) -> i64;
107
108    /// Number of points in the index.
109    fn len(&self) -> usize;
110
111    /// Whether the index is empty.
112    fn is_empty(&self) -> bool {
113        self.len() == 0
114    }
115
116    /// Get semantic regions at a given HNSW level (RFC-004).
117    /// Returns `(hub_node_id, hub_vector, n_assigned)` per region.
118    fn regions(&self, _level: usize) -> Vec<(u32, Vec<f32>, usize)> {
119        Vec::new()
120    }
121
122    /// Get points belonging to a specific region, optionally time-filtered (RFC-005).
123    /// Returns `(node_id, entity_id, timestamp)` per member.
124    fn region_members(
125        &self,
126        _region_hub: u32,
127        _level: usize,
128        _filter: TemporalFilter,
129    ) -> Vec<(u32, u64, i64)> {
130        Vec::new()
131    }
132
133    /// Assign all nodes to regions in a single O(N) pass, optionally time-filtered.
134    /// Returns HashMap<hub_id, Vec<(entity_id, timestamp)>>.
135    fn region_assignments(
136        &self,
137        _level: usize,
138        _filter: TemporalFilter,
139    ) -> std::collections::HashMap<u32, Vec<(u64, i64)>> {
140        std::collections::HashMap::new()
141    }
142
143    /// Smoothed region-distribution trajectory for an entity (RFC-004).
144    fn region_trajectory(
145        &self,
146        _entity_id: u64,
147        _level: usize,
148        _window_days: i64,
149        _alpha: f32,
150    ) -> Vec<(i64, Vec<f32>)> {
151        Vec::new()
152    }
153
154    /// Get metadata for a node. Returns empty map if not available.
155    fn metadata(&self, _node_id: u32) -> std::collections::HashMap<String, String> {
156        std::collections::HashMap::new()
157    }
158
159    /// Search with metadata filtering (post-filter on search results).
160    /// Default: ignores metadata filter and delegates to search_raw.
161    fn search_with_metadata(
162        &self,
163        query: &[f32],
164        k: usize,
165        filter: TemporalFilter,
166        alpha: f32,
167        query_timestamp: i64,
168        metadata_filter: &crate::types::MetadataFilter,
169    ) -> Vec<(u32, f32)> {
170        if metadata_filter.is_empty() {
171            return self.search_raw(query, k, filter, alpha, query_timestamp);
172        }
173        // Over-fetch and post-filter
174        let overfetch = k * 4;
175        let candidates = self.search_raw(query, overfetch, filter, alpha, query_timestamp);
176        candidates
177            .into_iter()
178            .filter(|&(nid, _)| metadata_filter.matches(&self.metadata(nid)))
179            .take(k)
180            .collect()
181    }
182}
183
184/// Index backend for approximate nearest neighbor search.
185///
186/// Abstracts over the indexing structure (HNSW, brute-force, etc.).
187pub trait IndexBackend: Send + Sync {
188    /// Insert a point into the index.
189    fn insert(&self, entity_id: u64, vector: &[f32], timestamp: i64) -> Result<u32, IndexError>;
190
191    /// Search for the k nearest neighbors with temporal filtering.
192    ///
193    /// `alpha` controls the semantic vs temporal weight:
194    /// - `alpha = 1.0`: pure semantic distance
195    /// - `alpha = 0.0`: pure temporal distance
196    ///
197    /// `query_timestamp` is the reference time for temporal distance computation.
198    fn search(
199        &self,
200        query: &[f32],
201        k: usize,
202        filter: TemporalFilter,
203        alpha: f32,
204        query_timestamp: i64,
205    ) -> Result<Vec<ScoredResult>, QueryError>;
206
207    /// Remove a point from the index.
208    fn remove(&self, point_id: u64) -> Result<(), IndexError>;
209
210    /// Number of points in the index.
211    fn len(&self) -> usize;
212
213    /// Whether the index is empty.
214    fn is_empty(&self) -> bool {
215        self.len() == 0
216    }
217}
218
219/// Analytics backend for temporal analysis operations.
220///
221/// Provides prediction, change point detection, and differential calculus.
222pub trait AnalyticsBackend: Send + Sync {
223    /// Predict a future vector state using the learned trajectory model.
224    fn predict(
225        &self,
226        trajectory: &[TemporalPoint],
227        target_timestamp: i64,
228    ) -> Result<TemporalPoint, AnalyticsError>;
229
230    /// Detect change points in a trajectory.
231    fn detect_changepoints(
232        &self,
233        trajectory: &[TemporalPoint],
234        method: CpdMethod,
235    ) -> Result<Vec<ChangePoint>, AnalyticsError>;
236
237    /// Compute the velocity vector at a given timestamp.
238    fn velocity(
239        &self,
240        trajectory: &[TemporalPoint],
241        timestamp: i64,
242    ) -> Result<Vec<f32>, AnalyticsError>;
243}
244
// ─── Embedder trait (RFC-009) ───────────────────────────────────────

247/// Error type for embedding operations.
248#[derive(Debug, thiserror::Error)]
249pub enum EmbedError {
250    /// Model not loaded or unavailable.
251    #[error("model not available: {0}")]
252    ModelNotAvailable(String),
253    /// Input text is empty or invalid.
254    #[error("invalid input: {0}")]
255    InvalidInput(String),
256    /// Backend-specific error.
257    #[error("embedding error: {0}")]
258    BackendError(String),
259}
260
261/// Trait for converting text to embedding vectors.
262///
263/// Implementations may use local models (ONNX, TorchScript) or
264/// remote APIs (OpenAI, Cohere).
265pub trait Embedder: Send + Sync {
266    /// Embed a single text string into a vector.
267    fn embed(&self, text: &str) -> Result<Vec<f32>, EmbedError>;
268
269    /// Embed multiple texts in a batch (more efficient for APIs).
270    fn embed_batch(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>, EmbedError> {
271        texts.iter().map(|t| self.embed(t)).collect()
272    }
273
274    /// Output dimensionality of the embedding model.
275    fn dimension(&self) -> usize;
276
277    /// Name of the embedding model.
278    fn model_name(&self) -> &str;
279}