sevensense_vector/domain/
entities.rs

1//! Domain entities for the Vector Space bounded context.
2//!
3//! These are the core domain objects that represent the vector indexing domain.
4
5use serde::{Deserialize, Serialize};
6use std::fmt;
7use uuid::Uuid;
8
/// A unique identifier for an embedding vector.
///
/// This is a newtype over a [`Uuid`], so embedding identifiers are a distinct
/// type rather than a bare UUID. The derives make it cheap to copy, comparable
/// for equality, usable as a hash-map key, and (de)serializable through serde.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct EmbeddingId(Uuid);
14
15impl EmbeddingId {
16    /// Create a new random embedding ID.
17    #[inline]
18    pub fn new() -> Self {
19        Self(Uuid::new_v4())
20    }
21
22    /// Create an embedding ID from a UUID.
23    #[inline]
24    pub const fn from_uuid(uuid: Uuid) -> Self {
25        Self(uuid)
26    }
27
28    /// Parse an embedding ID from a string.
29    pub fn parse(s: &str) -> Result<Self, uuid::Error> {
30        Ok(Self(Uuid::parse_str(s)?))
31    }
32
33    /// Get the inner UUID.
34    #[inline]
35    pub const fn as_uuid(&self) -> &Uuid {
36        &self.0
37    }
38
39    /// Convert to bytes for storage.
40    #[inline]
41    pub fn as_bytes(&self) -> &[u8; 16] {
42        self.0.as_bytes()
43    }
44
45    /// Create from bytes.
46    #[inline]
47    pub fn from_bytes(bytes: [u8; 16]) -> Self {
48        Self(Uuid::from_bytes(bytes))
49    }
50}
51
52impl Default for EmbeddingId {
53    fn default() -> Self {
54        Self::new()
55    }
56}
57
58impl fmt::Display for EmbeddingId {
59    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
60        write!(f, "{}", self.0)
61    }
62}
63
64impl From<Uuid> for EmbeddingId {
65    fn from(uuid: Uuid) -> Self {
66        Self(uuid)
67    }
68}
69
70impl From<EmbeddingId> for Uuid {
71    fn from(id: EmbeddingId) -> Self {
72        id.0
73    }
74}
75
/// Unix timestamp in milliseconds.
///
/// Wraps a signed 64-bit millisecond count since the Unix epoch. Ordering and
/// equality are derived, so timestamps compare by their millisecond value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub struct Timestamp(i64);
79
80impl Timestamp {
81    /// Create a timestamp for the current moment.
82    pub fn now() -> Self {
83        Self(chrono::Utc::now().timestamp_millis())
84    }
85
86    /// Create a timestamp from milliseconds since Unix epoch.
87    #[inline]
88    pub const fn from_millis(millis: i64) -> Self {
89        Self(millis)
90    }
91
92    /// Get milliseconds since Unix epoch.
93    #[inline]
94    pub const fn as_millis(&self) -> i64 {
95        self.0
96    }
97
98    /// Convert to chrono DateTime.
99    pub fn to_datetime(&self) -> chrono::DateTime<chrono::Utc> {
100        chrono::DateTime::from_timestamp_millis(self.0)
101            .unwrap_or_else(|| chrono::DateTime::UNIX_EPOCH)
102    }
103}
104
105impl Default for Timestamp {
106    fn default() -> Self {
107        Self::now()
108    }
109}
110
111impl fmt::Display for Timestamp {
112    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
113        write!(f, "{}", self.to_datetime().format("%Y-%m-%d %H:%M:%S%.3f UTC"))
114    }
115}
116
/// Configuration for the HNSW index.
///
/// These parameters control the trade-off between search accuracy,
/// index build time, and memory usage. Use [`HnswConfig::for_dimension`] (or
/// one of the presets built on it) for sensible defaults, adjust with the
/// `with_*` builders, and call [`HnswConfig::validate`] before use.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HnswConfig {
    /// Number of bi-directional links per element.
    /// Higher values improve recall but increase memory.
    /// Recommended: 32 for 1536-dimensional vectors.
    /// `validate` rejects values below 2.
    pub m: usize,

    /// Size of dynamic candidate list during construction.
    /// Higher values improve index quality but slow construction.
    /// Recommended: 200 for high-quality indices.
    /// `validate` requires this to be at least `m`.
    pub ef_construction: usize,

    /// Size of dynamic candidate list during search.
    /// Higher values improve recall but slow queries.
    /// Recommended: 128 for balanced accuracy/speed.
    /// Note: `validate` does not currently check this field.
    pub ef_search: usize,

    /// Maximum number of elements the index can hold.
    /// Pre-allocating improves construction performance.
    /// `validate` rejects zero.
    pub max_elements: usize,

    /// Dimensionality of vectors in this index.
    /// `validate` rejects zero.
    pub dimensions: usize,

    /// Whether to normalize vectors before indexing.
    pub normalize: bool,

    /// Distance metric to use.
    pub distance_metric: DistanceMetric,
}
151
152impl HnswConfig {
153    /// Create a configuration optimized for a given dimension.
154    pub fn for_dimension(dim: usize) -> Self {
155        Self {
156            m: if dim >= 1024 { 32 } else { 16 },
157            ef_construction: 200,
158            ef_search: 128,
159            max_elements: 1_000_000,
160            dimensions: dim,
161            normalize: true,
162            distance_metric: DistanceMetric::Cosine,
163        }
164    }
165
166    /// Create a configuration for OpenAI-style 1536-D embeddings.
167    pub fn for_openai_embeddings() -> Self {
168        Self::for_dimension(1536)
169    }
170
171    /// Create a configuration for smaller sentence transformers (384-D).
172    pub fn for_sentence_transformers() -> Self {
173        Self::for_dimension(384)
174    }
175
176    /// Builder: set M parameter.
177    pub fn with_m(mut self, m: usize) -> Self {
178        self.m = m;
179        self
180    }
181
182    /// Builder: set ef_construction parameter.
183    pub fn with_ef_construction(mut self, ef: usize) -> Self {
184        self.ef_construction = ef;
185        self
186    }
187
188    /// Builder: set ef_search parameter.
189    pub fn with_ef_search(mut self, ef: usize) -> Self {
190        self.ef_search = ef;
191        self
192    }
193
194    /// Builder: set maximum elements.
195    pub fn with_max_elements(mut self, max: usize) -> Self {
196        self.max_elements = max;
197        self
198    }
199
200    /// Builder: set distance metric.
201    pub fn with_distance_metric(mut self, metric: DistanceMetric) -> Self {
202        self.distance_metric = metric;
203        self
204    }
205
206    /// Builder: set normalization flag.
207    pub fn with_normalize(mut self, normalize: bool) -> Self {
208        self.normalize = normalize;
209        self
210    }
211
212    /// Validate the configuration.
213    pub fn validate(&self) -> Result<(), ConfigValidationError> {
214        if self.m < 2 {
215            return Err(ConfigValidationError::InvalidM(self.m));
216        }
217        if self.ef_construction < self.m {
218            return Err(ConfigValidationError::EfTooSmall {
219                ef: self.ef_construction,
220                m: self.m,
221            });
222        }
223        if self.dimensions == 0 {
224            return Err(ConfigValidationError::ZeroDimensions);
225        }
226        if self.max_elements == 0 {
227            return Err(ConfigValidationError::ZeroMaxElements);
228        }
229        Ok(())
230    }
231}
232
233impl Default for HnswConfig {
234    fn default() -> Self {
235        Self::for_openai_embeddings()
236    }
237}
238
/// Distance metric for vector similarity.
///
/// With `rename_all = "snake_case"`, serde reads and writes the variants as
/// `cosine`, `euclidean`, `dot_product`, and `poincare`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum DistanceMetric {
    /// Cosine distance (1 - cosine_similarity).
    /// Best for normalized embeddings.
    Cosine,

    /// Euclidean (L2) distance.
    /// Best for spatial data.
    Euclidean,

    /// Dot product (negative for similarity ranking).
    /// Best for when vectors are already normalized.
    DotProduct,

    /// Poincaré distance in hyperbolic space.
    /// Best for hierarchical relationships.
    Poincare,
}
259
260impl Default for DistanceMetric {
261    fn default() -> Self {
262        Self::Cosine
263    }
264}
265
/// Configuration validation errors.
///
/// Returned by `HnswConfig::validate`; each variant corresponds to one of the
/// checks performed there.
#[derive(Debug, Clone, thiserror::Error)]
pub enum ConfigValidationError {
    /// `m` was below the minimum of 2; carries the rejected value.
    #[error("M parameter must be >= 2, got {0}")]
    InvalidM(usize),

    /// `ef_construction` was smaller than `m`; carries both values.
    #[error("ef_construction ({ef}) must be >= M ({m})")]
    EfTooSmall { ef: usize, m: usize },

    /// `dimensions` was zero.
    #[error("dimensions cannot be zero")]
    ZeroDimensions,

    /// `max_elements` was zero.
    #[error("max_elements cannot be zero")]
    ZeroMaxElements,
}
281
/// Metadata about a vector index.
///
/// This struct holds descriptive information only (identity, size counters,
/// configuration, timestamps); it does not contain the vectors themselves.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VectorIndex {
    /// Unique identifier for this index.
    pub id: String,

    /// Human-readable name.
    pub name: String,

    /// Number of dimensions per vector.
    /// `VectorIndex::new` copies this from `config.dimensions`.
    pub dimensions: usize,

    /// Current number of vectors in the index.
    /// Starts at 0 in `new` and is replaced by `update_size`.
    pub size: usize,

    /// Configuration used for this index.
    pub config: HnswConfig,

    /// When the index was created.
    pub created_at: Timestamp,

    /// When the index was last modified.
    /// Equal to `created_at` on construction; refreshed by `update_size`.
    pub updated_at: Timestamp,

    /// Optional description.
    pub description: Option<String>,
}
309
310impl VectorIndex {
311    /// Create a new vector index metadata object.
312    pub fn new(id: impl Into<String>, name: impl Into<String>, config: HnswConfig) -> Self {
313        let now = Timestamp::now();
314        Self {
315            id: id.into(),
316            name: name.into(),
317            dimensions: config.dimensions,
318            size: 0,
319            config,
320            created_at: now,
321            updated_at: now,
322            description: None,
323        }
324    }
325
326    /// Update the size and modification timestamp.
327    pub fn update_size(&mut self, size: usize) {
328        self.size = size;
329        self.updated_at = Timestamp::now();
330    }
331
332    /// Set the description.
333    pub fn with_description(mut self, desc: impl Into<String>) -> Self {
334        self.description = Some(desc.into());
335        self
336    }
337}
338
/// Type of relationship between embeddings.
///
/// With `rename_all = "snake_case"`, serde reads and writes the variants as
/// `similar`, `sequential`, `same_cluster`, `same_source`, and `custom`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EdgeType {
    /// Embeddings are similar based on vector proximity.
    Similar,

    /// Embeddings are sequential (temporal ordering).
    Sequential,

    /// Embeddings belong to the same cluster.
    SameCluster,

    /// Embeddings are from the same source/recording.
    SameSource,

    /// Custom relationship type.
    Custom,
}
358
359impl Default for EdgeType {
360    fn default() -> Self {
361        Self::Similar
362    }
363}
364
365impl fmt::Display for EdgeType {
366    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
367        match self {
368            Self::Similar => write!(f, "similar"),
369            Self::Sequential => write!(f, "sequential"),
370            Self::SameCluster => write!(f, "same_cluster"),
371            Self::SameSource => write!(f, "same_source"),
372            Self::Custom => write!(f, "custom"),
373        }
374    }
375}
376
/// An edge in the similarity graph between embeddings.
///
/// Connects a source embedding to a target embedding and records the
/// distance between them, the kind of relationship, and when the edge was
/// created. Weight and metadata are optional extras.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimilarityEdge {
    /// Source embedding ID.
    pub from_id: EmbeddingId,

    /// Target embedding ID.
    pub to_id: EmbeddingId,

    /// Distance between the embeddings.
    /// `similarity()` clamps this to `[0.0, 1.0]` before converting it.
    pub distance: f32,

    /// Type of relationship.
    pub edge_type: EdgeType,

    /// When this edge was created.
    pub created_at: Timestamp,

    /// Optional weight for weighted graph operations.
    pub weight: Option<f32>,

    /// Optional metadata.
    pub metadata: Option<EdgeMetadata>,
}
401
402impl SimilarityEdge {
403    /// Create a new similarity edge.
404    pub fn new(from_id: EmbeddingId, to_id: EmbeddingId, distance: f32) -> Self {
405        Self {
406            from_id,
407            to_id,
408            distance,
409            edge_type: EdgeType::Similar,
410            created_at: Timestamp::now(),
411            weight: None,
412            metadata: None,
413        }
414    }
415
416    /// Create a sequential edge (for temporal ordering).
417    pub fn sequential(from_id: EmbeddingId, to_id: EmbeddingId) -> Self {
418        Self {
419            from_id,
420            to_id,
421            distance: 0.0,
422            edge_type: EdgeType::Sequential,
423            created_at: Timestamp::now(),
424            weight: None,
425            metadata: None,
426        }
427    }
428
429    /// Set the edge type.
430    pub fn with_type(mut self, edge_type: EdgeType) -> Self {
431        self.edge_type = edge_type;
432        self
433    }
434
435    /// Set the weight.
436    pub fn with_weight(mut self, weight: f32) -> Self {
437        self.weight = Some(weight);
438        self
439    }
440
441    /// Set metadata.
442    pub fn with_metadata(mut self, metadata: EdgeMetadata) -> Self {
443        self.metadata = Some(metadata);
444        self
445    }
446
447    /// Get similarity (1 - distance) for cosine metric.
448    #[inline]
449    pub fn similarity(&self) -> f32 {
450        1.0 - self.distance.clamp(0.0, 1.0)
451    }
452
453    /// Check if this is a strong connection (high similarity).
454    #[inline]
455    pub fn is_strong(&self, threshold: f32) -> bool {
456        self.similarity() >= threshold
457    }
458}
459
/// Optional metadata for edges.
///
/// All fields start empty via the derived `Default`: no source, no
/// confidence, and an empty attribute map.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EdgeMetadata {
    /// Source of this relationship.
    pub source: Option<String>,

    /// Confidence score for this relationship.
    pub confidence: Option<f32>,

    /// Additional key-value pairs (string keys, string values).
    pub attributes: hashbrown::HashMap<String, String>,
}
472
473impl EdgeMetadata {
474    /// Create new empty metadata.
475    pub fn new() -> Self {
476        Self::default()
477    }
478
479    /// Set the source.
480    pub fn with_source(mut self, source: impl Into<String>) -> Self {
481        self.source = Some(source.into());
482        self
483    }
484
485    /// Set the confidence.
486    pub fn with_confidence(mut self, confidence: f32) -> Self {
487        self.confidence = Some(confidence);
488        self
489    }
490
491    /// Add an attribute.
492    pub fn with_attribute(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
493        self.attributes.insert(key.into(), value.into());
494        self
495    }
496}
497
/// A stored vector with its ID and optional metadata.
///
/// Pairs the raw `f32` components with the embedding's identifier and the
/// time the record was created.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StoredVector {
    /// Unique identifier.
    pub id: EmbeddingId,

    /// The vector data.
    /// Its length is what `dimensions()` reports.
    pub vector: Vec<f32>,

    /// When this vector was stored.
    pub created_at: Timestamp,

    /// Optional metadata.
    pub metadata: Option<VectorMetadata>,
}
513
514impl StoredVector {
515    /// Create a new stored vector.
516    pub fn new(id: EmbeddingId, vector: Vec<f32>) -> Self {
517        Self {
518            id,
519            vector,
520            created_at: Timestamp::now(),
521            metadata: None,
522        }
523    }
524
525    /// Set metadata.
526    pub fn with_metadata(mut self, metadata: VectorMetadata) -> Self {
527        self.metadata = Some(metadata);
528        self
529    }
530
531    /// Get the dimensionality.
532    #[inline]
533    pub fn dimensions(&self) -> usize {
534        self.vector.len()
535    }
536}
537
/// Optional metadata for stored vectors.
///
/// All fields start empty via the derived `Default`: no source ID, no source
/// timestamp, no labels, and an empty attribute map.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct VectorMetadata {
    /// Source file or recording ID.
    pub source_id: Option<String>,

    /// Timestamp within the source (e.g., audio timestamp).
    pub source_timestamp: Option<f64>,

    /// Labels or tags, in the order they were added.
    pub labels: Vec<String>,

    /// Additional key-value pairs (string keys, arbitrary JSON values).
    pub attributes: hashbrown::HashMap<String, serde_json::Value>,
}
553
554impl VectorMetadata {
555    /// Create new empty metadata.
556    pub fn new() -> Self {
557        Self::default()
558    }
559
560    /// Set the source ID.
561    pub fn with_source_id(mut self, id: impl Into<String>) -> Self {
562        self.source_id = Some(id.into());
563        self
564    }
565
566    /// Set the source timestamp.
567    pub fn with_source_timestamp(mut self, ts: f64) -> Self {
568        self.source_timestamp = Some(ts);
569        self
570    }
571
572    /// Add a label.
573    pub fn with_label(mut self, label: impl Into<String>) -> Self {
574        self.labels.push(label.into());
575        self
576    }
577
578    /// Add an attribute.
579    pub fn with_attribute(mut self, key: impl Into<String>, value: serde_json::Value) -> Self {
580        self.attributes.insert(key.into(), value);
581        self
582    }
583}
584
#[cfg(test)]
mod tests {
    use super::*;

    /// Two freshly generated IDs must not collide.
    #[test]
    fn test_embedding_id_creation() {
        let id1 = EmbeddingId::new();
        let id2 = EmbeddingId::new();
        assert_ne!(id1, id2);
    }

    /// The `Display` output must round-trip through `EmbeddingId::parse`.
    #[test]
    fn test_embedding_id_parse() {
        let id = EmbeddingId::new();
        let s = id.to_string();
        let parsed = EmbeddingId::parse(&s).unwrap();
        assert_eq!(id, parsed);
    }

    /// The default config is the 1536-dimension preset and must validate.
    #[test]
    fn test_hnsw_config_default() {
        let config = HnswConfig::default();
        assert_eq!(config.dimensions, 1536);
        assert_eq!(config.m, 32);
        assert!(config.validate().is_ok());
    }

    /// `m < 2` and `ef_construction < m` must both be rejected.
    #[test]
    fn test_hnsw_config_validation() {
        let config = HnswConfig::default().with_m(1);
        assert!(config.validate().is_err());

        let config = HnswConfig::default().with_ef_construction(10);
        assert!(config.validate().is_err());
    }

    /// Similarity is `1 - distance`, and `is_strong` compares it to a threshold.
    #[test]
    fn test_similarity_edge() {
        let from = EmbeddingId::new();
        let to = EmbeddingId::new();
        let edge = SimilarityEdge::new(from, to, 0.2);

        // Compare with a tolerance: the result comes from f32 arithmetic, so
        // exact equality with a decimal literal depends on rounding.
        assert!((edge.similarity() - 0.8).abs() < 1e-6);
        assert!(edge.is_strong(0.7));
        assert!(!edge.is_strong(0.9));
    }

    /// A timestamp taken later must compare greater than an earlier one.
    #[test]
    fn test_timestamp() {
        let ts1 = Timestamp::now();
        // Sleep past the millisecond resolution so the two values differ.
        std::thread::sleep(std::time::Duration::from_millis(10));
        let ts2 = Timestamp::now();
        assert!(ts2 > ts1);
    }
}