1use serde::{Deserialize, Serialize};
6use std::fmt;
7use uuid::Uuid;
8
9#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
13pub struct EmbeddingId(Uuid);
14
15impl EmbeddingId {
16 #[inline]
18 pub fn new() -> Self {
19 Self(Uuid::new_v4())
20 }
21
22 #[inline]
24 pub const fn from_uuid(uuid: Uuid) -> Self {
25 Self(uuid)
26 }
27
28 pub fn parse(s: &str) -> Result<Self, uuid::Error> {
30 Ok(Self(Uuid::parse_str(s)?))
31 }
32
33 #[inline]
35 pub const fn as_uuid(&self) -> &Uuid {
36 &self.0
37 }
38
39 #[inline]
41 pub fn as_bytes(&self) -> &[u8; 16] {
42 self.0.as_bytes()
43 }
44
45 #[inline]
47 pub fn from_bytes(bytes: [u8; 16]) -> Self {
48 Self(Uuid::from_bytes(bytes))
49 }
50}
51
52impl Default for EmbeddingId {
53 fn default() -> Self {
54 Self::new()
55 }
56}
57
58impl fmt::Display for EmbeddingId {
59 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
60 write!(f, "{}", self.0)
61 }
62}
63
64impl From<Uuid> for EmbeddingId {
65 fn from(uuid: Uuid) -> Self {
66 Self(uuid)
67 }
68}
69
70impl From<EmbeddingId> for Uuid {
71 fn from(id: EmbeddingId) -> Self {
72 id.0
73 }
74}
75
76#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
78pub struct Timestamp(i64);
79
80impl Timestamp {
81 pub fn now() -> Self {
83 Self(chrono::Utc::now().timestamp_millis())
84 }
85
86 #[inline]
88 pub const fn from_millis(millis: i64) -> Self {
89 Self(millis)
90 }
91
92 #[inline]
94 pub const fn as_millis(&self) -> i64 {
95 self.0
96 }
97
98 pub fn to_datetime(&self) -> chrono::DateTime<chrono::Utc> {
100 chrono::DateTime::from_timestamp_millis(self.0)
101 .unwrap_or_else(|| chrono::DateTime::UNIX_EPOCH)
102 }
103}
104
105impl Default for Timestamp {
106 fn default() -> Self {
107 Self::now()
108 }
109}
110
111impl fmt::Display for Timestamp {
112 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
113 write!(f, "{}", self.to_datetime().format("%Y-%m-%d %H:%M:%S%.3f UTC"))
114 }
115}
116
117#[derive(Debug, Clone, Serialize, Deserialize)]
122pub struct HnswConfig {
123 pub m: usize,
127
128 pub ef_construction: usize,
132
133 pub ef_search: usize,
137
138 pub max_elements: usize,
141
142 pub dimensions: usize,
144
145 pub normalize: bool,
147
148 pub distance_metric: DistanceMetric,
150}
151
152impl HnswConfig {
153 pub fn for_dimension(dim: usize) -> Self {
155 Self {
156 m: if dim >= 1024 { 32 } else { 16 },
157 ef_construction: 200,
158 ef_search: 128,
159 max_elements: 1_000_000,
160 dimensions: dim,
161 normalize: true,
162 distance_metric: DistanceMetric::Cosine,
163 }
164 }
165
166 pub fn for_openai_embeddings() -> Self {
168 Self::for_dimension(1536)
169 }
170
171 pub fn for_sentence_transformers() -> Self {
173 Self::for_dimension(384)
174 }
175
176 pub fn with_m(mut self, m: usize) -> Self {
178 self.m = m;
179 self
180 }
181
182 pub fn with_ef_construction(mut self, ef: usize) -> Self {
184 self.ef_construction = ef;
185 self
186 }
187
188 pub fn with_ef_search(mut self, ef: usize) -> Self {
190 self.ef_search = ef;
191 self
192 }
193
194 pub fn with_max_elements(mut self, max: usize) -> Self {
196 self.max_elements = max;
197 self
198 }
199
200 pub fn with_distance_metric(mut self, metric: DistanceMetric) -> Self {
202 self.distance_metric = metric;
203 self
204 }
205
206 pub fn with_normalize(mut self, normalize: bool) -> Self {
208 self.normalize = normalize;
209 self
210 }
211
212 pub fn validate(&self) -> Result<(), ConfigValidationError> {
214 if self.m < 2 {
215 return Err(ConfigValidationError::InvalidM(self.m));
216 }
217 if self.ef_construction < self.m {
218 return Err(ConfigValidationError::EfTooSmall {
219 ef: self.ef_construction,
220 m: self.m,
221 });
222 }
223 if self.dimensions == 0 {
224 return Err(ConfigValidationError::ZeroDimensions);
225 }
226 if self.max_elements == 0 {
227 return Err(ConfigValidationError::ZeroMaxElements);
228 }
229 Ok(())
230 }
231}
232
233impl Default for HnswConfig {
234 fn default() -> Self {
235 Self::for_openai_embeddings()
236 }
237}
238
239#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
241#[serde(rename_all = "snake_case")]
242pub enum DistanceMetric {
243 Cosine,
246
247 Euclidean,
250
251 DotProduct,
254
255 Poincare,
258}
259
260impl Default for DistanceMetric {
261 fn default() -> Self {
262 Self::Cosine
263 }
264}
265
266#[derive(Debug, Clone, thiserror::Error)]
268pub enum ConfigValidationError {
269 #[error("M parameter must be >= 2, got {0}")]
270 InvalidM(usize),
271
272 #[error("ef_construction ({ef}) must be >= M ({m})")]
273 EfTooSmall { ef: usize, m: usize },
274
275 #[error("dimensions cannot be zero")]
276 ZeroDimensions,
277
278 #[error("max_elements cannot be zero")]
279 ZeroMaxElements,
280}
281
282#[derive(Debug, Clone, Serialize, Deserialize)]
284pub struct VectorIndex {
285 pub id: String,
287
288 pub name: String,
290
291 pub dimensions: usize,
293
294 pub size: usize,
296
297 pub config: HnswConfig,
299
300 pub created_at: Timestamp,
302
303 pub updated_at: Timestamp,
305
306 pub description: Option<String>,
308}
309
310impl VectorIndex {
311 pub fn new(id: impl Into<String>, name: impl Into<String>, config: HnswConfig) -> Self {
313 let now = Timestamp::now();
314 Self {
315 id: id.into(),
316 name: name.into(),
317 dimensions: config.dimensions,
318 size: 0,
319 config,
320 created_at: now,
321 updated_at: now,
322 description: None,
323 }
324 }
325
326 pub fn update_size(&mut self, size: usize) {
328 self.size = size;
329 self.updated_at = Timestamp::now();
330 }
331
332 pub fn with_description(mut self, desc: impl Into<String>) -> Self {
334 self.description = Some(desc.into());
335 self
336 }
337}
338
339#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
341#[serde(rename_all = "snake_case")]
342pub enum EdgeType {
343 Similar,
345
346 Sequential,
348
349 SameCluster,
351
352 SameSource,
354
355 Custom,
357}
358
359impl Default for EdgeType {
360 fn default() -> Self {
361 Self::Similar
362 }
363}
364
365impl fmt::Display for EdgeType {
366 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
367 match self {
368 Self::Similar => write!(f, "similar"),
369 Self::Sequential => write!(f, "sequential"),
370 Self::SameCluster => write!(f, "same_cluster"),
371 Self::SameSource => write!(f, "same_source"),
372 Self::Custom => write!(f, "custom"),
373 }
374 }
375}
376
377#[derive(Debug, Clone, Serialize, Deserialize)]
379pub struct SimilarityEdge {
380 pub from_id: EmbeddingId,
382
383 pub to_id: EmbeddingId,
385
386 pub distance: f32,
388
389 pub edge_type: EdgeType,
391
392 pub created_at: Timestamp,
394
395 pub weight: Option<f32>,
397
398 pub metadata: Option<EdgeMetadata>,
400}
401
402impl SimilarityEdge {
403 pub fn new(from_id: EmbeddingId, to_id: EmbeddingId, distance: f32) -> Self {
405 Self {
406 from_id,
407 to_id,
408 distance,
409 edge_type: EdgeType::Similar,
410 created_at: Timestamp::now(),
411 weight: None,
412 metadata: None,
413 }
414 }
415
416 pub fn sequential(from_id: EmbeddingId, to_id: EmbeddingId) -> Self {
418 Self {
419 from_id,
420 to_id,
421 distance: 0.0,
422 edge_type: EdgeType::Sequential,
423 created_at: Timestamp::now(),
424 weight: None,
425 metadata: None,
426 }
427 }
428
429 pub fn with_type(mut self, edge_type: EdgeType) -> Self {
431 self.edge_type = edge_type;
432 self
433 }
434
435 pub fn with_weight(mut self, weight: f32) -> Self {
437 self.weight = Some(weight);
438 self
439 }
440
441 pub fn with_metadata(mut self, metadata: EdgeMetadata) -> Self {
443 self.metadata = Some(metadata);
444 self
445 }
446
447 #[inline]
449 pub fn similarity(&self) -> f32 {
450 1.0 - self.distance.clamp(0.0, 1.0)
451 }
452
453 #[inline]
455 pub fn is_strong(&self, threshold: f32) -> bool {
456 self.similarity() >= threshold
457 }
458}
459
460#[derive(Debug, Clone, Default, Serialize, Deserialize)]
462pub struct EdgeMetadata {
463 pub source: Option<String>,
465
466 pub confidence: Option<f32>,
468
469 pub attributes: hashbrown::HashMap<String, String>,
471}
472
473impl EdgeMetadata {
474 pub fn new() -> Self {
476 Self::default()
477 }
478
479 pub fn with_source(mut self, source: impl Into<String>) -> Self {
481 self.source = Some(source.into());
482 self
483 }
484
485 pub fn with_confidence(mut self, confidence: f32) -> Self {
487 self.confidence = Some(confidence);
488 self
489 }
490
491 pub fn with_attribute(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
493 self.attributes.insert(key.into(), value.into());
494 self
495 }
496}
497
498#[derive(Debug, Clone, Serialize, Deserialize)]
500pub struct StoredVector {
501 pub id: EmbeddingId,
503
504 pub vector: Vec<f32>,
506
507 pub created_at: Timestamp,
509
510 pub metadata: Option<VectorMetadata>,
512}
513
514impl StoredVector {
515 pub fn new(id: EmbeddingId, vector: Vec<f32>) -> Self {
517 Self {
518 id,
519 vector,
520 created_at: Timestamp::now(),
521 metadata: None,
522 }
523 }
524
525 pub fn with_metadata(mut self, metadata: VectorMetadata) -> Self {
527 self.metadata = Some(metadata);
528 self
529 }
530
531 #[inline]
533 pub fn dimensions(&self) -> usize {
534 self.vector.len()
535 }
536}
537
538#[derive(Debug, Clone, Default, Serialize, Deserialize)]
540pub struct VectorMetadata {
541 pub source_id: Option<String>,
543
544 pub source_timestamp: Option<f64>,
546
547 pub labels: Vec<String>,
549
550 pub attributes: hashbrown::HashMap<String, serde_json::Value>,
552}
553
554impl VectorMetadata {
555 pub fn new() -> Self {
557 Self::default()
558 }
559
560 pub fn with_source_id(mut self, id: impl Into<String>) -> Self {
562 self.source_id = Some(id.into());
563 self
564 }
565
566 pub fn with_source_timestamp(mut self, ts: f64) -> Self {
568 self.source_timestamp = Some(ts);
569 self
570 }
571
572 pub fn with_label(mut self, label: impl Into<String>) -> Self {
574 self.labels.push(label.into());
575 self
576 }
577
578 pub fn with_attribute(mut self, key: impl Into<String>, value: serde_json::Value) -> Self {
580 self.attributes.insert(key.into(), value);
581 self
582 }
583}
584
#[cfg(test)]
mod tests {
    use super::*;

    /// Tolerance for `f32` comparisons; exact float equality is brittle.
    const EPSILON: f32 = 1e-6;

    /// Two freshly generated identifiers must differ.
    #[test]
    fn test_embedding_id_creation() {
        let id1 = EmbeddingId::new();
        let id2 = EmbeddingId::new();
        assert_ne!(id1, id2);
    }

    /// The `Display` output must parse back to the same identifier.
    #[test]
    fn test_embedding_id_parse() {
        let id = EmbeddingId::new();
        let s = id.to_string();
        let parsed = EmbeddingId::parse(&s).unwrap();
        assert_eq!(id, parsed);
    }

    /// The default configuration is the 1536-dimension preset and is valid.
    #[test]
    fn test_hnsw_config_default() {
        let config = HnswConfig::default();
        assert_eq!(config.dimensions, 1536);
        assert_eq!(config.m, 32);
        assert!(config.validate().is_ok());
    }

    /// Validation must fail with the specific variant for each broken rule,
    /// not merely return some error.
    #[test]
    fn test_hnsw_config_validation() {
        let config = HnswConfig::default().with_m(1);
        assert!(matches!(
            config.validate(),
            Err(ConfigValidationError::InvalidM(1))
        ));

        let config = HnswConfig::default().with_ef_construction(10);
        assert!(matches!(
            config.validate(),
            Err(ConfigValidationError::EfTooSmall { ef: 10, m: 32 })
        ));
    }

    /// Similarity is `1 - distance`; compared with a tolerance.
    #[test]
    fn test_similarity_edge() {
        let from = EmbeddingId::new();
        let to = EmbeddingId::new();
        let edge = SimilarityEdge::new(from, to, 0.2);

        assert!((edge.similarity() - 0.8).abs() < EPSILON);
        assert!(edge.is_strong(0.7));
        assert!(!edge.is_strong(0.9));
    }

    /// Timestamps order by their millisecond value. Fixed values are used so
    /// the test needs no sleep and does not depend on wall-clock behavior.
    #[test]
    fn test_timestamp() {
        let ts1 = Timestamp::from_millis(1_000);
        let ts2 = Timestamp::from_millis(1_010);
        assert!(ts2 > ts1);
        assert_eq!(ts2.as_millis() - ts1.as_millis(), 10);

        // `now()` should report a time after the Unix epoch.
        assert!(Timestamp::now().as_millis() > 0);
    }
}