ruvector_snapshot/
snapshot.rs

1use bincode::{Decode, Encode};
2use chrono::{DateTime, Utc};
3use serde::{Deserialize, Serialize};
4use serde_json::Value;
5
6/// Snapshot metadata and information
7#[derive(Debug, Clone, Serialize, Deserialize)]
8pub struct Snapshot {
9    /// Unique snapshot identifier
10    pub id: String,
11
12    /// Name of the collection this snapshot represents
13    pub collection_name: String,
14
15    /// Timestamp when the snapshot was created
16    pub created_at: DateTime<Utc>,
17
18    /// Number of vectors in the snapshot
19    pub vectors_count: usize,
20
21    /// SHA-256 checksum of the snapshot data
22    pub checksum: String,
23
24    /// Size of the snapshot in bytes (compressed)
25    pub size_bytes: u64,
26}
27
28/// Complete snapshot data including metadata and vectors
29#[derive(Debug, Serialize, Deserialize, Encode, Decode)]
30pub struct SnapshotData {
31    /// Snapshot metadata
32    pub metadata: SnapshotMetadata,
33
34    /// Collection configuration
35    pub config: CollectionConfig,
36
37    /// All vectors in the collection
38    pub vectors: Vec<VectorRecord>,
39}
40
41impl SnapshotData {
42    /// Create a new snapshot data instance
43    pub fn new(
44        collection_name: String,
45        config: CollectionConfig,
46        vectors: Vec<VectorRecord>,
47    ) -> Self {
48        Self {
49            metadata: SnapshotMetadata {
50                id: uuid::Uuid::new_v4().to_string(),
51                collection_name,
52                created_at: Utc::now().to_rfc3339(),
53                version: env!("CARGO_PKG_VERSION").to_string(),
54            },
55            config,
56            vectors,
57        }
58    }
59
60    /// Get the number of vectors in this snapshot
61    pub fn vectors_count(&self) -> usize {
62        self.vectors.len()
63    }
64
65    /// Get the snapshot ID
66    pub fn id(&self) -> &str {
67        &self.metadata.id
68    }
69
70    /// Get the collection name
71    pub fn collection_name(&self) -> &str {
72        &self.metadata.collection_name
73    }
74}
75
76/// Snapshot metadata
77#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
78pub struct SnapshotMetadata {
79    /// Unique snapshot identifier
80    pub id: String,
81
82    /// Name of the collection
83    pub collection_name: String,
84
85    /// Creation timestamp (RFC3339 format)
86    pub created_at: String,
87
88    /// Version of the snapshot format
89    pub version: String,
90}
91
92/// Collection configuration stored in snapshot
93#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
94pub struct CollectionConfig {
95    /// Vector dimension
96    pub dimension: usize,
97
98    /// Distance metric
99    pub metric: DistanceMetric,
100
101    /// HNSW configuration
102    pub hnsw_config: Option<HnswConfig>,
103}
104
105/// Distance metric for vector similarity
106#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
107pub enum DistanceMetric {
108    Cosine,
109    Euclidean,
110    DotProduct,
111}
112
113/// HNSW index configuration
114#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
115pub struct HnswConfig {
116    pub m: usize,
117    pub ef_construction: usize,
118    pub ef_search: usize,
119}
120
121/// Individual vector record in a snapshot
122#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
123pub struct VectorRecord {
124    /// Unique vector identifier
125    pub id: String,
126
127    /// Vector data
128    pub vector: Vec<f32>,
129
130    /// Optional metadata payload (stored as JSON string for bincode compatibility)
131    #[serde(skip)]
132    #[bincode(with_serde)]
133    payload_json: Option<String>,
134}
135
136impl VectorRecord {
137    /// Create a new vector record
138    pub fn new(id: String, vector: Vec<f32>, payload: Option<Value>) -> Self {
139        let payload_json = payload.and_then(|v| serde_json::to_string(&v).ok());
140        Self {
141            id,
142            vector,
143            payload_json,
144        }
145    }
146
147    /// Get the payload as a serde_json::Value
148    pub fn payload(&self) -> Option<Value> {
149        self.payload_json
150            .as_ref()
151            .and_then(|s| serde_json::from_str(s).ok())
152    }
153
154    /// Set the payload from a serde_json::Value
155    pub fn set_payload(&mut self, payload: Option<Value>) {
156        self.payload_json = payload.and_then(|v| serde_json::to_string(&v).ok());
157    }
158
159    /// Get the dimension of this vector
160    pub fn dimension(&self) -> usize {
161        self.vector.len()
162    }
163}
164
#[cfg(test)]
mod tests {
    use super::*;

    /// A record built without a payload keeps its id and reports the vector length.
    #[test]
    fn test_vector_record_creation() {
        let components = vec![1.0, 2.0, 3.0];
        let rec = VectorRecord::new(String::from("test-1"), components, None);

        assert_eq!(rec.id, "test-1");
        assert_eq!(rec.dimension(), 3);
    }

    /// Snapshot data exposes the record count, the collection name, and a non-empty id.
    #[test]
    fn test_snapshot_data_creation() {
        let records = vec![
            VectorRecord::new(String::from("v1"), vec![1.0, 0.0, 0.0], None),
            VectorRecord::new(String::from("v2"), vec![0.0, 1.0, 0.0], None),
        ];
        let cfg = CollectionConfig {
            dimension: 3,
            metric: DistanceMetric::Cosine,
            hnsw_config: None,
        };

        let snapshot = SnapshotData::new(String::from("test-collection"), cfg, records);

        assert_eq!(snapshot.vectors_count(), 2);
        assert_eq!(snapshot.collection_name(), "test-collection");
        assert!(!snapshot.id().is_empty());
    }
}