velesdb-core 1.14.1

High-performance vector database engine written in Rust
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
//! Snapshot and versioning support for `AgentMemory`.
//!
//! Provides serialization/deserialization of `AgentMemory` state for:
//! - Persistence across restarts
//! - Rollback to previous versions
//! - State transfer between instances
//!
//! # Snapshot Format
//!
//! ```text
//! [Magic: "VAMM" 4 bytes]
//! [Version: 1 byte]
//! [Semantic state length: 8 bytes]
//! [Semantic state: N bytes]
//! [Episodic state length: 8 bytes]
//! [Episodic state: N bytes]
//! [Procedural state length: 8 bytes]
//! [Procedural state: N bytes]
//! [TTL state length: 8 bytes]
//! [TTL state: N bytes]
//! [CRC32: 4 bytes]
//! ```

// Reason: Numeric casts in snapshot handling are intentional:
// - usize to u32 in CRC32: i ranges 0-255, always fits in u32
// - u64 to usize for lengths: Snapshot data is created/loaded on same architecture
//   or architecture-compatible data. Lengths are validated before use.
// All length values are bounds-checked against data.len() before array access.
#![allow(clippy::cast_possible_truncation)]

use std::fs::File;
use std::io::{self, Read, Write};
use std::path::Path;

use crate::storage::snapshot::crc32_hash;

/// Snapshot file magic bytes for `AgentMemory`.
pub const SNAPSHOT_MAGIC: &[u8; 4] = b"VAMM";

/// Current snapshot format version.
pub const SNAPSHOT_VERSION: u8 = 1;

/// Memory state for serialization.
#[derive(Debug, Clone, Default)]
pub struct MemoryState {
    /// Serialized semantic memory entries.
    pub semantic: Vec<u8>,
    /// Serialized episodic memory entries.
    pub episodic: Vec<u8>,
    /// Serialized procedural memory entries.
    pub procedural: Vec<u8>,
    /// Serialized TTL state.
    pub ttl: Vec<u8>,
}

/// Snapshot metadata.
#[derive(Debug, Clone)]
pub struct SnapshotMetadata {
    /// Snapshot format version.
    pub version: u8,
    /// Total size in bytes.
    pub total_size: usize,
    /// CRC32 checksum.
    pub checksum: u32,
}

/// Error type for snapshot operations.
#[derive(Debug)]
#[non_exhaustive]
pub enum SnapshotError {
    /// IO error during read/write.
    Io(io::Error),
    /// Invalid magic bytes.
    InvalidMagic,
    /// Unsupported version.
    UnsupportedVersion(u8),
    /// CRC checksum mismatch.
    ChecksumMismatch {
        /// Expected CRC32 value stored in the snapshot.
        expected: u32,
        /// Actual CRC32 value computed from the data.
        actual: u32,
    },
    /// Data corruption or truncation.
    CorruptedData(String),
}

impl std::fmt::Display for SnapshotError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Io(e) => write!(f, "IO error: {e}"),
            Self::InvalidMagic => write!(f, "Invalid snapshot magic bytes"),
            Self::UnsupportedVersion(v) => write!(f, "Unsupported snapshot version: {v}"),
            Self::ChecksumMismatch { expected, actual } => {
                write!(
                    f,
                    "Checksum mismatch: expected {expected:08x}, got {actual:08x}"
                )
            }
            Self::CorruptedData(msg) => write!(f, "Corrupted data: {msg}"),
        }
    }
}

impl std::error::Error for SnapshotError {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Self::Io(e) => Some(e),
            _ => None,
        }
    }
}

impl From<io::Error> for SnapshotError {
    fn from(e: io::Error) -> Self {
        Self::Io(e)
    }
}

/// Creates a snapshot from memory state.
///
/// # Arguments
///
/// * `state` - Memory state to serialize
///
/// # Returns
///
/// Serialized snapshot bytes.
#[must_use]
pub fn create_snapshot(state: &MemoryState) -> Vec<u8> {
    let total_size = 4
        + 1
        + 8
        + state.semantic.len()
        + 8
        + state.episodic.len()
        + 8
        + state.procedural.len()
        + 8
        + state.ttl.len()
        + 4;
    let mut buf = Vec::with_capacity(total_size);

    buf.extend_from_slice(SNAPSHOT_MAGIC);
    buf.push(SNAPSHOT_VERSION);

    buf.extend_from_slice(&(state.semantic.len() as u64).to_le_bytes());
    buf.extend_from_slice(&state.semantic);

    buf.extend_from_slice(&(state.episodic.len() as u64).to_le_bytes());
    buf.extend_from_slice(&state.episodic);

    buf.extend_from_slice(&(state.procedural.len() as u64).to_le_bytes());
    buf.extend_from_slice(&state.procedural);

    buf.extend_from_slice(&(state.ttl.len() as u64).to_le_bytes());
    buf.extend_from_slice(&state.ttl);

    let crc = crc32_hash(&buf);
    buf.extend_from_slice(&crc.to_le_bytes());

    buf
}

/// Loads a snapshot from bytes.
///
/// # Arguments
///
/// * `data` - Snapshot bytes
///
/// # Errors
///
/// Returns error if snapshot is invalid or corrupted.
pub fn load_snapshot(data: &[u8]) -> Result<MemoryState, SnapshotError> {
    validate_snapshot_header(data)?;

    let mut offset = 5; // skip magic (4) + version (1)
    let payload_end = data.len() - 4; // exclude trailing CRC

    let semantic = read_section(data, &mut offset, payload_end, "Semantic")?;
    let episodic = read_section(data, &mut offset, payload_end, "Episodic")?;
    let procedural = read_section(data, &mut offset, payload_end, "Procedural")?;
    let ttl = read_section(data, &mut offset, payload_end, "TTL")?;

    Ok(MemoryState {
        semantic,
        episodic,
        procedural,
        ttl,
    })
}

/// Validates magic bytes, version, and CRC32 checksum of a snapshot.
fn validate_snapshot_header(data: &[u8]) -> Result<(), SnapshotError> {
    const MIN_SIZE: usize = 4 + 1 + 8 + 8 + 8 + 8 + 4;

    if data.len() < MIN_SIZE {
        return Err(SnapshotError::CorruptedData(
            "Snapshot too small".to_string(),
        ));
    }
    if &data[0..4] != SNAPSHOT_MAGIC {
        return Err(SnapshotError::InvalidMagic);
    }
    let version = data[4];
    if version != SNAPSHOT_VERSION {
        return Err(SnapshotError::UnsupportedVersion(version));
    }

    let stored_crc = u32::from_le_bytes(
        data[data.len() - 4..]
            .try_into()
            .map_err(|_| SnapshotError::CorruptedData("Invalid CRC bytes".to_string()))?,
    );
    let computed_crc = crc32_hash(&data[..data.len() - 4]);
    if stored_crc != computed_crc {
        return Err(SnapshotError::ChecksumMismatch {
            expected: stored_crc,
            actual: computed_crc,
        });
    }
    Ok(())
}

/// Reads a length-prefixed section from the snapshot data.
fn read_section(
    data: &[u8],
    offset: &mut usize,
    payload_end: usize,
    label: &str,
) -> Result<Vec<u8>, SnapshotError> {
    let section_len = read_u64(&data[*offset..])? as usize;
    *offset += 8;
    if *offset + section_len > payload_end {
        return Err(SnapshotError::CorruptedData(format!(
            "{label} data truncated"
        )));
    }
    let section = data[*offset..*offset + section_len].to_vec();
    *offset += section_len;
    Ok(section)
}

/// Saves a snapshot to a file.
///
/// Uses atomic write (temp file + rename) for safety.
///
/// # Errors
///
/// Returns error if file operations fail.
pub fn save_snapshot_to_file<P: AsRef<Path>>(
    path: P,
    state: &MemoryState,
) -> Result<(), SnapshotError> {
    let path = path.as_ref();
    let snapshot_data = create_snapshot(state);

    let temp_path = path.with_extension("tmp");
    let mut file = File::create(&temp_path)?;
    file.write_all(&snapshot_data)?;
    file.sync_all()?;
    drop(file);

    std::fs::rename(&temp_path, path)?;

    Ok(())
}

/// Loads a snapshot from a file.
///
/// # Errors
///
/// Returns error if file operations fail or snapshot is invalid.
pub fn load_snapshot_from_file<P: AsRef<Path>>(path: P) -> Result<MemoryState, SnapshotError> {
    let mut file = File::open(path)?;
    let mut data = Vec::new();
    file.read_to_end(&mut data)?;
    load_snapshot(&data)
}

/// Helper to read u64 from bytes.
fn read_u64(data: &[u8]) -> Result<u64, SnapshotError> {
    if data.len() < 8 {
        return Err(SnapshotError::CorruptedData(
            "Not enough bytes for u64".to_string(),
        ));
    }
    Ok(u64::from_le_bytes(data[0..8].try_into().map_err(|_| {
        SnapshotError::CorruptedData("Invalid u64 bytes".to_string())
    })?))
}

/// Snapshot manager for versioned snapshots.
pub struct SnapshotManager {
    /// Base directory for snapshots.
    base_path: std::path::PathBuf,
    /// Maximum number of snapshots to retain.
    max_snapshots: usize,
}

impl SnapshotManager {
    /// Creates a new snapshot manager.
    ///
    /// # Arguments
    ///
    /// * `base_path` - Directory for storing snapshots
    /// * `max_snapshots` - Maximum number of snapshots to retain
    pub fn new<P: AsRef<Path>>(base_path: P, max_snapshots: usize) -> Self {
        Self {
            base_path: base_path.as_ref().to_path_buf(),
            max_snapshots,
        }
    }

    /// Creates a new versioned snapshot.
    ///
    /// # Returns
    ///
    /// The version number of the created snapshot.
    ///
    /// # Errors
    ///
    /// Returns error if file operations fail.
    pub fn create_versioned_snapshot(&self, state: &MemoryState) -> Result<u64, SnapshotError> {
        std::fs::create_dir_all(&self.base_path)?;

        let version = self.next_version()?;
        let filename = format!("snapshot_{version:08}.vamm");
        let path = self.base_path.join(filename);

        save_snapshot_to_file(&path, state)?;
        self.cleanup_old_snapshots()?;

        Ok(version)
    }

    /// Loads the latest snapshot.
    ///
    /// # Errors
    ///
    /// Returns error if no snapshots exist or loading fails.
    pub fn load_latest(&self) -> Result<(u64, MemoryState), SnapshotError> {
        let version = self
            .latest_version()?
            .ok_or_else(|| SnapshotError::CorruptedData("No snapshots found".to_string()))?;
        let state = self.load_version(version)?;
        Ok((version, state))
    }

    /// Loads a specific snapshot version.
    ///
    /// # Errors
    ///
    /// Returns error if version doesn't exist or loading fails.
    pub fn load_version(&self, version: u64) -> Result<MemoryState, SnapshotError> {
        let filename = format!("snapshot_{version:08}.vamm");
        let path = self.base_path.join(filename);
        load_snapshot_from_file(&path)
    }

    /// Lists all available snapshot versions.
    ///
    /// # Errors
    ///
    /// Returns error if directory operations fail.
    pub fn list_versions(&self) -> Result<Vec<u64>, SnapshotError> {
        if !self.base_path.exists() {
            return Ok(Vec::new());
        }

        let mut versions: Vec<u64> = std::fs::read_dir(&self.base_path)?
            .filter_map(Result::ok)
            .filter_map(|e| parse_snapshot_version(&e.file_name().to_string_lossy()))
            .collect();

        versions.sort_unstable();
        Ok(versions)
    }

    /// Returns the latest snapshot version.
    fn latest_version(&self) -> Result<Option<u64>, SnapshotError> {
        Ok(self.list_versions()?.into_iter().max())
    }

    /// Returns the next version number.
    fn next_version(&self) -> Result<u64, SnapshotError> {
        Ok(self.latest_version()?.map_or(1, |v| v + 1))
    }

    /// Removes old snapshots beyond the retention limit.
    fn cleanup_old_snapshots(&self) -> Result<(), SnapshotError> {
        let versions = self.list_versions()?;
        if versions.len() <= self.max_snapshots {
            return Ok(());
        }

        let to_remove = versions.len() - self.max_snapshots;
        for version in versions.into_iter().take(to_remove) {
            let filename = format!("snapshot_{version:08}.vamm");
            let path = self.base_path.join(filename);
            let _ = std::fs::remove_file(path);
        }

        Ok(())
    }
}

/// Extracts a snapshot version number from a filename like `snapshot_00000001.vamm`.
fn parse_snapshot_version(filename: &str) -> Option<u64> {
    filename
        .strip_prefix("snapshot_")
        .and_then(|s| s.strip_suffix(".vamm"))
        .and_then(|s| s.parse::<u64>().ok())
}