kiromi-ai-memory 0.2.2

Local-first multi-tenant memory store engine: Markdown/text content on object storage, metadata in SQLite, plugin-shaped embedder/storage/metadata, hybrid text+vector search.
Documentation
// SPDX-License-Identifier: Apache-2.0 OR MIT
//! Plan 12: point-in-time snapshots.
//!
//! A snapshot freezes the live set at a moment in time. The
//! [`SnapshotManifest`] is a JSON blob persisted under
//! `metadata/snapshots/{snapshot_id}.manifest.json` carrying the primary
//! keys of every live row at the snapshot's `audit_log.seq`.
//! `Memory::restore` (configured by [`RestoreOpts`], reporting through
//! [`RestoreReport`]) reconciles the live set against this blob.
//!
//! Storage blobs are append-only: snapshots never delete data files,
//! summary blobs, or shard files synchronously. A future `gc()` (also
//! deferred) reaps orphan blobs (including orphan manifests) only
//! after a retention window, preserving the "always recover" property.

use std::fmt;
use std::str::FromStr;

use serde::{Deserialize, Serialize};
use ulid::Ulid;

use crate::attribute::AttributeValue;
use crate::memory::MemoryId;
use crate::summary::SummaryId;

/// Stable identifier for a snapshot — a 128-bit ULID.
///
/// Thin newtype over [`Ulid`]. Serde converts the id through its
/// `String` form in both directions (`From<SnapshotId> for String` when
/// serializing, `TryFrom<String>` when deserializing), so it is encoded
/// as a plain string rather than as the wrapped value.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
#[serde(into = "String", try_from = "String")]
pub struct SnapshotId(Ulid);

impl SnapshotId {
    /// Generate a fresh ULID at the current wall-clock time.
    ///
    /// When the env var `KIROMI_AI_TEST_DETERMINISTIC_ULID` is set,
    /// returns successive ULIDs derived from a process-local counter
    /// so insta snapshots stay stable. Mirrors
    /// [`crate::MemoryId::generate`].
    #[must_use]
    pub fn generate() -> Self {
        if std::env::var_os("KIROMI_AI_TEST_DETERMINISTIC_ULID").is_some() {
            use std::sync::atomic::{AtomicU64, Ordering};
            static COUNTER: AtomicU64 = AtomicU64::new(8_000_000);
            let n = COUNTER.fetch_add(1, Ordering::Relaxed);
            return SnapshotId(Ulid::from_parts(n, u128::from(n)));
        }
        SnapshotId(Ulid::new())
    }

    /// Construct from a raw [`Ulid`].
    #[must_use]
    pub const fn from_ulid(u: Ulid) -> Self {
        SnapshotId(u)
    }

    /// Underlying [`Ulid`].
    #[must_use]
    pub const fn as_ulid(&self) -> Ulid {
        self.0
    }
}

impl fmt::Display for SnapshotId {
    /// Render the id exactly as the wrapped [`Ulid`] renders itself.
    ///
    /// The inner implementation is named explicitly as `fmt::Display`
    /// rather than called with method syntax: `.fmt(..)` only resolves
    /// when a formatting trait happens to be imported, and becomes
    /// ambiguous when more than one is. The caller's formatter is
    /// forwarded unchanged, so any width/fill flags reach the inner
    /// implementation.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(&self.0, f)
    }
}

impl FromStr for SnapshotId {
    type Err = ulid::DecodeError;
    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
        s.parse::<Ulid>().map(SnapshotId)
    }
}

impl From<SnapshotId> for String {
    fn from(id: SnapshotId) -> String {
        id.0.to_string()
    }
}

impl TryFrom<String> for SnapshotId {
    type Error = ulid::DecodeError;
    fn try_from(s: String) -> std::result::Result<Self, Self::Error> {
        s.parse()
    }
}

/// Lightweight handle returned by [`crate::Memory::snapshot`] /
/// [`crate::Memory::list_snapshots`].
///
/// Carries only the snapshot's identity, position in the audit log,
/// creation time, and caller-supplied labels; the per-row contents are
/// described by [`SnapshotManifest`].
#[non_exhaustive]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SnapshotRef {
    /// Snapshot id.
    pub id: SnapshotId,
    /// `audit_log.seq` at snapshot time. Mutations recorded past this
    /// seq are what make the live set diverge from the snapshot.
    pub seq: i64,
    /// Creation time, in unix milliseconds.
    pub created_at_ms: i64,
    /// Caller-supplied tag, if one was given.
    pub tag: Option<String>,
    /// Caller-supplied reason, if one was given.
    pub reason: Option<String>,
}

/// Caller-tunable knobs on [`crate::Memory::snapshot`].
///
/// The derived `Default` leaves both fields as `None`; populate them
/// with [`SnapshotOpts::with_tag`] and [`SnapshotOpts::with_reason`].
#[non_exhaustive]
#[derive(Debug, Clone, Default)]
pub struct SnapshotOpts {
    /// Optional human tag (e.g. `"pre-experiment"`). Surfaces in
    /// `list_snapshots` for inspection.
    pub tag: Option<String>,
    /// Optional human reason (e.g. `"about to run risky migration"`).
    pub reason: Option<String>,
}

impl SnapshotOpts {
    /// Return these options with `tag` set, replacing any earlier tag.
    ///
    /// All other fields are carried over unchanged.
    #[must_use]
    pub fn with_tag(self, tag: impl Into<String>) -> Self {
        Self {
            tag: Some(tag.into()),
            ..self
        }
    }

    /// Return these options with `reason` set, replacing any earlier
    /// reason.
    ///
    /// All other fields are carried over unchanged.
    #[must_use]
    pub fn with_reason(self, reason: impl Into<String>) -> Self {
        Self {
            reason: Some(reason.into()),
            ..self
        }
    }
}

/// Manifest written to storage at snapshot time.
///
/// The manifest carries the primary keys of every live row at the
/// snapshot's `seq`, plus the values needed to reconstruct the
/// attribute set. `Memory::restore` reads this back and reconciles
/// the live set against it: rows live now but missing from the
/// manifest are re-tombstoned; rows in the manifest but currently
/// tombstoned are un-tombstoned.
///
/// All `Vec` fields are written sorted so binary search at restore
/// time is `O(log N)`.
///
/// Fields marked `#[serde(default)]` deserialize to an empty `Vec`
/// when absent from the stored JSON; the remaining fields are required.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SnapshotManifest {
    /// Snapshot id.
    pub snapshot_id: SnapshotId,
    /// `audit_log.seq` at snapshot time.
    pub seq: i64,
    /// Creation time, in unix milliseconds.
    pub created_at_ms: i64,
    /// Live memory ids at snapshot time. Sorted ascending for binary
    /// search at restore-time.
    pub memory_ids: Vec<MemoryId>,
    /// Live summary ids at snapshot time. Sorted.
    // NOTE(review): presumably defaulted so manifests written before
    // this field existed still load — confirm.
    #[serde(default)]
    pub summary_ids: Vec<SummaryId>,
    /// Live link pairs `(src, dst)` at snapshot time. Sorted.
    pub link_pairs: Vec<(MemoryId, MemoryId)>,
    /// Live attribute triples `(memory_id, key, value)` at snapshot
    /// time. Restore re-applies these verbatim and clears keys that
    /// are present now but not here. Sorted by `(memory_id, key)`.
    // NOTE(review): presumably defaulted for the same compatibility
    // reason as `summary_ids` — confirm.
    #[serde(default)]
    pub attributes: Vec<SnapshotAttribute>,
}

/// One frozen `(memory_id, key, value)` triple inside a
/// [`SnapshotManifest`].
///
/// Stored in [`SnapshotManifest::attributes`], which is documented as
/// sorted by `(memory_id, key)`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SnapshotAttribute {
    /// Memory that owns the attribute.
    pub memory_id: MemoryId,
    /// Attribute key.
    pub key: String,
    /// Attribute value as it was at snapshot time.
    pub value: AttributeValue,
}

impl SnapshotManifest {
    /// Build the storage key of the manifest blob for snapshot `id`.
    ///
    /// The key is `snapshots/<id>.manifest.json`, where `<id>` is the
    /// id's `Display` form, and is relative to the tenant root.
    ///
    /// NOTE(review): the module-level docs mention a
    /// `metadata/snapshots/` prefix while this returns `snapshots/…`;
    /// presumably the tenant root already accounts for the difference —
    /// confirm against the storage layer.
    #[must_use]
    pub fn manifest_path_for(id: SnapshotId) -> String {
        format!("snapshots/{id}.manifest.json", id = id)
    }
}

/// Caller-tunable knobs on [`crate::Memory::restore`].
///
/// `Default` is implemented by hand (not derived) so that
/// `also_restore_attributes` starts out `true`.
#[non_exhaustive]
#[derive(Debug, Clone)]
pub struct RestoreOpts {
    /// When `true`, the restore also reconciles the typed-attribute set
    /// against the snapshot. Default `true` — attributes round-trip
    /// through the manifest, so opting out is rarely useful.
    pub also_restore_attributes: bool,
}

impl Default for RestoreOpts {
    fn default() -> Self {
        Self {
            also_restore_attributes: true,
        }
    }
}

impl RestoreOpts {
    /// Return these options with the attribute-reconciliation flag set
    /// to `v`.
    #[must_use]
    pub fn with_also_restore_attributes(self, v: bool) -> Self {
        let mut updated = self;
        updated.also_restore_attributes = v;
        updated
    }
}

/// Report from [`crate::Memory::restore`].
///
/// Every field is a count of one kind of change; the derived `Default`
/// is the all-zero report.
#[non_exhaustive]
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct RestoreReport {
    /// Memories live now but absent from the snapshot — re-tombstoned.
    pub memories_re_tombstoned: u64,
    /// Memories tombstoned now but live in the snapshot — un-tombstoned.
    pub memories_un_tombstoned: u64,
    /// Summaries live now but absent from the snapshot.
    pub summaries_re_tombstoned: u64,
    /// Summaries tombstoned now but live in the snapshot.
    pub summaries_un_tombstoned: u64,
    /// Link pairs added by the reconcile.
    pub links_added: u64,
    /// Link pairs removed by the reconcile.
    pub links_removed: u64,
    /// Attribute upserts (set or replaced).
    pub attributes_set: u64,
    /// Attribute clears.
    pub attributes_cleared: u64,
    /// Partitions whose `summary_stale` flag was flipped on.
    pub partitions_marked_stale: u64,
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Rendering an id with `Display` and parsing it back must yield the
    /// same id.
    #[test]
    fn id_round_trips_through_string() {
        let original = SnapshotId::generate();
        let rendered = original.to_string();
        let reparsed = rendered.parse::<SnapshotId>().unwrap();
        assert_eq!(original, reparsed);
    }

    /// Serializing an id to JSON and back must yield the same id.
    #[test]
    fn id_serde_round_trip() {
        let original = SnapshotId::generate();
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded = serde_json::from_str::<SnapshotId>(&encoded).unwrap();
        assert_eq!(original, decoded);
    }

    /// The manifest storage key for a fixed id is pinned to its exact
    /// textual form.
    #[test]
    fn manifest_path_is_stable() {
        let raw = Ulid::from_string("01HXAZ0000000000000000000Z").unwrap();
        let path = SnapshotManifest::manifest_path_for(SnapshotId::from_ulid(raw));
        assert_eq!(path, "snapshots/01HXAZ0000000000000000000Z.manifest.json");
    }

    /// Default snapshot options carry neither a tag nor a reason.
    #[test]
    fn opts_default_is_empty() {
        let SnapshotOpts { tag, reason } = SnapshotOpts::default();
        assert!(tag.is_none());
        assert!(reason.is_none());
    }
}