kiromi-ai-memory 0.2.2

Local-first multi-tenant memory store engine: Markdown/text content on object storage, metadata in SQLite, plugin-shaped embedder/storage/metadata, hybrid text+vector search.
Documentation
// SPDX-License-Identifier: Apache-2.0 OR MIT
//! Memory identity, references, and read-side records.

use std::fmt;
use std::str::FromStr;

use serde::{Deserialize, Serialize};
use ulid::Ulid;

use crate::content::{Content, ContentHash};
use crate::partition::PartitionPath;

/// Stable identifier for a memory — a 128-bit ULID.
///
/// A `Copy` newtype over [`Ulid`]; equality, ordering and hashing are all
/// derived from the wrapped value.
///
/// Serde goes through `String` in both directions (`into = "String"`,
/// `try_from = "String"`): serialising emits whatever `String::from(id)`
/// produces, and deserialising runs `MemoryId::try_from(String)`, so a
/// malformed string is reported as a deserialisation error.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
#[serde(into = "String", try_from = "String")]
pub struct MemoryId(Ulid);

impl MemoryId {
    /// Generate a fresh ULID at the current wall-clock time.
    ///
    /// When the env var `KIROMI_AI_TEST_DETERMINISTIC_ULID` is set, returns
    /// successive ULIDs derived from a process-local counter so insta snapshots
    /// stay stable. Documented for testing only.
    #[must_use]
    pub fn generate() -> Self {
        if std::env::var_os("KIROMI_AI_TEST_DETERMINISTIC_ULID").is_some() {
            use std::sync::atomic::{AtomicU64, Ordering};
            static COUNTER: AtomicU64 = AtomicU64::new(1);
            let n = COUNTER.fetch_add(1, Ordering::Relaxed);
            // ULID = 48-bit timestamp + 80-bit randomness.
            // `Ulid::from_parts(timestamp_ms, random)` — fudge both with the counter.
            return MemoryId(Ulid::from_parts(n, u128::from(n)));
        }
        MemoryId(Ulid::new())
    }

    /// Construct from a raw `Ulid`.
    #[must_use]
    pub const fn from_ulid(u: Ulid) -> Self {
        MemoryId(u)
    }

    /// Underlying `Ulid`.
    #[must_use]
    pub const fn as_ulid(&self) -> Ulid {
        self.0
    }
}

/// Renders the id exactly as the wrapped `Ulid` renders itself.
impl fmt::Display for MemoryId {
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Fully-qualified call: delegate to the inner value's `Display` impl
        // and hand it the caller's formatter untouched.
        fmt::Display::fmt(&self.0, formatter)
    }
}

impl FromStr for MemoryId {
    type Err = ulid::DecodeError;
    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
        s.parse::<Ulid>().map(MemoryId)
    }
}

impl From<MemoryId> for String {
    fn from(id: MemoryId) -> String {
        id.0.to_string()
    }
}

impl TryFrom<String> for MemoryId {
    type Error = ulid::DecodeError;
    fn try_from(s: String) -> std::result::Result<Self, Self::Error> {
        s.parse()
    }
}

/// Plan 15: typed memory discriminator.
///
/// - `Episodic`: "what happened" — transcripts, events.
/// - `Semantic`: "facts" — distilled knowledge.
/// - `Procedural`: "how to do X" — runbooks, playbooks.
/// - `Archival`: long-term, low-access.
/// - `Working`: scratch / agent state.
///
/// `MemoryKind::default()` is `Episodic` (the `#[default]` variant). The enum
/// is `#[non_exhaustive]`, so matches written outside this crate need a
/// wildcard arm and further variants can be added later.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Default, Serialize, Deserialize)]
#[non_exhaustive]
pub enum MemoryKind {
    /// "What happened" — transcripts, events. This is the default variant.
    #[default]
    Episodic,
    /// "Facts" — distilled knowledge.
    Semantic,
    /// "How to do X" — runbooks, playbooks.
    Procedural,
    /// Long-term, low-access.
    Archival,
    /// Scratch / agent state.
    Working,
}

impl MemoryKind {
    /// Lower-case tag stored in the `memory.kind` column for this variant.
    #[must_use]
    pub const fn as_persisted_str(self) -> &'static str {
        match self {
            Self::Episodic => "episodic",
            Self::Semantic => "semantic",
            Self::Procedural => "procedural",
            Self::Archival => "archival",
            Self::Working => "working",
        }
    }

    /// Inverse of [`MemoryKind::as_persisted_str`].
    ///
    /// The comparison is exact (case-sensitive, no trimming). Any string that
    /// is not one of the known tags — which is how unknown / NULL rows show
    /// up — yields `None`.
    #[must_use]
    pub fn from_persisted(s: &str) -> Option<Self> {
        // Every variant, searched by its persisted tag so both directions of
        // the mapping come from the single `match` above.
        const VARIANTS: [MemoryKind; 5] = [
            MemoryKind::Episodic,
            MemoryKind::Semantic,
            MemoryKind::Procedural,
            MemoryKind::Archival,
            MemoryKind::Working,
        ];
        VARIANTS
            .iter()
            .copied()
            .find(|kind| kind.as_persisted_str() == s)
    }
}

/// A handle returned by `append` / `get` / `list`. Carries enough context to look
/// the row up without another partition-resolution round trip.
///
/// It is just the pair (id, partition); both fields are public and the type
/// is hashable, comparable and serde-serialisable via its derives.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct MemoryRef {
    /// Memory id.
    pub id: MemoryId,
    /// Partition the memory lives in.
    pub partition: PartitionPath,
}

/// Full read-side memory record returned by `Memory::get`.
///
/// This type derives no serde traits; use [`MemoryRecord::wire`] for the flat,
/// serialisable projection. It is `#[non_exhaustive]`, so code outside this
/// crate cannot construct it with a struct literal and must use `..` when
/// destructuring it.
///
/// All `*_ms` fields are unix timestamps in milliseconds.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct MemoryRecord {
    /// The reference (id + partition) identifying this record.
    pub r#ref: MemoryRef,
    /// Materialised content.
    pub content: Content,
    /// Content hash captured at append.
    pub hash: ContentHash,
    /// Created-at unix millis.
    pub created_at_ms: i64,
    /// Updated-at unix millis (== created_at on first write).
    pub updated_at_ms: i64,
    /// Soft-tombstone flag.
    pub tombstoned: bool,
    /// Plan 15: when the FACT became operationally true. `None` = no
    /// lower bound.
    pub valid_from_ms: Option<i64>,
    /// Plan 15: when the FACT stopped being true. `None` = still valid
    /// at read time.
    pub valid_until_ms: Option<i64>,
    /// Plan 15: typed memory discriminator. `None` = unspecified
    /// (legacy rows on disk before Plan 15).
    pub kind: Option<MemoryKind>,
}

/// Flat, JSON-friendly projection of [`MemoryRecord`]. The CLI emits this
/// shape under `--json` so the [`Content`] enum's serde representation never
/// bleeds into the wire snapshot surface.
///
/// Serialise-only (no `Deserialize` derive). The `'a` lifetime ties the
/// borrowed `partition` and `body` strings to the record this view was
/// produced from; only `id` is owned. Built by [`MemoryRecord::wire`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
#[non_exhaustive]
pub struct MemoryRecordWire<'a> {
    /// Memory id (stringified ULID).
    pub id: String,
    /// Partition path.
    pub partition: &'a str,
    /// Content kind extension (`md` or `txt`). Note this describes the
    /// content format, not the [`MemoryKind`] discriminator.
    pub kind: &'static str,
    /// Body as UTF-8.
    pub body: &'a str,
    /// Body length in bytes.
    pub bytes: usize,
    /// Created-at unix millis.
    pub created_at_ms: i64,
    /// Updated-at unix millis.
    pub updated_at_ms: i64,
    /// Soft-tombstone flag.
    pub tombstoned: bool,
}

impl MemoryRecord {
    /// Project a [`MemoryRecord`] into the flat wire shape used by the CLI's
    /// `--json` output and any other API surface that wants a stable, snapshot-
    /// pinnable representation. Borrows the record — no allocation beyond the
    /// id string.
    #[must_use]
    pub fn wire(&self) -> MemoryRecordWire<'_> {
        MemoryRecordWire {
            id: self.r#ref.id.to_string(),
            partition: self.r#ref.partition.as_str(),
            kind: self.content.kind().extension(),
            body: self.content.as_str(),
            bytes: self.content.byte_len(),
            created_at_ms: self.created_at_ms,
            updated_at_ms: self.updated_at_ms,
            tombstoned: self.tombstoned,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The `Display` rendering of an id must parse back, via `FromStr`, to an
    /// equal id.
    #[test]
    fn id_roundtrips_through_string() {
        let original = MemoryId::generate();
        let reparsed = original.to_string().parse::<MemoryId>().unwrap();
        assert_eq!(original, reparsed);
    }

    /// Serialising to JSON and deserialising again must give back an equal id.
    #[test]
    fn id_roundtrips_through_serde() {
        let original = MemoryId::generate();
        let json = serde_json::to_string(&original).unwrap();
        let decoded = serde_json::from_str::<MemoryId>(&json).unwrap();
        assert_eq!(original, decoded);
    }
}