kiromi-ai-memory 0.2.2

Local-first multi-tenant memory store engine: Markdown/text content on object storage, metadata in SQLite, plugin-shaped embedder/storage/metadata, hybrid text+vector search.
Documentation
// SPDX-License-Identifier: Apache-2.0 OR MIT
//! Plan 11: anchor / citation URI parser.
//!
//! Citation strings round-trip between callers and the engine through a
//! compact URL form. The parser is `nom`-free — straight `str::strip_prefix`
//! and `split_once`.
//!
//! Grammar (informal):
//!
//! ```text
//! anchor    := "kiromi://" body
//! body      := memory | partition | summary
//! memory    := "memory/" ULID [range] [time]
//! range     := "/L" U32 "-" U32          // line range, 1-indexed
//!            | "/B" U32 "-" U32          // byte range
//! time      := "@" U32 "s" ["-" U32 "s"] // time range in seconds; follows any line/byte range
//! partition := "partition/" PERCENT_ENCODED_PATH
//! summary   := "summary/" ULID
//! ```
//!
//! [`Memory::resolve_anchor`] returns the bare [`crate::graph::NodeRef`];
//! [`Memory::resolve_anchor_with_range`] returns the same plus an optional
//! [`crate::summary::content::DataPointRef`] carrying the sub-position info.

use crate::error::Error;
use crate::graph::NodeRef;
use crate::handle::Memory;
use crate::memory::{MemoryId, MemoryRef};
use crate::partition::PartitionPath;
use crate::summary::SummaryId;
use crate::summary::content::DataPointRef;

/// Scheme prefix.
const SCHEME: &str = "kiromi://";

impl Memory {
    /// Parse an anchor URI into a typed [`crate::graph::NodeRef`]. The
    /// memory partition for `Memory` URIs is set to the empty path
    /// (`""`) — the parser only recovers the id; callers join against the
    /// catalog if they need the full ref.
    ///
    /// ```
    /// use kiromi_ai_memory::{graph::NodeRef, Memory};
    /// let n = Memory::resolve_anchor("kiromi://partition/user=alex").unwrap();
    /// assert!(matches!(n, NodeRef::Partition(_)));
    /// ```
    pub fn resolve_anchor(s: &str) -> Result<NodeRef, Error> {
        Self::resolve_anchor_with_range(s).map(|(n, _)| n)
    }

    /// Parse an anchor URI into a typed [`crate::graph::NodeRef`] plus an
    /// optional [`crate::summary::content::DataPointRef`] that carries the
    /// sub-position info when the URI included one.
    ///
    /// ```
    /// use kiromi_ai_memory::Memory;
    /// let (_, range) = Memory::resolve_anchor_with_range(
    ///     "kiromi://memory/01HX0WJDR2QH2A8YZTR8N0XJDC"
    /// ).unwrap();
    /// assert!(range.is_none());
    /// ```
    pub fn resolve_anchor_with_range(s: &str) -> Result<(NodeRef, Option<DataPointRef>), Error> {
        let body = s
            .strip_prefix(SCHEME)
            .ok_or_else(|| Error::InvalidAnchor(format!("missing scheme: {s}")))?;
        let (kind, rest) = body
            .split_once('/')
            .ok_or_else(|| Error::InvalidAnchor(format!("missing kind: {s}")))?;
        match kind {
            "memory" => parse_memory(rest),
            "partition" => parse_partition(rest),
            "summary" => parse_summary(rest),
            other => Err(Error::InvalidAnchor(format!("unknown kind: {other}"))),
        }
    }
}

/// Parse the payload of a `memory` anchor: `ULID[/L..|/B..][@time]`.
///
/// Returns the memory node plus a [`DataPointRef`] when at least one
/// sub-position (line, byte or time range) was present, otherwise `None`.
fn parse_memory(rest: &str) -> Result<(NodeRef, Option<DataPointRef>), Error> {
    // The `@<time>` suffix trails any line/byte range, so peel it off first.
    let (before_time, time_range_ms) = if let Some((prefix, stamp)) = rest.split_once('@') {
        (prefix, Some(parse_time(stamp)?))
    } else {
        (rest, None)
    };

    // What remains is the ULID, optionally followed by `/L…` or `/B…`.
    let (ulid_text, line_range, byte_range) = match before_time.split_once('/') {
        Some((ulid_text, tagged)) => {
            let (lines, bytes) = parse_line_or_byte(tagged)?;
            (ulid_text, lines, bytes)
        }
        None => (before_time, None, None),
    };

    let memory_id: MemoryId = ulid_text
        .parse()
        .map_err(|e: ulid::DecodeError| Error::InvalidAnchor(format!("invalid ulid: {e}")))?;

    // Anchors carry no partition, and `PartitionPath` refuses an empty
    // string, so a sentinel stands in. Callers that need the real partition
    // hydrate via `Memory::get`. The angle-bracket form mirrors other engine
    // sentinels and can't collide with a real path.
    let partition: PartitionPath = "<anchor>"
        .parse()
        .map_err(|e| Error::InvalidAnchor(format!("partition sentinel failed to parse: {e}")))?;

    // Only materialise a data point when the URI actually named a sub-position.
    let has_sub_position =
        line_range.is_some() || byte_range.is_some() || time_range_ms.is_some();
    let data_point = has_sub_position.then(|| DataPointRef {
        memory_id,
        byte_range,
        line_range,
        time_range_ms,
        note: None,
    });

    let node = NodeRef::Memory(MemoryRef {
        id: memory_id,
        partition,
    });
    Ok((node, data_point))
}

/// `(line_range, byte_range)` — at most one of the two is `Some`.
type SubRanges = (Option<std::ops::Range<u32>>, Option<std::ops::Range<u32>>);

/// Parse a tagged sub-range: `L<lo>-<hi>` for lines or `B<lo>-<hi>` for
/// bytes. Any other leading tag is rejected with [`Error::InvalidAnchor`].
fn parse_line_or_byte(sub: &str) -> Result<SubRanges, Error> {
    if let Some(spec) = sub.strip_prefix('L') {
        return parse_u32_range(spec, "line").map(|lines| (Some(lines), None));
    }
    if let Some(spec) = sub.strip_prefix('B') {
        return parse_u32_range(spec, "byte").map(|bytes| (None, Some(bytes)));
    }
    Err(Error::InvalidAnchor(format!(
        "unknown sub-range tag: {sub}"
    )))
}

fn parse_u32_range(s: &str, label: &str) -> Result<std::ops::Range<u32>, Error> {
    let (lo, hi) = s
        .split_once('-')
        .ok_or_else(|| Error::InvalidAnchor(format!("malformed {label} range: {s}")))?;
    let lo: u32 = lo
        .parse()
        .map_err(|e| Error::InvalidAnchor(format!("malformed {label} start: {e}")))?;
    let hi: u32 = hi
        .parse()
        .map_err(|e| Error::InvalidAnchor(format!("malformed {label} end: {e}")))?;
    Ok(lo..hi)
}

fn parse_time(s: &str) -> Result<std::ops::Range<u32>, Error> {
    // Forms: "127s" or "127s-130s"
    if let Some((lo, hi)) = s.split_once('-') {
        let lo = strip_s(lo)?;
        let hi = strip_s(hi)?;
        Ok(lo * 1000..hi * 1000)
    } else {
        let lo = strip_s(s)?;
        // Single timestamp → degenerate range [lo, lo+1) ms scale (we still
        // emit seconds * 1000 for both ends so consumers see "this exact
        // second"; readers that need to display a point check whether
        // start+1000 == end).
        Ok(lo * 1000..lo * 1000 + 1000)
    }
}

fn strip_s(t: &str) -> Result<u32, Error> {
    let inner = t
        .strip_suffix('s')
        .ok_or_else(|| Error::InvalidAnchor(format!("missing 's' suffix: {t}")))?;
    inner
        .parse()
        .map_err(|e| Error::InvalidAnchor(format!("malformed timestamp: {e}")))
}

fn parse_partition(rest: &str) -> Result<(NodeRef, Option<DataPointRef>), Error> {
    let decoded = percent_encoding::percent_decode_str(rest)
        .decode_utf8()
        .map_err(|e| Error::InvalidAnchor(format!("partition path not utf-8: {e}")))?
        .into_owned();
    let path: PartitionPath = decoded
        .parse()
        .map_err(|e| Error::InvalidAnchor(format!("invalid partition path: {e}")))?;
    Ok((NodeRef::Partition(path), None))
}

fn parse_summary(rest: &str) -> Result<(NodeRef, Option<DataPointRef>), Error> {
    let id: SummaryId = rest.parse().map_err(|e: ulid::DecodeError| {
        Error::InvalidAnchor(format!("invalid summary ulid: {e}"))
    })?;
    // We don't have the subject here; use Tenant as a placeholder. Callers
    // that need the subject can call `latest_summary`/`get_summary` to hydrate.
    let s_ref = crate::summary::SummaryRef {
        id,
        subject: crate::summary::SummarySubject::Tenant,
        style: crate::summarizer::SummaryStyle::Compact,
        version: 0,
    };
    Ok((NodeRef::Summary(s_ref), None))
}

#[cfg(test)]
mod tests {
    use super::*;

    const ULID: &str = "01JCXYZABCDEFGHJKMNPQRSTVW";

    /// Outcome type of the parser entry point under test.
    type Parsed = Result<(NodeRef, Option<DataPointRef>), Error>;

    /// Resolve `kiromi://memory/<ULID>` with `suffix` appended verbatim.
    fn resolve_memory(suffix: &str) -> Parsed {
        Memory::resolve_anchor_with_range(&format!("kiromi://memory/{ULID}{suffix}"))
    }

    /// Assert that `node` is a memory node carrying the fixture ULID.
    fn expect_memory_id(node: NodeRef) {
        match node {
            NodeRef::Memory(r) => assert_eq!(r.id.to_string(), ULID),
            _ => panic!("expected Memory"),
        }
    }

    /// Assert that parsing failed with `Error::InvalidAnchor`.
    fn expect_invalid(outcome: Parsed) {
        assert!(matches!(outcome, Err(Error::InvalidAnchor(_))));
    }

    #[test]
    fn parses_bare_memory() {
        let (node, dp) = resolve_memory("").unwrap();
        expect_memory_id(node);
        assert!(dp.is_none());
    }

    #[test]
    fn parses_memory_with_line_range() {
        let (node, dp) = resolve_memory("/L42-44").unwrap();
        let dp = dp.unwrap();
        assert_eq!(dp.line_range, Some(42..44));
        assert!(dp.byte_range.is_none());
        assert!(dp.time_range_ms.is_none());
        assert!(matches!(node, NodeRef::Memory(_)));
    }

    #[test]
    fn parses_memory_with_byte_range() {
        let (_, dp) = resolve_memory("/B0-128").unwrap();
        let dp = dp.unwrap();
        assert_eq!(dp.byte_range, Some(0..128));
        assert!(dp.line_range.is_none());
    }

    #[test]
    fn parses_memory_with_time_point() {
        let (_, dp) = resolve_memory("@127s").unwrap();
        assert_eq!(dp.unwrap().time_range_ms, Some(127_000..128_000));
    }

    #[test]
    fn parses_memory_with_time_range() {
        let (_, dp) = resolve_memory("@127s-130s").unwrap();
        assert_eq!(dp.unwrap().time_range_ms, Some(127_000..130_000));
    }

    #[test]
    fn parses_memory_with_line_and_time() {
        let (node, dp) = resolve_memory("/L42-44@127s").unwrap();
        let dp = dp.unwrap();
        assert_eq!(dp.line_range, Some(42..44));
        assert_eq!(dp.time_range_ms, Some(127_000..128_000));
        expect_memory_id(node);
    }

    #[test]
    fn parses_partition_path() {
        let (node, _) =
            Memory::resolve_anchor_with_range("kiromi://partition/user=alex/year=2026").unwrap();
        match node {
            NodeRef::Partition(p) => assert_eq!(p.as_str(), "user=alex/year=2026"),
            _ => panic!("expected Partition"),
        }
    }

    #[test]
    fn parses_summary_id() {
        let uri = format!("kiromi://summary/{ULID}");
        let (node, _) = Memory::resolve_anchor_with_range(&uri).unwrap();
        match node {
            NodeRef::Summary(s) => assert_eq!(s.id.to_string(), ULID),
            _ => panic!("expected Summary"),
        }
    }

    #[test]
    fn rejects_bad_scheme() {
        expect_invalid(Memory::resolve_anchor_with_range("http://memory/x"));
    }

    #[test]
    fn rejects_unknown_kind() {
        expect_invalid(Memory::resolve_anchor_with_range("kiromi://nope/x"));
    }

    #[test]
    fn rejects_malformed_ranges() {
        // Missing range end, unknown tag, and time without the `s` unit.
        for suffix in ["/L42", "/Q1-2", "@127"] {
            expect_invalid(resolve_memory(suffix));
        }
    }

    #[test]
    fn resolve_anchor_drops_range() {
        let uri = format!("kiromi://memory/{ULID}/L1-2");
        let node = Memory::resolve_anchor(&uri).unwrap();
        assert!(matches!(node, NodeRef::Memory(_)));
    }
}