kiromi-ai-memory 0.2.2

Local-first multi-tenant memory store engine: Markdown/text content on object storage, metadata in SQLite, plugin-shaped embedder/storage/metadata, hybrid text+vector search.
Documentation
// SPDX-License-Identifier: Apache-2.0 OR MIT
//! Plan 11: structured summary content.
//!
//! Replaces `SummaryRecord::content: String` with a [`SummaryContent`]
//! carrying the human-readable Markdown prose plus typed structured
//! `blocks`. The Markdown blob (`{id}.v{N}.md`) is unchanged; a sidecar
//! `{id}.v{N}.json` carries the structured form.
//!
//! `SummaryContent: From<String>` keeps every prior `attach_summary` call
//! site working unchanged. Plan 11 is the only public-API breaking change
//! in this slice.

use std::collections::BTreeMap;
use std::ops::Range;

use serde::{Deserialize, Serialize};

use crate::attribute::AttributeValue;
use crate::memory::MemoryId;
use crate::partition::PartitionPath;

/// A summary's structured body: Markdown prose plus optional typed blocks.
///
/// The struct is `#[non_exhaustive]`, so code outside this crate cannot
/// build it with a struct literal; use [`SummaryContent::prose_only`],
/// [`SummaryContent::with_block`], or the `From<String>` / `From<&str>`
/// conversions instead.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[non_exhaustive]
pub struct SummaryContent {
    /// Main narrative — Markdown.
    pub prose: String,
    /// Structured sections. Empty for prose-only summaries; a missing
    /// `blocks` key deserialises as an empty list.
    #[serde(default)]
    pub blocks: Vec<SummaryBlock>,
}

impl SummaryContent {
    /// Build a summary that carries only Markdown `prose` and no blocks.
    #[must_use]
    pub fn prose_only(prose: impl Into<String>) -> Self {
        Self {
            prose: prose.into(),
            blocks: Vec::new(),
        }
    }

    /// Builder helper: append `block` after any existing blocks and return
    /// the updated content.
    #[must_use]
    pub fn with_block(mut self, block: SummaryBlock) -> Self {
        self.blocks.push(block);
        self
    }

    /// Iterate every [`DataPointRef`] referenced by this content's blocks.
    /// Used by `attach_summary` to denormalise citations into
    /// `summary_input` rows.
    ///
    /// Refs are yielded in block order from `DataPointLinks` and `Citation`
    /// blocks. No de-duplication is performed: a ref that appears in several
    /// blocks is yielded once per appearance.
    pub fn data_point_refs(&self) -> impl Iterator<Item = &DataPointRef> {
        // The match is deliberately exhaustive (no `_` arm): adding a new
        // `SummaryBlock` variant must force a decision here about whether it
        // carries data-point refs, instead of being silently skipped.
        self.blocks.iter().flat_map(|block| match block {
            SummaryBlock::DataPointLinks(refs) | SummaryBlock::Citation { refs, .. } => {
                refs.as_slice()
            }
            SummaryBlock::PartitionLinks(_) | SummaryBlock::Attributes(_) => &[],
        })
    }

    /// Iterate every [`PartitionRef`] referenced by this content's blocks.
    ///
    /// Only `PartitionLinks` blocks contribute; refs are yielded in block
    /// order without de-duplication.
    pub fn partition_refs(&self) -> impl Iterator<Item = &PartitionRef> {
        // Exhaustive for the same reason as `data_point_refs`: a new variant
        // has to be classified explicitly.
        self.blocks.iter().flat_map(|block| match block {
            SummaryBlock::PartitionLinks(refs) => refs.as_slice(),
            SummaryBlock::DataPointLinks(_)
            | SummaryBlock::Citation { .. }
            | SummaryBlock::Attributes(_) => &[],
        })
    }
}

impl From<String> for SummaryContent {
    fn from(s: String) -> Self {
        SummaryContent::prose_only(s)
    }
}

impl From<&str> for SummaryContent {
    fn from(s: &str) -> Self {
        SummaryContent::prose_only(s.to_string())
    }
}

/// One structured section of a [`SummaryContent`].
///
/// The enum is `#[non_exhaustive]`, so adding a variant later is not a
/// breaking change; matches outside this crate need a wildcard arm.
///
/// Serialised in serde's adjacently tagged form with snake_case variant
/// names, e.g. `{"kind": "data_point_links", "value": [...]}`.
#[non_exhaustive]
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "kind", content = "value", rename_all = "snake_case")]
pub enum SummaryBlock {
    /// Citations to specific data points (memories, optionally with sub-ranges).
    DataPointLinks(Vec<DataPointRef>),
    /// Subtree references.
    PartitionLinks(Vec<PartitionRef>),
    /// Inline citation: a sentence + the refs that back it.
    Citation {
        /// The cited sentence.
        sentence: String,
        /// Backing refs.
        refs: Vec<DataPointRef>,
    },
    /// Free-form structured key-value attributes (caller extension point).
    /// Stored in a `BTreeMap`, so keys iterate in sorted order.
    Attributes(BTreeMap<String, AttributeValue>),
}

/// Reference to a memory plus an optional sub-position.
///
/// Every `Option` field is omitted from the serialised form when `None` and
/// falls back to `None` when the key is absent on input. Ranges are
/// half-open (`start..end`, end exclusive).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DataPointRef {
    /// Memory id.
    pub memory_id: MemoryId,
    /// Byte range, when relevant.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub byte_range: Option<Range<u32>>,
    /// Line range (1-indexed), when relevant.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub line_range: Option<Range<u32>>,
    /// Time range in unix-millis (for transcript chunks).
    ///
    /// NOTE(review): a `u32` millisecond value tops out at roughly 49.7
    /// days, so absolute Unix-epoch timestamps cannot fit. This field
    /// presumably holds offsets relative to the start of the transcript —
    /// confirm and adjust the wording (or widen the type) accordingly.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub time_range_ms: Option<Range<u32>>,
    /// Caller-supplied note.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub note: Option<String>,
}

impl DataPointRef {
    /// Convenience: build a bare ref with no sub-position info.
    #[must_use]
    pub fn whole_memory(memory_id: MemoryId) -> Self {
        DataPointRef {
            memory_id,
            byte_range: None,
            line_range: None,
            time_range_ms: None,
            note: None,
        }
    }

    /// Convert into a [`SummaryInputRange`] suitable for the SQL layer.
    #[must_use]
    pub fn as_input_range(&self) -> SummaryInputRange {
        SummaryInputRange {
            byte_start: self.byte_range.as_ref().map(|r| r.start),
            byte_end: self.byte_range.as_ref().map(|r| r.end),
            line_start: self.line_range.as_ref().map(|r| r.start),
            line_end: self.line_range.as_ref().map(|r| r.end),
            time_start_ms: self.time_range_ms.as_ref().map(|r| r.start),
            time_end_ms: self.time_range_ms.as_ref().map(|r| r.end),
            note: self.note.clone(),
        }
    }

    /// Build a [`DataPointRef`] from a [`SummaryInputRange`].
    #[must_use]
    pub fn from_input_range(memory_id: MemoryId, range: &SummaryInputRange) -> Self {
        DataPointRef {
            memory_id,
            byte_range: range.byte_start.zip(range.byte_end).map(|(s, e)| s..e),
            line_range: range.line_start.zip(range.line_end).map(|(s, e)| s..e),
            time_range_ms: range
                .time_start_ms
                .zip(range.time_end_ms)
                .map(|(s, e)| s..e),
            note: range.note.clone(),
        }
    }
}

/// Reference to a partition, with an optional caller note.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PartitionRef {
    /// Path of the referenced partition.
    pub path: PartitionPath,
    /// Caller-supplied note. Omitted from the serialised form when `None`
    /// and read back as `None` when the key is absent.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub note: Option<String>,
}

/// Flat, SQL-friendly form of a citation's optional sub-position, mirroring
/// the nullable columns on `summary_input`. Plain old data; the engine
/// derives it from a [`DataPointRef`] before the trait call.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct SummaryInputRange {
    /// Inclusive start of the byte range.
    pub byte_start: Option<u32>,
    /// Exclusive end of the byte range.
    pub byte_end: Option<u32>,
    /// Inclusive start of the line range (1-indexed).
    pub line_start: Option<u32>,
    /// Exclusive end of the line range (1-indexed).
    pub line_end: Option<u32>,
    /// Start of the time range in milliseconds (described as unix millis).
    /// NOTE(review): a `u32` caps at about 49.7 days of milliseconds, so
    /// confirm whether this is an offset rather than an epoch timestamp.
    pub time_start_ms: Option<u32>,
    /// End of the time range in milliseconds; same caveat as
    /// `time_start_ms`.
    pub time_end_ms: Option<u32>,
    /// Caller-supplied note.
    pub note: Option<String>,
}

impl SummaryInputRange {
    /// Returns `true` only when every field is `None` — all six position
    /// bounds *and* the note. A citation that carries nothing but a note is
    /// therefore reported as non-empty.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        // Naming every field keeps this check in step with the struct: a
        // newly added field makes the pattern fail to compile.
        matches!(
            self,
            Self {
                byte_start: None,
                byte_end: None,
                line_start: None,
                line_end: None,
                time_start_ms: None,
                time_end_ms: None,
                note: None,
            }
        )
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Content with a citation block survives a JSON round trip unchanged.
    #[test]
    fn content_round_trips_through_json() {
        let mid = MemoryId::generate();
        let c = SummaryContent::prose_only("hello").with_block(SummaryBlock::Citation {
            sentence: "Alex spoke first.".into(),
            refs: vec![DataPointRef {
                memory_id: mid,
                byte_range: None,
                line_range: Some(42..44),
                time_range_ms: Some(127_000..130_000),
                note: Some("intro".into()),
            }],
        });
        let s = serde_json::to_string(&c).unwrap();
        let back: SummaryContent = serde_json::from_str(&s).unwrap();
        assert_eq!(c, back);
    }

    /// `prose_only` stores the prose and leaves `blocks` empty.
    #[test]
    fn prose_only_constructor_has_no_blocks() {
        let c = SummaryContent::prose_only("hi");
        assert_eq!(c.prose, "hi");
        assert!(c.blocks.is_empty());
    }

    /// `None` optionals are left out of the serialised JSON entirely.
    #[test]
    fn data_point_ref_skips_none_optionals_in_json() {
        let r = DataPointRef::whole_memory(MemoryId::generate());
        let s = serde_json::to_string(&r).unwrap();
        assert!(!s.contains("byte_range"));
        assert!(!s.contains("line_range"));
        assert!(!s.contains("time_range_ms"));
        assert!(!s.contains("note"));
    }

    /// `is_empty` holds for the all-`None` default and fails as soon as any
    /// single field — the note included — is populated.
    #[test]
    fn input_range_is_empty_iff_all_none() {
        assert!(SummaryInputRange::default().is_empty());

        let populated = [
            SummaryInputRange {
                byte_start: Some(1),
                ..Default::default()
            },
            SummaryInputRange {
                byte_end: Some(1),
                ..Default::default()
            },
            SummaryInputRange {
                line_start: Some(1),
                ..Default::default()
            },
            SummaryInputRange {
                line_end: Some(1),
                ..Default::default()
            },
            SummaryInputRange {
                time_start_ms: Some(1),
                ..Default::default()
            },
            SummaryInputRange {
                time_end_ms: Some(1),
                ..Default::default()
            },
            SummaryInputRange {
                note: Some("n".into()),
                ..Default::default()
            },
        ];
        for r in &populated {
            assert!(!r.is_empty(), "expected non-empty: {:?}", r);
        }
    }

    /// A fully populated ref converts to an input range and back losslessly.
    #[test]
    fn data_point_ref_round_trips_through_input_range() {
        let mid = MemoryId::generate();
        let original = DataPointRef {
            memory_id: mid,
            byte_range: Some(0..10),
            line_range: Some(2..5),
            time_range_ms: Some(1000..2000),
            note: Some("note".into()),
        };
        let r = original.as_input_range();
        let back = DataPointRef::from_input_range(mid, &r);
        assert_eq!(original, back);
    }

    /// Compile-time check that both `From` conversions exist.
    #[test]
    fn from_string_compiles() {
        let _: SummaryContent = String::from("hello").into();
        let _: SummaryContent = "hi".into();
    }

    /// `data_point_refs` visits link and citation blocks and skips
    /// attribute blocks; `partition_refs` finds nothing in this content.
    #[test]
    fn data_point_refs_iter_visits_links_and_citations() {
        let mid = MemoryId::generate();
        let dp = DataPointRef::whole_memory(mid);
        let c = SummaryContent::prose_only("p")
            .with_block(SummaryBlock::DataPointLinks(vec![dp.clone()]))
            .with_block(SummaryBlock::Citation {
                sentence: "x".into(),
                // Last use of `dp`: move it rather than cloning again.
                refs: vec![dp],
            })
            .with_block(SummaryBlock::Attributes(BTreeMap::new()));
        assert_eq!(c.data_point_refs().count(), 2);
        assert_eq!(c.partition_refs().count(), 0);
    }
}