kiromi-ai-memory 0.2.2

Local-first multi-tenant memory store engine: Markdown/text content on object storage, metadata in SQLite, plugin-shaped embedder/storage/metadata, hybrid text+vector search.
Documentation
// SPDX-License-Identifier: Apache-2.0 OR MIT
//! Summarizer plugin trait + registry.
//!
//! Slice 1 ships **no implementation** and the engine has no code path that
//! invokes a summarizer. The trait, capabilities, options, and registry
//! exist so future rollup work doesn't break SemVer when an actual
//! summarizer plugin lands. Same caller-owned-model contract as
//! [`crate::Embedder`].
//!
//! See spec § 12.8.

use std::collections::HashMap;
use std::fmt;
use std::future::Future;
use std::pin::Pin;
use std::sync::Arc;

use async_trait::async_trait;

use crate::error::{Error, Result};

/// Self-declared `Summarizer` capabilities. Adding a field is non-breaking
/// thanks to `#[non_exhaustive]`.
///
/// Because the struct is `#[non_exhaustive]`, code outside this crate cannot
/// build it with a struct expression — not even with functional-update
/// syntax such as `SummarizerCapabilities { ..Default::default() }`. Start
/// from `SummarizerCapabilities::default()` and assign the public fields you
/// want to change (e.g. `caps.streaming = true;`).
///
/// The derived `Default` is deliberately conservative: unknown input
/// ceiling, no streaming, no structured output. A summarizer that doesn't
/// override `capabilities()` lands here.
///
/// See spec § 12.8.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct SummarizerCapabilities {
    /// Recommended ceiling on input token count, per call. `None` = unknown.
    pub max_input_tokens: Option<usize>,
    /// Implementation can stream tokens / partial markdown chunks. Slice-1's
    /// trait shape returns the full `String`; this flag is informational so
    /// future surfaces can branch on it.
    pub streaming: bool,
    /// Implementation can return structured output (JSON / typed). Reserved
    /// for a future trait expansion.
    pub structured_output: bool,
}

/// Summarization length / detail preset. Mirrors [`crate::EmbedRole`] in
/// shape: a small enum that asymmetric implementations branch on.
///
/// Plan 9 added the `Custom(String)` variant for caller-defined styles
/// (e.g. `"executive-summary"`, `"daily-recap"`); the storage tag becomes
/// `"custom:<name>"`.
///
/// NOTE(review): the storage tags above are what [`SummaryStyle::as_str`]
/// emits. The derived `Serialize` / `Deserialize` impls use serde's own enum
/// encoding, which is presumably not the same string — confirm which of the
/// two the persistence layer relies on.
///
/// See spec § 12.8.
#[non_exhaustive]
#[derive(Debug, Clone, PartialEq, Eq, Hash, Default, serde::Serialize, serde::Deserialize)]
pub enum SummaryStyle {
    /// One paragraph, ~3 sentences. Default.
    #[default]
    Compact,
    /// ~5 paragraphs, retains structure.
    Detailed,
    /// Caller-defined preset. Persisted as `"custom:<name>"`.
    ///
    /// An empty name is not rejected at construction time, but its tag
    /// (`"custom:"`) is read back as `Compact` by
    /// [`SummaryStyle::from_persisted`], so avoid empty names.
    Custom(String),
}

impl SummaryStyle {
    /// Stable string tag for tracing / metrics / SQL persistence.
    /// Compact / Detailed map to `"compact"` / `"detailed"`; Custom(n)
    /// maps to `"custom:<n>"`.
    #[must_use]
    pub fn as_str(&self) -> std::borrow::Cow<'static, str> {
        match self {
            SummaryStyle::Compact => std::borrow::Cow::Borrowed("compact"),
            SummaryStyle::Detailed => std::borrow::Cow::Borrowed("detailed"),
            SummaryStyle::Custom(n) => std::borrow::Cow::Owned(format!("custom:{n}")),
        }
    }

    /// Parse a string previously emitted by [`SummaryStyle::as_str`].
    /// Round-trips exactly for Compact, Detailed, and Custom values.
    #[must_use]
    pub fn from_persisted(s: &str) -> Self {
        match s {
            "compact" => SummaryStyle::Compact,
            "detailed" => SummaryStyle::Detailed,
            other => match other.strip_prefix("custom:") {
                Some(n) if !n.is_empty() => SummaryStyle::Custom(n.to_string()),
                _ => SummaryStyle::Compact,
            },
        }
    }
}

/// Per-call summarization options. New fields land via `#[non_exhaustive]`
/// so adding (e.g.) `language: Option<String>` is non-breaking.
///
/// Because the struct is `#[non_exhaustive]`, code outside this crate cannot
/// write it as a struct literal. Obtain one with `SummarizeOpts::default()`
/// and adjust it through the `with_*` builder helpers or by assigning the
/// public fields.
///
/// See spec § 12.8.
#[non_exhaustive]
#[derive(Debug, Clone, Default)]
pub struct SummarizeOpts {
    /// Detail / length preset.
    pub style: SummaryStyle,
    /// Caller-requested target output length, in tokens. `None` = let the
    /// summarizer decide. Implementations may treat this as a soft hint.
    pub target_tokens: Option<usize>,
}

impl SummarizeOpts {
    /// Builder helper: returns the options with `style` replaced and every
    /// other field carried over unchanged.
    #[must_use]
    pub fn with_style(self, style: SummaryStyle) -> Self {
        Self { style, ..self }
    }

    /// Builder helper: returns the options with the target output length set
    /// to `Some(n)` tokens and every other field carried over unchanged.
    #[must_use]
    pub fn with_target_tokens(self, n: usize) -> Self {
        Self {
            target_tokens: Some(n),
            ..self
        }
    }
}

/// Summarizer plugin. Same caller-owned-model contract as [`crate::Embedder`].
///
/// Slice-1 has no engine code path that calls `summarize`; the trait exists
/// so a future rollup plan can land without breaking SemVer. See spec § 12.8.
///
/// Implementors must be `Send + Sync + 'static` and implement `Debug`; only
/// [`Summarizer::id`] and [`Summarizer::summarize`] are required, the other
/// methods have defaults.
#[async_trait]
pub trait Summarizer: Send + Sync + fmt::Debug + 'static {
    /// Stable identifier (e.g. `"openai:gpt-4.1:v1"`, `"apple-fm:on-device:v1"`).
    /// Convention: `"<family>:<model>:<version>"`. Changing this value across
    /// `open()`s on the same store is reserved for a future migration tool.
    fn id(&self) -> &str;

    /// Self-declared capabilities. Default: unknown ceiling, no streaming,
    /// no structured output.
    fn capabilities(&self) -> SummarizerCapabilities {
        SummarizerCapabilities::default()
    }

    /// Summarize a corpus chunk under the supplied options. Returns Markdown.
    /// The caller controls the model — the engine never invokes a third-party
    /// inference path itself.
    ///
    /// `opts` carries the requested style and optional target length;
    /// `inputs` is the slice of text pieces to summarize.
    /// NOTE(review): how multiple `inputs` entries are combined (and what an
    /// empty slice should produce) is not defined here — confirm against
    /// spec § 12.8.
    async fn summarize(&self, opts: &SummarizeOpts, inputs: &[&str]) -> Result<String>;

    /// Optional diagnostic config snapshot, captured into the audit log when
    /// the future rollup wiring happens. Defaults to JSON `null`.
    fn describe(&self) -> serde_json::Value {
        serde_json::Value::Null
    }
}

/// Heap-allocated, pinned, `Send` future yielding `T`. Erasing the concrete
/// future type lets factories with different future types share one
/// signature.
type BoxFut<'a, T> = Pin<Box<dyn Future<Output = T> + Send + 'a>>;
/// Type-erased summarizer factory: takes a JSON config and returns a future
/// resolving to a boxed [`Summarizer`] (or an error). Wrapped in an `Arc` so
/// the registry that stores it can be cloned without copying the closure.
type FactoryFn = Arc<
    dyn Fn(serde_json::Value) -> BoxFut<'static, Result<Box<dyn Summarizer>>>
        + Send
        + Sync
        + 'static,
>;

/// Config-driven summarizer registry. CLI / server use it; programmatic
/// callers construct the summarizer directly and pass it to whatever
/// future builder hook ships. Mirrors [`crate::EmbedderRegistry`].
///
/// Cloning the registry clones the map of `Arc`-wrapped factories, so the
/// clones share the same factory closures.
///
/// See spec § 12.8.
#[derive(Default, Clone)]
pub struct SummarizerRegistry {
    // Family name → factory that builds a summarizer of that family.
    factories: HashMap<String, FactoryFn>,
}

/// Debug output lists the registered family names only; the factories are
/// opaque closures with nothing useful to print.
impl fmt::Debug for SummarizerRegistry {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // `HashMap` iterates in arbitrary order; sort so the rendered list is
        // identical from run to run (stable logs and snapshot tests).
        let mut families: Vec<&String> = self.factories.keys().collect();
        families.sort_unstable();
        f.debug_struct("SummarizerRegistry")
            .field("families", &families)
            .finish()
    }
}

impl SummarizerRegistry {
    /// Creates a registry with no factories registered.
    #[must_use]
    pub fn empty() -> Self {
        Self::default()
    }

    /// Registers `factory` under the family name `family`.
    ///
    /// The name is copied into the registry, so any `&str` is accepted —
    /// including names read from configuration at runtime, not just string
    /// literals. Registering a family that already exists replaces the
    /// earlier factory.
    ///
    /// `factory` receives the JSON config passed to
    /// [`SummarizerRegistry::build`] and returns a future that resolves to
    /// the boxed summarizer.
    pub fn register<F, Fut>(&mut self, family: &str, factory: F)
    where
        F: Fn(serde_json::Value) -> Fut + Send + Sync + 'static,
        Fut: Future<Output = Result<Box<dyn Summarizer>>> + Send + 'static,
    {
        // Box and pin the factory's future so every entry has the same type.
        let f: FactoryFn = Arc::new(move |v| Box::pin(factory(v)));
        self.factories.insert(family.to_owned(), f);
    }

    /// Builds a summarizer from a family name and a JSON config by invoking
    /// the factory registered for that family.
    ///
    /// # Errors
    ///
    /// Returns `Error::Config` if `family` isn't registered; otherwise
    /// propagates whatever error the factory itself returns.
    pub async fn build(
        &self,
        family: &str,
        config: serde_json::Value,
    ) -> Result<Box<dyn Summarizer>> {
        let factory = self
            .factories
            .get(family)
            .ok_or_else(|| Error::Config(format!("summarizer family {family:?} not registered")))?;
        factory(config).await
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    // Minimal summarizer whose output echoes the style tag and input count,
    // so tests can see exactly what reached `summarize`.
    #[derive(Debug)]
    struct Fake;

    #[async_trait]
    impl Summarizer for Fake {
        fn id(&self) -> &str {
            "fake:1"
        }
        async fn summarize(&self, opts: &SummarizeOpts, inputs: &[&str]) -> Result<String> {
            Ok(format!("style={} n={}", opts.style.as_str(), inputs.len()))
        }
    }

    // The trait is usable through `Box<dyn Summarizer>`.
    #[tokio::test]
    async fn boxed_summarizer_dispatches() {
        let b: Box<dyn Summarizer> = Box::new(Fake);
        let opts = SummarizeOpts::default().with_style(SummaryStyle::Detailed);
        let s = b.summarize(&opts, &["a", "b"]).await.unwrap();
        assert_eq!(s, "style=detailed n=2");
        assert_eq!(b.id(), "fake:1");
    }

    // A registered factory can be looked up by family and built.
    #[tokio::test]
    async fn registry_round_trip() {
        let mut r = SummarizerRegistry::empty();
        r.register("fake", |_cfg| async {
            Ok(Box::new(Fake) as Box<dyn Summarizer>)
        });
        let s = r.build("fake", serde_json::Value::Null).await.unwrap();
        assert_eq!(s.id(), "fake:1");
    }

    // Building an unregistered family yields `Error::Config`.
    #[tokio::test]
    async fn registry_unknown_family() {
        let r = SummarizerRegistry::empty();
        let err = r.build("nope", serde_json::Value::Null).await.unwrap_err();
        assert!(matches!(err, Error::Config(_)));
    }

    // Default capabilities claim nothing.
    #[test]
    fn capabilities_default_is_conservative() {
        let c = SummarizerCapabilities::default();
        assert!(c.max_input_tokens.is_none());
        assert!(!c.streaming);
        assert!(!c.structured_output);
    }

    // `Compact` is the default style; built-in tags are lowercase.
    #[test]
    fn summary_style_default_is_compact() {
        assert_eq!(SummaryStyle::default(), SummaryStyle::Compact);
        assert_eq!(SummaryStyle::Compact.as_str(), "compact");
        assert_eq!(SummaryStyle::Detailed.as_str(), "detailed");
    }

    // A non-empty custom name survives `as_str` → `from_persisted`.
    #[test]
    fn summary_style_custom_round_trips_through_persistence() {
        let s = SummaryStyle::Custom("daily-recap".into());
        assert_eq!(s.as_str(), "custom:daily-recap");
        assert_eq!(SummaryStyle::from_persisted("custom:daily-recap"), s);
    }

    // Known tags parse to their variants; empty-custom and unknown tags
    // fall back to `Compact` instead of failing.
    #[test]
    fn summary_style_from_persisted_handles_known_tags() {
        assert_eq!(
            SummaryStyle::from_persisted("compact"),
            SummaryStyle::Compact
        );
        assert_eq!(
            SummaryStyle::from_persisted("detailed"),
            SummaryStyle::Detailed
        );
        // Empty custom name → fallback to Compact.
        assert_eq!(
            SummaryStyle::from_persisted("custom:"),
            SummaryStyle::Compact
        );
        assert_eq!(
            SummaryStyle::from_persisted("garbage"),
            SummaryStyle::Compact
        );
    }
}