kiromi-ai-memory 0.2.2

// SPDX-License-Identifier: Apache-2.0 OR MIT
//! Plan 12 phase I — `Memory::build_context(focus, opts)`.
//!
//! Walks the partition tree top-down from the tenant root to the focus
//! node, fetching the latest summary at each level whose level-index is
//! in `opts.include_summaries_at`, then returns a token-budget-bounded
//! list of [`ContextBlock`]s ready to feed into a prompt.

use serde::{Deserialize, Serialize};

use crate::error::Result;
use crate::graph::NodeRef;
use crate::handle::{Memory, MemoryView};
use crate::summarizer::SummaryStyle;
use crate::summary::SummarySubject;

/// Kind tag carried by each [`ContextBlock`].
///
/// Tells the consumer which stage of [`Memory::build_context`] produced
/// the block. Variant names are serialized in `snake_case`.
#[non_exhaustive]
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ContextKind {
    /// The whole-tenant memo. Its block is anchored at the tenant root
    /// partition.
    TenantMemo,
    /// Per-partition rollup at any depth: the latest summary found for a
    /// partition on the root → focus chain.
    PartitionSummary,
    /// A raw memory body — either the focus memory itself or one of the
    /// memories listed under a focus partition.
    Memory,
    /// A linked memory (one hop from the focus).
    LinkedMemory,
}

/// One block in an assembled context.
///
/// Returned in a list by [`Memory::build_context`]; `text` is the
/// payload meant for the prompt and `anchor` identifies its origin.
#[non_exhaustive]
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ContextBlock {
    /// Block kind.
    pub kind: ContextKind,
    /// Anchor node — partition for summaries, memory for body / linked.
    /// The tenant memo is anchored at the tenant root partition.
    pub anchor: NodeRef,
    /// Rendered prose.
    pub text: String,
    /// Estimated tokens for `text` (4 bytes/token heuristic): the byte
    /// length divided by four, rounded down.
    pub tokens_estimated: u32,
}

/// Plan 15: how `build_context` orders the budget-filled block list.
///
/// The ordering is applied after the token budget has been enforced, so
/// it never changes which blocks are selected — only their position.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ContextOrdering {
    /// Highest importance first, descending. Slice 1 / Plan 12 default.
    /// The budget-filled list is returned in walk order, untouched.
    #[default]
    TopDown,
    /// U-curve: highest importance at start AND end, less important in
    /// the middle. Optimises for LLM attention curves under long
    /// contexts (per the 2024 "Lost in the Middle" line of work).
    ///
    /// NOTE(review): the current reorder interleaves the front half with
    /// the reversed back half (`[A, B, C, D, E, F, G]` becomes
    /// `[A, G, B, F, C, E, D]`), which places the last input block in
    /// second position rather than in the middle — confirm this is the
    /// intended curve.
    UCurve,
}

/// Caller-tunable knobs on [`Memory::build_context`].
///
/// Start from [`ContextOpts::default`] and adjust with the `with_*`
/// builders or by assigning the public fields. When deserializing, any
/// missing field falls back to its default value.
#[non_exhaustive]
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct ContextOpts {
    /// Hard token budget for the assembled context. Default 4000.
    /// Blocks are taken in walk order and the list is cut at the first
    /// block whose estimate would push the running total past this value.
    pub budget_tokens: u32,
    /// Include the tenant memo block when present. Default `true`.
    pub include_tenant_memo: bool,
    /// Partition-level indices whose summary to include. Default `[0, 1]`.
    /// A partition's level index is its depth minus one.
    pub include_summaries_at: Vec<u32>,
    /// Top-K memories to include (when focus is a partition). Default 5.
    /// Passed as the `limit` of the partition listing; `0` skips the
    /// listing entirely.
    pub include_memories_top_k: u32,
    /// Style preset for partition summaries. Default `Compact`.
    pub style: SummaryStyle,
    /// Plan 15: ordering applied to the post-budget block list. Default
    /// [`ContextOrdering::TopDown`] preserves the slice 1 behaviour.
    pub ordering: ContextOrdering,
}

impl Default for ContextOpts {
    /// Baseline options: 4000-token budget, tenant memo included,
    /// summaries at levels 0 and 1, five memories per partition focus,
    /// compact summaries, and the default (top-down) ordering.
    fn default() -> Self {
        // Levels 0 and 1 cover the two shallowest partition layers.
        let include_summaries_at: Vec<u32> = Vec::from([0, 1]);
        Self {
            budget_tokens: 4_000,
            include_tenant_memo: true,
            include_summaries_at,
            include_memories_top_k: 5,
            style: SummaryStyle::Compact,
            // `ContextOrdering`'s own default is `TopDown`.
            ordering: ContextOrdering::default(),
        }
    }
}

impl ContextOpts {
    /// Set the hard token budget (`budget_tokens`).
    #[must_use]
    pub fn with_budget(mut self, n: u32) -> Self {
        self.budget_tokens = n;
        self
    }
    /// Set whether the tenant memo block is included
    /// (`include_tenant_memo`).
    #[must_use]
    pub fn with_include_tenant_memo(mut self, v: bool) -> Self {
        self.include_tenant_memo = v;
        self
    }
    /// Set the partition-level indices whose summaries are included
    /// (`include_summaries_at`), replacing the current list.
    ///
    /// Accepts anything iterable over `u32`, e.g. `[0, 1, 2]` or `0..3`.
    /// An empty iterator disables partition summaries altogether.
    #[must_use]
    pub fn with_summaries_at(mut self, levels: impl IntoIterator<Item = u32>) -> Self {
        self.include_summaries_at = levels.into_iter().collect();
        self
    }
    /// Set how many memories are listed under a partition focus
    /// (`include_memories_top_k`). `0` disables the listing.
    #[must_use]
    pub fn with_top_k(mut self, n: u32) -> Self {
        self.include_memories_top_k = n;
        self
    }
    /// Set the summary style used when looking up partition summaries.
    #[must_use]
    pub fn with_style(mut self, s: SummaryStyle) -> Self {
        self.style = s;
        self
    }
    /// Plan 15: pick the ordering applied to the post-budget block list.
    #[must_use]
    pub fn with_ordering(mut self, o: ContextOrdering) -> Self {
        self.ordering = o;
        self
    }
}

/// Plan 15: rearrange an importance-sorted block list (highest first)
/// for [`ContextOrdering::UCurve`].
///
/// The list is cut into a front half — which takes the extra element
/// when the length is odd — and a back half. The output is built by
/// taking the next element from the head of the front half, then the
/// last remaining element of the back half, and repeating until both
/// are drained. Example:
/// `[A, B, C, D, E, F, G]` → `[A, G, B, F, C, E, D]`.
///
/// Lists of two or fewer elements are returned unchanged.
fn u_curve_reorder<T>(mut input: Vec<T>) -> Vec<T> {
    let total = input.len();
    if total <= 2 {
        return input;
    }
    // `input` keeps the first ceil(total / 2) elements, so the first
    // block of the original list always opens the output.
    let back_half = input.split_off(total.div_ceil(2));
    let mut from_back = back_half.into_iter().rev();
    let mut reordered: Vec<T> = Vec::with_capacity(total);
    // The front half is never shorter than the back half and at most one
    // element longer, so pairing each front element with the next back
    // element (when one is left) drains both halves exactly.
    for front_item in input {
        reordered.push(front_item);
        if let Some(back_item) = from_back.next() {
            reordered.push(back_item);
        }
    }
    reordered
}

/// Rough token count for `text`: its byte length divided by four
/// (rounded down), saturating at `u32::MAX`.
fn estimate_tokens(text: &str) -> u32 {
    // Heuristic: ~4 bytes per token. Kept as a named constant so callers
    // who care about exact ChatML counts can see what to recompute.
    const BYTES_PER_TOKEN: usize = 4;
    let approx_tokens = text.len() / BYTES_PER_TOKEN;
    u32::try_from(approx_tokens).unwrap_or(u32::MAX)
}

impl Memory {
    /// Plan 12 — assemble a token-budget-bounded list of context
    /// blocks rooted at `focus`. Walks the tenant tree top-down: tenant
    /// memo → top-level summary → ... → focus → (optionally) linked
    /// memories or top-K memories under the partition.
    ///
    /// ```no_run
    /// # async fn _ex(mem: kiromi_ai_memory::Memory, r: kiromi_ai_memory::MemoryRef) -> kiromi_ai_memory::Result<()> {
    /// use kiromi_ai_memory::{ContextOpts, graph::NodeRef};
    /// let blocks = mem.build_context(NodeRef::Memory(r), ContextOpts::default()).await?;
    /// # let _ = blocks; Ok(()) }
    /// ```
    pub async fn build_context(
        &self,
        focus: NodeRef,
        opts: ContextOpts,
    ) -> Result<Vec<ContextBlock>> {
        let mut blocks: Vec<ContextBlock> = Vec::new();

        // 1. Tenant memo.
        if opts.include_tenant_memo
            && let Some(memo) = self.tenant_memo().await?
        {
            let tokens = estimate_tokens(&memo);
            blocks.push(ContextBlock {
                kind: ContextKind::TenantMemo,
                anchor: NodeRef::Partition(crate::partition::tenant_root_path()),
                text: memo,
                tokens_estimated: tokens,
            });
        }

        // 2. Walk from root → focus partition, fetching partition
        //    summaries at each requested level.
        let focus_partition = match &focus {
            NodeRef::Memory(r) => Some(r.partition.clone()),
            NodeRef::Partition(p) => Some(p.clone()),
            NodeRef::Summary(s) => s.subject.partition_path().cloned(),
        };
        if let Some(p) = focus_partition.as_ref() {
            let mut chain: Vec<crate::partition::PartitionPath> = p.ancestors().collect();
            chain.reverse();
            chain.push(p.clone());
            for path in chain {
                let level = u32::try_from(path.depth().saturating_sub(1)).unwrap_or(0);
                if !opts.include_summaries_at.contains(&level) {
                    continue;
                }
                if let Some(rec) = self
                    .latest_summary(&SummarySubject::Partition(path.clone()), &opts.style)
                    .await?
                {
                    let text = rec.content.prose.clone();
                    let tokens = estimate_tokens(&text);
                    blocks.push(ContextBlock {
                        kind: ContextKind::PartitionSummary,
                        anchor: NodeRef::Partition(path.clone()),
                        text,
                        tokens_estimated: tokens,
                    });
                }
            }
        }

        // 3. If focus is a memory, include its body + linked memories.
        if let NodeRef::Memory(r) = &focus {
            if let Ok(record) = self.get(r).await {
                let text = record.content.as_str().to_string();
                let tokens = estimate_tokens(&text);
                blocks.push(ContextBlock {
                    kind: ContextKind::Memory,
                    anchor: NodeRef::Memory(r.clone()),
                    text,
                    tokens_estimated: tokens,
                });
            }
            let links = self.links_of(r).await?;
            for l in links {
                let dst_ref = crate::memory::MemoryRef {
                    id: l.dst,
                    partition: r.partition.clone(),
                };
                if let Ok(rec) = self.get(&dst_ref).await {
                    let text = rec.content.as_str().to_string();
                    let tokens = estimate_tokens(&text);
                    blocks.push(ContextBlock {
                        kind: ContextKind::LinkedMemory,
                        anchor: NodeRef::Memory(rec.r#ref.clone()),
                        text,
                        tokens_estimated: tokens,
                    });
                }
            }
        }

        // 4. If focus is a partition, list top-K live memories.
        if let NodeRef::Partition(p) = &focus {
            let limit = opts.include_memories_top_k;
            if limit > 0 {
                let part = crate::partition::Partitions::from_path(p);
                let page = self
                    .list(
                        part,
                        crate::opts::ListOpts {
                            limit,
                            ..Default::default()
                        },
                    )
                    .await?;
                for mref in page.items {
                    if let Ok(rec) = self.get(&mref).await {
                        let text = rec.content.as_str().to_string();
                        let tokens = estimate_tokens(&text);
                        blocks.push(ContextBlock {
                            kind: ContextKind::Memory,
                            anchor: NodeRef::Memory(mref),
                            text,
                            tokens_estimated: tokens,
                        });
                    }
                }
            }
        }

        // 5. Greedy-fill: keep walk order, drop tail blocks past budget.
        let mut accum: u32 = 0;
        let mut out = Vec::with_capacity(blocks.len());
        for b in blocks {
            let next = accum.saturating_add(b.tokens_estimated);
            if next > opts.budget_tokens {
                break;
            }
            accum = next;
            out.push(b);
        }
        // 6. Plan 15: optional U-curve reorder for long-context attention.
        if matches!(opts.ordering, ContextOrdering::UCurve) {
            out = u_curve_reorder(out);
        }
        Ok(out)
    }
}

/// Plan 15: result of [`Memory::build_context_diff`].
///
/// Splits the post-budget block list into the parts that changed
/// (`added`), dropped (`removed`), and survived (`kept`) since the
/// supplied snapshot. Chat-style turn loops can re-emit only `added`
/// to the LLM; `kept` carries anchors with no body so callers know
/// which blocks the prior turn already saw.
///
/// Membership is decided by block anchor alone: a block whose anchor
/// appears on both sides is reported as `kept` even if its text differs.
#[non_exhaustive]
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ContextDiff {
    /// Blocks present now but absent at the snapshot, in the order the
    /// current context lists them.
    pub added: Vec<ContextBlock>,
    /// Anchors of blocks present at the snapshot but absent now.
    pub removed: Vec<NodeRef>,
    /// Anchors of blocks present in both — body intentionally omitted
    /// so callers don't double-send.
    pub kept: Vec<NodeRef>,
    /// Cheap upper bound on the additional tokens the caller now needs
    /// to budget for: the sum of `tokens_estimated` over `added`.
    pub tokens_estimated_added: u32,
}

impl MemoryView {
    /// Plan 15: snapshot-pinned [`Memory::build_context`].
    ///
    /// Walks the same tenant-memo / partition-summary / focus chain as
    /// the engine path but filters every memory + summary anchor through
    /// the snapshot manifest, so only blocks that were live at snapshot
    /// time appear. Partition-anchored blocks (tenant memo, partition
    /// summaries) are carried through unfiltered.
    ///
    /// The requested ordering is applied exactly once, after filtering,
    /// so the result is ordered the way [`Memory::build_context`] would
    /// order the same surviving blocks.
    ///
    /// NOTE(review): the token budget is enforced by the engine path
    /// before the manifest filter runs, so blocks that are filtered out
    /// here have still consumed budget.
    ///
    /// # Errors
    ///
    /// Propagates any error from the underlying
    /// [`Memory::build_context`] call.
    pub async fn build_context(
        &self,
        focus: NodeRef,
        opts: ContextOpts,
    ) -> Result<Vec<ContextBlock>> {
        // Remember the caller's ordering, then run the engine path in
        // plain walk order. Letting the engine apply the U-curve and
        // re-applying it after the filter would shuffle the list twice
        // and diverge from the engine's ordering even when nothing is
        // filtered out.
        let ordering = opts.ordering;
        let mem = Memory {
            inner: std::sync::Arc::clone(&self.inner),
        };
        let raw = mem
            .build_context(focus, opts.with_ordering(ContextOrdering::TopDown))
            .await?;
        let kept: Vec<ContextBlock> = raw
            .into_iter()
            .filter(|b| match &b.anchor {
                // `binary_search` presumes the manifest id lists are
                // sorted — TODO confirm against the manifest builder.
                NodeRef::Memory(r) => self.manifest.memory_ids.binary_search(&r.id).is_ok(),
                NodeRef::Summary(s) => self.manifest.summary_ids.binary_search(&s.id).is_ok(),
                // Partition + tenant-memo anchors aren't tied to live
                // ids; carry them through unfiltered.
                NodeRef::Partition(_) => true,
            })
            .collect();
        // Apply the requested ordering once, to the filtered walk-order
        // list.
        Ok(match ordering {
            ContextOrdering::UCurve => u_curve_reorder(kept),
            ContextOrdering::TopDown => kept,
        })
    }
}

impl Memory {
    /// Plan 15: diff the current context for `focus` against the context
    /// seen from the snapshot `since`.
    ///
    /// The snapshot's context and the current context are both built
    /// with the same `opts`, and their blocks are compared by anchor:
    /// - `added`: anchor present now, absent at the snapshot (full block).
    /// - `removed`: anchor present at the snapshot, absent now.
    /// - `kept`: anchor present on both sides (anchor only).
    ///
    /// Chat-style turn loops can send just `added` to the LLM and use
    /// `kept` to track which blocks an earlier turn already delivered.
    ///
    /// # Errors
    ///
    /// Propagates failures from opening the snapshot view and from
    /// either context build.
    ///
    /// ```no_run
    /// # async fn _ex(mem: kiromi_ai_memory::Memory, s: kiromi_ai_memory::SnapshotRef) -> kiromi_ai_memory::Result<()> {
    /// use kiromi_ai_memory::{ContextOpts, graph::NodeRef, PartitionPath};
    /// let path: PartitionPath = "user=alex/topic=meetings".parse().unwrap();
    /// let diff = mem.build_context_diff(NodeRef::Partition(path), &s, ContextOpts::default()).await?;
    /// # let _ = diff; Ok(()) }
    /// ```
    pub async fn build_context_diff(
        &self,
        focus: NodeRef,
        since: &crate::snapshot::SnapshotRef,
        opts: ContextOpts,
    ) -> Result<ContextDiff> {
        use std::collections::HashSet;

        // Snapshot side first, then the live side.
        let snapshot_view = self.at(since).await?;
        let snapshot_blocks = snapshot_view
            .build_context(focus.clone(), opts.clone())
            .await?;
        let current_blocks = self.build_context(focus, opts).await?;

        // Anchor sets for O(1) membership checks in both directions.
        let anchors_of = |blocks: &[ContextBlock]| -> HashSet<NodeRef> {
            blocks.iter().map(|b| b.anchor.clone()).collect()
        };
        let seen_at_snapshot = anchors_of(&snapshot_blocks);
        let seen_now = anchors_of(&current_blocks);

        // Split the live blocks, preserving their order on both sides.
        let (carried_over, added): (Vec<ContextBlock>, Vec<ContextBlock>) = current_blocks
            .into_iter()
            .partition(|b| seen_at_snapshot.contains(&b.anchor));
        let kept: Vec<NodeRef> = carried_over.into_iter().map(|b| b.anchor).collect();

        let removed: Vec<NodeRef> = snapshot_blocks
            .into_iter()
            .filter_map(|b| (!seen_now.contains(&b.anchor)).then_some(b.anchor))
            .collect();

        let tokens_estimated_added: u32 = added.iter().map(|b| b.tokens_estimated).sum();
        Ok(ContextDiff {
            added,
            removed,
            kept,
            tokens_estimated_added,
        })
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Odd length: the front half takes the extra element and the output
    /// alternates head-of-front with tail-of-back.
    #[test]
    fn u_curve_reorders_seven_alternating_front_back() {
        let v = vec!["A", "B", "C", "D", "E", "F", "G"];
        let got = u_curve_reorder(v);
        // Front half (ceiling on odd N) = [A,B,C,D]; back = [E,F,G].
        // Take the next front element, then the last remaining back
        // element, and repeat -> A, G, B, F, C, E, D.
        assert_eq!(got, vec!["A", "G", "B", "F", "C", "E", "D"]);
    }

    /// Lists of two or fewer elements come back unchanged.
    #[test]
    fn u_curve_handles_short_lists() {
        assert_eq!(u_curve_reorder::<i32>(vec![]), Vec::<i32>::new());
        assert_eq!(u_curve_reorder(vec![1]), vec![1]);
        assert_eq!(u_curve_reorder(vec![1, 2]), vec![1, 2]);
    }

    /// Smallest list that is actually reordered.
    #[test]
    fn u_curve_reorders_three() {
        // Front = [1, 2]; back = [3] -> 1, 3, 2.
        assert_eq!(u_curve_reorder(vec![1, 2, 3]), vec![1, 3, 2]);
    }

    /// Even length: both halves are the same size and are fully
    /// interleaved.
    #[test]
    fn u_curve_even_lists_interleave_halves() {
        let v = vec![1, 2, 3, 4, 5, 6];
        let got = u_curve_reorder(v);
        // Front = [1, 2, 3]; back = [4, 5, 6]. The first input element
        // stays first; the last output element is the *first* element of
        // the back half (4), not the lowest-importance one.
        assert_eq!(got, vec![1, 6, 2, 5, 3, 4]);
    }
}