Skip to main content

entelix_memory/
consolidation.rs

1//! `ConsolidationPolicy` — when should the buffered conversation be
2//! summarised, archived, or compressed into the running summary?
3//!
4//! Long-running agents accumulate buffered messages indefinitely.
5//! Without a consolidation pass, every model call pays for the
6//! entire history; once the context window fills, useful continuity
7//! is lost. This module defines the *trigger* abstraction. The
8//! actual summarisation step (typically an LLM call that turns N
9//! messages into a paragraph) is the operator's concern — they
10//! attach a `ConsolidationPolicy` that decides *when* and supply
11//! their own summariser that decides *how*.
12
13use chrono::{DateTime, Utc};
14use entelix_core::ir::Message;
15
16/// Inputs that a [`ConsolidationPolicy`] consults when deciding
17/// whether to consolidate. Carries the buffer plus optional signals
18/// (last-consolidated-at, current/available context tokens) so
19/// non-trivial policies — for example, "summarise once we use 80 %
20/// of the model's context window" or "throttle consolidation to at
21/// most once per hour" — can express their decision without
22/// embedding their own clock or token counter.
23///
24/// Marked `#[non_exhaustive]`: callers always go through
25/// [`Self::new`] and the `with_*` builders, and impls always read
26/// fields by name, so adding a new signal (e.g. message count since
27/// last consolidation, oldest message age) is a non-breaking change.
28#[derive(Clone, Debug)]
29#[non_exhaustive]
30pub struct ConsolidationContext<'a> {
31    /// The buffered conversation.
32    pub buffer: &'a [Message],
33    /// Wall-clock time of the most recent consolidation, if any.
34    pub last_consolidated_at: Option<DateTime<Utc>>,
35    /// Tokens currently consumed by the buffer in the model's
36    /// context window. `None` when the host has not measured.
37    pub context_tokens_used: Option<usize>,
38    /// Total context-window capacity for the active model, in
39    /// tokens. `None` when the host has not declared one.
40    pub context_tokens_available: Option<usize>,
41}
42
43impl<'a> ConsolidationContext<'a> {
44    /// Build with only the buffer — other signals default to `None`.
45    /// Useful for simple agents whose policy doesn't need them.
46    #[must_use]
47    pub const fn new(buffer: &'a [Message]) -> Self {
48        Self {
49            buffer,
50            last_consolidated_at: None,
51            context_tokens_used: None,
52            context_tokens_available: None,
53        }
54    }
55
56    /// Attach the timestamp of the most recent consolidation.
57    #[must_use]
58    pub const fn with_last_consolidated_at(mut self, at: DateTime<Utc>) -> Self {
59        self.last_consolidated_at = Some(at);
60        self
61    }
62
63    /// Attach the active model's context-window state.
64    #[must_use]
65    pub const fn with_context_tokens(mut self, used: usize, available: usize) -> Self {
66        self.context_tokens_used = Some(used);
67        self.context_tokens_available = Some(available);
68        self
69    }
70}
71
72/// Decides whether the current buffered conversation should be
73/// consolidated.
74///
75/// Implementations are pure functions of the supplied context — no
76/// I/O, no async — so checks are free to run after every append.
77/// Stateful behaviour (counters, last-trigger timestamp) lives
78/// inside the impl when needed.
79pub trait ConsolidationPolicy: Send + Sync + 'static {
80    /// Return `true` when the buffer is ready for consolidation.
81    fn should_consolidate(&self, ctx: &ConsolidationContext<'_>) -> bool;
82}
83
/// Policy that fires as soon as the buffer holds `max_messages`
/// messages.
///
/// This is the most basic trigger available: it only counts
/// messages. It fits chat-style agents in which each message
/// corresponds to one turn.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct OnMessageCount {
    /// Message count at which consolidation fires.
    pub max_messages: usize,
}

impl OnMessageCount {
    /// Create a policy that fires at `max_messages` buffered
    /// messages.
    #[must_use]
    pub const fn new(max_messages: usize) -> Self {
        Self { max_messages }
    }
}
101
102impl ConsolidationPolicy for OnMessageCount {
103    fn should_consolidate(&self, ctx: &ConsolidationContext<'_>) -> bool {
104        ctx.buffer.len() >= self.max_messages
105    }
106}
107
/// Policy that fires once the text held in the buffer adds up to
/// at least `max_bytes`.
///
/// The measure is the summed UTF-8 byte length of every
/// `ContentPart::Text` in the buffer. Byte length serves as a rough
/// stand-in for a token budget, so the SDK does not need to ship a
/// tokenizer.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct OnTokenBudget {
    /// Cumulative text size, in bytes, at which consolidation fires.
    pub max_bytes: usize,
}

impl OnTokenBudget {
    /// Create a policy that fires at `max_bytes` bytes of buffered
    /// text.
    #[must_use]
    pub const fn new(max_bytes: usize) -> Self {
        Self { max_bytes }
    }
}
125
126impl ConsolidationPolicy for OnTokenBudget {
127    fn should_consolidate(&self, ctx: &ConsolidationContext<'_>) -> bool {
128        let mut total: usize = 0;
129        for msg in ctx.buffer {
130            for part in &msg.content {
131                if let entelix_core::ir::ContentPart::Text { text, .. } = part {
132                    total = total.saturating_add(text.len());
133                    if total >= self.max_bytes {
134                        return true;
135                    }
136                }
137            }
138        }
139        false
140    }
141}
142
/// Policy that never fires.
///
/// Handy as a default: consolidation stays wired into the agent,
/// yet an operator can switch it off for a while without having to
/// rebuild the agent graph.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub struct NeverConsolidate;
148
149impl ConsolidationPolicy for NeverConsolidate {
150    fn should_consolidate(&self, _ctx: &ConsolidationContext<'_>) -> bool {
151        false
152    }
153}
154
#[cfg(test)]
#[allow(clippy::indexing_slicing)]
mod tests {
    use super::*;
    use entelix_core::ir::Message;

    /// Evaluate `policy` against a context that carries only `buffer`.
    fn fires(policy: &impl ConsolidationPolicy, buffer: &[Message]) -> bool {
        policy.should_consolidate(&ConsolidationContext::new(buffer))
    }

    #[test]
    fn on_message_count_fires_at_threshold() {
        let policy = OnMessageCount::new(3);
        // One message is below the threshold of three.
        let below = [Message::user("a")];
        // Three messages sit exactly on the threshold.
        let at_limit = [Message::user("a"), Message::user("b"), Message::user("c")];
        assert!(!fires(&policy, &below));
        assert!(fires(&policy, &at_limit));
    }

    #[test]
    fn on_token_budget_fires_when_text_exceeds_limit() {
        let policy = OnTokenBudget::new(10);
        // Short text stays under the ten-byte budget.
        let under = [Message::user("hi")];
        // Longer text goes past the ten-byte budget.
        let over = [Message::user("hello there friend")];
        assert!(!fires(&policy, &under));
        assert!(fires(&policy, &over));
    }

    #[test]
    fn never_consolidate_is_always_false() {
        // Even a large buffer must not trigger this policy.
        let big = vec![Message::user("anything"); 1000];
        assert!(!fires(&NeverConsolidate, &big));
    }
}
185}