Skip to main content

zeph_agent_context/
retrieved.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Retrieved-memory context extraction for the MARCH self-check pipeline.
5//!
6//! [`RetrievedContext`] holds borrowed slices of retrieved-memory fragments
7//! (recall, graph facts, cross-session, summaries) for one turn.
8//! [`collect_retrieved_context`] walks the turn's message list and populates
9//! the four buckets without allocating beyond the [`Vec`]s themselves.
10use zeph_llm::provider::{Message, MessagePart, Role};
11
12use crate::helpers::{CROSS_SESSION_PREFIX, GRAPH_FACTS_PREFIX, RECALL_PREFIX, SUMMARY_PREFIX};
13
/// Retrieved-memory fragments gathered for one turn, grouped by origin.
///
/// Every field stores borrowed `&str` slices, so filling the struct costs no
/// allocation beyond the `Vec` buffers themselves; [`Self::joined`] is the only
/// method that allocates.
#[derive(Debug, Default)]
pub struct RetrievedContext<'a> {
    /// Semantic recall fragments.
    pub recall: Vec<&'a str>,
    /// Graph / known-facts fragments.
    pub graph_facts: Vec<&'a str>,
    /// Cross-session memory fragments.
    pub cross_session: Vec<&'a str>,
    /// Compaction / conversation summaries.
    pub summaries: Vec<&'a str>,
}

impl RetrievedContext<'_> {
    /// The four buckets as slices, in the order [`Self::joined`] emits them:
    /// recall, graph facts, cross-session, summaries.
    fn buckets(&self) -> [&[&str]; 4] {
        [
            self.recall.as_slice(),
            self.graph_facts.as_slice(),
            self.cross_session.as_slice(),
            self.summaries.as_slice(),
        ]
    }

    /// Returns `true` when no retrieved context was found for this turn.
    ///
    /// A bucket holding only empty strings still counts as non-empty: the
    /// check is on the number of fragments, not on their length.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.buckets().iter().all(|bucket| bucket.is_empty())
    }

    /// Concatenate all fragments, placing `sep` between consecutive fragments.
    ///
    /// Fragments are emitted bucket by bucket (recall, graph facts,
    /// cross-session, summaries), keeping insertion order within each bucket.
    /// Returns an empty `String` when there are no fragments.
    ///
    /// The output buffer is sized up front, so the fresh `String` is the only
    /// allocation this method performs.
    #[must_use]
    pub fn joined(&self, sep: &str) -> String {
        let buckets = self.buckets();
        // Re-creatable iterator over every fragment in output order.
        let fragments = || buckets.iter().flat_map(|bucket| bucket.iter().copied());

        let count = fragments().count();
        let fragment_bytes: usize = fragments().map(str::len).sum();
        // One separator sits between each pair of neighbours: `count - 1` in total.
        let capacity = fragment_bytes + sep.len() * count.saturating_sub(1);

        let mut out = String::with_capacity(capacity);
        for (index, fragment) in fragments().enumerate() {
            if index > 0 {
                out.push_str(sep);
            }
            out.push_str(fragment);
        }
        out
    }
}
55
56/// Walk the message list and collect all retrieved-memory fragments.
57///
58/// Two paths are supported:
59/// - **Canonical multipart path**: `MessagePart::{Recall, Summary, CrossSession}` on any message.
60/// - **Legacy string-prefix path**: `Role::System` text whose content begins with a known
61///   prefix constant (used by Ollama and older session restores).
62///
63/// `MessagePart::GraphFacts` does not exist; graph facts flow via `Role::System` messages
64/// with the [`GRAPH_FACTS_PREFIX`] prefix and are captured by the legacy path.
65#[must_use]
66pub fn collect_retrieved_context(messages: &[Message]) -> RetrievedContext<'_> {
67    let mut rc = RetrievedContext::default();
68
69    for msg in messages {
70        // (a) Canonical multipart path
71        for part in &msg.parts {
72            match part {
73                MessagePart::Recall { text } => rc.recall.push(text.as_str()),
74                MessagePart::Summary { text } => rc.summaries.push(text.as_str()),
75                MessagePart::CrossSession { text } => rc.cross_session.push(text.as_str()),
76                _ => {}
77            }
78        }
79
80        // (b) Legacy string-prefix path on System role only
81        if msg.role == Role::System {
82            for part in &msg.parts {
83                if let Some(text) = part.as_plain_text() {
84                    if let Some(body) = text.strip_prefix(RECALL_PREFIX) {
85                        rc.recall.push(body);
86                    } else if let Some(body) = text.strip_prefix(SUMMARY_PREFIX) {
87                        rc.summaries.push(body);
88                    } else if let Some(body) = text.strip_prefix(CROSS_SESSION_PREFIX) {
89                        rc.cross_session.push(body);
90                    } else if let Some(body) = text.strip_prefix(GRAPH_FACTS_PREFIX) {
91                        rc.graph_facts.push(body);
92                    }
93                }
94            }
95            // Also scan legacy content field (Ollama providers set content only, no parts)
96            if msg.parts.is_empty() {
97                let text = msg.content.as_str();
98                if let Some(body) = text.strip_prefix(RECALL_PREFIX) {
99                    rc.recall.push(body);
100                } else if let Some(body) = text.strip_prefix(SUMMARY_PREFIX) {
101                    rc.summaries.push(body);
102                } else if let Some(body) = text.strip_prefix(CROSS_SESSION_PREFIX) {
103                    rc.cross_session.push(body);
104                } else if let Some(body) = text.strip_prefix(GRAPH_FACTS_PREFIX) {
105                    rc.graph_facts.push(body);
106                }
107            }
108        }
109    }
110
111    rc
112}
113
#[cfg(test)]
mod tests {
    use super::*;
    use zeph_llm::provider::MessageMetadata;

    /// Assemble a message with default metadata from its role, content and parts.
    fn message(role: Role, content: &str, parts: Vec<MessagePart>) -> Message {
        Message {
            role,
            content: content.to_owned(),
            parts,
            metadata: MessageMetadata::default(),
        }
    }

    /// A `Role::System` message carrying only the legacy `content` field.
    fn system_text(content: &str) -> Message {
        message(Role::System, content, Vec::new())
    }

    /// A message with empty `content` and exactly one part.
    fn single_part(role: Role, part: MessagePart) -> Message {
        message(role, "", vec![part])
    }

    /// A typed `Recall` part on a user message lands in the recall bucket only.
    #[test]
    fn collect_finds_multipart_recall() {
        let recall = MessagePart::Recall {
            text: "recall fragment".into(),
        };
        let msgs = [single_part(Role::User, recall)];

        let rc = collect_retrieved_context(&msgs);

        assert_eq!(rc.recall, ["recall fragment"]);
        assert!(rc.summaries.is_empty());
    }

    /// A prefixed system `content` string is recognised and the prefix is stripped.
    #[test]
    fn collect_finds_legacy_prefix_system() {
        let content = format!("{RECALL_PREFIX}legacy recall body");
        let msgs = [system_text(&content)];

        let rc = collect_retrieved_context(&msgs);

        assert_eq!(rc.recall, ["legacy recall body"]);
    }

    /// Canonical parts and legacy-prefixed system text are collected in the same pass.
    #[test]
    fn collect_combines_both_shapes() {
        let recall = MessagePart::Recall {
            text: "part recall".into(),
        };
        let graph = format!("{GRAPH_FACTS_PREFIX}graph data");
        let msgs = [single_part(Role::User, recall), system_text(&graph)];

        let rc = collect_retrieved_context(&msgs);

        assert_eq!(rc.recall, ["part recall"]);
        assert_eq!(rc.graph_facts, ["graph data"]);
    }

    /// Ordinary text parts on a user message contribute nothing.
    #[test]
    fn collect_skips_non_retrieval_parts() {
        let text = MessagePart::Text {
            text: "plain user text".into(),
        };
        let msgs = [single_part(Role::User, text)];

        let rc = collect_retrieved_context(&msgs);

        assert!(rc.is_empty());
    }

    /// A user message with plain `content` and no parts contributes nothing.
    #[test]
    fn collect_empty_on_plain_user_turn() {
        let msgs = [message(Role::User, "hello world", Vec::new())];

        let rc = collect_retrieved_context(&msgs);

        assert!(rc.is_empty());
    }
}
196}