Skip to main content

brainos_cortex/
context.rs

//! Context assembly — builds LLM prompts with token budget management.
//!
//! Manages the token budget for LLM context windows (defaults shown are for
//! an 8192-token window):
//! - System prompt (~500 tokens)
//! - User model snapshot (~300 tokens)
//! - Conversation history (~2000 tokens)
//! - Path attachments (~2500 tokens)
//! - Retrieved memories (remaining budget)
//! - Response buffer (~400 tokens)
9
10use crate::llm::Message;
11use hippocampus::search::Memory;
12
/// Characters assumed per token by the prompt-size estimator.
///
/// English prose averages roughly 4 chars/token while code and JSON (common
/// in attachments) sit nearer 3. Using 3 keeps the estimate on the cautious
/// side — packed prompts should not badly *under*-count and overflow the
/// model's real window — without discarding about half the usable window the
/// way the previous 2-chars/token guess did. This is intentionally a portable
/// heuristic rather than a model-specific BPE tokenizer: Brain talks to
/// several providers (Ollama/qwen, OpenAI, …) and no single vocabulary fits
/// them all. Adopting a real tokenizer later should only touch this module.
pub const CHARS_PER_TOKEN: usize = 3;

/// Estimate how many tokens `text` occupies, for budgeting and packing.
///
/// Counts Unicode scalar values (not bytes) and rounds up to whole tokens, so
/// an empty string costs 0 tokens and any non-empty string costs at least 1.
/// This is the single home of the heuristic: budget checks in this module go
/// through here so they cannot drift apart.
pub fn estimate_tokens(text: &str) -> usize {
    let char_count = text.chars().count();
    let whole_tokens = char_count / CHARS_PER_TOKEN;
    // A trailing partial group of characters still occupies one more token.
    let has_partial_token = char_count % CHARS_PER_TOKEN != 0;
    whole_tokens + usize::from(has_partial_token)
}
29
30/// Default token budgets.
31pub const TOKEN_BUDGETS: TokenBudget = TokenBudget {
32    system_prompt: 500,
33    user_model: 300,
34    conversation_history: 2000,
35    response_buffer: 400,
36    attachments: 2500,
37    total_context: 8192, // Default for most models
38};
39
/// Fixed greeting for the very first chat session, when Brain holds zero
/// facts about the user. It is printed as-is, so no LLM call is needed.
pub const ONBOARDING_GREETING: &str = concat!(
    "Hey! I'm Brain \u{2014} your personal memory engine. ",
    "I run locally on your machine and I'm here to remember what matters to you. ",
    "I don't know anything about you yet, so let's fix that. What's your name?",
);
45
/// Extra system-prompt text for onboarding, injected while the user has
/// fewer than 5 facts. It steers the LLM towards short, warm replies that
/// each end with one follow-up question about the user.
///
/// The text begins with a blank line so it can be appended directly to the
/// end of the base system prompt.
pub const ONBOARDING_ADDENDUM: &str = r#"

[ONBOARDING MODE — the user is new and you know very little about them]
- After every user message, end your response with ONE short, focused follow-up question to learn about the user (name, role, projects, interests).
- Keep responses to 1-3 sentences plus the question.
- Sound warm, curious, and conversational — not like an intake form.
- NEVER say "I don't have that in my memory yet" — instead, be proactive about learning.
- Once you learn something, acknowledge it naturally and ask about the next thing."#;
56
/// Static "Your Capabilities" section of the SOUL prompt, listing the
/// always-on cognitive faculties.
///
/// It is used verbatim whenever no live capability digest is supplied
/// (non-chat LLM paths, tests, custom prompts), and it is the prefix of the
/// live digest built by the chat path (see `signal::pipeline::conversation`).
/// Keeping the wording in one constant stops the static and live views from
/// drifting apart.
pub const DEFAULT_CAPABILITIES: &str = r#"Your Capabilities:
- Episodic Memory: You recall past experiences and conversations provided as context.
- Semantic Memory: You maintain a web of facts about the user's world, projects, and habits.
- Proactivity: You don't just react; you anticipate needs based on established patterns (provided in context)."#;
67
/// How a model's context window is divided between prompt sections.
///
/// All values are token counts. Retrieved memories have no field of their
/// own: they receive whatever remains of `total_context` after the other
/// sections are subtracted.
#[derive(Debug, Clone, Copy)]
pub struct TokenBudget {
    /// Allocation for the system prompt.
    pub system_prompt: usize,
    /// Allocation for the user-model snapshot.
    pub user_model: usize,
    /// Cap on replayed conversation history.
    pub conversation_history: usize,
    /// Room kept free for the model's response.
    pub response_buffer: usize,
    /// Cap on rendered path-attachments (snapshots of files/dirs the user
    /// referenced in chat). The assembler truncates snapshots to fit.
    pub attachments: usize,
    /// Size of the whole context window.
    pub total_context: usize,
}
80
81impl TokenBudget {
82    /// Calculate remaining budget for memories.
83    pub fn memory_budget(&self) -> usize {
84        self.total_context
85            .saturating_sub(self.system_prompt)
86            .saturating_sub(self.user_model)
87            .saturating_sub(self.conversation_history)
88            .saturating_sub(self.response_buffer)
89            .saturating_sub(self.attachments)
90    }
91
92    /// Build a budget scaled to a model's real context window.
93    ///
94    /// Fixed overheads (system prompt, user-model snapshot, response buffer)
95    /// don't grow with the window, so they're reserved as constants. The
96    /// remaining *working pool* is split proportionally between conversation
97    /// history, path-attachments, and retrieved memories — so a 128k-window
98    /// model reads far more file content and recalls more memory than the
99    /// conservative 8k default, instead of clipping everything to a fixed cap.
100    ///
101    /// At `total_tokens == 8192` this stays close to the historical fixed
102    /// split (history ≈2k, attachments ≈2.5k, memory ≈2.5k). Memory is the
103    /// implicit remainder via [`Self::memory_budget`].
104    pub fn for_context_size(total_tokens: usize) -> Self {
105        let system_prompt = TOKEN_BUDGETS.system_prompt;
106        let user_model = TOKEN_BUDGETS.user_model;
107        let response_buffer = TOKEN_BUDGETS.response_buffer;
108        let reserved = system_prompt + user_model + response_buffer;
109
110        // Working pool after fixed overheads. Below the reserve we can't split
111        // anything — fall back to zero variable sections (the assembler still
112        // renders the system prompt).
113        let pool = total_tokens.saturating_sub(reserved);
114        // History stays modest (recent turns dominate relevance); attachments
115        // and memory get the lion's share and scale with the window.
116        let conversation_history = pool * 28 / 100;
117        let attachments = pool * 36 / 100;
118        // memory_budget() consumes the remainder (~36% of the pool).
119
120        Self {
121            system_prompt,
122            user_model,
123            conversation_history,
124            response_buffer,
125            attachments,
126            total_context: total_tokens,
127        }
128    }
129}
130
131impl Default for TokenBudget {
132    fn default() -> Self {
133        TOKEN_BUDGETS
134    }
135}
136
/// A snapshot of a local path the user referenced in a chat turn.
///
/// When the user's message mentions a path, the pipeline reads it on their
/// behalf and passes the snapshot here so the LLM sees *what is actually
/// there*, next to memories and history. The SOUL prompt's "ATTACHED CONTENT"
/// instructions tell the model how to read these blocks.
#[derive(Debug, Clone)]
pub struct Attachment {
    /// The path token exactly as the user typed it, kept verbatim so the LLM
    /// can refer back to the user's own wording.
    pub display_path: String,
    /// Rendered snapshot text: for a directory, a listing plus histogram and
    /// inlined files; for a file, an excerpt. Built by
    /// `signal::pipeline::build_directory_snapshot` / `build_file_snapshot`.
    pub snapshot: String,
}
153
/// A path the user mentioned that could not be attached — not found, outside
/// `security.allowed_paths`, or the wrong kind of file.
///
/// It is rendered as a `<SKIPPED_PATH>` tag so Brain can mention the problem
/// instead of silently dropping the reference.
#[derive(Debug, Clone)]
pub struct SkippedAttachment {
    /// The path as shown to the model in the tag's `path` attribute.
    pub display_path: String,
    /// Why the path was skipped; shown in the tag's `reason` attribute.
    pub reason: String,
}
163
/// What Brain knows about the user, in the form injected into the prompt.
///
/// The default value is an empty profile: no name and no list entries.
#[derive(Debug, Clone, Default)]
pub struct UserProfile {
    /// The user's name, if known.
    pub name: Option<String>,
    /// Stated preferences.
    pub preferences: Vec<String>,
    /// Stated goals.
    pub goals: Vec<String>,
    /// Other key facts.
    pub facts: Vec<String>,
}
172
173impl UserProfile {
174    /// Format as a context string.
175    pub fn to_context_string(&self) -> String {
176        let mut parts = Vec::new();
177
178        if let Some(name) = &self.name {
179            parts.push(format!("The user's name is {}.", name));
180        }
181
182        if !self.preferences.is_empty() {
183            parts.push(format!("User preferences: {}", self.preferences.join(", ")));
184        }
185
186        if !self.goals.is_empty() {
187            parts.push(format!("User goals: {}", self.goals.join(", ")));
188        }
189
190        if !self.facts.is_empty() {
191            parts.push(format!("Key facts: {}", self.facts.join("; ")));
192        }
193
194        parts.join(" ")
195    }
196
197    /// Estimate token count (conservative: ~2 chars per token to handle non-ASCII safely).
198    pub fn estimate_tokens(&self) -> usize {
199        estimate_tokens(&self.to_context_string())
200    }
201}
202
203/// Context assembler — builds prompts respecting token budgets.
204pub struct ContextAssembler {
205    budget: TokenBudget,
206    system_prompt: String,
207    user_profile: UserProfile,
208}
209
210impl ContextAssembler {
211    /// Create a new context assembler.
212    pub fn new(budget: TokenBudget) -> Self {
213        Self {
214            budget,
215            system_prompt: Self::default_system_prompt(),
216            user_profile: UserProfile::default(),
217        }
218    }
219
220    /// Create with default budget.
221    pub fn with_defaults() -> Self {
222        Self::new(TOKEN_BUDGETS)
223    }
224
225    /// The active token budget — lets the pipeline plan history compaction
226    /// against the same per-section allocation the assembler enforces.
227    pub fn budget(&self) -> TokenBudget {
228        self.budget
229    }
230
231    /// Set custom system prompt.
232    pub fn with_system_prompt(mut self, prompt: impl Into<String>) -> Self {
233        self.system_prompt = prompt.into();
234        self
235    }
236
237    /// Set user profile.
238    pub fn with_user_profile(mut self, profile: UserProfile) -> Self {
239        self.user_profile = profile;
240        self
241    }
242
243    /// Get the default system prompt.
244    fn default_system_prompt() -> String {
245        r#"You are the SOUL of Brain OS — a biologically-inspired, proactive cognitive engine. You are not just an assistant; you are the user's digital hippocampus and prefrontal cortex, operating with deep context and long-term memory.
246
247Your Identity:
248- You are "Brain", the central intelligence of a local-first memory system.
249- You are private, secure, and run entirely on the user's machine.
250- Your purpose is to eliminate "context amnesia" by bridging the gap between siloed tools and the user's life.
251
252Operating Principles:
2531. TRUTH OVER HALLUCINATION: Ground answers in (a) the provided memories, (b) the live conversation history above this message, and (c) general world knowledge. If a *fact about the user* is genuinely absent from memory AND not present in the conversation, state: "I don't have that in my memory yet." Do NOT say this when the user is asking about things discussed earlier in the current conversation — answer from the message thread itself.
254   - SELF-KNOWLEDGE BOUNDARY: General world knowledge is fine for the world at large, but it is NOT a source for claims about Brain itself. Any statement about Brain's own CLI commands, config keys/schema, file layout, or features MUST come from the "About Brain" and "Your Capabilities" sections below — never from general knowledge or guesswork. If the answer isn't in those sections, say so plainly ("that isn't something Brain exposes" / "that's not a command/config key I have") and, where useful, point to the closest real command or config key. Never invent command names, config keys, templating syntax, or option flags — a confident, plausible-looking fabrication of Brain's surface is the worst failure mode.
255   - MEMORY GROUNDING: Never assert a specific fact about the user unless it appears verbatim in the "Relevant memories:" block or earlier in this conversation. This applies with full force when you are *describing what you remember* (e.g. answering "what do you know about me?" or "what are your capabilities?"): do NOT manufacture illustrative examples — never say things like "you bike to work" or "you deploy on Fridays" to demonstrate recall. Describe the *kinds* of things you store (preferences, projects, habits, people, decisions) in the abstract, and cite only real entries from the memories block. A fabricated personal "memory" is a betrayal of a memory product's core promise — when memory is empty or lacks the detail, say so.
2562. SEAMLESS RECALL: Reference memories and prior turns naturally ("You mentioned earlier...", "Based on what we discussed...").
2573. COGNITIVE CLARITY: Be concise, direct, and insightful. Avoid corporate fluff. Match response length to the question — simple greetings get one or two sentences, not tables.
2584. CONTEXTUAL AWARENESS: Use the provided User Profile to tailor your tone and relevance.
2595. CURIOSITY: When you lack context about the user, ask one focused follow-up question. Learning about the user is part of your job — don't wait to be told.
2606. FORMATTING: The user's terminal renders markdown. Use it lightly when it helps (lists for multi-item answers, **bold** for emphasis, `code` for identifiers). Skip headings and tables for short replies. Prefer bullet lists over tables — the terminal is narrow and wide tables render poorly; only use a table for genuinely tabular data with short cells.
2617. ATTACHED CONTENT: When the user references a local path, an `<ATTACHED_CONTENT path="…">` block is provided below as grounding — that is what is actually on disk, read on the user's behalf. Adapt your response shape to the *content*, not to a template: a chat export deserves a conversational summary with themes, tone, and an honest opinion; a code project deserves a technical overview; a folder of photos or media deserves an honest "I can see these file types but I can't view the images themselves." Never describe a non-code folder as if it were a software project. If a `<SKIPPED_PATH reason="…"/>` tag appears, the user named a path I couldn't read — acknowledge it briefly and ask them to confirm or rephrase.
262
263You are the user's partner in thought. Your goal is to make their digital life feel like a continuous, coherent stream of intelligence."#
264            .to_string()
265    }
266
267    /// Assemble context into messages.
268    ///
269    /// Takes retrieved memories and conversation history, returns
270    /// messages ready for the LLM.
271    pub fn assemble(
272        &self,
273        user_message: &str,
274        memories: &[Memory],
275        conversation_history: &[Message],
276    ) -> Vec<Message> {
277        self.assemble_with_addendum(user_message, memories, conversation_history, None)
278    }
279
280    /// Like [`assemble`], but appends `addendum` to the system prompt if provided.
281    /// Used to switch prompt modes per-turn (e.g. onboarding) without mutating
282    /// the shared assembler.
283    pub fn assemble_with_addendum(
284        &self,
285        user_message: &str,
286        memories: &[Memory],
287        conversation_history: &[Message],
288        addendum: Option<&str>,
289    ) -> Vec<Message> {
290        self.assemble_full(
291            user_message,
292            memories,
293            conversation_history,
294            addendum,
295            None,
296            &[],
297            &[],
298        )
299    }
300
301    /// Full assembly with path-attachment grounding. Attachments render
302    /// as `<ATTACHED_CONTENT>` blocks in a System message positioned
303    /// right before the user's actual message — closest attention slot
304    /// to "what the user just put on the table." Skipped paths render
305    /// as `<SKIPPED_PATH>` tags in the same block so Brain can mention
306    /// them naturally.
307    ///
308    /// Per-attachment content is truncated to fit `budget.attachments`;
309    /// when total snapshot text exceeds the budget, later attachments
310    /// shrink first so the first (and usually primary) reference stays
311    /// intact.
312    ///
313    /// `capabilities` is the "Your Capabilities" section of the SOUL
314    /// prompt. The chat path passes a *live* digest rendered from the
315    /// currently-wired tools and agents; every other path
316    /// passes `None` and falls back to [`DEFAULT_CAPABILITIES`]. Either
317    /// way the section is appended after the base prompt so the reasoner
318    /// always sees an explicit capability manifest.
319    pub fn assemble_full(
320        &self,
321        user_message: &str,
322        memories: &[Memory],
323        conversation_history: &[Message],
324        addendum: Option<&str>,
325        capabilities: Option<&str>,
326        attachments: &[Attachment],
327        skipped: &[SkippedAttachment],
328    ) -> Vec<Message> {
329        let mut messages = Vec::new();
330        let memory_budget = self.budget.memory_budget();
331
332        // 1. System prompt with optional addendum and user profile
333        let base_prompt = match addendum {
334            Some(extra) if !extra.is_empty() => {
335                format!("{}{}", self.system_prompt, extra)
336            }
337            _ => self.system_prompt.clone(),
338        };
339        // Capability manifest: live digest from the chat path, or the
340        // static always-on faculties everywhere else.
341        let prompt_with_caps = format!(
342            "{}\n\n{}",
343            base_prompt,
344            capabilities.unwrap_or(DEFAULT_CAPABILITIES)
345        );
346        let system_content = if self.user_profile.estimate_tokens() > 0 {
347            format!(
348                "{}\n\nUser Profile: {}",
349                prompt_with_caps,
350                self.user_profile.to_context_string()
351            )
352        } else {
353            prompt_with_caps
354        };
355        messages.push(Message::system(system_content));
356
357        // 2. Add memories as system context (if within budget)
358        let mut current_tokens = estimate_tokens(&messages[0].content);
359        let mut memory_context = String::new();
360
361        for memory in memories {
362            let memory_text = if let Some(ref agent) = memory.agent {
363                format!(
364                    "- [{:?}, agent: {}] {}\n",
365                    memory.source, agent, memory.content
366                )
367            } else {
368                format!("- [{:?}] {}\n", memory.source, memory.content)
369            };
370            let memory_tokens = estimate_tokens(&memory_text);
371
372            if current_tokens + memory_tokens > memory_budget {
373                break;
374            }
375
376            memory_context.push_str(&memory_text);
377            current_tokens += memory_tokens;
378        }
379
380        if !memory_context.is_empty() {
381            messages.push(Message::system(format!(
382                "Relevant memories:\n{}",
383                memory_context
384            )));
385        }
386
387        // 3. Add conversation history (respecting budget)
388        let mut history_tokens: usize = 0;
389        let mut included_history: Vec<Message> = Vec::new();
390
391        // Start from most recent and work backwards
392        for msg in conversation_history.iter().rev() {
393            let msg_tokens = estimate_tokens(&msg.content);
394            if history_tokens + msg_tokens > self.budget.conversation_history {
395                break;
396            }
397            included_history.push(msg.clone());
398            history_tokens += msg_tokens;
399        }
400
401        // Reverse to maintain chronological order
402        included_history.reverse();
403        messages.extend(included_history);
404
405        // 4. Attached path grounding (renders right before the user
406        //    message so the LLM has it freshly in attention).
407        if let Some(block) = render_attachments_block(attachments, skipped, self.budget.attachments)
408        {
409            messages.push(Message::system(block));
410        }
411
412        // 5. Add current user message
413        messages.push(Message::user(user_message.to_string()));
414
415        messages
416    }
417
418    /// Quick estimate of total tokens in messages.
419    pub fn estimate_tokens(messages: &[Message]) -> usize {
420        messages.iter().map(|m| estimate_tokens(&m.content)).sum()
421    }
422}
423
424/// Build the `<ATTACHED_CONTENT>` / `<SKIPPED_PATH>` block that goes
425/// just before the user's message. Returns `None` when there's nothing
426/// to render. Each attachment's snapshot is truncated to keep the
427/// total under `budget_tokens` (2 chars ≈ 1 token); later attachments
428/// shrink first so the primary reference stays intact.
429fn render_attachments_block(
430    attachments: &[Attachment],
431    skipped: &[SkippedAttachment],
432    budget_tokens: usize,
433) -> Option<String> {
434    if attachments.is_empty() && skipped.is_empty() {
435        return None;
436    }
437    // Convert the token budget back to a char ceiling using the same ratio
438    // the estimator assumes, so truncation and packing stay consistent.
439    let char_budget = budget_tokens.saturating_mul(CHARS_PER_TOKEN);
440    let mut out = String::new();
441    let mut chars_used = 0usize;
442
443    for (i, att) in attachments.iter().enumerate() {
444        // Per-attachment ceiling: equal share of remaining budget,
445        // floored at 600 chars so a small attachment can always fit.
446        let remaining_atts = attachments.len() - i;
447        let per_attachment =
448            (char_budget.saturating_sub(chars_used) / remaining_atts.max(1)).max(600);
449        let body = truncate_snapshot(&att.snapshot, per_attachment);
450        let block = format!(
451            "<ATTACHED_CONTENT path=\"{}\">\n{}\n</ATTACHED_CONTENT>\n",
452            att.display_path, body
453        );
454        chars_used = chars_used.saturating_add(block.chars().count());
455        out.push_str(&block);
456    }
457    for sk in skipped {
458        let tag = format!(
459            "<SKIPPED_PATH path=\"{}\" reason=\"{}\"/>\n",
460            sk.display_path,
461            sk.reason.replace('"', "'"),
462        );
463        out.push_str(&tag);
464    }
465    Some(out)
466}
467
/// Truncate a snapshot to at most `cap_chars` characters, appending a short
/// marker so the LLM knows content was cut.
///
/// Lengths are measured in `char`s, and cuts land on `char` boundaries, so
/// multi-byte characters are never split. A snapshot that already fits is
/// returned unchanged. Otherwise the first `cap_chars - 20` characters are
/// kept and the marker `"\n…[truncated]"` is appended (20 characters are
/// reserved for the 13-character marker). When `cap_chars` is too small to
/// hold even the marker, the snapshot is cut hard at `cap_chars` with no
/// marker, so the result never exceeds the cap.
///
/// Only the first `cap_chars + 1` characters are scanned, so an oversized
/// snapshot is not walked end to end just to learn that it is too long.
fn truncate_snapshot(s: &str, cap_chars: usize) -> String {
    const TRUNCATION_MARKER: &str = "\n…[truncated]";
    // Characters held back from the cap to make room for the marker.
    const MARKER_RESERVE: usize = 20;

    // `nth(cap_chars)` is `Some` only if a character exists *after* the
    // first `cap_chars`, i.e. the snapshot is longer than the cap.
    if s.char_indices().nth(cap_chars).is_none() {
        return s.to_string();
    }

    if cap_chars < TRUNCATION_MARKER.chars().count() {
        // Not even the marker fits: honour the cap with a hard cut.
        return s.chars().take(cap_chars).collect();
    }

    let kept_chars = cap_chars.saturating_sub(MARKER_RESERVE);
    // Byte offset where character number `kept_chars` starts.
    let cut_at = s
        .char_indices()
        .nth(kept_chars)
        .map_or(s.len(), |(byte_index, _)| byte_index);

    let mut out = String::with_capacity(cut_at + TRUNCATION_MARKER.len());
    out.push_str(&s[..cut_at]);
    out.push_str(TRUNCATION_MARKER);
    out
}
479
480#[cfg(test)]
481mod tests {
482    use super::*;
483    use crate::llm::Role;
484
485    #[test]
486    fn test_token_budget_memory_allocation() {
487        let budget = TokenBudget::default();
488        let memory_budget = budget.memory_budget();
489
490        // 8192 - 500 - 300 - 2000 - 400 - 2500 = 2492
491        assert_eq!(memory_budget, 2492);
492    }
493
494    #[test]
495    fn for_context_size_scales_attachments_and_memory_with_window() {
496        let small = TokenBudget::for_context_size(8192);
497        let large = TokenBudget::for_context_size(128000);
498
499        // Fixed overheads don't move with the window.
500        assert_eq!(large.system_prompt, small.system_prompt);
501        assert_eq!(large.response_buffer, small.response_buffer);
502
503        // A 128k model reads far more file content and recalls far more memory.
504        assert!(
505            large.attachments > small.attachments * 10,
506            "attachments should scale with the window: {} vs {}",
507            large.attachments,
508            small.attachments,
509        );
510        assert!(large.memory_budget() > small.memory_budget() * 10);
511
512        // The pieces still fit inside the declared window.
513        let used = large.system_prompt
514            + large.user_model
515            + large.conversation_history
516            + large.response_buffer
517            + large.attachments
518            + large.memory_budget();
519        assert!(used <= large.total_context);
520
521        // At the 8k default the split stays close to the historical fixed one.
522        assert!((1800..=2200).contains(&small.conversation_history));
523        assert!((2300..=2700).contains(&small.attachments));
524    }
525
526    #[test]
527    fn for_context_size_below_reserve_is_safe() {
528        // A tiny window can't fund variable sections, but must not panic or
529        // produce a budget that exceeds the window.
530        let budget = TokenBudget::for_context_size(500);
531        assert_eq!(budget.total_context, 500);
532        assert_eq!(budget.conversation_history, 0);
533        assert_eq!(budget.attachments, 0);
534        assert_eq!(budget.memory_budget(), 0);
535    }
536
537    // ── Property tests ────────────────────────────────────────────────
538    //
539    // The estimator and budget split are the safety floor for every prompt
540    // Brain assembles: under-counting tokens overflows the model's real
541    // window, and a budget whose sections sum past `total_context` clips
542    // content the assembler thought it had room for. These assert the
543    // invariants for arbitrary input rather than the hand-picked sizes above.
544
545    use proptest::prelude::*;
546
547    proptest! {
548        #![proptest_config(ProptestConfig { cases: 512, .. ProptestConfig::default() })]
549
550        /// The estimator must never *under*-count past a single token's worth
551        /// of characters — that's the whole point of the conservative 3
552        /// chars/token ratio (see `CHARS_PER_TOKEN`). If this breaks, packed
553        /// prompts can silently overflow the provider's context window.
554        #[test]
555        fn estimate_never_undercounts(s in ".*") {
556            let chars = s.chars().count();
557            let est = estimate_tokens(&s);
558            prop_assert!(
559                chars <= est * CHARS_PER_TOKEN,
560                "estimate under-counted: {chars} chars but est*ratio = {}", est * CHARS_PER_TOKEN
561            );
562            // …and never wastefully over-counts by more than one token.
563            prop_assert!(est * CHARS_PER_TOKEN < chars + CHARS_PER_TOKEN);
564            // Zero tokens iff empty.
565            prop_assert_eq!(est == 0, chars == 0);
566        }
567
568        /// The estimate is a function of *character* count, not byte length —
569        /// guards against a regression to `str::len()` that would over-count
570        /// multi-byte (non-ASCII) text and waste budget. A string of N
571        /// multi-byte codepoints must estimate the same as N ASCII ones.
572        #[test]
573        fn estimate_counts_chars_not_bytes(n in 0usize..512) {
574            prop_assert_eq!(
575                estimate_tokens(&"€".repeat(n)), // 3 bytes/char
576                estimate_tokens(&"a".repeat(n)), // 1 byte/char
577            );
578        }
579
580        /// Splitting text and budgeting the pieces separately must never count
581        /// *fewer* tokens than budgeting the whole — otherwise per-section
582        /// accounting could fit content the combined prompt can't hold.
583        #[test]
584        fn estimate_is_subadditive(a in ".*", b in ".*") {
585            let whole = estimate_tokens(&format!("{a}{b}"));
586            let parts = estimate_tokens(&a) + estimate_tokens(&b);
587            prop_assert!(parts >= whole, "split under-counted: {parts} < {whole}");
588        }
589
590        /// The budget split is exact and self-consistent for *any* window: the
591        /// variable pool (history + attachments + memory) always equals the
592        /// window minus the fixed reserve, and the six sections together cover
593        /// exactly the window (or the fixed reserve when the window is too
594        /// small to fund anything — a window that can't fit the fixed prompt).
595        #[test]
596        fn for_context_size_split_is_exact(total in 0usize..8_000_000) {
597            let b = TokenBudget::for_context_size(total);
598            prop_assert_eq!(b.total_context, total);
599
600            let reserved = b.system_prompt + b.user_model + b.response_buffer;
601            let pool = total.saturating_sub(reserved);
602
603            // The three variable sections partition the pool exactly.
604            prop_assert_eq!(
605                b.conversation_history + b.attachments + b.memory_budget(),
606                pool,
607                "variable sections must sum to the pool"
608            );
609
610            // All six sections cover the window exactly, or the fixed reserve
611            // when the window is below it.
612            let used = b.system_prompt
613                + b.user_model
614                + b.conversation_history
615                + b.response_buffer
616                + b.attachments
617                + b.memory_budget();
618            prop_assert_eq!(used, total.max(reserved));
619        }
620
621        /// Fixed overheads never move with the window, and a window at least
622        /// twice the reserve grows both attachments and memory when doubled
623        /// (doubling clears the ±1 floor jitter of the percentage split).
624        #[test]
625        fn for_context_size_scales_monotonically(total in 2_400usize..4_000_000) {
626            let small = TokenBudget::for_context_size(total);
627            let large = TokenBudget::for_context_size(total * 2);
628
629            prop_assert_eq!(large.system_prompt, small.system_prompt);
630            prop_assert_eq!(large.user_model, small.user_model);
631            prop_assert_eq!(large.response_buffer, small.response_buffer);
632
633            prop_assert!(large.attachments > small.attachments);
634            prop_assert!(large.memory_budget() > small.memory_budget());
635            prop_assert!(large.conversation_history > small.conversation_history);
636        }
637    }
638
639    #[test]
640    fn test_user_profile_to_context() {
641        let profile = UserProfile {
642            name: Some("Alice".to_string()),
643            preferences: vec!["coffee".to_string(), "quiet mornings".to_string()],
644            goals: vec!["learn Rust".to_string()],
645            facts: vec!["works remotely".to_string()],
646        };
647
648        let context = profile.to_context_string();
649        assert!(context.contains("Alice"));
650        assert!(context.contains("coffee"));
651        assert!(context.contains("learn Rust"));
652    }
653
654    #[test]
655    fn test_assemble_with_addendum_injects_into_system_prompt() {
656        let assembler = ContextAssembler::with_defaults();
657        let messages = assembler.assemble_with_addendum("hi", &[], &[], Some(ONBOARDING_ADDENDUM));
658
659        let system = messages
660            .iter()
661            .find(|m| matches!(m.role, Role::System))
662            .expect("system message");
663        assert!(
664            system.content.contains("[ONBOARDING MODE"),
665            "onboarding addendum should be present in system prompt"
666        );
667    }
668
669    #[test]
670    fn system_prompt_forbids_fabricated_memories() {
671        // The SOUL prompt must carry the memory-grounding rule that stops the
672        // reasoner inventing first-person "memories" (WS3). Anchored on the
673        // base prompt so it's present on every turn, onboarding or not.
674        let assembler = ContextAssembler::with_defaults();
675        let messages = assembler.assemble("what do you know about me?", &[], &[]);
676        let system = &messages[0].content;
677        assert!(
678            system.contains("MEMORY GROUNDING"),
679            "memory-grounding rule missing from system prompt"
680        );
681        assert!(
682            system.contains("Relevant memories:"),
683            "rule should anchor on the real memories block label"
684        );
685    }
686
687    #[test]
688    fn test_assemble_without_addendum_matches_plain_assemble() {
689        let assembler = ContextAssembler::with_defaults();
690        let a = assembler.assemble("hi", &[], &[]);
691        let b = assembler.assemble_with_addendum("hi", &[], &[], None);
692        assert_eq!(a.len(), b.len());
693        assert_eq!(a[0].content, b[0].content);
694    }
695
696    #[test]
697    fn test_context_assembler_basic() {
698        use hippocampus::search::MemorySource;
699
700        let assembler = ContextAssembler::with_defaults();
701
702        let memories = vec![Memory {
703            id: "1".to_string(),
704            content: "User likes Rust programming".to_string(),
705            source: MemorySource::Semantic,
706            score: 0.9,
707            importance: 0.8,
708            timestamp: "2026-01-01".to_string(),
709            agent: None,
710        }];
711
712        let history = vec![];
713        let messages = assembler.assemble("What language should I learn?", &memories, &history);
714
715        // Should have: system prompt, memory context, user message
716        assert!(messages.len() >= 2);
717        assert_eq!(
718            messages.last().unwrap().content,
719            "What language should I learn?"
720        );
721        assert_eq!(messages.last().unwrap().role, Role::User);
722    }
723
724    #[test]
725    fn test_context_assembler_agent_attribution() {
726        use hippocampus::search::MemorySource;
727
728        let assembler = ContextAssembler::with_defaults();
729
730        let memories = vec![
731            Memory {
732                id: "1".to_string(),
733                content: "User likes coffee".to_string(),
734                source: MemorySource::Episodic,
735                score: 0.9,
736                importance: 0.8,
737                timestamp: "2026-01-01".to_string(),
738                agent: Some("chat-bot".to_string()),
739            },
740            Memory {
741                id: "2".to_string(),
742                content: "User works remotely".to_string(),
743                source: MemorySource::Semantic,
744                score: 0.85,
745                importance: 0.7,
746                timestamp: "2026-01-02".to_string(),
747                agent: None,
748            },
749        ];
750
751        let messages = assembler.assemble("Tell me about the user", &memories, &[]);
752
753        // The memories block is its own system message starting with the
754        // label; `starts_with` avoids matching the base system prompt, which
755        // now references "Relevant memories:" in its memory-grounding rule.
756        let memory_msg = messages
757            .iter()
758            .find(|m| m.content.starts_with("Relevant memories:"))
759            .expect("should have memory context message");
760
761        assert!(
762            memory_msg.content.contains("agent: chat-bot"),
763            "memory with agent should include attribution"
764        );
765        assert!(
766            !memory_msg.content.contains("agent: ")
767                || memory_msg.content.matches("agent: ").count() == 1,
768            "memory without agent should NOT include agent label"
769        );
770    }
771
772    #[test]
773    fn test_context_assembler_with_history() {
774        let assembler = ContextAssembler::with_defaults();
775
776        let history = vec![
777            Message {
778                role: Role::User,
779                content: "Hello".to_string(),
780                ..Default::default()
781            },
782            Message {
783                role: Role::Assistant,
784                content: "Hi there!".to_string(),
785                ..Default::default()
786            },
787        ];
788
789        let messages = assembler.assemble("How are you?", &[], &history);
790
791        // Should include system + history + current message
792        assert!(messages.len() >= 3);
793        assert_eq!(messages.last().unwrap().content, "How are you?");
794    }
795
796    #[test]
797    fn test_default_prompt_core_instructions() {
798        let assembler = ContextAssembler::with_defaults();
799        let messages = assembler.assemble("How do I connect OpenClaw?", &[], &[]);
800        let system = &messages[0].content;
801
802        assert!(system.contains("Brain"));
803        assert!(system.contains("SOUL"));
804        assert!(system.contains("biologically-inspired"));
805        assert!(system.contains("Episodic Memory"));
806        assert!(system.contains("Semantic Memory"));
807        assert!(system.contains("Proactivity"));
808        assert!(system.contains("TRUTH OVER HALLUCINATION"));
809        assert!(
810            system.contains("CURIOSITY"),
811            "SOUL prompt must include CURIOSITY operating principle"
812        );
813        assert!(
814            system.contains("ATTACHED CONTENT"),
815            "SOUL prompt must teach Brain how to handle <ATTACHED_CONTENT> blocks"
816        );
817        assert!(
818            system.contains("chat export deserves a conversational summary"),
819            "SOUL prompt must instruct response-shape adaptation by content type"
820        );
821    }
822
823    #[test]
824    fn default_capabilities_used_when_no_digest_supplied() {
825        let assembler = ContextAssembler::with_defaults();
826        let messages = assembler.assemble("what can you do?", &[], &[]);
827        let system = &messages[0].content;
828        // Falls back to the static always-on faculties.
829        assert!(system.contains(DEFAULT_CAPABILITIES));
830        assert!(system.contains("Episodic Memory"));
831    }
832
833    #[test]
834    fn live_capability_digest_overrides_default() {
835        let assembler = ContextAssembler::with_defaults();
836        let digest = "Your Capabilities:\n- Episodic Memory: ...\n\nMounted tools:\n- MCP server \"github\": create_issue";
837        let messages =
838            assembler.assemble_full("what can you do?", &[], &[], None, Some(digest), &[], &[]);
839        let system = &messages[0].content;
840        assert!(
841            system.contains("MCP server \"github\": create_issue"),
842            "live digest must reach the system prompt"
843        );
844        // The supplied digest replaces the static block — the default's
845        // Semantic/Proactivity bullets are not present unless the caller
846        // included them.
847        assert!(!system.contains("a web of facts about the user's world"));
848    }
849
850    #[test]
851    fn attachments_render_as_a_dedicated_system_message_before_user() {
852        let assembler = ContextAssembler::with_defaults();
853        let attachments = vec![Attachment {
854            display_path: "/Users/me/notes.md".to_string(),
855            snapshot: "# my notes\nbuy milk".to_string(),
856        }];
857        let messages =
858            assembler.assemble_full("read this", &[], &[], None, None, &attachments, &[]);
859
860        // Penultimate message should be the attachments block; last is
861        // the user message itself.
862        let user_msg = messages.last().expect("non-empty");
863        assert_eq!(user_msg.role, Role::User);
864        assert_eq!(user_msg.content, "read this");
865
866        let prev = &messages[messages.len() - 2];
867        assert_eq!(prev.role, Role::System);
868        assert!(
869            prev.content
870                .contains("<ATTACHED_CONTENT path=\"/Users/me/notes.md\">"),
871            "missing attached-content block:\n{}",
872            prev.content
873        );
874        assert!(prev.content.contains("buy milk"));
875        assert!(prev.content.contains("</ATTACHED_CONTENT>"));
876    }
877
878    #[test]
879    fn skipped_paths_render_as_a_tag_for_brain_to_mention() {
880        let assembler = ContextAssembler::with_defaults();
881        let skipped = vec![SkippedAttachment {
882            display_path: "/Users/me/missing.txt".to_string(),
883            reason: "path not found".to_string(),
884        }];
885        let messages = assembler.assemble_full("summarise it", &[], &[], None, None, &[], &skipped);
886        let prev = &messages[messages.len() - 2];
887        assert!(prev.content.contains("<SKIPPED_PATH"));
888        assert!(prev.content.contains("/Users/me/missing.txt"));
889        assert!(prev.content.contains("path not found"));
890    }
891
892    #[test]
893    fn no_attachments_means_no_extra_block() {
894        let assembler = ContextAssembler::with_defaults();
895        let before = assembler.assemble("hi", &[], &[]);
896        let after = assembler.assemble_full("hi", &[], &[], None, None, &[], &[]);
897        assert_eq!(
898            before.len(),
899            after.len(),
900            "no attachments must not add a message"
901        );
902    }
903
904    #[test]
905    fn large_attachment_is_truncated_to_budget() {
906        // Snapshot is 60_000 chars (~30_000 tokens). Default attachments
907        // budget is 2500 tokens ≈ 5000 chars; the rendered block must be
908        // far smaller than the input snapshot.
909        let huge = "x".repeat(60_000);
910        let assembler = ContextAssembler::with_defaults();
911        let attachments = vec![Attachment {
912            display_path: "/Users/me/huge.txt".to_string(),
913            snapshot: huge,
914        }];
915        let messages = assembler.assemble_full("read", &[], &[], None, None, &attachments, &[]);
916        let prev = &messages[messages.len() - 2];
917        assert!(
918            prev.content.contains("[truncated]"),
919            "huge attachment must be marked as truncated"
920        );
921        // Sanity: rendered block must be at least an order of magnitude
922        // smaller than the input snapshot.
923        assert!(
924            prev.content.chars().count() < 10_000,
925            "rendered block too large: {} chars",
926            prev.content.chars().count()
927        );
928    }
929
930    #[test]
931    fn test_onboarding_greeting_exists() {
932        assert!(
933            ONBOARDING_GREETING.contains("Brain"),
934            "greeting must mention Brain"
935        );
936        assert!(
937            ONBOARDING_GREETING.contains("name"),
938            "greeting must ask for the user's name"
939        );
940    }
941
942    #[test]
943    fn test_onboarding_addendum_exists() {
944        assert!(
945            ONBOARDING_ADDENDUM.contains("ONBOARDING MODE"),
946            "addendum must contain ONBOARDING MODE marker"
947        );
948        assert!(
949            ONBOARDING_ADDENDUM.contains("follow-up question"),
950            "addendum must instruct follow-up questions"
951        );
952    }
953
954    #[test]
955    fn test_estimate_tokens() {
956        let messages = vec![Message::user("Hello world")];
957
958        let tokens = ContextAssembler::estimate_tokens(&messages);
959        assert!(tokens > 0);
960        // "Hello world" is 11 chars; at 3 chars/token, ceil(11/3) = 4.
961        assert_eq!(tokens, 11usize.div_ceil(CHARS_PER_TOKEN));
962        assert_eq!(super::estimate_tokens("Hello world"), 4);
963    }
964}