brainos-cortex 0.5.0

//! Context assembly — builds LLM prompts with token budget management.
//!
//! Manages the token budget for LLM context windows:
//! - System prompt (~500 tokens)
//! - User model snapshot (~300 tokens)
//! - Conversation history (~2000 tokens)
//! - Path attachments (~2500 tokens)
//! - Retrieved memories (remaining budget)
//! - Response buffer (~400 tokens)

use crate::llm::Message;
use hippocampus::search::Memory;

/// Conservative chars-per-token ratio used by the prompt estimator
/// ([`estimate_tokens`]) and by attachment truncation.
///
/// English prose runs ~4 chars/token and code/JSON (common in attachments)
/// ~3, so we use 3: the estimate never badly *under*-counts (which would
/// overflow the model's true window), while being far less wasteful than the
/// old 2-chars/token guess (which threw away ~half the usable window).
///
/// Deliberately a portable heuristic, not a model-specific BPE tokenizer —
/// Brain is multi-provider (Ollama/qwen, OpenAI, …) and no single vocabulary
/// is correct for all of them. Swapping in a real tokenizer later only needs
/// to touch this module.
pub const CHARS_PER_TOKEN: usize = 3;

/// Approximate how many tokens `text` occupies, for budgeting and packing.
///
/// Counts Unicode scalar values (not bytes) and divides by
/// [`CHARS_PER_TOKEN`], rounding up: the empty string costs zero tokens and
/// any non-empty text costs at least one. Token estimates in this module
/// route through this single function so the heuristic can't drift between
/// call sites.
pub fn estimate_tokens(text: &str) -> usize {
    let char_count = text.chars().count();
    char_count.div_ceil(CHARS_PER_TOKEN)
}

/// Default token budgets, sized for an 8192-token context window.
///
/// The per-section values are fixed allocations; whatever is left after
/// subtracting them from `total_context` (2492 tokens with these numbers) is
/// what [`TokenBudget::memory_budget`] hands to retrieved memories.
pub const TOKEN_BUDGETS: TokenBudget = TokenBudget {
    system_prompt: 500,
    user_model: 300,
    conversation_history: 2000,
    response_buffer: 400,
    attachments: 2500,
    total_context: 8192, // Default for most models
};

/// Hardcoded greeting for the first-ever chat session (0 facts).
/// Printed directly — no LLM call needed.
///
/// The trailing `\` line continuations keep the literal a single line of
/// text at runtime; `\u{2014}` is an em dash.
pub const ONBOARDING_GREETING: &str = "Hey! I'm Brain \u{2014} your personal memory engine. \
I run locally on your machine and I'm here to remember what matters to you. \
I don't know anything about you yet, so let's fix that. What's your name?";

/// System-prompt addendum injected while the user has fewer than 5 facts.
/// Makes the LLM naturally curious and question-asking during onboarding.
///
/// The text starts with a blank line of its own, so it can be concatenated
/// straight onto the end of the base system prompt (which is how
/// `ContextAssembler::assemble_full` applies an addendum).
pub const ONBOARDING_ADDENDUM: &str = r#"

[ONBOARDING MODE — the user is new and you know very little about them]
- After every user message, end your response with ONE short, focused follow-up question to learn about the user (name, role, projects, interests).
- Keep responses to 1-3 sentences plus the question.
- Sound warm, curious, and conversational — not like an intake form.
- NEVER say "I don't have that in my memory yet" — instead, be proactive about learning.
- Once you learn something, acknowledge it naturally and ask about the next thing."#;

/// The always-on cognitive faculties, rendered as the fallback "Your
/// Capabilities" section of the SOUL prompt.
///
/// Used verbatim when no live capability digest is supplied (non-chat LLM
/// paths, tests, custom prompts) and as the prefix of the live digest the
/// chat path builds (see `signal::pipeline::conversation`). Keeping the
/// wording in one place stops the static and live views from drifting.
pub const DEFAULT_CAPABILITIES: &str = r#"Your Capabilities:
- Episodic Memory: You recall past experiences and conversations provided as context.
- Semantic Memory: You maintain a web of facts about the user's world, projects, and habits.
- Proactivity: You don't just react; you anticipate needs based on established patterns (provided in context)."#;

/// Token budget allocation for one assembled prompt.
///
/// Every field is a token count. Retrieved memories have no field of their
/// own: they receive whatever remains of `total_context` once the other
/// sections are subtracted (see [`TokenBudget::memory_budget`]).
#[derive(Debug, Clone, Copy)]
pub struct TokenBudget {
    /// Tokens reserved for the system prompt.
    pub system_prompt: usize,
    /// Tokens reserved for the user-model (profile) snapshot.
    pub user_model: usize,
    /// Cap on replayed conversation history; the assembler keeps the most
    /// recent messages that fit under it.
    pub conversation_history: usize,
    /// Tokens held back for the model's response.
    pub response_buffer: usize,
    /// Cap on rendered path-attachments (snapshots of files/dirs the
    /// user referenced in chat). Truncated to fit by the assembler.
    pub attachments: usize,
    /// Size of the whole context window the sections are carved out of.
    pub total_context: usize,
}

impl TokenBudget {
    /// Calculate remaining budget for memories.
    pub fn memory_budget(&self) -> usize {
        self.total_context
            .saturating_sub(self.system_prompt)
            .saturating_sub(self.user_model)
            .saturating_sub(self.conversation_history)
            .saturating_sub(self.response_buffer)
            .saturating_sub(self.attachments)
    }

    /// Build a budget scaled to a model's real context window.
    ///
    /// Fixed overheads (system prompt, user-model snapshot, response buffer)
    /// don't grow with the window, so they're reserved as constants. The
    /// remaining *working pool* is split proportionally between conversation
    /// history, path-attachments, and retrieved memories — so a 128k-window
    /// model reads far more file content and recalls more memory than the
    /// conservative 8k default, instead of clipping everything to a fixed cap.
    ///
    /// At `total_tokens == 8192` this stays close to the historical fixed
    /// split (history ≈2k, attachments ≈2.5k, memory ≈2.5k). Memory is the
    /// implicit remainder via [`Self::memory_budget`].
    pub fn for_context_size(total_tokens: usize) -> Self {
        let system_prompt = TOKEN_BUDGETS.system_prompt;
        let user_model = TOKEN_BUDGETS.user_model;
        let response_buffer = TOKEN_BUDGETS.response_buffer;
        let reserved = system_prompt + user_model + response_buffer;

        // Working pool after fixed overheads. Below the reserve we can't split
        // anything — fall back to zero variable sections (the assembler still
        // renders the system prompt).
        let pool = total_tokens.saturating_sub(reserved);
        // History stays modest (recent turns dominate relevance); attachments
        // and memory get the lion's share and scale with the window.
        let conversation_history = pool * 28 / 100;
        let attachments = pool * 36 / 100;
        // memory_budget() consumes the remainder (~36% of the pool).

        Self {
            system_prompt,
            user_model,
            conversation_history,
            response_buffer,
            attachments,
            total_context: total_tokens,
        }
    }
}

/// The default budget is [`TOKEN_BUDGETS`], the fixed split for an
/// 8192-token window.
impl Default for TokenBudget {
    /// Return a copy of [`TOKEN_BUDGETS`].
    fn default() -> Self {
        TOKEN_BUDGETS
    }
}

/// Path-attachment grounding for a chat turn. When the user references
/// a local path in their message, the pipeline reads it on their behalf
/// and hands the snapshot here so the LLM can see *what's actually
/// there* alongside memories and history. The SOUL prompt's
/// "ATTACHED CONTENT" instructions explain how to read these blocks.
#[derive(Debug, Clone)]
pub struct Attachment {
    /// Path token as the user wrote it. Preserved verbatim so the LLM
    /// can refer back to the user's own wording. Rendered as the `path`
    /// attribute of the `<ATTACHED_CONTENT>` tag.
    pub display_path: String,
    /// Rendered snapshot — directory listing + histogram + inlined
    /// files for a directory, or file excerpt for a file. Built by
    /// `signal::pipeline::build_directory_snapshot` /
    /// `build_file_snapshot`. May be truncated at render time to fit the
    /// attachments budget.
    pub snapshot: String,
}

/// A path the user referenced that couldn't be attached (not found,
/// outside `security.allowed_paths`, wrong file kind). Rendered as a
/// `<SKIPPED_PATH>` tag so Brain can mention it instead of silently
/// dropping the reference.
#[derive(Debug, Clone)]
pub struct SkippedAttachment {
    /// Path as shown to the LLM; becomes the tag's `path` attribute.
    pub display_path: String,
    /// Why the path was skipped; becomes the tag's `reason` attribute
    /// (double quotes are replaced with single quotes when rendered).
    pub reason: String,
}

/// User profile data for context injection.
///
/// An all-empty profile (the `Default`) renders to an empty string and is
/// left out of the system prompt entirely.
#[derive(Debug, Clone, Default)]
pub struct UserProfile {
    /// The user's name, if known.
    pub name: Option<String>,
    /// Rendered as a comma-separated "User preferences: …" list.
    pub preferences: Vec<String>,
    /// Rendered as a comma-separated "User goals: …" list.
    pub goals: Vec<String>,
    /// Rendered as a semicolon-separated "Key facts: …" list.
    pub facts: Vec<String>,
}

impl UserProfile {
    /// Render the profile as one line of prose for the system prompt.
    ///
    /// Sections appear in a fixed order — name, preferences, goals, facts —
    /// and are separated by a single space. A section is omitted when its
    /// data is absent or empty, so an empty profile renders as `""`.
    pub fn to_context_string(&self) -> String {
        let name_line = self
            .name
            .as_ref()
            .map(|name| format!("The user's name is {}.", name));
        let preferences_line = (!self.preferences.is_empty())
            .then(|| format!("User preferences: {}", self.preferences.join(", ")));
        let goals_line =
            (!self.goals.is_empty()).then(|| format!("User goals: {}", self.goals.join(", ")));
        let facts_line =
            (!self.facts.is_empty()).then(|| format!("Key facts: {}", self.facts.join("; ")));

        // Keep only the sections that produced text, in declaration order.
        vec![name_line, preferences_line, goals_line, facts_line]
            .into_iter()
            .flatten()
            .collect::<Vec<String>>()
            .join(" ")
    }

    /// Estimated token cost of the rendered profile, using the module-wide
    /// [`estimate_tokens`] heuristic. Zero exactly when the profile renders
    /// to an empty string.
    pub fn estimate_tokens(&self) -> usize {
        let rendered = self.to_context_string();
        estimate_tokens(&rendered)
    }
}

/// Context assembler — builds prompts respecting token budgets.
///
/// Holds the per-section budget, the base system prompt, and the user
/// profile; the `assemble*` methods combine these with per-turn inputs
/// (memories, history, attachments) without mutating the assembler.
pub struct ContextAssembler {
    /// Per-section token allocation enforced during assembly.
    budget: TokenBudget,
    /// Base system prompt; the built-in SOUL prompt unless overridden via
    /// `with_system_prompt`.
    system_prompt: String,
    /// Profile appended to the system message when it renders non-empty.
    user_profile: UserProfile,
}

impl ContextAssembler {
    /// Create an assembler that enforces `budget`, starting from the built-in
    /// SOUL system prompt and an empty user profile.
    pub fn new(budget: TokenBudget) -> Self {
        let system_prompt = Self::default_system_prompt();
        let user_profile = UserProfile::default();
        Self {
            budget,
            system_prompt,
            user_profile,
        }
    }

    /// Create an assembler using the default budget ([`TOKEN_BUDGETS`]).
    pub fn with_defaults() -> Self {
        let default_budget = TOKEN_BUDGETS;
        Self::new(default_budget)
    }

    /// The active token budget — lets the pipeline plan history compaction
    /// against the same per-section allocation the assembler enforces.
    ///
    /// Returned by value; [`TokenBudget`] is `Copy`.
    pub fn budget(&self) -> TokenBudget {
        self.budget
    }

    /// Set custom system prompt.
    pub fn with_system_prompt(mut self, prompt: impl Into<String>) -> Self {
        self.system_prompt = prompt.into();
        self
    }

    /// Set user profile.
    pub fn with_user_profile(mut self, profile: UserProfile) -> Self {
        self.user_profile = profile;
        self
    }

    /// Build the built-in SOUL system prompt used by [`Self::new`] when no
    /// custom prompt is supplied.
    ///
    /// The prompt text refers to things the assembler emits elsewhere in
    /// this module — the `Relevant memories:` block label, the "Your
    /// Capabilities" section, and the `<ATTACHED_CONTENT>` / `<SKIPPED_PATH>`
    /// tags — so keep those labels in sync when editing either side.
    /// NOTE(review): the prompt also mentions an "About Brain" section that
    /// is not produced in this file; presumably it arrives via the live
    /// capability digest — confirm against the caller.
    fn default_system_prompt() -> String {
        r#"You are the SOUL of Brain OS — a biologically-inspired, proactive cognitive engine. You are not just an assistant; you are the user's digital hippocampus and prefrontal cortex, operating with deep context and long-term memory.

Your Identity:
- You are "Brain", the central intelligence of a local-first memory system.
- You are private, secure, and run entirely on the user's machine.
- Your purpose is to eliminate "context amnesia" by bridging the gap between siloed tools and the user's life.

Operating Principles:
1. TRUTH OVER HALLUCINATION: Ground answers in (a) the provided memories, (b) the live conversation history above this message, and (c) general world knowledge. If a *fact about the user* is genuinely absent from memory AND not present in the conversation, state: "I don't have that in my memory yet." Do NOT say this when the user is asking about things discussed earlier in the current conversation — answer from the message thread itself.
   - SELF-KNOWLEDGE BOUNDARY: General world knowledge is fine for the world at large, but it is NOT a source for claims about Brain itself. Any statement about Brain's own CLI commands, config keys/schema, file layout, or features MUST come from the "About Brain" and "Your Capabilities" sections below — never from general knowledge or guesswork. If the answer isn't in those sections, say so plainly ("that isn't something Brain exposes" / "that's not a command/config key I have") and, where useful, point to the closest real command or config key. Never invent command names, config keys, templating syntax, or option flags — a confident, plausible-looking fabrication of Brain's surface is the worst failure mode.
   - MEMORY GROUNDING: Never assert a specific fact about the user unless it appears verbatim in the "Relevant memories:" block or earlier in this conversation. This applies with full force when you are *describing what you remember* (e.g. answering "what do you know about me?" or "what are your capabilities?"): do NOT manufacture illustrative examples — never say things like "you bike to work" or "you deploy on Fridays" to demonstrate recall. Describe the *kinds* of things you store (preferences, projects, habits, people, decisions) in the abstract, and cite only real entries from the memories block. A fabricated personal "memory" is a betrayal of a memory product's core promise — when memory is empty or lacks the detail, say so.
2. SEAMLESS RECALL: Reference memories and prior turns naturally ("You mentioned earlier...", "Based on what we discussed...").
3. COGNITIVE CLARITY: Be concise, direct, and insightful. Avoid corporate fluff. Match response length to the question — simple greetings get one or two sentences, not tables.
4. CONTEXTUAL AWARENESS: Use the provided User Profile to tailor your tone and relevance.
5. CURIOSITY: When you lack context about the user, ask one focused follow-up question. Learning about the user is part of your job — don't wait to be told.
6. FORMATTING: The user's terminal renders markdown. Use it lightly when it helps (lists for multi-item answers, **bold** for emphasis, `code` for identifiers). Skip headings and tables for short replies. Prefer bullet lists over tables — the terminal is narrow and wide tables render poorly; only use a table for genuinely tabular data with short cells.
7. ATTACHED CONTENT: When the user references a local path, an `<ATTACHED_CONTENT path="…">` block is provided below as grounding — that is what is actually on disk, read on the user's behalf. Adapt your response shape to the *content*, not to a template: a chat export deserves a conversational summary with themes, tone, and an honest opinion; a code project deserves a technical overview; a folder of photos or media deserves an honest "I can see these file types but I can't view the images themselves." Never describe a non-code folder as if it were a software project. If a `<SKIPPED_PATH reason="…"/>` tag appears, the user named a path I couldn't read — acknowledge it briefly and ask them to confirm or rephrase.

You are the user's partner in thought. Your goal is to make their digital life feel like a continuous, coherent stream of intelligence."#
            .to_string()
    }

    /// Assemble a prompt for one chat turn.
    ///
    /// Combines the system prompt, the retrieved `memories`, and the prior
    /// `conversation_history` with `user_message`, and returns the messages
    /// ready for the LLM. This is the plain form of
    /// [`Self::assemble_full`]: no system-prompt addendum, the default
    /// capability section, and no attached or skipped paths.
    pub fn assemble(
        &self,
        user_message: &str,
        memories: &[Memory],
        conversation_history: &[Message],
    ) -> Vec<Message> {
        self.assemble_full(
            user_message,
            memories,
            conversation_history,
            None,
            None,
            &[],
            &[],
        )
    }

    /// Like [`assemble`], but appends `addendum` to the system prompt if provided.
    /// Used to switch prompt modes per-turn (e.g. onboarding) without mutating
    /// the shared assembler.
    pub fn assemble_with_addendum(
        &self,
        user_message: &str,
        memories: &[Memory],
        conversation_history: &[Message],
        addendum: Option<&str>,
    ) -> Vec<Message> {
        self.assemble_full(
            user_message,
            memories,
            conversation_history,
            addendum,
            None,
            &[],
            &[],
        )
    }

    /// Full assembly with path-attachment grounding.
    ///
    /// Message order in the result:
    /// 1. one System message: base prompt (+ `addendum` when non-empty),
    ///    the capability section, then the user profile if it renders
    ///    non-empty;
    /// 2. an optional System message `Relevant memories:` holding as many
    ///    of `memories` (in the given order) as fit the memory allowance;
    /// 3. the most recent `conversation_history` messages that fit
    ///    `budget.conversation_history`, in chronological order;
    /// 4. an optional System message with `<ATTACHED_CONTENT>` blocks and
    ///    `<SKIPPED_PATH>` tags, placed right before the user's message —
    ///    the closest attention slot to "what the user just put on the
    ///    table";
    /// 5. the User message itself.
    ///
    /// Attachment snapshots are truncated against `budget.attachments` by
    /// `render_attachments_block`.
    ///
    /// `capabilities` is the "Your Capabilities" section of the SOUL
    /// prompt. The chat path passes a *live* digest rendered from the
    /// currently-wired tools and agents; every other path passes `None`
    /// and falls back to [`DEFAULT_CAPABILITIES`]. Either way the section
    /// is appended after the base prompt so the reasoner always sees an
    /// explicit capability manifest.
    ///
    /// # Memory allowance
    ///
    /// [`TokenBudget::memory_budget`] already excludes the tokens reserved
    /// for the system prompt and user model, so memories are charged only
    /// for the part of the real system message that *exceeds* that
    /// reservation. This keeps the assembled prompt inside the window
    /// without counting the system prompt against memories twice.
    pub fn assemble_full(
        &self,
        user_message: &str,
        memories: &[Memory],
        conversation_history: &[Message],
        addendum: Option<&str>,
        capabilities: Option<&str>,
        attachments: &[Attachment],
        skipped: &[SkippedAttachment],
    ) -> Vec<Message> {
        let mut messages = Vec::new();

        // 1. System prompt with optional addendum and user profile
        let base_prompt = match addendum {
            Some(extra) if !extra.is_empty() => {
                format!("{}{}", self.system_prompt, extra)
            }
            _ => self.system_prompt.clone(),
        };
        // Capability manifest: live digest from the chat path, or the
        // static always-on faculties everywhere else.
        let prompt_with_caps = format!(
            "{}\n\n{}",
            base_prompt,
            capabilities.unwrap_or(DEFAULT_CAPABILITIES)
        );
        // Render the profile once; an empty rendering means "no profile".
        let profile_text = self.user_profile.to_context_string();
        let system_content = if profile_text.is_empty() {
            prompt_with_caps
        } else {
            format!("{}\n\nUser Profile: {}", prompt_with_caps, profile_text)
        };
        let system_tokens = estimate_tokens(&system_content);
        messages.push(Message::system(system_content));

        // 2. Add memories as system context (if within budget).
        //    The system message has its own reservation; only its overrun
        //    beyond that reservation eats into the memory allowance.
        let reserved_for_system = self
            .budget
            .system_prompt
            .saturating_add(self.budget.user_model);
        let system_overrun = system_tokens.saturating_sub(reserved_for_system);
        let memory_allowance = self.budget.memory_budget().saturating_sub(system_overrun);

        let mut memory_tokens_used: usize = 0;
        let mut memory_context = String::new();

        for memory in memories {
            let memory_text = if let Some(ref agent) = memory.agent {
                format!(
                    "- [{:?}, agent: {}] {}\n",
                    memory.source, agent, memory.content
                )
            } else {
                format!("- [{:?}] {}\n", memory.source, memory.content)
            };
            let memory_tokens = estimate_tokens(&memory_text);

            // Memories arrive ranked; stop at the first one that doesn't fit.
            if memory_tokens_used + memory_tokens > memory_allowance {
                break;
            }

            memory_context.push_str(&memory_text);
            memory_tokens_used += memory_tokens;
        }

        if !memory_context.is_empty() {
            messages.push(Message::system(format!(
                "Relevant memories:\n{}",
                memory_context
            )));
        }

        // 3. Add conversation history (respecting budget)
        let mut history_tokens: usize = 0;
        let mut included_history: Vec<Message> = Vec::new();

        // Start from most recent and work backwards
        for msg in conversation_history.iter().rev() {
            let msg_tokens = estimate_tokens(&msg.content);
            if history_tokens + msg_tokens > self.budget.conversation_history {
                break;
            }
            included_history.push(msg.clone());
            history_tokens += msg_tokens;
        }

        // Reverse to maintain chronological order
        included_history.reverse();
        messages.extend(included_history);

        // 4. Attached path grounding (renders right before the user
        //    message so the LLM has it freshly in attention).
        if let Some(block) = render_attachments_block(attachments, skipped, self.budget.attachments)
        {
            messages.push(Message::system(block));
        }

        // 5. Add current user message
        messages.push(Message::user(user_message.to_string()));

        messages
    }

    /// Quick estimate of the combined token cost of `messages`.
    ///
    /// Sums the module-level [`estimate_tokens`] heuristic over each
    /// message's `content`; role and other per-message overhead are not
    /// counted.
    pub fn estimate_tokens(messages: &[Message]) -> usize {
        let mut total = 0;
        for message in messages {
            total += estimate_tokens(&message.content);
        }
        total
    }
}

/// Build the `<ATTACHED_CONTENT>` / `<SKIPPED_PATH>` block that goes
/// just before the user's message. Returns `None` when there's nothing
/// to render.
///
/// `budget_tokens` is converted to a character ceiling with
/// [`CHARS_PER_TOKEN`], the same ratio the estimator uses. Each attachment
/// is then capped at an equal share of the characters still unspent, so
/// budget an earlier attachment leaves unused rolls forward to later ones.
/// The per-attachment cap never drops below 600 characters, which means
/// the block can exceed the budget when the budget is very small or there
/// are many attachments. Skipped-path tags are not charged to the budget.
///
/// Attribute values (`path`, `reason`) have `"` replaced with `'` so a
/// quote in a path or reason cannot terminate the attribute early.
fn render_attachments_block(
    attachments: &[Attachment],
    skipped: &[SkippedAttachment],
    budget_tokens: usize,
) -> Option<String> {
    if attachments.is_empty() && skipped.is_empty() {
        return None;
    }

    // Attribute values are wrapped in double quotes; neutralise embedded ones.
    let attr_value = |raw: &str| raw.replace('"', "'");

    // Convert the token budget back to a char ceiling using the same ratio
    // the estimator assumes, so truncation and packing stay consistent.
    let char_budget = budget_tokens.saturating_mul(CHARS_PER_TOKEN);
    let mut out = String::new();
    let mut chars_used = 0usize;

    for (i, att) in attachments.iter().enumerate() {
        // Per-attachment ceiling: equal share of remaining budget,
        // floored at 600 chars so a small attachment can always fit.
        // `i < attachments.len()`, so the divisor is always at least 1.
        let remaining_atts = attachments.len() - i;
        let per_attachment = (char_budget.saturating_sub(chars_used) / remaining_atts).max(600);
        let body = truncate_snapshot(&att.snapshot, per_attachment);
        let block = format!(
            "<ATTACHED_CONTENT path=\"{}\">\n{}\n</ATTACHED_CONTENT>\n",
            attr_value(&att.display_path),
            body
        );
        // Charge the whole rendered block (tags included) to the budget.
        chars_used = chars_used.saturating_add(block.chars().count());
        out.push_str(&block);
    }
    for sk in skipped {
        let tag = format!(
            "<SKIPPED_PATH path=\"{}\" reason=\"{}\"/>\n",
            attr_value(&sk.display_path),
            attr_value(&sk.reason),
        );
        out.push_str(&tag);
    }
    Some(out)
}

/// Truncate a snapshot to at most `cap_chars` characters.
///
/// Lengths are measured in `char`s (Unicode scalar values), so a multi-byte
/// character is never split.
///
/// - If `s` already fits, it is returned unchanged.
/// - Otherwise the first `cap_chars - 20` characters are kept and a short
///   marker (`"\n…[truncated]"`) is appended so the LLM knows content was
///   cut; the 20-character reserve leaves headroom for the marker.
/// - If `cap_chars` is too small to hold even the marker, `s` is hard-cut
///   to `cap_chars` characters with no marker, so the result never exceeds
///   the cap (and is never longer than the input).
fn truncate_snapshot(s: &str, cap_chars: usize) -> String {
    const TRUNCATION_MARKER: &str = "\n…[truncated]";
    /// Characters held back from the cap to make room for the marker.
    const MARKER_RESERVE: usize = 20;

    // `nth(cap_chars)` yields a char only when `s` has more than
    // `cap_chars` of them, and stops scanning as soon as it finds one.
    let exceeds_cap = s.chars().nth(cap_chars).is_some();
    if !exceeds_cap {
        return s.to_string();
    }

    // No room for the marker at all: honour the cap with a plain cut.
    if cap_chars < TRUNCATION_MARKER.chars().count() {
        return s.chars().take(cap_chars).collect();
    }

    let kept_chars = cap_chars.saturating_sub(MARKER_RESERVE);
    let mut out: String = s.chars().take(kept_chars).collect();
    out.push_str(TRUNCATION_MARKER);
    out
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::llm::Role;

    #[test]
    fn test_token_budget_memory_allocation() {
        let budget = TokenBudget::default();
        let memory_budget = budget.memory_budget();

        // 8192 - 500 - 300 - 2000 - 400 - 2500 = 2492
        assert_eq!(memory_budget, 2492);
    }

    #[test]
    fn for_context_size_scales_attachments_and_memory_with_window() {
        let small = TokenBudget::for_context_size(8192);
        let large = TokenBudget::for_context_size(128000);

        // Fixed overheads don't move with the window.
        assert_eq!(large.system_prompt, small.system_prompt);
        assert_eq!(large.response_buffer, small.response_buffer);

        // A 128k model reads far more file content and recalls far more memory.
        assert!(
            large.attachments > small.attachments * 10,
            "attachments should scale with the window: {} vs {}",
            large.attachments,
            small.attachments,
        );
        assert!(large.memory_budget() > small.memory_budget() * 10);

        // The pieces still fit inside the declared window.
        let used = large.system_prompt
            + large.user_model
            + large.conversation_history
            + large.response_buffer
            + large.attachments
            + large.memory_budget();
        assert!(used <= large.total_context);

        // At the 8k default the split stays close to the historical fixed one.
        assert!((1800..=2200).contains(&small.conversation_history));
        assert!((2300..=2700).contains(&small.attachments));
    }

    #[test]
    fn for_context_size_below_reserve_is_safe() {
        // A tiny window can't fund variable sections, but must not panic or
        // produce a budget that exceeds the window.
        let budget = TokenBudget::for_context_size(500);
        assert_eq!(budget.total_context, 500);
        assert_eq!(budget.conversation_history, 0);
        assert_eq!(budget.attachments, 0);
        assert_eq!(budget.memory_budget(), 0);
    }

    // ── Property tests ────────────────────────────────────────────────
    //
    // The estimator and budget split are the safety floor for every prompt
    // Brain assembles: under-counting tokens overflows the model's real
    // window, and a budget whose sections sum past `total_context` clips
    // content the assembler thought it had room for. These assert the
    // invariants for arbitrary input rather than the hand-picked sizes above.

    use proptest::prelude::*;

    proptest! {
        #![proptest_config(ProptestConfig { cases: 512, .. ProptestConfig::default() })]

        /// The estimator must never *under*-count past a single token's worth
        /// of characters — that's the whole point of the conservative 3
        /// chars/token ratio (see `CHARS_PER_TOKEN`). If this breaks, packed
        /// prompts can silently overflow the provider's context window.
        #[test]
        fn estimate_never_undercounts(s in ".*") {
            let chars = s.chars().count();
            let est = estimate_tokens(&s);
            prop_assert!(
                chars <= est * CHARS_PER_TOKEN,
                "estimate under-counted: {chars} chars but est*ratio = {}", est * CHARS_PER_TOKEN
            );
            // …and never wastefully over-counts by more than one token.
            prop_assert!(est * CHARS_PER_TOKEN < chars + CHARS_PER_TOKEN);
            // Zero tokens iff empty.
            prop_assert_eq!(est == 0, chars == 0);
        }

        /// The estimate is a function of *character* count, not byte length —
        /// guards against a regression to `str::len()` that would over-count
        /// multi-byte (non-ASCII) text and waste budget. A string of N
        /// multi-byte codepoints must estimate the same as N ASCII ones.
        #[test]
        fn estimate_counts_chars_not_bytes(n in 0usize..512) {
            prop_assert_eq!(
                estimate_tokens(&"€".repeat(n)), // 3 bytes/char
                estimate_tokens(&"a".repeat(n)), // 1 byte/char
            );
        }

        /// Splitting text and budgeting the pieces separately must never count
        /// *fewer* tokens than budgeting the whole — otherwise per-section
        /// accounting could fit content the combined prompt can't hold.
        #[test]
        fn estimate_is_subadditive(a in ".*", b in ".*") {
            let whole = estimate_tokens(&format!("{a}{b}"));
            let parts = estimate_tokens(&a) + estimate_tokens(&b);
            prop_assert!(parts >= whole, "split under-counted: {parts} < {whole}");
        }

        /// The budget split is exact and self-consistent for *any* window: the
        /// variable pool (history + attachments + memory) always equals the
        /// window minus the fixed reserve, and the six sections together cover
        /// exactly the window (or the fixed reserve when the window is too
        /// small to fund anything — a window that can't fit the fixed prompt).
        #[test]
        fn for_context_size_split_is_exact(total in 0usize..8_000_000) {
            let b = TokenBudget::for_context_size(total);
            prop_assert_eq!(b.total_context, total);

            let reserved = b.system_prompt + b.user_model + b.response_buffer;
            let pool = total.saturating_sub(reserved);

            // The three variable sections partition the pool exactly.
            prop_assert_eq!(
                b.conversation_history + b.attachments + b.memory_budget(),
                pool,
                "variable sections must sum to the pool"
            );

            // All six sections cover the window exactly, or the fixed reserve
            // when the window is below it.
            let used = b.system_prompt
                + b.user_model
                + b.conversation_history
                + b.response_buffer
                + b.attachments
                + b.memory_budget();
            prop_assert_eq!(used, total.max(reserved));
        }

        /// Fixed overheads never move with the window, and a window at least
        /// twice the reserve grows both attachments and memory when doubled
        /// (doubling clears the ±1 floor jitter of the percentage split).
        #[test]
        fn for_context_size_scales_monotonically(total in 2_400usize..4_000_000) {
            let small = TokenBudget::for_context_size(total);
            let large = TokenBudget::for_context_size(total * 2);

            prop_assert_eq!(large.system_prompt, small.system_prompt);
            prop_assert_eq!(large.user_model, small.user_model);
            prop_assert_eq!(large.response_buffer, small.response_buffer);

            prop_assert!(large.attachments > small.attachments);
            prop_assert!(large.memory_budget() > small.memory_budget());
            prop_assert!(large.conversation_history > small.conversation_history);
        }
    }

    #[test]
    fn test_user_profile_to_context() {
        let profile = UserProfile {
            name: Some("Alice".to_string()),
            preferences: vec!["coffee".to_string(), "quiet mornings".to_string()],
            goals: vec!["learn Rust".to_string()],
            facts: vec!["works remotely".to_string()],
        };

        let context = profile.to_context_string();
        assert!(context.contains("Alice"));
        assert!(context.contains("coffee"));
        assert!(context.contains("learn Rust"));
    }

    #[test]
    fn test_assemble_with_addendum_injects_into_system_prompt() {
        let assembler = ContextAssembler::with_defaults();
        let messages = assembler.assemble_with_addendum("hi", &[], &[], Some(ONBOARDING_ADDENDUM));

        let system = messages
            .iter()
            .find(|m| matches!(m.role, Role::System))
            .expect("system message");
        assert!(
            system.content.contains("[ONBOARDING MODE"),
            "onboarding addendum should be present in system prompt"
        );
    }

    #[test]
    fn system_prompt_forbids_fabricated_memories() {
        // The SOUL prompt must carry the memory-grounding rule that stops the
        // reasoner inventing first-person "memories" (WS3). Anchored on the
        // base prompt so it's present on every turn, onboarding or not.
        let assembler = ContextAssembler::with_defaults();
        let messages = assembler.assemble("what do you know about me?", &[], &[]);
        let system = &messages[0].content;
        assert!(
            system.contains("MEMORY GROUNDING"),
            "memory-grounding rule missing from system prompt"
        );
        assert!(
            system.contains("Relevant memories:"),
            "rule should anchor on the real memories block label"
        );
    }

    #[test]
    fn test_assemble_without_addendum_matches_plain_assemble() {
        let assembler = ContextAssembler::with_defaults();
        let a = assembler.assemble("hi", &[], &[]);
        let b = assembler.assemble_with_addendum("hi", &[], &[], None);
        assert_eq!(a.len(), b.len());
        assert_eq!(a[0].content, b[0].content);
    }

    #[test]
    fn test_context_assembler_basic() {
        use hippocampus::search::MemorySource;

        let assembler = ContextAssembler::with_defaults();

        let memories = vec![Memory {
            id: "1".to_string(),
            content: "User likes Rust programming".to_string(),
            source: MemorySource::Semantic,
            score: 0.9,
            importance: 0.8,
            timestamp: "2026-01-01".to_string(),
            agent: None,
        }];

        let history = vec![];
        let messages = assembler.assemble("What language should I learn?", &memories, &history);

        // Should have: system prompt, memory context, user message
        assert!(messages.len() >= 2);
        assert_eq!(
            messages.last().unwrap().content,
            "What language should I learn?"
        );
        assert_eq!(messages.last().unwrap().role, Role::User);
    }

    /// Only memories that carry an `agent` get an `agent: <name>` label in
    /// the rendered memories block; memories without one stay unlabelled.
    #[test]
    fn test_context_assembler_agent_attribution() {
        use hippocampus::search::MemorySource;

        let assembler = ContextAssembler::with_defaults();

        let memories = vec![
            Memory {
                id: "1".to_string(),
                content: "User likes coffee".to_string(),
                source: MemorySource::Episodic,
                score: 0.9,
                importance: 0.8,
                timestamp: "2026-01-01".to_string(),
                agent: Some("chat-bot".to_string()),
            },
            Memory {
                id: "2".to_string(),
                content: "User works remotely".to_string(),
                source: MemorySource::Semantic,
                score: 0.85,
                importance: 0.7,
                timestamp: "2026-01-02".to_string(),
                agent: None,
            },
        ];

        let messages = assembler.assemble("Tell me about the user", &memories, &[]);

        // The memories block is its own system message starting with the
        // label; `starts_with` avoids matching the base system prompt, which
        // now references "Relevant memories:" in its memory-grounding rule.
        let memory_msg = messages
            .iter()
            .find(|m| m.content.starts_with("Relevant memories:"))
            .expect("should have memory context message");

        assert!(
            memory_msg.content.contains("agent: chat-bot"),
            "memory with agent should include attribution"
        );
        // Exactly one label for two memories: the agent-less memory must not
        // have produced an `agent: ` label of its own. (The previous
        // `!contains(..) || count == 1` form had a dead first branch, since
        // the assertion above already guarantees the label is present.)
        assert_eq!(
            memory_msg.content.matches("agent: ").count(),
            1,
            "memory without agent should NOT include agent label"
        );
    }

    /// Prior conversation turns are passed alongside the new query; the
    /// assembled output must hold at least three messages and end with the
    /// current user text.
    #[test]
    fn test_context_assembler_with_history() {
        // Small builder so each prior turn reads as a single line.
        let turn = |role: Role, text: &str| Message {
            role,
            content: text.to_string(),
            ..Default::default()
        };
        let history = vec![
            turn(Role::User, "Hello"),
            turn(Role::Assistant, "Hi there!"),
        ];

        let assembler = ContextAssembler::with_defaults();
        let messages = assembler.assemble("How are you?", &[], &history);

        // Should include system + history + current message
        assert!(messages.len() >= 3);
        let newest = messages.last().unwrap();
        assert_eq!(newest.content, "How are you?");
    }

    /// The default system prompt (first assembled message) must contain each
    /// of the core identity, faculty and operating-principle markers below.
    #[test]
    fn test_default_prompt_core_instructions() {
        let assembler = ContextAssembler::with_defaults();
        let messages = assembler.assemble("How do I connect OpenClaw?", &[], &[]);
        let system = messages[0].content.as_str();
        let mentions = |needle: &str| system.contains(needle);

        // Identity and faculties.
        assert!(mentions("Brain"));
        assert!(mentions("SOUL"));
        assert!(mentions("biologically-inspired"));
        assert!(mentions("Episodic Memory"));
        assert!(mentions("Semantic Memory"));
        assert!(mentions("Proactivity"));

        // Operating principles.
        assert!(mentions("TRUTH OVER HALLUCINATION"));
        assert!(
            mentions("CURIOSITY"),
            "SOUL prompt must include CURIOSITY operating principle"
        );

        // Attachment handling guidance.
        assert!(
            mentions("ATTACHED CONTENT"),
            "SOUL prompt must teach Brain how to handle <ATTACHED_CONTENT> blocks"
        );
        assert!(
            mentions("chat export deserves a conversational summary"),
            "SOUL prompt must instruct response-shape adaptation by content type"
        );
    }

    /// With no capability digest supplied, the system prompt must embed the
    /// static `DEFAULT_CAPABILITIES` text.
    #[test]
    fn default_capabilities_used_when_no_digest_supplied() {
        let assembler = ContextAssembler::with_defaults();
        let assembled = assembler.assemble("what can you do?", &[], &[]);
        let system_prompt = assembled[0].content.as_str();

        // Falls back to the static always-on faculties.
        assert!(system_prompt.contains(DEFAULT_CAPABILITIES));
        assert!(system_prompt.contains("Episodic Memory"));
    }

    /// A caller-supplied capability digest must appear in the system prompt
    /// and displace the static default capabilities text.
    #[test]
    fn live_capability_digest_overrides_default() {
        let digest = "Your Capabilities:\n- Episodic Memory: ...\n\nMounted tools:\n- MCP server \"github\": create_issue";

        let assembler = ContextAssembler::with_defaults();
        let assembled =
            assembler.assemble_full("what can you do?", &[], &[], None, Some(digest), &[], &[]);
        let system_prompt = assembled[0].content.as_str();

        assert!(
            system_prompt.contains("MCP server \"github\": create_issue"),
            "live digest must reach the system prompt"
        );

        // The supplied digest replaces the static block — the default's
        // Semantic/Proactivity bullets are not present unless the caller
        // included them.
        let default_bullet_present =
            system_prompt.contains("a web of facts about the user's world");
        assert!(!default_bullet_present);
    }

    /// An attachment is rendered as a system message placed directly before
    /// the final user message, wrapped in `<ATTACHED_CONTENT path="…">` tags.
    #[test]
    fn attachments_render_as_a_dedicated_system_message_before_user() {
        let attachments = vec![Attachment {
            display_path: "/Users/me/notes.md".to_string(),
            snapshot: "# my notes\nbuy milk".to_string(),
        }];

        let assembler = ContextAssembler::with_defaults();
        let messages =
            assembler.assemble_full("read this", &[], &[], None, None, &attachments, &[]);

        // Last message is the user's own text; whatever precedes it ends with
        // the attachments block.
        let (user_msg, earlier) = messages.split_last().expect("non-empty");
        assert_eq!(user_msg.role, Role::User);
        assert_eq!(user_msg.content, "read this");

        let attachment_block = &earlier[earlier.len() - 1];
        assert_eq!(attachment_block.role, Role::System);

        let rendered = attachment_block.content.as_str();
        assert!(
            rendered.contains("<ATTACHED_CONTENT path=\"/Users/me/notes.md\">"),
            "missing attached-content block:\n{}",
            rendered
        );
        assert!(rendered.contains("buy milk"));
        assert!(rendered.contains("</ATTACHED_CONTENT>"));
    }

    /// A skipped attachment must surface in the message before the user's
    /// text as a `<SKIPPED_PATH` tag carrying both the path and the reason.
    #[test]
    fn skipped_paths_render_as_a_tag_for_brain_to_mention() {
        let skipped = vec![SkippedAttachment {
            display_path: "/Users/me/missing.txt".to_string(),
            reason: "path not found".to_string(),
        }];

        let assembler = ContextAssembler::with_defaults();
        let messages = assembler.assemble_full("summarise it", &[], &[], None, None, &[], &skipped);

        // Second-to-last message: the one immediately before the user text.
        let penultimate = messages.len() - 2;
        let rendered = messages[penultimate].content.as_str();

        assert!(rendered.contains("<SKIPPED_PATH"));
        assert!(rendered.contains("/Users/me/missing.txt"));
        assert!(rendered.contains("path not found"));
    }

    /// With empty attachment and skipped lists, `assemble_full` must return
    /// exactly as many messages as the plain `assemble` call.
    #[test]
    fn no_attachments_means_no_extra_block() {
        let assembler = ContextAssembler::with_defaults();

        let baseline_len = assembler.assemble("hi", &[], &[]).len();
        let full_len = assembler
            .assemble_full("hi", &[], &[], None, None, &[], &[])
            .len();

        assert_eq!(
            baseline_len, full_len,
            "no attachments must not add a message"
        );
    }

    /// An attachment far larger than the attachments budget must be cut down
    /// and flagged with a `[truncated]` marker instead of being sent in full.
    #[test]
    fn large_attachment_is_truncated_to_budget() {
        // Snapshot is 60_000 chars (~20_000 tokens at CHARS_PER_TOKEN = 3).
        // The default attachments budget is 2500 tokens ≈ 7500 chars, so the
        // rendered block must be far smaller than the input snapshot.
        let huge = "x".repeat(60_000);
        let assembler = ContextAssembler::with_defaults();
        let attachments = vec![Attachment {
            display_path: "/Users/me/huge.txt".to_string(),
            snapshot: huge,
        }];
        let messages = assembler.assemble_full("read", &[], &[], None, None, &attachments, &[]);
        let prev = &messages[messages.len() - 2];
        assert!(
            prev.content.contains("[truncated]"),
            "huge attachment must be marked as truncated"
        );
        // Sanity: the rendered block (budgeted content plus its wrapper
        // markup) must stay well below the 60_000-char input. The 10_000
        // bound leaves headroom above the ~7500-char budget.
        let rendered_chars = prev.content.chars().count();
        assert!(
            rendered_chars < 10_000,
            "rendered block too large: {} chars",
            rendered_chars
        );
    }

    /// The hardcoded onboarding greeting must name Brain and ask the user for
    /// their name.
    #[test]
    fn test_onboarding_greeting_exists() {
        let expectations = [
            ("Brain", "greeting must mention Brain"),
            ("name", "greeting must ask for the user's name"),
        ];

        for (needle, why) in expectations {
            assert!(ONBOARDING_GREETING.contains(needle), "{}", why);
        }
    }

    /// The onboarding addendum must carry its mode marker and the
    /// instruction to ask follow-up questions.
    #[test]
    fn test_onboarding_addendum_exists() {
        let expectations = [
            (
                "ONBOARDING MODE",
                "addendum must contain ONBOARDING MODE marker",
            ),
            (
                "follow-up question",
                "addendum must instruct follow-up questions",
            ),
        ];

        for (needle, why) in expectations {
            assert!(ONBOARDING_ADDENDUM.contains(needle), "{}", why);
        }
    }

    /// The assembler's message-level estimate and the module-level
    /// `estimate_tokens` must both follow the `CHARS_PER_TOKEN` heuristic.
    #[test]
    fn test_estimate_tokens() {
        let text = "Hello world";
        let batch = vec![Message::user(text)];

        let estimated = ContextAssembler::estimate_tokens(&batch);
        assert!(estimated > 0);

        // "Hello world" is 11 chars; at 3 chars/token, ceil(11/3) = 4.
        let expected = 11usize.div_ceil(CHARS_PER_TOKEN);
        assert_eq!(estimated, expected);
        assert_eq!(super::estimate_tokens(text), 4);
    }
}