brainos-cortex 0.5.0

LLM provider abstraction, context assembly, and action dispatch for Brain OS
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
//! Context assembly — builds LLM prompts with token budget management.
//!
//! Manages the token budget for LLM context windows. The figures below are
//! the defaults for an 8192-token window:
//! - System prompt (~500 tokens)
//! - User model snapshot (~300 tokens)
//! - Conversation history (~2000 tokens)
//! - Path attachments (~2500 tokens)
//! - Retrieved memories (remaining budget)
//! - Response buffer (~400 tokens)

use crate::llm::Message;
use hippocampus::search::Memory;

/// Characters assumed per token by the prompt estimator.
///
/// English prose averages roughly 4 chars/token and code/JSON (common in
/// attachments) roughly 3, so the lower figure is used: the estimate errs
/// towards *over*-counting, which keeps packed prompts inside the model's
/// real window, while wasting far less of it than the earlier
/// 2-chars/token guess (which discarded about half the usable window).
///
/// This is intentionally a portable heuristic rather than a model-specific
/// BPE tokenizer — Brain talks to several providers (Ollama/qwen, OpenAI, …)
/// and no single vocabulary fits them all. Replacing it with a real
/// tokenizer later should only require changes in this module.
pub const CHARS_PER_TOKEN: usize = 3;

/// Estimate how many tokens `text` occupies, for budgeting and packing.
///
/// Counts Unicode scalar values (not bytes) and divides by
/// [`CHARS_PER_TOKEN`], rounding up, so any non-empty text costs at least one
/// token and the empty string costs zero. Every budget check in this module
/// goes through this function so the heuristic lives in exactly one place.
pub fn estimate_tokens(text: &str) -> usize {
    let char_count = text.chars().count();
    // Round up: a partial token still occupies a slot in the window.
    char_count.div_ceil(CHARS_PER_TOKEN)
}

/// Default token budgets, sized for an 8192-token context window.
///
/// Retrieved memories have no field of their own: they receive whatever
/// [`TokenBudget::memory_budget`] leaves once every section below has been
/// subtracted from `total_context` — 2492 tokens with these values.
pub const TOKEN_BUDGETS: TokenBudget = TokenBudget {
    system_prompt: 500,
    user_model: 300,
    conversation_history: 2000,
    response_buffer: 400,
    attachments: 2500,
    total_context: 8192, // Default for most models
};

/// Hardcoded greeting for first-ever chat session (0 facts).
/// Printed directly — no LLM call needed.
///
/// The literal is a single paragraph: each trailing `\` joins the next
/// source line onto the current one, and `\u{2014}` is an em dash.
pub const ONBOARDING_GREETING: &str = "Hey! I'm Brain \u{2014} your personal memory engine. \
I run locally on your machine and I'm here to remember what matters to you. \
I don't know anything about you yet, so let's fix that. What's your name?";

/// System-prompt addendum injected while the user has fewer than 5 facts.
/// Makes the LLM naturally curious and question-asking during onboarding.
///
/// The text begins with a blank line on purpose:
/// [`ContextAssembler::assemble_full`] concatenates an addendum directly onto
/// the end of the system prompt with no separator of its own.
pub const ONBOARDING_ADDENDUM: &str = r#"

[ONBOARDING MODE — the user is new and you know very little about them]
- After every user message, end your response with ONE short, focused follow-up question to learn about the user (name, role, projects, interests).
- Keep responses to 1-3 sentences plus the question.
- Sound warm, curious, and conversational — not like an intake form.
- NEVER say "I don't have that in my memory yet" — instead, be proactive about learning.
- Once you learn something, acknowledge it naturally and ask about the next thing."#;

/// The always-on cognitive faculties, rendered as the fallback "Your
/// Capabilities" section of the SOUL prompt. Used verbatim when no live
/// capability digest is supplied (non-chat LLM paths, tests, custom
/// prompts) and as the prefix of the live digest the chat path builds
/// (see `signal::pipeline::conversation`). Keeping the
/// wording in one place stops the static and live views from drifting.
///
/// [`ContextAssembler::assemble_full`] appends this after the base prompt
/// (and any addendum), separated by a blank line, whenever its
/// `capabilities` argument is `None`.
pub const DEFAULT_CAPABILITIES: &str = r#"Your Capabilities:
- Episodic Memory: You recall past experiences and conversations provided as context.
- Semantic Memory: You maintain a web of facts about the user's world, projects, and habits.
- Proactivity: You don't just react; you anticipate needs based on established patterns (provided in context)."#;

/// Token budget allocation.
///
/// Every field is a count of estimated tokens. Retrieved memories are not a
/// field: their share is the remainder computed by
/// [`TokenBudget::memory_budget`].
#[derive(Debug, Clone, Copy)]
pub struct TokenBudget {
    /// Reserve for the system prompt.
    pub system_prompt: usize,
    /// Reserve for the user-model (profile) snapshot.
    pub user_model: usize,
    /// Cap on prior conversation turns; the assembler keeps the most recent
    /// messages that fit.
    pub conversation_history: usize,
    /// Headroom left free for the model's reply.
    pub response_buffer: usize,
    /// Cap on rendered path-attachments (snapshots of files/dirs the
    /// user referenced in chat). Truncated to fit by the assembler.
    pub attachments: usize,
    /// Size of the whole context window that the other sections are carved
    /// out of.
    pub total_context: usize,
}

impl TokenBudget {
    /// Tokens left over for retrieved memories.
    ///
    /// Starts from `total_context` and removes each explicitly budgeted
    /// section in turn. Every subtraction saturates, so an over-committed
    /// budget yields `0` rather than underflowing.
    pub fn memory_budget(&self) -> usize {
        let claimed_sections = [
            self.system_prompt,
            self.user_model,
            self.conversation_history,
            self.response_buffer,
            self.attachments,
        ];
        claimed_sections
            .iter()
            .fold(self.total_context, |left, &section| {
                left.saturating_sub(section)
            })
    }

    /// Derive a budget from a model's actual context window.
    ///
    /// The system prompt, user-model snapshot and response buffer cost the
    /// same regardless of window size, so they keep their [`TOKEN_BUDGETS`]
    /// values. Whatever remains — the *working pool* — is shared out by fixed
    /// percentages: 28% to conversation history, 36% to path-attachments, and
    /// the rest (about 36%) to retrieved memories via
    /// [`Self::memory_budget`]. A 128k-window model therefore reads far more
    /// file content and recalls more memory than the conservative 8k default
    /// instead of being clipped to fixed caps.
    ///
    /// With `total_tokens == 8192` the result stays close to the historical
    /// fixed split (history ≈2k, attachments ≈2.5k, memory ≈2.5k). When
    /// `total_tokens` is smaller than the fixed reserve the pool is zero, so
    /// history and attachments get nothing (the assembler still renders the
    /// system prompt).
    pub fn for_context_size(total_tokens: usize) -> Self {
        // Share of the working pool, in percent. Memory takes the remainder.
        const HISTORY_PERCENT: usize = 28;
        const ATTACHMENTS_PERCENT: usize = 36;

        // Fixed overheads are copied from the default budget unchanged.
        let Self {
            system_prompt,
            user_model,
            response_buffer,
            ..
        } = TOKEN_BUDGETS;
        let fixed_overhead = system_prompt + user_model + response_buffer;

        // History stays modest (recent turns dominate relevance); attachments
        // and memory take the larger shares and grow with the window.
        let working_pool = total_tokens.saturating_sub(fixed_overhead);

        Self {
            system_prompt,
            user_model,
            conversation_history: working_pool * HISTORY_PERCENT / 100,
            response_buffer,
            attachments: working_pool * ATTACHMENTS_PERCENT / 100,
            total_context: total_tokens,
        }
    }
}

impl Default for TokenBudget {
    /// The default budget is exactly [`TOKEN_BUDGETS`].
    fn default() -> Self {
        TOKEN_BUDGETS
    }
}

/// Path-attachment grounding for a chat turn. When the user references
/// a local path in their message, the pipeline reads it on their behalf
/// and hands the snapshot here so the LLM can see *what's actually
/// there* alongside memories and history. The SOUL prompt's
/// "ATTACHED CONTENT" principle explains how to read these blocks.
///
/// Each attachment is rendered as one `<ATTACHED_CONTENT path="…">` block.
#[derive(Debug, Clone)]
pub struct Attachment {
    /// Path token as the user wrote it. Preserved verbatim so the LLM
    /// can refer back to the user's own wording. Becomes the `path`
    /// attribute of the rendered block.
    pub display_path: String,
    /// Rendered snapshot — directory listing + histogram + inlined
    /// files for a directory, or file excerpt for a file. Built by
    /// `signal::pipeline::build_directory_snapshot` /
    /// `build_file_snapshot`. May be truncated at render time to fit the
    /// attachment budget.
    pub snapshot: String,
}

/// A path the user referenced that couldn't be attached (not found,
/// outside `security.allowed_paths`, wrong file kind). Rendered as a
/// `<SKIPPED_PATH>` tag so Brain can mention it instead of silently
/// dropping the reference.
#[derive(Debug, Clone)]
pub struct SkippedAttachment {
    /// Path token for the skipped reference; becomes the tag's `path`
    /// attribute.
    pub display_path: String,
    /// Why the path was skipped; becomes the tag's `reason` attribute, with
    /// any double quotes replaced by single quotes at render time.
    pub reason: String,
}

/// User profile data for context injection.
///
/// All fields are optional in effect: a `None` name or an empty list is
/// simply left out of the rendered profile.
#[derive(Debug, Clone, Default)]
pub struct UserProfile {
    /// The user's name, rendered as "The user's name is <name>.".
    pub name: Option<String>,
    /// Preferences, rendered comma-separated after "User preferences: ".
    pub preferences: Vec<String>,
    /// Goals, rendered comma-separated after "User goals: ".
    pub goals: Vec<String>,
    /// Facts, rendered semicolon-separated after "Key facts: ".
    pub facts: Vec<String>,
}

impl UserProfile {
    /// Render the profile as one line of prose for the system prompt.
    ///
    /// Sections appear in a fixed order — name, preferences, goals, facts —
    /// joined by single spaces. A missing name or an empty list contributes
    /// nothing, so a completely empty profile renders as the empty string.
    pub fn to_context_string(&self) -> String {
        let mut sentences: Vec<String> = Vec::with_capacity(4);

        // `Option` is iterable, so each `extend` adds zero or one sentence.
        sentences.extend(
            self.name
                .as_ref()
                .map(|name| format!("The user's name is {}.", name)),
        );
        sentences.extend(
            (!self.preferences.is_empty())
                .then(|| format!("User preferences: {}", self.preferences.join(", "))),
        );
        sentences.extend(
            (!self.goals.is_empty()).then(|| format!("User goals: {}", self.goals.join(", "))),
        );
        sentences.extend(
            (!self.facts.is_empty()).then(|| format!("Key facts: {}", self.facts.join("; "))),
        );

        sentences.join(" ")
    }

    /// Estimated token cost of [`Self::to_context_string`].
    ///
    /// Delegates to the module-level [`estimate_tokens`] heuristic
    /// ([`CHARS_PER_TOKEN`] characters per token, rounded up), so an empty
    /// profile costs zero tokens.
    pub fn estimate_tokens(&self) -> usize {
        let rendered = self.to_context_string();
        estimate_tokens(&rendered)
    }
}

/// Context assembler — builds prompts respecting token budgets.
///
/// Configured builder-style (`new` / `with_*`); the `assemble*` methods take
/// `&self` and leave the assembler unchanged.
pub struct ContextAssembler {
    /// Per-section token allocation enforced during assembly.
    budget: TokenBudget,
    /// Base system prompt; the built-in SOUL prompt unless overridden.
    system_prompt: String,
    /// Profile rendered into the system message when non-empty.
    user_profile: UserProfile,
}

impl ContextAssembler {
    /// Create an assembler that enforces `budget`.
    ///
    /// Starts from the built-in SOUL system prompt and an empty user profile;
    /// use the `with_*` builders to override either.
    pub fn new(budget: TokenBudget) -> Self {
        let system_prompt = Self::default_system_prompt();
        let user_profile = UserProfile::default();
        Self {
            budget,
            system_prompt,
            user_profile,
        }
    }

    /// Create with default budget.
    ///
    /// Equivalent to [`Self::new`] with [`TOKEN_BUDGETS`].
    pub fn with_defaults() -> Self {
        Self::new(TOKEN_BUDGETS)
    }

    /// The active token budget — lets the pipeline plan history compaction
    /// against the same per-section allocation the assembler enforces.
    ///
    /// Returned by value; [`TokenBudget`] is `Copy`.
    pub fn budget(&self) -> TokenBudget {
        self.budget
    }

    /// Replace the base system prompt (builder-style: consumes and returns
    /// the assembler).
    ///
    /// Only the base text is replaced; the capability manifest and user
    /// profile are still appended during assembly.
    pub fn with_system_prompt(mut self, prompt: impl Into<String>) -> Self {
        let custom_prompt: String = prompt.into();
        self.system_prompt = custom_prompt;
        self
    }

    /// Set user profile.
    ///
    /// Builder-style: consumes the assembler and returns it with `profile`
    /// installed. A non-empty profile is rendered into the system message
    /// under "User Profile:".
    pub fn with_user_profile(mut self, profile: UserProfile) -> Self {
        self.user_profile = profile;
        self
    }

    /// Get the default system prompt.
    ///
    /// Returns the built-in SOUL prompt as an owned `String`: identity,
    /// numbered operating principles (grounding, recall, tone, formatting,
    /// attached content) and a closing statement. It is the base text only —
    /// the "Your Capabilities" section and the user profile are appended
    /// later by [`Self::assemble_full`].
    ///
    /// NOTE(review): the SELF-KNOWLEDGE BOUNDARY principle refers to an
    /// "About Brain" section "below"; nothing in this module renders one, so
    /// it is presumably supplied through the live capability digest — confirm.
    fn default_system_prompt() -> String {
        r#"You are the SOUL of Brain OS — a biologically-inspired, proactive cognitive engine. You are not just an assistant; you are the user's digital hippocampus and prefrontal cortex, operating with deep context and long-term memory.

Your Identity:
- You are "Brain", the central intelligence of a local-first memory system.
- You are private, secure, and run entirely on the user's machine.
- Your purpose is to eliminate "context amnesia" by bridging the gap between siloed tools and the user's life.

Operating Principles:
1. TRUTH OVER HALLUCINATION: Ground answers in (a) the provided memories, (b) the live conversation history above this message, and (c) general world knowledge. If a *fact about the user* is genuinely absent from memory AND not present in the conversation, state: "I don't have that in my memory yet." Do NOT say this when the user is asking about things discussed earlier in the current conversation — answer from the message thread itself.
   - SELF-KNOWLEDGE BOUNDARY: General world knowledge is fine for the world at large, but it is NOT a source for claims about Brain itself. Any statement about Brain's own CLI commands, config keys/schema, file layout, or features MUST come from the "About Brain" and "Your Capabilities" sections below — never from general knowledge or guesswork. If the answer isn't in those sections, say so plainly ("that isn't something Brain exposes" / "that's not a command/config key I have") and, where useful, point to the closest real command or config key. Never invent command names, config keys, templating syntax, or option flags — a confident, plausible-looking fabrication of Brain's surface is the worst failure mode.
   - MEMORY GROUNDING: Never assert a specific fact about the user unless it appears verbatim in the "Relevant memories:" block or earlier in this conversation. This applies with full force when you are *describing what you remember* (e.g. answering "what do you know about me?" or "what are your capabilities?"): do NOT manufacture illustrative examples — never say things like "you bike to work" or "you deploy on Fridays" to demonstrate recall. Describe the *kinds* of things you store (preferences, projects, habits, people, decisions) in the abstract, and cite only real entries from the memories block. A fabricated personal "memory" is a betrayal of a memory product's core promise — when memory is empty or lacks the detail, say so.
2. SEAMLESS RECALL: Reference memories and prior turns naturally ("You mentioned earlier...", "Based on what we discussed...").
3. COGNITIVE CLARITY: Be concise, direct, and insightful. Avoid corporate fluff. Match response length to the question — simple greetings get one or two sentences, not tables.
4. CONTEXTUAL AWARENESS: Use the provided User Profile to tailor your tone and relevance.
5. CURIOSITY: When you lack context about the user, ask one focused follow-up question. Learning about the user is part of your job — don't wait to be told.
6. FORMATTING: The user's terminal renders markdown. Use it lightly when it helps (lists for multi-item answers, **bold** for emphasis, `code` for identifiers). Skip headings and tables for short replies. Prefer bullet lists over tables — the terminal is narrow and wide tables render poorly; only use a table for genuinely tabular data with short cells.
7. ATTACHED CONTENT: When the user references a local path, an `<ATTACHED_CONTENT path="…">` block is provided below as grounding — that is what is actually on disk, read on the user's behalf. Adapt your response shape to the *content*, not to a template: a chat export deserves a conversational summary with themes, tone, and an honest opinion; a code project deserves a technical overview; a folder of photos or media deserves an honest "I can see these file types but I can't view the images themselves." Never describe a non-code folder as if it were a software project. If a `<SKIPPED_PATH reason="…"/>` tag appears, the user named a path I couldn't read — acknowledge it briefly and ask them to confirm or rephrase.

You are the user's partner in thought. Your goal is to make their digital life feel like a continuous, coherent stream of intelligence."#
            .to_string()
    }

    /// Assemble the message list for one chat turn.
    ///
    /// Convenience wrapper around [`Self::assemble_with_addendum`] with no
    /// addendum: combines the system prompt, the retrieved `memories`, the
    /// `conversation_history` and the current `user_message` into messages
    /// ready to send to the LLM.
    pub fn assemble(
        &self,
        user_message: &str,
        memories: &[Memory],
        conversation_history: &[Message],
    ) -> Vec<Message> {
        let no_addendum = None;
        self.assemble_with_addendum(user_message, memories, conversation_history, no_addendum)
    }

    /// Like [`Self::assemble`], but appends `addendum` to the system prompt
    /// when one is provided.
    ///
    /// This switches prompt modes for a single turn (e.g. onboarding) without
    /// mutating the shared assembler. Delegates to [`Self::assemble_full`]
    /// with the default capability manifest and no attachments or skipped
    /// paths.
    pub fn assemble_with_addendum(
        &self,
        user_message: &str,
        memories: &[Memory],
        conversation_history: &[Message],
        addendum: Option<&str>,
    ) -> Vec<Message> {
        // `None` selects the static DEFAULT_CAPABILITIES manifest.
        let default_capabilities = None;
        self.assemble_full(
            user_message,
            memories,
            conversation_history,
            addendum,
            default_capabilities,
            &[],
            &[],
        )
    }

    /// Full assembly, including path-attachment grounding.
    ///
    /// The returned messages are, in order:
    ///
    /// 1. one System message: base prompt, then `addendum` (if non-empty),
    ///    then the capability manifest, then "User Profile: …" when the
    ///    profile is non-empty;
    /// 2. an optional System message "Relevant memories:" listing the leading
    ///    `memories` that fit (packing stops at the first one that does not);
    /// 3. the most recent `conversation_history` messages that fit
    ///    `budget.conversation_history`, in chronological order;
    /// 4. an optional System message with `<ATTACHED_CONTENT>` blocks and
    ///    `<SKIPPED_PATH>` tags, placed right before the user's message so it
    ///    sits in the closest attention slot to what the user just put on
    ///    the table;
    /// 5. the User message itself.
    ///
    /// Attachment snapshots are truncated against `budget.attachments`: each
    /// one is capped at an equal share of the budget still unspent when its
    /// turn comes (never below 600 characters), so space left unused by a
    /// short earlier snapshot is available to later ones.
    ///
    /// `capabilities` is the "Your Capabilities" section of the SOUL prompt.
    /// The chat path passes a *live* digest rendered from the currently-wired
    /// tools and agents; every other path passes `None` and gets
    /// [`DEFAULT_CAPABILITIES`]. Either way the reasoner always sees an
    /// explicit capability manifest after the base prompt.
    pub fn assemble_full(
        &self,
        user_message: &str,
        memories: &[Memory],
        conversation_history: &[Message],
        addendum: Option<&str>,
        capabilities: Option<&str>,
        attachments: &[Attachment],
        skipped: &[SkippedAttachment],
    ) -> Vec<Message> {
        // --- 1. System message -------------------------------------------
        let base_prompt = match addendum.filter(|extra| !extra.is_empty()) {
            Some(extra) => format!("{}{}", self.system_prompt, extra),
            None => self.system_prompt.clone(),
        };
        // Live digest from the chat path, static faculties everywhere else.
        let manifest = capabilities.unwrap_or(DEFAULT_CAPABILITIES);
        let mut system_content = format!("{}\n\n{}", base_prompt, manifest);

        // Render the profile once; an empty rendering costs zero tokens and
        // is left out entirely.
        let profile_text = self.user_profile.to_context_string();
        if !profile_text.is_empty() {
            system_content = format!("{}\n\nUser Profile: {}", system_content, profile_text);
        }
        let mut messages = vec![Message::system(system_content)];

        // --- 2. Retrieved memories ---------------------------------------
        // NOTE(review): the running total is seeded with the system message's
        // estimated size and compared against `memory_budget()`, which has
        // already had `system_prompt` and `user_model` subtracted — so the
        // system message is charged against the memory share as well.
        // Confirm whether that safety margin is intentional.
        let memory_budget = self.budget.memory_budget();
        let mut running_tokens = estimate_tokens(&messages[0].content);
        let mut memory_lines = String::new();

        for memory in memories {
            let line = match &memory.agent {
                Some(agent) => format!(
                    "- [{:?}, agent: {}] {}\n",
                    memory.source, agent, memory.content
                ),
                None => format!("- [{:?}] {}\n", memory.source, memory.content),
            };
            let cost = estimate_tokens(&line);

            // Stop at the first memory that does not fit; later (presumably
            // less relevant) ones are not considered.
            if running_tokens + cost > memory_budget {
                break;
            }

            running_tokens += cost;
            memory_lines.push_str(&line);
        }

        if !memory_lines.is_empty() {
            messages.push(Message::system(format!(
                "Relevant memories:\n{}",
                memory_lines
            )));
        }

        // --- 3. Conversation history -------------------------------------
        // Walk backwards from the newest turn to find the oldest message that
        // still fits, then copy that tail in its original order.
        let history_cap = self.budget.conversation_history;
        let mut history_tokens: usize = 0;
        let mut oldest_kept = conversation_history.len();

        for (index, msg) in conversation_history.iter().enumerate().rev() {
            let cost = estimate_tokens(&msg.content);
            if history_tokens + cost > history_cap {
                break;
            }
            history_tokens += cost;
            oldest_kept = index;
        }
        messages.extend(conversation_history[oldest_kept..].iter().cloned());

        // --- 4. Attached path grounding ----------------------------------
        let grounding = render_attachments_block(attachments, skipped, self.budget.attachments);
        if let Some(block) = grounding {
            messages.push(Message::system(block));
        }

        // --- 5. Current user message -------------------------------------
        messages.push(Message::user(user_message.to_string()));

        messages
    }

    /// Estimated total token cost of `messages`.
    ///
    /// Sums the module-level [`estimate_tokens`] heuristic over each
    /// message's `content`; roles and any per-message framing are not
    /// counted.
    pub fn estimate_tokens(messages: &[Message]) -> usize {
        let mut total = 0usize;
        for message in messages {
            total += estimate_tokens(&message.content);
        }
        total
    }
}

/// Build the `<ATTACHED_CONTENT>` / `<SKIPPED_PATH>` block that goes
/// just before the user's message. Returns `None` when there's nothing
/// to render.
///
/// `budget_tokens` is converted to a character ceiling with
/// [`CHARS_PER_TOKEN`] — the same ratio the estimator uses — and each
/// snapshot is truncated to an equal share of the ceiling still unspent when
/// its turn comes. Space a short earlier snapshot leaves unused therefore
/// rolls forward to later ones. The share never drops below 600 characters,
/// so the ceiling is a target rather than a hard limit: the floor, the tag
/// wrappers and the `<SKIPPED_PATH>` tags can push the result past it.
///
/// Attribute values are wrapped in double quotes, so any `"` inside a path
/// or reason is rendered as `'` to keep the tag well-formed.
fn render_attachments_block(
    attachments: &[Attachment],
    skipped: &[SkippedAttachment],
    budget_tokens: usize,
) -> Option<String> {
    /// Smallest per-attachment cap, so a small attachment can always fit.
    const MIN_ATTACHMENT_CHARS: usize = 600;

    if attachments.is_empty() && skipped.is_empty() {
        return None;
    }

    // A literal `"` would end the attribute early and let the rest of a
    // user-typed path be read as markup; neutralise it the same way for every
    // attribute.
    let attr = |value: &str| value.replace('"', "'");

    // Convert the token budget back to a char ceiling using the same ratio
    // the estimator assumes, so truncation and packing stay consistent.
    let char_budget = budget_tokens.saturating_mul(CHARS_PER_TOKEN);
    let mut out = String::new();
    let mut chars_used = 0usize;

    for (i, att) in attachments.iter().enumerate() {
        // Per-attachment ceiling: equal share of the remaining budget among
        // the attachments not yet rendered, floored at the minimum.
        let remaining_atts = attachments.len() - i;
        let per_attachment = (char_budget.saturating_sub(chars_used) / remaining_atts.max(1))
            .max(MIN_ATTACHMENT_CHARS);
        let body = truncate_snapshot(&att.snapshot, per_attachment);
        let block = format!(
            "<ATTACHED_CONTENT path=\"{}\">\n{}\n</ATTACHED_CONTENT>\n",
            attr(&att.display_path),
            body
        );
        // Charge the whole rendered block (wrapper included) so later
        // attachments see an honest remainder.
        chars_used = chars_used.saturating_add(block.chars().count());
        out.push_str(&block);
    }

    for sk in skipped {
        let tag = format!(
            "<SKIPPED_PATH path=\"{}\" reason=\"{}\"/>\n",
            attr(&sk.display_path),
            attr(&sk.reason),
        );
        out.push_str(&tag);
    }

    Some(out)
}

/// Truncate a snapshot to at most `cap_chars` characters.
///
/// A snapshot that already fits is returned unchanged. Otherwise the first
/// `cap_chars - 20` characters are kept and a short marker is appended so
/// the LLM knows content was cut. When `cap_chars` is too small to hold even
/// the marker, the snapshot is hard-cut to `cap_chars` characters with no
/// marker, so the cap is honoured for every input.
///
/// Lengths are measured in Unicode scalar values and cuts are made at
/// `char_indices` offsets, so a multi-byte character is never split.
fn truncate_snapshot(s: &str, cap_chars: usize) -> String {
    /// Appended to any snapshot that had to be cut.
    const MARKER: &str = "\n…[truncated]";
    /// Characters held back for the marker (slightly more than its length).
    const MARKER_RESERVE: usize = 20;

    // `nth(cap_chars)` is `None` exactly when `s` has at most `cap_chars`
    // characters, and it stops scanning there instead of counting all of `s`.
    let Some((hard_cut, _)) = s.char_indices().nth(cap_chars) else {
        return s.to_string();
    };

    // No room for the marker: respect the cap rather than overshoot it.
    if cap_chars < MARKER.chars().count() {
        return s[..hard_cut].to_string();
    }

    let keep_chars = cap_chars.saturating_sub(MARKER_RESERVE);
    // `keep_chars <= cap_chars < char count`, so the lookup always succeeds;
    // the fallback only keeps this total.
    let cut = s
        .char_indices()
        .nth(keep_chars)
        .map_or(s.len(), |(byte_index, _)| byte_index);

    let mut out = String::with_capacity(cut + MARKER.len());
    out.push_str(&s[..cut]);
    out.push_str(MARKER);
    out
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::llm::Role;

    /// The default budget leaves exactly 2492 tokens for recalled memories.
    #[test]
    fn test_token_budget_memory_allocation() {
        // The window minus every other section:
        // 8192 - 500 - 300 - 2000 - 400 - 2500 = 2492
        let expected_memory_tokens = 2492;

        let defaults = TokenBudget::default();
        assert_eq!(defaults.memory_budget(), expected_memory_tokens);
    }

    /// Going from an 8k to a 128k window grows the variable sections while
    /// the fixed overheads stay put and the sections still fit the window.
    #[test]
    fn for_context_size_scales_attachments_and_memory_with_window() {
        let narrow = TokenBudget::for_context_size(8192);
        let wide = TokenBudget::for_context_size(128000);

        // Fixed overheads are independent of the window size.
        assert_eq!(wide.system_prompt, narrow.system_prompt);
        assert_eq!(wide.response_buffer, narrow.response_buffer);

        // The wide window funds more than ten times the file content and
        // more than ten times the recalled memory.
        assert!(
            wide.attachments > narrow.attachments * 10,
            "attachments should scale with the window: {} vs {}",
            wide.attachments,
            narrow.attachments,
        );
        assert!(wide.memory_budget() > narrow.memory_budget() * 10);

        // Summed together, the six sections never exceed the declared window.
        let sections = [
            wide.system_prompt,
            wide.user_model,
            wide.conversation_history,
            wide.response_buffer,
            wide.attachments,
            wide.memory_budget(),
        ];
        let used = sections.iter().sum::<usize>();
        assert!(used <= wide.total_context);

        // At 8k the split stays near the historical fixed allocation.
        assert!((1800..=2200).contains(&narrow.conversation_history));
        assert!((2300..=2700).contains(&narrow.attachments));
    }

    /// A 500-token window is too small to fund any variable section; the
    /// constructor must neither panic nor hand out budget it doesn't have.
    #[test]
    fn for_context_size_below_reserve_is_safe() {
        let tiny = TokenBudget::for_context_size(500);

        assert_eq!(tiny.total_context, 500);
        // History, attachments and memory all collapse to zero.
        assert_eq!(
            (
                tiny.conversation_history,
                tiny.attachments,
                tiny.memory_budget()
            ),
            (0, 0, 0)
        );
    }

    // ── Property tests ────────────────────────────────────────────────
    //
    // The estimator and budget split are the safety floor for every prompt
    // Brain assembles: under-counting tokens overflows the model's real
    // window, and a budget whose sections sum past `total_context` clips
    // content the assembler thought it had room for. These assert the
    // invariants for arbitrary input rather than the hand-picked sizes above.

    use proptest::prelude::*;

    proptest! {
        #![proptest_config(ProptestConfig { cases: 512, .. ProptestConfig::default() })]

        /// The estimate, scaled back by `CHARS_PER_TOKEN`, must cover every
        /// character of the input (no under-counting) while overshooting by
        /// less than one token's worth of characters. Under-counting would
        /// let packed prompts silently overflow the provider's window.
        #[test]
        fn estimate_never_undercounts(s in ".*") {
            let est = estimate_tokens(&s);
            let chars = s.chars().count();
            let covered = est * CHARS_PER_TOKEN;

            prop_assert!(
                chars <= covered,
                "estimate under-counted: {chars} chars but est*ratio = {}", covered
            );
            // Rounding up may add at most one token of slack.
            prop_assert!(covered < chars + CHARS_PER_TOKEN);
            // An estimate of zero happens for the empty string and only then.
            prop_assert_eq!(est == 0, chars == 0);
        }

        /// Estimates depend on the number of characters, not on byte length:
        /// N three-byte codepoints must cost the same as N ASCII letters.
        /// Guards against a regression to `str::len()`, which would
        /// over-count non-ASCII text and waste budget.
        #[test]
        fn estimate_counts_chars_not_bytes(n in 0usize..512) {
            let multibyte = "€".repeat(n); // 3 bytes/char
            let ascii = "a".repeat(n); // 1 byte/char
            prop_assert_eq!(estimate_tokens(&multibyte), estimate_tokens(&ascii));
        }

        /// Estimating two pieces separately must never come out cheaper than
        /// estimating their concatenation; otherwise per-section accounting
        /// could admit content the combined prompt has no room for.
        #[test]
        fn estimate_is_subadditive(a in ".*", b in ".*") {
            let parts = estimate_tokens(&a) + estimate_tokens(&b);
            let joined = [a.as_str(), b.as_str()].concat();
            let whole = estimate_tokens(&joined);
            prop_assert!(parts >= whole, "split under-counted: {parts} < {whole}");
        }

        /// For any window the split is exact: history + attachments + memory
        /// equal the window minus the fixed reserve, and all six sections
        /// together equal the window itself — or the fixed reserve alone
        /// when the window is smaller than that reserve.
        #[test]
        fn for_context_size_split_is_exact(total in 0usize..8_000_000) {
            let b = TokenBudget::for_context_size(total);
            prop_assert_eq!(b.total_context, total);

            let fixed = b.system_prompt + b.user_model + b.response_buffer;
            let variable = b.conversation_history + b.attachments + b.memory_budget();

            // The variable sections partition the pool with nothing left over.
            prop_assert_eq!(
                variable,
                total.saturating_sub(fixed),
                "variable sections must sum to the pool"
            );

            // Fixed plus variable is the whole window, or just the reserve
            // when the window cannot even fund that.
            prop_assert_eq!(fixed + variable, total.max(fixed));
        }

        /// Doubling a window of at least 2_400 tokens leaves the fixed
        /// overheads untouched and strictly grows attachments, memory and
        /// conversation history (doubling clears the ±1 floor jitter of the
        /// percentage split).
        #[test]
        fn for_context_size_scales_monotonically(total in 2_400usize..4_000_000) {
            let base = TokenBudget::for_context_size(total);
            let doubled = TokenBudget::for_context_size(total * 2);

            // Fixed sections.
            prop_assert_eq!(doubled.system_prompt, base.system_prompt);
            prop_assert_eq!(doubled.user_model, base.user_model);
            prop_assert_eq!(doubled.response_buffer, base.response_buffer);

            // Variable sections.
            prop_assert!(doubled.attachments > base.attachments);
            prop_assert!(doubled.memory_budget() > base.memory_budget());
            prop_assert!(doubled.conversation_history > base.conversation_history);
        }
    }

    /// The rendered profile context mentions the name, a preference and a goal.
    #[test]
    fn test_user_profile_to_context() {
        let owned =
            |items: &[&str]| -> Vec<String> { items.iter().map(|s| s.to_string()).collect() };

        let profile = UserProfile {
            name: Some(String::from("Alice")),
            preferences: owned(&["coffee", "quiet mornings"]),
            goals: owned(&["learn Rust"]),
            facts: owned(&["works remotely"]),
        };

        let rendered = profile.to_context_string();
        for needle in ["Alice", "coffee", "learn Rust"] {
            assert!(rendered.contains(needle));
        }
    }

    /// Passing the onboarding addendum must surface its marker inside a
    /// system-role message.
    #[test]
    fn test_assemble_with_addendum_injects_into_system_prompt() {
        let assembled = ContextAssembler::with_defaults().assemble_with_addendum(
            "hi",
            &[],
            &[],
            Some(ONBOARDING_ADDENDUM),
        );

        // First message carrying the system role.
        let system_msg = assembled
            .iter()
            .find(|msg| matches!(msg.role, Role::System))
            .expect("system message");

        let has_marker = system_msg.content.contains("[ONBOARDING MODE");
        assert!(
            has_marker,
            "onboarding addendum should be present in system prompt"
        );
    }

    /// The base SOUL prompt must carry the memory-grounding rule (WS3) that
    /// stops the reasoner inventing first-person "memories". It is checked
    /// on a plain `assemble` call so the rule is known to be present on
    /// ordinary, non-onboarding turns.
    #[test]
    fn system_prompt_forbids_fabricated_memories() {
        let assembled =
            ContextAssembler::with_defaults().assemble("what do you know about me?", &[], &[]);
        let base_prompt = assembled[0].content.as_str();

        let required = [
            (
                "MEMORY GROUNDING",
                "memory-grounding rule missing from system prompt",
            ),
            (
                "Relevant memories:",
                "rule should anchor on the real memories block label",
            ),
        ];
        for (needle, why) in required {
            assert!(base_prompt.contains(needle), "{why}");
        }
    }

    /// `assemble_with_addendum(.., None)` yields the same message count and
    /// the same first-message content as plain `assemble`.
    #[test]
    fn test_assemble_without_addendum_matches_plain_assemble() {
        let assembler = ContextAssembler::with_defaults();

        let plain = assembler.assemble("hi", &[], &[]);
        let explicit_none = assembler.assemble_with_addendum("hi", &[], &[], None);

        assert_eq!(plain.len(), explicit_none.len());
        assert_eq!(plain[0].content, explicit_none[0].content);
    }

    /// With one recalled memory and no history, the assembled prompt ends
    /// with the user's message carried verbatim under the user role.
    #[test]
    fn test_context_assembler_basic() {
        use hippocampus::search::MemorySource;

        let question = "What language should I learn?";
        let recalled = vec![Memory {
            id: "1".to_owned(),
            content: "User likes Rust programming".to_owned(),
            source: MemorySource::Semantic,
            score: 0.9,
            importance: 0.8,
            timestamp: "2026-01-01".to_owned(),
            agent: None,
        }];

        let assembled = ContextAssembler::with_defaults().assemble(question, &recalled, &[]);

        // Expected shape: system prompt, memory context, user message —
        // only the lower bound of two messages is asserted here.
        assert!(assembled.len() >= 2);

        let final_msg = assembled.last().unwrap();
        assert_eq!(final_msg.content, question);
        assert_eq!(final_msg.role, Role::User);
    }

    /// A memory recorded by a named agent is rendered with an `agent:`
    /// label; a memory without an agent adds no such label.
    #[test]
    fn test_context_assembler_agent_attribution() {
        use hippocampus::search::MemorySource;

        let attributed = Memory {
            id: "1".to_owned(),
            content: "User likes coffee".to_owned(),
            source: MemorySource::Episodic,
            score: 0.9,
            importance: 0.8,
            timestamp: "2026-01-01".to_owned(),
            agent: Some("chat-bot".to_owned()),
        };
        let anonymous = Memory {
            id: "2".to_owned(),
            content: "User works remotely".to_owned(),
            source: MemorySource::Semantic,
            score: 0.85,
            importance: 0.7,
            timestamp: "2026-01-02".to_owned(),
            agent: None,
        };
        let recalled = vec![attributed, anonymous];

        let assembled =
            ContextAssembler::with_defaults().assemble("Tell me about the user", &recalled, &[]);

        // The memories block is a message of its own that begins with the
        // label. Matching on the prefix keeps the base system prompt — which
        // mentions "Relevant memories:" in its grounding rule — out of it.
        let memory_block = assembled
            .iter()
            .find(|msg| msg.content.starts_with("Relevant memories:"))
            .expect("should have memory context message");

        assert!(
            memory_block.content.contains("agent: chat-bot"),
            "memory with agent should include attribution"
        );

        // Zero or one label in total: the agent-less memory adds none.
        let label_count = memory_block.content.matches("agent: ").count();
        assert!(
            label_count <= 1,
            "memory without agent should NOT include agent label"
        );
    }

    /// Prior turns are carried into the prompt and the current user message
    /// still comes last.
    #[test]
    fn test_context_assembler_with_history() {
        let earlier_turns = vec![
            Message {
                content: "Hello".to_owned(),
                role: Role::User,
                ..Message::default()
            },
            Message {
                content: "Hi there!".to_owned(),
                role: Role::Assistant,
                ..Message::default()
            },
        ];

        let assembled =
            ContextAssembler::with_defaults().assemble("How are you?", &[], &earlier_turns);

        // At least: system prompt, the history, and the current message.
        assert!(assembled.len() >= 3);
        assert_eq!(assembled.last().unwrap().content, "How are you?");
    }

    /// The default system prompt contains every core SOUL section and
    /// operating principle this test pins.
    #[test]
    fn test_default_prompt_core_instructions() {
        let assembled =
            ContextAssembler::with_defaults().assemble("How do I connect OpenClaw?", &[], &[]);
        let system = assembled[0].content.as_str();

        // Phrases that must simply be present.
        for needle in [
            "Brain",
            "SOUL",
            "biologically-inspired",
            "Episodic Memory",
            "Semantic Memory",
            "Proactivity",
            "TRUTH OVER HALLUCINATION",
        ] {
            assert!(system.contains(needle));
        }

        // Phrases with a dedicated explanation on failure.
        let explained = [
            (
                "CURIOSITY",
                "SOUL prompt must include CURIOSITY operating principle",
            ),
            (
                "ATTACHED CONTENT",
                "SOUL prompt must teach Brain how to handle <ATTACHED_CONTENT> blocks",
            ),
            (
                "chat export deserves a conversational summary",
                "SOUL prompt must instruct response-shape adaptation by content type",
            ),
        ];
        for (needle, why) in explained {
            assert!(system.contains(needle), "{why}");
        }
    }

    /// With no live digest, the system prompt falls back to the static
    /// `DEFAULT_CAPABILITIES` block of always-on faculties.
    #[test]
    fn default_capabilities_used_when_no_digest_supplied() {
        let assembled = ContextAssembler::with_defaults().assemble("what can you do?", &[], &[]);
        let system = assembled[0].content.as_str();

        assert!(system.contains(DEFAULT_CAPABILITIES));
        assert!(system.contains("Episodic Memory"));
    }

    /// A caller-supplied capability digest reaches the system prompt and
    /// replaces the static default block.
    #[test]
    fn live_capability_digest_overrides_default() {
        let digest = "Your Capabilities:\n- Episodic Memory: ...\n\nMounted tools:\n- MCP server \"github\": create_issue";

        let assembled = ContextAssembler::with_defaults().assemble_full(
            "what can you do?",
            &[],
            &[],
            None,
            Some(digest),
            &[],
            &[],
        );
        let system = assembled[0].content.as_str();

        assert!(
            system.contains("MCP server \"github\": create_issue"),
            "live digest must reach the system prompt"
        );
        // The digest stands in for the static block, so the default's
        // Semantic/Proactivity bullets only appear if the caller wrote them
        // into the digest — and this digest did not.
        assert!(!system.contains("a web of facts about the user's world"));
    }

    /// An attachment is rendered as its own system message placed directly
    /// before the user's message.
    #[test]
    fn attachments_render_as_a_dedicated_system_message_before_user() {
        let notes = Attachment {
            display_path: "/Users/me/notes.md".to_owned(),
            snapshot: "# my notes\nbuy milk".to_owned(),
        };
        let attachments = vec![notes];

        let assembled = ContextAssembler::with_defaults().assemble_full(
            "read this",
            &[],
            &[],
            None,
            None,
            &attachments,
            &[],
        );

        // The final message is the user's own text…
        let last = assembled.last().expect("non-empty");
        assert_eq!(last.role, Role::User);
        assert_eq!(last.content, "read this");

        // …and the one right before it is the attachments block.
        let block_msg = &assembled[assembled.len() - 2];
        assert_eq!(block_msg.role, Role::System);
        assert!(
            block_msg
                .content
                .contains("<ATTACHED_CONTENT path=\"/Users/me/notes.md\">"),
            "missing attached-content block:\n{}",
            block_msg.content
        );
        for needle in ["buy milk", "</ATTACHED_CONTENT>"] {
            assert!(block_msg.content.contains(needle));
        }
    }

    /// A skipped path shows up in the message before the user's as a
    /// `<SKIPPED_PATH` tag carrying both the path and the reason.
    #[test]
    fn skipped_paths_render_as_a_tag_for_brain_to_mention() {
        let skipped = vec![SkippedAttachment {
            display_path: "/Users/me/missing.txt".to_owned(),
            reason: "path not found".to_owned(),
        }];

        let assembled = ContextAssembler::with_defaults().assemble_full(
            "summarise it",
            &[],
            &[],
            None,
            None,
            &[],
            &skipped,
        );

        let block_msg = &assembled[assembled.len() - 2];
        for needle in ["<SKIPPED_PATH", "/Users/me/missing.txt", "path not found"] {
            assert!(block_msg.content.contains(needle));
        }
    }

    /// With neither attachments nor skipped paths, `assemble_full` produces
    /// exactly as many messages as plain `assemble`.
    #[test]
    fn no_attachments_means_no_extra_block() {
        let assembler = ContextAssembler::with_defaults();

        let plain_len = assembler.assemble("hi", &[], &[]).len();
        let full_len = assembler
            .assemble_full("hi", &[], &[], None, None, &[], &[])
            .len();

        assert_eq!(plain_len, full_len, "no attachments must not add a message");
    }

    /// A snapshot far larger than the attachments budget is cut down and
    /// flagged with the truncation marker.
    #[test]
    fn large_attachment_is_truncated_to_budget() {
        // 60_000 chars is roughly 20_000 tokens at the estimator's
        // 3 chars/token ratio, far beyond what the default budget gives
        // attachments, so the snapshot cannot be rendered in full.
        let attachments = vec![Attachment {
            display_path: "/Users/me/huge.txt".to_owned(),
            snapshot: "x".repeat(60_000),
        }];

        let assembled = ContextAssembler::with_defaults().assemble_full(
            "read",
            &[],
            &[],
            None,
            None,
            &attachments,
            &[],
        );
        let block_msg = &assembled[assembled.len() - 2];

        assert!(
            block_msg.content.contains("[truncated]"),
            "huge attachment must be marked as truncated"
        );

        // Sanity bound: the rendered block must stay well below the
        // 60_000-char input.
        let rendered_chars = block_msg.content.chars().count();
        assert!(
            rendered_chars < 10_000,
            "rendered block too large: {} chars",
            rendered_chars
        );
    }

    /// The onboarding greeting introduces Brain and asks for a name.
    #[test]
    fn test_onboarding_greeting_exists() {
        let expectations = [
            ("Brain", "greeting must mention Brain"),
            ("name", "greeting must ask for the user's name"),
        ];
        for (needle, why) in expectations {
            assert!(ONBOARDING_GREETING.contains(needle), "{why}");
        }
    }

    /// The onboarding addendum carries its mode marker and tells the model
    /// to ask follow-up questions.
    #[test]
    fn test_onboarding_addendum_exists() {
        let expectations = [
            (
                "ONBOARDING MODE",
                "addendum must contain ONBOARDING MODE marker",
            ),
            (
                "follow-up question",
                "addendum must instruct follow-up questions",
            ),
        ];
        for (needle, why) in expectations {
            assert!(ONBOARDING_ADDENDUM.contains(needle), "{why}");
        }
    }

    /// Both the message-level and the string-level estimators round the
    /// character count up to whole tokens.
    #[test]
    fn test_estimate_tokens() {
        let text = "Hello world";
        let messages = vec![Message::user(text)];

        let message_tokens = ContextAssembler::estimate_tokens(&messages);
        assert!(message_tokens > 0);

        // 11 chars at 3 chars/token round up to ceil(11/3) = 4.
        let expected = 11usize.div_ceil(CHARS_PER_TOKEN);
        assert_eq!(message_tokens, expected);
        assert_eq!(super::estimate_tokens(text), 4);
    }
}