opencrabs 0.3.12

The autonomous, self-improving AI agent. Single Rust binary. Every channel. Install with: cargo install opencrabs
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
use super::builder::AgentService;
use crate::brain::agent::context::AgentContext;
use crate::brain::agent::error::{AgentError, Result};
use crate::brain::provider::{LLMRequest, Message};
use crate::services::{MessageService, SessionService};
use tokio_util::sync::CancellationToken;
use uuid::Uuid;

impl AgentService {
    /// Assemble everything needed to issue an LLM request for a new user turn.
    ///
    /// Shared setup for `send_message()` and `send_message_streaming()`. In order:
    /// 1. verify the session exists (`AgentError::SessionNotFound` otherwise),
    /// 2. load the stored messages from the last compaction marker onward,
    /// 3. attach the default system brain (if any) and count its tokens,
    /// 4. append the new user message to the context and persist it,
    /// 5. build the `LLMRequest` with model, messages, max tokens, system
    ///    prompt, working directory and session id.
    ///
    /// Returns the resolved model name, the request, and both services so the
    /// caller can keep using them. Database failures are reported as
    /// `AgentError::Database`.
    pub(super) async fn prepare_message_context(
        &self,
        session_id: Uuid,
        user_message: String,
        model: Option<String>,
    ) -> Result<(String, LLMRequest, MessageService, SessionService)> {
        // The session must already exist; this lookup never creates one.
        let session_service = SessionService::new(self.context.clone());
        let lookup = session_service
            .get_session(session_id)
            .await
            .map_err(|err| AgentError::Database(err.to_string()))?;
        let _session = lookup.ok_or(AgentError::SessionNotFound(session_id))?;

        // Full stored history for this session.
        let message_service = MessageService::new(self.context.clone());
        let stored_messages = message_service
            .list_messages_for_session(session_id)
            .await
            .map_err(|err| AgentError::Database(err.to_string()))?;

        // An explicit model wins; otherwise fall back to the session provider's default.
        let model_name = match model {
            Some(explicit) => explicit,
            None => self
                .provider_for_session(session_id)
                .default_model()
                .to_string(),
        };
        let context_window = self.context_limit();

        // Replay only what follows the most recent compaction marker.
        let replayed = Self::messages_from_last_compaction(stored_messages);
        let mut context =
            AgentContext::from_db_messages(session_id, replayed, context_window as usize);

        // The system brain is sent with every request, so its tokens count too.
        if let Some(brain) = self.default_system_brain.as_ref() {
            context.token_count += AgentContext::estimate_tokens(brain);
            context.system_brain = Some(brain.clone());
        }

        // Record the new user turn in memory first, then in the database.
        context.add_message(Message::user(user_message.clone()));
        message_service
            .create_message(session_id, "user".to_string(), user_message)
            .await
            .map_err(|err| AgentError::Database(err.to_string()))?;

        // Base request, optionally decorated with the system prompt.
        let base_request = LLMRequest::new(model_name.clone(), context.messages.clone())
            .with_max_tokens(self.max_tokens);
        let mut request = match context.system_brain {
            Some(system) => base_request.with_system(system),
            None => base_request,
        };

        // Proxy-aware providers forward the working directory and session id.
        request.session_id = Some(session_id);
        request.working_directory =
            Some(self.get_working_directory().to_string_lossy().into_owned());

        Ok((model_name, request, message_service, session_service))
    }

    /// Load messages from the last compaction point forward.
    ///
    /// Scans backward for the most recent message whose content contains the
    /// `[CONTEXT COMPACTION` marker and returns that message together with
    /// everything after it. If no marker exists, the input is returned
    /// unchanged. This ensures restarts pick up exactly where compaction left
    /// off — no arbitrary trimming.
    ///
    /// The input vector is consumed, so the retained messages are moved out of
    /// it rather than cloned.
    pub fn messages_from_last_compaction(
        mut all_messages: Vec<crate::db::models::Message>,
    ) -> Vec<crate::db::models::Message> {
        const COMPACTION_MARKER: &str = "[CONTEXT COMPACTION";

        // Walk backward to find the last compaction marker.
        let Some(idx) = all_messages
            .iter()
            .rposition(|msg| msg.content.contains(COMPACTION_MARKER))
        else {
            return all_messages;
        };

        let total = all_messages.len();
        tracing::info!(
            "Found compaction marker at message {}/{} — loading {} messages from compaction point",
            idx,
            total,
            total - idx,
        );

        // `split_off` moves the tail out of the owned vector; the discarded
        // prefix is dropped with `all_messages` and nothing is cloned.
        all_messages.split_off(idx)
    }

    /// Build a "recovered brain" context string from key brain files.
    ///
    /// After compaction wipes the conversation history, this restores the agent's
    /// core identity, user context, tool documentation, and coding standards so it
    /// doesn't wake up with only a lossy LLM summary.
    ///
    /// Full files injected (~1-2k tokens total):
    /// - SOUL.md — personality, tone, hard rules
    /// - USER.md — who the human is, preferences
    /// - TOOLS.md — environment-specific tool notes
    ///
    /// CODE.md is injected as a compact summary only. Before ANY code task the
    /// agent MUST fetch the full file. Non-code tasks can ignore this section.
    ///
    /// Skipped: MEMORY.md (summary replaces it), BOOT/BOOTSTRAP/HEARTBEAT (rarely
    /// needed mid-task), SECURITY.md/AGENTS.md (loaded on demand if flagged in
    /// summary), IDENTITY.md (only for cron/social sessions).
    ///
    /// Files that are missing, unreadable, or blank are skipped. If none of the
    /// full files yields content, the output starts with a "no brain files
    /// found" notice instead of the recovered-context header; the CODE.md
    /// summary is appended in both cases.
    fn build_recovered_brain_context() -> String {
        use std::path::PathBuf;

        const CODE_MD_SUMMARY: &str =
"## CODE.md — Coding Standards (SUMMARY)
**Full file: ~/.opencrabs/CODE.md — use `load_brain_file(\"CODE.md\")` to read it before writing ANY code.**
If you are NOT doing code tasks, ignore this section entirely.

Best practices:
- Rust first. Always. (heyiolo is built in Dart/Swift — those are the only exceptions)
- Max 500 lines per file, target 100-250. Split without hesitation.
- Types in types.rs, handlers in handler.rs. One responsibility per file.
- Tests in `src/tests/<module>_test.rs` — never inline in source.
- `cargo clippy --all-features` + `cargo test --all-features` before every commit.
- No unwraps on user data, no dead code, no suppressing warnings.
- No #[allow()] unless you can defend why the lint is wrong.
- No unsafe without a soundness comment.
- Validate all external input. No hardcoded secrets. Sanitize output.
- Never give up on a problem. Never suppress errors.
- Git diff before commit — match the request exactly, no more, no less.

**CRITICAL: Before handling ANY code task, fetch full CODE.md:**
Use the `load_brain_file` tool with name=\"CODE.md\" — reads from ~/.opencrabs/CODE.md.
The summary above is NOT sufficient for implementation work.
";

        let full_files = [
            ("SOUL.md", "personality"),
            ("USER.md", "user profile"),
            ("TOOLS.md", "tool notes"),
        ];

        let opencrabs_home = crate::config::opencrabs_home();
        let mut brain_files = String::new();

        for (filename, label) in full_files {
            let path: PathBuf = opencrabs_home.join(filename);
            // Recovery is best-effort: a missing or unreadable file is skipped.
            let Ok(content) = std::fs::read_to_string(&path) else {
                continue;
            };
            let trimmed = content.trim();
            if !trimmed.is_empty() {
                brain_files.push_str(&format!(
                    "--- {} ({}) ---\n{}\n\n",
                    filename, label, trimmed
                ));
            }
        }

        // Decide on the fallback BEFORE appending the always-present CODE.md
        // summary; checking afterwards would make the fallback unreachable.
        if brain_files.is_empty() {
            format!(
                "[No brain files found — agent context limited]\n\n{}\n",
                CODE_MD_SUMMARY
            )
        } else {
            format!(
                "[RECOVERED BRAIN CONTEXT — these files define your identity, the user, your tools, and your coding standards. They take priority over any contradictory inference from the summary.]\n\n{}{}\n",
                brain_files, CODE_MD_SUMMARY
            )
        }
    }

    /// Auto-compact the context when usage is too high.
    ///
    /// Sends the conversation held in `context` (as much of its tail as fits
    /// the summarizer's input budget) to the LLM with a request for a
    /// structured continuation document. This document serves as a "wake-up"
    /// summary so OpenCrabs can continue working seamlessly after compaction.
    ///
    /// Steps:
    /// 1. Select the messages to summarize, never starting on an orphaned
    ///    tool-result message.
    /// 2. Stream the summarization request to `model_name`.
    /// 3. Append the summary to the daily memory log and re-index that file in
    ///    the background (both best-effort).
    /// 4. Replace the context with brain files + summary + a snapshot of the
    ///    last 8 messages, keeping recent messages within 55% of `max_tokens`.
    ///
    /// Returns the raw LLM summary (without the brain context or snapshot).
    ///
    /// # Errors
    /// Returns `AgentError::Provider` if the summarization request fails.
    pub(super) async fn compact_context(
        &self,
        session_id: Uuid,
        context: &mut AgentContext,
        model_name: &str,
        cancel_token: Option<&CancellationToken>,
    ) -> Result<String> {
        use crate::brain::provider::{ContentBlock, Role};

        // A user message consisting only of ToolResult blocks is "orphaned" when
        // it opens the request: its tool_use lives in an earlier message that is
        // no longer included, and the API rejects such a request with a 400.
        let is_tool_result_only = |m: &Message| {
            m.role == Role::User
                && !m.content.is_empty()
                && m.content
                    .iter()
                    .all(|b| matches!(b, ContentBlock::ToolResult { .. }))
        };

        let remaining_budget = context.max_tokens.saturating_sub(context.token_count);

        // Skip any leading orphaned tool-result messages (their tool_use was
        // removed by a prior trim).
        let start = context
            .messages
            .iter()
            .position(|m| !is_tool_result_only(m))
            .unwrap_or(context.messages.len());

        // Prefer sending every message since the last compaction so the summary
        // reflects exactly what the agent was running under. Only when that
        // would not fit are the OLDEST messages dropped. Reserve room for the
        // summarizer's OUTPUT budget (8k) + compaction prompt (~1k).
        let output_reserve = 8_000usize + 1_000usize;
        let max_input_budget = context.max_tokens.saturating_sub(output_reserve);
        let all_msgs = &context.messages[start..];

        // Walk from newest to oldest, keeping messages while they fit.
        let mut running_tokens = 0usize;
        let mut first_kept = all_msgs.len();
        for (idx, msg) in all_msgs.iter().enumerate().rev() {
            let cost = AgentContext::estimate_tokens_static(msg);
            if running_tokens + cost > max_input_budget {
                tracing::warn!(
                    "Compaction: dropping oldest messages to fit input budget ({}/{} tokens used)",
                    running_tokens,
                    max_input_budget,
                );
                break;
            }
            running_tokens += cost;
            first_kept = idx;
        }

        // Budget trimming may have cut between a tool_use and its tool_result,
        // leaving a new orphan at the front. Drop those too. This is a no-op
        // when nothing was trimmed, because `start` already points past any
        // leading orphan.
        let mut dropped_orphans = 0usize;
        while let Some(msg) = all_msgs.get(first_kept) {
            if !is_tool_result_only(msg) {
                break;
            }
            running_tokens =
                running_tokens.saturating_sub(AgentContext::estimate_tokens_static(msg));
            first_kept += 1;
            dropped_orphans += 1;
        }
        if dropped_orphans > 0 {
            tracing::warn!(
                "Compaction: dropped {} orphaned tool-result message(s) left at the front after trimming",
                dropped_orphans,
            );
        }

        let msgs_to_include = &all_msgs[first_kept..];

        tracing::info!(
            "Compaction: sending {} / {} messages to summarizer ({} / {} input tokens, reserving {} for output)",
            msgs_to_include.len(),
            all_msgs.len(),
            running_tokens,
            context.max_tokens,
            output_reserve,
        );

        // Build the summarization request from the selected conversation slice.
        let mut summary_messages: Vec<Message> = msgs_to_include.to_vec();

        // Add the compaction instruction as a user message
        let compaction_prompt = format!(
            "CRITICAL: The context window is at {:.0}% capacity ({} / {} tokens, {} tokens remaining). \
             The conversation must be compacted NOW.\n\n\
             You are creating a COMPREHENSIVE CONTINUATION DOCUMENT. After compaction, a fresh agent \
             instance will wake up with ONLY this summary as context. It must be able to continue \
             working immediately without asking the user what to do.\n\n\
             Analyze the ENTIRE conversation chronologically and produce the following:\n\n\
             ## 1. Chronological Analysis\n\
             Walk through every task the user requested, in order. For each task include:\n\
             - What was requested\n\
             - What was done (with exact file paths and line numbers where relevant)\n\
             - Exact code snippets for any changes made (show before/after when applicable)\n\
             - Whether it was completed, committed, pushed, or still pending\n\n\
             ## 2. Files Modified\n\
             List EVERY file that was created, edited, read, or discussed. For each file include:\n\
             - Full file path\n\
             - What was changed and why\n\
             - Key code snippets showing the current state of changes\n\
             - Whether the change is committed or uncommitted\n\n\
             ## 3. User Preferences & Constraints\n\
             List EVERY preference, constraint, or strong reaction from the user. Include:\n\
             - Things the user explicitly said to NEVER do (with their exact words if they were emphatic)\n\
             - Workflow preferences (commit style, release process, tool choices)\n\
             - Technical constraints or architectural decisions\n\
             - Any corrections the user made to your work\n\n\
             ## 4. Errors & Corrections\n\
             Every error encountered, every mistake made, and how each was resolved. Include:\n\
             - Exact error messages when available\n\
             - What caused the error\n\
             - The fix applied\n\
             - User reactions to mistakes (so the agent avoids repeating them)\n\n\
             ## 5. All User Messages\n\
             Summarize every user message in order, capturing their intent and exact wording \
             for important instructions. This is critical for understanding the user's communication \
             style and expectations.\n\n\
             ## 6. Pending Tasks\n\
             List everything that is NOT yet done:\n\
             - Uncommitted changes\n\
             - Tasks mentioned but not started\n\
             - Investigations in progress\n\
             - Next steps the user expects\n\n\
             ## 7. Current Work\n\
             What was the agent doing RIGHT BEFORE this compaction? What is the immediate next action? \
             The fresh agent must pick up exactly where this left off.\n\n\
             ## 8. Recovery Playbook\n\
             The fresh agent has these tools available to recover any missing context:\n\
             - `session_search` — search past conversation messages in this session by keyword\n\
             - `memory_search` — search daily memory logs and indexed knowledge\n\
             - `load_brain_file` — reload brain files (SOUL.md, TOOLS.md, USER.md, etc.) for identity/preferences\n\
             - `read_file` / `glob` / `grep` — read any file, search by pattern, search file contents\n\
             - `bash` — run shell commands (git status, git log, git diff, etc.)\n\
             - `ls` — list directory contents\n\
             - `gh` — GitHub CLI for ALL GitHub operations (repos, releases, issues, PRs). \
             NEVER use HTTP requests to GitHub — always use `gh` CLI.\n\n\
             Write a SPECIFIC recovery plan: which tools to call with which arguments to get back \
             up to speed. Example: \"Run `git status` and `git diff` to see uncommitted changes, \
             then `read_file src/main.rs` to verify the current state of the fix, then \
             `session_search 'vision fallback'` to recover details from the investigation.\"\n\
             Be concrete — include actual file paths, search queries, and commands.\n\n\
             ## 9. Next Step\n\
             State the single most important thing the agent should do when it wakes up. \
             If the task is clear, continue immediately. If ambiguous, ask the user ONE focused \
             follow-up question.\n\n\
             ## 10. Continuation Message\n\
             Write a SHORT, punchy message (2-4 sentences) that the agent will say to the user \
             right after waking up from compaction. This message MUST:\n\
             - Reference SPECIFIC things from the conversation (file names, user quotes, inside jokes, \
             frustrations, wins) — prove the agent remembers everything\n\
             - Mention what was just accomplished and what's next in a way that feels alive and engaged\n\
             - Match the user's energy and communication style from the conversation\n\
             - Be creative, surprising, maybe funny — make the user think \"holy shit it remembers\"\n\
             - End with a clear action: what the agent is about to do next or a specific question\n\
             DO NOT be generic. DO NOT say \"I'm ready to continue.\" Reference actual conversation details \
             that only someone who was there would know.\n\n\
             Tool approval status: {}\n\n\
             BE EXHAUSTIVE. This is not a summary — it is a complete knowledge transfer. \
             Include code snippets, exact paths, user quotes, error messages. \
             The fresh agent has ZERO context beyond what you write here.",
            context.usage_percentage(),
            context.token_count,
            context.max_tokens,
            remaining_budget,
            if self.auto_approve_tools {
                "AUTO-APPROVE ON (tools run freely)"
            } else {
                "AUTO-APPROVE OFF — tool approval is REQUIRED for every tool call"
            },
        );

        summary_messages.push(Message::user(compaction_prompt));

        let mut request = LLMRequest::new(model_name.to_string(), summary_messages)
            .with_max_tokens(self.max_tokens)
            .with_system("You are a continuation document generator. Your job is to create an exhaustive, \
             detailed knowledge transfer document from a conversation so that a fresh AI agent can \
             continue the work seamlessly. You must capture every file path, code snippet, user preference, \
             error, and pending task. The agent reading your output will have ZERO prior context — \
             your document is its entire memory. Be thorough to the point of being verbose. \
             Missing a single detail could cause the agent to repeat mistakes or violate user preferences.".to_string());
        request.working_directory =
            Some(self.get_working_directory().to_string_lossy().to_string());
        request.session_id = Some(session_id);

        // Use streaming so the TUI shows the summary being written in real-time
        // instead of freezing silently for 2-5 minutes on large contexts
        let (response, _reasoning) = self
            .stream_complete(session_id, request, cancel_token, None, None, None, true)
            .await
            .map_err(AgentError::Provider)?;

        let summary = Self::extract_text_from_response(&response);

        // Save to daily memory log (best-effort: a failure must not abort compaction)
        if let Err(e) = self.save_to_memory(&summary).await {
            tracing::warn!("Failed to save compaction summary to daily log: {}", e);
        }

        // Index the updated memory file in the background so memory_search picks it up
        let memory_path = crate::config::opencrabs_home()
            .join("memory")
            .join(format!("{}.md", chrono::Local::now().format("%Y-%m-%d")));
        tokio::spawn(async move {
            if let Ok(store) = crate::memory::get_store() {
                let _ = crate::memory::index_file(store, &memory_path).await;
            }
        });

        // Snapshot the last 8 messages as formatted text before compaction.
        // This gives the agent immediate access to recent context without needing
        // an extra session_search call after waking up.
        let recent_snapshot = Self::format_recent_messages(&context.messages, 8);

        // Inject recovered brain files — after compaction the agent needs its
        // identity, user context, tool docs, and coding standards back in full
        // fidelity, not just a lossy LLM summary.
        let brain_context = Self::build_recovered_brain_context();
        let summary_with_context = if recent_snapshot.is_empty() {
            format!("{}\n\n{}", brain_context, summary)
        } else {
            format!(
                "{}\n\n{}\n\n## Recent Message Pairs (pre-compaction snapshot)\n\
                 The following are the last messages before compaction — use them to \
                 understand the current task state and decide what context to reload.\n\n{}",
                brain_context, summary, recent_snapshot
            )
        };

        // Compact the context: keep recent messages within 55% of max_tokens
        // (below the 65% budget threshold so hard-truncation never fires after compaction)
        let keep_budget = (context.max_tokens as f64 * 0.55) as usize;
        context.compact_with_summary(summary_with_context, keep_budget);

        tracing::info!(
            "Context compacted: now at {:.0}% ({} tokens)",
            context.usage_percentage(),
            context.token_count
        );

        Ok(summary)
    }

    /// Format the last N messages into a human-readable snapshot for post-compaction context.
    /// Truncates long tool results to keep the snapshot concise.
    pub(crate) fn format_recent_messages(messages: &[Message], n: usize) -> String {
        use crate::brain::provider::{ContentBlock, Role};

        let start = messages.len().saturating_sub(n);
        let mut lines = Vec::new();

        for msg in &messages[start..] {
            let role_label = match msg.role {
                Role::User => "**User**",
                Role::Assistant => "**Assistant**",
                Role::System => "**System**",
            };

            for block in &msg.content {
                match block {
                    ContentBlock::Text { text } => {
                        // Truncate very long text blocks to ~500 bytes
                        let display = if text.len() > 500 {
                            let end = text.floor_char_boundary(500);
                            format!("{}… [truncated]", &text[..end])
                        } else {
                            text.clone()
                        };
                        lines.push(format!("{}: {}", role_label, display));
                    }
                    ContentBlock::ToolUse { name, input, .. } => {
                        let input_preview = {
                            let s = input.to_string();
                            if s.len() > 200 {
                                let end = s.floor_char_boundary(200);
                                format!("{}", &s[..end])
                            } else {
                                s
                            }
                        };
                        lines.push(format!(
                            "{}: [tool_use: {}({})]",
                            role_label, name, input_preview
                        ));
                    }
                    ContentBlock::ToolResult { content, .. } => {
                        let display = if content.len() > 300 {
                            let end = content.floor_char_boundary(300);
                            format!("{}… [truncated]", &content[..end])
                        } else {
                            content.clone()
                        };
                        lines.push(format!("{}: [tool_result: {}]", role_label, display));
                    }
                    ContentBlock::Image { .. } => {
                        lines.push(format!("{}: [image]", role_label));
                    }
                    ContentBlock::Thinking { thinking, .. } => {
                        if !thinking.is_empty() {
                            let display = if thinking.len() > 300 {
                                let end = thinking.floor_char_boundary(300);
                                format!("{}… [truncated]", &thinking[..end])
                            } else {
                                thinking.clone()
                            };
                            lines.push(format!("{}: [thinking: {}]", role_label, display));
                        }
                    }
                }
            }
        }

        lines.join("\n")
    }

    /// Save a compaction summary to a daily memory log at `~/.opencrabs/memory/YYYY-MM-DD.md`.
    ///
    /// Multiple compactions per day append to the same file: the existing
    /// content is read, the new summary is added under a timestamped
    /// `## Auto-Compaction Summary` heading, and the file is rewritten. The
    /// brain workspace's `MEMORY.md` is left untouched — it stays as
    /// user-curated durable memory.
    ///
    /// # Errors
    /// Returns a descriptive message if the memory directory cannot be
    /// created, if an existing log cannot be read, or if the write fails. An
    /// unreadable existing log is an error rather than being treated as empty,
    /// because rewriting the file would otherwise discard earlier entries.
    pub(super) async fn save_to_memory(&self, summary: &str) -> std::result::Result<(), String> {
        let memory_dir = crate::config::opencrabs_home().join("memory");

        std::fs::create_dir_all(&memory_dir)
            .map_err(|e| format!("Failed to create memory directory: {}", e))?;

        // Sample the clock once so the file name and the heading timestamp
        // cannot disagree when a compaction straddles midnight.
        let now = chrono::Local::now();
        let memory_path = memory_dir.join(format!("{}.md", now.format("%Y-%m-%d")));

        // Read existing content (if any — multiple compactions per day stack).
        // Only "file does not exist yet" counts as empty.
        let existing = match std::fs::read_to_string(&memory_path) {
            Ok(content) => content,
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => String::new(),
            Err(e) => return Err(format!("Failed to read daily memory log: {}", e)),
        };

        let timestamp = now.format("%Y-%m-%d %H:%M:%S");
        let new_content = format!(
            "{}\n\n---\n\n## Auto-Compaction Summary ({})\n\n{}\n",
            existing.trim(),
            timestamp,
            summary
        );

        // `trim_start` removes the leading blank lines produced when `existing` is empty.
        std::fs::write(&memory_path, new_content.trim_start())
            .map_err(|e| format!("Failed to write daily memory log: {}", e))?;

        tracing::info!("Saved compaction summary to {}", memory_path.display());
        Ok(())
    }
}