opencrabs 0.3.57

The autonomous, self-improving AI agent. Single Rust binary. Every channel. Install with: cargo install opencrabs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
//! Brain-file write safety: backup + append-only enforcement.
//!
//! The 2026-04-26 RSI agent rewrote `~/.opencrabs/TOOLS.md` from 33 KB
//! down to a stub by passing the entire file as `old_content` to
//! `self_improve` action="update". Same pattern hit `MEMORY.md`. Brain
//! files are append-only by user policy: removal is allowed only when
//! it deduplicates content that already exists elsewhere in the file.
//!
//! This module is the chokepoint every brain-file mutation must go
//! through. It:
//!
//! - Snapshots the file to `<name>.YYYY-MM-DDTHHMMSS.bak` before any write
//! - Rejects writes that shrink a protected file unless the caller either
//!   passes the approval-gated `cleanup_intent`, or explicitly opts into a
//!   dedup intent and every removed line really is a duplicate that
//!   survives elsewhere in the result.

use std::path::Path;

/// Brain files that follow the append-only contract. Any mutation that
/// shrinks one of these without a justified dedup (or an explicit cleanup
/// intent) must be rejected.
///
/// Entries are bare file names. Lookups compare them ASCII
/// case-insensitively against the final path component only, so the
/// directory a file lives in does not matter.
const PROTECTED_BRAIN_FILES: &[&str] = &[
    "SOUL.md",
    "USER.md",
    "AGENTS.md",
    "TOOLS.md",
    "CODE.md",
    "SECURITY.md",
    "MEMORY.md",
    "BOOT.md",
];

/// Reports whether `name` is covered by the append-only contract.
///
/// `name` must be a bare file name (no directory components). The
/// comparison against [`PROTECTED_BRAIN_FILES`] ignores ASCII case, so
/// `memory.md` and `MEMORY.md` are treated alike.
pub fn is_protected_brain_file(name: &str) -> bool {
    for candidate in PROTECTED_BRAIN_FILES {
        if name.eq_ignore_ascii_case(candidate) {
            return true;
        }
    }
    false
}

/// True if `path`'s file name is a protected brain file.
pub fn is_protected_path(path: &Path) -> bool {
    path.file_name()
        .and_then(|n| n.to_str())
        .map(is_protected_brain_file)
        .unwrap_or(false)
}

/// Copies `path` to a sibling named `<path>.YYYY-MM-DDTHHMMSS.bak` (UTC
/// timestamp) so the pre-mutation content can be recovered.
///
/// Returns `Ok(None)` without touching the filesystem when `path` does not
/// exist, and `Ok(Some(backup_path))` once the copy has succeeded. A failed
/// copy is returned as the error.
///
/// Retention policy: after a successful copy, backups for this file are
/// pruned to at most 5 and to none older than 7 days. Pruning is best
/// effort: a failure is reported on stderr and does not fail the backup.
pub fn backup_before_write(path: &Path) -> std::io::Result<Option<std::path::PathBuf>> {
    if !path.exists() {
        return Ok(None);
    }

    // Append the suffix to the full path rather than replacing the
    // extension, so `TOOLS.md` becomes `TOOLS.md.<stamp>.bak`.
    let suffix = format!(".{}.bak", chrono::Utc::now().format("%Y-%m-%dT%H%M%S"));
    let mut target = path.as_os_str().to_os_string();
    target.push(suffix);
    let target = std::path::PathBuf::from(target);

    std::fs::copy(path, &target)?;

    // Prune old backups: keep max 5, delete any older than 7 days.
    match prune_backups(path, 5, 7) {
        Ok(()) => {}
        Err(e) => eprintln!(
            "Warning: failed to prune backups for {}: {}",
            path.display(),
            e
        ),
    }

    Ok(Some(target))
}

/// Delete old backups for `path`, keeping at most `max_count` and removing
/// any older than `max_age_days`.
///
/// Only directory entries named exactly `<file name>.<YYYY-MM-DDTHHMMSS>.bak`
/// are considered; the age of a backup is taken from the timestamp in its
/// name (interpreted as UTC), not from filesystem metadata. Anything else in
/// the directory is left alone, including look-alikes such as
/// `<file name>.bak` or `<file name>..<stamp>.bak.bak`.
///
/// Returns `Ok(())` without doing anything when `path` has no parent or no
/// UTF-8 file name. Errors from listing the directory are returned; a
/// failure to delete an individual backup is only reported on stderr so the
/// remaining candidates are still processed.
fn prune_backups(path: &Path, max_count: usize, max_age_days: u64) -> std::io::Result<()> {
    let Some(parent) = path.parent() else {
        return Ok(());
    };
    let Some(file_name) = path.file_name().and_then(|n| n.to_str()) else {
        return Ok(());
    };

    // A bare relative name such as `TOOLS.md` has the empty path as its
    // parent, which `read_dir` cannot open. Its backups live in the current
    // directory, so list that instead.
    let dir = if parent.as_os_str().is_empty() {
        Path::new(".")
    } else {
        parent
    };

    // Find all backup files for this base path
    let mut backups: Vec<(std::path::PathBuf, chrono::DateTime<chrono::Utc>)> = Vec::new();

    for entry in std::fs::read_dir(dir)? {
        let entry = entry?;
        let entry_name = entry.file_name();
        let Some(entry_str) = entry_name.to_str() else {
            continue;
        };

        // Require the exact shape `<base>.<stamp>.bak`: one separating dot
        // and one `.bak` suffix. Looser matching could select (and later
        // delete) files this module never created.
        let Some(stamp) = entry_str
            .strip_prefix(file_name)
            .and_then(|rest| rest.strip_prefix('.'))
            .and_then(|rest| rest.strip_suffix(".bak"))
        else {
            continue;
        };

        // Entries whose middle part is not a valid timestamp are ignored.
        if let Ok(dt) = chrono::NaiveDateTime::parse_from_str(stamp, "%Y-%m-%dT%H%M%S") {
            backups.push((entry.path(), dt.and_utc()));
        }
    }

    // Sort by timestamp descending (newest first)
    backups.sort_by_key(|b| std::cmp::Reverse(b.1));

    let cutoff = chrono::Utc::now() - chrono::Duration::days(max_age_days as i64);

    // Delete backups beyond max_count or older than cutoff
    for (i, (backup_path, timestamp)) in backups.iter().enumerate() {
        if !(i >= max_count || *timestamp < cutoff) {
            continue;
        }
        if let Err(e) = std::fs::remove_file(backup_path) {
            eprintln!(
                "Warning: failed to delete old backup {}: {}",
                backup_path.display(),
                e
            );
        }
    }

    Ok(())
}

/// Decision returned by [`check_no_shrink`].
#[derive(Debug, PartialEq, Eq)]
pub enum ShrinkCheck {
    /// Safe to write: the path is not a protected brain file, the new
    /// content is not shorter than the existing content, or the shrink was
    /// accepted via `cleanup_intent` or a verified `dedup_intent`.
    Allowed,
    /// Reject: the write would shorten a protected brain file.
    /// `message` is a user-facing explanation suitable for ToolResult::error;
    /// it names the file and the number of bytes that would be removed.
    Rejected { message: String },
}

/// Gatekeeper for the append-only contract on protected brain files.
///
/// * `path` is the file being mutated; only its file name decides whether it
///   is protected.
/// * `existing` is the current content (empty for a new file).
/// * `updated` is the content the caller wants to write.
///
/// The write is [`ShrinkCheck::Allowed`] when any of these hold:
/// - `path` is not a protected brain file;
/// - `updated` is at least as long (in bytes) as `existing`;
/// - `cleanup_intent` is set: user-initiated cleanup behind an approval gate
///   (only for write_opencrabs_file);
/// - `dedup_intent` is set AND every non-blank line of `existing` still
///   appears in `updated`.
///
/// Every other shrink of a protected file is [`ShrinkCheck::Rejected`] with a
/// message that names the file and the number of bytes lost.
pub fn check_no_shrink(
    path: &Path,
    existing: &str,
    updated: &str,
    dedup_intent: bool,
    cleanup_intent: bool,
) -> ShrinkCheck {
    let shrinks = updated.len() < existing.len();

    // Nothing to enforce for unprotected files or non-shrinking writes.
    // `cleanup_intent` is the user-approved bypass: it is only available in
    // write_opencrabs_file (requires_approval: true), not in self_improve
    // (autonomous RSI, no approval mechanism).
    if !is_protected_path(path) || !shrinks || cleanup_intent {
        return ShrinkCheck::Allowed;
    }

    // A declared dedup is honoured only when it verifiably loses no line.
    if dedup_intent && shrink_only_drops_duplicates(existing, updated) {
        return ShrinkCheck::Allowed;
    }

    // `shrinks` guarantees this subtraction cannot underflow.
    let removed_bytes = existing.len() - updated.len();
    let label = match path.file_name().and_then(|n| n.to_str()) {
        Some(name) => name,
        None => "brain file",
    };
    let hint = match dedup_intent {
        true => {
            " (dedup_intent was set, but the bytes removed do not all reappear in the result \
              — that's not deduplication, that's deletion)"
        }
        false => "",
    };

    ShrinkCheck::Rejected {
        message: format!(
            "Refusing to shrink protected brain file {label} by {removed_bytes} bytes. \
             Brain files are append-only — use action='apply' / operation='append' to \
             add new content. Removals are only allowed for genuine deduplication, and \
             must opt in via dedup_intent=true with a result that still contains every \
             unique line of the original.{hint}"
        ),
    }
}

/// Result of checking an append for duplicate content, as produced by
/// [`filter_duplicate_append`].
#[derive(Debug)]
pub enum AppendDedup {
    /// All content is new — append as-is (no paragraph was recognised as
    /// already present).
    AllNew,
    /// Some paragraphs were duplicates — append this filtered content instead.
    /// `filtered_content` holds the surviving paragraphs joined by a blank
    /// line; `skipped_paragraphs` counts how many were removed.
    Filtered {
        filtered_content: String,
        skipped_paragraphs: usize,
    },
    /// Everything is already in the file — skip the append entirely. Also
    /// returned when the content to append is empty or only whitespace.
    AllDuplicate,
}

/// Breaks `text` into paragraphs: maximal runs of non-blank lines, where a
/// line is blank when it is empty or whitespace-only.
///
/// Lines within a paragraph are re-joined with `\n` and keep their original
/// text; only trailing whitespace at the very end of each paragraph is
/// dropped. Blank-only input yields an empty vector.
fn split_paragraphs(text: &str) -> Vec<String> {
    let mut paragraphs = Vec::new();
    let mut block: Vec<&str> = Vec::new();

    // The trailing empty sentinel line flushes the final paragraph through
    // the same path as a real blank separator.
    for line in text.lines().chain(std::iter::once("")) {
        if !line.trim().is_empty() {
            block.push(line);
            continue;
        }
        if !block.is_empty() {
            paragraphs.push(block.join("\n").trim_end().to_string());
            block.clear();
        }
    }

    paragraphs
}

/// Recognises an incident-log entry: a line that, ignoring surrounding
/// whitespace and ASCII case, starts with `added ` or `repeat `, mentions
/// `session ` and contains at least one ASCII digit (e.g. from a date).
///
/// The RSI cycle keeps appending such evidence lines under the same rule,
/// so duplicate detection excludes them when comparing paragraphs.
fn is_incident_line(line: &str) -> bool {
    let text = line.trim();

    // ASCII digits are single bytes in UTF-8, so a byte scan is sufficient.
    if !text.bytes().any(|b| b.is_ascii_digit()) {
        return false;
    }

    let folded = text.to_ascii_lowercase();
    let tagged = ["added ", "repeat "]
        .iter()
        .any(|prefix| folded.starts_with(prefix));
    tagged && folded.contains("session ")
}

/// Returns `text` with every incident-log line removed, leaving only the
/// operational rule content. The surviving lines are joined with `\n`, with
/// no trailing newline.
///
/// Used for duplicate detection when the RSI cycle has appended several
/// dated entries to the same rule.
fn strip_incident_lines(text: &str) -> String {
    let mut kept = String::with_capacity(text.len());
    // Tracked explicitly because the first kept line may itself be empty,
    // which would make `kept.is_empty()` a misleading "nothing yet" signal.
    let mut first = true;

    for line in text.lines() {
        if is_incident_line(line) {
            continue;
        }
        if !first {
            kept.push('\n');
        }
        kept.push_str(line);
        first = false;
    }

    kept
}

/// Decides whether `paragraph` is already represented in `existing`.
///
/// A blank paragraph always counts as present. Otherwise the checks run in
/// order and the first hit wins:
/// 1. Verbatim: the trimmed paragraph is a substring of `existing`.
/// 2. Header: the paragraph's first line is a `## ` / `### ` heading that
///    already appears (trimmed) as a line of `existing`.
/// 3. Line overlap: the paragraph has at least 3 non-blank lines and more
///    than 70% of them appear (trimmed) as lines of `existing`.
/// 4. Incident-stripped overlap: the paragraph has at least 2 non-blank
///    lines including an ADDED/REPEAT incident line; with incident lines
///    removed from both sides, more than 70% of the remaining rule lines
///    appear in `existing`. Catches the RSI accumulation bug (issue #197)
///    where the same rule is re-appended with different dates and trailing
///    descriptions.
fn paragraph_exists(paragraph: &str, existing: &str) -> bool {
    /// True when more than 70% of `lines` (compared trimmed) are in `known`.
    /// Callers guarantee `lines` is non-empty.
    fn mostly_known(lines: &[&str], known: &std::collections::HashSet<&str>) -> bool {
        let hits = lines.iter().filter(|l| known.contains(l.trim())).count();
        hits as f64 / lines.len() as f64 > 0.7
    }

    let body = paragraph.trim();

    // Strategy 1 (and the blank-paragraph shortcut).
    if body.is_empty() || existing.contains(body) {
        return true;
    }

    // Strategy 2: an already-known markdown heading marks the section as
    // present.
    let heading = body
        .lines()
        .next()
        .map(str::trim)
        .filter(|h| h.starts_with("## ") || h.starts_with("### "));
    if let Some(heading) = heading {
        if existing.lines().any(|l| l.trim() == heading) {
            return true;
        }
    }

    // Strategy 3: plain line-level overlap for longer paragraphs.
    let body_lines: Vec<&str> = body.lines().filter(|l| !l.trim().is_empty()).collect();
    let known_lines: std::collections::HashSet<&str> =
        existing.lines().map(str::trim).collect();
    if body_lines.len() >= 3 && mostly_known(&body_lines, &known_lines) {
        return true;
    }

    // Strategy 4 only applies to paragraphs that carry incident lines.
    if body_lines.len() < 2 || !body.lines().any(is_incident_line) {
        return false;
    }

    // After stripping, "Another quoting error" and "Yet another quoting
    // error" style entries vanish and both sides collapse to the rule itself.
    let rule_text = strip_incident_lines(body);
    let rule_lines: Vec<&str> = rule_text
        .lines()
        .filter(|l| !l.trim().is_empty())
        .collect();
    if rule_lines.is_empty() {
        return false;
    }

    let known_text = strip_incident_lines(existing);
    let known_rules: std::collections::HashSet<&str> =
        known_text.lines().map(str::trim).collect();
    mostly_known(&rule_lines, &known_rules)
}

/// Compares `new_content` with `existing` paragraph by paragraph and reports
/// which part of it is genuinely new.
///
/// - Empty / whitespace-only content, content that already appears verbatim
///   in `existing`, and content whose every paragraph is already present all
///   yield [`AppendDedup::AllDuplicate`].
/// - Content with no recognised duplicates yields [`AppendDedup::AllNew`].
/// - Anything in between yields [`AppendDedup::Filtered`] carrying the new
///   paragraphs (in their original order, separated by a blank line) and the
///   number of paragraphs dropped.
///
/// This replaces the old `is_duplicate_append` boolean check: overlap no
/// longer blocks the whole append, only the duplicated paragraphs.
pub fn filter_duplicate_append(existing: &str, new_content: &str) -> AppendDedup {
    let candidate = new_content.trim();

    // Cheap exits before any paragraph analysis.
    if candidate.is_empty() || existing.contains(candidate) {
        return AppendDedup::AllDuplicate;
    }

    let paragraphs = split_paragraphs(candidate);
    let total = paragraphs.len();
    let fresh: Vec<String> = paragraphs
        .into_iter()
        .filter(|para| !paragraph_exists(para, existing))
        .collect();

    match (fresh.len(), total - fresh.len()) {
        // Nothing survived (this also covers "no paragraphs at all").
        (0, _) => AppendDedup::AllDuplicate,
        (_, 0) => AppendDedup::AllNew,
        (_, skipped) => AppendDedup::Filtered {
            filtered_content: fresh.join("\n\n"),
            skipped_paragraphs: skipped,
        },
    }
}

/// Boolean view of [`filter_duplicate_append`], kept for backward
/// compatibility with existing tests.
///
/// Returns true only when the entire append should be skipped; a partially
/// duplicated append returns false.
pub fn is_duplicate_append(existing: &str, new_content: &str) -> bool {
    match filter_duplicate_append(existing, new_content) {
        AppendDedup::AllDuplicate => true,
        AppendDedup::AllNew | AppendDedup::Filtered { .. } => false,
    }
}

/// Checks that a shrink is a genuine dedup rather than a deletion.
///
/// Every non-blank line of `existing` must still occur at least once in
/// `updated`; repeated lines may collapse to a single occurrence. Lines are
/// compared with trailing whitespace ignored, while leading whitespace stays
/// significant. Returns false as soon as one line has vanished entirely.
fn shrink_only_drops_duplicates(existing: &str, updated: &str) -> bool {
    let surviving: std::collections::HashSet<&str> =
        updated.lines().map(str::trim_end).collect();

    existing
        .lines()
        .map(str::trim_end)
        .filter(|line| !line.is_empty())
        .all(|line| surviving.contains(line))
}