rsclaw-cap 0.1.0

Cap crate for RsClaw — internal workspace crate, not for direct use
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
//! Workspace identity files for cap_live bindings.
//!
//! When `/cap <agent>` binds without a user-specified path, the
//! coding subagent's cwd is the rsclaw default workspace
//! (`~/.rsclaw/workspace/`). We control that directory — it is not
//! part of any git repo, the user does not edit it by hand, and
//! anything we drop there is private to rsclaw. That makes it a
//! safe place to materialise two markdown files the coding agent
//! will automatically pick up at process startup:
//!
//!   * `USER.md` — the **source of truth** for user identity facts
//!     drawn from rsclaw's memory store. Small, human-readable,
//!     deliberately scoped to the kind of profile context the agent
//!     should treat as background ("the user prefers terse Chinese
//!     replies", "their cat is named 小白", "they work on Tauri/Rust
//!     stacks"). Pinned + high-importance + recent memory docs only;
//!     ephemeral session notes stay out.
//!
//!   * `AGENTS.md` — what the coding agent **actually reads** on
//!     launch (claudecode, codex, opencode all scan for AGENTS.md
//!     or CLAUDE.md walking up from cwd). Wraps the USER.md body
//!     in a marker-bracketed section so we can refresh that section
//!     on every `/cap` bind without clobbering any manual edits the
//!     user made outside the marker range.
//!
//! When the user invokes `/cap <agent> ~/dev/myproj`, cwd is their
//! own repository — we DELIBERATELY do not write either file there.
//! Their repo may already have an AGENTS.md, may get pushed to git,
//! and may contain code they care about; we should not be putting
//! profile facts in someone else's source tree even if we're
//! "helping". For that case the existing Option B first-turn
//! injection still delivers user context, just without a file
//! artifact.

use anyhow::Result;
use std::path::{Path, PathBuf};

use rsclaw_memory::{MemDocTier, MemoryDoc, global_store};

/// Opening marker of the auto-managed slice inside `AGENTS.md`.
/// When both markers are present, with an END marker after the
/// BEGIN marker, the runtime replaces the content **between** them
/// on every `/cap` bind and keeps the text outside them, so a user
/// can hand-edit project-specific instructions in the same file
/// without losing them on the next refresh.
const USER_BEGIN_MARKER: &str = "<!-- BEGIN rsclaw:user-profile -->";
/// Closing marker of the auto-managed slice inside `AGENTS.md`;
/// paired with [`USER_BEGIN_MARKER`].
const USER_END_MARKER: &str = "<!-- END rsclaw:user-profile -->";

/// Header stamped at the top of `USER.md`. Mentions the auto-gen
/// status so a curious user `cat`-ing the file understands the
/// edit-and-it-gets-clobbered behavior up front. The phrase
/// "auto-generated by rsclaw cap_live" inside it is the sentinel that
/// `is_rsclaw_authored` looks for, so keep the two in sync.
const USER_FILE_HEADER: &str =
    "<!-- auto-generated by rsclaw cap_live; refreshed on every /cap bind. \
     Manual edits to this file will be overwritten. -->";

/// Maximum number of memory docs surfaced into the USER profile.
/// Capped so a user with hundreds of low-importance notes doesn't
/// drown the agent in noise on every bind.
const MAX_PROFILE_ENTRIES: usize = 30;

/// Reports whether `cwd` is the default rsclaw workspace — the only
/// directory identity files may be written to (see module docs).
///
/// Both paths are resolved through the filesystem first so that
/// symlinks or differently-spelled paths still compare equal. If either
/// side cannot be resolved, the check degrades to comparing the paths
/// exactly as given.
pub fn is_default_workspace(cwd: &Path) -> bool {
    let expected = default_workspace_path();
    let resolved_cwd = std::fs::canonicalize(cwd);
    let resolved_expected = std::fs::canonicalize(&expected);

    if let (Ok(lhs), Ok(rhs)) = (resolved_cwd, resolved_expected) {
        return lhs == rhs;
    }
    // At least one side failed to resolve: fall back to a literal
    // comparison of the unresolved paths.
    cwd == expected.as_path()
}

fn default_workspace_path() -> PathBuf {
    let base = rsclaw_config::loader::base_dir();
    if let Ok(cfg) = rsclaw_config::load()
        && let Some(ws) = cfg
            .raw
            .agents
            .as_ref()
            .and_then(|a| a.defaults.as_ref())
            .and_then(|d| d.workspace.as_deref())
    {
        return PathBuf::from(ws);
    }
    base.join("workspace")
}

/// Write `USER.md` + `AGENTS.md` to the cap binding's workspace.
///
/// Does nothing (and returns `Ok`) when `cwd` is not the default
/// rsclaw workspace. Otherwise:
///
///   * `USER.md` is (re)written from the memory-derived profile when
///     it is missing or carries the rsclaw auto-gen sentinel; a file
///     without the sentinel is treated as hand-authored and kept.
///   * `AGENTS.md` is (re)written when it is missing, contains the
///     BEGIN/END markers, or carries the sentinel; otherwise it is
///     kept verbatim.
///
/// `plugins` and `skills` are the names listed in the capability
/// snapshot embedded in the managed region of `AGENTS.md`.
///
/// # Errors
///
/// Best-effort: every failure along the way (workspace directory
/// cannot be created, file write fails) is logged at `warn` and the
/// function still returns `Ok(())` — identity files are an ergonomic
/// boost, not a hard dependency of the cap bind. The `Result` return
/// type is kept for interface compatibility.
pub async fn write_identity_files(
    cwd: &Path,
    plugins: &[String],
    skills: &[String],
) -> Result<()> {
    if !is_default_workspace(cwd) {
        tracing::debug!(
            target: "cap",
            cwd = %cwd.display(),
            "cap identity: skipping non-default workspace (user-specified path)"
        );
        return Ok(());
    }
    // NOTE(review): the filesystem calls below are blocking `std::fs`
    // calls inside an async fn; the files involved are small.
    //
    // Honour the best-effort contract: a directory-creation failure
    // must not abort the cap bind, so log and bail out with Ok instead
    // of propagating the IO error.
    if let Err(e) = std::fs::create_dir_all(cwd) {
        tracing::warn!(
            target: "cap",
            path = %cwd.display(),
            error = %e,
            "cap identity: failed to create workspace directory"
        );
        return Ok(());
    }

    let user_body = build_user_md_body().await;
    let user_path = cwd.join("USER.md");
    let agents_path = cwd.join("AGENTS.md");

    // USER.md preservation rule (same as AGENTS.md): only rewrite the
    // file when it is missing OR carries our auto-gen sentinel.
    // Anything else is presumed hand-authored — even in our own
    // default workspace, users sometimes drop personal notes here
    // and silently clobbering them on every /cap is a data-loss
    // footgun. The check matches `is_rsclaw_authored` to stay in
    // lock-step with the AGENTS.md branch.
    let existing_user = std::fs::read_to_string(&user_path).ok();
    let touch_user = match existing_user.as_deref() {
        None => true,
        Some(s) => is_rsclaw_authored(s),
    };
    let user_full = format!("{}\n\n{}", USER_FILE_HEADER, user_body);
    if touch_user {
        if let Err(e) = std::fs::write(&user_path, &user_full) {
            tracing::warn!(
                target: "cap",
                path = %user_path.display(),
                error = %e,
                "cap identity: failed to write USER.md"
            );
            return Ok(());
        }
    } else {
        tracing::info!(
            target: "cap",
            path = %user_path.display(),
            "cap identity: USER.md is user-authored — preserving verbatim"
        );
    }

    // For composing AGENTS.md, prefer the on-disk USER.md body (if
    // user-authored — they wrote it deliberately) over the memory-
    // derived body. Falls through to the memory body when USER.md
    // doesn't exist or was rsclaw-generated.
    let user_body_for_agents = match existing_user.as_deref() {
        Some(s) if !is_rsclaw_authored(s) => strip_header_lines(s),
        _ => user_body,
    };
    let agents_existing = std::fs::read_to_string(&agents_path).ok();
    let agents_new =
        compose_agents_md(agents_existing.as_deref(), &user_body_for_agents, plugins, skills);
    // Only write when the file is ours to manage: missing, marker-
    // bracketed, or stamped with the auto-gen sentinel.
    let touch_agents = match agents_existing.as_deref() {
        None => true,
        Some(s) => has_markers(s) || is_rsclaw_authored(s),
    };
    if touch_agents {
        if let Err(e) = std::fs::write(&agents_path, &agents_new) {
            tracing::warn!(
                target: "cap",
                path = %agents_path.display(),
                error = %e,
                "cap identity: failed to write AGENTS.md"
            );
            return Ok(());
        }
    } else {
        tracing::info!(
            target: "cap",
            path = %agents_path.display(),
            "cap identity: AGENTS.md is user-authored — preserving verbatim"
        );
    }
    tracing::info!(
        target: "cap",
        workspace = %cwd.display(),
        wrote_user = touch_user,
        wrote_agents = touch_agents,
        "cap identity: refresh complete"
    );
    Ok(())
}

/// Drop the leading run of lines that are blank or consist solely of a
/// single-line HTML comment (`<!-- … -->`), and return the remainder
/// joined with `\n`.
///
/// Used when a user-authored `USER.md` is embedded into `AGENTS.md`, so
/// the embedded section starts at the first line of real content.
/// Only the *leading* run is removed; comment or blank lines after the
/// first content line are kept. Because the result is rebuilt from
/// `str::lines`, a trailing newline in the input is not preserved.
fn strip_header_lines(s: &str) -> String {
    s.lines()
        // Single pass: skip the header run lazily instead of repeatedly
        // removing the first element of a Vec (which shifts every
        // remaining element each time).
        .skip_while(|line| {
            let t = line.trim();
            t.is_empty() || (t.starts_with("<!--") && t.ends_with("-->"))
        })
        .collect::<Vec<_>>()
        .join("\n")
}

/// Produce the text `AGENTS.md` should contain, given its current
/// contents (if any):
///   - No existing file → the full default template with the managed
///     region embedded.
///   - Existing file with our markers → the same file with the managed
///     region spliced into the marker range; text outside is kept.
///   - Existing file without markers but carrying the auto-gen
///     sentinel → regenerated from the default template.
///   - Existing file with neither markers nor sentinel → returned
///     unchanged; the user's own instructions win.
///
/// The managed region carries BOTH the live capability snapshot
/// (`plugins` / `skills`) and `user_body`, so the coding agent sees
/// the current name list without running a discovery command.
fn compose_agents_md(
    existing: Option<&str>,
    user_body: &str,
    plugins: &[String],
    skills: &[String],
) -> String {
    let region = managed_region(plugins, skills, user_body);

    let Some(current) = existing else {
        return default_template(&region);
    };

    if has_markers(current) {
        splice_between_markers(current, &region)
    } else if is_rsclaw_authored(current) {
        default_template(&region)
    } else {
        current.to_owned()
    }
}

/// Build the text that lives between the BEGIN/END markers: an optional
/// capability snapshot followed by the trimmed user profile body.
///
/// The snapshot is emitted only when at least one plugin or skill is
/// given. It opens with a one-line lead-in, then lists plugin names
/// (with the CLI call syntax) and/or skill names (with the CLI load
/// syntax), each list comma-separated.
fn managed_region(plugins: &[String], skills: &[String], user_body: &str) -> String {
    let has_plugins = !plugins.is_empty();
    let has_skills = !skills.is_empty();

    let mut region = String::new();
    if has_plugins || has_skills {
        region += "Loaded capabilities — prefer these over scraping the web:\n\n";
    }
    if has_plugins {
        let names = plugins.join(", ");
        let line = format!(
            "**Loaded plugins** (call: `rsclaw plugins call <plugin>.<tool> --args '<json>'`, \
             e.g. `rsclaw plugins call astock.quote --args '{{\"code\":\"300033\"}}'`): {}\n\n",
            names
        );
        region += &line;
    }
    if has_skills {
        let names = skills.join(", ");
        let line = format!(
            "**Installed skills** (load + follow: `rsclaw skills use <name>`): {}\n\n",
            names
        );
        region += &line;
    }
    region += user_body.trim();
    region
}

/// `true` when `s` contains both the BEGIN and the END marker somewhere.
/// Only presence is checked here, not their relative order.
fn has_markers(s: &str) -> bool {
    [USER_BEGIN_MARKER, USER_END_MARKER]
        .into_iter()
        .all(|marker| s.contains(marker))
}

/// Heuristic authorship test: a file counts as rsclaw-generated when it
/// contains the auto-gen sentinel phrase anywhere in its text.
/// Everything else is presumed hand-written and left alone. This is
/// deliberately a plain substring match — the aim is to respect a
/// clearly hand-written file, not to prove authorship.
fn is_rsclaw_authored(s: &str) -> bool {
    const SENTINEL: &str = "auto-generated by rsclaw cap_live";
    s.contains(SENTINEL)
}

/// Replace the text between the BEGIN and END markers in `existing`
/// with `user_body` (trimmed, padded by blank lines), keeping everything
/// up to and including the BEGIN marker and everything from the END
/// marker onwards.
///
/// The END marker is searched for *after* the first BEGIN marker, so a
/// stray END marker earlier in the file does not invalidate an
/// otherwise well-formed BEGIN…END pair. When there is no BEGIN marker,
/// or no END marker following it, there is no managed range to splice
/// into and the whole file is regenerated from the default template.
fn splice_between_markers(existing: &str, user_body: &str) -> String {
    let Some(begin_off) = existing.find(USER_BEGIN_MARKER) else {
        return default_template(user_body);
    };
    // First byte after the BEGIN marker; a valid char boundary because
    // it is the end of a successful substring match.
    let body_start = begin_off + USER_BEGIN_MARKER.len();
    let Some(end_rel) = existing[body_start..].find(USER_END_MARKER) else {
        // No END after BEGIN (missing, or only present before BEGIN) —
        // regenerate from template.
        return default_template(user_body);
    };
    let (head, rest) = existing.split_at(body_start);
    let tail = &rest[end_rel..];
    format!("{head}\n\n{}\n\n{tail}", user_body.trim())
}

/// Render a complete `AGENTS.md` from scratch.
///
/// The output starts with an HTML comment containing the auto-gen
/// sentinel phrase ("auto-generated by rsclaw cap_live"), followed by
/// an introduction for the coding subagent, a `## Session context`
/// section in which the trimmed `user_body` sits between the BEGIN and
/// END markers, and a cheat sheet of `rsclaw` CLI commands.
///
/// Inside the format string, `{{` / `}}` are escapes for literal
/// braces, and a trailing `\` joins the next line (dropping its leading
/// whitespace), so those source line breaks do not appear in the output.
fn default_template(user_body: &str) -> String {
    format!(
        "<!-- auto-generated by rsclaw cap_live; manual edits between the BEGIN/END \
         markers below are overwritten on every /cap bind, anywhere else is preserved. -->

# Coding subagent context

You are a coding subagent that **rsclaw** (the main chat-side agent) has bridged \
into this user's IM session. The user can still read every word you write. Be \
concise. Verify with tools before asserting facts.

The `## Session context` section below is auto-generated and refreshed on every \
`/cap` bind: it lists the plugins/skills loaded right now plus user-profile hints \
from rsclaw's long-term memory. Treat the profile as hints, not gospel — when in \
doubt, ask or check.

## Session context

{USER_BEGIN_MARKER}

{}

{USER_END_MARKER}

## rsclaw helpers — USE THESE FIRST

You run in a shell with the `rsclaw` CLI on PATH. It exposes rsclaw's plugins, \
skills, memory and knowledge base as cross-process commands. **When a task is in \
a plugin's or skill's domain (A-share quotes, travel, image/video gen, etc.), \
call it through these commands instead of scraping the web or guessing** — they \
are faster, structured, and authenticated. All commands take `--json`; auth + \
URL are handled inside the binary.

```
# Plugins — domain capabilities (stocks, social media, media gen, …)
rsclaw plugins list                                  # what's installed right now
rsclaw plugins describe <plugin>                     # its tools + arg schemas
rsclaw plugins call <plugin>.<tool> --args '<json>'  # invoke a tool
#   e.g.  rsclaw plugins call astock.quote --args '{{\"code\":\"300033\"}}'

# Skills — packaged playbooks (booking, market queries, …). A skill is a
# markdown SOP (+ scripts), not a one-shot call: load it, then follow it.
rsclaw skills list                                   # installed skills
rsclaw skills use <name>                              # print the full SOP + its dir, then follow it
rsclaw skills search \"<query>\"                       # find one to install
rsclaw skills install <name>

# Memory
rsclaw memory search \"<query>\" [--max-results N] [--json]
rsclaw memory save \"<fact>\" [--scope SCOPE] [--kind fact|note] [--pinned] [--json]

# Knowledge base
rsclaw kb search \"<query>\" [-k N] [--json]
rsclaw kb add <path-or-url> [--tag T ...] [--recursive]

# Messaging (send to IM channels rsclaw is wired to)
rsclaw message send --channel <wechat|feishu|...> --target <id> -m \"...\"
```
",
        user_body.trim()
    )
}

/// Build the USER profile body from the live memory store.
///
/// Takes the store's active docs, drops those with an ephemeral scope
/// or kind (see `is_ephemeral_scope` / `is_ephemeral_kind`), orders the
/// rest pinned-first, then by importance (descending), then by
/// `accessed_at` (descending), and keeps at most
/// `MAX_PROFILE_ENTRIES`. Each surviving doc becomes one markdown
/// bullet.
///
/// Returns a one-line italic placeholder instead of a list when there
/// is no global memory store, or when no doc survives the filter.
async fn build_user_md_body() -> String {
    let Some(mem) = global_store() else {
        return "_no memory store available — rsclaw has not loaded any user facts yet._"
            .to_owned();
    };
    // Keep the lock guard confined to this block so it is released
    // before sorting and formatting.
    let docs = {
        let store = mem.lock().await;
        store.list_active()
    };
    let mut filtered: Vec<&MemoryDoc> = docs
        .iter()
        .filter(|d| !is_ephemeral_scope(&d.scope) && !is_ephemeral_kind(&d.kind))
        .collect();
    // Sort: pinned first, then by importance desc, then most-recent.
    // `total_cmp` gives a total order even if an importance is NaN;
    // mapping an incomparable pair to `Equal` would not, and `sort_by`
    // is allowed to panic or produce an unspecified order for a
    // comparator that is not a total order.
    // NOTE(review): assumes `importance` is f32/f64 — confirm.
    filtered.sort_by(|a, b| {
        b.pinned
            .cmp(&a.pinned)
            .then_with(|| b.importance.total_cmp(&a.importance))
            .then_with(|| b.accessed_at.cmp(&a.accessed_at))
    });
    filtered.truncate(MAX_PROFILE_ENTRIES);
    if filtered.is_empty() {
        return "_no high-importance user facts in memory yet — write facts \
                with `rsclaw memory save \"<fact>\" --pinned`._"
            .to_owned();
    }
    let mut out = String::new();
    for d in &filtered {
        let pin = if d.pinned { " 📌" } else { "" };
        let tier = match d.tier {
            MemDocTier::Core => "core",
            MemDocTier::Working => "working",
            MemDocTier::Peripheral => "peripheral",
        };
        // Single-line entry: `- {text} _({kind}/{tier}, importance=0.85){pin}_`.
        // Replace embedded newlines with spaces so a multi-line memory
        // doc doesn't break the bullet list shape.
        let one_line = d.text.replace(['\n', '\r'], " ");
        out.push_str(&format!(
            "- {one_line} _({kind}/{tier}, importance={imp:.2}){pin}_\n",
            kind = d.kind,
            imp = d.importance,
        ));
    }
    out
}

/// `true` when the final `:`-separated segment of `scope` is
/// `heartbeat`, `cron` or `system`. Memories under those scopes are
/// operational chatter rather than user identity, so they are left out
/// of the profile. A scope with no `:` at all never matches.
fn is_ephemeral_scope(scope: &str) -> bool {
    matches!(
        scope.rsplit_once(':'),
        Some((_, "heartbeat" | "cron" | "system"))
    )
}

/// `true` for the memory kinds `session`, `turn` and `trace` (exact,
/// case-sensitive match). These change conversation-by-conversation and
/// are surfaced via Option B's per-turn recall, not the durable profile
/// file.
fn is_ephemeral_kind(kind: &str) -> bool {
    const EPHEMERAL_KINDS: [&str; 3] = ["session", "turn", "trace"];
    EPHEMERAL_KINDS.contains(&kind)
}

// Unit tests for the pure helpers in this module: AGENTS.md composition,
// header stripping, and the ephemeral scope/kind filters. Nothing here
// touches the filesystem or the memory store.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn splice_replaces_only_between_markers() {
        // Text before BEGIN and after END must survive; only the body
        // between the markers is swapped out.
        let existing = format!(
            "# Custom header\n\nUser-written notes.\n\n## Profile\n\n{begin}\n\
             OLD profile body\n{end}\n\n## Custom trailer\nMore user notes.",
            begin = USER_BEGIN_MARKER,
            end = USER_END_MARKER,
        );
        let new_body = "- new fact 1\n- new fact 2";
        let out = compose_agents_md(Some(&existing), new_body, &[], &[]);
        assert!(out.contains("# Custom header"));
        assert!(out.contains("User-written notes."));
        assert!(out.contains("## Custom trailer"));
        assert!(out.contains("- new fact 1"));
        assert!(!out.contains("OLD profile body"));
    }

    #[test]
    fn no_markers_user_authored_preserved_verbatim() {
        // Plain user-authored AGENTS.md with no rsclaw sentinel and no
        // markers → return as-is, never auto-mutated.
        let existing = "# My project AGENTS.md\n\nManual instructions here.";
        let out = compose_agents_md(Some(existing), "- new fact", &[], &[]);
        assert_eq!(out, existing, "user-authored AGENTS.md should be untouched");
    }

    #[test]
    fn no_markers_rsclaw_authored_gets_template_back() {
        // Previously-generated file (carries sentinel) but markers are gone
        // (user deleted them somehow) → regenerate from template.
        let existing = "<!-- auto-generated by rsclaw cap_live -->\n# Stuff";
        let out = compose_agents_md(Some(existing), "- fact", &[], &[]);
        assert!(out.contains(USER_BEGIN_MARKER));
        assert!(out.contains("- fact"));
    }

    #[test]
    fn missing_file_emits_default_template() {
        // No existing file → full template, including markers and the
        // CLI helper section.
        let out = compose_agents_md(None, "- fact", &[], &[]);
        assert!(out.contains(USER_BEGIN_MARKER));
        assert!(out.contains(USER_END_MARKER));
        assert!(out.contains("- fact"));
        assert!(out.contains("rsclaw helpers"));
    }

    #[test]
    fn corrupt_marker_order_falls_back_to_template() {
        // The only END marker sits before BEGIN, so there is no
        // well-formed managed range to splice into; the file is
        // regenerated from the template instead.
        let existing = format!(
            "junk\n{end}\nstuff\n{begin}\nmore",
            begin = USER_BEGIN_MARKER,
            end = USER_END_MARKER,
        );
        let out = compose_agents_md(Some(&existing), "- fact", &[], &[]);
        assert!(out.contains(USER_BEGIN_MARKER));
        // Order in the regenerated template MUST be BEGIN before END.
        let begin_at = out.find(USER_BEGIN_MARKER).unwrap();
        let end_at = out.find(USER_END_MARKER).unwrap();
        assert!(begin_at < end_at);
    }

    #[test]
    fn strip_header_lines_drops_comments_and_blanks() {
        // Leading single-line HTML comment + blank line are removed.
        let input = "<!-- some comment -->\n\n# User\n\n## Section\nbody\n";
        let out = strip_header_lines(input);
        assert!(out.starts_with("# User"));
        assert!(!out.contains("some comment"));
    }

    #[test]
    fn strip_header_lines_preserves_body() {
        // Input that already starts with content is returned unchanged
        // (modulo the trailing newline).
        let input = "# Heading\nLine 2\n";
        let out = strip_header_lines(input);
        assert_eq!(out.trim_end(), "# Heading\nLine 2");
    }

    #[test]
    fn ephemeral_filters() {
        // Scope filter keys off the trailing `:segment`; kind filter is
        // an exact match.
        assert!(is_ephemeral_scope("agent:main:heartbeat"));
        assert!(is_ephemeral_scope("agent:main:cron"));
        assert!(!is_ephemeral_scope("agent:main"));
        assert!(!is_ephemeral_scope("global"));
        assert!(is_ephemeral_kind("session"));
        assert!(is_ephemeral_kind("turn"));
        assert!(!is_ephemeral_kind("fact"));
        assert!(!is_ephemeral_kind("note"));
    }
}