rsclaw 2026.5.1

AI Agent Engine Compatible with OpenClaw
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
//! Config file loading: JSON5 parsing, `${VAR}` expansion, `$include`
//! resolution.

use std::path::{Path, PathBuf};

use anyhow::{Context, Result};
use regex::Regex;
use tracing::debug;

use super::schema::Config;

/// Render `p` as a string whose separators are all forward slashes.
///
/// Every backslash in the lossy UTF-8 rendering of the path is replaced by
/// `/`, so Windows paths can be embedded in JSON/config text without the
/// backslashes being read as escape sequences.
pub fn path_to_forward_slash(p: &Path) -> String {
    p.to_string_lossy()
        .chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}

/// Matches `${VAR_NAME}` patterns.
///
/// Capture group 1 is the variable name: an ASCII letter or underscore
/// followed by any number of ASCII letters, digits, or underscores. The
/// regex is compiled lazily, once, on first use.
static ENV_VAR_RE: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
    // The pattern is a compile-time literal, so a failure here is a bug.
    Regex::new(r"\$\{([A-Za-z_][A-Za-z0-9_]*)\}").expect("valid regex")
});

/// Substitute `${VAR}` references and `~/` prefixes in a raw config string.
///
/// * `${VAR}` is replaced by the value of the environment variable `VAR`.
///   When the variable cannot be read, the placeholder is kept verbatim and
///   a debug-level event is logged.
/// * Afterwards, every occurrence of `~/` in the text is replaced by the
///   home directory (rendered with forward slashes) followed by `/`. If the
///   home directory cannot be determined, this step is skipped.
pub fn expand_env_vars(raw: &str) -> String {
    let with_env = ENV_VAR_RE
        .replace_all(raw, |caps: &regex::Captures<'_>| {
            let var = &caps[1];
            match std::env::var(var) {
                Ok(value) => value,
                Err(_) => {
                    debug!(var, "env var not set (referenced in config)");
                    // Keep the whole `${VAR}` token untouched.
                    caps[0].to_string()
                }
            }
        })
        .into_owned();

    match dirs_next::home_dir() {
        Some(home) => {
            // `<home>/` with forward slashes, so the result stays JSON-safe.
            let mut home_prefix = path_to_forward_slash(&home);
            home_prefix.push('/');
            // Blanket replacement: also covers paths embedded inside strings.
            with_env.replace("~/", &home_prefix)
        }
        None => with_env,
    }
}

// ---------------------------------------------------------------------------
// JSON5 loader (openclaw.json / openclaw.json5)
// ---------------------------------------------------------------------------

/// Read the JSON5 config at `path` and turn it into a typed [`Config`].
///
/// Pipeline:
/// 1. read the file as text,
/// 2. expand `${VAR}` / `~/` via [`expand_env_vars`] before any parsing,
/// 3. parse the text as JSON5 into a generic JSON value,
/// 4. resolve `$include` directives relative to the config's directory,
/// 5. deserialize the resulting tree into the schema.
///
/// # Errors
///
/// Returns an error (annotated with the config path) if the file cannot be
/// read, is not valid JSON5, an `$include` fails, or the tree does not match
/// the schema.
pub fn load_json5(path: &Path) -> Result<Config> {
    let text = std::fs::read_to_string(path)
        .with_context(|| format!("failed to read config: {}", path.display()))?;

    let mut tree: serde_json::Value = json5::from_str(&expand_env_vars(&text))
        .with_context(|| format!("JSON5 parse error in {}", path.display()))?;

    // Relative `$include` paths are resolved against the config's own directory.
    let include_root = path.parent().unwrap_or(Path::new("."));
    resolve_includes(&mut tree, include_root, 0)?;

    serde_json::from_value::<Config>(tree)
        .with_context(|| format!("schema error in {}", path.display()))
}

// ---------------------------------------------------------------------------
// $include resolution
// ---------------------------------------------------------------------------

/// Maximum nesting depth for `$include` to prevent infinite recursion.
///
/// `resolve_includes` bails out once its `depth` argument exceeds this
/// value; the depth grows by one for every included file that is loaded.
const MAX_INCLUDE_DEPTH: usize = 10;

/// Recursively replace `{ "$include": "./path/to/file.json5" }` nodes with the
/// contents of the referenced file.
fn resolve_includes(value: &mut serde_json::Value, base_dir: &Path, depth: usize) -> Result<()> {
    if depth > MAX_INCLUDE_DEPTH {
        anyhow::bail!("$include nesting exceeds maximum depth of {MAX_INCLUDE_DEPTH}");
    }

    match value {
        serde_json::Value::Object(map) => {
            // Collect keys that need $include resolution.
            let include_keys: Vec<String> = map
                .iter()
                .filter(|(_, v)| has_include(v))
                .map(|(k, _)| k.clone())
                .collect();

            for key in include_keys {
                let path_str = extract_include_path(&map[&key])
                    .with_context(|| format!("$include in key `{key}`"))?;
                // Expand ~/ before joining so absolute home paths work.
                let include_path = if let Some(rest) = path_str.strip_prefix("~/") {
                    dirs_next::home_dir().unwrap_or_default().join(rest)
                } else {
                    base_dir.join(&path_str)
                };
                let included = load_include_file(&include_path, depth + 1)?;
                map.insert(key, included);
            }

            // Recurse into remaining values.
            for v in map.values_mut() {
                resolve_includes(v, base_dir, depth)?;
            }
        }
        serde_json::Value::Array(arr) => {
            for v in arr.iter_mut() {
                resolve_includes(v, base_dir, depth)?;
            }
        }
        _ => {}
    }

    Ok(())
}

fn has_include(value: &serde_json::Value) -> bool {
    matches!(value, serde_json::Value::Object(m) if m.contains_key("$include") && m.len() == 1)
}

/// Pull the path string out of an include node.
///
/// # Panics
///
/// Panics if `value` is not an object or has no `$include` key; callers are
/// expected to check with `has_include` first.
///
/// # Errors
///
/// Returns an error when the `$include` value is not a string.
fn extract_include_path(value: &serde_json::Value) -> Result<String> {
    let directive = &value.as_object().expect("caller checked")["$include"];
    directive
        .as_str()
        .map(ToOwned::to_owned)
        .context("$include value must be a string path")
}

/// Load one `$include` target and return its fully resolved JSON tree.
///
/// The file is read as text, passed through [`expand_env_vars`], parsed as
/// JSON5, and then its own `$include` directives are resolved relative to
/// the directory that contains it, using `depth` as the nesting level.
///
/// # Errors
///
/// Returns an error (annotated with the include path) if the file cannot be
/// read or parsed, or if a nested include fails.
fn load_include_file(path: &Path, depth: usize) -> Result<serde_json::Value> {
    let text = std::fs::read_to_string(path)
        .with_context(|| format!("failed to read $include: {}", path.display()))?;

    let mut tree: serde_json::Value = json5::from_str(&expand_env_vars(&text))
        .with_context(|| format!("JSON5 parse error in $include {}", path.display()))?;

    // Nested includes are relative to the included file, not to its parent.
    resolve_includes(&mut tree, path.parent().unwrap_or(Path::new(".")), depth)?;

    Ok(tree)
}

// ---------------------------------------------------------------------------
// Config source detection
// ---------------------------------------------------------------------------

/// Return the first existing config file path, using the following priority:
///
/// 1. `RSCLAW_CONFIG_PATH` env var (set by `--config-path` -- highest priority)
/// 2. `$RSCLAW_BASE_DIR/rsclaw.json5` (set by `--base-dir`/`--dev`/`--profile`)
/// 3. `~/.rsclaw/rsclaw.json5` -- rsclaw-native default
/// 4. `.rsclaw.json5` in the current directory
///
/// A candidate is skipped when it does not exist. Step 3 is also skipped
/// when the home directory cannot be determined; step 4 is still tried in
/// that case. Returns `None` when no candidate exists.
///
/// OpenClaw config is NOT auto-loaded. Use `rsclaw setup` to migrate.
pub fn detect_config_path() -> Option<PathBuf> {
    // 1. RSCLAW_CONFIG_PATH -- explicit override (set by --config-path).
    if let Ok(p) = std::env::var("RSCLAW_CONFIG_PATH") {
        let path = expand_tilde_path(&p);
        if path.exists() {
            return Some(path);
        }
    }

    // 2. Base dir config (set by --base-dir / --dev / --profile).
    if let Ok(bd) = std::env::var("RSCLAW_BASE_DIR") {
        let p = expand_tilde_path(&bd).join("rsclaw.json5");
        if p.exists() {
            return Some(p);
        }
    }

    // 3. rsclaw-native default. An unknown home directory must not abort
    //    detection, otherwise the current-directory fallback is unreachable.
    if let Some(home) = dirs_next::home_dir() {
        let rsclaw = home.join(".rsclaw/rsclaw.json5");
        if rsclaw.exists() {
            return Some(rsclaw);
        }
    }

    // 4. Current directory fallback.
    let local = PathBuf::from(".rsclaw.json5");
    local.exists().then_some(local)
}

/// Resolve the rsclaw base directory (state root), respecting env vars and
/// `--base-dir` CLI arg (injected as `RSCLAW_BASE_DIR` before this is called).
///
/// Resolution order:
///   1. `RSCLAW_BASE_DIR` (set by `--base-dir`, `--dev`, `--profile`), with a
///      leading `~` expanded.
///   2. Parent dir of the detected config file, so data lives alongside the
///      config (e.g. config `~/.rsclaw/rsclaw.json5` gives `~/.rsclaw`). A
///      bare relative config such as `.rsclaw.json5` resolves to `.`.
///   3. `~/.rsclaw` (default).
pub fn base_dir() -> PathBuf {
    // 1. Explicit override
    if let Ok(p) = std::env::var("RSCLAW_BASE_DIR") {
        return expand_tilde_path(&p);
    }

    // 2. Derive from config file location: data lives alongside config
    if let Some(config_path) = detect_config_path() {
        if let Some(parent) = config_path.parent() {
            // `Path::parent` yields an empty path for a single-component
            // relative path; hand callers an explicit `.` instead of an
            // empty PathBuf.
            return if parent.as_os_str().is_empty() {
                PathBuf::from(".")
            } else {
                parent.to_path_buf()
            };
        }
    }

    // 3. Default
    dirs_next::home_dir().unwrap_or_default().join(".rsclaw")
}

/// Location of the gateway PID file: `$base_dir/var/run/gateway.pid`.
pub fn pid_file() -> PathBuf {
    let mut path = base_dir();
    path.extend(["var", "run", "gateway.pid"]);
    path
}

/// Location of the gateway log file: `$base_dir/var/logs/gateway.log`.
pub fn log_file() -> PathBuf {
    let mut path = base_dir();
    path.extend(["var", "logs", "gateway.log"]);
    path
}

/// Look up site-rule files that match a URL's host.
///
/// Returns paths relative to `$base_dir/tools/web_browser/`, each starting
/// with `site-rules/`. Both layouts are checked:
///   * `<host>.md` — flat (legacy zh sites). `host` is the lowercased host
///     without userinfo, port, or a leading `www.`.
///   * `<label>/*.md` — nested (browser-harness imports). Two labels are
///     tried, in this order: the leftmost label of the host and its
///     second-to-last label, e.g. `api` and then `stackexchange` for
///     `api.stackexchange.com`.
///
/// The flat match (if any) comes first; nested matches follow per label,
/// sorted by file name so the result is deterministic. An empty vector is
/// returned when the URL has no usable host or the rules directory does not
/// exist.
///
/// Surfaced by `web_fetch` and `web_browser action=open` tool results so
/// the agent gets a hard pointer to read the rule before acting — the
/// prompt-only mention buried in the tool description was being ignored
/// on hosts where the agent thought it knew what to do.
pub fn applicable_site_rules(url: &str) -> Vec<String> {
    let Some(host) = site_rule_host(url) else {
        return Vec::new();
    };

    let dir = base_dir()
        .join("tools")
        .join("web_browser")
        .join("site-rules");
    if !dir.is_dir() {
        return Vec::new();
    }

    let mut rules = Vec::new();

    if dir.join(format!("{host}.md")).is_file() {
        rules.push(format!("site-rules/{host}.md"));
    }

    // Candidate directory names, ordered by specificity. Trying only the
    // leftmost label would miss subdomains such as `api.stackexchange.com`,
    // `m.youtube.com`, or `cdn.shopify.com`, whose rules live under the
    // registrable name. Plain `stackexchange.com` collapses to one candidate.
    // NOTE: for multi-part public suffixes (`example.co.uk`) the
    // second-to-last label is `co`, so only the leftmost label is useful.
    let labels: Vec<&str> = host.split('.').filter(|s| !s.is_empty()).collect();
    let mut candidates: Vec<&str> = Vec::with_capacity(2);
    if let Some(&first) = labels.first() {
        candidates.push(first);
    }
    if let [.., second_to_last, _] = labels.as_slice() {
        if !candidates.contains(second_to_last) {
            candidates.push(*second_to_last);
        }
    }

    for cand in candidates {
        // A missing or non-directory candidate simply fails to open.
        let Ok(entries) = std::fs::read_dir(dir.join(cand)) else {
            continue;
        };
        let mut names: Vec<String> = entries
            .flatten()
            .map(|entry| entry.path())
            .filter(|p| p.extension().is_some_and(|e| e == "md"))
            .filter_map(|p| p.file_name().map(|s| s.to_string_lossy().into_owned()))
            .collect();
        // `read_dir` order is platform-dependent; sort for stable output.
        names.sort_unstable();
        rules.extend(
            names
                .into_iter()
                .map(|name| format!("site-rules/{cand}/{name}")),
        );
    }

    rules
}

/// Extract the lowercased host of `url` without scheme, userinfo, port, or a
/// leading `www.`. Returns `None` when no usable host remains.
fn site_rule_host(url: &str) -> Option<String> {
    // Skip the scheme without pulling in the `url` crate. A `://` that only
    // shows up after a path/query/fragment delimiter is not a scheme.
    let after_scheme = match url.split_once("://") {
        Some((scheme, rest)) if !scheme.contains(['/', '?', '#']) => rest,
        _ => url,
    };
    // The authority ends at the first `/`, `?`, or `#`.
    let authority = after_scheme
        .find(['/', '?', '#'])
        .map_or(after_scheme, |i| &after_scheme[..i]);
    // Drop `user:pass@` credentials, then the optional `:port`.
    let host_with_port = authority.rsplit_once('@').map_or(authority, |(_, h)| h);
    let host = host_with_port
        .rsplit_once(':')
        .map_or(host_with_port, |(h, _)| h);

    // Host names are case-insensitive; rule files use lowercase names.
    let lowered = host.to_ascii_lowercase();
    let host = lowered.strip_prefix("www.").unwrap_or(&lowered);

    // A backslash is a path separator on Windows; never let a URL-derived
    // name reach outside the rules directory.
    if host.is_empty() || host.contains('\\') {
        return None;
    }
    Some(host.to_owned())
}

/// Concatenate the contents of every rule file that
/// [`applicable_site_rules`] reports for `url`.
///
/// Each rule is rendered as a `# === path ===` header line followed by the
/// file body with trailing whitespace trimmed and a final newline; sections
/// are separated by one blank line. Rule files that cannot be read are
/// skipped. Returns `None` when no rule applies or none could be read.
///
/// The text is inlined directly into web_fetch/web_browser tool results so
/// the agent has the working approach at hand without a separate read_file
/// round-trip — the earlier design that only hinted at file paths was being
/// ignored.
pub fn applicable_site_rules_body(url: &str) -> Option<String> {
    let rule_paths = applicable_site_rules(url);
    if rule_paths.is_empty() {
        return None;
    }

    // Rule paths are relative to the web_browser tool directory.
    let root = base_dir().join("tools").join("web_browser");
    let sections: Vec<String> = rule_paths
        .iter()
        .filter_map(|rel| {
            let body = std::fs::read_to_string(root.join(rel)).ok()?;
            Some(format!("# === {rel} ===\n{}\n", body.trim_end()))
        })
        .collect();

    if sections.is_empty() {
        None
    } else {
        Some(sections.join("\n"))
    }
}

/// Location of the cache directory: `$base_dir/var/cache/`.
pub fn cache_dir() -> PathBuf {
    let mut path = base_dir();
    path.extend(["var", "cache"]);
    path
}

/// Load defaults.toml: prefer external file at `$base_dir/defaults.toml`,
/// fallback to the version embedded at compile time.
///
/// This allows production deployments to customize providers, channels,
/// exec safety rules, etc. without recompiling.
pub fn load_defaults_toml() -> String {
    let external = base_dir().join("defaults.toml");
    if let Ok(content) = std::fs::read_to_string(&external) {
        debug!(path = %external.display(), "loaded external defaults.toml");
        content
    } else {
        include_str!("../../defaults.toml").to_owned()
    }
}

/// Public entry point for tilde expansion.
///
/// Forwards to the module-private `expand_tilde_path`, which replaces a
/// leading `~`, `~/`, or `~\` with the user's home directory. Used by
/// `main.rs` for `--base-dir` resolution.
pub fn expand_tilde_path_pub(p: &str) -> PathBuf {
    expand_tilde_path(p)
}

fn expand_tilde_path(p: &str) -> PathBuf {
    if let Some(rest) = p.strip_prefix("~/").or_else(|| p.strip_prefix("~\\")) {
        dirs_next::home_dir().unwrap_or_default().join(rest)
    } else if p == "~" {
        dirs_next::home_dir().unwrap_or_default()
    } else {
        PathBuf::from(p)
    }
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {

    use super::*;

    /// A variable that is present in the environment is substituted.
    #[test]
    fn expand_known_var() {
        // SAFETY: this variable name is used only by this test, and no other
        // test in this module modifies the environment.
        // NOTE(review): the harness may run tests on parallel threads —
        // confirm this satisfies `set_var`'s safety contract.
        unsafe { std::env::set_var("TEST_API_KEY_RSCLAW", "sk-test-123") };
        let result = expand_env_vars(r#"{"apiKey": "${TEST_API_KEY_RSCLAW}"}"#);
        assert!(result.contains("sk-test-123"), "got: {result}");
    }

    /// A placeholder for an unset variable survives expansion untouched.
    #[test]
    fn expand_missing_var_leaves_verbatim() {
        let result = expand_env_vars(r#"{"apiKey": "${RSCLAW_NONEXISTENT_XYZ}"}"#);
        assert!(
            result.contains("${RSCLAW_NONEXISTENT_XYZ}"),
            "got: {result}"
        );
    }

    /// An `$include` node under a key is replaced by the included file.
    #[test]
    fn include_directive_loads_nested_file() {
        let workspace = tempfile::tempdir().unwrap();
        let root = workspace.path();

        // The file that gets included.
        std::fs::write(
            root.join("agents.json5"),
            r#"{ list: [{ id: "main", default: true }] }"#,
        )
        .unwrap();

        // The main config, which pulls the file above in via $include.
        let main_path = root.join("openclaw.json5");
        std::fs::write(
            &main_path,
            r#"{ agents: { "$include": "./agents.json5" } }"#,
        )
        .unwrap();

        let cfg = load_json5(&main_path).unwrap();
        let list = cfg
            .agents
            .expect("agents should be present")
            .list
            .expect("agents.list should be present");
        assert_eq!(list[0].id, "main");
    }
}