use std::fs;
use std::path::Path;
use serde_json::{json, Value};
use super::{
default_workspace_root, normalize_path, path_is_allowed, strip_html, user_home,
validate_read_path, wrap_untrusted, WorkspaceRoots, MAX_FILE_BYTES,
};
/// Cap on the number of paths a single `glob_search` call collects before it
/// stops and reports `truncated`.
const MAX_GLOB_RESULTS: usize = 200;
/// `grep_search` stops (and reports `truncated`) once this many matching lines
/// have been collected.
const MAX_GREP_MATCHES: usize = 100;
/// `grep_search` stops (and reports `truncated`) once this many files have been
/// visited by the walk.
const MAX_GREP_FILES: usize = 5000;
/// Each matched line is cut to at most this many characters in the result.
const MAX_GREP_LINE_CHARS: usize = 200;
// Directory names pruned from the grep walk; the list itself lives in the
// parent module and is only aliased here.
use super::SEARCH_SKIP_DIRS as SKIP_DIRS;
/// Maximum number of characters of cleaned page text returned by `web_fetch`.
const WEB_FETCH_MAX_CHARS: usize = 8192;
/// Timeout, in seconds, configured on the HTTP client used by `web_fetch`.
const WEB_FETCH_TIMEOUT_SECS: u64 = 15;
/// Returns the function-calling schemas for the tools implemented in this
/// module, in the order `web_fetch`, `glob_search`, `grep_search`.
pub(super) fn schemas() -> Vec<Value> {
    /// Wraps one tool description in the `{"type": "function", ...}` envelope
    /// shared by every schema in this module.
    fn function_tool(
        name: &str,
        description: &str,
        properties: Value,
        required: &[&str],
    ) -> Value {
        json!({
            "type": "function",
            "function": {
                "name": name,
                "description": description,
                "parameters": {
                    "type": "object",
                    "properties": properties,
                    "required": required
                }
            }
        })
    }

    let web_fetch = function_tool(
        "web_fetch",
        "Fetch a URL and return cleaned visible text (HTML stripped, max 8 KB).",
        json!({
            "url": { "type": "string", "description": "URL to fetch (http/https)" }
        }),
        &["url"],
    );

    let glob_search = function_tool(
        "glob_search",
        "Find files by glob pattern under the user's home (e.g. **/*.py).",
        json!({
            "pattern": { "type": "string", "description": "Glob pattern (e.g. '**/*.py', 'Downloads/*.pdf')" }
        }),
        &["pattern"],
    );

    let grep_search = function_tool(
        "grep_search",
        "Search file contents with a regular expression (ripgrep-style, case-insensitive) under a directory. Defaults to the active workspace/project; skips build & dependency dirs (target, node_modules, .git). Use a precise pattern like CLAUDETTE_MAX_FIX_ROUNDS or fn\\s+max_rounds.",
        json!({
            "pattern": { "type": "string", "description": "Regex to search for (e.g. 'TODO|FIXME', 'fn\\s+\\w+'). Invalid regex falls back to literal substring." },
            "path": { "type": "string", "description": "Directory to search (default: the workspace/project root)" }
        }),
        &["pattern"],
    );

    vec![web_fetch, glob_search, grep_search]
}
pub(super) fn dispatch(name: &str, input: &str) -> Option<Result<String, String>> {
let result = match name {
"glob_search" => run_glob_search(input),
"grep_search" => run_grep_search(input),
"web_fetch" => run_web_fetch(input),
_ => return None,
};
Some(result)
}
fn run_glob_search(input: &str) -> Result<String, String> {
let v: Value = serde_json::from_str(input)
.map_err(|e| format!("glob_search: invalid JSON ({e}): {input}"))?;
let raw_pattern = v
.get("pattern")
.and_then(Value::as_str)
.ok_or("glob_search: missing 'pattern'")?;
let resolved_pattern = if raw_pattern.starts_with("~/") || raw_pattern.starts_with("~\\") {
crate::tools::expand_tilde(raw_pattern)
.display()
.to_string()
} else if Path::new(raw_pattern).is_absolute() {
raw_pattern.to_string()
} else {
let base = if crate::missions::active_mission().is_some() {
crate::missions::active_cwd()
} else if let Some(root) = default_workspace_root() {
root
} else {
user_home()
};
base.join(raw_pattern).display().to_string()
};
if resolved_pattern.split(['/', '\\']).any(|c| c == "..") {
return Err("glob_search: '..' path components are not allowed in patterns".to_string());
}
let prefix_end = resolved_pattern
.find(['*', '?', '['])
.unwrap_or(resolved_pattern.len());
let literal_prefix = &resolved_pattern[..prefix_end];
let literal_path = normalize_path(Path::new(literal_prefix));
let roots = WorkspaceRoots::from_env();
if !path_is_allowed(&literal_path, &roots, false) {
return Err(format!(
"glob_search: pattern resolves outside the allowed roots — $HOME ({}) \
and CLAUDETTE_WORKSPACE; searches are restricted for safety",
roots.home.display()
));
}
let walker =
glob::glob(&resolved_pattern).map_err(|e| format!("glob_search: bad pattern: {e}"))?;
let mut paths: Vec<String> = Vec::new();
let mut truncated = false;
for entry in walker {
if paths.len() >= MAX_GLOB_RESULTS {
truncated = true;
break;
}
if let Ok(path) = entry {
let canonical = std::fs::canonicalize(&path).unwrap_or_else(|_| normalize_path(&path));
if !path_is_allowed(&canonical, &roots, true) {
continue;
}
paths.push(path.display().to_string());
}
}
paths.sort();
Ok(json!({
"pattern": resolved_pattern,
"count": paths.len(),
"truncated": truncated,
"paths": paths,
})
.to_string())
}
/// Handles the `grep_search` tool.
///
/// `input` is a JSON object with a non-empty string `pattern` and an optional
/// string `path`. When `path` is absent the search starts at the active
/// mission's path, else the default workspace root, else `~`. The chosen path
/// is passed through `validate_read_path` and must be a directory.
///
/// `pattern` is compiled as a case-insensitive regex; if it does not compile,
/// the search falls back to a case-insensitive literal substring match and the
/// result reports `"mode": "literal"`.
///
/// Returns a JSON string with `pattern`, `mode`, `root`, `files_scanned`,
/// `match_count`, `truncated` and `matches` (each with `file`, 1-based `line`
/// and the possibly shortened `text`).
///
/// # Errors
/// Returns a message when the JSON is invalid, `pattern` is missing or empty,
/// the path is rejected by `validate_read_path`, cannot be stat'ed, or is not
/// a directory.
fn run_grep_search(input: &str) -> Result<String, String> {
    let v: Value = serde_json::from_str(input)
        .map_err(|e| format!("grep_search: invalid JSON ({e}): {input}"))?;
    let pattern = v
        .get("pattern")
        .and_then(Value::as_str)
        .ok_or("grep_search: missing 'pattern'")?;
    if pattern.is_empty() {
        return Err("grep_search: pattern is empty".to_string());
    }

    // Declared before the `match` so the computed default can be borrowed as
    // `&str` for the rest of the function.
    let default_path: String;
    let path_str = match v.get("path").and_then(Value::as_str) {
        Some(s) => s,
        None => {
            default_path = if let Some(m) = crate::missions::active_mission() {
                m.path.display().to_string()
            } else if let Some(root) = crate::tools::default_workspace_root() {
                root.display().to_string()
            } else {
                "~".to_string()
            };
            default_path.as_str()
        }
    };

    let root = validate_read_path(path_str)?;
    let metadata = fs::metadata(&root)
        .map_err(|e| format!("grep_search: stat {} failed: {e}", root.display()))?;
    if !metadata.is_dir() {
        return Err(format!(
            "grep_search: {} is not a directory",
            root.display()
        ));
    }

    // A pattern that fails to compile is not an error: `regex` becomes `None`
    // and matching degrades to a lowercase substring test.
    let regex = regex::RegexBuilder::new(pattern)
        .case_insensitive(true)
        .size_limit(1 << 20)
        .build()
        .ok();
    let mode = if regex.is_some() { "regex" } else { "literal" };
    let needle = pattern.to_lowercase();
    let line_matches = |line: &str| -> bool {
        match &regex {
            Some(re) => re.is_match(line),
            None => line.to_lowercase().contains(&needle),
        }
    };

    let mut matches: Vec<Value> = Vec::new();
    let mut files_scanned: usize = 0;
    let mut truncated = false;

    // Walk with hidden-file and gitignore filtering enabled, without following
    // symlinks, and prune any directory whose name is listed in SKIP_DIRS.
    let walker = ignore::WalkBuilder::new(&root)
        .hidden(true)
        .git_ignore(true)
        .git_global(true)
        .git_exclude(true)
        .parents(true)
        .follow_links(false)
        .filter_entry(|entry| {
            if entry.file_type().is_some_and(|ft| ft.is_dir()) {
                let name = entry.file_name().to_string_lossy();
                if SKIP_DIRS.contains(&name.as_ref()) {
                    return false;
                }
            }
            true
        })
        .build();

    'walk: for result in walker {
        let Ok(entry) = result else { continue };
        if !entry.file_type().is_some_and(|ft| ft.is_file()) {
            continue;
        }
        if files_scanned >= MAX_GREP_FILES {
            truncated = true;
            break 'walk;
        }
        // Counted before the size/readability checks, so the total includes
        // files that end up being skipped below.
        files_scanned += 1;
        let p = entry.path();
        let Ok(meta) = entry.metadata() else { continue };
        if meta.len() > MAX_FILE_BYTES as u64 {
            continue;
        }
        // Files that cannot be read as UTF-8 text are silently skipped.
        let Ok(content) = fs::read_to_string(p) else {
            continue;
        };
        for (lineno, line) in content.lines().enumerate() {
            if line_matches(line) {
                let snippet: String = line.chars().take(MAX_GREP_LINE_CHARS).collect();
                matches.push(json!({
                    "file": p.display().to_string(),
                    "line": lineno + 1,
                    "text": snippet,
                }));
                if matches.len() >= MAX_GREP_MATCHES {
                    truncated = true;
                    break 'walk;
                }
            }
        }
    }

    Ok(json!({
        "pattern": pattern,
        "mode": mode,
        "root": root.display().to_string(),
        "files_scanned": files_scanned,
        "match_count": matches.len(),
        "truncated": truncated,
        "matches": matches,
    })
    .to_string())
}
/// Handles the `web_fetch` tool.
///
/// `input` is a JSON object with a string `url`. The URL must start with
/// `http://` or `https://`, pass `validate_fetch_target` and the egress guard,
/// and answer with a success status. The body is run through `strip_html`,
/// cut to `WEB_FETCH_MAX_CHARS` characters and wrapped with `wrap_untrusted`.
///
/// Returns a JSON string with the final `url`, `status`, `chars`,
/// `total_chars`, `truncated` and the wrapped `text`.
///
/// # Errors
/// Returns a message for invalid JSON, a missing `url`, a disallowed scheme or
/// target, a failed request, a non-success HTTP status, or an unreadable body.
fn run_web_fetch(input: &str) -> Result<String, String> {
    let args: Value = serde_json::from_str(input)
        .map_err(|e| format!("web_fetch: invalid JSON ({e}): {input}"))?;
    let Some(url) = args.get("url").and_then(Value::as_str) else {
        return Err("web_fetch: missing 'url'".to_string());
    };

    if !url.starts_with("http://") && !url.starts_with("https://") {
        return Err(format!(
            "web_fetch: only http:// and https:// URLs are allowed, got: {url}"
        ));
    }
    // Both checks run before any client is built or request is sent.
    validate_fetch_target(url)?;
    crate::egress::guard(url)?;

    let response = web_fetch_client()?
        .get(url)
        .header("User-Agent", "claudette/1.0 (Claudette personal secretary)")
        .header("Accept", "text/html,application/xhtml+xml,text/plain")
        .send()
        .map_err(|e| format!("web_fetch: request failed: {e}"))?;

    // Capture status and post-redirect URL before the body read consumes the
    // response.
    let status = response.status();
    let final_url = response.url().to_string();
    if !status.is_success() {
        return Err(format!("web_fetch: HTTP {status} for {final_url}"));
    }

    let raw_body = response
        .text()
        .map_err(|e| format!("web_fetch: read body: {e}"))?;
    let page_text = strip_html(&raw_body);
    let total_chars = page_text.chars().count();
    let visible: String = page_text.chars().take(WEB_FETCH_MAX_CHARS).collect();
    let source_label = format!("web_fetch:{final_url}");
    let wrapped = wrap_untrusted(&source_label, &visible);

    let report = json!({
        "url": final_url,
        "status": status.as_u16(),
        "chars": visible.chars().count(),
        "total_chars": total_chars,
        "truncated": total_chars > WEB_FETCH_MAX_CHARS,
        "text": wrapped,
    });
    Ok(report.to_string())
}
/// Builds the blocking HTTP client used by `web_fetch`.
///
/// The client has a `WEB_FETCH_TIMEOUT_SECS` timeout and a custom redirect
/// policy: a redirect is refused once 10 earlier hops have been followed, or
/// when its target is rejected by `validate_fetch_target`.
///
/// # Errors
/// Returns a message when the client cannot be constructed.
fn web_fetch_client() -> Result<reqwest::blocking::Client, String> {
    let redirect_policy = reqwest::redirect::Policy::custom(|attempt| {
        let hops_so_far = attempt.previous().len();
        if hops_so_far >= 10 {
            return attempt.error(std::io::Error::other("web_fetch: too many redirects"));
        }
        // Every hop is re-validated so a public URL cannot bounce the fetch
        // to a target the initial check would have refused.
        if let Err(reason) = validate_fetch_target(attempt.url().as_str()) {
            return attempt.error(std::io::Error::other(reason));
        }
        attempt.follow()
    });

    let timeout = std::time::Duration::from_secs(WEB_FETCH_TIMEOUT_SECS);
    reqwest::blocking::Client::builder()
        .timeout(timeout)
        .redirect(redirect_policy)
        .build()
        .map_err(|e| format!("web_fetch: build http client: {e}"))
}
/// SSRF guard for `web_fetch`: decides whether `url` may be fetched.
///
/// Returns `Ok(())` immediately when `CLAUDETTE_WEB_FETCH_ALLOW_PRIVATE` is
/// `1`, and for any URL that does not start with `http://` or `https://`
/// (this function only judges http(s) targets).
///
/// Otherwise the host is extracted from the authority and rejected when it is
/// `localhost`, a `*.localhost` name, `metadata.google.internal`, an IP literal
/// in a blocked range, or a name that resolves to a blocked address.
///
/// The extraction is deliberately strict so that it cannot disagree with a
/// WHATWG-style URL parser about which host will be contacted:
/// * `\` ends the authority, exactly like `/`;
/// * an empty port (`host:`) means the default port;
/// * hosts containing control characters, spaces, `%` or non-ASCII characters
///   are refused, because a URL parser may strip, decode or remap them into a
///   different host (international domains must be given in punycode);
/// * numeric hosts that are not plain dotted-decimal IPv4 (`0x7f.1`,
///   `2130706433`, `127.0.0.1.`) are refused.
///
/// A name that fails to resolve here is allowed through; only successfully
/// resolved addresses are checked.
/// NOTE(review): resolution happens here independently of the later request,
/// so this check alone presumably cannot stop DNS rebinding — confirm whether
/// the egress layer covers that.
///
/// # Errors
/// Returns a human-readable message describing why the target was refused.
fn validate_fetch_target(url: &str) -> Result<(), String> {
    if std::env::var("CLAUDETTE_WEB_FETCH_ALLOW_PRIVATE").as_deref() == Ok("1") {
        return Ok(());
    }
    let (rest, default_port) = if let Some(r) = url.strip_prefix("https://") {
        (r, 443u16)
    } else if let Some(r) = url.strip_prefix("http://") {
        (r, 80u16)
    } else {
        return Ok(());
    };

    // For http(s) a backslash terminates the authority just like '/', so it
    // must be a delimiter here too; otherwise `http://127.0.0.1\@1.1.1.1/`
    // would be judged by the decoy host after the '@'.
    let authority = rest.split(['/', '?', '#', '\\']).next().unwrap_or(rest);
    // Userinfo ends at the LAST '@' of the authority.
    let hostport = authority.rsplit('@').next().unwrap_or(authority);

    // Fail closed on anything a URL parser may silently rewrite: tabs and
    // newlines are stripped, %-escapes are decoded, and non-ASCII characters
    // are mapped (e.g. full-width digits or ideographic dots) before the host
    // is interpreted.
    if hostport
        .chars()
        .any(|c| !c.is_ascii() || c.is_ascii_control() || c == ' ' || c == '%')
    {
        return Err(
            "web_fetch: URL host contains control, whitespace, '%' or non-ASCII characters \
             (SSRF guard; write international domains in punycode form)"
                .to_string(),
        );
    }

    let (host, port) = if let Some(after) = hostport.strip_prefix('[') {
        let Some((h, tail)) = after.split_once(']') else {
            return Err("web_fetch: malformed IPv6 host".to_string());
        };
        let port = tail
            .strip_prefix(':')
            .and_then(|p| p.parse().ok())
            .unwrap_or(default_port);
        (h.to_string(), port)
    } else if let Some((h, p)) = hostport.rsplit_once(':') {
        if p.is_empty() {
            // `host:` is a valid spelling of "default port"; keeping the colon
            // in the host would hide `127.0.0.1:` from the IP check below.
            (h.to_string(), default_port)
        } else {
            p.parse::<u16>().map_or_else(
                |_| (hostport.to_string(), default_port),
                |pn| (h.to_string(), pn),
            )
        }
    } else {
        (hostport.to_string(), default_port)
    };
    if host.is_empty() {
        return Err("web_fetch: URL has no host".to_string());
    }

    let host_l = host.to_ascii_lowercase();
    // A single trailing dot names the same host (`localhost.`).
    let name = host_l.strip_suffix('.').unwrap_or(&host_l);
    if name == "localhost" || name.ends_with(".localhost") || name == "metadata.google.internal" {
        return Err(blocked_target_msg(&host));
    }

    if let Ok(ip) = host.parse::<std::net::IpAddr>() {
        if is_blocked_fetch_ip(&ip) {
            return Err(blocked_target_msg(&host));
        }
        return Ok(());
    }

    // Not a canonical IP literal, yet the last label is numeric: a URL parser
    // would still read this as an IPv4 address (hex, octal, short or
    // dot-terminated forms). Refuse instead of relying on the resolver.
    if host_ends_in_number(&host_l) {
        return Err(format!(
            "web_fetch: refusing non-canonical IPv4 host '{host}' \
             (SSRF guard; use plain dotted-decimal a.b.c.d)"
        ));
    }

    use std::net::ToSocketAddrs;
    if let Ok(addrs) = (host.as_str(), port).to_socket_addrs() {
        for addr in addrs {
            if is_blocked_fetch_ip(&addr.ip()) {
                return Err(blocked_target_msg(&host));
            }
        }
    }
    Ok(())
}

/// Reports whether the last dot-separated label of `host` (ignoring one
/// trailing dot) is all decimal digits or a `0x`-prefixed hex number — the
/// condition under which a WHATWG URL parser treats the host as IPv4.
fn host_ends_in_number(host: &str) -> bool {
    let trimmed = host.strip_suffix('.').unwrap_or(host);
    let last = trimmed.rsplit('.').next().unwrap_or(trimmed);
    if last.is_empty() {
        return false;
    }
    if last.bytes().all(|b| b.is_ascii_digit()) {
        return true;
    }
    last.strip_prefix("0x")
        .or_else(|| last.strip_prefix("0X"))
        .is_some_and(|hex| hex.bytes().all(|b| b.is_ascii_hexdigit()))
}

/// Builds the error message returned when `host` is refused as an
/// internal/loopback/private target.
fn blocked_target_msg(host: &str) -> String {
    format!(
        "web_fetch: refusing to fetch internal/loopback/private host '{host}' (SSRF guard; \
         set CLAUDETTE_WEB_FETCH_ALLOW_PRIVATE=1 to allow LAN fetches)"
    )
}

/// Returns `true` when `ip` falls in a range `web_fetch` must not contact.
///
/// IPv4: loopback, private, link-local, unspecified, broadcast, `0.0.0.0/8`
/// and `100.64.0.0/10`. IPv6: loopback, unspecified, `fc00::/7`, `fe80::/10`,
/// and IPv4-mapped addresses whose embedded IPv4 address is blocked.
fn is_blocked_fetch_ip(ip: &std::net::IpAddr) -> bool {
    match ip {
        std::net::IpAddr::V4(v4) => {
            let o = v4.octets();
            v4.is_loopback()
                || v4.is_private()
                || v4.is_link_local()
                || v4.is_unspecified()
                || v4.is_broadcast()
                // 0.0.0.0/8
                || o[0] == 0
                // 100.64.0.0/10
                || (o[0] == 100 && (64..=127).contains(&o[1]))
        }
        std::net::IpAddr::V6(v6) => {
            let first = v6.segments()[0];
            v6.is_loopback()
                || v6.is_unspecified()
                // fc00::/7
                || (first & 0xfe00) == 0xfc00
                // fe80::/10
                || (first & 0xffc0) == 0xfe80
                || v6
                    .to_ipv4_mapped()
                    .is_some_and(|m| is_blocked_fetch_ip(&std::net::IpAddr::V4(m)))
        }
    }
}
// Unit tests for the three tool handlers and the web_fetch SSRF guard.
#[cfg(test)]
mod tests {
use super::*;
// Every URL here is either an IP literal or a name that is refused by name,
// followed by one public IP literal that must be accepted.
#[test]
fn web_fetch_blocks_ssrf_targets() {
for url in [
"http://localhost:8080/",
"http://127.0.0.1/",
"http://169.254.169.254/latest/meta-data/",
"http://10.0.0.5/",
"http://192.168.1.1/admin",
"http://[::1]:9000/",
"https://metadata.google.internal/computeMetadata/v1/",
] {
assert!(
validate_fetch_target(url).is_err(),
"expected SSRF block for {url}"
);
}
assert!(validate_fetch_target("https://1.1.1.1/").is_ok());
}
// Starts a one-shot loopback server that answers with a 301 pointing at
// 169.254.169.254 and checks that the client's redirect policy turns the
// request into an error.
#[test]
fn web_fetch_client_refuses_redirect_to_internal_host() {
use std::io::{Read, Write};
use std::net::TcpListener;
// Port 0 lets the OS pick a free port.
let listener = TcpListener::bind("127.0.0.1:0").expect("bind loopback");
let addr = listener.local_addr().expect("local addr");
let server = std::thread::spawn(move || {
if let Ok((mut stream, _)) = listener.accept() {
let mut buf = [0u8; 1024];
// Read (and discard) the request, then reply with the redirect.
let _ = stream.read(&mut buf); let resp = "HTTP/1.1 301 Moved Permanently\r\n\
Location: http://169.254.169.254/\r\n\
Content-Length: 0\r\n\r\n";
let _ = stream.write_all(resp.as_bytes());
}
});
// The client is used directly, so only the redirect hop is validated here,
// not the initial loopback URL.
let client = web_fetch_client().expect("build client");
let result = client.get(format!("http://{addr}/")).send();
let _ = server.join();
assert!(
result.is_err(),
"redirect to the 169.254.169.254 metadata host must be refused"
);
}
// An empty JSON object has no 'pattern' key.
#[test]
fn glob_search_rejects_missing_pattern() {
let err = run_glob_search("{}").unwrap_err();
assert!(err.contains("missing"), "got: {err}");
}
// Absolute patterns containing '..' components must be refused; the pattern
// is chosen per platform so that it is absolute on the host OS.
#[test]
fn glob_search_rejects_dotdot_traversal() {
#[cfg(unix)]
let pat = "/tmp/*/../../etc/*";
#[cfg(not(unix))]
let pat = r"C:\Users\*\..\..\Windows\*";
let input = serde_json::json!({ "pattern": pat }).to_string();
let err = run_glob_search(&input).unwrap_err();
assert!(err.contains(".."), "expected '..' rejection, got: {err}");
}
// Runs the same pattern twice: once with CLAUDETTE_WORKSPACE unset (must be
// rejected) and once with it pointing at the pattern's root (must be allowed).
#[test]
fn glob_search_allows_workspace_outside_home() {
#[cfg(unix)]
let (root, pat) = ("/claudette-glob-ws-xyz", "/claudette-glob-ws-xyz/**/*.py");
#[cfg(not(unix))]
let (root, pat) = (
r"Z:\claudette-glob-ws-xyz",
r"Z:\claudette-glob-ws-xyz\**\*.py",
);
let input = json!({ "pattern": pat }).to_string();
// Hold the shared test lock while the process environment is modified.
let _guard = crate::test_env_lock();
let prev = std::env::var("CLAUDETTE_WORKSPACE").ok();
std::env::remove_var("CLAUDETTE_WORKSPACE");
let denied = run_glob_search(&input);
std::env::set_var("CLAUDETTE_WORKSPACE", root);
let allowed = run_glob_search(&input);
// Restore the previous value before asserting, so a failing assertion does
// not leave the variable changed.
match prev {
Some(v) => std::env::set_var("CLAUDETTE_WORKSPACE", v),
None => std::env::remove_var("CLAUDETTE_WORKSPACE"),
}
assert!(
denied.is_err(),
"outside $HOME with no workspace must be rejected: {denied:?}"
);
assert!(
allowed.is_ok(),
"a CLAUDETTE_WORKSPACE root outside $HOME must be allowed: {allowed:?}"
);
}
// An empty JSON object has no 'pattern' key.
#[test]
fn grep_search_rejects_missing_pattern() {
let err = run_grep_search("{}").unwrap_err();
assert!(err.contains("missing"), "got: {err}");
}
// A present but empty pattern is rejected with its own message.
#[test]
fn grep_search_rejects_empty_pattern_inline() {
let err = run_grep_search(r#"{"pattern":""}"#).unwrap_err();
assert!(err.contains("empty"), "got: {err}");
}
// Builds a small tree under ~/.claudette/files with a src/ file and a target/
// file that both contain the symbol, then checks regex mode, that target/ is
// skipped, and that an uncompilable pattern switches to literal mode.
#[test]
fn grep_search_uses_regex_and_skips_build_dirs() {
let _eg = crate::test_env_lock(); let base = user_home()
.join(".claudette")
.join("files")
.join("claudette-greptest-x7q");
// Remove leftovers from an earlier run before recreating the fixture.
let _ = fs::remove_dir_all(&base);
fs::create_dir_all(base.join("src")).unwrap();
fs::create_dir_all(base.join("target")).unwrap();
fs::write(
base.join("src").join("run.rs"),
"fn max_fix_rounds() -> u32 { 3 }\nconst CLAUDETTE_MAX_FIX_ROUNDS: u32 = 3;\n",
)
.unwrap();
fs::write(
base.join("target").join("junk.rs"),
"CLAUDETTE_MAX_FIX_ROUNDS in a build artifact\n",
)
.unwrap();
let input = json!({
"pattern": "max.?fix.?rounds|MAX_FIX_ROUNDS",
"path": base.to_str().unwrap()
})
.to_string();
let out = run_grep_search(&input).unwrap();
let v: Value = serde_json::from_str(&out).unwrap();
assert_eq!(v["mode"], json!("regex"), "got: {out}");
assert!(
v["match_count"].as_u64().unwrap() >= 1,
"regex should match the source symbol: {out}"
);
// Normalise path separators so the substring checks work on every platform.
let files: Vec<String> = v["matches"]
.as_array()
.unwrap()
.iter()
.map(|m| m["file"].as_str().unwrap().replace('\\', "/"))
.collect();
assert!(
files.iter().any(|f| f.contains("/src/run.rs")),
"should find src/run.rs: {files:?}"
);
assert!(
!files.iter().any(|f| f.contains("/target/")),
"must skip target/: {files:?}"
);
// The unbalanced '(' makes this pattern fail to compile as a regex.
let input2 =
json!({ "pattern": "max_fix_rounds(", "path": base.to_str().unwrap() }).to_string();
let out2 = run_grep_search(&input2).unwrap();
let v2: Value = serde_json::from_str(&out2).unwrap();
assert_eq!(v2["mode"], json!("literal"), "got: {out2}");
let _ = fs::remove_dir_all(&base);
}
// An empty JSON object has no 'url' key.
#[test]
fn web_fetch_rejects_missing_url() {
let err = run_web_fetch("{}").unwrap_err();
assert!(err.contains("missing"), "got: {err}");
}
// Non-http(s) schemes are refused before any request is made; the error text
// names the allowed schemes.
#[test]
fn web_fetch_rejects_non_http_scheme_inline() {
let err = run_web_fetch(r#"{"url":"file:///etc/passwd"}"#).unwrap_err();
assert!(err.contains("http://"), "got: {err}");
}
// Pins both the number of schemas and their order.
#[test]
fn schemas_lists_three_tools() {
let schemas = schemas();
assert_eq!(schemas.len(), 3);
let names: Vec<&str> = schemas
.iter()
.filter_map(|v| v.pointer("/function/name").and_then(Value::as_str))
.collect();
assert_eq!(names, ["web_fetch", "glob_search", "grep_search"]);
}
}