use std::path::{Path, PathBuf};
use once_cell::sync::Lazy;
use regex::Regex;
use serde_json::Value;
use url::Url;
use vigil_types::{EffectKind, EffectVector, ToolInvocation};
/// Common interface for components that inspect a single tool invocation and
/// record whatever effects they recognize in it.
///
/// Implementors must be `Send + Sync`.
pub trait EffectExtractor: Send + Sync {
/// Returns a static, human-readable name identifying this extractor.
fn name(&self) -> &'static str;
/// Examines `call` and appends any recognized effects to `out`.
fn extract(&self, call: &ToolInvocation, out: &mut EffectVector);
}
/// Extractor that turns path-like tool arguments into filesystem read/write effects.
#[derive(Debug)]
pub struct PathExtractor {
/// Project root directories; relative paths are resolved against the first entry.
pub project_roots: Vec<PathBuf>,
}
impl PathExtractor {
    /// Builds an extractor that resolves relative paths against `project_roots`.
    pub fn new(project_roots: Vec<PathBuf>) -> Self {
        Self { project_roots }
    }

    /// Gathers every string stored under one of the known path-like argument keys.
    ///
    /// Only top-level keys of `args` are inspected. A key may hold a single
    /// string or an array; non-string array elements and values of any other
    /// JSON type are ignored. Results follow the order of the key table.
    fn collect_paths(&self, args: &Value) -> Vec<String> {
        const KEYS: &[&str] = &[
            "path",
            "paths",
            "file",
            "files",
            "src",
            "source",
            "dst",
            "destination",
            "target",
            "input",
            "output",
        ];

        let mut found = Vec::new();
        for key in KEYS.iter().copied() {
            let Some(value) = args.get(key) else {
                continue;
            };
            if let Some(single) = value.as_str() {
                found.push(single.to_owned());
            } else if let Some(items) = value.as_array() {
                found.extend(items.iter().filter_map(Value::as_str).map(str::to_owned));
            }
        }
        found
    }

    /// Resolves `raw` to a normalized, forward-slash path string.
    ///
    /// Relative paths are joined onto the first project root when one exists.
    /// Filesystem canonicalization is attempted first; if it fails, the path
    /// is normalized lexically instead.
    fn canonicalize(&self, raw: &str) -> String {
        let given = Path::new(raw);
        let absolute: PathBuf = match self.project_roots.first() {
            Some(root) if !given.is_absolute() => root.join(given),
            _ => given.to_path_buf(),
        };
        let resolved = match dunce::canonicalize(&absolute) {
            Ok(real) => real,
            Err(_) => manual_normalize(&absolute),
        };
        to_posix(&resolved)
    }
}
/// Heuristically decides whether a tool name denotes a mutating filesystem operation.
///
/// The name is ASCII-lowercased and searched for each keyword as a plain
/// substring, so a short keyword such as `rm` also matches inside longer
/// words (for example `get_permissions`).
fn is_write_call(tool_name: &str) -> bool {
    const WRITE_KEYWORDS: &[&str] = &[
        "write", "create", "edit", "patch", "delete", "unlink", "rm", "move", "rename", "append",
        "chmod", "chown", "mkdir",
    ];

    let haystack = tool_name.to_ascii_lowercase();
    WRITE_KEYWORDS
        .iter()
        .any(|keyword| haystack.contains(*keyword))
}
impl EffectExtractor for PathExtractor {
    fn name(&self) -> &'static str {
        "PathExtractor"
    }

    /// Records a filesystem read or write effect for every path-like argument.
    ///
    /// Does nothing when the call carries no path arguments. Otherwise all
    /// paths are canonicalized and filed under writes or reads depending on
    /// whether the tool name looks like a mutating operation.
    fn extract(&self, call: &ToolInvocation, out: &mut EffectVector) {
        let raw_paths = self.collect_paths(&call.args);
        if raw_paths.is_empty() {
            return;
        }

        let mut canonical = Vec::with_capacity(raw_paths.len());
        for raw in &raw_paths {
            canonical.push(self.canonicalize(raw));
        }

        let mutating = is_write_call(&call.tool_name);
        out.effects.push(if mutating {
            EffectKind::FsWrite
        } else {
            EffectKind::FsRead
        });
        if mutating {
            out.paths_write.extend(canonical);
        } else {
            out.paths_read.extend(canonical);
        }
    }
}
/// Extractor that reports outbound network access for URL-valued arguments.
#[derive(Debug)]
pub struct UrlExtractor;

impl EffectExtractor for UrlExtractor {
    fn name(&self) -> &'static str {
        "UrlExtractor"
    }

    /// Collects the lowercased hosts of all parseable URLs in the arguments.
    ///
    /// The string-valued keys `url`, `endpoint` and `uri` are read first,
    /// followed by the string elements of the `urls` array. Values that do not
    /// parse as URLs, or that have no host, are skipped. An outbound-network
    /// effect is recorded only when at least one host was found.
    fn extract(&self, call: &ToolInvocation, out: &mut EffectVector) {
        /// Parses `raw` as a URL and returns its host in ASCII lowercase, if any.
        fn host_of(raw: &str) -> Option<String> {
            let parsed = Url::parse(raw).ok()?;
            parsed.host_str().map(|host| host.to_ascii_lowercase())
        }

        let mut hosts: Vec<String> = Vec::new();

        for key in ["url", "endpoint", "uri"] {
            let host = call.args.get(key).and_then(Value::as_str).and_then(host_of);
            if let Some(host) = host {
                hosts.push(host);
            }
        }

        if let Some(list) = call.args.get("urls").and_then(Value::as_array) {
            hosts.extend(list.iter().filter_map(Value::as_str).filter_map(host_of));
        }

        if hosts.is_empty() {
            return;
        }
        out.effects.push(EffectKind::NetOutbound);
        out.network_hosts.extend(hosts);
    }
}
/// Extractor that classifies SQL text arguments as database reads or writes.
#[derive(Debug)]
pub struct SqlExtractor;

/// Keywords of statements that remove or overwrite existing data or schema.
/// Matched anywhere in the text, case-insensitively, on word boundaries.
static DESTRUCTIVE_SQL: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r"(?i)\b(DELETE|DROP|TRUNCATE|ALTER|UPDATE|REPLACE)\b").expect("regex")
});

/// Leading keywords of statements that normally only read.
/// Anchored at the start of the text (after optional whitespace).
static READ_ONLY_SQL: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"(?i)^\s*(SELECT|SHOW|DESCRIBE|EXPLAIN|WITH)\b").expect("regex"));

/// Keywords of statements that add data or schema objects without being
/// destructive. Their presence means the text is not a pure read even when it
/// starts with a read-only keyword (e.g. `WITH ... INSERT`, `SELECT ... INTO`,
/// or a second statement after `;`).
static ADDITIVE_SQL: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"(?i)\b(INSERT|MERGE|UPSERT|CREATE|INTO)\b").expect("regex"));

impl EffectExtractor for SqlExtractor {
    fn name(&self) -> &'static str {
        "SqlExtractor"
    }

    /// Classifies each string found under the `sql`, `query` and `statement` keys.
    ///
    /// * Text containing a destructive keyword yields a database-write effect
    ///   and sets `out.destructive`.
    /// * Text that starts with a read-only keyword and contains neither a
    ///   destructive nor an additive keyword yields a database-read effect.
    /// * Everything else is treated as a (non-destructive) database write.
    ///
    /// The classification is keyword based and deliberately errs towards
    /// "write": keywords inside string literals or identifiers also count.
    fn extract(&self, call: &ToolInvocation, out: &mut EffectVector) {
        for key in ["sql", "query", "statement"] {
            if let Some(Value::String(text)) = call.args.get(key) {
                let destructive = DESTRUCTIVE_SQL.is_match(text);
                // A read-only prefix alone is not enough: the rest of the text
                // must not contain any mutating keyword either.
                let read_only = !destructive
                    && !ADDITIVE_SQL.is_match(text)
                    && READ_ONLY_SQL.is_match(text);

                if read_only {
                    out.effects.push(EffectKind::DbRead);
                } else {
                    out.effects.push(EffectKind::DbWrite);
                    if destructive {
                        out.destructive = true;
                    }
                }
            }
        }
    }
}
/// Extractor that inspects shell/process invocations and flags risky ones.
#[derive(Debug)]
pub struct ShellExtractor;
/// Program names (lowercase, without `.exe`) whose invocation is always
/// treated as destructive.
const DESTRUCTIVE_BINARIES: &[&str] = &[
"rm", "rmdir", "shred", "mkfs", "fdisk", "format", "del", "dd", "srm", "erase",
];
/// Program names of shells / command interpreters that can run an embedded
/// script passed as an argument.
const SHELL_WRAPPERS: &[&str] = &[
"sh",
"bash",
"zsh",
"ksh",
"dash",
"fish",
"cmd",
"cmd.exe",
"powershell",
"powershell.exe",
"pwsh",
"pwsh.exe",
];
/// Flags that tell a shell wrapper to evaluate the following argument as a command.
const SHELL_EVAL_FLAGS: &[&str] = &["-c", "-lc", "-Command", "/c", "/C", "-Cmd", "--command"];
/// Locations considered sensitive; an argument that equals one of these, or
/// lies below it, marks the invocation as destructive.
const PROTECTED_PATHS: &[&str] = &[
"/",
"/etc",
"/usr",
"/var",
"/bin",
"/sbin",
"/boot",
"/sys",
"/proc",
"~",
"~/.ssh",
"~/.config",
"~/.aws",
"~/.docker",
"C:\\",
"C:\\Windows",
"%SystemRoot%",
"%USERPROFILE%\\.ssh",
];
/// Shell control and redirection tokens; the final entry is a backslash
/// followed by a newline. Any occurrence in the joined command line marks the
/// invocation as destructive.
const SHELL_METACHARS: &[&str] = &[
"&&", "||", "|", "&", ";", ">", ">>", "<", "<<", "$(", "`", "\\\n",
];
impl EffectExtractor for ShellExtractor {
    fn name(&self) -> &'static str {
        "ShellExtractor"
    }

    /// Records a native-execution effect for shell-style calls and decides
    /// whether the command must be treated as destructive.
    ///
    /// The command line is taken from the `argv` array (non-string elements
    /// are skipped) or, failing that, from the `command` string split with
    /// shell quoting rules. Calls with neither key, or with an empty `argv`,
    /// are ignored. A `command` string that cannot be split, or splits to
    /// nothing, fails closed: it is recorded as a destructive execution.
    ///
    /// The invocation is flagged destructive when any of these hold:
    /// * the joined command line contains a shell metacharacter,
    /// * the program is a shell wrapper that is given an eval flag or an
    ///   argument containing a space or newline (an embedded script),
    /// * the program is one of the known destructive binaries,
    /// * any argument equals or lies below a protected path.
    fn extract(&self, call: &ToolInvocation, out: &mut EffectVector) {
        let argv: Vec<String> = if let Some(Value::Array(items)) = call.args.get("argv") {
            items
                .iter()
                .filter_map(|v| v.as_str().map(String::from))
                .collect()
        } else if let Some(Value::String(command)) = call.args.get("command") {
            match shlex::split(command) {
                Some(parts) if !parts.is_empty() => parts,
                _ => {
                    // Unparseable or empty command text: fail closed.
                    out.effects.push(EffectKind::ExecNative);
                    out.destructive = true;
                    return;
                }
            }
        } else {
            return;
        };

        let program = match argv.first() {
            Some(program) => program,
            None => return,
        };
        out.effects.push(EffectKind::ExecNative);

        let full = argv.join(" ");
        let has_meta = SHELL_METACHARS.iter().any(|m| full.contains(*m));

        // Compare program names case-insensitively and without a trailing `.exe`.
        let bin_lower = basename(program).to_ascii_lowercase();
        let bin_noexe: &str = bin_lower.strip_suffix(".exe").unwrap_or(&bin_lower);

        let is_wrapper = SHELL_WRAPPERS
            .iter()
            .any(|w| w.strip_suffix(".exe").unwrap_or(*w) == bin_noexe);

        // PowerShell parameter names and cmd.exe switches are case-insensitive,
        // so `-command` / `-COMMAND` must be recognized just like `-Command`.
        let has_eval_flag = argv.iter().skip(1).any(|arg| {
            SHELL_EVAL_FLAGS
                .iter()
                .any(|flag| arg.eq_ignore_ascii_case(flag))
        });

        let wrapper_with_script = is_wrapper
            && (has_eval_flag
                || argv
                    .iter()
                    .skip(1)
                    .any(|a| a.contains(' ') || a.contains('\n')));

        // Any use of a destructive binary is flagged, regardless of its flags.
        let is_destructive_bin = DESTRUCTIVE_BINARIES
            .iter()
            .any(|d| bin_noexe == *d || bin_lower == *d);

        let hits_protected = argv
            .iter()
            .any(|a| PROTECTED_PATHS.iter().any(|p| path_hits_protected(a, p)));

        if has_meta || wrapper_with_script || is_destructive_bin || hits_protected {
            out.destructive = true;
        }
    }
}
/// Extractor that reports outgoing communication for recipient-style arguments.
#[derive(Debug)]
pub struct EmailExtractor;

impl EffectExtractor for EmailExtractor {
    fn name(&self) -> &'static str {
        "EmailExtractor"
    }

    /// Collects recipients from the `to`, `cc`, `bcc` and `recipients` keys.
    ///
    /// Each key may hold a single string or an array; non-string array
    /// elements are skipped. A send effect is recorded only when at least one
    /// recipient was found.
    fn extract(&self, call: &ToolInvocation, out: &mut EffectVector) {
        let mut recipients: Vec<String> = Vec::new();

        for field in ["to", "cc", "bcc", "recipients"] {
            let Some(value) = call.args.get(field) else {
                continue;
            };
            if let Some(address) = value.as_str() {
                recipients.push(address.to_owned());
            } else if let Some(list) = value.as_array() {
                recipients.extend(list.iter().filter_map(Value::as_str).map(str::to_owned));
            }
        }

        if recipients.is_empty() {
            return;
        }
        out.effects.push(EffectKind::CommSend);
        out.recipients.extend(recipients);
    }
}
/// Extractor that detects `secret://` references anywhere in the arguments.
#[derive(Debug)]
pub struct SecretRefExtractor;

/// Matches a `secret://` reference made of letters, digits, `.`, `_`, `-` and `/`.
static SECRET_REF_RE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"secret://[A-Za-z0-9._\-/]+").expect("regex"));

impl EffectExtractor for SecretRefExtractor {
    fn name(&self) -> &'static str {
        "SecretRefExtractor"
    }

    /// Scans the serialized arguments for secret references.
    ///
    /// The whole argument value is rendered to its JSON text and searched, so
    /// references are found at any nesting depth. A secret-use effect is
    /// recorded only when at least one reference was found.
    fn extract(&self, call: &ToolInvocation, out: &mut EffectVector) {
        let serialized = call.args.to_string();
        let refs: Vec<String> = SECRET_REF_RE
            .find_iter(&serialized)
            .map(|hit| hit.as_str().to_owned())
            .collect();

        if refs.is_empty() {
            return;
        }
        out.effects.push(EffectKind::SecretUse);
        out.secret_refs.extend(refs);
    }
}
/// Extractor that detects form-submitting actions performed by browser tools.
#[derive(Debug)]
pub struct BrowserActionExtractor;

impl EffectExtractor for BrowserActionExtractor {
    fn name(&self) -> &'static str {
        "BrowserActionExtractor"
    }

    /// Records a browser-submit effect for submitting actions of browser tools.
    ///
    /// Applies only when the tool name contains `browser` (ASCII
    /// case-insensitive) and the `action` argument is `submit`, `fill_form`
    /// or `post`. If the `origin` argument parses as a URL with a host, that
    /// host is added, lowercased, to the network hosts.
    fn extract(&self, call: &ToolInvocation, out: &mut EffectVector) {
        if !call.tool_name.to_ascii_lowercase().contains("browser") {
            return;
        }

        let action = call.args.get("action").and_then(Value::as_str);
        let submits = matches!(action, Some("submit") | Some("fill_form") | Some("post"));
        if !submits {
            return;
        }
        out.effects.push(EffectKind::BrowserSubmit);

        let origin_host = call
            .args
            .get("origin")
            .and_then(Value::as_str)
            .and_then(|origin| Url::parse(origin).ok())
            .and_then(|url| url.host_str().map(|host| host.to_ascii_lowercase()));
        if let Some(host) = origin_host {
            out.network_hosts.push(host);
        }
    }
}
/// Returns the part of `p` after its last `/` or `\` separator.
///
/// The whole input is returned when it contains no separator, and an empty
/// string when it ends with one.
fn basename(p: &str) -> &str {
    match p.rfind(|c: char| c == '/' || c == '\\') {
        // Both separators are single-byte, so the name starts one byte later.
        Some(sep) => &p[sep + 1..],
        None => p,
    }
}
/// Tells whether `arg` names `protected` itself or something beneath it.
///
/// Surrounding single or double quotes are stripped from `arg` first. A hit
/// requires an exact match, or `protected` followed directly by a `/` or `\`
/// separator, so `/etcetera` does not hit `/etc`.
fn path_hits_protected(arg: &str, protected: &str) -> bool {
    let unquoted = arg.trim_matches(|c: char| matches!(c, '\'' | '"'));
    match unquoted.strip_prefix(protected) {
        Some(rest) => rest.is_empty() || rest.starts_with('/') || rest.starts_with('\\'),
        None => false,
    }
}
/// Lexically normalizes `p` without touching the filesystem.
///
/// `.` components are dropped and each `..` removes the preceding normal
/// component. A `..` directly after a root or prefix is discarded, since
/// there is nothing above the root. A `..` with nothing left to remove (a
/// relative path climbing above its starting point) is kept, so `../a` stays
/// `../a` rather than silently becoming `a`. A path that normalizes to
/// nothing is returned as `.`.
fn manual_normalize(p: &Path) -> PathBuf {
    use std::path::Component;

    let mut kept: Vec<Component> = Vec::new();
    for comp in p.components() {
        match comp {
            Component::CurDir => {}
            Component::ParentDir => match kept.last() {
                // Cannot climb above the filesystem root / drive prefix.
                Some(Component::Prefix(_)) | Some(Component::RootDir) => {}
                // Nothing to cancel out: preserve the upward step.
                None | Some(Component::ParentDir) => kept.push(comp),
                // Cancels the preceding normal component.
                Some(_) => {
                    kept.pop();
                }
            },
            _ => kept.push(comp),
        }
    }

    if kept.is_empty() {
        return PathBuf::from(".");
    }
    kept.iter().collect()
}
/// Renders `p` as a string that uses `/` as its only separator.
///
/// The path is converted lossily to text, a leading `\\?\` prefix is removed
/// if present, and every backslash is replaced with a forward slash.
fn to_posix(p: &Path) -> String {
    let lossy = p.to_string_lossy();
    let body: &str = match lossy.strip_prefix(r"\\?\") {
        Some(rest) => rest,
        None => &*lossy,
    };
    body.replace('\\', "/")
}
// Unit tests covering each extractor with small hand-built invocations.
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
// Builds a `ToolInvocation` with fixed placeholder metadata so that tests
// only need to vary the tool name and its arguments.
fn mk_call(tool: &str, args: Value) -> ToolInvocation {
ToolInvocation {
invocation_id: "t".into(),
session_id: "s".into(),
server_id: "srv".into(),
tool_name: tool.into(),
args,
descriptor_hash: "hash".into(),
requested_at: 0,
}
}
// A read-style tool name files the path under reads; a write-style tool name
// files it under writes. Relative paths are resolved against the project root.
#[test]
fn path_extractor_write_vs_read() {
let e = PathExtractor::new(vec![PathBuf::from("/proj")]);
let mut ev = EffectVector::default();
e.extract(
&mk_call("fs_read_file", json!({"path": "src/main.rs"})),
&mut ev,
);
assert!(ev.effects.contains(&EffectKind::FsRead));
assert!(ev
.paths_read
.iter()
.any(|p| p.ends_with("/proj/src/main.rs")));
let mut ev = EffectVector::default();
e.extract(
&mk_call("fs_write_file", json!({"path": "README.md"})),
&mut ev,
);
assert!(ev.effects.contains(&EffectKind::FsWrite));
assert!(ev
.paths_write
.iter()
.any(|p| p.ends_with("/proj/README.md")));
}
// `..` segments that climb out of the project root must be resolved so the
// recorded path is the real target.
#[test]
fn path_extractor_resolves_dot_dot() {
let e = PathExtractor::new(vec![PathBuf::from("/proj")]);
let mut ev = EffectVector::default();
e.extract(
&mk_call("fs_read_file", json!({"path": "../../etc/passwd"})),
&mut ev,
);
assert!(ev.paths_read.iter().any(|p| p == "/etc/passwd"));
}
// A URL argument yields an outbound-network effect with the URL's host.
#[test]
fn url_extractor_detects_host() {
let mut ev = EffectVector::default();
UrlExtractor.extract(
&mk_call(
"http_get",
json!({"url": "https://api.github.com/users/me"}),
),
&mut ev,
);
assert!(ev.effects.contains(&EffectKind::NetOutbound));
assert_eq!(ev.network_hosts, vec!["api.github.com"]);
}
// DELETE is a destructive write; a plain SELECT is a non-destructive read.
#[test]
fn sql_destructive_vs_read() {
let mut ev = EffectVector::default();
SqlExtractor.extract(
&mk_call("db_query", json!({"sql": "DELETE FROM users WHERE id=1"})),
&mut ev,
);
assert!(ev.destructive);
assert!(ev.effects.contains(&EffectKind::DbWrite));
let mut ev = EffectVector::default();
SqlExtractor.extract(
&mk_call("db_query", json!({"sql": "SELECT * FROM t"})),
&mut ev,
);
assert!(!ev.destructive);
assert!(ev.effects.contains(&EffectKind::DbRead));
}
// `rm -rf` given as an argv array is flagged destructive.
#[test]
fn shell_rm_rf_is_destructive() {
let mut ev = EffectVector::default();
ShellExtractor.extract(
&mk_call(
"shell_run",
json!({"argv": ["rm", "-rf", "/home/user/Downloads"]}),
),
&mut ev,
);
assert!(ev.destructive);
assert!(ev.effects.contains(&EffectKind::ExecNative));
}
// A command string containing a shell metacharacter (`&&`) must fail closed
// and be marked destructive.
#[test]
fn shell_metacharacter_fails_closed() {
let mut ev = EffectVector::default();
ShellExtractor.extract(
&mk_call("shell_run", json!({"command": "ls && rm -rf /"})),
&mut ev,
);
assert!(
ev.destructive,
"shell metachar 必须 fail-closed 标 destructive"
);
}
// Touching a path below a protected location is marked destructive even
// when the program itself is harmless.
#[test]
fn shell_protected_path_triggers() {
let mut ev = EffectVector::default();
ShellExtractor.extract(
&mk_call("shell_run", json!({"argv": ["cat", "/etc/shadow"]})),
&mut ev,
);
assert!(ev.destructive, "保护路径命中应标 destructive");
}
// A harmless command is still recorded as native execution, but not destructive.
#[test]
fn shell_safe_ls_not_destructive() {
let mut ev = EffectVector::default();
ShellExtractor.extract(
&mk_call("shell_run", json!({"argv": ["ls", "-la"]})),
&mut ev,
);
assert!(!ev.destructive);
assert!(ev.effects.contains(&EffectKind::ExecNative));
}
// Recipients are gathered from both string-valued and array-valued fields.
#[test]
fn email_extractor_collects_recipients() {
let mut ev = EffectVector::default();
EmailExtractor.extract(
&mk_call(
"send_email",
json!({"to": "bob@example.com", "cc": ["alice@example.com"]}),
),
&mut ev,
);
assert!(ev.effects.contains(&EffectKind::CommSend));
assert_eq!(ev.recipients.len(), 2);
}
// A `secret://` reference inside any argument is reported verbatim.
#[test]
fn secret_ref_extractor_finds_alias() {
let mut ev = EffectVector::default();
SecretRefExtractor.extract(
&mk_call(
"github_create_issue",
json!({"auth": "secret://github/repo-write", "title": "x"}),
),
&mut ev,
);
assert!(ev.effects.contains(&EffectKind::SecretUse));
assert_eq!(ev.secret_refs, vec!["secret://github/repo-write"]);
}
// A browser tool performing a `submit` action yields a submit effect and
// records the origin's host.
#[test]
fn browser_submit_detected() {
let mut ev = EffectVector::default();
BrowserActionExtractor.extract(
&mk_call(
"browser_action",
json!({"action": "submit", "origin": "https://chatgpt.com"}),
),
&mut ev,
);
assert!(ev.effects.contains(&EffectKind::BrowserSubmit));
assert_eq!(ev.network_hosts, vec!["chatgpt.com"]);
}
}