use crate::config::Config;
use crate::scanners::exec;
use crate::scanners::{Finding, ScanResults, Severity};
use anyhow::Result;
use std::path::Path;
use tokio::process::Command;
/// Runs the semgrep SAST scan over `path` and converts its JSON report into [`ScanResults`].
///
/// Empty results are returned when the `semgrep` binary cannot be located (the user is
/// warned first) or when `exec::run_scanner` yields no output. A non-zero exit status is
/// reported to the user, but whatever semgrep wrote to stdout is still parsed.
///
/// # Errors
/// Propagates any error returned by `exec::run_scanner` or `parse_semgrep_json`.
pub async fn run(path: &Path, config: &Config) -> Result<ScanResults> {
    let lang = &config.lang;

    // A missing binary is a soft failure: tell the user and report nothing.
    if which::which("semgrep").is_err() {
        exec::warn_user(
            lang,
            "semgrep not found — SAST scan skipped. Run 'shipsafe doctor' for install instructions.",
            "semgrep が見つかりません — SAST スキャンをスキップしました。'shipsafe doctor' でインストール方法を確認できます。",
        );
        return Ok(ScanResults::new());
    }

    let maybe_output = exec::run_scanner(
        "semgrep",
        || {
            // Fixed prefix first, then everything derived from the configuration.
            let mut semgrep = Command::new("semgrep");
            semgrep.args(["scan", "--json", "--quiet"]).arg(path);
            build_semgrep_args(&mut semgrep, config, path);
            semgrep
        },
        config.scanners.timeout_seconds,
        lang,
    )
    .await?;

    let finished = match maybe_output {
        Some(out) => out,
        None => return Ok(ScanResults::new()),
    };

    let diagnostics = String::from_utf8_lossy(&finished.stderr);
    if !diagnostics.is_empty() {
        tracing::debug!("semgrep stderr: {}", diagnostics);
    }

    if !finished.status.success() {
        // Only the first stderr line is shown to the user; the full text is in the debug log.
        let headline = diagnostics.lines().next().unwrap_or("(no details)");
        let english = format!("semgrep exited with {}: {}", finished.status, headline);
        let japanese = format!(
            "semgrep が異常終了しました ({}): {}",
            finished.status, headline
        );
        exec::warn_user(lang, &english, &japanese);
    }

    // Parse stdout even after a failure so partial reports are not lost.
    parse_semgrep_json(&String::from_utf8_lossy(&finished.stdout))
}
/// Heuristically decides whether the file at `path` is a semgrep rule file.
///
/// The file counts as a rule file when any of its lines begins (at column zero) with
/// `rules:`. Files that cannot be read as UTF-8 text are treated as "not a rule file".
fn is_semgrep_rule_file(path: &Path) -> bool {
    match std::fs::read_to_string(path) {
        // A line equal to `rules:` plus trailing whitespace also starts with `rules:`,
        // so the prefix test alone covers both spellings.
        Ok(text) => text.lines().any(|row| row.starts_with("rules:")),
        Err(_) => false,
    }
}
/// Finds project-local semgrep rule files under `<scan_path>/rules`.
///
/// The directory is walked recursively; a file is kept when it has a `.yml` or `.yaml`
/// extension and `is_semgrep_rule_file` accepts its contents. Entries that fail to be
/// read during the walk are skipped. The result is sorted; it is empty when the `rules`
/// directory does not exist.
fn discover_custom_rules(scan_path: &Path) -> Vec<std::path::PathBuf> {
    let rules_dir = scan_path.join("rules");
    let mut rule_files: Vec<std::path::PathBuf> = Vec::new();
    if !rules_dir.is_dir() {
        return rule_files;
    }

    for entry in walkdir::WalkDir::new(&rules_dir) {
        let Ok(entry) = entry else {
            continue;
        };
        if !entry.file_type().is_file() {
            continue;
        }
        // Check the extension before touching file contents so unrelated files are never read.
        let extension = entry.path().extension().and_then(|ext| ext.to_str());
        if !matches!(extension, Some("yml" | "yaml")) {
            continue;
        }
        if is_semgrep_rule_file(entry.path()) {
            rule_files.push(entry.into_path());
        }
    }

    rule_files.sort();
    rule_files
}
/// Appends the configuration-derived semgrep arguments to `cmd`.
///
/// Arguments are added in this order:
/// 1. `--config` for each configured rule pack (`p/owasp-top-ten` when none are configured;
///    `owasp-top-10` is translated to `p/owasp-top-ten`; the removed `ai-generated-code`
///    pack is skipped with a warning; anything else is passed through verbatim),
/// 2. `--config` for each entry of `rules_paths`,
/// 3. `--config` for each rule file discovered under `<scan_path>/rules`,
/// 4. `--exclude-rule` for each disabled rule id,
/// 5. `--exclude` for each excluded path pattern.
fn build_semgrep_args(cmd: &mut Command, config: &Config, scan_path: &Path) {
    let sast = &config.scanners.sast;

    if sast.rules.is_empty() {
        cmd.args(["--config", "p/owasp-top-ten"]);
    }
    for rule in &sast.rules {
        let name = rule.as_str();
        if name == "ai-generated-code" {
            tracing::warn!(
                "the bundled 'ai-generated-code' rule pack was removed in 0.2.0 — ignoring"
            );
            continue;
        }
        // The friendly alias maps to the registry pack; other names are passed as given.
        let pack = if name == "owasp-top-10" {
            "p/owasp-top-ten"
        } else {
            name
        };
        cmd.arg("--config").arg(pack);
    }

    for extra_rules in &sast.rules_paths {
        cmd.arg("--config").arg(extra_rules);
    }

    for discovered in discover_custom_rules(scan_path) {
        cmd.arg("--config").arg(discovered);
    }

    for disabled in &sast.disabled_rules {
        cmd.arg("--exclude-rule").arg(disabled);
    }

    for pattern in &sast.exclude {
        cmd.arg("--exclude").arg(pattern);
    }
}
fn map_severity(s: Option<&str>) -> Severity {
match s {
Some("ERROR") => Severity::Critical,
Some("WARNING") => Severity::Medium,
Some("INFO") => Severity::Low,
_ => Severity::Medium,
}
}
fn extract_cwe(metadata: Option<&serde_json::Value>) -> Option<String> {
let cwe = metadata?.get("cwe")?;
if let Some(s) = cwe.as_str() {
return Some(s.to_string());
}
if let Some(arr) = cwe.as_array() {
let cwe_strs: Vec<&str> = arr.iter().filter_map(|v| v.as_str()).collect();
if !cwe_strs.is_empty() {
return Some(cwe_strs.join(", "));
}
}
None
}
fn parse_semgrep_json(json_str: &str) -> Result<ScanResults> {
let mut results = ScanResults::new();
let json: serde_json::Value = match serde_json::from_str(json_str) {
Ok(v) => v,
Err(_) => return Ok(results),
};
if let Some(semgrep_results) = json.get("results").and_then(|r| r.as_array()) {
for result in semgrep_results {
let severity = map_severity(
result
.get("extra")
.and_then(|e| e.get("severity"))
.and_then(|s| s.as_str()),
);
let metadata = result.get("extra").and_then(|e| e.get("metadata"));
let finding = Finding {
id: result
.get("check_id")
.and_then(|c| c.as_str())
.unwrap_or("unknown")
.to_string(),
scanner: "sast".to_string(),
severity,
title: result
.get("check_id")
.and_then(|c| c.as_str())
.unwrap_or("")
.to_string(),
description: result
.get("extra")
.and_then(|e| e.get("message"))
.and_then(|m| m.as_str())
.unwrap_or("")
.to_string(),
file: result
.get("path")
.and_then(|p| p.as_str())
.unwrap_or("")
.to_string(),
line: result
.get("start")
.and_then(|s| s.get("line"))
.and_then(|l| l.as_u64())
.map(|l| l as u32),
cwe: extract_cwe(metadata),
cve: None,
fix_suggestion: result
.get("extra")
.and_then(|e| e.get("fix"))
.and_then(|f| f.as_str())
.map(|s| s.to_string()),
ai_triage: None,
};
results.findings.push(finding);
}
}
results.recalculate_summary();
Ok(results)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Snapshot of the arguments currently queued on `cmd`, lossily converted to strings.
    fn get_args(cmd: &Command) -> Vec<String> {
        let mut queued = Vec::new();
        for arg in cmd.as_std().get_args() {
            queued.push(arg.to_string_lossy().to_string());
        }
        queued
    }

    /// Runs `build_semgrep_args` on a fresh `semgrep` command with `.` as the scan path.
    fn semgrep_args_for(config: &Config) -> Vec<String> {
        let mut cmd = Command::new("semgrep");
        build_semgrep_args(&mut cmd, config, Path::new("."));
        get_args(&cmd)
    }

    /// True when `wanted` appears in `args` as a complete argument.
    fn has_arg(args: &[String], wanted: &str) -> bool {
        args.iter().any(|arg| arg == wanted)
    }

    // Known labels map to their severities; anything else defaults to Medium.
    #[test]
    fn test_severity_mapping() {
        let cases = [
            (Some("ERROR"), Severity::Critical),
            (Some("WARNING"), Severity::Medium),
            (Some("INFO"), Severity::Low),
            (Some("UNKNOWN"), Severity::Medium),
            (None, Severity::Medium),
        ];
        for (label, expected) in cases {
            assert_eq!(map_severity(label), expected);
        }
    }

    // A string-valued `cwe` is returned unchanged.
    #[test]
    fn test_extract_cwe_string() {
        let metadata: serde_json::Value = serde_json::json!({
            "cwe": "CWE-89: SQL Injection"
        });
        let cwe = extract_cwe(Some(&metadata));
        assert_eq!(cwe.as_deref(), Some("CWE-89: SQL Injection"));
    }

    // An array-valued `cwe` is joined with ", ".
    #[test]
    fn test_extract_cwe_array() {
        let metadata: serde_json::Value = serde_json::json!({
            "cwe": ["CWE-79: XSS", "CWE-89: SQL Injection"]
        });
        let cwe = extract_cwe(Some(&metadata));
        assert_eq!(cwe.as_deref(), Some("CWE-79: XSS, CWE-89: SQL Injection"));
    }

    // Neither an empty metadata object nor absent metadata yields a CWE.
    #[test]
    fn test_extract_cwe_missing() {
        let empty: serde_json::Value = serde_json::json!({});
        assert_eq!(extract_cwe(Some(&empty)), None);
        assert_eq!(extract_cwe(None), None);
    }

    // Three results, one per legacy severity, with string/array/missing CWE metadata.
    #[test]
    fn test_parse_semgrep_json_with_results() {
        let report = r#"{
"results": [
{
"check_id": "python.lang.security.audit.exec-detected",
"path": "app.py",
"start": {"line": 42, "col": 1},
"end": {"line": 42, "col": 20},
"extra": {
"severity": "ERROR",
"message": "Detected use of exec(). This is dangerous.",
"metadata": {
"cwe": ["CWE-95: Improper Neutralization"]
},
"fix": "Use ast.literal_eval() instead."
}
},
{
"check_id": "python.lang.security.audit.logging-warn",
"path": "utils.py",
"start": {"line": 10, "col": 1},
"end": {"line": 10, "col": 30},
"extra": {
"severity": "WARNING",
"message": "Logging sensitive data.",
"metadata": {
"cwe": "CWE-532"
}
}
},
{
"check_id": "python.lang.best-practice.info-rule",
"path": "main.py",
"start": {"line": 5, "col": 1},
"end": {"line": 5, "col": 15},
"extra": {
"severity": "INFO",
"message": "Consider using a constant.",
"metadata": {}
}
}
]
}"#;
        let parsed = parse_semgrep_json(report).unwrap();

        assert_eq!(parsed.findings.len(), 3);
        let summary = &parsed.summary;
        assert_eq!(summary.total, 3);
        assert_eq!(summary.critical, 1);
        assert_eq!(summary.medium, 1);
        assert_eq!(summary.low, 1);

        let exec_finding = &parsed.findings[0];
        assert_eq!(exec_finding.severity, Severity::Critical);
        assert_eq!(exec_finding.id, "python.lang.security.audit.exec-detected");
        assert_eq!(exec_finding.scanner, "sast");
        assert_eq!(exec_finding.file, "app.py");
        assert_eq!(exec_finding.line, Some(42));
        assert_eq!(
            exec_finding.cwe.as_deref(),
            Some("CWE-95: Improper Neutralization")
        );
        assert_eq!(
            exec_finding.fix_suggestion.as_deref(),
            Some("Use ast.literal_eval() instead.")
        );

        let logging_finding = &parsed.findings[1];
        assert_eq!(logging_finding.severity, Severity::Medium);
        assert_eq!(logging_finding.cwe.as_deref(), Some("CWE-532"));

        let info_finding = &parsed.findings[2];
        assert_eq!(info_finding.severity, Severity::Low);
        assert_eq!(info_finding.cwe, None);
    }

    // An empty `results` array yields no findings and a zero total.
    #[test]
    fn test_parse_semgrep_json_empty_results() {
        let parsed = parse_semgrep_json(r#"{"results": []}"#).unwrap();
        assert_eq!(parsed.findings.len(), 0);
        assert_eq!(parsed.summary.total, 0);
    }

    // Unparseable input is tolerated and yields no findings.
    #[test]
    fn test_parse_semgrep_json_invalid() {
        let parsed = parse_semgrep_json("not valid json").unwrap();
        assert_eq!(parsed.findings.len(), 0);
    }

    // A result carrying only `extra` still becomes a finding with fallback values.
    #[test]
    fn test_parse_semgrep_json_missing_fields() {
        let report = r#"{
"results": [
{
"extra": {
"severity": "ERROR",
"message": "Some issue"
}
}
]
}"#;
        let parsed = parse_semgrep_json(report).unwrap();
        assert_eq!(parsed.findings.len(), 1);

        let only = &parsed.findings[0];
        assert_eq!(only.id, "unknown");
        assert_eq!(only.file, "");
        assert_eq!(only.line, None);
        assert_eq!(only.cwe, None);
    }

    // The default configuration enables the OWASP rule pack.
    #[test]
    fn test_default_config_has_owasp_rules() {
        let defaults = Config::default();
        let rules = &defaults.scanners.sast.rules;
        assert!(rules.iter().any(|rule| rule == "owasp-top-10"));
    }

    // With no rule packs configured, the OWASP registry pack is used.
    #[test]
    fn test_empty_rules_defaults_to_owasp_args() {
        let mut config = Config::default();
        config.scanners.sast.rules = vec![];
        config.scanners.sast.exclude = vec![];

        let args = semgrep_args_for(&config);
        assert!(has_arg(&args, "--config"));
        assert!(has_arg(&args, "p/owasp-top-ten"));
    }

    // Aliased and pass-through rule packs plus an exclude pattern all reach the command line.
    #[test]
    fn test_custom_rules_args() {
        let mut config = Config::default();
        config.scanners.sast.rules = vec!["owasp-top-10".into(), "p/django".into()];
        config.scanners.sast.exclude = vec!["vendor".into()];

        let args = semgrep_args_for(&config);
        for expected in ["p/owasp-top-ten", "p/django", "--exclude", "vendor"] {
            assert!(has_arg(&args, expected));
        }
    }

    // The removed rule pack never reaches the command line.
    #[test]
    fn test_removed_ai_generated_code_pack_is_skipped() {
        let mut config = Config::default();
        config.scanners.sast.rules = vec!["owasp-top-10".into(), "ai-generated-code".into()];

        let args = semgrep_args_for(&config);
        assert!(has_arg(&args, "p/owasp-top-ten"));
        assert!(args.iter().all(|arg| !arg.contains("ai-generated")));
    }

    // Extra rule paths and disabled rule ids are forwarded.
    #[test]
    fn test_rules_paths_and_disabled_rules_args() {
        let mut config = Config::default();
        config.scanners.sast.rules_paths = vec!["./my-rules/".into()];
        config.scanners.sast.disabled_rules =
            vec!["javascript.lang.security.audit.code-string-concat".into()];

        let args = semgrep_args_for(&config);
        assert!(has_arg(&args, "./my-rules/"));
        assert!(has_arg(&args, "--exclude-rule"));
        assert!(has_arg(
            &args,
            "javascript.lang.security.audit.code-string-concat"
        ));
    }

    // Only YAML files that look like semgrep rules are discovered.
    #[test]
    fn test_discover_custom_rules() {
        let scratch =
            std::env::temp_dir().join(format!("shipsafe-discover-test-{}", std::process::id()));
        let rules_dir = scratch.join("rules");
        std::fs::create_dir_all(&rules_dir).unwrap();

        let fixtures = [
            (
                "custom.yml",
                "rules:\n - id: x\n pattern: foo\n message: m\n languages: [python]\n severity: ERROR\n",
            ),
            ("docker-compose.yml", "services: {}\n"),
            ("README.md", "rules:\n"),
        ];
        for (file_name, contents) in fixtures {
            std::fs::write(rules_dir.join(file_name), contents).unwrap();
        }

        let discovered = discover_custom_rules(&scratch);
        // Clean up before asserting so a failure does not leave the directory behind.
        std::fs::remove_dir_all(&scratch).ok();

        assert_eq!(discovered.len(), 1);
        assert!(discovered[0].ends_with("custom.yml"));
    }

    // A scan path without a `rules` directory yields nothing.
    #[test]
    fn test_discover_custom_rules_no_dir() {
        let discovered = discover_custom_rules(Path::new("/nonexistent-shipsafe"));
        assert!(discovered.is_empty());
    }
}