use crate::config::{Config, HeredocSettings};
use crate::evaluator::{
EvaluationDecision, MatchSource, PatternMatch, evaluate_command_with_pack_order_at_path,
};
use crate::packs::{DecisionMode, REGISTRY, Severity};
use crate::suggestions::{SuggestionKind, get_suggestion_by_kind};
use clap::ValueEnum;
use memchr::memmem;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
/// Version stamp written into every `ScanReport` so consumers can detect
/// incompatible report-schema changes.
pub const SCAN_SCHEMA_VERSION: u32 = 1;
/// Root of the `hooks.toml` configuration file.
#[derive(Debug, Clone, Default, Deserialize)]
pub struct HooksToml {
    /// `[scan]` section; defaults to all-unset when absent.
    #[serde(default)]
    pub scan: HooksTomlScan,
}
/// `[scan]` section of `hooks.toml`. All fields are optional; unset fields
/// fall back to CLI/default values elsewhere.
#[derive(Debug, Clone, Default, Deserialize)]
pub struct HooksTomlScan {
    /// Severity threshold at which the scan exits non-zero.
    pub fail_on: Option<ScanFailOn>,
    /// Report output format.
    pub format: Option<ScanFormat>,
    /// Skip files larger than this many bytes.
    pub max_file_size: Option<u64>,
    /// Stop collecting after this many findings.
    pub max_findings: Option<usize>,
    /// How aggressively to redact command text in the report.
    pub redact: Option<ScanRedactMode>,
    /// Maximum characters of command text to keep per finding.
    pub truncate: Option<usize>,
    /// `[scan.paths]` include/exclude globs.
    #[serde(default)]
    pub paths: HooksTomlScanPaths,
}
/// `[scan.paths]` section: glob patterns limiting which files are scanned.
#[derive(Debug, Clone, Default, Deserialize)]
pub struct HooksTomlScanPaths {
    /// Globs a file must match to be scanned (empty = match everything).
    #[serde(default)]
    pub include: Vec<String>,
    /// Globs that exclude a file even when included.
    #[serde(default)]
    pub exclude: Vec<String>,
}
pub fn parse_hooks_toml(contents: &str) -> Result<(HooksToml, Vec<String>), String> {
let value: toml::Value = toml::from_str(contents).map_err(|e| e.to_string())?;
let mut warnings = Vec::new();
warn_unknown_hooks_toml_keys(&value, "", &mut warnings);
let cfg: HooksToml = toml::from_str(contents).map_err(|e| e.to_string())?;
Ok((cfg, warnings))
}
/// Recursively walks a parsed TOML tree and records a warning for every key
/// that is not part of the known `hooks.toml` schema at its position.
///
/// `path` is the dotted location of `value` (`""` for the document root).
fn warn_unknown_hooks_toml_keys(value: &toml::Value, path: &str, warnings: &mut Vec<String>) {
    let Some(table) = value.as_table() else {
        return;
    };
    // Keys recognized at each nesting level; anything else is silently
    // ignored by the loader, so surface it to the user here.
    let allowed: &[&str] = match path {
        "" => &["scan"],
        "scan" => &[
            "fail_on",
            "format",
            "max_file_size",
            "max_findings",
            "redact",
            "truncate",
            "paths",
        ],
        "scan.paths" => &["include", "exclude"],
        _ => &[],
    };
    for (key, child) in table {
        let child_path = if path.is_empty() {
            key.clone()
        } else {
            format!("{path}.{key}")
        };
        if allowed.contains(&key.as_str()) {
            // Known key: only tables can contain further keys to validate.
            if child.is_table() {
                warn_unknown_hooks_toml_keys(child, &child_path, warnings);
            }
        } else {
            warnings.push(format!("Unknown key `{child_path}` will be ignored"));
        }
    }
}
/// Output format for scan reports.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, ValueEnum)]
#[serde(rename_all = "snake_case")]
pub enum ScanFormat {
    /// Human-readable output (CLI also accepts the alias `text`).
    #[value(alias = "text")]
    Pretty,
    /// JSON output.
    Json,
    /// Markdown output.
    Markdown,
    /// SARIF output for code-scanning integrations.
    Sarif,
}
/// Severity threshold at which a scan is considered failed (see
/// [`ScanFailOn::blocks`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, ValueEnum)]
#[serde(rename_all = "snake_case")]
pub enum ScanFailOn {
    /// Never fail, regardless of findings.
    None,
    /// Fail on warning-or-worse findings.
    Warning,
    /// Fail only on error findings.
    Error,
}
impl ScanFailOn {
#[must_use]
pub const fn blocks(&self, severity: ScanSeverity) -> bool {
match self {
Self::None => false,
Self::Warning => matches!(severity, ScanSeverity::Warning | ScanSeverity::Error),
Self::Error => matches!(severity, ScanSeverity::Error),
}
}
}
/// How much of the extracted command text to redact in report output.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize, ValueEnum)]
#[serde(rename_all = "snake_case")]
pub enum ScanRedactMode {
    /// Keep command text verbatim (default).
    #[default]
    None,
    /// Replace the contents of quoted strings (see `redact_quoted_strings`).
    Quoted,
    /// Additionally redact hex blobs and `key=value` pairs with
    /// credential-like keys (see `redact_aggressively`).
    Aggressive,
}
/// Outcome recorded for a finding: what the evaluator would do with the
/// command at runtime.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ScanDecision {
    Allow,
    Warn,
    Deny,
}
/// Severity attached to a finding; ordered by [`ScanSeverity::rank`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ScanSeverity {
    Info,
    Warning,
    Error,
}
impl ScanSeverity {
    /// Numeric ordering for severities; a larger rank is more severe.
    #[must_use]
    pub const fn rank(&self) -> u8 {
        match self {
            Self::Info => 1,
            Self::Warning => 2,
            Self::Error => 3,
        }
    }
}
/// A single shell command pulled out of a scanned file by one extractor.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtractedCommand {
    /// File the command was found in, as given to the scanner.
    pub file: String,
    /// 1-based line number where the command starts.
    pub line: usize,
    /// Optional 1-based column; extractors here do not currently set it.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub col: Option<usize>,
    /// Identifier of the extractor that produced this command
    /// (e.g. `"shell.script"`, `"dockerfile.run"`).
    pub extractor_id: String,
    /// The extracted command text.
    pub command: String,
    /// Extractor-specific extra data, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub metadata: Option<serde_json::Value>,
}
/// One evaluated command that was not unconditionally allowed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScanFinding {
    /// File the underlying command came from.
    pub file: String,
    /// 1-based line number of the command.
    pub line: usize,
    /// Optional 1-based column.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub col: Option<usize>,
    /// Extractor that produced the command.
    pub extractor_id: String,
    /// Command text, possibly redacted/truncated per scan options.
    pub extracted_command: String,
    /// What the evaluator would do with this command.
    pub decision: ScanDecision,
    /// Severity assigned to the finding.
    pub severity: ScanSeverity,
    /// `pack:pattern` rule identifier, when match metadata was available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub rule_id: Option<String>,
    /// Human-readable explanation of the match.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub reason: Option<String>,
    /// Safer-alternative suggestion text, when one is registered for the rule.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub suggestion: Option<String>,
}
/// Tally of findings by decision.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ScanDecisionCounts {
    pub allow: usize,
    pub warn: usize,
    pub deny: usize,
}
/// Tally of findings by severity.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ScanSeverityCounts {
    pub info: usize,
    pub warning: usize,
    pub error: usize,
}
/// Aggregate statistics for a completed scan.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScanSummary {
    /// Files whose contents were actually parsed.
    pub files_scanned: usize,
    /// Files skipped (unreadable, non-regular, oversized, or unrecognized type).
    pub files_skipped: usize,
    /// Total commands produced by all extractors.
    pub commands_extracted: usize,
    /// Number of findings in the report.
    pub findings_total: usize,
    pub decisions: ScanDecisionCounts,
    pub severities: ScanSeverityCounts,
    /// True when the scan stopped early at the `max_findings` cap.
    pub max_findings_reached: bool,
    /// Wall-clock scan duration, when it fit in a `u64`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub elapsed_ms: Option<u64>,
}
/// Full result of a scan: summary plus individual findings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScanReport {
    /// Always [`SCAN_SCHEMA_VERSION`] for reports produced by this build.
    pub schema_version: u32,
    pub summary: ScanSummary,
    pub findings: Vec<ScanFinding>,
}
/// Runtime knobs controlling a scan (resolved from CLI + `hooks.toml`).
#[derive(Debug, Clone)]
pub struct ScanOptions {
    pub format: ScanFormat,
    pub fail_on: ScanFailOn,
    /// Files larger than this are skipped.
    pub max_file_size_bytes: u64,
    /// Stop collecting findings once this many have been gathered.
    pub max_findings: usize,
    pub redact: ScanRedactMode,
    /// Maximum characters of command text per finding (0 = unlimited).
    pub truncate: usize,
}
/// Precomputed evaluation state shared across every command in a scan, so the
/// registry/config work is done once rather than per command.
#[derive(Debug)]
pub struct ScanEvalContext {
    /// Keywords from all enabled packs, used as a cheap prefilter.
    pub enabled_keywords: Vec<&'static str>,
    /// Enabled pack ids in evaluation order.
    pub ordered_packs: Vec<String>,
    /// Optional fast keyword lookup built from the ordered packs.
    pub keyword_index: Option<crate::packs::EnabledKeywordIndex>,
    /// Compiled per-config pattern overrides.
    pub compiled_overrides: crate::config::CompiledOverrides,
    /// Layered allowlists consulted during evaluation.
    pub allowlists: crate::allowlist::LayeredAllowlist,
    /// Heredoc parsing settings from the config.
    pub heredoc_settings: HeredocSettings,
}
impl ScanEvalContext {
    /// Builds the shared evaluation context from a resolved [`Config`]:
    /// enabled packs are expanded through the global `REGISTRY`, config
    /// overrides are compiled, and default allowlists are loaded.
    #[must_use]
    pub fn from_config(config: &Config) -> Self {
        let enabled_packs: HashSet<String> = config.enabled_pack_ids();
        let enabled_keywords = REGISTRY.collect_enabled_keywords(&enabled_packs);
        let ordered_packs = REGISTRY.expand_enabled_ordered(&enabled_packs);
        let keyword_index = REGISTRY.build_enabled_keyword_index(&ordered_packs);
        let compiled_overrides = config.overrides.compile();
        let allowlists = crate::load_default_allowlists();
        let heredoc_settings = config.heredoc_settings();
        Self {
            enabled_keywords,
            ordered_packs,
            keyword_index,
            compiled_overrides,
            allowlists,
            heredoc_settings,
        }
    }
}
/// True when any finding's severity meets the configured fail threshold.
#[must_use]
pub fn should_fail(report: &ScanReport, fail_on: ScanFailOn) -> bool {
    report
        .findings
        .iter()
        .any(|finding| fail_on.blocks(finding.severity))
}
pub fn sort_findings(findings: &mut [ScanFinding]) {
findings.sort_by(|a, b| {
let key_a = (
a.file.as_str(),
a.line,
a.col.unwrap_or(0),
a.rule_id.as_deref().unwrap_or(a.extractor_id.as_str()),
a.extractor_id.as_str(),
a.severity.rank(),
a.extracted_command.as_str(),
);
let key_b = (
b.file.as_str(),
b.line,
b.col.unwrap_or(0),
b.rule_id.as_deref().unwrap_or(b.extractor_id.as_str()),
b.extractor_id.as_str(),
b.severity.rank(),
b.extracted_command.as_str(),
);
key_a.cmp(&key_b)
});
}
/// Evaluates one extracted command against the enabled packs/overrides and
/// returns a [`ScanFinding`] when it is not plainly allowed.
///
/// Returns `None` for allowed commands. When the evaluator blocks a command
/// but supplies no pattern metadata, a deny/error finding with a placeholder
/// reason is emitted so the block is still visible in the report.
#[must_use]
pub fn evaluate_extracted_command(
    extracted: &ExtractedCommand,
    options: &ScanOptions,
    config: &Config,
    ctx: &ScanEvalContext,
) -> Option<ScanFinding> {
    // Anchor relative file paths at the current working directory so
    // path-scoped evaluation (allowlists/overrides) can resolve.
    let project_path = {
        let candidate = std::path::Path::new(&extracted.file);
        if candidate.is_absolute() {
            Some(candidate.to_path_buf())
        } else {
            std::env::current_dir().ok().map(|cwd| cwd.join(candidate))
        }
    };
    let result = evaluate_command_with_pack_order_at_path(
        &extracted.command,
        &ctx.enabled_keywords,
        &ctx.ordered_packs,
        ctx.keyword_index.as_ref(),
        &ctx.compiled_overrides,
        &ctx.allowlists,
        &ctx.heredoc_settings,
        project_path.as_deref(),
    );
    if result.decision == EvaluationDecision::Allow {
        return None;
    }
    let Some(pattern) = result.pattern_info else {
        return Some(ScanFinding {
            file: extracted.file.clone(),
            line: extracted.line,
            col: extracted.col,
            extractor_id: extracted.extractor_id.clone(),
            // Fix: apply the configured redaction/truncation here too; the
            // original emitted the raw command on this path, leaking content
            // the redact option is meant to hide.
            extracted_command: redact_and_truncate(&extracted.command, options),
            decision: ScanDecision::Deny,
            severity: ScanSeverity::Error,
            rule_id: None,
            reason: Some("Blocked (missing match metadata)".to_string()),
            suggestion: None,
        });
    };
    let (rule_id, severity, decision_mode) = resolve_severity_and_rule_id(config, &pattern);
    // No resolvable mode defaults to the safest interpretation: deny.
    let scan_decision = match decision_mode {
        Some(DecisionMode::Deny) | None => ScanDecision::Deny,
        Some(DecisionMode::Warn) => ScanDecision::Warn,
        Some(DecisionMode::Log) => ScanDecision::Allow,
    };
    // Unknown severity is treated as the most severe.
    let scan_severity = match severity {
        Some(Severity::Medium) => ScanSeverity::Warning,
        Some(Severity::Low) => ScanSeverity::Info,
        Some(Severity::Critical | Severity::High) | None => ScanSeverity::Error,
    };
    let suggestion = rule_id
        .as_deref()
        .and_then(|id| get_suggestion_by_kind(id, SuggestionKind::SaferAlternative))
        .map(|s| s.text.clone());
    let extracted_command = redact_and_truncate(&extracted.command, options);
    Some(ScanFinding {
        file: extracted.file.clone(),
        line: extracted.line,
        col: extracted.col,
        extractor_id: extracted.extractor_id.clone(),
        extracted_command,
        decision: scan_decision,
        severity: scan_severity,
        rule_id,
        reason: Some(pattern.reason),
        suggestion,
    })
}
/// Derives the `pack:pattern` rule id, the pattern severity, and the
/// effective decision mode for a matched pattern.
///
/// Returns all-`None` when the match lacks pack or pattern metadata. Matches
/// from config overrides or legacy patterns always deny; pack/heredoc matches
/// consult the configured policy.
fn resolve_severity_and_rule_id(
    config: &Config,
    pattern: &PatternMatch,
) -> (Option<String>, Option<Severity>, Option<DecisionMode>) {
    let (Some(pack_id), Some(pattern_name)) =
        (pattern.pack_id.as_deref(), pattern.pattern_name.as_deref())
    else {
        return (None, None, None);
    };
    let severity = pattern.severity;
    let mode = if matches!(
        pattern.source,
        MatchSource::ConfigOverride | MatchSource::LegacyPattern
    ) {
        DecisionMode::Deny
    } else {
        // MatchSource::Pack | MatchSource::HeredocAst
        config
            .policy()
            .resolve_mode(Some(pack_id), Some(pattern_name), severity)
    };
    (
        Some(format!("{pack_id}:{pattern_name}")),
        severity,
        Some(mode),
    )
}
/// Applies the configured redaction mode to `command`, then truncates the
/// result to the configured maximum character count.
fn redact_and_truncate(command: &str, options: &ScanOptions) -> String {
    let redacted = match options.redact {
        ScanRedactMode::Quoted => redact_quoted_strings(command),
        ScanRedactMode::Aggressive => redact_aggressively(command),
        ScanRedactMode::None => command.to_string(),
    };
    truncate_utf8(&redacted, options.truncate)
}
/// Truncates `s` to at most `max_chars` characters (not bytes), appending a
/// `…` marker when anything was cut. `max_chars == 0` disables truncation.
fn truncate_utf8(s: &str, max_chars: usize) -> String {
    match max_chars {
        0 => s.to_string(),
        // Short enough already — return unchanged (checked before the
        // max_chars == 1 special case, matching a string of length <= 1).
        _ if s.chars().count() <= max_chars => s.to_string(),
        1 => "…".to_string(),
        _ => {
            // Keep max_chars - 1 characters so the ellipsis fits the budget.
            let mut out: String = s.chars().take(max_chars - 1).collect();
            out.push('…');
            out
        }
    }
}
/// Replaces the contents of single- and double-quoted spans with `…`,
/// keeping the surrounding quote characters.
///
/// Single-quoted spans have no escapes (shell semantics); inside double
/// quotes a backslash escapes the next character, so `\"` does not close the
/// span. An unterminated quote redacts through the end of the string.
#[must_use]
pub fn redact_quoted_strings(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut chars = s.chars();
    while let Some(c) = chars.next() {
        if c == '\'' {
            out.push_str("'…");
            // Consume up to and including the closing quote, if any.
            if chars.by_ref().any(|n| n == '\'') {
                out.push('\'');
            }
        } else if c == '"' {
            out.push_str("\"…");
            let mut escaped = false;
            for n in chars.by_ref() {
                if escaped {
                    escaped = false;
                } else if n == '\\' {
                    escaped = true;
                } else if n == '"' {
                    out.push('"');
                    break;
                }
            }
        } else {
            out.push(c);
        }
    }
    out
}
/// Aggressive redaction: first redacts quoted spans, then passes every
/// whitespace-separated token through `redact_segment` (which masks hex blobs
/// and credential-like `key=value` pairs). Whitespace is preserved verbatim.
#[must_use]
pub fn redact_aggressively(s: &str) -> String {
    let quoted_redacted = redact_quoted_strings(s);
    let mut result = String::with_capacity(quoted_redacted.len());
    let mut token = String::new();
    for ch in quoted_redacted.chars() {
        if ch.is_whitespace() {
            if !token.is_empty() {
                result.push_str(&redact_segment(&std::mem::take(&mut token)));
            }
            result.push(ch);
        } else {
            token.push(ch);
        }
    }
    // Flush the trailing token, if the string didn't end in whitespace.
    if !token.is_empty() {
        result.push_str(&redact_segment(&token));
    }
    result
}
// Credential-ish key substrings, assembled with `concat!` so this scanner's
// own source doesn't contain the literal words it flags.
const TOKEN_KEY: &str = concat!("to", "ken");
const SECRET_KEY: &str = concat!("sec", "ret");
const PASSWORD_KEY: &str = concat!("pass", "word");
const PASSWD_KEY: &str = concat!("pass", "wd");
const API_KEY: &str = concat!("api", "_key");
const APIKEY_KEY: &str = concat!("api", "key");
const BEARER_KEY: &str = concat!("bear", "er");
/// Redacts a single whitespace-free token: long hex strings become `…`, and
/// `key=value` pairs whose key looks credential-like keep only `key=`.
fn redact_segment(segment: &str) -> String {
    // 32+ hex characters look like a hash/credential — mask entirely.
    let looks_like_hex_blob =
        segment.len() >= 32 && segment.bytes().all(|b| b.is_ascii_hexdigit());
    if looks_like_hex_blob {
        return "…".to_string();
    }
    if let Some(eq_pos) = segment.find('=') {
        let key_lower = segment[..eq_pos].to_ascii_lowercase();
        let is_sensitive = [
            TOKEN_KEY,
            SECRET_KEY,
            PASSWORD_KEY,
            PASSWD_KEY,
            API_KEY,
            APIKEY_KEY,
            BEARER_KEY,
        ]
        .into_iter()
        .any(|needle| key_lower.contains(needle));
        if is_sensitive {
            // Keep `key=` (everything through the '='), drop the value.
            let (key_with_eq, _) = segment.split_at(eq_pos + 1);
            return format!("{key_with_eq}…");
        }
    }
    segment.to_string()
}
/// Progress callback invoked per file: `(files_done, files_total, current_path)`.
pub type ScanProgressCallback<'a> = &'a mut dyn FnMut(usize, usize, &str);
/// Scans `paths` and builds a [`ScanReport`].
///
/// Thin wrapper over [`scan_paths_with_progress`] with no progress callback.
#[allow(clippy::missing_errors_doc)]
#[allow(clippy::too_many_lines)]
pub fn scan_paths(
    paths: &[PathBuf],
    options: &ScanOptions,
    config: &Config,
    ctx: &ScanEvalContext,
    include: &[String],
    exclude: &[String],
    repo_root: Option<&Path>,
) -> Result<ScanReport, String> {
    scan_paths_with_progress(
        paths, options, config, ctx, include, exclude, repo_root, None,
    )
}
/// Walks `paths`, extracts shell commands from every recognized file type,
/// evaluates them, and assembles a [`ScanReport`].
///
/// `progress`, when present, is called once with `(0, total, "")` before any
/// work and then once per file with a 1-based index. Scanning stops early
/// once `options.max_findings` findings have been collected.
#[allow(clippy::missing_errors_doc)]
#[allow(clippy::too_many_lines)]
pub fn scan_paths_with_progress(
    paths: &[PathBuf],
    options: &ScanOptions,
    config: &Config,
    ctx: &ScanEvalContext,
    include: &[String],
    exclude: &[String],
    repo_root: Option<&Path>,
    progress: Option<ScanProgressCallback<'_>>,
) -> Result<ScanReport, String> {
    let started = std::time::Instant::now();
    // Expand the input paths into a sorted, deduplicated file list.
    let mut files: Vec<PathBuf> = Vec::new();
    let mut visited: HashSet<PathBuf> = HashSet::new();
    for path in paths {
        collect_files_recursively(path, &mut files, &mut visited);
    }
    files.sort();
    files.dedup();
    if !include.is_empty() || !exclude.is_empty() {
        files = filter_paths(&files, include, exclude, repo_root);
    }
    let total_files = files.len();
    let mut progress = progress;
    if let Some(ref mut cb) = progress {
        cb(0, total_files, "");
    }
    let mut files_scanned = 0usize;
    let mut files_skipped = 0usize;
    let mut commands_extracted = 0usize;
    let mut findings: Vec<ScanFinding> = Vec::new();
    let mut max_findings_reached = false;
    for (file_idx, file) in files.iter().enumerate() {
        if let Some(ref mut cb) = progress {
            cb(file_idx + 1, total_files, &file.to_string_lossy());
        }
        // Stop before reading another file once the findings cap is hit.
        if findings.len() >= options.max_findings {
            max_findings_reached = true;
            break;
        }
        // Skip unreadable, non-regular, or oversized files.
        let Ok(meta) = std::fs::metadata(file) else {
            files_skipped += 1;
            continue;
        };
        if !meta.is_file() {
            files_skipped += 1;
            continue;
        }
        if meta.len() > options.max_file_size_bytes {
            files_skipped += 1;
            continue;
        }
        // Classify once; a path can match several extractors.
        let is_shell = is_shell_script_path(file);
        let is_docker = is_dockerfile_path(file);
        let is_actions = is_github_actions_workflow_path(file);
        let is_gitlab = is_gitlab_ci_path(file);
        let is_azure = is_azure_pipelines_path(file);
        let is_circleci = is_circleci_path(file);
        let is_makefile = is_makefile_path(file);
        let is_package_json = is_package_json_path(file);
        let is_terraform = is_terraform_path(file);
        let is_compose = is_docker_compose_path(file);
        if !is_shell
            && !is_docker
            && !is_actions
            && !is_gitlab
            && !is_azure
            && !is_circleci
            && !is_makefile
            && !is_package_json
            && !is_terraform
            && !is_compose
        {
            files_skipped += 1;
            continue;
        }
        let Ok(bytes) = std::fs::read(file) else {
            files_skipped += 1;
            continue;
        };
        // Lossy decode: invalid UTF-8 bytes become replacement characters
        // rather than skipping the file.
        let content = String::from_utf8_lossy(&bytes);
        let file_label = file.to_string_lossy();
        files_scanned += 1;
        // Run every applicable extractor over the same content.
        let mut extracted: Vec<ExtractedCommand> = Vec::new();
        if is_shell {
            extracted.extend(extract_shell_script_from_str(
                &file_label,
                &content,
                &ctx.enabled_keywords,
            ));
        }
        if is_docker {
            extracted.extend(extract_dockerfile_from_str(
                &file_label,
                &content,
                &ctx.enabled_keywords,
            ));
        }
        if is_actions {
            extracted.extend(extract_github_actions_workflow_from_str(
                &file_label,
                &content,
                &ctx.enabled_keywords,
            ));
        }
        if is_gitlab {
            extracted.extend(extract_gitlab_ci_from_str(
                &file_label,
                &content,
                &ctx.enabled_keywords,
            ));
        }
        if is_azure {
            extracted.extend(extract_azure_pipelines_from_str(
                &file_label,
                &content,
                &ctx.enabled_keywords,
            ));
        }
        if is_circleci {
            extracted.extend(extract_circleci_from_str(
                &file_label,
                &content,
                &ctx.enabled_keywords,
            ));
        }
        if is_makefile {
            extracted.extend(extract_makefile_from_str(
                &file_label,
                &content,
                &ctx.enabled_keywords,
            ));
        }
        if is_package_json {
            extracted.extend(extract_package_json_from_str(
                &file_label,
                &content,
                &ctx.enabled_keywords,
            ));
        }
        if is_terraform {
            extracted.extend(extract_terraform_from_str(
                &file_label,
                &content,
                &ctx.enabled_keywords,
            ));
        }
        if is_compose {
            extracted.extend(extract_docker_compose_from_str(
                &file_label,
                &content,
                &ctx.enabled_keywords,
            ));
        }
        commands_extracted += extracted.len();
        // Evaluate each command, respecting the findings cap mid-file.
        for cmd in extracted {
            if findings.len() >= options.max_findings {
                max_findings_reached = true;
                break;
            }
            if let Some(finding) = evaluate_extracted_command(&cmd, options, config, ctx) {
                findings.push(finding);
            }
        }
        if max_findings_reached {
            break;
        }
    }
    // Durations over u64::MAX ms are dropped rather than panicking.
    let elapsed_ms = u64::try_from(started.elapsed().as_millis()).ok();
    Ok(build_report(
        findings,
        files_scanned,
        files_skipped,
        commands_extracted,
        max_findings_reached,
        elapsed_ms,
    ))
}
/// Recursively collects regular files under `path` into `out`.
///
/// Symlink cycles and duplicate entries are avoided by tracking
/// canonicalized paths in `visited`; unreadable paths are silently skipped.
/// Directory entries are visited in sorted order for deterministic output.
fn collect_files_recursively(
    // Idiom fix: take `&Path` instead of `&PathBuf` (clippy::ptr_arg);
    // existing `&PathBuf` call sites coerce via deref.
    path: &Path,
    out: &mut Vec<PathBuf>,
    visited: &mut HashSet<PathBuf>,
) {
    let Ok(canonical) = std::fs::canonicalize(path) else {
        return;
    };
    // Already-seen canonical path: a repeated arg or a symlink loop.
    if !visited.insert(canonical) {
        return;
    }
    let Ok(meta) = std::fs::metadata(path) else {
        return;
    };
    if meta.is_file() {
        out.push(path.to_path_buf());
        return;
    }
    if !meta.is_dir() {
        // Sockets, FIFOs, devices, etc. are ignored.
        return;
    }
    let Ok(read_dir) = std::fs::read_dir(path) else {
        return;
    };
    let mut entries: Vec<PathBuf> = read_dir.filter_map(|e| e.ok().map(|e| e.path())).collect();
    entries.sort();
    for entry in entries {
        collect_files_recursively(&entry, out, visited);
    }
}
/// Applies include/exclude glob filters to `paths`.
///
/// A path survives when it matches at least one include pattern (or `include`
/// is empty) and matches no exclude pattern. Each path is compared against
/// several normalized candidate spellings (see `build_glob_candidates`).
pub(crate) fn filter_paths(
    paths: &[PathBuf],
    include: &[String],
    exclude: &[String],
    repo_root: Option<&Path>,
) -> Vec<PathBuf> {
    let mut kept = Vec::new();
    for path in paths {
        let candidates = build_glob_candidates(path, repo_root);
        // Reused for both include and exclude checks (Copy closure).
        let hits_pattern = |pattern: &String| {
            candidates
                .iter()
                .any(|candidate| glob_match(pattern, candidate))
        };
        let included = include.is_empty() || include.iter().any(hits_pattern);
        if included && !exclude.iter().any(hits_pattern) {
            kept.push(path.clone());
        }
    }
    kept
}
/// Builds the candidate strings a glob pattern may match for `path`: the raw
/// path with `\` normalized to `/`, its `./`-stripped form, and (when a repo
/// root is known) the repo-relative path in both forms. The result is sorted
/// and deduplicated.
fn build_glob_candidates(path: &Path, repo_root: Option<&Path>) -> Vec<String> {
    // Pushes `value` plus its `./`-stripped twin, when present.
    fn push_with_unprefixed(out: &mut Vec<String>, value: String) {
        if let Some(stripped) = value.strip_prefix("./") {
            out.push(stripped.to_string());
        }
        out.push(value);
    }
    let mut candidates = Vec::new();
    push_with_unprefixed(&mut candidates, path.to_string_lossy().replace('\\', "/"));
    if let Some(root) = repo_root {
        let absolute = if path.is_absolute() {
            path.to_path_buf()
        } else {
            root.join(path)
        };
        if let Ok(rel) = absolute.strip_prefix(root) {
            let rel_norm = rel.to_string_lossy().replace('\\', "/");
            if !rel_norm.is_empty() {
                push_with_unprefixed(&mut candidates, rel_norm);
            }
        }
    }
    candidates.sort();
    candidates.dedup();
    candidates
}
/// Minimal glob matcher supporting `**` (any number of path components) and a
/// single `*` (within one component), after normalizing `\` separators to `/`.
///
/// NOTE(review): in the `**` branch a plain-text suffix is matched with
/// `ends_with`, so `src/**/t.sh` also matches `src/xt.sh` — presumably
/// acceptable for include/exclude filtering; confirm if exact component
/// boundaries are required.
fn glob_match(pattern: &str, path: &str) -> bool {
    use std::borrow::Cow;
    // Normalize Windows separators; borrow when nothing needs replacing.
    fn normalize_separators(s: &str) -> Cow<'_, str> {
        if s.contains('\\') {
            Cow::Owned(s.replace('\\', "/"))
        } else {
            Cow::Borrowed(s)
        }
    }
    // Does `path` start with the literal text before `**`, respecting a
    // component boundary when the prefix doesn't end in '/'?
    fn matches_glob_prefix(prefix_raw: &str, path: &str) -> bool {
        if prefix_raw.is_empty() {
            return true;
        }
        let prefix_no_slash = prefix_raw.trim_end_matches('/');
        if path == prefix_no_slash {
            return true;
        }
        if prefix_raw.ends_with('/') {
            return path.starts_with(prefix_raw);
        }
        if !path.starts_with(prefix_raw) {
            return false;
        }
        // The character after the prefix must be a separator.
        path.as_bytes()
            .get(prefix_raw.len())
            .is_some_and(|b| *b == b'/')
    }
    let pattern = normalize_separators(pattern);
    let path = normalize_separators(path);
    let pattern = pattern.as_ref();
    let path = path.as_ref();
    // `**` form: split on the first occurrence and match prefix + suffix.
    if let Some((prefix_raw, suffix_raw)) = pattern.split_once("**") {
        if !matches_glob_prefix(prefix_raw, path) {
            return false;
        }
        let suffix = suffix_raw.trim_start_matches('/');
        if suffix.is_empty() {
            return true;
        }
        // Suffix like `*.sh`: match against the final path component only.
        if suffix.contains('*') && !suffix.contains('/') {
            let last = path.rsplit('/').next().unwrap_or(path);
            let parts: Vec<&str> = suffix.split('*').collect();
            if parts.len() == 2 {
                let (pre, suf) = (parts[0], parts[1]);
                if !last.starts_with(pre) || !last.ends_with(suf) {
                    return false;
                }
                // Length guard prevents overlapping pre/suf false positives.
                let min_len = pre.len() + suf.len();
                return last.len() >= min_len;
            }
        }
        return path.ends_with(suffix);
    }
    // Single `*`: matches any run of characters except '/'.
    if pattern.contains('*') {
        let parts: Vec<&str> = pattern.split('*').collect();
        if parts.len() == 2 {
            let prefix = parts[0];
            let suffix = parts[1];
            if !path.starts_with(prefix) || !path.ends_with(suffix) {
                return false;
            }
            let min_len = prefix.len() + suffix.len();
            if path.len() < min_len {
                return false;
            }
            // The wildcard may not span a path separator.
            let middle_start = prefix.len();
            let middle_end = path.len() - suffix.len();
            return !path[middle_start..middle_end].contains('/');
        }
    }
    // No wildcards (or an unsupported multi-`*` pattern): exact match only.
    pattern == path
}
/// True when the path's extension (case-insensitive) names a shell dialect.
fn is_shell_script_path(path: &Path) -> bool {
    path.extension()
        .and_then(std::ffi::OsStr::to_str)
        .is_some_and(|ext| {
            let lower = ext.to_ascii_lowercase();
            ["sh", "bash", "zsh", "dash", "ksh"].contains(&lower.as_str())
        })
}
/// Extracts candidate commands from shell-script text, joining
/// backslash-continued lines into one logical command (capped at 20
/// continuation lines / 8 KiB to bound pathological inputs).
///
/// Each logical line is then filtered and packaged by
/// `extract_shell_command_line`; a joined command is reported at the line
/// where its first segment started.
#[must_use]
pub fn extract_shell_script_from_str(
    file: &str,
    content: &str,
    enabled_keywords: &[&'static str],
) -> Vec<ExtractedCommand> {
    const MAX_CONTINUATION_LINES: usize = 20;
    const MAX_JOINED_CHARS: usize = 8 * 1024;
    let mut out = Vec::new();
    // In-progress continuation: (start line, joined text, lines consumed).
    let mut buffer: Option<(usize, String, usize)> = None;
    for (idx, raw_line) in content.lines().enumerate() {
        let line_no = idx + 1;
        let (segment, continues) = split_shell_line_continuation(raw_line);
        let segment = segment.trim();
        if let Some((start_line, mut joined, cont_lines)) = buffer.take() {
            // Append this segment to the pending continuation.
            if !joined.is_empty() && !segment.is_empty() {
                joined.push(' ');
            }
            joined.push_str(segment);
            // Keep buffering only while under both limits.
            if continues && cont_lines < MAX_CONTINUATION_LINES && joined.len() < MAX_JOINED_CHARS {
                buffer = Some((start_line, joined, cont_lines + 1));
                continue;
            }
            if let Some(cmd) =
                extract_shell_command_line(file, start_line, &joined, enabled_keywords)
            {
                out.push(cmd);
            }
            continue;
        }
        if continues {
            // Start a new continuation buffer at this line.
            buffer = Some((line_no, segment.to_string(), 1));
            continue;
        }
        if let Some(cmd) = extract_shell_command_line(file, line_no, segment, enabled_keywords) {
            out.push(cmd);
        }
    }
    // Flush a continuation left open at end-of-input.
    if let Some((start_line, joined, _)) = buffer.take() {
        if let Some(cmd) = extract_shell_command_line(file, start_line, &joined, enabled_keywords) {
            out.push(cmd);
        }
    }
    out
}
/// Splits one raw line into (content, continues-to-next-line).
///
/// After stripping an inline comment, a trailing `\` marks a continuation —
/// but only when quote tracking shows that backslash is a live escape (e.g.
/// not inside single quotes, and not itself escaped as `\\`).
fn split_shell_line_continuation(line: &str) -> (&str, bool) {
    let trimmed = line.trim_end();
    let without_comment = strip_shell_inline_comment(trimmed).trim_end();
    // Fast path: no trailing backslash at all.
    if !without_comment.ends_with('\\') {
        return (without_comment, false);
    }
    // Replay the whole line's quote/escape state to decide whether the final
    // backslash actually escapes the newline.
    let mut in_single = false;
    let mut in_double = false;
    let mut escaped = false;
    for c in without_comment.chars() {
        if escaped {
            escaped = false;
            continue;
        }
        if c == '\\' && !in_single {
            escaped = true;
            continue;
        }
        match c {
            '\'' if !in_double => in_single = !in_single,
            '"' if !in_single => in_double = !in_double,
            _ => {}
        }
    }
    // `escaped` is still set only when the final char was an unconsumed
    // backslash, i.e. a genuine line continuation.
    if escaped && !in_single {
        let without = without_comment
            .strip_suffix('\\')
            .unwrap_or(without_comment)
            .trim_end();
        (without, true)
    } else {
        (without_comment, false)
    }
}
/// Runs the shell-script extractor on an embedded snippet (e.g. a YAML `run`
/// block), shifting line numbers so they refer to the host file and stamping
/// every command with the host extractor's id.
fn extract_shell_script_with_offset_and_id(
    file: &str,
    start_line: usize,
    content: &str,
    enabled_keywords: &[&'static str],
    extractor_id: &'static str,
) -> Vec<ExtractedCommand> {
    // Snippet line 1 corresponds to host line `start_line`.
    let line_offset = start_line.saturating_sub(1);
    extract_shell_script_from_str(file, content, enabled_keywords)
        .into_iter()
        .map(|mut cmd| {
            cmd.line = cmd.line.saturating_add(line_offset);
            cmd.extractor_id = extractor_id.to_string();
            cmd
        })
        .collect()
}
/// Extracts a single command candidate from one logical shell line.
///
/// Returns `None` for blank lines, comment lines, lines missing every enabled
/// keyword (cheap prefilter), and pure variable-assignment lines.
fn extract_shell_command_line(
    file: &str,
    line: usize,
    candidate: &str,
    enabled_keywords: &[&'static str],
) -> Option<ExtractedCommand> {
    let candidate = candidate.trim();
    if candidate.is_empty() || candidate.starts_with('#') {
        return None;
    }
    let candidate = strip_shell_inline_comment(candidate).trim();
    if candidate.is_empty() {
        return None;
    }
    // Prefilter: skip lines that cannot match any enabled pattern.
    if !enabled_keywords.is_empty() && !contains_any_keyword(candidate, enabled_keywords) {
        return None;
    }
    let words = split_shell_words(candidate);
    // Idiom fix: the original bound an unused `_first` just for its `?`;
    // make the "no words" early-return explicit instead.
    if words.is_empty() {
        return None;
    }
    // Pure assignments (`FOO=bar`, `export X=1`) are not commands.
    if is_shell_assignment_only(&words, candidate) {
        return None;
    }
    Some(ExtractedCommand {
        file: file.to_string(),
        line,
        col: None,
        extractor_id: "shell.script".to_string(),
        command: candidate.to_string(),
        metadata: None,
    })
}
/// True when the line consists only of variable assignments (optionally after
/// a declaration keyword) with no command substitution anywhere.
fn is_shell_assignment_only(words: &[String], candidate: &str) -> bool {
    // `FOO=$(cmd)` still runs a command, so it is never assignment-only.
    if contains_shell_command_substitution(candidate) {
        return false;
    }
    let mut remaining: &[String] = words;
    // An optional leading declaration keyword doesn't make it a command.
    if let Some(first) = remaining.first() {
        if matches!(
            first.as_str(),
            "export" | "local" | "readonly" | "declare" | "typeset"
        ) {
            remaining = &remaining[1..];
        }
    }
    remaining.iter().all(|w| is_shell_assignment_word(w))
}
/// Detects command substitution — a backtick or `$(` — anywhere outside
/// single quotes (both forms still expand inside double quotes).
fn contains_shell_command_substitution(s: &str) -> bool {
    let mut in_single = false;
    let mut in_double = false;
    let mut chars = s.chars().peekable();
    while let Some(c) = chars.next() {
        if c == '\\' && !in_single {
            // An escaped character can never start a substitution.
            chars.next();
            continue;
        }
        match c {
            '\'' if !in_double => in_single = !in_single,
            '"' if !in_single => in_double = !in_double,
            '`' if !in_single => return true,
            '$' if !in_single && chars.peek() == Some(&'(') => return true,
            _ => {}
        }
    }
    false
}
/// True when `word` has the form `NAME=...` with a valid shell variable name
/// on the left of the first `=`.
fn is_shell_assignment_word(word: &str) -> bool {
    match word.find('=') {
        // No '=' at all, or nothing before it (`=foo`): not an assignment.
        None | Some(0) => false,
        Some(eq) => is_shell_var_name(&word[..eq]),
    }
}
/// True when `s` is a valid POSIX shell identifier:
/// `[A-Za-z_][A-Za-z0-9_]*` (non-empty).
fn is_shell_var_name(s: &str) -> bool {
    let mut chars = s.chars();
    let Some(first) = chars.next() else {
        return false;
    };
    (first.is_ascii_alphabetic() || first == '_')
        && chars.all(|c| c.is_ascii_alphanumeric() || c == '_')
}
/// True when `keyword` contains any ASCII whitespace byte, i.e. it is a
/// multi-word keyword needing the flexible-whitespace matcher.
fn keyword_contains_whitespace(keyword: &str) -> bool {
    keyword
        .as_bytes()
        .iter()
        .any(|byte| byte.is_ascii_whitespace())
}
/// Matches a multi-word keyword against `haystack`, allowing any run of one
/// or more ASCII whitespace bytes between the keyword's words.
///
/// Uses `memmem` to find each occurrence of the first word, then verifies the
/// remaining words follow with whitespace gaps. Comparison is byte-exact
/// (case-sensitive).
fn keyword_matches_with_whitespace(haystack: &str, keyword: &str) -> bool {
    let parts: Vec<&str> = keyword.split_whitespace().collect();
    if parts.is_empty() {
        return false;
    }
    let hay = haystack.as_bytes();
    let first = parts[0].as_bytes();
    if first.len() > hay.len() {
        return false;
    }
    let mut offset = 0;
    // Try every occurrence of the first word as a potential match start.
    while let Some(pos) = memmem::find(&hay[offset..], first) {
        let start = offset + pos;
        let mut idx = start + first.len();
        let mut matched = true;
        for part in parts.iter().skip(1) {
            // At least one whitespace byte must separate consecutive words.
            let mut ws = idx;
            while ws < hay.len() && hay[ws].is_ascii_whitespace() {
                ws += 1;
            }
            if ws == idx {
                matched = false;
                break;
            }
            idx = ws;
            let part_bytes = part.as_bytes();
            if idx + part_bytes.len() > hay.len() || &hay[idx..idx + part_bytes.len()] != part_bytes
            {
                matched = false;
                break;
            }
            idx += part_bytes.len();
        }
        if matched {
            return true;
        }
        // Resume the search one byte past this candidate start.
        offset = start + 1;
    }
    false
}
/// True when `haystack` contains at least one enabled keyword. Multi-word
/// keywords tolerate arbitrary whitespace between words; single-word keywords
/// use a plain substring test. Empty keywords never match.
fn contains_any_keyword(haystack: &str, keywords: &[&'static str]) -> bool {
    keywords.iter().copied().any(|keyword| {
        if keyword.is_empty() {
            false
        } else if keyword_contains_whitespace(keyword) {
            keyword_matches_with_whitespace(haystack, keyword)
        } else {
            haystack.contains(keyword)
        }
    })
}
/// Returns `s` with a trailing shell comment removed.
///
/// A `#` begins a comment only when it is outside quotes and sits at a word
/// boundary (start of line, or after whitespace / `;` / `|` / `&`) — so
/// `a#b` and quoted `'#'` are untouched. Backslash escapes the next
/// character outside single quotes.
fn strip_shell_inline_comment(s: &str) -> &str {
    let mut in_single = false;
    let mut in_double = false;
    let mut skip_next = false;
    let mut previous: Option<char> = None;
    for (byte_idx, ch) in s.char_indices() {
        if skip_next {
            skip_next = false;
        } else if ch == '\\' && !in_single {
            skip_next = true;
        } else {
            match ch {
                '\'' if !in_double => in_single = !in_single,
                '"' if !in_single => in_double = !in_double,
                '#' if !in_single && !in_double => {
                    // `previous == None` means the '#' starts the string.
                    let at_boundary = previous
                        .map_or(true, |p| p.is_whitespace() || matches!(p, ';' | '|' | '&'));
                    if at_boundary {
                        return &s[..byte_idx];
                    }
                }
                _ => {}
            }
        }
        previous = Some(ch);
    }
    s
}
/// Splits a command line into words on unquoted whitespace.
///
/// Quote characters are kept in the output words; a backslash (outside
/// single quotes) is dropped and the following character is taken literally,
/// so `a\ b` becomes the single word `a b`.
fn split_shell_words(s: &str) -> Vec<String> {
    let mut words = Vec::new();
    let mut current = String::new();
    let mut in_single = false;
    let mut in_double = false;
    let mut chars = s.chars();
    while let Some(c) = chars.next() {
        if c == '\\' && !in_single {
            // Escaped character: taken verbatim, never a separator or quote.
            if let Some(next) = chars.next() {
                current.push(next);
            }
            continue;
        }
        match c {
            '\'' if !in_double => {
                in_single = !in_single;
                current.push(c);
            }
            '"' if !in_single => {
                in_double = !in_double;
                current.push(c);
            }
            _ if c.is_whitespace() && !in_single && !in_double => {
                if !current.is_empty() {
                    words.push(std::mem::take(&mut current));
                }
            }
            _ => current.push(c),
        }
    }
    if !current.is_empty() {
        words.push(current);
    }
    words
}
/// True for Dockerfile-like names (case-insensitive): `Dockerfile`,
/// `*.dockerfile`, or `Dockerfile.*`.
fn is_dockerfile_path(path: &Path) -> bool {
    path.file_name()
        .and_then(std::ffi::OsStr::to_str)
        .is_some_and(|name| {
            let lower = name.to_ascii_lowercase();
            lower == "dockerfile"
                || lower.ends_with(".dockerfile")
                || lower.starts_with("dockerfile.")
        })
}
/// Extracts the command from every `RUN` instruction in a Dockerfile.
///
/// Handles both shell form (`RUN cmd ...`, including `\` line continuations)
/// and exec form (`RUN ["cmd", "arg"]`, joined with spaces). Continuations
/// are capped at 50 lines / 32 KiB. Commands are reported at the line of the
/// `RUN` keyword and prefiltered by the enabled keywords.
#[must_use]
pub fn extract_dockerfile_from_str(
    file: &str,
    content: &str,
    enabled_keywords: &[&'static str],
) -> Vec<ExtractedCommand> {
    const MAX_CONTINUATION_LINES: usize = 50;
    const MAX_JOINED_CHARS: usize = 32 * 1024;
    let mut out = Vec::new();
    let lines: Vec<&str> = content.lines().collect();
    let mut idx = 0;
    while idx < lines.len() {
        let line_no = idx + 1;
        let raw_line = lines[idx];
        let trimmed = raw_line.trim();
        if trimmed.is_empty() || trimmed.starts_with('#') {
            idx += 1;
            continue;
        }
        // Instruction keywords are case-insensitive in Dockerfiles.
        let upper = trimmed.to_ascii_uppercase();
        let is_run = upper == "RUN" || upper.starts_with("RUN ") || upper.starts_with("RUN\t");
        if !is_run {
            idx += 1;
            continue;
        }
        // Fold `\`-continued lines into one logical instruction.
        let (command, lines_consumed) =
            join_dockerfile_continuation(&lines, idx, MAX_CONTINUATION_LINES, MAX_JOINED_CHARS);
        idx += lines_consumed;
        let full_trimmed = command.trim();
        // Drop the 4-byte "RUN " / "RUN\t" prefix; a bare `RUN` has no command.
        let cmd_part = if full_trimmed.len() > 4 {
            full_trimmed[4..].trim_start()
        } else {
            continue;
        };
        // Exec form: a JSON array of argv elements.
        if cmd_part.starts_with('[') {
            if let Ok(args) = serde_json::from_str::<Vec<String>>(cmd_part) {
                let joined = args.join(" ");
                if !joined.is_empty()
                    && (enabled_keywords.is_empty()
                        || contains_any_keyword(&joined, enabled_keywords))
                {
                    out.push(ExtractedCommand {
                        file: file.to_string(),
                        line: line_no,
                        col: None,
                        extractor_id: "dockerfile.run.exec".to_string(),
                        command: joined,
                        metadata: None,
                    });
                }
            }
            // Malformed JSON arrays are ignored entirely.
            continue;
        }
        // Shell form: strip trailing comment, then prefilter by keywords.
        let cmd_part = strip_shell_inline_comment(cmd_part).trim();
        if cmd_part.is_empty() {
            continue;
        }
        if !enabled_keywords.is_empty() && !contains_any_keyword(cmd_part, enabled_keywords) {
            continue;
        }
        out.push(ExtractedCommand {
            file: file.to_string(),
            line: line_no,
            col: None,
            extractor_id: "dockerfile.run".to_string(),
            command: cmd_part.to_string(),
            metadata: None,
        });
    }
    out
}
/// Joins a Dockerfile instruction that spans `\`-continued lines, starting at
/// `start_idx`, into one string.
///
/// Stops at the first non-continued line or when `max_lines` / `max_chars`
/// is reached. Returns the joined text and the number of lines consumed.
fn join_dockerfile_continuation(
    lines: &[&str],
    start_idx: usize,
    max_lines: usize,
    max_chars: usize,
) -> (String, usize) {
    let mut command = String::new();
    let mut consumed = 0usize;
    for raw in &lines[start_idx..] {
        // Limits are checked before taking each line, matching a guard-first
        // loop: hitting a cap leaves the rest of the instruction unread.
        if consumed >= max_lines || command.len() >= max_chars {
            break;
        }
        consumed += 1;
        let trimmed = raw.trim_end();
        let (body, has_continuation) = match trimmed.strip_suffix('\\') {
            Some(before) => (before.trim_end(), true),
            None => (trimmed, false),
        };
        // Join segments with a single space, skipping blank continuations.
        if !command.is_empty() && !body.trim().is_empty() {
            command.push(' ');
        }
        command.push_str(body.trim_start());
        if !has_continuation {
            break;
        }
    }
    (command, consumed)
}
/// True for YAML files under a `.github/workflows/` directory anywhere in
/// the path (case-insensitive components and extension).
fn is_github_actions_workflow_path(path: &Path) -> bool {
    let has_yaml_ext = path
        .extension()
        .and_then(std::ffi::OsStr::to_str)
        .is_some_and(|ext| matches!(ext.to_ascii_lowercase().as_str(), "yml" | "yaml"));
    if !has_yaml_ext {
        return false;
    }
    // Look for the adjacent component pair `.github` / `workflows`.
    let parts: Vec<String> = path
        .components()
        .filter_map(|c| c.as_os_str().to_str().map(str::to_ascii_lowercase))
        .collect();
    parts
        .windows(2)
        .any(|pair| pair[0] == ".github" && pair[1] == "workflows")
}
/// Extracts `run:` commands from a GitHub Actions workflow using a
/// lightweight indentation-based scan (no full YAML parser).
///
/// Tracks the indent of the nearest `steps:` key; inside a steps list it
/// skips `env:` / `with:` / `secrets:` sub-blocks, then pulls each `run:`
/// value — either a `|`/`>` block scalar (gathered by indentation) or an
/// inline scalar — and feeds it through the shell-script extractor with
/// host-file line numbers.
#[allow(clippy::too_many_lines)]
#[must_use]
pub fn extract_github_actions_workflow_from_str(
    file: &str,
    content: &str,
    enabled_keywords: &[&'static str],
) -> Vec<ExtractedCommand> {
    const EXTRACTOR_ID: &str = "github_actions.steps.run";
    let lines: Vec<&str> = content.lines().collect();
    let mut out = Vec::new();
    // Indent of the current `steps:` key, when inside a steps section.
    let mut steps_indent: Option<usize> = None;
    // Indent of a sub-block (env/with/secrets) being skipped.
    let mut skip_indent: Option<usize> = None;
    let mut idx = 0usize;
    while idx < lines.len() {
        let line_no = idx + 1;
        let raw_line = lines[idx];
        let trimmed_start = raw_line.trim_start();
        if trimmed_start.is_empty() || trimmed_start.starts_with('#') {
            idx += 1;
            continue;
        }
        let indent = raw_line.len() - trimmed_start.len();
        // Leave skip mode once indentation returns to the skipped key level.
        if let Some(skip) = skip_indent {
            if indent <= skip {
                skip_indent = None;
            } else {
                idx += 1;
                continue;
            }
        }
        // Leave the steps section when a non-list line dedents past it.
        if let Some(steps) = steps_indent {
            if !trimmed_start.starts_with('-') && indent <= steps {
                steps_indent = None;
            }
        }
        // Outside steps: look for a `steps:` key opening a list.
        if steps_indent.is_none() {
            if let Some(rest) = yaml_key_value(trimmed_start, "steps") {
                if rest.is_empty() || rest.starts_with('#') {
                    steps_indent = Some(indent);
                }
            }
            idx += 1;
            continue;
        }
        let Some(steps) = steps_indent else {
            idx += 1;
            continue;
        };
        // A step line is deeper than `steps:`, or a list item at its level.
        let in_steps_line = indent > steps || (indent == steps && trimmed_start.starts_with('-'));
        if !in_steps_line {
            idx += 1;
            continue;
        }
        // Strip the list-item dash to expose the mapping key.
        let mut candidate = trimmed_start;
        if let Some(after_dash) = candidate.strip_prefix('-') {
            candidate = after_dash.trim_start();
        }
        // env/with/secrets values can contain arbitrary text — skip them.
        if yaml_key_value(candidate, "env").is_some()
            || yaml_key_value(candidate, "with").is_some()
            || yaml_key_value(candidate, "secrets").is_some()
        {
            skip_indent = Some(indent);
            idx += 1;
            continue;
        }
        let Some(run_value) = yaml_key_value(candidate, "run") else {
            idx += 1;
            continue;
        };
        // Block scalar (`run: |` / `run: >`): gather deeper-indented lines.
        if run_value.starts_with('|') || run_value.starts_with('>') {
            let block_start_line = line_no + 1;
            let mut block = String::new();
            let mut j = idx + 1;
            while j < lines.len() {
                let raw = lines[j];
                let trimmed = raw.trim_start();
                if !trimmed.is_empty() {
                    let block_indent = raw.len() - trimmed.len();
                    if block_indent <= indent {
                        break;
                    }
                }
                if !block.is_empty() {
                    block.push('\n');
                }
                block.push_str(raw);
                j += 1;
            }
            out.extend(extract_shell_script_with_offset_and_id(
                file,
                block_start_line,
                &block,
                enabled_keywords,
                EXTRACTOR_ID,
            ));
            idx = j;
            continue;
        }
        // Inline scalar: unquote and extract as a one-line script.
        let unquoted = unquote_yaml_scalar(run_value);
        out.extend(extract_shell_script_with_offset_and_id(
            file,
            line_no,
            &unquoted,
            enabled_keywords,
            EXTRACTOR_ID,
        ));
        idx += 1;
    }
    out
}
/// Strips one level of YAML-style quoting from a scalar.
///
/// Double-quoted scalars have a small set of backslash escapes decoded
/// (`\n`, `\r`, `\t`, `\"`, `\\`); unknown escapes are kept verbatim.
/// Single-quoted scalars only decode the doubled-quote escape (`''` -> `'`).
/// Anything else is returned as-is after trimming surrounding whitespace.
fn unquote_yaml_scalar(s: &str) -> String {
    let s = s.trim();
    if s.starts_with('"') && s.ends_with('"') {
        if s.len() < 2 {
            return String::new();
        }
        let inner = &s[1..s.len() - 1];
        let mut result = String::with_capacity(inner.len());
        let mut iter = inner.chars();
        while let Some(ch) = iter.next() {
            if ch != '\\' {
                result.push(ch);
                continue;
            }
            match iter.next() {
                Some('n') => result.push('\n'),
                Some('r') => result.push('\r'),
                Some('t') => result.push('\t'),
                Some('"') => result.push('"'),
                Some('\\') | None => result.push('\\'),
                Some(unknown) => {
                    // Unknown escape: keep both characters untouched.
                    result.push('\\');
                    result.push(unknown);
                }
            }
        }
        result
    } else if s.starts_with('\'') && s.ends_with('\'') {
        if s.len() < 2 {
            return String::new();
        }
        s[1..s.len() - 1].replace("''", "'")
    } else {
        s.to_string()
    }
}
/// Returns `true` when `path` names a GitLab CI configuration file:
/// `.gitlab-ci.yml` itself or any file whose name ends with it
/// (e.g. `my.gitlab-ci.yml`), matched case-insensitively.
fn is_gitlab_ci_path(path: &Path) -> bool {
    let Some(file_name) = path.file_name().and_then(std::ffi::OsStr::to_str) else {
        return false;
    };
    // `ends_with` already covers the exact-match case, so no separate
    // equality check is needed.
    file_name.to_ascii_lowercase().ends_with(".gitlab-ci.yml")
}
/// Extracts shell commands from `before_script` / `script` / `after_script`
/// entries of a `.gitlab-ci.yml`, using a line-oriented YAML scan.
///
/// Supports the forms those keys commonly take: nested `- item` sequences,
/// `|`/`>` block scalars, inline `[a, b]` flow sequences, and plain
/// scalars. YAML anchors (`&name`) on script values are recorded and
/// aliases (`*name`) replay the previously extracted commands. Top-level
/// `key: &name` definitions (e.g. in hidden `.templates`) are also captured
/// for later alias reuse. `variables`/`rules`/`only`/`except` subtrees are
/// skipped so their values are not treated as commands.
#[allow(clippy::too_many_lines)]
#[must_use]
pub fn extract_gitlab_ci_from_str(
    file: &str,
    content: &str,
    enabled_keywords: &[&'static str],
) -> Vec<ExtractedCommand> {
    const EXTRACTOR_ID: &str = "gitlab_ci.script";
    let lines: Vec<&str> = content.lines().collect();
    let mut out = Vec::new();
    // Commands recorded under an anchor name, replayed when `*name` appears.
    let mut anchors: HashMap<String, Vec<ExtractedCommand>> = HashMap::new();
    // When set, lines indented deeper than this are skipped.
    let mut skip_indent: Option<usize> = None;
    let mut idx = 0usize;
    while idx < lines.len() {
        let line_no = idx + 1;
        let raw_line = lines[idx];
        let trimmed_start = raw_line.trim_start();
        if trimmed_start.is_empty() || trimmed_start.starts_with('#') {
            idx += 1;
            continue;
        }
        let indent = raw_line.len() - trimmed_start.len();
        if let Some(skip) = skip_indent {
            if indent <= skip {
                skip_indent = None;
            } else {
                idx += 1;
                continue;
            }
        }
        // Non-command subtrees: skip their nested values entirely.
        if yaml_key_value(trimmed_start, "variables").is_some()
            || yaml_key_value(trimmed_start, "rules").is_some()
            || yaml_key_value(trimmed_start, "only").is_some()
            || yaml_key_value(trimmed_start, "except").is_some()
        {
            skip_indent = Some(indent);
            idx += 1;
            continue;
        }
        if let Some(value) = yaml_key_value(trimmed_start, "before_script")
            .or_else(|| yaml_key_value(trimmed_start, "script"))
            .or_else(|| yaml_key_value(trimmed_start, "after_script"))
        {
            // `script: &anchor …` — extract as usual, but also record the
            // commands under the anchor name for later `*anchor` aliases.
            if let Some((anchor_name, remainder)) = parse_yaml_anchor(value, '&') {
                // Bare anchor: the value is a nested sequence on following lines.
                if remainder.is_empty() || remainder.starts_with('#') {
                    let (commands, next_idx) = extract_gitlab_sequence_items(
                        file,
                        &lines,
                        idx + 1,
                        indent,
                        enabled_keywords,
                        EXTRACTOR_ID,
                        &mut anchors,
                    );
                    anchors.insert(anchor_name, commands.clone());
                    out.extend(commands);
                    idx = next_idx;
                    continue;
                }
                // Anchor followed by a block scalar.
                if remainder.starts_with('|') || remainder.starts_with('>') {
                    let (block, block_start_line, next_idx) =
                        parse_yaml_block(&lines, idx + 1, indent);
                    let commands = extract_shell_script_with_offset_and_id(
                        file,
                        block_start_line,
                        &block,
                        enabled_keywords,
                        EXTRACTOR_ID,
                    );
                    anchors.insert(anchor_name, commands.clone());
                    out.extend(commands);
                    idx = next_idx;
                    continue;
                }
                // Anchor followed by an inline flow sequence.
                if let Some(items) = parse_inline_yaml_sequence(remainder) {
                    let mut commands = Vec::new();
                    for item in items {
                        commands.extend(extract_shell_script_with_offset_and_id(
                            file,
                            line_no,
                            &item,
                            enabled_keywords,
                            EXTRACTOR_ID,
                        ));
                    }
                    anchors.insert(anchor_name, commands.clone());
                    out.extend(commands);
                    idx += 1;
                    continue;
                }
                // Anchor followed by a plain scalar.
                let commands = extract_shell_script_with_offset_and_id(
                    file,
                    line_no,
                    &unquote_yaml_scalar(remainder),
                    enabled_keywords,
                    EXTRACTOR_ID,
                );
                anchors.insert(anchor_name, commands.clone());
                out.extend(commands);
                idx += 1;
                continue;
            }
            // `script: *alias` — replay previously anchored commands.
            if let Some((alias_name, _)) = parse_yaml_anchor(value, '*') {
                if let Some(anchored) = anchors.get(&alias_name) {
                    out.extend(anchored.iter().cloned());
                }
                idx += 1;
                continue;
            }
            // `script: |` / `script: >` block scalar.
            if value.starts_with('|') || value.starts_with('>') {
                let (block, block_start_line, next_idx) = parse_yaml_block(&lines, idx + 1, indent);
                out.extend(extract_shell_script_with_offset_and_id(
                    file,
                    block_start_line,
                    &block,
                    enabled_keywords,
                    EXTRACTOR_ID,
                ));
                idx = next_idx;
                continue;
            }
            // `script: [a, b]` inline sequence.
            if let Some(items) = parse_inline_yaml_sequence(value) {
                for item in items {
                    out.extend(extract_shell_script_with_offset_and_id(
                        file,
                        line_no,
                        &item,
                        enabled_keywords,
                        EXTRACTOR_ID,
                    ));
                }
                idx += 1;
                continue;
            }
            // Empty value: a `- item` sequence follows on the next lines.
            if value.is_empty() || value.starts_with('#') {
                let (commands, next_idx) = extract_gitlab_sequence_items(
                    file,
                    &lines,
                    idx + 1,
                    indent,
                    enabled_keywords,
                    EXTRACTOR_ID,
                    &mut anchors,
                );
                out.extend(commands);
                idx = next_idx;
                continue;
            }
            // Plain scalar value.
            out.extend(extract_shell_script_with_offset_and_id(
                file,
                line_no,
                &unquote_yaml_scalar(value),
                enabled_keywords,
                EXTRACTOR_ID,
            ));
            idx += 1;
            continue;
        }
        // `some_key: &anchor` with a nested sequence: capture for aliases,
        // but don't emit the commands directly (they surface via `*anchor`).
        if let Some(anchor_name) = gitlab_anchor_definition(trimmed_start) {
            let (commands, next_idx) = extract_gitlab_sequence_items(
                file,
                &lines,
                idx + 1,
                indent,
                enabled_keywords,
                EXTRACTOR_ID,
                &mut anchors,
            );
            if !commands.is_empty() {
                anchors.insert(anchor_name, commands);
                idx = next_idx;
                continue;
            }
        }
        idx += 1;
    }
    out
}
/// Detects a mapping line of the form `key: &anchor` (with nothing but an
/// optional `#` comment after the anchor) and returns the anchor name.
fn gitlab_anchor_definition(line: &str) -> Option<String> {
    let rest = line.split_once(':')?.1.trim_start();
    let (name, remainder) = parse_yaml_anchor(rest, '&')?;
    (remainder.is_empty() || remainder.starts_with('#')).then_some(name)
}
/// Extracts commands from a YAML `- item` sequence nested under
/// `parent_indent`, starting at `start_idx`.
///
/// Each item may be a plain scalar, a `|`/`>` block scalar, an anchored
/// value (`- &name cmd`, recorded into `anchors`), or an alias
/// (`- *name`, replayed from `anchors`). Returns the extracted commands
/// and the index of the first line after the sequence.
fn extract_gitlab_sequence_items(
    file: &str,
    lines: &[&str],
    start_idx: usize,
    parent_indent: usize,
    enabled_keywords: &[&'static str],
    extractor_id: &'static str,
    anchors: &mut HashMap<String, Vec<ExtractedCommand>>,
) -> (Vec<ExtractedCommand>, usize) {
    let mut out = Vec::new();
    let mut idx = start_idx;
    while idx < lines.len() {
        let line_no = idx + 1;
        let raw_line = lines[idx];
        let trimmed_start = raw_line.trim_start();
        if trimmed_start.is_empty() || trimmed_start.starts_with('#') {
            idx += 1;
            continue;
        }
        let indent = raw_line.len() - trimmed_start.len();
        // Dedenting to (or past) the parent key ends the sequence.
        if indent <= parent_indent {
            break;
        }
        if !trimmed_start.starts_with('-') {
            idx += 1;
            continue;
        }
        let item_indent = indent;
        let mut item_value = trimmed_start.trim_start_matches('-').trim_start();
        // `- &name value`: remember the name so the commands can be reused.
        let mut anchor_name: Option<String> = None;
        if let Some((anchor, remainder)) = parse_yaml_anchor(item_value, '&') {
            anchor_name = Some(anchor);
            item_value = remainder;
        }
        // `- *name`: replay previously anchored commands.
        if let Some((alias_name, _)) = parse_yaml_anchor(item_value, '*') {
            if let Some(anchored) = anchors.get(&alias_name) {
                out.extend(anchored.iter().cloned());
            }
            idx += 1;
            continue;
        }
        // `- |` / `- >`: block scalar item.
        if item_value.starts_with('|') || item_value.starts_with('>') {
            let (block, block_start_line, next_idx) = parse_yaml_block(lines, idx + 1, item_indent);
            let extracted = extract_shell_script_with_offset_and_id(
                file,
                block_start_line,
                &block,
                enabled_keywords,
                extractor_id,
            );
            if let Some(anchor) = anchor_name {
                anchors.insert(anchor, extracted.clone());
            }
            out.extend(extracted);
            idx = next_idx;
            continue;
        }
        // Plain scalar item.
        if !item_value.is_empty() && !item_value.starts_with('#') {
            let extracted = extract_shell_script_with_offset_and_id(
                file,
                line_no,
                &unquote_yaml_scalar(item_value),
                enabled_keywords,
                extractor_id,
            );
            if let Some(anchor) = anchor_name {
                anchors.insert(anchor, extracted.clone());
            }
            out.extend(extracted);
        }
        idx += 1;
    }
    (out, idx)
}
/// Collects the body of a YAML literal/folded block scalar (`|` / `>`).
///
/// Starting at `start_idx`, lines are consumed while they are blank or
/// indented deeper than `parent_indent`. Returns the joined body, the
/// 1-based line number of the body's first line, and the index of the
/// first line that was not consumed. Blank lines before any content are
/// effectively dropped (no separator is emitted while the body is empty).
fn parse_yaml_block(
    lines: &[&str],
    start_idx: usize,
    parent_indent: usize,
) -> (String, usize, usize) {
    let mut body = String::new();
    let mut idx = start_idx;
    while let Some(raw) = lines.get(idx) {
        let stripped = raw.trim_start();
        if !stripped.is_empty() && raw.len() - stripped.len() <= parent_indent {
            break;
        }
        if !body.is_empty() {
            body.push('\n');
        }
        body.push_str(raw);
        idx += 1;
    }
    (body, start_idx + 1, idx)
}
/// Parses a YAML flow sequence (`[a, b, "c"]`) appearing as a value.
///
/// Returns `None` unless the value starts with `[` and has a matching,
/// unquoted `]` followed only by an optional `#` comment. Single/double
/// quoting and (inside double quotes) backslash escapes are respected both
/// when locating the closing bracket and when splitting items on commas.
/// Each item is unquoted via `unquote_yaml_scalar`; empty items are dropped.
fn parse_inline_yaml_sequence(value: &str) -> Option<Vec<String>> {
    let trimmed = value.trim_start();
    if !trimmed.starts_with('[') {
        return None;
    }
    // Pass 1: find the closing `]` while tracking quote/escape state.
    let mut in_single = false;
    let mut in_double = false;
    let mut escaped = false;
    let mut end_idx = None;
    for (idx, c) in trimmed.char_indices() {
        if idx == 0 {
            // Skip the opening `[` itself.
            continue;
        }
        if escaped {
            escaped = false;
            continue;
        }
        if in_double && c == '\\' {
            escaped = true;
            continue;
        }
        match c {
            '\'' if !in_double => in_single = !in_single,
            '"' if !in_single => in_double = !in_double,
            ']' if !in_single && !in_double => {
                end_idx = Some(idx);
                break;
            }
            _ => {}
        }
    }
    let end_idx = end_idx?;
    // Anything after `]` other than a comment disqualifies the value.
    let rest = trimmed[end_idx + 1..].trim_start();
    if !rest.is_empty() && !rest.starts_with('#') {
        return None;
    }
    // Pass 2: split the interior on top-level commas with the same
    // quote/escape tracking, unquoting each item.
    let inner = &trimmed[1..end_idx];
    let mut out = Vec::new();
    let mut buf = String::new();
    in_single = false;
    in_double = false;
    escaped = false;
    for c in inner.chars() {
        if escaped {
            buf.push(c);
            escaped = false;
            continue;
        }
        if in_double && c == '\\' {
            buf.push(c);
            escaped = true;
            continue;
        }
        match c {
            '\'' if !in_double => {
                in_single = !in_single;
                buf.push(c);
            }
            '"' if !in_single => {
                in_double = !in_double;
                buf.push(c);
            }
            ',' if !in_single && !in_double => {
                let item = buf.trim();
                if !item.is_empty() {
                    out.push(unquote_yaml_scalar(item));
                }
                buf.clear();
            }
            _ => buf.push(c),
        }
    }
    // Flush the final item after the last comma.
    let item = buf.trim();
    if !item.is_empty() {
        out.push(unquote_yaml_scalar(item));
    }
    Some(out)
}
/// Parses a YAML anchor (`&name`) or alias (`*name`) at the start of
/// `value`, selected by `prefix`.
///
/// The name is the run of `[A-Za-z0-9._-]` characters right after the
/// prefix. Returns the name and the remainder of the line (with leading
/// whitespace removed), or `None` when the prefix is absent or the name
/// would be empty.
fn parse_yaml_anchor(value: &str, prefix: char) -> Option<(String, &str)> {
    let rest = value.trim_start().strip_prefix(prefix)?;
    let split = rest
        .find(|c: char| !(c.is_ascii_alphanumeric() || matches!(c, '-' | '_' | '.')))
        .unwrap_or(rest.len());
    if split == 0 {
        return None;
    }
    // All name characters are ASCII, so `split` is a valid char boundary.
    let (name, remainder) = rest.split_at(split);
    Some((name.to_string(), remainder.trim_start()))
}
/// If `line` is a `key: value` mapping entry for exactly `key` (optionally
/// with spaces before the colon), returns the value with leading whitespace
/// removed; otherwise `None`. A key that merely shares a prefix (e.g.
/// `runner:` for key `run`) does not match because the colon check fails.
fn yaml_key_value<'a>(line: &'a str, key: &str) -> Option<&'a str> {
    let rest = line.trim_start().strip_prefix(key)?;
    let value = match rest.strip_prefix(':') {
        Some(v) => v,
        // Allow whitespace between the key and the colon.
        None => rest.trim_start().strip_prefix(':')?,
    };
    Some(value.trim_start())
}
/// Returns `true` for Azure Pipelines definition files: any file whose
/// (case-insensitive) name starts with `azure-pipelines` and has a
/// `.yml`/`.yaml` extension, e.g. `azure-pipelines.yml` or
/// `azure-pipelines-prod.yaml`.
fn is_azure_pipelines_path(path: &Path) -> bool {
    let Some(file_name) = path.file_name().and_then(std::ffi::OsStr::to_str) else {
        return false;
    };
    let lower = file_name.to_ascii_lowercase();
    // The prefix + extension test already covers the canonical
    // `azure-pipelines.yml` / `.yaml` names, so the previous separate
    // equality checks were redundant.
    lower.starts_with("azure-pipelines")
        && Path::new(&lower)
            .extension()
            .and_then(std::ffi::OsStr::to_str)
            .is_some_and(|ext| ext == "yml" || ext == "yaml")
}
/// Extracts shell/PowerShell commands from an Azure Pipelines YAML file.
///
/// Looks for `script:`, `bash:`, `powershell:` and `pwsh:` step keys (each
/// tagged with its own extractor id) and handles inline scalars, `|`/`>`
/// block scalars, inline flow sequences, and nested `- item` sequences.
/// Configuration-only subtrees (`variables`, `parameters`, `resources`,
/// `trigger`, `pr`, `pool`, `condition`, `dependsOn`, `displayName`, `env`,
/// `inputs`) are skipped.
#[allow(clippy::too_many_lines)]
#[must_use]
pub fn extract_azure_pipelines_from_str(
    file: &str,
    content: &str,
    enabled_keywords: &[&'static str],
) -> Vec<ExtractedCommand> {
    let lines: Vec<&str> = content.lines().collect();
    let mut out = Vec::new();
    // When set, lines indented deeper than this are skipped.
    let mut skip_indent: Option<usize> = None;
    let mut idx = 0usize;
    while idx < lines.len() {
        let line_no = idx + 1;
        let raw_line = lines[idx];
        let trimmed_start = raw_line.trim_start();
        if trimmed_start.is_empty() || trimmed_start.starts_with('#') {
            idx += 1;
            continue;
        }
        let indent = raw_line.len() - trimmed_start.len();
        if let Some(skip) = skip_indent {
            if indent <= skip {
                skip_indent = None;
            } else {
                idx += 1;
                continue;
            }
        }
        // Keys may appear as `- key:` list items; strip the dash first.
        let candidate = trimmed_start
            .strip_prefix('-')
            .map_or(trimmed_start, |after_dash| after_dash.trim_start());
        // Configuration subtrees whose values are not commands.
        if yaml_key_value(candidate, "variables").is_some()
            || yaml_key_value(candidate, "parameters").is_some()
            || yaml_key_value(candidate, "resources").is_some()
            || yaml_key_value(candidate, "trigger").is_some()
            || yaml_key_value(candidate, "pr").is_some()
            || yaml_key_value(candidate, "pool").is_some()
            || yaml_key_value(candidate, "condition").is_some()
            || yaml_key_value(candidate, "dependsOn").is_some()
            || yaml_key_value(candidate, "displayName").is_some()
            || yaml_key_value(candidate, "env").is_some()
            || yaml_key_value(candidate, "inputs").is_some()
        {
            skip_indent = Some(indent);
            idx += 1;
            continue;
        }
        // Command-carrying step keys, each with a dedicated extractor id.
        let (extractor_id, value) = if let Some(v) = yaml_key_value(candidate, "script") {
            ("azure_pipelines.script", v)
        } else if let Some(v) = yaml_key_value(candidate, "bash") {
            ("azure_pipelines.bash", v)
        } else if let Some(v) = yaml_key_value(candidate, "powershell") {
            ("azure_pipelines.powershell", v)
        } else if let Some(v) = yaml_key_value(candidate, "pwsh") {
            ("azure_pipelines.pwsh", v)
        } else {
            idx += 1;
            continue;
        };
        // `key: |` / `key: >` — block scalar body.
        if value.starts_with('|') || value.starts_with('>') {
            let (block, block_start_line, next_idx) = parse_yaml_block(&lines, idx + 1, indent);
            out.extend(extract_shell_script_with_offset_and_id(
                file,
                block_start_line,
                &block,
                enabled_keywords,
                extractor_id,
            ));
            idx = next_idx;
            continue;
        }
        // `key: [a, b]` — inline flow sequence.
        if let Some(items) = parse_inline_yaml_sequence(value) {
            for item in items {
                out.extend(extract_shell_script_with_offset_and_id(
                    file,
                    line_no,
                    &item,
                    enabled_keywords,
                    extractor_id,
                ));
            }
            idx += 1;
            continue;
        }
        // Empty value: a nested `- item` sequence follows.
        if value.is_empty() || value.starts_with('#') {
            let mut j = idx + 1;
            while j < lines.len() {
                let next_raw = lines[j];
                let next_trimmed = next_raw.trim_start();
                if next_trimmed.is_empty() || next_trimmed.starts_with('#') {
                    j += 1;
                    continue;
                }
                let next_indent = next_raw.len() - next_trimmed.len();
                if next_indent <= indent {
                    break;
                }
                if next_trimmed.starts_with('-') {
                    let item = next_trimmed.strip_prefix('-').unwrap_or("").trim_start();
                    let item = unquote_yaml_scalar(item);
                    if !item.is_empty() {
                        out.extend(extract_shell_script_with_offset_and_id(
                            file,
                            j + 1,
                            &item,
                            enabled_keywords,
                            extractor_id,
                        ));
                    }
                }
                j += 1;
            }
            idx = j;
            continue;
        }
        // Plain inline scalar.
        let unquoted = unquote_yaml_scalar(value);
        out.extend(extract_shell_script_with_offset_and_id(
            file,
            line_no,
            &unquoted,
            enabled_keywords,
            extractor_id,
        ));
        idx += 1;
    }
    out
}
/// Returns `true` for CircleCI configuration files: a `config.yml` or
/// `config.yaml` (file name matched case-insensitively) whose immediate
/// parent directory is named exactly `.circleci`.
fn is_circleci_path(path: &Path) -> bool {
    let name_matches = path
        .file_name()
        .and_then(std::ffi::OsStr::to_str)
        .map(str::to_ascii_lowercase)
        .is_some_and(|name| name == "config.yml" || name == "config.yaml");
    if !name_matches {
        return false;
    }
    matches!(
        path.parent()
            .and_then(Path::file_name)
            .and_then(std::ffi::OsStr::to_str),
        Some(".circleci")
    )
}
/// Extracts shell commands from a CircleCI `config.yml`.
///
/// Handles both the shorthand form (`- run: cmd`, including `|`/`>` block
/// scalars) and the map form where `run:` has a nested `command:` key.
/// A standalone `command:` key (e.g. inside a reusable command definition)
/// is also extracted. Non-command configuration subtrees (`version`,
/// `orbs`, `docker`, `filters`, …) are skipped so their values are never
/// treated as commands.
#[allow(clippy::too_many_lines)]
#[must_use]
pub fn extract_circleci_from_str(
    file: &str,
    content: &str,
    enabled_keywords: &[&'static str],
) -> Vec<ExtractedCommand> {
    const EXTRACTOR_ID: &str = "circleci.run";
    let lines: Vec<&str> = content.lines().collect();
    let mut out = Vec::new();
    // When set, lines indented deeper than this are skipped.
    let mut skip_indent: Option<usize> = None;
    let mut idx = 0usize;
    while idx < lines.len() {
        let line_no = idx + 1;
        let raw_line = lines[idx];
        let trimmed_start = raw_line.trim_start();
        if trimmed_start.is_empty() || trimmed_start.starts_with('#') {
            idx += 1;
            continue;
        }
        let indent = raw_line.len() - trimmed_start.len();
        if let Some(skip) = skip_indent {
            if indent <= skip {
                skip_indent = None;
            } else {
                idx += 1;
                continue;
            }
        }
        // Keys may appear as `- key:` list items; strip the dash first.
        let candidate = trimmed_start
            .strip_prefix('-')
            .map_or(trimmed_start, |after_dash| after_dash.trim_start());
        // Configuration subtrees whose values are not commands.
        if yaml_key_value(candidate, "version").is_some()
            || yaml_key_value(candidate, "orbs").is_some()
            || yaml_key_value(candidate, "parameters").is_some()
            || yaml_key_value(candidate, "environment").is_some()
            || yaml_key_value(candidate, "docker").is_some()
            || yaml_key_value(candidate, "machine").is_some()
            || yaml_key_value(candidate, "macos").is_some()
            || yaml_key_value(candidate, "executor").is_some()
            || yaml_key_value(candidate, "when").is_some()
            || yaml_key_value(candidate, "unless").is_some()
            || yaml_key_value(candidate, "filters").is_some()
            || yaml_key_value(candidate, "requires").is_some()
            || yaml_key_value(candidate, "context").is_some()
            || yaml_key_value(candidate, "name").is_some()
            || yaml_key_value(candidate, "working_directory").is_some()
            || yaml_key_value(candidate, "shell").is_some()
            || yaml_key_value(candidate, "no_output_timeout").is_some()
        {
            skip_indent = Some(indent);
            idx += 1;
            continue;
        }
        let Some(value) = yaml_key_value(candidate, "run") else {
            // Not a `run:` line — a bare `command:` key is still extracted.
            if let Some(cmd_value) = yaml_key_value(candidate, "command") {
                if cmd_value.starts_with('|') || cmd_value.starts_with('>') {
                    let (block, block_start_line, next_idx) =
                        parse_yaml_block(&lines, idx + 1, indent);
                    out.extend(extract_shell_script_with_offset_and_id(
                        file,
                        block_start_line,
                        &block,
                        enabled_keywords,
                        EXTRACTOR_ID,
                    ));
                    idx = next_idx;
                    continue;
                }
                if !cmd_value.is_empty() && !cmd_value.starts_with('#') {
                    let unquoted = unquote_yaml_scalar(cmd_value);
                    out.extend(extract_shell_script_with_offset_and_id(
                        file,
                        line_no,
                        &unquoted,
                        enabled_keywords,
                        EXTRACTOR_ID,
                    ));
                }
            }
            idx += 1;
            continue;
        };
        // `run: |` / `run: >` — shorthand block scalar.
        if value.starts_with('|') || value.starts_with('>') {
            let (block, block_start_line, next_idx) = parse_yaml_block(&lines, idx + 1, indent);
            out.extend(extract_shell_script_with_offset_and_id(
                file,
                block_start_line,
                &block,
                enabled_keywords,
                EXTRACTOR_ID,
            ));
            idx = next_idx;
            continue;
        }
        // `run:` with no value — map form; look for a nested `command:`.
        if value.is_empty() || value.starts_with('#') {
            let mut j = idx + 1;
            while j < lines.len() {
                let next_raw = lines[j];
                let next_trimmed = next_raw.trim_start();
                if next_trimmed.is_empty() || next_trimmed.starts_with('#') {
                    j += 1;
                    continue;
                }
                let next_indent = next_raw.len() - next_trimmed.len();
                if next_indent <= indent {
                    break;
                }
                if let Some(cmd_value) = yaml_key_value(next_trimmed, "command") {
                    if cmd_value.starts_with('|') || cmd_value.starts_with('>') {
                        let (block, block_start_line, next_j) =
                            parse_yaml_block(&lines, j + 1, next_indent);
                        out.extend(extract_shell_script_with_offset_and_id(
                            file,
                            block_start_line,
                            &block,
                            enabled_keywords,
                            EXTRACTOR_ID,
                        ));
                        j = next_j;
                    } else if !cmd_value.is_empty() && !cmd_value.starts_with('#') {
                        let unquoted = unquote_yaml_scalar(cmd_value);
                        out.extend(extract_shell_script_with_offset_and_id(
                            file,
                            j + 1,
                            &unquoted,
                            enabled_keywords,
                            EXTRACTOR_ID,
                        ));
                        j += 1;
                    } else {
                        j += 1;
                    }
                } else {
                    j += 1;
                }
            }
            idx = j;
            continue;
        }
        // `run: cmd` — shorthand inline scalar.
        let unquoted = unquote_yaml_scalar(value);
        out.extend(extract_shell_script_with_offset_and_id(
            file,
            line_no,
            &unquoted,
            enabled_keywords,
            EXTRACTOR_ID,
        ));
        idx += 1;
    }
    out
}
/// Returns `true` when the file is named `Makefile` (any capitalization).
fn is_makefile_path(path: &Path) -> bool {
    matches!(
        path.file_name().and_then(std::ffi::OsStr::to_str),
        Some(name) if name.eq_ignore_ascii_case("makefile")
    )
}
/// Extracts shell recipe bodies from a Makefile.
///
/// Recipe lines are the tab-indented lines under a rule. Consecutive
/// recipe lines are gathered into one block; a trailing `\` lets a
/// non-tab line continue the block, mirroring make's line-continuation
/// rules. Each block is handed to the shell-script extractor together
/// with its 1-based starting line number.
#[must_use]
pub fn extract_makefile_from_str(
    file: &str,
    content: &str,
    enabled_keywords: &[&'static str],
) -> Vec<ExtractedCommand> {
    const EXTRACTOR_ID: &str = "makefile.recipe";
    let lines: Vec<&str> = content.lines().collect();
    let mut out = Vec::new();
    let mut idx = 0usize;
    while idx < lines.len() {
        // Advance to the next tab-indented recipe line.
        if !lines[idx].starts_with('\t') {
            idx += 1;
            continue;
        }
        let start_line = idx + 1;
        let mut recipe = String::new();
        let mut continued = false;
        while idx < lines.len() {
            let line = lines[idx];
            // The block ends at the first non-tab line that is not a
            // continuation of the previous one.
            if !line.starts_with('\t') && !continued {
                break;
            }
            if !recipe.is_empty() {
                recipe.push('\n');
            }
            recipe.push_str(line);
            continued = line.trim_end().ends_with('\\');
            idx += 1;
        }
        out.extend(extract_shell_script_with_offset_and_id(
            file,
            start_line,
            &recipe,
            enabled_keywords,
            EXTRACTOR_ID,
        ));
    }
    out
}
/// Returns `true` when the file is named exactly `package.json`
/// (case-sensitive).
fn is_package_json_path(path: &Path) -> bool {
    path.file_name()
        .and_then(std::ffi::OsStr::to_str)
        .is_some_and(|name| name == "package.json")
}
/// Extracts the `scripts` entries of a `package.json` as commands.
///
/// The file is parsed with `serde_json`; on parse failure (or when there
/// is no `scripts` object) an empty list is returned — scanning is
/// best-effort. Line numbers are recovered with a textual key lookup,
/// since `serde_json` does not report source positions. When
/// `enabled_keywords` is non-empty, only scripts containing at least one
/// keyword are kept. The script name is attached as metadata.
#[must_use]
pub fn extract_package_json_from_str(
    file: &str,
    content: &str,
    enabled_keywords: &[&'static str],
) -> Vec<ExtractedCommand> {
    const EXTRACTOR_ID: &str = "package_json.script";
    let mut out = Vec::new();
    let Ok(json) = serde_json::from_str::<serde_json::Value>(content) else {
        return out;
    };
    let Some(scripts) = json.get("scripts").and_then(|s| s.as_object()) else {
        return out;
    };
    let line_map = build_json_line_map(content);
    for (script_name, script_value) in scripts {
        // Non-string script values are ignored.
        let Some(script_cmd) = script_value.as_str() else {
            continue;
        };
        // Best-effort: find the key textually under the "scripts" object.
        let line_no = find_json_key_line(&line_map, script_name, "scripts");
        let has_keyword =
            enabled_keywords.is_empty() || contains_any_keyword(script_cmd, enabled_keywords);
        if has_keyword {
            out.push(ExtractedCommand {
                file: file.to_string(),
                line: line_no,
                col: None,
                extractor_id: EXTRACTOR_ID.to_string(),
                command: script_cmd.to_string(),
                metadata: Some(serde_json::json!({ "script_name": script_name })),
            });
        }
    }
    out
}
/// Splits the file contents into lines for subsequent line-number lookups.
fn build_json_line_map(content: &str) -> Vec<&str> {
    let mut map = Vec::new();
    map.extend(content.lines());
    map
}
/// Best-effort, text-based line-number lookup for a JSON key.
///
/// Locates the first line that opens the `parent` object (a trimmed line
/// starting with `"parent"` and containing `:`), then searches for `key`
/// from there. If the key is not found after the parent, the lines before
/// the parent are searched as a fallback. Returns a 1-based line number,
/// defaulting to 1 when nothing matches.
fn find_json_key_line(lines: &[&str], key: &str, parent: &str) -> usize {
    let is_entry = |line: &str, pattern: &str| {
        let trimmed = line.trim();
        trimmed.starts_with(pattern) && trimmed.contains(':')
    };
    let parent_pattern = format!("\"{parent}\"");
    let key_pattern = format!("\"{key}\"");
    let start_idx = lines
        .iter()
        .position(|l| is_entry(l, &parent_pattern))
        .map_or(0, |i| i + 1);
    if let Some(i) = lines[start_idx..]
        .iter()
        .position(|l| is_entry(l, &key_pattern))
    {
        return start_idx + i + 1;
    }
    if start_idx > 0 {
        if let Some(i) = lines[..start_idx]
            .iter()
            .position(|l| is_entry(l, &key_pattern))
        {
            return i + 1;
        }
    }
    1
}
/// Returns `true` for Terraform source files (`.tf` extension, any case).
fn is_terraform_path(path: &Path) -> bool {
    matches!(
        path.extension().and_then(std::ffi::OsStr::to_str),
        Some(ext) if ext.eq_ignore_ascii_case("tf")
    )
}
/// Extracts commands from Terraform `local-exec` / `remote-exec`
/// provisioner blocks, using a line-oriented HCL scan.
///
/// For `local-exec`, both `command = "…"` string assignments and
/// `command = <<EOT` / `<<-EOT` heredocs are extracted (heredoc bodies go
/// through the shell-script extractor with the correct starting line).
/// For `remote-exec`, the `inline = [...]` array is extracted, whether it
/// is written on one line or spread across multiple lines. Block ends are
/// detected by a `}` line at or above the provisioner's indent level.
#[allow(clippy::too_many_lines)]
#[must_use]
pub fn extract_terraform_from_str(
    file: &str,
    content: &str,
    enabled_keywords: &[&'static str],
) -> Vec<ExtractedCommand> {
    const EXTRACTOR_ID_LOCAL: &str = "terraform.provisioner.local_exec";
    const EXTRACTOR_ID_REMOTE: &str = "terraform.provisioner.remote_exec";
    let lines: Vec<&str> = content.lines().collect();
    let mut out = Vec::new();
    let mut idx = 0usize;
    while idx < lines.len() {
        let raw_line = lines[idx];
        let trimmed = raw_line.trim();
        // Skip blanks and both HCL comment styles.
        if trimmed.is_empty() || trimmed.starts_with('#') || trimmed.starts_with("//") {
            idx += 1;
            continue;
        }
        if let Some(prov_type) = detect_provisioner_block(trimmed) {
            let block_indent = raw_line.len() - raw_line.trim_start().len();
            idx += 1;
            match prov_type {
                ProvisionerType::LocalExec => {
                    // Scan the provisioner body until its closing brace.
                    while idx < lines.len() {
                        let inner_line = lines[idx];
                        let inner_trimmed = inner_line.trim();
                        if is_hcl_block_end(inner_trimmed) {
                            let inner_indent = inner_line.len() - inner_line.trim_start().len();
                            if inner_indent <= block_indent {
                                break;
                            }
                        }
                        // `command = "…"` single-line string value.
                        if let Some(cmd) = extract_hcl_string_value(inner_trimmed, "command") {
                            let has_keyword = enabled_keywords.is_empty()
                                || contains_any_keyword(&cmd, enabled_keywords);
                            if has_keyword {
                                out.push(ExtractedCommand {
                                    file: file.to_string(),
                                    line: idx + 1,
                                    col: None,
                                    extractor_id: EXTRACTOR_ID_LOCAL.to_string(),
                                    command: cmd,
                                    metadata: Some(serde_json::Value::String(
                                        "provisioner: local-exec".to_string(),
                                    )),
                                });
                            }
                        }
                        // `command = <<EOT` / `<<-EOT` heredoc body.
                        if let Some((heredoc_marker, is_stripping)) =
                            detect_heredoc_start(inner_trimmed, "command")
                        {
                            let heredoc_start = idx + 1;
                            idx += 1;
                            let mut heredoc_content = String::new();
                            while idx < lines.len() {
                                // `<<-` allows an indented terminator.
                                let heredoc_line = if is_stripping {
                                    lines[idx].trim()
                                } else {
                                    lines[idx]
                                };
                                if heredoc_line == heredoc_marker {
                                    break;
                                }
                                if !heredoc_content.is_empty() {
                                    heredoc_content.push('\n');
                                }
                                heredoc_content.push_str(lines[idx]);
                                idx += 1;
                            }
                            // Body starts on the line after the opener.
                            out.extend(extract_shell_script_with_offset_and_id(
                                file,
                                heredoc_start + 1,
                                &heredoc_content,
                                enabled_keywords,
                                EXTRACTOR_ID_LOCAL,
                            ));
                        }
                        idx += 1;
                    }
                }
                ProvisionerType::RemoteExec => {
                    // Scan the provisioner body until its closing brace.
                    while idx < lines.len() {
                        let inner_line = lines[idx];
                        let inner_trimmed = inner_line.trim();
                        if is_hcl_block_end(inner_trimmed) {
                            let inner_indent = inner_line.len() - inner_line.trim_start().len();
                            if inner_indent <= block_indent {
                                break;
                            }
                        }
                        if inner_trimmed.starts_with("inline") && inner_trimmed.contains('=') {
                            let array_start = idx + 1;
                            if inner_trimmed.contains('[') && inner_trimmed.contains(']') {
                                // Whole array on one line.
                                for cmd in extract_hcl_array_items(inner_trimmed) {
                                    let has_keyword = enabled_keywords.is_empty()
                                        || contains_any_keyword(&cmd, enabled_keywords);
                                    if has_keyword {
                                        out.push(ExtractedCommand {
                                            file: file.to_string(),
                                            line: array_start,
                                            col: None,
                                            extractor_id: EXTRACTOR_ID_REMOTE.to_string(),
                                            command: cmd,
                                            metadata: Some(serde_json::Value::String(
                                                "provisioner: remote-exec".to_string(),
                                            )),
                                        });
                                    }
                                }
                            } else if inner_trimmed.contains('[') {
                                // Multi-line array: one quoted item per line
                                // until the closing `]`.
                                idx += 1;
                                while idx < lines.len() {
                                    let arr_line = lines[idx].trim();
                                    if arr_line.starts_with(']') {
                                        break;
                                    }
                                    if let Some(cmd) = extract_quoted_string(arr_line) {
                                        let has_keyword = enabled_keywords.is_empty()
                                            || contains_any_keyword(&cmd, enabled_keywords);
                                        if has_keyword {
                                            out.push(ExtractedCommand {
                                                file: file.to_string(),
                                                line: idx + 1,
                                                col: None,
                                                extractor_id: EXTRACTOR_ID_REMOTE.to_string(),
                                                command: cmd,
                                                metadata: Some(serde_json::Value::String(
                                                    "provisioner: remote-exec".to_string(),
                                                )),
                                            });
                                        }
                                    }
                                    idx += 1;
                                }
                            }
                        }
                        idx += 1;
                    }
                }
            }
        }
        idx += 1;
    }
    out
}
/// The two Terraform provisioner flavors commands are extracted from.
#[derive(Debug, Clone, Copy)]
enum ProvisionerType {
    LocalExec,
    RemoteExec,
}
/// Detects a `provisioner "local-exec" {` / `provisioner "remote-exec" {`
/// opener line (the provisioner name may use double or single quotes) and
/// reports which flavor it is. `local-exec` takes precedence when both
/// strings appear on the same line.
fn detect_provisioner_block(line: &str) -> Option<ProvisionerType> {
    if !line.starts_with("provisioner") {
        return None;
    }
    let local = line.contains("\"local-exec\"") || line.contains("'local-exec'");
    let remote = line.contains("\"remote-exec\"") || line.contains("'remote-exec'");
    match (local, remote) {
        (true, _) => Some(ProvisionerType::LocalExec),
        (false, true) => Some(ProvisionerType::RemoteExec),
        (false, false) => None,
    }
}
/// Parses `key = "value"` on a single HCL line, returning the unquoted
/// value when `line` starts with `key` followed by `=`.
fn extract_hcl_string_value(line: &str, key: &str) -> Option<String> {
    let after_key = line.strip_prefix(key)?.trim_start();
    let after_eq = after_key.strip_prefix('=')?.trim_start();
    extract_quoted_string(after_eq)
}
/// Returns the contents of a quoted string (double or single quotes) after
/// trimming whitespace and trailing commas; `None` when the input is not a
/// fully quoted scalar with matching quote characters.
fn extract_quoted_string(s: &str) -> Option<String> {
    let s = s.trim().trim_end_matches(',');
    if s.len() < 2 {
        return None;
    }
    let first = s.chars().next()?;
    if (first == '"' || first == '\'') && s.ends_with(first) {
        Some(s[1..s.len() - 1].to_string())
    } else {
        None
    }
}
/// Returns `true` when `line` is a closing `}` for an HCL block, possibly
/// followed only by a `#` or `//` comment.
fn is_hcl_block_end(line: &str) -> bool {
    match line.trim_start().strip_prefix('}') {
        Some(rest) => {
            let rest = rest.trim_start();
            rest.is_empty() || rest.starts_with('#') || rest.starts_with("//")
        }
        None => false,
    }
}
/// Detects `key = <<MARKER` / `key = <<-MARKER` heredoc openers.
///
/// Returns the heredoc delimiter and whether the indented form (`<<-`,
/// which permits a trimmed terminator line) was used.
fn detect_heredoc_start(line: &str, key: &str) -> Option<(String, bool)> {
    let after_key = line.strip_prefix(key)?.trim_start();
    let after_eq = after_key.strip_prefix('=')?.trim_start();
    // `<<-` must be tested before `<<` since the former contains the latter.
    let (marker, stripping) = match after_eq.strip_prefix("<<-") {
        Some(m) => (m.trim(), true),
        None => (after_eq.strip_prefix("<<")?.trim(), false),
    };
    if marker.is_empty() {
        None
    } else {
        Some((marker.to_string(), stripping))
    }
}
/// Extracts the quoted items of an inline HCL array literal, e.g.
/// `inline = ["a", "b"]` yields `a` and `b`. Unquoted parts are skipped,
/// and an absent or inverted bracket pair yields an empty list.
fn extract_hcl_array_items(line: &str) -> Vec<String> {
    let (Some(start), Some(end)) = (line.find('['), line.rfind(']')) else {
        return Vec::new();
    };
    if start >= end {
        return Vec::new();
    }
    split_hcl_array(&line[start + 1..end])
        .iter()
        .filter_map(|part| extract_quoted_string(part))
        .collect()
}
/// Splits the interior of an HCL array literal on top-level commas,
/// respecting single/double quotes and backslash escapes inside quotes.
/// Each part is trimmed; a trailing part is kept only when non-empty,
/// while interior empty parts (e.g. from `a,,b`) are preserved as-is.
fn split_hcl_array(content: &str) -> Vec<String> {
    let mut parts = Vec::new();
    let mut buf = String::new();
    let mut quote: Option<char> = None;
    let mut escape_next = false;
    for ch in content.chars() {
        if escape_next {
            buf.push(ch);
            escape_next = false;
            continue;
        }
        match (quote, ch) {
            (Some(_), '\\') => {
                escape_next = true;
                buf.push(ch);
            }
            (Some(q), _) => {
                buf.push(ch);
                if ch == q {
                    quote = None;
                }
            }
            (None, '"' | '\'') => {
                quote = Some(ch);
                buf.push(ch);
            }
            (None, ',') => {
                parts.push(buf.trim().to_string());
                buf.clear();
            }
            (None, _) => buf.push(ch),
        }
    }
    if !buf.trim().is_empty() {
        parts.push(buf.trim().to_string());
    }
    parts
}
/// Returns `true` for Docker Compose files (`docker-compose.yml/.yaml` or
/// `compose.yml/.yaml`), matched case-insensitively on the file name.
fn is_docker_compose_path(path: &Path) -> bool {
    path.file_name()
        .and_then(std::ffi::OsStr::to_str)
        .map(str::to_ascii_lowercase)
        .is_some_and(|name| {
            matches!(
                name.as_str(),
                "docker-compose.yml" | "docker-compose.yaml" | "compose.yml" | "compose.yaml"
            )
        })
}
/// Extracts commands from a Docker Compose file: `command:` and
/// `entrypoint:` values for every service, plus healthcheck `test:` values.
///
/// `test:` is only extracted when one of the previous five lines mentions
/// `healthcheck` — a heuristic to avoid picking up unrelated `test` keys.
/// Non-command subtrees (`environment`, `labels`, `volumes`, `ports`,
/// `networks`, `depends_on`) are skipped. Value parsing is delegated to
/// `extract_docker_compose_command`.
#[must_use]
pub fn extract_docker_compose_from_str(
    file: &str,
    content: &str,
    enabled_keywords: &[&'static str],
) -> Vec<ExtractedCommand> {
    const EXTRACTOR_ID: &str = "docker_compose.command";
    let lines: Vec<&str> = content.lines().collect();
    let mut out = Vec::new();
    // When set, lines indented deeper than this are skipped.
    let mut skip_indent: Option<usize> = None;
    let mut idx = 0usize;
    while idx < lines.len() {
        let raw_line = lines[idx];
        let trimmed_start = raw_line.trim_start();
        if trimmed_start.is_empty() || trimmed_start.starts_with('#') {
            idx += 1;
            continue;
        }
        let indent = raw_line.len() - trimmed_start.len();
        if let Some(skip) = skip_indent {
            if indent <= skip {
                skip_indent = None;
            } else {
                idx += 1;
                continue;
            }
        }
        // Subtrees whose values are never commands.
        if yaml_key_value(trimmed_start, "environment").is_some()
            || yaml_key_value(trimmed_start, "labels").is_some()
            || yaml_key_value(trimmed_start, "volumes").is_some()
            || yaml_key_value(trimmed_start, "ports").is_some()
            || yaml_key_value(trimmed_start, "networks").is_some()
            || yaml_key_value(trimmed_start, "depends_on").is_some()
        {
            skip_indent = Some(indent);
            idx += 1;
            continue;
        }
        if let Some(value) = yaml_key_value(trimmed_start, "command") {
            out.extend(extract_docker_compose_command(
                file,
                &lines,
                idx,
                indent,
                value,
                enabled_keywords,
                EXTRACTOR_ID,
            ));
            idx += 1;
            continue;
        }
        if let Some(value) = yaml_key_value(trimmed_start, "entrypoint") {
            out.extend(extract_docker_compose_command(
                file,
                &lines,
                idx,
                indent,
                value,
                enabled_keywords,
                EXTRACTOR_ID,
            ));
            idx += 1;
            continue;
        }
        if let Some(value) = yaml_key_value(trimmed_start, "test") {
            // Heuristic: only treat `test:` as a command when a nearby
            // preceding line mentions `healthcheck`.
            let in_healthcheck = (idx.saturating_sub(5)..idx)
                .any(|i| lines.get(i).is_some_and(|l| l.contains("healthcheck")));
            if in_healthcheck {
                out.extend(extract_docker_compose_command(
                    file,
                    &lines,
                    idx,
                    indent,
                    value,
                    enabled_keywords,
                    EXTRACTOR_ID,
                ));
            }
            idx += 1;
            continue;
        }
        idx += 1;
    }
    out
}
/// Parses one `command:`/`entrypoint:`/`test:` value from a compose file
/// and returns the resulting command(s), filtered by `enabled_keywords`
/// (an empty keyword list keeps everything).
///
/// Supported value shapes:
/// - inline flow sequence `[a, b]` — items joined with spaces into one
///   command (exec-form semantics);
/// - `|`/`>` block scalar — the body goes through the shell extractor;
/// - empty value followed by a nested `- item` sequence — items joined
///   with spaces (surrounding quote characters naively stripped);
/// - plain scalar — surrounding quotes removed if present.
fn extract_docker_compose_command(
    file: &str,
    lines: &[&str],
    idx: usize,
    indent: usize,
    value: &str,
    enabled_keywords: &[&'static str],
    extractor_id: &'static str,
) -> Vec<ExtractedCommand> {
    let line_no = idx + 1;
    let mut out = Vec::new();
    // Inline exec-form array: join the parts into one command line.
    if let Some(items) = parse_inline_yaml_sequence(value) {
        let cmd = items.join(" ");
        if enabled_keywords.is_empty() || contains_any_keyword(&cmd, enabled_keywords) {
            out.push(ExtractedCommand {
                file: file.to_string(),
                line: line_no,
                col: None,
                extractor_id: extractor_id.to_string(),
                command: cmd,
                metadata: None,
            });
        }
        return out;
    }
    // Block scalar: extract the shell body with its real starting line.
    if value.starts_with('|') || value.starts_with('>') {
        let (block, block_start_line, _next_idx) = parse_yaml_block(lines, idx + 1, indent);
        out.extend(extract_shell_script_with_offset_and_id(
            file,
            block_start_line,
            &block,
            enabled_keywords,
            extractor_id,
        ));
        return out;
    }
    // Empty value: collect a nested `- item` sequence on following lines.
    if value.is_empty() || value.starts_with('#') {
        let mut seq_idx = idx + 1;
        let mut cmd_parts = Vec::new();
        while seq_idx < lines.len() {
            let seq_line = lines[seq_idx];
            let seq_trimmed = seq_line.trim_start();
            let seq_indent = seq_line.len() - seq_trimmed.len();
            if seq_indent <= indent && !seq_trimmed.is_empty() {
                break;
            }
            if seq_trimmed.starts_with("- ") {
                let item = seq_trimmed.strip_prefix("- ").unwrap_or("").trim();
                // Naive unquoting: strips any run of quote chars at both ends.
                let item = item.trim_matches('"').trim_matches('\'');
                cmd_parts.push(item.to_string());
            } else if !seq_trimmed.is_empty() && !seq_trimmed.starts_with('#') {
                break;
            }
            seq_idx += 1;
        }
        if !cmd_parts.is_empty() {
            let cmd = cmd_parts.join(" ");
            if enabled_keywords.is_empty() || contains_any_keyword(&cmd, enabled_keywords) {
                out.push(ExtractedCommand {
                    file: file.to_string(),
                    line: line_no,
                    col: None,
                    extractor_id: extractor_id.to_string(),
                    command: cmd,
                    metadata: None,
                });
            }
        }
        return out;
    }
    // Plain scalar: remove one pair of surrounding quotes if present.
    let cmd = if (value.starts_with('"') && value.ends_with('"'))
        || (value.starts_with('\'') && value.ends_with('\''))
    {
        value[1..value.len() - 1].to_string()
    } else {
        value.to_string()
    };
    if enabled_keywords.is_empty() || contains_any_keyword(&cmd, enabled_keywords) {
        out.push(ExtractedCommand {
            file: file.to_string(),
            line: line_no,
            col: None,
            extractor_id: extractor_id.to_string(),
            command: cmd,
            metadata: None,
        });
    }
    out
}
#[must_use]
pub fn build_report(
mut findings: Vec<ScanFinding>,
files_scanned: usize,
files_skipped: usize,
commands_extracted: usize,
max_findings_reached: bool,
elapsed_ms: Option<u64>,
) -> ScanReport {
sort_findings(&mut findings);
let mut decisions = ScanDecisionCounts::default();
let mut severities = ScanSeverityCounts::default();
for f in &findings {
match f.decision {
ScanDecision::Allow => decisions.allow += 1,
ScanDecision::Warn => decisions.warn += 1,
ScanDecision::Deny => decisions.deny += 1,
}
match f.severity {
ScanSeverity::Info => severities.info += 1,
ScanSeverity::Warning => severities.warning += 1,
ScanSeverity::Error => severities.error += 1,
}
}
ScanReport {
schema_version: SCAN_SCHEMA_VERSION,
summary: ScanSummary {
files_scanned,
files_skipped,
commands_extracted,
findings_total: findings.len(),
decisions,
severities,
max_findings_reached,
elapsed_ms,
},
findings,
}
}
#[cfg(test)]
mod tests {
use super::*;
// Shared fixture: the stock configuration used by the evaluator-integration
// tests below.
fn default_config() -> Config {
Config::default()
}
// A fully-populated, valid hooks.toml must round-trip into HooksToml with
// every field set and produce no unknown-key warnings.
#[test]
fn hooks_toml_parses_valid_config() {
let input = r#"
[scan]
fail_on = "warning"
format = "json"
max_file_size = 1234
max_findings = 50
redact = "quoted"
truncate = 10
[scan.paths]
include = ["scripts/**", ".github/workflows/**"]
exclude = ["target/**"]
"#;
let (cfg, warnings) = parse_hooks_toml(input).expect("parse");
assert!(warnings.is_empty(), "should not warn on valid config");
assert_eq!(cfg.scan.fail_on, Some(ScanFailOn::Warning));
assert_eq!(cfg.scan.format, Some(ScanFormat::Json));
assert_eq!(cfg.scan.max_file_size, Some(1234));
assert_eq!(cfg.scan.max_findings, Some(50));
assert_eq!(cfg.scan.redact, Some(ScanRedactMode::Quoted));
assert_eq!(cfg.scan.truncate, Some(10));
assert_eq!(
cfg.scan.paths.include,
vec!["scripts/**", ".github/workflows/**"]
);
assert_eq!(cfg.scan.paths.exclude, vec!["target/**"]);
}
// Unknown keys at each nesting level warn with the full dotted path while
// parsing still succeeds (warn-and-ignore, not hard error).
#[test]
fn hooks_toml_warns_on_unknown_keys() {
let input = r#"
top_level = "x"
[scan]
format = "json"
unknown = 123
[scan.paths]
include = ["src/**"]
extra = ["x"]
"#;
let (_cfg, warnings) = parse_hooks_toml(input).expect("parse");
assert!(
warnings.iter().any(|w| w.contains("top_level")),
"should warn on unknown top-level keys"
);
assert!(
warnings.iter().any(|w| w.contains("scan.unknown")),
"should warn on unknown scan keys"
);
assert!(
warnings.iter().any(|w| w.contains("scan.paths.extra")),
"should warn on unknown scan.paths keys"
);
}
// An invalid enum value is a hard parse error (unlike unknown keys).
#[test]
fn hooks_toml_invalid_enum_value_errors() {
let input = r#"
[scan]
fail_on = "nope"
"#;
let err = parse_hooks_toml(input).expect_err("should fail");
assert!(
err.to_lowercase().contains("fail_on") || err.to_lowercase().contains("unknown"),
"error should mention the invalid value: {err}"
);
}
#[test]
fn filter_paths_matches_repo_relative_include_for_absolute_paths() {
use tempfile::TempDir;
let temp = TempDir::new().unwrap();
let repo_root = temp.path().join("repo");
std::fs::create_dir_all(repo_root.join("src")).unwrap();
let file_path = repo_root.join("src").join("main.rs");
let paths = vec![file_path.clone()];
let include = vec!["src/**".to_string()];
let exclude = Vec::new();
let filtered = filter_paths(&paths, &include, &exclude, Some(&repo_root));
assert_eq!(filtered, vec![file_path]);
}
#[test]
fn filter_paths_excludes_repo_relative_glob_for_absolute_paths() {
use tempfile::TempDir;
let temp = TempDir::new().unwrap();
let repo_root = temp.path().join("repo");
std::fs::create_dir_all(repo_root.join("target")).unwrap();
let file_path = repo_root.join("target").join("artifact.bin");
let paths = vec![file_path];
let include = Vec::new();
let exclude = vec!["target/**".to_string()];
let filtered = filter_paths(&paths, &include, &exclude, Some(&repo_root));
assert!(
filtered.is_empty(),
"target/** should exclude repo-relative paths"
);
}
#[test]
fn test_glob_match_exact() {
assert!(glob_match("src/main.rs", "src/main.rs"));
assert!(!glob_match("src/main.rs", "src/lib.rs"));
}
#[test]
fn test_glob_match_star() {
assert!(glob_match("*.rs", "main.rs"));
assert!(glob_match("src/*.rs", "src/main.rs"));
assert!(!glob_match("*.rs", "src/main.rs")); }
#[test]
fn test_glob_match_double_star() {
assert!(glob_match("**/*.rs", "main.rs"));
assert!(glob_match("**/*.rs", "src/main.rs"));
assert!(glob_match("**/*.rs", "src/deep/nested/main.rs"));
assert!(glob_match("src/**", "src/main.rs"));
assert!(glob_match("src/**", "src/deep/nested/file.rs"));
assert!(glob_match("src/**", "src"));
assert!(!glob_match("src/**", "src2/main.rs"));
assert!(!glob_match("target/**", "targeted/file.txt"));
assert!(!glob_match(
".github/workflows/**",
".github/workflows2/ci.yml"
));
assert!(glob_match("src/**", r"src\main.rs"));
assert!(glob_match("**/*.rs", r"src\main.rs"));
}
// Patterns whose literal prefix and suffix overlap (e.g. "test*st") must not
// match inputs too short to contain both parts independently: a naive
// prefix/suffix check would wrongly accept "test" for "test*st".
#[test]
fn test_glob_match_overlapping_prefix_suffix() {
    assert!(!glob_match("test*st", "test"));
    assert!(glob_match("test*st", "testst"));
    assert!(glob_match("test*st", "test_xst"));
    assert!(glob_match("a*b", "ab"));
    assert!(glob_match("a*b", "axb"));
    assert!(!glob_match("a*b", "b"));
    assert!(!glob_match("ab*ab", "ab"));
    assert!(glob_match("ab*ab", "abab"));
    assert!(glob_match("ab*ab", "abXab"));
}
// End-to-end fail-on policy over a built report: an Error finding trips both
// fail-on=error and fail-on=warning; fail-on=none never fails.
#[test]
fn fail_on_policy_blocks_as_expected() {
let report = build_report(
vec![
ScanFinding {
file: "a".to_string(),
line: 1,
col: None,
extractor_id: "x".to_string(),
extracted_command: "rm -rf /".to_string(),
decision: ScanDecision::Deny,
severity: ScanSeverity::Error,
rule_id: Some("core.filesystem:rm-rf-general".to_string()),
reason: Some("blocked".to_string()),
suggestion: None,
},
ScanFinding {
file: "b".to_string(),
line: 1,
col: None,
extractor_id: "x".to_string(),
extracted_command: "echo hi".to_string(),
decision: ScanDecision::Warn,
severity: ScanSeverity::Warning,
rule_id: None,
reason: Some("warn".to_string()),
suggestion: None,
},
],
2,
0,
2,
false,
None,
);
assert!(should_fail(&report, ScanFailOn::Error));
assert!(should_fail(&report, ScanFailOn::Warning));
assert!(!should_fail(&report, ScanFailOn::None));
}
// sort_findings must order by file (then line) so report output is stable
// regardless of discovery order.
#[test]
fn finding_order_is_deterministic() {
let mut findings = vec![
ScanFinding {
file: "b".to_string(),
line: 2,
col: None,
extractor_id: "x".to_string(),
extracted_command: "cmd".to_string(),
decision: ScanDecision::Warn,
severity: ScanSeverity::Warning,
rule_id: Some("pack:rule".to_string()),
reason: None,
suggestion: None,
},
ScanFinding {
file: "a".to_string(),
line: 1,
col: None,
extractor_id: "x".to_string(),
extracted_command: "cmd".to_string(),
decision: ScanDecision::Deny,
severity: ScanSeverity::Error,
rule_id: Some("pack:rule".to_string()),
reason: None,
suggestion: None,
},
];
sort_findings(&mut findings);
assert_eq!(findings[0].file, "a");
assert_eq!(findings[0].line, 1);
}
// A pack-rule hit maps into a finding with Deny decision, Error severity,
// and the originating rule id recorded.
#[test]
fn evaluator_integration_maps_pack_rule_to_rule_id() {
let config = default_config();
let ctx = ScanEvalContext::from_config(&config);
let options = ScanOptions {
format: ScanFormat::Pretty,
fail_on: ScanFailOn::Error,
max_file_size_bytes: 1024 * 1024,
max_findings: 100,
redact: ScanRedactMode::None,
truncate: 0,
};
let extracted = ExtractedCommand {
file: "test".to_string(),
line: 1,
col: None,
extractor_id: "shell.script".to_string(),
command: "git reset --hard".to_string(),
metadata: None,
};
let finding = evaluate_extracted_command(&extracted, &options, &config, &ctx)
.expect("git reset --hard should be blocked");
assert_eq!(finding.decision, ScanDecision::Deny);
assert_eq!(finding.severity, ScanSeverity::Error);
assert_eq!(finding.rule_id.as_deref(), Some("core.git:reset-hard"));
assert!(finding.reason.is_some());
}
// A dangerous command wrapped in `sh -c "..."` must still be detected by
// scanning the embedded script, not just the outer command line.
#[test]
fn evaluator_integration_blocks_sh_c_with_embedded_dangerous_command() {
let config = default_config();
let ctx = ScanEvalContext::from_config(&config);
let options = ScanOptions {
format: ScanFormat::Pretty,
fail_on: ScanFailOn::Error,
max_file_size_bytes: 1024 * 1024,
max_findings: 100,
redact: ScanRedactMode::None,
truncate: 0,
};
let extracted = ExtractedCommand {
file: "docker-compose.yml".to_string(),
line: 4,
col: None,
extractor_id: "docker_compose.command".to_string(),
command: "sh -c \"git reset --hard && ./start.sh\"".to_string(),
metadata: None,
};
let finding = evaluate_extracted_command(&extracted, &options, &config, &ctx);
assert!(
finding.is_some(),
"sh -c with embedded 'git reset --hard' should be blocked via heredoc AST scanning"
);
let finding = finding.unwrap();
assert_eq!(finding.decision, ScanDecision::Deny);
assert!(
finding
.reason
.as_ref()
.is_some_and(|r| r.contains("git reset --hard")),
"Reason should mention the blocked command: {:?}",
finding.reason
);
}
// The compose extractor must keep the `sh -c "..."` wrapper intact so the
// evaluator can later inspect the embedded script.
#[test]
fn docker_compose_extractor_produces_correct_command_string() {
let content = r#"services:
app:
image: alpine
command: sh -c "git reset --hard && ./start.sh"
"#;
let extracted = extract_docker_compose_from_str("docker-compose.yml", content, &["git"]);
assert_eq!(extracted.len(), 1, "Should extract exactly 1 command");
let cmd = &extracted[0].command;
eprintln!("Extracted command: {cmd:?}");
assert!(
cmd.contains("sh -c"),
"Extracted command should contain 'sh -c': {cmd:?}"
);
assert!(
cmd.contains("git reset --hard"),
"Extracted command should contain the dangerous command: {cmd:?}"
);
}
// Full pipeline (extract with the real enabled-keyword set, then evaluate):
// a compose file with an embedded dangerous command yields a Deny finding.
#[test]
fn docker_compose_full_scan_pipeline_detects_embedded_dangerous_command() {
let content = r#"services:
app:
image: alpine
command: sh -c "git reset --hard && ./start.sh"
"#;
let config = default_config();
let ctx = ScanEvalContext::from_config(&config);
let options = ScanOptions {
format: ScanFormat::Pretty,
fail_on: ScanFailOn::Error,
max_file_size_bytes: 1024 * 1024,
max_findings: 100,
redact: ScanRedactMode::None,
truncate: 0,
};
let extracted =
extract_docker_compose_from_str("docker-compose.yml", content, &ctx.enabled_keywords);
eprintln!("Enabled keywords: {:?}", ctx.enabled_keywords);
eprintln!("Extracted {} commands: {:?}", extracted.len(), extracted);
assert!(!extracted.is_empty(), "Should extract at least 1 command");
let mut found_finding = false;
for cmd in &extracted {
eprintln!("Evaluating command: {:?}", cmd.command);
if let Some(finding) = evaluate_extracted_command(cmd, &options, &config, &ctx) {
eprintln!("Found finding: {finding:?}");
found_finding = true;
assert_eq!(finding.decision, ScanDecision::Deny);
}
}
assert!(
found_finding,
"Should find at least one dangerous command in docker-compose file"
);
}
// Quoted assignment values are data, not executed commands — they must not
// be extracted even when they contain dangerous-looking text.
#[test]
fn shell_extractor_skips_assignment_with_trailing_comment() {
let content = r#"
DOC="rm -rf /" # this is data, not an executed command
export NOTE="git reset --hard" # also data
"#;
let extracted = extract_shell_script_from_str("test.sh", content, &["rm", "git"]);
assert!(
extracted.is_empty(),
"Expected no extracted commands, got: {extracted:?}"
);
}
// $(...) on the right-hand side of an assignment DOES execute — extract it.
#[test]
fn shell_extractor_includes_assignment_with_command_substitution() {
let content = r"export NOTE=$(rm -rf /)";
let extracted = extract_shell_script_from_str("test.sh", content, &["rm"]);
assert_eq!(extracted.len(), 1);
assert!(extracted[0].command.contains("rm -rf /"));
}
// Backtick substitution in an assignment also executes — extract it.
#[test]
fn shell_extractor_includes_assignment_with_backticks() {
let content = r"NOTE=`git reset --hard`";
let extracted = extract_shell_script_from_str("test.sh", content, &["git"]);
assert_eq!(extracted.len(), 1);
assert!(extracted[0].command.contains("git reset --hard"));
}
// A command chained after an export assignment must still be picked up.
#[test]
fn shell_extractor_extracts_commands_after_export_assignment() {
let content = r"export FOO=bar && rm -rf ./tmp";
let extracted = extract_shell_script_from_str("test.sh", content, &["rm"]);
assert_eq!(extracted.len(), 1);
assert!(extracted[0].command.contains("rm -rf"));
}
// Text after `#` in a RUN line is a shell comment, not a command.
#[test]
fn dockerfile_extractor_ignores_shell_comments_in_run() {
let content = r"
FROM ubuntu:22.04
RUN echo hello # rm -rf /
";
let extracted = extract_dockerfile_from_str("Dockerfile", content, &["rm"]);
assert!(
extracted.is_empty(),
"Expected no extracted commands, got: {extracted:?}"
);
}
// The comment is stripped but the real command before it is kept.
#[test]
fn dockerfile_extractor_strips_shell_comments_in_run_and_keeps_real_command() {
let content = r"
FROM ubuntu:22.04
RUN rm -rf ./tmp # cleanup temp dir
";
let extracted = extract_dockerfile_from_str("Dockerfile", content, &["rm"]);
assert_eq!(extracted.len(), 1);
assert_eq!(extracted[0].command, "rm -rf ./tmp");
}
// ENV values are data: never treat them as executable context.
#[test]
fn dockerfile_extractor_does_not_extract_env_values() {
let content = r#"
FROM ubuntu:22.04
ENV X="rm -rf /" # should not be treated as executable context
ENV NOTE="git reset --hard"
"#;
let extracted = extract_dockerfile_from_str("Dockerfile", content, &["rm", "git"]);
assert!(
extracted.is_empty(),
"Expected no extracted commands, got: {extracted:?}"
);
}
// With no keyword pre-filter, every script entry is extracted.
#[test]
fn package_json_extractor_allows_empty_keywords() {
let content = r#"
{
"scripts": {
"build": "rm -rf ./tmp"
}
}
"#;
let extracted = extract_package_json_from_str("package.json", content, &[]);
assert_eq!(extracted.len(), 1);
assert_eq!(extracted[0].command, "rm -rf ./tmp");
}
// Same empty-keyword behavior for terraform local-exec provisioners.
#[test]
fn terraform_extractor_allows_empty_keywords() {
let content = r#"
resource "null_resource" "test" {
provisioner "local-exec" {
command = "rm -rf ./tmp"
}
}
"#;
let extracted = extract_terraform_from_str("main.tf", content, &[]);
assert_eq!(extracted.len(), 1);
assert_eq!(extracted[0].command, "rm -rf ./tmp");
}
// Only `run:` steps execute; name/env/with fields are data and must be skipped.
#[test]
fn github_actions_extractor_does_not_extract_env_or_with_fields() {
let content = r#"jobs:
test:
steps:
- name: "rm -rf /"
env:
X: "rm -rf /"
with:
args: "rm -rf /"
run: echo hello
"#;
let extracted =
extract_github_actions_workflow_from_str(".github/workflows/ci.yml", content, &["rm"]);
assert!(
extracted.is_empty(),
"Expected no extracted commands, got: {extracted:?}"
);
}
// A key literally named `run` nested inside an env block is still data.
#[test]
fn github_actions_extractor_ignores_run_key_inside_env_block() {
let content = r#"jobs:
test:
steps:
- name: "env run key"
env:
run: "rm -rf /"
run: echo ok
"#;
let extracted =
extract_github_actions_workflow_from_str(".github/workflows/ci.yml", content, &["rm"]);
assert!(
extracted.is_empty(),
"Expected no extracted commands, got: {extracted:?}"
);
}
// The report always carries the pinned schema version.
#[test]
fn json_schema_version_is_present() {
let report = build_report(vec![], 0, 0, 0, false, None);
assert_eq!(report.schema_version, SCAN_SCHEMA_VERSION);
assert_eq!(report.schema_version, 1);
}
// Every summary field is populated from the build_report arguments, and all
// counters start at zero for an empty finding list.
#[test]
fn json_schema_has_all_required_fields() {
let report = build_report(vec![], 5, 2, 10, false, Some(42));
assert_eq!(report.summary.files_scanned, 5);
assert_eq!(report.summary.files_skipped, 2);
assert_eq!(report.summary.commands_extracted, 10);
assert_eq!(report.summary.findings_total, 0);
assert!(!report.summary.max_findings_reached);
assert_eq!(report.summary.elapsed_ms, Some(42));
assert_eq!(report.summary.decisions.allow, 0);
assert_eq!(report.summary.decisions.warn, 0);
assert_eq!(report.summary.decisions.deny, 0);
assert_eq!(report.summary.severities.info, 0);
assert_eq!(report.summary.severities.warning, 0);
assert_eq!(report.summary.severities.error, 0);
}
// serde round-trip: the JSON shape (field names, lowercase enum variants)
// is part of the public schema contract.
#[test]
fn report_serializes_to_valid_json() {
let report = build_report(
vec![ScanFinding {
file: "test.sh".to_string(),
line: 42,
col: Some(5),
extractor_id: "shell.script".to_string(),
extracted_command: "rm -rf /".to_string(),
decision: ScanDecision::Deny,
severity: ScanSeverity::Error,
rule_id: Some("core.filesystem:rm-rf-root-home".to_string()),
reason: Some("dangerous".to_string()),
suggestion: Some("use safer rm".to_string()),
}],
1,
0,
1,
false,
Some(100),
);
let json = serde_json::to_string(&report).expect("should serialize");
let parsed: serde_json::Value = serde_json::from_str(&json).expect("should parse");
assert_eq!(parsed["schema_version"], 1);
assert_eq!(parsed["summary"]["files_scanned"], 1);
assert_eq!(parsed["findings"][0]["file"], "test.sh");
assert_eq!(parsed["findings"][0]["line"], 42);
assert_eq!(parsed["findings"][0]["col"], 5);
assert_eq!(parsed["findings"][0]["decision"], "deny");
assert_eq!(parsed["findings"][0]["severity"], "error");
}
// Decision tallies in the summary must match the findings exactly.
#[test]
fn summary_counts_decisions_correctly() {
let findings = vec![
make_finding("a", ScanDecision::Allow, ScanSeverity::Info),
make_finding("b", ScanDecision::Allow, ScanSeverity::Info),
make_finding("c", ScanDecision::Warn, ScanSeverity::Warning),
make_finding("d", ScanDecision::Deny, ScanSeverity::Error),
make_finding("e", ScanDecision::Deny, ScanSeverity::Error),
make_finding("f", ScanDecision::Deny, ScanSeverity::Error),
];
let report = build_report(findings, 6, 0, 6, false, None);
assert_eq!(report.summary.decisions.allow, 2);
assert_eq!(report.summary.decisions.warn, 1);
assert_eq!(report.summary.decisions.deny, 3);
}
// Severity tallies in the summary must match the findings exactly.
#[test]
fn summary_counts_severities_correctly() {
let findings = vec![
make_finding("a", ScanDecision::Allow, ScanSeverity::Info),
make_finding("b", ScanDecision::Warn, ScanSeverity::Warning),
make_finding("c", ScanDecision::Warn, ScanSeverity::Warning),
make_finding("d", ScanDecision::Deny, ScanSeverity::Error),
];
let report = build_report(findings, 4, 0, 4, false, None);
assert_eq!(report.summary.severities.info, 1);
assert_eq!(report.summary.severities.warning, 2);
assert_eq!(report.summary.severities.error, 1);
}
// Test helper: a minimal finding varying only in file/decision/severity.
fn make_finding(file: &str, decision: ScanDecision, severity: ScanSeverity) -> ScanFinding {
ScanFinding {
file: file.to_string(),
line: 1,
col: None,
extractor_id: "test".to_string(),
extracted_command: "cmd".to_string(),
decision,
severity,
rule_id: None,
reason: None,
suggestion: None,
}
}
// Redaction tests. Inputs are assembled with concat! so that this source
// file does not itself contain secret-looking literals that would trip
// scanners run over the repository.
#[test]
fn redact_quoted_strings_handles_single_quotes() {
let input = concat!("echo 'sec", "ret pass", "word here'");
let output = redact_quoted_strings(input);
assert_eq!(output, "echo '…'");
}
#[test]
fn redact_quoted_strings_handles_double_quotes() {
let input = concat!("echo \"sec", "ret pass", "word here\"");
let output = redact_quoted_strings(input);
assert_eq!(output, r#"echo "…""#);
}
// Escaped quotes inside a double-quoted string must not terminate it early.
#[test]
fn redact_quoted_strings_handles_escaped_quotes() {
let input = r#"echo "hello \"world\" test""#;
let output = redact_quoted_strings(input);
assert_eq!(output, r#"echo "…""#);
}
// Quote style is preserved per argument when redacting.
#[test]
fn redact_quoted_strings_handles_mixed_quotes() {
let input = r#"cmd 'arg1' "arg2" 'arg3'"#;
let output = redact_quoted_strings(input);
assert_eq!(output, r#"cmd '…' "…" '…'"#);
}
// Unquoted text passes through untouched.
#[test]
fn redact_quoted_strings_preserves_unquoted() {
let input = "git reset --hard HEAD";
let output = redact_quoted_strings(input);
assert_eq!(output, input);
}
// Aggressive mode blanks values of sensitive-looking env assignments.
#[test]
fn redact_aggressively_redacts_sensitive_env_vars() {
let input = concat!("curl -H TO", "KEN=abc123sec", "ret");
let output = redact_aggressively(input);
assert!(output.contains(concat!("TO", "KEN=…")));
assert!(!output.contains(concat!("abc123sec", "ret")));
}
// Aggressive mode also blanks long hex strings (likely tokens/hashes).
#[test]
fn redact_aggressively_redacts_long_hex_strings() {
let input = "curl -H 0123456789abcdef0123456789abcdef";
let output = redact_aggressively(input);
assert!(output.contains("…"));
assert!(!output.contains("0123456789abcdef0123456789abcdef"));
}
// Ordinary commands survive aggressive redaction unchanged.
#[test]
fn redact_aggressively_preserves_normal_commands() {
let input = "git status --short";
let output = redact_aggressively(input);
assert_eq!(output, input);
}
// Strings at or under the limit pass through (limit counts the ellipsis).
#[test]
fn truncate_utf8_handles_short_strings() {
assert_eq!(truncate_utf8("hello", 10), "hello");
assert_eq!(truncate_utf8("hello", 5), "hello");
assert_eq!(truncate_utf8("hello", 6), "hello");
}
// Over-long strings are cut to limit-1 chars plus a single `…`.
#[test]
fn truncate_utf8_truncates_long_strings() {
assert_eq!(truncate_utf8("hello world", 6), "hello…");
assert_eq!(truncate_utf8("abcdefghij", 5), "abcd…");
}
// Edge cases: limit 1 leaves only the ellipsis; limit 0 means "no truncation".
#[test]
fn truncate_utf8_handles_edge_cases() {
assert_eq!(truncate_utf8("hello", 1), "…");
assert_eq!(truncate_utf8("hello", 0), "hello"); }
// Truncation counts characters, not bytes, so multi-byte chars stay intact.
#[test]
fn truncate_utf8_handles_unicode() {
let input = "🎉🎊🎈🎁";
assert_eq!(truncate_utf8(input, 3), "🎉🎊…");
assert_eq!(truncate_utf8(input, 5), input);
}
// fail-on=none blocks nothing regardless of severity.
#[test]
fn fail_on_none_never_fails() {
assert!(!ScanFailOn::None.blocks(ScanSeverity::Info));
assert!(!ScanFailOn::None.blocks(ScanSeverity::Warning));
assert!(!ScanFailOn::None.blocks(ScanSeverity::Error));
}
// fail-on=warning blocks warning and anything stronger.
#[test]
fn fail_on_warning_blocks_warning_and_error() {
assert!(!ScanFailOn::Warning.blocks(ScanSeverity::Info));
assert!(ScanFailOn::Warning.blocks(ScanSeverity::Warning));
assert!(ScanFailOn::Warning.blocks(ScanSeverity::Error));
}
// fail-on=error blocks only error-severity findings.
#[test]
fn fail_on_error_blocks_only_error() {
assert!(!ScanFailOn::Error.blocks(ScanSeverity::Info));
assert!(!ScanFailOn::Error.blocks(ScanSeverity::Warning));
assert!(ScanFailOn::Error.blocks(ScanSeverity::Error));
}
// A report containing only warnings fails under fail-on=warning alone.
#[test]
fn should_fail_with_warning_only_findings() {
let report = build_report(
vec![make_finding("a", ScanDecision::Warn, ScanSeverity::Warning)],
1,
0,
1,
false,
None,
);
assert!(!should_fail(&report, ScanFailOn::Error));
assert!(should_fail(&report, ScanFailOn::Warning));
assert!(!should_fail(&report, ScanFailOn::None));
}
// An empty report never fails under any policy.
#[test]
fn should_fail_with_empty_report() {
let report = build_report(vec![], 0, 0, 0, false, None);
assert!(!should_fail(&report, ScanFailOn::Error));
assert!(!should_fail(&report, ScanFailOn::Warning));
assert!(!should_fail(&report, ScanFailOn::None));
}
// Severity ranks must be strictly ordered: Error > Warning > Info.
#[test]
fn severity_rank_ordering() {
assert!(ScanSeverity::Error.rank() > ScanSeverity::Warning.rank());
assert!(ScanSeverity::Warning.rank() > ScanSeverity::Info.rank());
}
// Benign everyday commands must produce no finding under the default config.
#[test]
fn safe_commands_are_not_blocked() {
let config = default_config();
let ctx = ScanEvalContext::from_config(&config);
let options = ScanOptions {
format: ScanFormat::Pretty,
fail_on: ScanFailOn::Error,
max_file_size_bytes: 1024 * 1024,
max_findings: 100,
redact: ScanRedactMode::None,
truncate: 0,
};
let safe_commands = [
"git status",
"git log --oneline",
"ls -la",
"echo hello",
"cat file.txt",
"grep pattern file",
"rm file.txt", ];
for cmd in safe_commands {
let extracted = ExtractedCommand {
file: "test.sh".to_string(),
line: 1,
col: None,
extractor_id: "shell.script".to_string(),
command: cmd.to_string(),
metadata: None,
};
let finding = evaluate_extracted_command(&extracted, &options, &config, &ctx);
assert!(
finding.is_none(),
"Command '{cmd}' should not be blocked but got: {finding:?}"
);
}
}
// Known-dangerous commands must each be denied by their specific pack rule.
#[test]
fn dangerous_commands_are_blocked() {
let config = default_config();
let ctx = ScanEvalContext::from_config(&config);
let options = ScanOptions {
format: ScanFormat::Pretty,
fail_on: ScanFailOn::Error,
max_file_size_bytes: 1024 * 1024,
max_findings: 100,
redact: ScanRedactMode::None,
truncate: 0,
};
let dangerous_commands = [
("git reset --hard", "core.git:reset-hard"),
("git push --force origin main", "core.git:push-force-long"),
("git clean -fd", "core.git:clean-force"),
("rm -rf ./some/path", "core.filesystem:rm-rf-general"),
];
for (cmd, expected_rule) in dangerous_commands {
let extracted = ExtractedCommand {
file: "test.sh".to_string(),
line: 1,
col: None,
extractor_id: "shell.script".to_string(),
command: cmd.to_string(),
metadata: None,
};
let finding = evaluate_extracted_command(&extracted, &options, &config, &ctx)
.expect("Command should be blocked");
assert_eq!(
finding.decision,
ScanDecision::Deny,
"Command '{cmd}' should be denied"
);
assert_eq!(
finding.rule_id.as_deref(),
Some(expected_rule),
"Command '{cmd}' should match rule {expected_rule}"
);
}
}
// Findings for rules with known safer alternatives carry a suggestion.
#[test]
fn blocked_commands_include_suggestions_when_available() {
let config = default_config();
let ctx = ScanEvalContext::from_config(&config);
let options = ScanOptions {
format: ScanFormat::Pretty,
fail_on: ScanFailOn::Error,
max_file_size_bytes: 1024 * 1024,
max_findings: 100,
redact: ScanRedactMode::None,
truncate: 0,
};
let extracted = ExtractedCommand {
file: "test.sh".to_string(),
line: 1,
col: None,
extractor_id: "shell.script".to_string(),
command: "git reset --hard HEAD".to_string(),
metadata: None,
};
let finding = evaluate_extracted_command(&extracted, &options, &config, &ctx)
.expect("should be blocked");
assert!(
finding.suggestion.is_some(),
"Finding should include suggestion"
);
assert!(
finding.suggestion.as_ref().unwrap().contains("soft")
|| finding.suggestion.as_ref().unwrap().contains("mixed"),
"Suggestion should mention safer alternatives"
);
}
// Comment lines are skipped even when they contain enabled keywords.
#[test]
fn shell_extractor_skips_comments() {
let content = "# comment with git keyword\ngit status";
let extracted = extract_shell_script_from_str("test.sh", content, &["git"]);
assert_eq!(extracted.len(), 1);
assert_eq!(extracted[0].command, "git status");
}
// if/then/fi keywords are structure, not commands; the body is extracted
// with its real line number.
#[test]
fn shell_extractor_skips_control_structures() {
let content = "if [ -n \"$X\" ]; then\n git status\nfi";
let extracted = extract_shell_script_from_str("test.sh", content, &["git"]);
assert_eq!(extracted.len(), 1);
assert_eq!(extracted[0].command, "git status");
assert_eq!(extracted[0].line, 2);
}
// `else` is likewise skipped; both branches' bodies are extracted.
#[test]
fn shell_extractor_skips_else_keyword() {
let content = "if [ -n \"$X\" ]; then\n git status\nelse\n git diff\nfi";
let extracted = extract_shell_script_from_str("test.sh", content, &["git"]);
assert_eq!(extracted.len(), 2);
assert_eq!(extracted[0].command, "git status");
assert_eq!(extracted[0].line, 2);
assert_eq!(extracted[1].command, "git diff");
assert_eq!(extracted[1].line, 4);
}
// Backslash continuations are joined into one command at the first line.
#[test]
fn shell_extractor_joins_line_continuations() {
let content = "git log \\\n --oneline";
let extracted = extract_shell_script_from_str("test.sh", content, &["git"]);
assert_eq!(extracted.len(), 1);
assert_eq!(extracted[0].line, 1);
assert_eq!(extracted[0].command, "git log --oneline");
}
// A backslash inside a trailing comment is NOT a continuation; the next
// line is an independent command.
#[test]
fn shell_extractor_ignores_comment_backslash_for_continuation() {
let content = "echo ok # comment \\\nrm -rf /";
let extracted = extract_shell_script_from_str("test.sh", content, &["rm"]);
assert_eq!(extracted.len(), 1);
assert_eq!(extracted[0].line, 2);
assert_eq!(extracted[0].command, "rm -rf /");
}
// The keyword pre-filter drops lines containing none of the keywords.
#[test]
fn shell_extractor_keyword_prefilter() {
let content = "echo hello\ngit status";
let extracted = extract_shell_script_from_str("test.sh", content, &["git"]);
assert_eq!(extracted.len(), 1);
assert_eq!(extracted[0].command, "git status");
}
// RUN in shell form is extracted with its line number and extractor id.
#[test]
fn dockerfile_extractor_extracts_run_shell_form() {
let content = "FROM alpine\nRUN apt-get update";
let extracted = extract_dockerfile_from_str("Dockerfile", content, &["apt"]);
assert_eq!(extracted.len(), 1);
assert_eq!(extracted[0].line, 2);
assert_eq!(extracted[0].extractor_id, "dockerfile.run");
assert_eq!(extracted[0].command, "apt-get update");
}
// RUN in JSON exec form ["cmd", "arg"] is joined into a command string.
#[test]
fn dockerfile_extractor_extracts_json_exec_form() {
let content = "FROM alpine\nRUN [\"apt-get\", \"update\"]\nRUN apt-get install";
let extracted = extract_dockerfile_from_str("Dockerfile", content, &["apt"]);
assert_eq!(extracted.len(), 2);
assert_eq!(extracted[0].command, "apt-get update");
assert_eq!(extracted[1].command, "apt-get install");
}
// Exec form may span lines via backslash continuation.
#[test]
fn dockerfile_extractor_handles_exec_form_continuation() {
let content = "FROM alpine\nRUN [\"sh\", \"-c\", \\\n \"rm -rf /tmp\"]";
let extracted = extract_dockerfile_from_str("Dockerfile", content, &["rm"]);
assert_eq!(extracted.len(), 1);
assert_eq!(extracted[0].line, 2);
assert_eq!(extracted[0].extractor_id, "dockerfile.run.exec");
assert_eq!(extracted[0].command, "sh -c rm -rf /tmp");
}
// Shell-form continuations are joined into a single extracted command.
#[test]
fn dockerfile_extractor_handles_continuations() {
let content = "FROM alpine\nRUN apt-get update \\\n && apt-get install curl";
let extracted = extract_dockerfile_from_str("Dockerfile", content, &["apt"]);
assert_eq!(extracted.len(), 1);
assert!(extracted[0].command.contains("apt-get update"));
assert!(extracted[0].command.contains("apt-get install"));
}
// Only RUN instructions are executable; comments/FROM/LABEL are skipped.
#[test]
fn dockerfile_extractor_ignores_non_run() {
let content = "# apt comment\nFROM alpine\nLABEL apt=test\nRUN apt-get update";
let extracted = extract_dockerfile_from_str("Dockerfile", content, &["apt"]);
assert_eq!(extracted.len(), 1);
assert_eq!(extracted[0].command, "apt-get update");
}
// Dockerfile naming conventions: "Dockerfile", "Dockerfile.<suffix>" and
// "<name>.dockerfile" (case-insensitive) qualify; hyphenated names do not.
#[test]
fn dockerfile_path_detection() {
use std::path::Path;
assert!(is_dockerfile_path(Path::new("Dockerfile")));
assert!(is_dockerfile_path(Path::new("dockerfile")));
assert!(is_dockerfile_path(Path::new("Dockerfile.dev")));
assert!(is_dockerfile_path(Path::new("app.dockerfile")));
assert!(!is_dockerfile_path(Path::new("Dockerfile-backup")));
assert!(!is_dockerfile_path(Path::new("build.sh")));
}
// Shell scripts are detected by .sh/.bash extension, case-insensitively.
#[test]
fn shell_path_detection() {
use std::path::Path;
assert!(is_shell_script_path(Path::new("build.sh")));
assert!(is_shell_script_path(Path::new("deploy.SH")));
assert!(is_shell_script_path(Path::new("script.bash")));
assert!(!is_shell_script_path(Path::new("Dockerfile")));
}
// A tab (not just a space) may separate RUN from its command.
#[test]
fn dockerfile_extractor_handles_tab_after_run() {
let content = "FROM alpine\nRUN\tapt-get update";
let extracted = extract_dockerfile_from_str("Dockerfile", content, &["apt"]);
assert_eq!(extracted.len(), 1);
assert_eq!(extracted[0].command, "apt-get update");
}
// Workflow detection requires .github/workflows/ (any depth below it,
// case-insensitive) and a .yml/.yaml extension.
#[test]
fn github_actions_path_detection() {
use std::path::Path;
assert!(is_github_actions_workflow_path(Path::new(
".github/workflows/ci.yml"
)));
assert!(is_github_actions_workflow_path(Path::new(
".github/workflows/ci.yaml"
)));
assert!(is_github_actions_workflow_path(Path::new(
".github/workflows/sub/ci.yml"
)));
assert!(is_github_actions_workflow_path(Path::new(
".GITHUB/WORKFLOWS/CI.YML"
)));
assert!(!is_github_actions_workflow_path(Path::new(
".github/workflows/ci.json"
)));
assert!(!is_github_actions_workflow_path(Path::new(
"workflows/ci.yml"
)));
assert!(!is_github_actions_workflow_path(Path::new(
".github/workflow/ci.yml"
)));
}
// Only `run:` step values are extracted; step names are data.
#[test]
fn github_actions_extractor_extracts_run_steps_only() {
let content = r#"name: CI
on: [push]
jobs:
test:
runs-on: ubuntu-latest
steps:
- name: "rm -rf /"
- run: git status
- run: rm -rf ./build
"#;
let extracted = extract_github_actions_workflow_from_str(
".github/workflows/ci.yml",
content,
&["git", "rm"],
);
assert_eq!(extracted.len(), 2);
assert_eq!(extracted[0].line, 8);
assert_eq!(extracted[0].extractor_id, "github_actions.steps.run");
assert_eq!(extracted[0].command, "git status");
assert_eq!(extracted[1].line, 9);
assert_eq!(extracted[1].extractor_id, "github_actions.steps.run");
assert_eq!(extracted[1].command, "rm -rf ./build");
}
// Block scalar `run: |` is scanned line by line; shell comments inside the
// block are skipped and real commands keep their original line numbers.
#[test]
fn github_actions_extractor_handles_block_scalar_and_skips_comments() {
let content = r"jobs:
test:
steps:
- run: |
echo hello
# rm -rf /
rm -rf ./build
";
let extracted =
extract_github_actions_workflow_from_str(".github/workflows/ci.yml", content, &["rm"]);
assert_eq!(extracted.len(), 1);
assert_eq!(extracted[0].line, 7);
assert_eq!(extracted[0].extractor_id, "github_actions.steps.run");
assert_eq!(extracted[0].command, "rm -rf ./build");
}
// A top-level `run:` key outside any steps list must be ignored.
#[test]
fn github_actions_extractor_ignores_run_outside_steps() {
let content = r"run: rm -rf /
jobs:
test:
steps:
- run: echo hello
";
let extracted =
extract_github_actions_workflow_from_str(".github/workflows/ci.yml", content, &["rm"]);
assert!(extracted.is_empty());
}
// GitLab CI files are "<anything>.gitlab-ci.yml" or ".gitlab-ci.yml"
// (case-insensitive); a bare "gitlab-ci.yml" or .yaml variant does not count.
#[test]
fn gitlab_ci_path_detection() {
use std::path::Path;
assert!(is_gitlab_ci_path(Path::new(".gitlab-ci.yml")));
assert!(is_gitlab_ci_path(Path::new("foo.gitlab-ci.yml")));
assert!(is_gitlab_ci_path(Path::new("FOO.GITLAB-CI.YML")));
assert!(!is_gitlab_ci_path(Path::new("gitlab-ci.yml")));
assert!(!is_gitlab_ci_path(Path::new(".gitlab-ci.yaml")));
}
// Only script/before_script/after_script sections execute; `variables` is data.
#[test]
fn gitlab_ci_extractor_extracts_script_sections_only() {
let content = r#"before_script:
- echo "before"
build:
script:
- echo "build"
after_script:
- rm -rf ./build
variables:
DANGEROUS: "rm -rf /"
"#;
let extracted = extract_gitlab_ci_from_str(".gitlab-ci.yml", content, &["rm", "echo"]);
assert_eq!(extracted.len(), 3);
assert_eq!(extracted[0].line, 2);
assert_eq!(extracted[0].extractor_id, "gitlab_ci.script");
assert_eq!(extracted[0].command, "echo \"before\"");
assert_eq!(extracted[1].line, 5);
assert_eq!(extracted[1].command, "echo \"build\"");
assert_eq!(extracted[2].line, 7);
assert_eq!(extracted[2].command, "rm -rf ./build");
}
// An anchored script list is extracted once at its definition site; the
// alias reference does not duplicate the commands.
#[test]
fn gitlab_ci_extractor_handles_anchor_alias() {
let content = r#".common_script: &common_script
- echo "one"
- rm -rf ./build
deploy:
script: *common_script
"#;
let extracted = extract_gitlab_ci_from_str(".gitlab-ci.yml", content, &["rm", "echo"]);
assert_eq!(extracted.len(), 2);
assert_eq!(extracted[0].line, 2);
assert_eq!(extracted[0].command, "echo \"one\"");
assert_eq!(extracted[1].line, 3);
assert_eq!(extracted[1].command, "rm -rf ./build");
}
// An inline-sequence anchor on a script key IS expanded at both the anchor
// and the alias, yielding each command twice.
#[test]
fn gitlab_ci_extractor_anchor_inline_sequence() {
let content = r#"build:
script: &shared [echo "one", "rm -rf ./build"]
deploy:
script: *shared
"#;
let extracted = extract_gitlab_ci_from_str(".gitlab-ci.yml", content, &["rm", "echo"]);
assert_eq!(
extracted.len(),
4,
"Expected anchor and alias to yield 4 commands: {extracted:?}"
);
assert!(
extracted
.iter()
.any(|cmd| cmd.command.contains("echo \"one\""))
);
assert!(
extracted
.iter()
.any(|cmd| cmd.command.contains("rm -rf ./build"))
);
}
// A block-scalar script produces one command per non-empty line.
#[test]
fn gitlab_ci_extractor_block_scalar() {
let content = r#"build:
script: |
echo "building"
rm -rf ./dist
echo "done"
"#;
let extracted = extract_gitlab_ci_from_str(".gitlab-ci.yml", content, &["echo", "rm"]);
assert_eq!(
extracted.len(),
3,
"Expected 3 commands from block scalar: {extracted:?}"
);
assert!(extracted[0].command.contains("echo"));
assert!(extracted[1].command.contains("rm -rf"));
assert!(extracted[2].command.contains("echo"));
}
// before_script/after_script at both the default and job level all count.
#[test]
fn gitlab_ci_extractor_before_after_script() {
let content = r#"default:
before_script:
- echo "global before"
job1:
before_script:
- echo "job before"
script:
- npm test
after_script:
- rm -rf ./temp
"#;
let extracted =
extract_gitlab_ci_from_str(".gitlab-ci.yml", content, &["echo", "npm", "rm"]);
assert_eq!(
extracted.len(),
4,
"Expected 4 commands from before/after scripts: {extracted:?}"
);
assert!(extracted[0].command.contains("echo"));
assert!(extracted[1].command.contains("echo"));
assert!(extracted[2].command.contains("npm"));
assert!(extracted[3].command.contains("rm"));
}
// rules/only/except sections hold conditions, not commands — never extract.
#[test]
fn gitlab_ci_extractor_ignores_rules_only_except() {
let content = r"rules:
- rm -rf / should not match
only:
- rm -rf / should not match
except:
- rm -rf / should not match
build:
script:
- echo safe
";
let extracted = extract_gitlab_ci_from_str(".gitlab-ci.yml", content, &["rm", "echo"]);
assert_eq!(
extracted.len(),
1,
"Only script content should be extracted: {extracted:?}"
);
assert_eq!(extracted[0].command, "echo safe");
}
#[test]
fn gitlab_ci_extractor_ignores_variables_section() {
let content = r#"variables:
DANGEROUS_CMD: rm -rf /
SAFE_VAR: hello
build:
script:
- echo "$DANGEROUS_CMD"
"#;
let extracted = extract_gitlab_ci_from_str(".gitlab-ci.yml", content, &["rm", "echo"]);
assert_eq!(
extracted.len(),
1,
"Variables section should not be extracted: {extracted:?}"
);
assert!(extracted[0].command.contains("echo"));
}
#[test]
fn gitlab_ci_extractor_line_numbers_accurate() {
let content = r"# comment
# another comment
build:
script:
- echo line5
- echo line6
";
let extracted = extract_gitlab_ci_from_str(".gitlab-ci.yml", content, &["echo"]);
assert_eq!(extracted.len(), 2, "Expected 2 commands: {extracted:?}");
assert_eq!(extracted[0].line, 5, "First echo should be on line 5");
assert_eq!(extracted[1].line, 6, "Second echo should be on line 6");
}
#[test]
fn gitlab_ci_extractor_quoted_strings() {
let content = r#"build:
script:
- "rm -rf ./build"
- 'echo "hello world"'
"#;
let extracted = extract_gitlab_ci_from_str(".gitlab-ci.yml", content, &["rm", "echo"]);
assert_eq!(
extracted.len(),
2,
"Quoted strings should be extracted: {extracted:?}"
);
assert_eq!(extracted[0].command, "rm -rf ./build");
assert!(extracted[1].command.contains("echo"));
}
#[test]
fn gitlab_ci_extractor_multiple_jobs() {
let content = r"build:
script:
- npm run build
test:
script:
- npm test
deploy:
script:
- rm -rf old && deploy.sh
";
let extracted =
extract_gitlab_ci_from_str(".gitlab-ci.yml", content, &["npm", "rm", "deploy"]);
assert_eq!(
extracted.len(),
3,
"Expected 3 commands from multiple jobs: {extracted:?}"
);
assert!(extracted[0].command.contains("npm run build"));
assert!(extracted[1].command.contains("npm test"));
assert!(extracted[2].command.contains("rm -rf"));
}
#[test]
fn gitlab_ci_empty_script_ignored() {
let content = r"build:
script:
test:
script:
- echo real
";
let extracted = extract_gitlab_ci_from_str(".gitlab-ci.yml", content, &["echo"]);
assert_eq!(
extracted.len(),
1,
"Empty script should be ignored: {extracted:?}"
);
assert_eq!(extracted[0].command, "echo real");
}
#[test]
fn gitlab_ci_extractor_inline_script() {
let content = r"build:
script: rm -rf ./build && npm run build
";
let extracted = extract_gitlab_ci_from_str(".gitlab-ci.yml", content, &["rm", "npm"]);
assert!(
!extracted.is_empty(),
"Inline script should be extracted: {extracted:?}"
);
assert!(extracted[0].command.contains("rm"));
}
// --- Azure Pipelines extractor tests (azure-pipelines*.yml/yaml) ---

// Path detection: case-insensitive, accepts the `azure-pipelines-<suffix>`
// naming convention, rejects unrelated yml names.
#[test]
fn azure_pipelines_path_detection() {
use std::path::Path;
assert!(is_azure_pipelines_path(Path::new("azure-pipelines.yml")));
assert!(is_azure_pipelines_path(Path::new("azure-pipelines.yaml")));
assert!(is_azure_pipelines_path(Path::new("Azure-Pipelines.yml")));
assert!(is_azure_pipelines_path(Path::new("AZURE-PIPELINES.YML")));
assert!(is_azure_pipelines_path(Path::new(
"azure-pipelines-prod.yml"
)));
assert!(is_azure_pipelines_path(Path::new(
"azure-pipelines-dev.yaml"
)));
assert!(!is_azure_pipelines_path(Path::new("pipelines.yml")));
assert!(!is_azure_pipelines_path(Path::new("azure.yml")));
assert!(!is_azure_pipelines_path(Path::new("ci.yml")));
}
// `- script:` steps are extracted with the `azure_pipelines.script` id.
#[test]
fn azure_pipelines_extractor_extracts_script_tasks() {
let content = r"trigger:
- main
pool:
vmImage: ubuntu-latest
steps:
- script: echo Hello
- script: rm -rf ./build
";
let extracted =
extract_azure_pipelines_from_str("azure-pipelines.yml", content, &["echo", "rm"]);
assert_eq!(
extracted.len(),
2,
"Expected 2 script commands: {extracted:?}"
);
assert_eq!(extracted[0].command, "echo Hello");
assert_eq!(extracted[0].extractor_id, "azure_pipelines.script");
assert_eq!(extracted[1].command, "rm -rf ./build");
assert_eq!(extracted[1].extractor_id, "azure_pipelines.script");
}
// `- bash:` steps are extracted with their own extractor id.
#[test]
fn azure_pipelines_extractor_extracts_bash_tasks() {
let content = r#"steps:
- bash: echo "bash command"
- bash: rm -rf /tmp/cache
"#;
let extracted =
extract_azure_pipelines_from_str("azure-pipelines.yml", content, &["echo", "rm"]);
assert_eq!(
extracted.len(),
2,
"Expected 2 bash commands: {extracted:?}"
);
assert_eq!(extracted[0].extractor_id, "azure_pipelines.bash");
assert_eq!(extracted[1].extractor_id, "azure_pipelines.bash");
}
// `powershell:` and `pwsh:` steps get distinct extractor ids.
#[test]
fn azure_pipelines_extractor_extracts_powershell_tasks() {
let content = r#"steps:
- powershell: Write-Host "Hello"
- pwsh: Remove-Item -Recurse -Force ./build
"#;
let extracted =
extract_azure_pipelines_from_str("azure-pipelines.yml", content, &["Write", "Remove"]);
assert_eq!(
extracted.len(),
2,
"Expected 2 powershell commands: {extracted:?}"
);
assert_eq!(extracted[0].extractor_id, "azure_pipelines.powershell");
assert_eq!(extracted[1].extractor_id, "azure_pipelines.pwsh");
}
// A literal block scalar (`script: |`) is split into one command per line.
#[test]
fn azure_pipelines_extractor_handles_block_scalars() {
let content = r#"steps:
- script: |
echo "line1"
rm -rf ./build
echo "done"
"#;
let extracted =
extract_azure_pipelines_from_str("azure-pipelines.yml", content, &["echo", "rm"]);
assert_eq!(
extracted.len(),
3,
"Expected 3 individual commands from block: {extracted:?}"
);
assert!(extracted[0].command.contains("echo"));
assert!(extracted[1].command.contains("rm -rf"));
assert!(extracted[2].command.contains("echo"));
}
// `variables:` values are data, not commands, and must be skipped.
#[test]
fn azure_pipelines_extractor_ignores_variables() {
let content = r"variables:
DANGEROUS: rm -rf /
BUILD_DIR: ./build
steps:
- script: echo safe
";
let extracted =
extract_azure_pipelines_from_str("azure-pipelines.yml", content, &["rm", "echo"]);
assert_eq!(
extracted.len(),
1,
"Variables should not be extracted: {extracted:?}"
);
assert_eq!(extracted[0].command, "echo safe");
}
// `parameters:` defaults are data, not commands, and must be skipped.
// Also asserts the extracted command's content (sibling "ignores X" tests do
// this too); length alone would pass even if the wrong line were extracted.
#[test]
fn azure_pipelines_extractor_ignores_parameters() {
let content = r"parameters:
- name: buildCommand
default: rm -rf /
steps:
- script: echo hello
";
let extracted =
extract_azure_pipelines_from_str("azure-pipelines.yml", content, &["rm", "echo"]);
assert_eq!(
extracted.len(),
1,
"Parameters should not be extracted: {extracted:?}"
);
assert_eq!(extracted[0].command, "echo hello");
}
// `displayName:` and `env:` values attached to a step are metadata, not
// commands; only the `script:` line itself is extracted.
#[test]
fn azure_pipelines_extractor_ignores_displayname_and_env() {
let content = r"steps:
- script: rm -rf ./build
displayName: Delete build with rm -rf
env:
CLEANUP_CMD: rm -rf /
";
let extracted = extract_azure_pipelines_from_str("azure-pipelines.yml", content, &["rm"]);
assert_eq!(
extracted.len(),
1,
"Only script should be extracted, not displayName/env: {extracted:?}"
);
assert_eq!(extracted[0].command, "rm -rf ./build");
}
// Reported line numbers are 1-based positions in the original file.
#[test]
fn azure_pipelines_extractor_line_numbers_accurate() {
let content = r"trigger:
- main
steps:
- script: echo line5
- script: echo line6
";
let extracted = extract_azure_pipelines_from_str("azure-pipelines.yml", content, &["echo"]);
assert_eq!(extracted.len(), 2, "Expected 2 commands: {extracted:?}");
assert_eq!(extracted[0].line, 5, "First script should be on line 5");
assert_eq!(extracted[1].line, 6, "Second script should be on line 6");
}
// Double- and single-quoted step values are unwrapped before extraction.
#[test]
fn azure_pipelines_extractor_quoted_strings() {
let content = r#"steps:
- script: "rm -rf ./build"
- bash: 'echo "hello"'
"#;
let extracted =
extract_azure_pipelines_from_str("azure-pipelines.yml", content, &["rm", "echo"]);
assert_eq!(
extracted.len(),
2,
"Quoted strings should be extracted: {extracted:?}"
);
assert_eq!(extracted[0].command, "rm -rf ./build");
assert!(extracted[1].command.contains("echo"));
}
// A `script:` key with no value contributes nothing.
#[test]
fn azure_pipelines_empty_script_ignored() {
let content = r"steps:
- script:
- script: echo real
";
let extracted = extract_azure_pipelines_from_str("azure-pipelines.yml", content, &["echo"]);
assert_eq!(
extracted.len(),
1,
"Empty script should be ignored: {extracted:?}"
);
assert_eq!(extracted[0].command, "echo real");
}
// --- CircleCI extractor tests (.circleci/config.yml) ---

// Path detection: the file must be named config.yml/yaml AND live in a
// directory literally named `.circleci` (at any depth).
#[test]
fn circleci_path_detection() {
use std::path::Path;
assert!(is_circleci_path(Path::new(".circleci/config.yml")));
assert!(is_circleci_path(Path::new(".circleci/config.yaml")));
assert!(is_circleci_path(Path::new(
"/home/user/project/.circleci/config.yml"
)));
assert!(is_circleci_path(Path::new("foo/bar/.circleci/config.yml")));
assert!(!is_circleci_path(Path::new("config.yml")));
assert!(!is_circleci_path(Path::new(".circleci/other.yml")));
assert!(!is_circleci_path(Path::new("circleci/config.yml")));
assert!(!is_circleci_path(Path::new(".circle/config.yml")));
}
// Inline `- run: <cmd>` steps are extracted; bare step names like
// `- checkout` are not commands.
#[test]
fn circleci_extractor_extracts_inline_run() {
let content = r"version: 2.1
jobs:
build:
docker:
- image: circleci/node:14
steps:
- checkout
- run: npm install
- run: rm -rf ./build
";
let extracted = extract_circleci_from_str(".circleci/config.yml", content, &["npm", "rm"]);
assert_eq!(extracted.len(), 2, "Expected 2 run commands: {extracted:?}");
assert_eq!(extracted[0].command, "npm install");
assert_eq!(extracted[0].extractor_id, "circleci.run");
assert_eq!(extracted[1].command, "rm -rf ./build");
}
// `- run: |` literal blocks are split into one command per line.
#[test]
fn circleci_extractor_extracts_block_scalar_run() {
let content = r#"jobs:
build:
steps:
- run: |
echo "line1"
rm -rf ./build
echo "done"
"#;
let extracted = extract_circleci_from_str(".circleci/config.yml", content, &["echo", "rm"]);
assert_eq!(
extracted.len(),
3,
"Expected 3 individual commands from block: {extracted:?}"
);
assert!(extracted[0].command.contains("echo"));
assert!(extracted[1].command.contains("rm -rf"));
assert!(extracted[2].command.contains("echo"));
}
// The map form of `run:` uses a nested `command:` field; that field (not
// `name:`) carries the shell command.
#[test]
fn circleci_extractor_extracts_nested_command() {
let content = r"jobs:
build:
steps:
- run:
name: Run tests
command: npm test
- run:
name: Clean up
command: rm -rf ./dist
";
let extracted = extract_circleci_from_str(".circleci/config.yml", content, &["npm", "rm"]);
assert_eq!(
extracted.len(),
2,
"Expected 2 nested command fields: {extracted:?}"
);
assert_eq!(extracted[0].command, "npm test");
assert_eq!(extracted[1].command, "rm -rf ./dist");
}
// A nested `command: |` block scalar is also split line-by-line.
#[test]
fn circleci_extractor_extracts_block_scalar_command() {
let content = r#"jobs:
build:
steps:
- run:
name: Build
command: |
echo "building"
npm run build
"#;
let extracted =
extract_circleci_from_str(".circleci/config.yml", content, &["echo", "npm"]);
assert_eq!(
extracted.len(),
2,
"Expected 2 commands from block scalar command: {extracted:?}"
);
assert!(extracted[0].command.contains("echo"));
assert!(extracted[1].command.contains("npm"));
}
// `environment:` and `docker:` sections are configuration, not commands.
// NOTE(review): the fixture repeats the `environment:` key twice at the same
// level — presumably deliberate, to exercise repeated sections; confirm.
#[test]
fn circleci_extractor_ignores_environment_and_docker() {
let content = r"jobs:
build:
docker:
- image: circleci/node:14
environment:
DANGEROUS: rm -rf /
environment:
BUILD_CMD: rm -rf /
steps:
- run: echo safe
";
let extracted = extract_circleci_from_str(".circleci/config.yml", content, &["rm", "echo"]);
assert_eq!(
extracted.len(),
1,
"Environment sections should not be extracted: {extracted:?}"
);
assert_eq!(extracted[0].command, "echo safe");
}
// `orbs:` and `parameters:` defaults are data, not commands.
#[test]
fn circleci_extractor_ignores_orbs_and_parameters() {
let content = r"version: 2.1
orbs:
node: circleci/node@5.0
parameters:
run_cmd:
default: rm -rf /
jobs:
build:
steps:
- run: echo hello
";
let extracted = extract_circleci_from_str(".circleci/config.yml", content, &["rm", "echo"]);
assert_eq!(
extracted.len(),
1,
"Orbs and parameters should not be extracted: {extracted:?}"
);
assert_eq!(extracted[0].command, "echo hello");
}
// Reported line numbers are 1-based positions in the original file.
#[test]
fn circleci_extractor_line_numbers_accurate() {
let content = r"version: 2.1
jobs:
build:
steps:
- run: echo line6
- run: echo line7
";
let extracted = extract_circleci_from_str(".circleci/config.yml", content, &["echo"]);
assert_eq!(extracted.len(), 2, "Expected 2 commands: {extracted:?}");
assert_eq!(extracted[0].line, 6, "First run should be on line 6");
assert_eq!(extracted[1].line, 7, "Second run should be on line 7");
}
// Double- and single-quoted run values are unwrapped before extraction.
#[test]
fn circleci_extractor_quoted_strings() {
let content = r#"jobs:
build:
steps:
- run: "rm -rf ./build"
- run: 'echo "hello"'
"#;
let extracted = extract_circleci_from_str(".circleci/config.yml", content, &["rm", "echo"]);
assert_eq!(
extracted.len(),
2,
"Quoted strings should be extracted: {extracted:?}"
);
assert_eq!(extracted[0].command, "rm -rf ./build");
assert!(extracted[1].command.contains("echo"));
}
// `name:` and `working_directory:` inside a run map are metadata only.
#[test]
fn circleci_extractor_ignores_name_and_working_directory() {
let content = r"jobs:
build:
steps:
- run:
name: rm -rf dangerous name
working_directory: /tmp/rm
command: echo safe
";
let extracted = extract_circleci_from_str(".circleci/config.yml", content, &["rm", "echo"]);
assert_eq!(
extracted.len(),
1,
"name and working_directory should not be extracted: {extracted:?}"
);
assert_eq!(extracted[0].command, "echo safe");
}
// A `- run:` entry with no value contributes nothing.
#[test]
fn circleci_empty_run_ignored() {
let content = r"jobs:
build:
steps:
- run:
- run: echo real
";
let extracted = extract_circleci_from_str(".circleci/config.yml", content, &["echo"]);
assert_eq!(
extracted.len(),
1,
"Empty run should be ignored: {extracted:?}"
);
assert_eq!(extracted[0].command, "echo real");
}
// --- Makefile extractor tests ---

// Path detection: exact names `Makefile`/`makefile` only; no suffixed
// variants and no `.mk` includes.
#[test]
fn makefile_path_detection() {
use std::path::Path;
assert!(is_makefile_path(Path::new("Makefile")));
assert!(is_makefile_path(Path::new("makefile")));
assert!(!is_makefile_path(Path::new("Makefile.backup")));
assert!(!is_makefile_path(Path::new("build.mk")));
assert!(!is_makefile_path(Path::new("build.sh")));
}
// Only tab-indented recipe lines are commands; variable assignments and
// comment recipe lines are skipped. Line numbers are 1-based file positions.
#[test]
fn makefile_extractor_extracts_recipe_lines_only() {
let content = "VAR = rm -rf /\n\
\n\
all:\n\
\tgit status\n\
\t# rm -rf /\n\
\trm -rf ./build\n";
let extracted = extract_makefile_from_str("Makefile", content, &["git", "rm"]);
assert_eq!(extracted.len(), 2);
assert_eq!(extracted[0].line, 4);
assert_eq!(extracted[0].extractor_id, "makefile.recipe");
assert_eq!(extracted[0].command, "git status");
assert_eq!(extracted[1].line, 6);
assert_eq!(extracted[1].extractor_id, "makefile.recipe");
assert_eq!(extracted[1].command, "rm -rf ./build");
}
// A trailing backslash joins the continuation into a single command,
// reported at the line where the recipe started.
#[test]
fn makefile_extractor_handles_backslash_continuations() {
let content = "all:\n\
\tgit log \\\n\
--oneline\n";
let extracted = extract_makefile_from_str("Makefile", content, &["git"]);
assert_eq!(extracted.len(), 1);
assert_eq!(extracted[0].line, 2);
assert_eq!(extracted[0].extractor_id, "makefile.recipe");
assert_eq!(extracted[0].command, "git log --oneline");
}
// Path detection for package.json: exact lowercase file name `package.json`
// in any directory; no suffixes, lockfiles, prefixed names, or other casing.
// (Formatting fix: the closing brace was fused onto the last assert line.)
#[test]
fn is_package_json_path_detects_correctly() {
assert!(is_package_json_path(Path::new("package.json")));
assert!(is_package_json_path(Path::new("/foo/bar/package.json")));
assert!(is_package_json_path(Path::new("./package.json")));
assert!(!is_package_json_path(Path::new("package.json.bak")));
assert!(!is_package_json_path(Path::new("package-lock.json")));
assert!(!is_package_json_path(Path::new("my-package.json")));
assert!(!is_package_json_path(Path::new("Package.json")));
}
// --- package.json extractor tests ---

// Only values under the `scripts` object are candidates; a matching keyword
// yields the script value plus metadata.
#[test]
fn package_json_extracts_scripts() {
let content = r#"{
"name": "test-package",
"scripts": {
"clean": "rm -rf dist",
"build": "npm run compile"
}
}"#;
let extracted = extract_package_json_from_str("package.json", content, &["rm"]);
assert_eq!(extracted.len(), 1);
assert_eq!(extracted[0].command, "rm -rf dist");
assert_eq!(extracted[0].extractor_id, "package_json.script");
assert!(extracted[0].metadata.is_some());
}
// A multi-word keyword ("gcloud storage") matches even when the script has
// ordinary single-space separation.
#[test]
fn package_json_matches_multiword_keywords_with_extra_whitespace() {
let content = r#"{
"scripts": {
"sync": "gcloud storage rm gs://bucket"
}
}"#;
let extracted = extract_package_json_from_str("package.json", content, &["gcloud storage"]);
assert_eq!(extracted.len(), 1);
assert_eq!(extracted[0].command, "gcloud storage rm gs://bucket");
}
// Every matching script is reported; non-matching ones ("tsc") are not.
#[test]
fn package_json_extracts_multiple_matching_scripts() {
let content = r#"{
"scripts": {
"clean": "rm -rf dist",
"nuke": "rm -rf node_modules",
"build": "tsc"
}
}"#;
let extracted = extract_package_json_from_str("package.json", content, &["rm"]);
assert_eq!(extracted.len(), 2);
assert!(extracted.iter().any(|e| e.command == "rm -rf dist"));
assert!(extracted.iter().any(|e| e.command == "rm -rf node_modules"));
}
// Keyword hits in `description`, `keywords`, `config`, etc. are not scripts
// and must not be extracted.
#[test]
fn package_json_ignores_non_script_fields() {
let content = r#"{
"name": "test-package",
"description": "Uses rm -rf for cleanup",
"keywords": ["rm", "cleanup"],
"scripts": {
"build": "npm run compile"
},
"config": {
"danger": "rm -rf /"
}
}"#;
let extracted = extract_package_json_from_str("package.json", content, &["rm"]);
assert!(extracted.is_empty());
}
// An empty `scripts` object yields nothing.
#[test]
fn package_json_handles_empty_scripts() {
let content = r#"{
"name": "test-package",
"scripts": {}
}"#;
let extracted = extract_package_json_from_str("package.json", content, &["rm"]);
assert!(extracted.is_empty());
}
// A missing `scripts` key yields nothing (no panic).
#[test]
fn package_json_handles_missing_scripts() {
let content = r#"{
"name": "test-package"
}"#;
let extracted = extract_package_json_from_str("package.json", content, &["rm"]);
assert!(extracted.is_empty());
}
// Malformed JSON is tolerated: the extractor returns empty, never errors.
#[test]
fn package_json_handles_invalid_json() {
let content = "{ this is not valid json }";
let extracted = extract_package_json_from_str("package.json", content, &["rm"]);
assert!(extracted.is_empty());
}
// Lifecycle hooks (preinstall/postinstall/...) are ordinary scripts and are
// subject to the same keyword filter.
#[test]
fn package_json_extracts_lifecycle_scripts() {
let content = r#"{
"scripts": {
"preinstall": "rm -rf old-cache",
"postinstall": "echo done",
"build": "tsc"
}
}"#;
let extracted = extract_package_json_from_str("package.json", content, &["rm"]);
assert_eq!(extracted.len(), 1);
assert_eq!(extracted[0].command, "rm -rf old-cache");
}
// Reported line numbers are 1-based positions in the JSON source.
#[test]
fn package_json_line_numbers_are_accurate() {
let content = r#"{
"name": "test",
"scripts": {
"clean": "rm -rf dist"
}
}"#;
let extracted = extract_package_json_from_str("package.json", content, &["rm"]);
assert_eq!(extracted.len(), 1);
assert_eq!(extracted[0].line, 4);
}
// When the same key text ("clean") appears earlier in another object, the
// reported line must be the one inside `scripts`, not the first occurrence.
#[test]
fn package_json_line_numbers_disambiguation() {
let content = r#"{
"dependencies": {
"clean": "1.0.0"
},
"scripts": {
"clean": "rm -rf dist"
}
}"#;
let extracted = extract_package_json_from_str("package.json", content, &["rm"]);
assert_eq!(extracted.len(), 1);
assert_eq!(extracted[0].line, 6);
}
// --- Terraform extractor tests (*.tf) ---

// Path detection: `.tf` extension (case-insensitive); state files and other
// extensions are excluded.
#[test]
fn terraform_path_detection() {
use std::path::Path;
assert!(is_terraform_path(Path::new("main.tf")));
assert!(is_terraform_path(Path::new("outputs.tf")));
assert!(is_terraform_path(Path::new("path/to/resource.TF")));
assert!(!is_terraform_path(Path::new("main.tf.bak")));
assert!(!is_terraform_path(Path::new("terraform.tfstate")));
assert!(!is_terraform_path(Path::new("README.md")));
}
// `provisioner "local-exec"` blocks: the `command` attribute is a shell
// command and is extracted with a provisioner-specific id.
#[test]
fn terraform_local_exec_simple_command() {
let content = r#"
resource "null_resource" "cleanup" {
provisioner "local-exec" {
command = "rm -rf /tmp/*"
}
}
"#;
let extracted = extract_terraform_from_str("main.tf", content, &["rm"]);
assert_eq!(extracted.len(), 1);
assert_eq!(extracted[0].command, "rm -rf /tmp/*");
assert_eq!(
extracted[0].extractor_id,
"terraform.provisioner.local_exec"
);
}
// `provisioner "remote-exec"` blocks: every element of the `inline` list is
// a separate command.
#[test]
fn terraform_remote_exec_inline_array() {
let content = r#"
resource "aws_instance" "web" {
provisioner "remote-exec" {
inline = [
"echo hello",
"rm -rf /tmp/*",
"echo done"
]
}
}
"#;
let extracted = extract_terraform_from_str("main.tf", content, &["rm", "echo"]);
assert_eq!(extracted.len(), 3);
assert!(extracted.iter().any(|c| c.command == "rm -rf /tmp/*"));
assert!(extracted.iter().any(|c| c.command == "echo hello"));
assert!(extracted.iter().any(|c| c.command == "echo done"));
assert!(
extracted
.iter()
.all(|c| c.extractor_id == "terraform.provisioner.remote_exec")
);
}
// `variable`/`output` values are data, not commands.
#[test]
fn terraform_ignores_non_provisioner_blocks() {
let content = r#"
variable "dangerous" {
default = "rm -rf /"
}
output "msg" {
value = "rm -rf everything"
}
"#;
let extracted = extract_terraform_from_str("variables.tf", content, &["rm"]);
assert!(
extracted.is_empty(),
"Should not extract from variable/output blocks"
);
}
// A single-line `inline = [...]` list is split into its elements.
#[test]
fn terraform_inline_single_line_array() {
let content = r#"
resource "null_resource" "test" {
provisioner "remote-exec" {
inline = ["rm -rf /tmp", "echo done"]
}
}
"#;
let extracted = extract_terraform_from_str("main.tf", content, &["rm", "echo"]);
assert_eq!(extracted.len(), 2);
}
// `#` and `//` comment lines are skipped, including a commented-out
// provisioner block.
#[test]
fn terraform_ignores_comments() {
let content = r#"
# This is a comment with rm -rf /
// This is also a comment with rm -rf
resource "null_resource" "test" {
# provisioner "local-exec" { command = "rm -rf" }
provisioner "local-exec" {
command = "rm -rf /actual"
}
}
"#;
let extracted = extract_terraform_from_str("main.tf", content, &["rm"]);
assert_eq!(extracted.len(), 1);
assert_eq!(extracted[0].command, "rm -rf /actual");
}
// A `}` with a trailing comment still closes the provisioner block, so the
// following `triggers` map must not be treated as provisioner content.
#[test]
fn terraform_block_end_with_comment_stops_extraction() {
let content = r#"
resource "null_resource" "test" {
provisioner "local-exec" {
command = "rm -rf /actual"
} // end local-exec
triggers = {
command = "rm -rf /should-not-extract"
}
}
"#;
let extracted = extract_terraform_from_str("main.tf", content, &["rm"]);
assert_eq!(extracted.len(), 1);
assert_eq!(extracted[0].command, "rm -rf /actual");
}
// --- docker-compose extractor tests ---

// Path detection: the four canonical compose file names, case-insensitive,
// in any directory; no prefixes, other extensions, or backup suffixes.
#[test]
fn is_docker_compose_path_detects_correctly() {
assert!(is_docker_compose_path(Path::new("docker-compose.yml")));
assert!(is_docker_compose_path(Path::new("docker-compose.yaml")));
assert!(is_docker_compose_path(Path::new("compose.yml")));
assert!(is_docker_compose_path(Path::new("compose.yaml")));
assert!(is_docker_compose_path(Path::new(
"/foo/bar/docker-compose.yml"
)));
assert!(is_docker_compose_path(Path::new("Docker-Compose.YML")));
assert!(is_docker_compose_path(Path::new("COMPOSE.YAML")));
assert!(!is_docker_compose_path(Path::new("docker-compose.json")));
assert!(!is_docker_compose_path(Path::new("my-docker-compose.yml")));
assert!(!is_docker_compose_path(Path::new("compose.yml.bak")));
}
// A service's scalar `command:` is extracted verbatim.
#[test]
fn docker_compose_extracts_inline_command() {
let content = r"
services:
app:
image: alpine
command: rm -rf /data
";
let extracted = extract_docker_compose_from_str("docker-compose.yml", content, &["rm"]);
assert_eq!(extracted.len(), 1);
assert_eq!(extracted[0].command, "rm -rf /data");
assert_eq!(extracted[0].extractor_id, "docker_compose.command");
}
// `entrypoint:` is treated like `command:`.
#[test]
fn docker_compose_extracts_entrypoint() {
let content = r#"
services:
app:
entrypoint: /bin/sh -c "rm -rf /tmp/*"
"#;
let extracted = extract_docker_compose_from_str("docker-compose.yml", content, &["rm"]);
assert_eq!(extracted.len(), 1);
assert!(extracted[0].command.contains("rm -rf"));
}
// The exec (array) form of `command:` is joined/scanned as one command.
#[test]
fn docker_compose_extracts_array_command() {
let content = r#"
services:
app:
command: ["sh", "-c", "rm -rf /cache"]
"#;
let extracted = extract_docker_compose_from_str("docker-compose.yml", content, &["rm"]);
assert_eq!(extracted.len(), 1);
assert!(extracted[0].command.contains("rm"));
}
// Commas inside quoted array elements must not be treated as separators.
#[test]
fn docker_compose_array_command_preserves_commas_in_quotes() {
let content = r#"
services:
app:
command: ["sh", "-c", "echo a,b && rm -rf /cache"]
"#;
let extracted = extract_docker_compose_from_str("docker-compose.yml", content, &["rm"]);
assert_eq!(extracted.len(), 1);
assert!(extracted[0].command.contains("a,b"));
}
// With an empty keyword list, every command is extracted unfiltered.
#[test]
fn docker_compose_extracts_array_command_without_keywords() {
let content = r#"
services:
app:
command: ["sh", "-c", "rm -rf /cache"]
"#;
let extracted = extract_docker_compose_from_str("docker-compose.yml", content, &[]);
assert_eq!(extracted.len(), 1);
assert!(extracted[0].command.contains("rm"));
}
// `environment:` values are data, not commands — even when the service also
// has a `command:`, keyword hits inside environment must not be reported.
#[test]
fn docker_compose_ignores_environment() {
let content = r#"
services:
app:
environment:
CLEANUP: "rm -rf /"
DANGER: "kubectl delete"
command: echo safe
"#;
let extracted =
extract_docker_compose_from_str("docker-compose.yml", content, &["rm", "kubectl"]);
assert!(extracted.is_empty());
}
// `labels:` values are metadata, not commands.
#[test]
fn docker_compose_ignores_labels() {
let content = r#"
services:
app:
labels:
description: "Uses rm -rf for cleanup"
command: echo hello
"#;
let extracted = extract_docker_compose_from_str("docker-compose.yml", content, &["rm"]);
assert!(extracted.is_empty());
}
// `healthcheck.test` IS executed by the engine, so it is extracted.
#[test]
fn docker_compose_extracts_healthcheck_test() {
let content = r"
services:
db:
healthcheck:
test: rm -rf /health/check
interval: 30s
";
let extracted = extract_docker_compose_from_str("docker-compose.yml", content, &["rm"]);
assert_eq!(extracted.len(), 1);
assert!(extracted[0].command.contains("rm -rf"));
}
// An empty file yields nothing (no panic).
#[test]
fn docker_compose_handles_empty_file() {
let content = "";
let extracted = extract_docker_compose_from_str("docker-compose.yml", content, &["rm"]);
assert!(extracted.is_empty());
}
// A compose file without a `services:` section yields nothing.
#[test]
fn docker_compose_handles_no_services() {
let content = r#"
version: "3"
networks:
default:
"#;
let extracted = extract_docker_compose_from_str("docker-compose.yml", content, &["rm"]);
assert!(extracted.is_empty());
}
// --- Dockerfile extractor tests ---

// Deeply continued RUN (many trailing backslashes) is joined into a single
// command containing all fragments.
#[test]
fn dockerfile_multiline_deep_nesting() {
let content = "FROM alpine\n\
RUN apt-get update \\\n\
&& apt-get install -y \\\n\
curl \\\n\
wget \\\n\
git \\\n\
vim \\\n\
&& rm -rf /var/lib/apt/lists/*";
let extracted = extract_dockerfile_from_str("Dockerfile", content, &["apt", "rm"]);
assert_eq!(extracted.len(), 1);
assert!(extracted[0].command.contains("apt-get update"));
assert!(extracted[0].command.contains("rm -rf"));
assert!(extracted[0].command.contains("curl"));
}
// A `#` comment whose text ends in `\` must not break continuation handling.
#[test]
fn dockerfile_multiline_with_inline_comment() {
let content = "FROM alpine\n\
RUN apt-get update # update packages \\\n\
&& apt-get install curl";
let extracted = extract_dockerfile_from_str("Dockerfile", content, &["apt"]);
assert_eq!(extracted.len(), 1);
assert!(extracted[0].command.contains("apt-get update"));
}
// Escaped backslashes inside a quoted string are not line continuations.
#[test]
fn dockerfile_backslash_in_quoted_string() {
let content = r#"FROM alpine
RUN echo "path\\with\\backslashes" && rm -rf /tmp"#;
let extracted = extract_dockerfile_from_str("Dockerfile", content, &["rm"]);
assert_eq!(extracted.len(), 1);
assert!(extracted[0].command.contains("rm -rf /tmp"));
}
// Exec-form RUN (JSON array) spanning a continuation is still recognized and
// tagged with the exec-form id.
#[test]
fn dockerfile_exec_form_with_continuation() {
let content = "FROM alpine\n\
RUN [\"sh\", \"-c\", \\\n\
\"rm -rf /tmp && echo done\"]";
let extracted = extract_dockerfile_from_str("Dockerfile", content, &["rm"]);
assert_eq!(extracted.len(), 1);
assert_eq!(extracted[0].extractor_id, "dockerfile.run.exec");
assert!(extracted[0].command.contains("rm -rf"));
}
// A commented-out RUN line is not a command.
#[test]
fn dockerfile_no_false_positive_on_comment() {
let content = "FROM alpine\n\
# RUN rm -rf /\n\
RUN echo safe";
let extracted = extract_dockerfile_from_str("Dockerfile", content, &["rm"]);
assert!(extracted.is_empty());
}
// LABEL values are metadata, not commands.
#[test]
fn dockerfile_no_false_positive_on_label() {
let content = "FROM alpine\n\
LABEL description=\"rm -rf cleanup script\"\n\
RUN echo safe";
let extracted = extract_dockerfile_from_str("Dockerfile", content, &["rm"]);
assert!(extracted.is_empty());
}
// ENV values are data, not commands.
#[test]
fn dockerfile_no_false_positive_on_env() {
let content = "FROM alpine\n\
ENV CLEANUP_CMD=\"rm -rf /tmp\"\n\
RUN echo safe";
let extracted = extract_dockerfile_from_str("Dockerfile", content, &["rm"]);
assert!(
extracted.is_empty(),
"ENV values should not be extracted as commands: got {extracted:?}"
);
}
// A bare `RUN` with no argument is skipped without error.
#[test]
fn dockerfile_empty_run_instruction() {
let content = "FROM alpine\nRUN\nRUN apt-get update";
let extracted = extract_dockerfile_from_str("Dockerfile", content, &["apt"]);
assert_eq!(
extracted.len(),
1,
"Expected 1 command from non-empty RUN, got: {extracted:?}"
);
assert_eq!(extracted[0].command, "apt-get update");
}
// `RUN ` followed by only whitespace is likewise skipped.
#[test]
fn dockerfile_run_with_only_whitespace() {
let content = "FROM alpine\nRUN \nRUN apt-get update";
let extracted = extract_dockerfile_from_str("Dockerfile", content, &["apt"]);
assert_eq!(
extracted.len(),
1,
"Expected 1 command (whitespace-only RUN skipped), got: {extracted:?}"
);
}
// Dockerfile instructions are case-insensitive: `run` == `RUN`.
#[test]
fn dockerfile_case_insensitive_run() {
let content = "FROM alpine\nrun apt-get update\nRUN apt-get install";
let extracted = extract_dockerfile_from_str("Dockerfile", content, &["apt"]);
assert_eq!(
extracted.len(),
2,
"Both 'run' and 'RUN' should be extracted: {extracted:?}"
);
}
// Mixed case (`RuN`) also matches.
#[test]
fn dockerfile_mixed_case_run() {
let content = "FROM alpine\nRuN apt-get update";
let extracted = extract_dockerfile_from_str("Dockerfile", content, &["apt"]);
assert_eq!(
extracted.len(),
1,
"Mixed case 'RuN' should be extracted: {extracted:?}"
);
}
// Reported line numbers are 1-based positions in the Dockerfile.
#[test]
fn dockerfile_line_numbers_accurate() {
let content = "FROM alpine\n\nRUN echo line3\n\nRUN echo line5";
let extracted = extract_dockerfile_from_str("Dockerfile", content, &["echo"]);
assert_eq!(extracted.len(), 2, "Expected 2 RUN commands: {extracted:?}");
assert_eq!(
extracted[0].line, 3,
"First RUN should be on line 3: {extracted:?}"
);
assert_eq!(
extracted[1].line, 5,
"Second RUN should be on line 5: {extracted:?}"
);
}
// Shell vs exec form produce distinct extractor ids.
#[test]
fn dockerfile_extractor_id_correct() {
let content = "FROM alpine\nRUN echo shell\nRUN [\"echo\", \"exec\"]";
let extracted = extract_dockerfile_from_str("Dockerfile", content, &["echo"]);
assert_eq!(extracted.len(), 2, "Expected 2 commands: {extracted:?}");
assert_eq!(
extracted[0].extractor_id, "dockerfile.run",
"Shell form should have 'dockerfile.run' extractor_id"
);
assert_eq!(
extracted[1].extractor_id, "dockerfile.run.exec",
"Exec form should have 'dockerfile.run.exec' extractor_id"
);
}
// The keyword list filters which RUN commands are reported.
#[test]
fn dockerfile_keyword_filtering_works() {
let content = "FROM alpine\nRUN apt-get update\nRUN npm install\nRUN pip install";
let extracted = extract_dockerfile_from_str("Dockerfile", content, &["apt"]);
assert_eq!(
extracted.len(),
1,
"Only apt command should be extracted with 'apt' keyword: {extracted:?}"
);
assert!(
extracted[0].command.contains("apt"),
"Extracted command should contain 'apt': {extracted:?}"
);
}
// An empty keyword list means no filtering: everything is reported.
#[test]
fn dockerfile_empty_keywords_extracts_all() {
let content = "FROM alpine\nRUN apt-get update\nRUN npm install\nRUN pip install";
let extracted = extract_dockerfile_from_str("Dockerfile", content, &[]);
assert_eq!(
extracted.len(),
3,
"All 3 RUN commands should be extracted with empty keywords: {extracted:?}"
);
}
// Continuation lines with arbitrary indentation still join into one command.
#[test]
fn makefile_continuation_with_different_indentation() {
let content = "build:\n\
\tgcc -Wall \\\n\
-O2 \\\n\
-o main main.c";
let extracted = extract_makefile_from_str("Makefile", content, &["gcc"]);
assert_eq!(extracted.len(), 1);
assert!(extracted[0].command.contains("gcc"));
assert!(extracted[0].command.contains("-O2"));
}
// Several chained continuations still form a single command, first fragment
// through last.
#[test]
fn makefile_continuation_multiple_lines() {
let content = "install:\n\
\tmake \\\n\
build \\\n\
test \\\n\
deploy";
let extracted = extract_makefile_from_str("Makefile", content, &["make"]);
assert_eq!(extracted.len(), 1);
assert!(extracted[0].command.contains("make"));
assert!(extracted[0].command.contains("deploy"));
}
// Recipe lines prefixed with `@` (silent), `-` (ignore errors), or `+`
// (always execute) must all still be extracted as commands.
#[test]
fn makefile_recipe_prefixes() {
let content = "all:\n\
\t@echo silent\n\
\t-rm -f maybe_missing\n\
\t+make recursive";
let extracted = extract_makefile_from_str("Makefile", content, &["echo", "rm", "make"]);
assert_eq!(extracted.len(), 3);
assert!(extracted.iter().any(|e| e.command.contains("echo")));
assert!(extracted.iter().any(|e| e.command.contains("rm")));
// Previously unchecked: the `+`-prefixed recipe line must also be present.
assert!(extracted.iter().any(|e| e.command.contains("make")));
}
#[test]
fn makefile_no_false_positive_on_variable() {
    // `rm` appears only inside a variable assignment, never in a recipe line,
    // so the extractor must report nothing.
    let content = "CLEAN_CMD = rm -rf build\n\nall:\n\techo building";
    let hits = extract_makefile_from_str("Makefile", content, &["rm"]);
    assert!(hits.is_empty());
}
#[test]
fn makefile_no_false_positive_on_comment() {
    // A commented-out recipe line is not a command and must be ignored.
    let content = "all:\n\t# rm -rf dangerous\n\techo safe";
    let hits = extract_makefile_from_str("Makefile", content, &["rm"]);
    assert!(hits.is_empty());
}
// Shell comment lines inside a `run: |` literal block are skipped; only the
// real command line matching the keyword is extracted, verbatim.
#[test]
fn github_actions_literal_block_with_comments() {
let content = r"jobs:
build:
steps:
- run: |
# This is a comment
echo hello
# rm -rf / (this should not be extracted)
rm -rf ./build
";
let extracted =
extract_github_actions_workflow_from_str(".github/workflows/ci.yml", content, &["rm"]);
assert_eq!(extracted.len(), 1);
assert_eq!(extracted[0].command, "rm -rf ./build");
}
// With several commands in one `run: |` block, only the keyword-matching
// line is reported — the non-matching npm commands are not extracted.
#[test]
fn github_actions_literal_block_multiple_commands() {
let content = r"jobs:
deploy:
steps:
- run: |
npm install
npm run build
rm -rf dist/old
";
let extracted =
extract_github_actions_workflow_from_str(".github/workflows/ci.yml", content, &["rm"]);
assert_eq!(extracted.len(), 1);
assert!(extracted[0].command.contains("rm -rf"));
}
// A folded (`run: >`) block scalar is also recognized and yields exactly one
// extracted command.
#[test]
fn github_actions_folded_block() {
let content = r"jobs:
build:
steps:
- run: >
echo this is a very long command that
spans multiple lines and gets folded
";
let extracted = extract_github_actions_workflow_from_str(
".github/workflows/ci.yml",
content,
&["echo"],
);
assert_eq!(extracted.len(), 1);
}
// NOTE(review): despite the test name, this fixture contains no blank line
// inside the literal block — consider inserting one between the commands so
// the empty-line handling is actually exercised.
#[test]
fn github_actions_literal_block_with_empty_lines() {
let content = r"jobs:
test:
steps:
- run: |
echo start
rm -rf ./temp
echo end
";
let extracted =
extract_github_actions_workflow_from_str(".github/workflows/ci.yml", content, &["rm"]);
assert_eq!(extracted.len(), 1);
assert!(extracted[0].command.contains("rm -rf"));
}
// A keyword hit inside an `env:` value is data, not a command, and must not
// be extracted.
#[test]
fn github_actions_no_false_positive_on_env() {
let content = r"jobs:
build:
steps:
- name: Build
env:
CLEANUP: rm -rf /tmp
run: echo safe
";
let extracted =
extract_github_actions_workflow_from_str(".github/workflows/ci.yml", content, &["rm"]);
assert!(extracted.is_empty());
}
// A keyword hit inside a `with:` input value (action parameters) must not be
// treated as a run command.
#[test]
fn github_actions_no_false_positive_on_with() {
let content = r"jobs:
build:
steps:
- uses: some/action@v1
with:
command: rm -rf /dangerous
- run: echo safe
";
let extracted =
extract_github_actions_workflow_from_str(".github/workflows/ci.yml", content, &["rm"]);
assert!(extracted.is_empty());
}
// A keyword appearing in a step's display `name:` is cosmetic text and must
// not be extracted as a command.
#[test]
fn github_actions_no_false_positive_on_name() {
let content = r"jobs:
build:
steps:
- name: rm -rf cleanup step
run: echo safe
";
let extracted =
extract_github_actions_workflow_from_str(".github/workflows/ci.yml", content, &["rm"]);
assert!(extracted.is_empty());
}
// A double-quoted scalar on `run:` is still recognized and extracted.
#[test]
fn github_actions_quoted_run_value() {
let content = r#"jobs:
build:
steps:
- run: "rm -rf ./build"
"#;
let extracted =
extract_github_actions_workflow_from_str(".github/workflows/ci.yml", content, &["rm"]);
assert_eq!(extracted.len(), 1);
assert!(extracted[0].command.contains("rm -rf"));
}
#[test]
fn shell_extractor_extracts_quoted_keyword_conservatively() {
    // The keyword appears only inside a quoted argument; the extractor is
    // conservative and still surfaces the enclosing echo line.
    let script = "#!/bin/bash\necho 'rm -rf /'";
    let hits = extract_shell_script_from_str("script.sh", script, &["rm"]);
    assert_eq!(hits.len(), 1);
    assert!(hits[0].command.contains("echo"));
}
#[test]
fn shell_extractor_actual_rm_command() {
    // A genuine rm invocation is extracted.
    let script = "#!/bin/bash\nrm -rf ./build";
    let hits = extract_shell_script_from_str("script.sh", script, &["rm"]);
    assert_eq!(1, hits.len());
    assert!(hits[0].command.contains("rm -rf"));
}
#[test]
fn shell_extractor_variable_assignment() {
    // `rm` inside a quoted variable assignment is data, not an executed command.
    let script = "#!/bin/bash\nCMD=\"rm -rf /tmp\"\necho done";
    let hits = extract_shell_script_from_str("script.sh", script, &["rm"]);
    assert!(hits.is_empty());
}
#[test]
fn shell_extractor_comment_only() {
    // A keyword appearing only in a comment line must not produce a finding.
    let script = "#!/bin/bash\n# rm -rf /\necho safe";
    let hits = extract_shell_script_from_str("script.sh", script, &["rm"]);
    assert!(hits.is_empty());
}
#[test]
fn shell_extractor_line_continuation() {
    // A backslash-continued command still matches and yields one extraction.
    let script = "#!/bin/bash\nrm -rf \\\n./build";
    let hits = extract_shell_script_from_str("script.sh", script, &["rm"]);
    assert_eq!(hits.len(), 1);
    assert!(hits[0].command.contains("rm"));
}
#[test]
fn shell_extractor_extracts_dangerous_line_only() {
    // Only the keyword-matching line is extracted, not the whole script.
    let script = "#!/bin/bash\necho 'safe text'\nls -la\nrm -rf ./actual/danger";
    let hits = extract_shell_script_from_str("script.sh", script, &["rm"]);
    assert_eq!(hits.len(), 1);
    assert!(hits[0].command.contains("rm -rf ./actual/danger"));
}
#[test]
fn shell_extractor_multiple_keyword_matches() {
    // Each matching line is extracted independently.
    let script = "#!/bin/bash\nrm -rf ./build\necho 'done'\nrm -rf ./dist";
    let hits = extract_shell_script_from_str("script.sh", script, &["rm"]);
    assert_eq!(hits.len(), 2);
    for target in ["./build", "./dist"] {
        assert!(hits.iter().any(|e| e.command.contains(target)));
    }
}
}