#![allow(clippy::too_many_lines)]
pub mod apigateway;
pub mod backup;
pub mod cdn;
pub mod cicd;
pub mod cloud;
pub mod containers;
pub mod core;
pub mod database;
pub mod dns;
pub mod email;
pub mod external;
pub mod featureflags;
pub mod infrastructure;
pub mod kubernetes;
pub mod loadbalancer;
pub mod messaging;
pub mod monitoring;
pub mod package_managers;
pub mod payment;
pub mod platform;
pub mod regex_engine;
pub mod remote;
pub mod safe;
pub mod search;
pub mod secrets;
pub mod storage;
pub mod strict_git;
pub mod system;
pub mod test_helpers;
#[cfg(test)]
mod test_template;
pub use crate::normalize::normalize_command;
use memchr::memmem;
use regex_engine::LazyCompiledRegex;
use serde::Serialize;
use smallvec::SmallVec;
use std::collections::{HashMap, HashSet};
use std::sync::{LazyLock, OnceLock};
pub type PackId = String;
/// Severity assigned to a destructive pattern match.
///
/// Serialized in lowercase (e.g. `"critical"`) for machine-readable output.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum Severity {
    Critical,
    // Patterns that do not specify a severity get High (see `destructive_pattern!`).
    #[default]
    High,
    Medium,
    Low,
}
impl Severity {
    /// Decision mode applied when no explicit override is configured:
    /// Critical/High deny, Medium warns, Low only logs.
    #[must_use]
    pub const fn default_mode(&self) -> DecisionMode {
        match self {
            Self::Critical | Self::High => DecisionMode::Deny,
            Self::Medium => DecisionMode::Warn,
            Self::Low => DecisionMode::Log,
        }
    }

    /// Whether a match at this severity blocks the command by default.
    /// Derived from [`Self::default_mode`] so the two can never disagree.
    #[must_use]
    pub const fn blocks_by_default(&self) -> bool {
        self.default_mode().blocks()
    }

    /// Lowercase display label for this severity.
    #[must_use]
    pub const fn label(&self) -> &'static str {
        match self {
            Self::Critical => "critical",
            Self::High => "high",
            Self::Medium => "medium",
            Self::Low => "low",
        }
    }
}
/// What the guard does when a destructive pattern matches.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum DecisionMode {
    // Refuse to run the command.
    #[default]
    Deny,
    // Allow the command but surface a warning.
    Warn,
    // Allow the command and only record the match.
    Log,
}
impl DecisionMode {
    /// `true` only for [`Self::Deny`]; Warn and Log let the command run.
    #[must_use]
    pub const fn blocks(&self) -> bool {
        match self {
            Self::Deny => true,
            Self::Warn | Self::Log => false,
        }
    }

    /// Lowercase display label for this mode.
    #[must_use]
    pub const fn label(&self) -> &'static str {
        match self {
            Self::Deny => "deny",
            Self::Warn => "warn",
            Self::Log => "log",
        }
    }
}
/// Platform a `PatternSuggestion` applies to; serialized lowercase.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum Platform {
    #[default]
    All,
    Linux,
    MacOS,
    Windows,
    // Covers FreeBSD, OpenBSD, and NetBSD (see `matches_current`).
    Bsd,
}
impl Platform {
    /// Whether this platform variant applies to the OS this binary was
    /// compiled for (compile-time `cfg!` checks, not a runtime probe).
    #[must_use]
    pub const fn matches_current(&self) -> bool {
        match self {
            Self::All => true,
            Self::Linux => cfg!(target_os = "linux"),
            Self::MacOS => cfg!(target_os = "macos"),
            Self::Windows => cfg!(target_os = "windows"),
            Self::Bsd => cfg!(any(
                target_os = "freebsd",
                target_os = "openbsd",
                target_os = "netbsd"
            )),
        }
    }

    /// Lowercase display label for this platform.
    #[must_use]
    pub const fn label(&self) -> &'static str {
        match self {
            Self::All => "all",
            Self::Linux => "linux",
            Self::MacOS => "macos",
            Self::Windows => "windows",
            Self::Bsd => "bsd",
        }
    }
}
/// A safer alternative command offered alongside a destructive match.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct PatternSuggestion {
    // The suggested replacement command line.
    pub command: &'static str,
    // Human-readable explanation of why the suggestion is safer.
    pub description: &'static str,
    // Platform the suggestion is valid on (`Platform::All` by default).
    pub platform: Platform,
}
impl PatternSuggestion {
    /// Suggestion that applies on every platform.
    #[must_use]
    pub const fn new(command: &'static str, description: &'static str) -> Self {
        Self::with_platform(command, description, Platform::All)
    }

    /// Suggestion restricted to a specific platform.
    #[must_use]
    pub const fn with_platform(
        command: &'static str,
        description: &'static str,
        platform: Platform,
    ) -> Self {
        Self {
            command,
            description,
            platform,
        }
    }
}
/// A regex describing a command that is known to be safe; a safe match
/// short-circuits any destructive match in the same pack.
pub struct SafePattern {
    // Lazily compiled — see `regex_engine::LazyCompiledRegex`.
    pub regex: LazyCompiledRegex,
    pub name: &'static str,
}
// Manual Debug: render the regex *source text* under the key "pattern"
// rather than relying on `LazyCompiledRegex`'s own representation.
impl std::fmt::Debug for SafePattern {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("SafePattern")
            .field("pattern", &self.regex.as_str())
            .field("name", &self.name)
            .finish()
    }
}
/// A regex describing a destructive command, plus the metadata reported
/// when it matches (reason, severity, optional explanation/suggestions).
pub struct DestructivePattern {
    // Lazily compiled — see `regex_engine::LazyCompiledRegex`.
    pub regex: LazyCompiledRegex,
    // Short reason shown to the user when the pattern matches.
    pub reason: &'static str,
    // Optional stable identifier for the pattern.
    pub name: Option<&'static str>,
    pub severity: Severity,
    // Optional longer explanation of the danger.
    pub explanation: Option<&'static str>,
    // Safer alternatives to offer; empty slice when none.
    pub suggestions: &'static [PatternSuggestion],
}
// Manual Debug: render the regex source text as "pattern"; all other
// fields use their derived representations.
impl std::fmt::Debug for DestructivePattern {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("DestructivePattern")
            .field("pattern", &self.regex.as_str())
            .field("reason", &self.reason)
            .field("name", &self.name)
            .field("severity", &self.severity)
            .field("explanation", &self.explanation)
            .field("suggestions", &self.suggestions)
            .finish()
    }
}
/// Builds a [`SafePattern`] literal: `safe_pattern!("name", r"^regex$")`.
/// The regex is stored uncompiled and compiled on first use.
#[macro_export]
macro_rules! safe_pattern {
    ($name:literal, $re:literal) => {
        $crate::packs::SafePattern {
            regex: $crate::packs::regex_engine::LazyCompiledRegex::new($re),
            name: $name,
        }
    };
}
/// Builds a [`DestructivePattern`] literal. Arms by arity:
///
/// - `(regex, reason)` — anonymous, severity `High`
/// - `(name, regex, reason)` — named, severity `High`
/// - `(name, regex, reason, Severity)` — explicit severity
/// - `(name, regex, reason, Severity, explanation)`
/// - `(name, regex, reason, Severity, explanation, suggestions)`
///
/// `High` (the default here) denies by default — see `Severity::default_mode`.
#[macro_export]
macro_rules! destructive_pattern {
    ($re:literal, $reason:literal) => {
        $crate::packs::DestructivePattern {
            regex: $crate::packs::regex_engine::LazyCompiledRegex::new($re),
            reason: $reason,
            name: None,
            severity: $crate::packs::Severity::High,
            explanation: None,
            suggestions: &[],
        }
    };
    ($name:literal, $re:literal, $reason:literal) => {
        $crate::packs::DestructivePattern {
            regex: $crate::packs::regex_engine::LazyCompiledRegex::new($re),
            reason: $reason,
            name: Some($name),
            severity: $crate::packs::Severity::High,
            explanation: None,
            suggestions: &[],
        }
    };
    ($name:literal, $re:literal, $reason:literal, $severity:ident) => {
        $crate::packs::DestructivePattern {
            regex: $crate::packs::regex_engine::LazyCompiledRegex::new($re),
            reason: $reason,
            name: Some($name),
            severity: $crate::packs::Severity::$severity,
            explanation: None,
            suggestions: &[],
        }
    };
    ($name:literal, $re:literal, $reason:literal, $severity:ident, $explanation:literal) => {
        $crate::packs::DestructivePattern {
            regex: $crate::packs::regex_engine::LazyCompiledRegex::new($re),
            reason: $reason,
            name: Some($name),
            severity: $crate::packs::Severity::$severity,
            explanation: Some($explanation),
            suggestions: &[],
        }
    };
    ($name:literal, $re:literal, $reason:literal, $severity:ident, $explanation:literal, $suggestions:expr) => {
        $crate::packs::DestructivePattern {
            regex: $crate::packs::regex_engine::LazyCompiledRegex::new($re),
            reason: $reason,
            name: Some($name),
            severity: $crate::packs::Severity::$severity,
            explanation: Some($explanation),
            suggestions: $suggestions,
        }
    };
}
/// A themed collection of safe and destructive command patterns
/// (e.g. "core.git"), plus lazily built matching accelerators.
#[derive(Debug)]
pub struct Pack {
    pub id: PackId,
    pub name: &'static str,
    pub description: &'static str,
    // Substring keywords used as a cheap pre-filter before any regex runs.
    pub keywords: &'static [&'static str],
    pub safe_patterns: Vec<SafePattern>,
    pub destructive_patterns: Vec<DestructivePattern>,
    // Built lazily in `PackEntry::get_pack`; `None` until then.
    pub keyword_matcher: Option<aho_corasick::AhoCorasick>,
    // Combined RegexSet over the safe patterns that the linear-time engine
    // can handle; built lazily in `PackEntry::get_pack`.
    pub safe_regex_set: Option<regex::RegexSet>,
    // True when `safe_regex_set` covers *every* safe pattern, so a set miss
    // is definitive and the per-pattern fallback can be skipped.
    pub safe_regex_set_is_complete: bool,
}
impl Pack {
#[must_use]
pub const fn new(
id: PackId,
name: &'static str,
description: &'static str,
keywords: &'static [&'static str],
safe_patterns: Vec<SafePattern>,
destructive_patterns: Vec<DestructivePattern>,
) -> Self {
Self {
id,
name,
description,
keywords,
safe_patterns,
destructive_patterns,
keyword_matcher: None,
safe_regex_set: None,
safe_regex_set_is_complete: false,
}
}
#[must_use]
pub fn might_match(&self, cmd: &str) -> bool {
if self.keywords.is_empty() {
return true; }
if let Some(ref ac) = self.keyword_matcher {
if ac.is_match(cmd) {
return true;
}
if !self
.keywords
.iter()
.any(|kw| keyword_contains_whitespace(kw))
{
return false;
}
return self
.keywords
.iter()
.any(|kw| keyword_contains_whitespace(kw) && keyword_matches_substring(cmd, kw));
}
self.keywords
.iter()
.any(|kw| keyword_matches_substring(cmd, kw))
}
#[must_use]
pub fn matches_safe(&self, cmd: &str) -> bool {
if let Some(ref set) = self.safe_regex_set {
if set.is_match(cmd) {
return true;
}
if self.safe_regex_set_is_complete {
return false;
}
}
self.safe_patterns.iter().any(|p| p.regex.is_match(cmd))
}
#[must_use]
pub fn matches_destructive(&self, cmd: &str) -> Option<DestructiveMatch> {
self.destructive_patterns
.iter()
.find(|p| p.regex.is_match(cmd))
.map(|p| DestructiveMatch {
reason: p.reason,
name: p.name,
severity: p.severity,
explanation: p.explanation,
})
}
#[must_use]
pub fn check(&self, cmd: &str) -> Option<DestructiveMatch> {
if !self.might_match(cmd) {
return None;
}
if self.matches_safe(cmd) {
return None;
}
self.matches_destructive(cmd)
}
}
/// Metadata extracted from the `DestructivePattern` that matched a command.
#[derive(Debug, Clone)]
pub struct DestructiveMatch {
    pub reason: &'static str,
    pub name: Option<&'static str>,
    pub severity: Severity,
    pub explanation: Option<&'static str>,
}
/// Outcome of checking one command against the registry. All `Option`
/// fields are `None` when the command is allowed.
#[derive(Debug)]
pub struct CheckResult {
    // True only when the decision mode actually blocks (Deny).
    pub blocked: bool,
    pub reason: Option<String>,
    pub pack_id: Option<PackId>,
    pub pattern_name: Option<String>,
    pub severity: Option<Severity>,
    pub decision_mode: Option<DecisionMode>,
}
impl CheckResult {
    /// Result for a command with no destructive match: nothing blocked,
    /// no metadata.
    #[must_use]
    pub const fn allowed() -> Self {
        Self {
            blocked: false,
            reason: None,
            pack_id: None,
            pattern_name: None,
            severity: None,
            decision_mode: None,
        }
    }

    /// Result for a destructive match. `blocked` is derived from the
    /// severity's default decision mode, so Medium/Low matches are reported
    /// without blocking.
    #[must_use]
    pub fn blocked(
        reason: &str,
        pack_id: &str,
        pattern_name: Option<&str>,
        severity: Severity,
    ) -> Self {
        let mode = severity.default_mode();
        Self {
            blocked: mode.blocks(),
            reason: Some(reason.to_owned()),
            pack_id: Some(pack_id.to_owned()),
            pattern_name: pattern_name.map(str::to_owned),
            severity: Some(severity),
            decision_mode: Some(mode),
        }
    }

    /// Alias for [`Self::blocked`] kept for call-site readability — the name
    /// reflects that a match does not necessarily block.
    #[must_use]
    pub fn matched(
        reason: &str,
        pack_id: &str,
        pattern_name: Option<&str>,
        severity: Severity,
    ) -> Self {
        Self::blocked(reason, pack_id, pattern_name, severity)
    }
}
/// Registry slot for a built-in pack: static id/keywords plus a builder
/// function; the `Pack` itself is constructed lazily on first use.
pub struct PackEntry {
    pub id: &'static str,
    // Fast-path keywords, usable without building the pack.
    pub keywords: &'static [&'static str],
    builder: fn() -> Pack,
    // Lazily initialized by `get_pack`.
    instance: OnceLock<Pack>,
}
impl PackEntry {
    /// Creates an uninitialized entry; the pack is built on first
    /// [`Self::get_pack`] call.
    pub const fn new(
        id: &'static str,
        keywords: &'static [&'static str],
        builder: fn() -> Pack,
    ) -> Self {
        Self {
            id,
            keywords,
            builder,
            instance: OnceLock::new(),
        }
    }

    /// Returns the pack, building it (plus its keyword automaton and
    /// combined safe-pattern `RegexSet`) exactly once.
    pub fn get_pack(&self) -> &Pack {
        self.instance.get_or_init(|| {
            let mut pack = (self.builder)();
            if pack.keyword_matcher.is_none() && !pack.keywords.is_empty() {
                let ac = aho_corasick::AhoCorasick::new(pack.keywords)
                    .expect("pack keywords should be valid patterns");
                pack.keyword_matcher = Some(ac);
            }
            if pack.safe_regex_set.is_none() && !pack.safe_patterns.is_empty() {
                // Only patterns the linear-time engine can handle go in the set.
                let eligible: Vec<&str> = pack
                    .safe_patterns
                    .iter()
                    .map(|p| p.regex.as_str())
                    .filter(|re| !regex_engine::needs_backtracking_engine(re))
                    .collect();
                // A complete set makes a set miss definitive (see `matches_safe`).
                pack.safe_regex_set_is_complete = eligible.len() == pack.safe_patterns.len();
                if !eligible.is_empty() {
                    pack.safe_regex_set = regex::RegexSet::new(eligible).ok();
                }
            }
            pack
        })
    }

    /// Keyword pre-filter that avoids building the pack: plain substring
    /// search first, then the whitespace-flexible matcher for multi-word
    /// keywords.
    pub fn might_match(&self, cmd: &str) -> bool {
        if self.keywords.is_empty() {
            return true;
        }
        let hay = cmd.as_bytes();
        for kw in self.keywords {
            if memmem::find(hay, kw.as_bytes()).is_some() {
                return true;
            }
        }
        self.keywords
            .iter()
            .any(|kw| keyword_contains_whitespace(kw) && keyword_matches_substring(cmd, kw))
    }

    /// Test hook: whether `get_pack` has run for this entry.
    #[cfg(test)]
    pub fn is_built(&self) -> bool {
        self.instance.get().is_some()
    }
}
/// Registry over all built-in `PackEntry`s, with a category grouping
/// (id prefix before '.') and an id → entry index.
pub struct PackRegistry {
    entries: Vec<&'static PackEntry>,
    // category name -> pack ids in that category.
    categories: HashMap<String, Vec<&'static str>>,
    // pack id -> position in `entries`.
    index: HashMap<&'static str, usize>,
}
/// Precomputed keyword → pack bitmask index over an ordered list of enabled
/// packs (at most 128, one bit per pack), used to shortlist candidate packs
/// for a command in one Aho-Corasick scan.
#[derive(Debug)]
pub struct EnabledKeywordIndex {
    pack_count: usize,
    // Mask with one bit set per indexed pack.
    full_mask: u128,
    // Packs with no keywords: always candidates.
    always_check_mask: u128,
    // One automaton over all single- and multi-word keywords; pattern i maps
    // to `keyword_pack_masks[i]`.
    keyword_matcher: Option<aho_corasick::AhoCorasick>,
    keyword_pack_masks: Vec<u128>,
    // Multi-word keywords retried with flexible whitespace matching;
    // parallel to `whitespace_pack_masks`.
    whitespace_keywords: Vec<&'static str>,
    whitespace_pack_masks: Vec<u128>,
}
impl EnabledKeywordIndex {
    /// Number of packs covered by this index (the number of bits in
    /// `full_mask`).
    #[must_use]
    pub const fn pack_count(&self) -> usize {
        self.pack_count
    }
    /// Returns a bitmask of packs that could match `cmd`; bit i corresponds
    /// to the i-th pack of the ordered list the index was built from.
    #[inline]
    #[must_use]
    pub fn candidate_pack_mask(&self, cmd: &str) -> u128 {
        // Packs without keywords are always candidates.
        let mut mask = self.always_check_mask;
        let Some(ac) = &self.keyword_matcher else {
            return mask;
        };
        for m in ac.find_overlapping_iter(cmd) {
            mask |= self.keyword_pack_masks[m.pattern().as_usize()];
            // Stop scanning once every pack is already a candidate.
            if mask == self.full_mask {
                break;
            }
        }
        // Multi-word keywords separated by runs of whitespace are invisible
        // to the literal automaton; retry them with the flexible matcher.
        if !self.whitespace_keywords.is_empty() && mask != self.full_mask {
            for (keyword, pack_mask) in self
                .whitespace_keywords
                .iter()
                .zip(self.whitespace_pack_masks.iter())
            {
                if keyword_matches_substring(cmd, keyword) {
                    mask |= *pack_mask;
                    if mask == self.full_mask {
                        break;
                    }
                }
            }
        }
        mask
    }
}
/// The full table of built-in packs: (id, fast-path keywords, lazy builder).
/// Keywords act as a cheap substring pre-filter before any regex in the pack
/// is compiled or consulted; the `Pack` itself is built on first use by
/// `PackEntry::get_pack`. Evaluation order across packs is decided by
/// `PackRegistry::pack_tier`, not by position in this array.
static PACK_ENTRIES: [PackEntry; 83] = [
    PackEntry::new("core.git", &["git"], core::git::create_pack),
    PackEntry::new(
        "core.filesystem",
        &["rm", "/rm"],
        core::filesystem::create_pack,
    ),
    PackEntry::new("storage.s3", &["s3", "s3api"], storage::s3::create_pack),
    PackEntry::new(
        "storage.gcs",
        &["gsutil", "gcloud storage"],
        storage::gcs::create_pack,
    ),
    PackEntry::new("storage.minio", &["mc"], storage::minio::create_pack),
    PackEntry::new(
        "storage.azure_blob",
        &["az storage", "azcopy"],
        storage::azure_blob::create_pack,
    ),
    PackEntry::new("remote.rsync", &["rsync"], remote::rsync::create_pack),
    PackEntry::new(
        "remote.ssh",
        &["ssh", "ssh-keygen", "ssh-add", "ssh-agent", "ssh-keyscan"],
        remote::ssh::create_pack,
    ),
    PackEntry::new("remote.scp", &["scp"], remote::scp::create_pack),
    PackEntry::new(
        "cicd.github_actions",
        &["gh"],
        cicd::github_actions::create_pack,
    ),
    PackEntry::new(
        "cicd.gitlab_ci",
        &["glab", "gitlab-runner"],
        cicd::gitlab_ci::create_pack,
    ),
    PackEntry::new(
        "cicd.jenkins",
        &["jenkins-cli", "jenkins", "doDelete"],
        cicd::jenkins::create_pack,
    ),
    PackEntry::new("cicd.circleci", &["circleci"], cicd::circleci::create_pack),
    PackEntry::new("secrets.vault", &["vault"], secrets::vault::create_pack),
    PackEntry::new(
        "secrets.aws_secrets",
        &["aws", "secretsmanager", "ssm"],
        secrets::aws_secrets::create_pack,
    ),
    PackEntry::new(
        "secrets.onepassword",
        &["op"],
        secrets::onepassword::create_pack,
    ),
    PackEntry::new(
        "secrets.doppler",
        &["doppler"],
        secrets::doppler::create_pack,
    ),
    PackEntry::new("platform.github", &["gh"], platform::github::create_pack),
    PackEntry::new(
        "platform.gitlab",
        &["glab", "gitlab-rails", "gitlab-rake"],
        platform::gitlab::create_pack,
    ),
    PackEntry::new(
        "dns.cloudflare",
        &[
            "wrangler",
            "cloudflare",
            "api.cloudflare.com",
            "dns-records",
        ],
        dns::cloudflare::create_pack,
    ),
    PackEntry::new(
        "dns.route53",
        &["aws", "route53"],
        dns::route53::create_pack,
    ),
    PackEntry::new(
        "dns.generic",
        &["nsupdate", "dig", "host", "nslookup"],
        dns::generic::create_pack,
    ),
    PackEntry::new("email.ses", &["ses", "sesv2"], email::ses::create_pack),
    PackEntry::new(
        "email.sendgrid",
        &["sendgrid", "api.sendgrid.com"],
        email::sendgrid::create_pack,
    ),
    PackEntry::new(
        "email.mailgun",
        &["mailgun", "api.mailgun.net"],
        email::mailgun::create_pack,
    ),
    PackEntry::new(
        "email.postmark",
        &["postmark", "api.postmarkapp.com"],
        email::postmark::create_pack,
    ),
    PackEntry::new(
        "featureflags.flipt",
        &["flipt"],
        featureflags::flipt::create_pack,
    ),
    PackEntry::new(
        "featureflags.launchdarkly",
        &["ldcli", "launchdarkly"],
        featureflags::launchdarkly::create_pack,
    ),
    PackEntry::new(
        "featureflags.split",
        &["split", "api.split.io"],
        featureflags::split::create_pack,
    ),
    PackEntry::new(
        "featureflags.unleash",
        &["unleash"],
        featureflags::unleash::create_pack,
    ),
    PackEntry::new(
        "loadbalancer.haproxy",
        &["haproxy", "socat"],
        loadbalancer::haproxy::create_pack,
    ),
    PackEntry::new(
        "loadbalancer.nginx",
        &["nginx", "/etc/nginx"],
        loadbalancer::nginx::create_pack,
    ),
    PackEntry::new(
        "loadbalancer.traefik",
        &["traefik", "ingressroute"],
        loadbalancer::traefik::create_pack,
    ),
    PackEntry::new(
        "loadbalancer.elb",
        &[
            "elbv2",
            "delete-load-balancer",
            "delete-target-group",
            "deregister-targets",
            "delete-listener",
            "delete-rule",
            "deregister-instances-from-load-balancer",
        ],
        loadbalancer::elb::create_pack,
    ),
    PackEntry::new(
        "monitoring.splunk",
        &["splunk"],
        monitoring::splunk::create_pack,
    ),
    PackEntry::new(
        "monitoring.datadog",
        &["datadog-ci", "datadoghq", "datadog"],
        monitoring::datadog::create_pack,
    ),
    PackEntry::new(
        "monitoring.pagerduty",
        &["pd", "pagerduty", "api.pagerduty.com"],
        monitoring::pagerduty::create_pack,
    ),
    PackEntry::new(
        "monitoring.newrelic",
        &["newrelic", "api.newrelic.com", "graphql"],
        monitoring::newrelic::create_pack,
    ),
    PackEntry::new(
        "monitoring.prometheus",
        &[
            "promtool",
            "grafana-cli",
            "/api/v1/admin/tsdb/delete_series",
            "delete_series",
            "/api/dashboards",
            "/api/datasources",
            "/api/alert-notifications",
            "/etc/prometheus",
            "rules.d",
            "prometheusrule",
            "servicemonitor",
            "podmonitor",
        ],
        monitoring::prometheus::create_pack,
    ),
    PackEntry::new(
        "payment.stripe",
        &["stripe", "api.stripe.com"],
        payment::stripe::create_pack,
    ),
    PackEntry::new(
        "payment.braintree",
        &[
            "braintree",
            "braintreegateway.com",
            "braintree.",
            "gateway.customer.",
            "gateway.merchant_account.",
            "gateway.payment_method.",
            "gateway.subscription.",
        ],
        payment::braintree::create_pack,
    ),
    PackEntry::new(
        "payment.square",
        &["square", "api.squareup.com"],
        payment::square::create_pack,
    ),
    PackEntry::new(
        "messaging.kafka",
        &[
            "kafka-topics",
            "kafka-consumer-groups",
            "kafka-configs",
            "kafka-acls",
            "kafka-delete-records",
            "rpk",
        ],
        messaging::kafka::create_pack,
    ),
    PackEntry::new(
        "messaging.rabbitmq",
        &["rabbitmqadmin", "rabbitmqctl"],
        messaging::rabbitmq::create_pack,
    ),
    PackEntry::new("messaging.nats", &["nats"], messaging::nats::create_pack),
    PackEntry::new(
        "messaging.sqs_sns",
        &["aws", "sqs", "sns"],
        messaging::sqs_sns::create_pack,
    ),
    PackEntry::new(
        "search.elasticsearch",
        &[
            "elasticsearch",
            "9200",
            "_search",
            "_cluster",
            "_cat",
            "_doc",
            "_all",
            "_delete_by_query",
        ],
        search::elasticsearch::create_pack,
    ),
    PackEntry::new(
        "search.opensearch",
        &[
            "opensearch",
            "9200",
            "_search",
            "_cluster",
            "_cat",
            "_doc",
            "_all",
            "_delete_by_query",
        ],
        search::opensearch::create_pack,
    ),
    PackEntry::new(
        "search.algolia",
        &["algolia", "algoliasearch"],
        search::algolia::create_pack,
    ),
    PackEntry::new(
        "search.meilisearch",
        &["meili", "meilisearch", "7700", "/indexes", "/keys"],
        search::meilisearch::create_pack,
    ),
    PackEntry::new("backup.borg", &["borg"], backup::borg::create_pack),
    PackEntry::new("backup.rclone", &["rclone"], backup::rclone::create_pack),
    PackEntry::new("backup.restic", &["restic"], backup::restic::create_pack),
    PackEntry::new("backup.velero", &["velero"], backup::velero::create_pack),
    PackEntry::new(
        "database.postgresql",
        &[
            "psql",
            "dropdb",
            "createdb",
            "pg_dump",
            "pg_restore",
            "DROP",
            "TRUNCATE",
            "DELETE",
        ],
        database::postgresql::create_pack,
    ),
    PackEntry::new(
        "database.mysql",
        &["mysql", "mysqldump", "DROP", "TRUNCATE", "DELETE"],
        database::mysql::create_pack,
    ),
    PackEntry::new(
        "database.mongodb",
        &[
            "mongo",
            "mongosh",
            "mongodump",
            "mongorestore",
            "dropDatabase",
            "dropCollection",
        ],
        database::mongodb::create_pack,
    ),
    PackEntry::new(
        "database.redis",
        &["redis-cli", "FLUSHALL", "FLUSHDB", "DEBUG"],
        database::redis::create_pack,
    ),
    PackEntry::new(
        "database.sqlite",
        &["sqlite3", "DROP", "DELETE", "TRUNCATE"],
        database::sqlite::create_pack,
    ),
    PackEntry::new(
        "database.supabase",
        // Multi-word keywords here rely on the whitespace-flexible matcher.
        &[
            "supabase",
            "db reset",
            "db push",
            "migration repair",
            "migration down",
            "migration squash",
            "functions delete",
            "secrets unset",
            "storage rm",
            "projects delete",
            "orgs delete",
            "branches delete",
            "domains delete",
            "vanity-subdomains",
            "sso remove",
            "network-restrictions",
            "config push",
            "stop --no-backup",
        ],
        database::supabase::create_pack,
    ),
    PackEntry::new(
        "containers.docker",
        &["docker"],
        containers::docker::create_pack,
    ),
    PackEntry::new(
        "containers.compose",
        &["docker-compose", "docker compose"],
        containers::compose::create_pack,
    ),
    PackEntry::new(
        "containers.podman",
        &["podman"],
        containers::podman::create_pack,
    ),
    PackEntry::new(
        "kubernetes.kubectl",
        &["kubectl"],
        kubernetes::kubectl::create_pack,
    ),
    PackEntry::new("kubernetes.helm", &["helm"], kubernetes::helm::create_pack),
    PackEntry::new(
        "kubernetes.kustomize",
        &["kustomize"],
        kubernetes::kustomize::create_pack,
    ),
    PackEntry::new("cloud.aws", &["aws"], cloud::aws::create_pack),
    PackEntry::new(
        "cloud.gcp",
        &["gcloud", "gsutil", "bq"],
        cloud::gcp::create_pack,
    ),
    PackEntry::new("cloud.azure", &["az"], cloud::azure::create_pack),
    PackEntry::new(
        "cdn.cloudflare_workers",
        &["wrangler"],
        cdn::cloudflare_workers::create_pack,
    ),
    PackEntry::new("cdn.fastly", &["fastly"], cdn::fastly::create_pack),
    PackEntry::new(
        "cdn.cloudfront",
        &["cloudfront"],
        cdn::cloudfront::create_pack,
    ),
    PackEntry::new(
        "apigateway.aws",
        &["aws", "apigateway", "apigatewayv2"],
        apigateway::aws::create_pack,
    ),
    PackEntry::new(
        "apigateway.kong",
        &["kong", "deck", "8001"],
        apigateway::kong::create_pack,
    ),
    PackEntry::new(
        "apigateway.apigee",
        &["apigee", "apigeecli"],
        apigateway::apigee::create_pack,
    ),
    PackEntry::new(
        "infrastructure.terraform",
        &["terraform", "tofu"],
        infrastructure::terraform::create_pack,
    ),
    PackEntry::new(
        "infrastructure.ansible",
        &["ansible", "ansible-playbook"],
        infrastructure::ansible::create_pack,
    ),
    PackEntry::new(
        "infrastructure.pulumi",
        &["pulumi"],
        infrastructure::pulumi::create_pack,
    ),
    PackEntry::new(
        "system.disk",
        &[
            "dd",
            "mkfs",
            "fdisk",
            "parted",
            "wipefs",
            "mdadm",
            "btrfs",
            "dmsetup",
            "nbd-client",
            "pvremove",
            "vgremove",
            "lvremove",
            "vgreduce",
            "lvreduce",
            "lvresize",
            "pvmove",
            "lvconvert",
        ],
        system::disk::create_pack,
    ),
    PackEntry::new(
        "system.permissions",
        &["chmod", "chown", "setfacl"],
        system::permissions::create_pack,
    ),
    PackEntry::new(
        "system.services",
        &["systemctl", "service"],
        system::services::create_pack,
    ),
    PackEntry::new("strict_git", &["git"], strict_git::create_pack),
    PackEntry::new(
        "package_managers",
        &[
            "npm", "yarn", "pnpm", "pip", "cargo", "gem", "composer", "go",
        ],
        package_managers::create_pack,
    ),
];
impl PackRegistry {
    /// Collects the keywords of all enabled packs (categories expanded),
    /// deduplicated while preserving first-seen order.
    #[must_use]
    pub fn collect_enabled_keywords(&self, enabled_packs: &HashSet<String>) -> Vec<&'static str> {
        let expanded = self.expand_enabled(enabled_packs);
        let mut keywords = Vec::new();
        for pack_id in &expanded {
            if let Some(&idx) = self.index.get(pack_id.as_str()) {
                keywords.extend(self.entries[idx].keywords.iter().copied());
            }
        }
        // Dedup in place, keeping the first occurrence of each keyword.
        let mut seen = HashSet::new();
        keywords.retain(|kw| seen.insert(*kw));
        keywords
    }
    /// Builds the registry over the static `PACK_ENTRIES` table, indexing
    /// entries by id and grouping them by category (the id prefix before '.').
    #[must_use]
    pub fn new() -> Self {
        let mut categories: HashMap<String, Vec<&'static str>> = HashMap::new();
        let mut index: HashMap<&'static str, usize> = HashMap::new();
        for (i, entry) in PACK_ENTRIES.iter().enumerate() {
            // "core.git" -> "core"; ids without '.' are their own category.
            let category = entry.id.split('.').next().unwrap_or(entry.id);
            categories
                .entry(category.to_string())
                .or_default()
                .push(entry.id);
            index.insert(entry.id, i);
        }
        Self {
            entries: PACK_ENTRIES.iter().collect(),
            categories,
            index,
        }
    }
    /// Total number of built-in packs.
    #[must_use]
    pub fn pack_count(&self) -> usize {
        self.entries.len()
    }
    /// Looks up a pack by id, building it on first access.
    #[must_use]
    pub fn get(&self, id: &str) -> Option<&Pack> {
        self.index.get(id).map(|&idx| self.entries[idx].get_pack())
    }
    /// All pack ids, in `PACK_ENTRIES` order.
    #[must_use]
    pub fn all_pack_ids(&self) -> Vec<&'static str> {
        self.entries.iter().map(|e| e.id).collect()
    }
    /// All category names (unordered — HashMap iteration order).
    #[must_use]
    pub fn all_categories(&self) -> Vec<&String> {
        self.categories.keys().collect()
    }
    /// Pack ids in the given category; empty for an unknown category.
    #[must_use]
    pub fn packs_in_category(&self, category: &str) -> Vec<&'static str> {
        self.categories.get(category).cloned().unwrap_or_default()
    }
    /// Expands enabled names: a category name enables every pack in it; each
    /// input name is also kept verbatim (unknown names pass through).
    #[must_use]
    pub fn expand_enabled(&self, enabled: &HashSet<String>) -> HashSet<String> {
        let mut expanded = HashSet::new();
        for id in enabled {
            if let Some(sub_packs) = self.categories.get(id) {
                for &sub_pack in sub_packs {
                    expanded.insert(sub_pack.to_string());
                }
            }
            expanded.insert(id.clone());
        }
        expanded
    }
    /// Expands enabled names and returns only known pack ids, sorted by
    /// evaluation tier (see `pack_tier`) then alphabetically — a
    /// deterministic check order.
    #[must_use]
    pub fn expand_enabled_ordered(&self, enabled: &HashSet<String>) -> Vec<String> {
        let expanded = self.expand_enabled(enabled);
        let mut pack_ids: Vec<String> = expanded
            .into_iter()
            .filter(|id| self.index.contains_key(id.as_str()))
            .collect();
        pack_ids.sort_by(|a, b| {
            let tier_a = Self::pack_tier(a);
            let tier_b = Self::pack_tier(b);
            tier_a.cmp(&tier_b).then_with(|| a.cmp(b))
        });
        pack_ids
    }
    /// Relative evaluation priority of a pack's category (lower checks
    /// first); unknown categories sort last.
    fn pack_tier(pack_id: &str) -> u8 {
        let category = pack_id.split('.').next().unwrap_or(pack_id);
        match category {
            "safe" => 0,
            "core" | "storage" | "remote" => 1,
            "system" => 2,
            "infrastructure" => 3,
            "apigateway" | "cdn" | "cloud" | "dns" | "loadbalancer" | "platform" => 4,
            "kubernetes" => 5,
            "containers" => 6,
            "backup" | "database" | "messaging" | "search" => 7,
            "package_managers" => 8,
            "strict_git" => 9,
            "cicd" | "email" | "featureflags" | "secrets" | "monitoring" | "payment" => 10,
            _ => 11,
        }
    }
    /// Checks `cmd` against every enabled pack. A safe match in *any*
    /// candidate pack allows the command outright; otherwise the first
    /// destructive match (in tier order) is reported.
    #[must_use]
    pub fn check_command(&self, cmd: &str, enabled_packs: &HashSet<String>) -> CheckResult {
        let ordered_packs = self.expand_enabled_ordered(enabled_packs);
        // Narrow to packs whose keyword pre-filter matches before running
        // any regexes.
        let candidate_packs: Vec<(&String, &Pack)> = ordered_packs
            .iter()
            .filter_map(|pack_id| {
                let pack = self.get(pack_id)?;
                if pack.might_match(cmd) {
                    Some((pack_id, pack))
                } else {
                    None
                }
            })
            .collect();
        // Safe patterns across all candidates win over destructive ones.
        for (_pack_id, pack) in &candidate_packs {
            if pack.matches_safe(cmd) {
                return CheckResult::allowed();
            }
        }
        for (pack_id, pack) in &candidate_packs {
            if let Some(matched) = pack.matches_destructive(cmd) {
                return CheckResult::matched(
                    matched.reason,
                    pack_id,
                    matched.name,
                    matched.severity,
                );
            }
        }
        CheckResult::allowed()
    }
    /// Summarizes every built-in pack (forcing each to build), sorted by id,
    /// marking the ones enabled by `enabled` (categories expanded).
    #[must_use]
    pub fn list_packs(&self, enabled: &HashSet<String>) -> Vec<PackInfo> {
        let expanded = self.expand_enabled(enabled);
        let mut infos: Vec<_> = self
            .entries
            .iter()
            .map(|entry| {
                let pack = entry.get_pack();
                PackInfo {
                    id: pack.id.clone(),
                    name: pack.name,
                    description: pack.description,
                    enabled: expanded.contains(&pack.id),
                    safe_pattern_count: pack.safe_patterns.len(),
                    destructive_pattern_count: pack.destructive_patterns.len(),
                }
            })
            .collect();
        infos.sort_by(|a, b| a.id.cmp(&b.id));
        infos
    }
    /// Looks up the raw entry by id without building its pack.
    #[must_use]
    pub fn get_entry(&self, id: &str) -> Option<&PackEntry> {
        self.index.get(id).map(|&idx| self.entries[idx])
    }
    /// Builds an `EnabledKeywordIndex` over `ordered_packs` (bit i = pack i).
    /// Returns `None` when there are more than 128 packs or the automaton
    /// cannot be built.
    #[must_use]
    pub fn build_enabled_keyword_index(
        &self,
        ordered_packs: &[String],
    ) -> Option<EnabledKeywordIndex> {
        // One u128 bit per pack limits the index to 128 packs.
        if ordered_packs.len() > 128 {
            return None;
        }
        let pack_count = ordered_packs.len();
        // `1 << 128` would overflow, so the full-width mask is special-cased.
        let full_mask = if pack_count == 128 {
            u128::MAX
        } else {
            (1u128 << pack_count) - 1
        };
        let mut always_check_mask: u128 = 0;
        let mut keyword_to_index: HashMap<&'static str, usize> = HashMap::new();
        let mut patterns: Vec<&'static str> = Vec::new();
        let mut keyword_pack_masks: Vec<u128> = Vec::new();
        let mut whitespace_keywords: Vec<&'static str> = Vec::new();
        let mut whitespace_pack_masks: Vec<u128> = Vec::new();
        let mut whitespace_keyword_to_index: HashMap<&'static str, usize> = HashMap::new();
        for (pack_idx, pack_id) in ordered_packs.iter().enumerate() {
            // Unknown ids are simply skipped.
            let Some(entry) = self.get_entry(pack_id.as_str()) else {
                continue;
            };
            let bit = 1u128 << pack_idx;
            if entry.keywords.is_empty() {
                // Keyword-less packs must always be checked.
                always_check_mask |= bit;
                continue;
            }
            for &kw in entry.keywords {
                if kw.is_empty() {
                    continue;
                }
                // Whitespace keywords go in BOTH tables: the automaton catches
                // exact single-space occurrences fast; the whitespace list
                // handles arbitrary whitespace runs.
                if keyword_contains_whitespace(kw) {
                    if let Some(&idx) = whitespace_keyword_to_index.get(kw) {
                        whitespace_pack_masks[idx] |= bit;
                    } else {
                        let idx = whitespace_keywords.len();
                        whitespace_keywords.push(kw);
                        whitespace_pack_masks.push(bit);
                        whitespace_keyword_to_index.insert(kw, idx);
                    }
                }
                // Shared keywords (e.g. "aws") accumulate the masks of every
                // pack that declares them.
                if let Some(&idx) = keyword_to_index.get(kw) {
                    keyword_pack_masks[idx] |= bit;
                    continue;
                }
                let idx = patterns.len();
                patterns.push(kw);
                keyword_to_index.insert(kw, idx);
                keyword_pack_masks.push(bit);
            }
        }
        let keyword_matcher = if patterns.is_empty() {
            None
        } else {
            match aho_corasick::AhoCorasick::new(patterns) {
                Ok(ac) => Some(ac),
                Err(_) => return None,
            }
        };
        Some(EnabledKeywordIndex {
            pack_count,
            full_mask,
            always_check_mask,
            keyword_matcher,
            keyword_pack_masks,
            whitespace_keywords,
            whitespace_pack_masks,
        })
    }
}
impl Default for PackRegistry {
fn default() -> Self {
Self::new()
}
}
/// Summary of one pack for listing/inspection (see `PackRegistry::list_packs`).
#[derive(Debug)]
pub struct PackInfo {
    pub id: PackId,
    pub name: &'static str,
    pub description: &'static str,
    // Whether the pack was in the (category-expanded) enabled set.
    pub enabled: bool,
    pub safe_pattern_count: usize,
    pub destructive_pattern_count: usize,
}
/// Process-wide registry over the built-in packs, built on first access.
pub static REGISTRY: LazyLock<PackRegistry> = LazyLock::new(PackRegistry::new);
/// Packs loaded from user-supplied definition files, kept separate from the
/// built-in registry (see `load_external_packs`).
pub struct ExternalPackStore {
    packs: HashMap<String, Pack>,
    // Union of all external packs' keywords, deduplicated.
    keywords: Vec<&'static str>,
    // Human-readable load failures, one per file that failed to load.
    warnings: Vec<String>,
}
impl ExternalPackStore {
fn new() -> Self {
Self {
packs: HashMap::new(),
keywords: Vec::new(),
warnings: Vec::new(),
}
}
#[must_use]
pub fn get(&self, id: &str) -> Option<&Pack> {
self.packs.get(id)
}
pub fn pack_ids(&self) -> impl Iterator<Item = &String> {
self.packs.keys()
}
pub fn iter_packs(&self) -> impl Iterator<Item = (&String, &Pack)> {
self.packs.iter()
}
#[must_use]
pub fn keywords(&self) -> &[&'static str] {
&self.keywords
}
#[must_use]
pub fn warnings(&self) -> &[String] {
&self.warnings
}
#[must_use]
pub fn is_empty(&self) -> bool {
self.packs.is_empty()
}
#[must_use]
pub fn len(&self) -> usize {
self.packs.len()
}
#[must_use]
pub fn check_command(&self, cmd: &str, enabled_ids: &HashSet<String>) -> Option<CheckResult> {
for (id, pack) in &self.packs {
if !enabled_ids.contains(id) {
continue;
}
if pack.matches_safe(cmd) {
return Some(CheckResult::allowed());
}
}
for (id, pack) in &self.packs {
if !enabled_ids.contains(id) {
continue;
}
if let Some(matched) = pack.matches_destructive(cmd) {
return Some(CheckResult {
blocked: true,
reason: Some(matched.reason.to_string()),
pack_id: Some(id.clone()),
pattern_name: matched.name.map(ToString::to_string),
severity: Some(matched.severity),
decision_mode: Some(matched.severity.default_mode()),
});
}
}
None
}
#[must_use]
pub fn check_command_with_details(
&self,
cmd: &str,
enabled_ids: &HashSet<String>,
) -> Option<ExternalCheckResult> {
for (id, pack) in &self.packs {
if !enabled_ids.contains(id) {
continue;
}
if pack.matches_safe(cmd) {
return Some(ExternalCheckResult {
blocked: false,
reason: None,
pack_id: None,
pattern_name: None,
severity: None,
decision_mode: None,
explanation: None,
});
}
}
for (id, pack) in &self.packs {
if !enabled_ids.contains(id) {
continue;
}
if let Some(matched) = pack.matches_destructive(cmd) {
return Some(ExternalCheckResult {
blocked: true,
reason: Some(matched.reason.to_string()),
pack_id: Some(id.clone()),
pattern_name: matched.name.map(ToString::to_string),
severity: Some(matched.severity),
decision_mode: Some(matched.severity.default_mode()),
explanation: matched.explanation.map(ToString::to_string),
});
}
}
None
}
}
/// Like `CheckResult`, plus the matched pattern's optional explanation;
/// produced by `ExternalPackStore::check_command_with_details`.
#[derive(Debug)]
pub struct ExternalCheckResult {
    pub blocked: bool,
    pub reason: Option<String>,
    pub pack_id: Option<PackId>,
    pub pattern_name: Option<String>,
    pub severity: Option<Severity>,
    pub decision_mode: Option<DecisionMode>,
    pub explanation: Option<String>,
}
// Process-wide external pack store; populated at most once by
// `load_external_packs` (subsequent calls reuse the first result).
static EXTERNAL_PACKS: OnceLock<ExternalPackStore> = OnceLock::new();
/// Loads external pack definitions from `paths` into the process-wide store.
///
/// Runs at most once: the first call's `paths` win and later calls return
/// the already-initialized store. Load failures become warnings rather than
/// errors.
pub fn load_external_packs(paths: &[String]) -> &'static ExternalPackStore {
    EXTERNAL_PACKS.get_or_init(|| {
        let mut store = ExternalPackStore::new();
        if paths.is_empty() {
            return store;
        }
        let result = external::ExternalPackLoader::from_paths(paths).load_all_deduped();
        // Surface per-file load failures as warnings.
        store.warnings.extend(result.warnings.into_iter().map(|w| {
            format!(
                "Failed to load external pack from {}: {}",
                w.path.display(),
                w.error
            )
        }));
        for loaded in result.packs {
            let id = loaded.id.clone();
            let pack = loaded.pack.into_pack();
            // Merge this pack's keywords into the deduplicated union.
            for &kw in pack.keywords {
                if !store.keywords.contains(&kw) {
                    store.keywords.push(kw);
                }
            }
            store.packs.insert(id, pack);
        }
        store
    })
}
/// Returns the external pack store if `load_external_packs` has already
/// run; never triggers loading itself.
#[must_use]
pub fn get_external_packs() -> Option<&'static ExternalPackStore> {
    EXTERNAL_PACKS.get()
}
// Prebuilt substring finders for the two hottest keywords; currently unused
// (kept for callers/benchmarks outside this view — hence `dead_code`).
#[allow(dead_code)]
static GIT_FINDER: LazyLock<memmem::Finder<'static>> = LazyLock::new(|| memmem::Finder::new("git"));
#[allow(dead_code)]
static RM_FINDER: LazyLock<memmem::Finder<'static>> = LazyLock::new(|| memmem::Finder::new("rm"));
/// Byte classification used for word-boundary checks: ASCII letters,
/// digits, and underscore count as "word" bytes.
#[inline]
const fn is_word_byte(byte: u8) -> bool {
    matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_')
}
/// Whether `keyword` contains any ASCII whitespace byte — i.e. it is a
/// multi-word keyword needing the whitespace-flexible matcher.
#[inline]
fn keyword_contains_whitespace(keyword: &str) -> bool {
    keyword
        .as_bytes()
        .iter()
        .any(|byte| byte.is_ascii_whitespace())
}
/// Substring test for a keyword: plain `memmem` search for single-word
/// keywords, whitespace-flexible matching (without boundary enforcement)
/// for multi-word ones. Empty keywords never match.
#[inline]
fn keyword_matches_substring(haystack: &str, keyword: &str) -> bool {
    if keyword.is_empty() {
        return false;
    }
    if keyword_contains_whitespace(keyword) {
        keyword_matches_with_whitespace(haystack, keyword, false)
    } else {
        memmem::find(haystack.as_bytes(), keyword.as_bytes()).is_some()
    }
}
fn split_keyword_parts(keyword: &str) -> SmallVec<[&str; 4]> {
let mut parts: SmallVec<[&str; 4]> = SmallVec::new();
let mut start: Option<usize> = None;
for (idx, byte) in keyword.bytes().enumerate() {
if byte.is_ascii_whitespace() {
if let Some(part_start) = start.take() {
parts.push(&keyword[part_start..idx]);
}
} else if start.is_none() {
start = Some(idx);
}
}
if let Some(part_start) = start {
parts.push(&keyword[part_start..]);
}
parts
}
/// Matches a multi-word `keyword` against `haystack`, allowing each gap in
/// the keyword to correspond to one or more ASCII whitespace bytes in the
/// haystack. With `enforce_boundaries`, a match additionally may not butt up
/// against word bytes (see `is_word_byte`) on either side.
fn keyword_matches_with_whitespace(
    haystack: &str,
    keyword: &str,
    enforce_boundaries: bool,
) -> bool {
    let parts = split_keyword_parts(keyword);
    if parts.is_empty() {
        return false;
    }
    let hay = haystack.as_bytes();
    let first = parts[0].as_bytes();
    if first.len() > hay.len() {
        return false;
    }
    // Boundary checks only apply when the keyword's edge byte is itself a
    // word byte (e.g. "db reset" needs them; "--force x" would not on the left).
    let first_is_word = first.first().is_some_and(|b| is_word_byte(*b));
    let last = parts[parts.len() - 1].as_bytes();
    let last_is_word = last.last().is_some_and(|b| is_word_byte(*b));
    let mut offset = 0;
    // Try every occurrence of the first part as a candidate match start.
    while let Some(pos) = memmem::find(&hay[offset..], first) {
        let start = offset + pos;
        if enforce_boundaries && first_is_word {
            let start_ok = start == 0 || !is_word_byte(hay[start.saturating_sub(1)]);
            if !start_ok {
                // Word byte immediately before the candidate: skip it.
                offset = start + 1;
                continue;
            }
        }
        let mut idx = start + first.len();
        let mut matched = true;
        for part in parts.iter().skip(1) {
            // Consume the whitespace run between parts; at least one
            // whitespace byte is required.
            let mut ws = idx;
            while ws < hay.len() && hay[ws].is_ascii_whitespace() {
                ws += 1;
            }
            if ws == idx {
                matched = false;
                break;
            }
            idx = ws;
            // The next part must follow the whitespace verbatim.
            let part_bytes = part.as_bytes();
            if idx + part_bytes.len() > hay.len() || &hay[idx..idx + part_bytes.len()] != part_bytes
            {
                matched = false;
                break;
            }
            idx += part_bytes.len();
        }
        if matched && enforce_boundaries && last_is_word {
            let end_ok = idx == hay.len() || !is_word_byte(hay[idx]);
            if !end_ok {
                matched = false;
            }
        }
        if matched {
            return true;
        }
        // Advance one byte past the candidate start so overlapping starts
        // are still considered.
        offset = start + 1;
    }
    false
}
/// Word-boundary-aware keyword match against a single span of text.
///
/// Multi-word keywords delegate to `keyword_matches_with_whitespace` with
/// boundary enforcement; single-word keywords are located with `memmem` and
/// accepted only when word-byte edges do not extend into adjacent word bytes
/// (so "git" does not match inside ".gitignore" or "digit").
#[inline]
fn keyword_matches_span(span_text: &str, keyword: &str) -> bool {
    if keyword.is_empty() {
        return false;
    }
    if keyword_contains_whitespace(keyword) {
        return keyword_matches_with_whitespace(span_text, keyword, true);
    }
    let haystack = span_text.as_bytes();
    let needle = keyword.as_bytes();
    if needle.len() > haystack.len() {
        return false;
    }
    // Boundary checks only apply on edges that are themselves word bytes.
    let first_is_word = needle.first().is_some_and(|b| is_word_byte(*b));
    let last_is_word = needle.last().is_some_and(|b| is_word_byte(*b));
    let mut offset = 0;
    while let Some(pos) = memmem::find(&haystack[offset..], needle) {
        let start = offset + pos;
        let end = start + needle.len();
        let start_ok =
            !first_is_word || start == 0 || !is_word_byte(haystack[start.saturating_sub(1)]);
        let end_ok = !last_is_word || end == haystack.len() || !is_word_byte(haystack[end]);
        if start_ok && end_ok {
            return true;
        }
        // Occurrence failed a boundary check; resume one byte past its start.
        offset = start + 1;
    }
    false
}
/// Returns true when any enabled keyword matches `span_text` at word
/// boundaries (see `keyword_matches_span`).
#[inline]
fn span_matches_any_keyword(span_text: &str, enabled_keywords: &[&str]) -> bool {
    for keyword in enabled_keywords {
        if keyword_matches_span(span_text, keyword) {
            return true;
        }
    }
    false
}
/// Decides whether the whole normalized command should be re-scanned for
/// keywords: only when normalization actually changed the command AND the
/// normalized form contains a redirection character (`<` or `>`), which can
/// move a keyword out of any executable span.
#[inline]
fn should_fallback_to_full_normalized_keyword_scan(original: &str, normalized: &str) -> bool {
    if original == normalized {
        return false;
    }
    normalized
        .as_bytes()
        .iter()
        .any(|&byte| byte == b'>' || byte == b'<')
}
/// Convenience wrapper: returns only the quick-reject decision, discarding
/// the normalized command produced as a side product.
#[inline]
#[must_use]
pub fn pack_aware_quick_reject(cmd: &str, enabled_keywords: &[&str]) -> bool {
    let (rejected, _normalized) = pack_aware_quick_reject_with_normalized(cmd, enabled_keywords);
    rejected
}
/// Pack-aware quick-reject that also returns the normalized command.
///
/// Returns `(true, normalized)` when no enabled keyword can possibly apply
/// to `cmd` (so pack evaluation may be skipped) and `(false, normalized)`
/// when pack evaluation is required. With an empty keyword list the result
/// is conservatively `(false, ..)`.
///
/// Fast path: a raw substring scan (plus a whitespace-tolerant scan for
/// multi-word keywords). Normalization and span classification are only
/// paid for when a keyword substring or an obfuscation character
/// (backslash/quote) is present.
#[inline]
#[must_use]
pub fn pack_aware_quick_reject_with_normalized<'a>(
    cmd: &'a str,
    enabled_keywords: &[&str],
) -> (bool, std::borrow::Cow<'a, str>) {
    if enabled_keywords.is_empty() {
        // No keywords means we cannot prove safety: never skip evaluation.
        return (false, normalize_command(cmd));
    }
    let bytes = cmd.as_bytes();
    // Cheap pre-filter: is any keyword present as a raw substring?
    let mut any_substring = enabled_keywords
        .iter()
        .any(|keyword| memmem::find(bytes, keyword.as_bytes()).is_some());
    if !any_substring {
        // Multi-word keywords may be separated by variable whitespace that
        // the raw substring scan cannot see.
        any_substring = enabled_keywords
            .iter()
            .filter(|keyword| keyword_contains_whitespace(keyword))
            .any(|keyword| keyword_matches_substring(cmd, keyword));
    }
    if !any_substring {
        // Backslashes and quotes can hide a keyword (e.g. `"git"`), so only
        // reject outright when none are present.
        let has_obfuscation = bytes.iter().any(|b| matches!(b, b'\\' | b'\'' | b'"'));
        if !has_obfuscation {
            return (true, std::borrow::Cow::Borrowed(cmd));
        }
    }
    let normalized = normalize_command(cmd);
    let cmd_for_spans = normalized.as_ref();
    let spans = crate::context::classify_command(cmd_for_spans);
    // A keyword only counts when it appears inside an executable span (not
    // inside quoted data such as a variable assignment).
    for span in spans.executable_spans() {
        let span_text = span.text(cmd_for_spans);
        if span_text.is_empty() {
            continue;
        }
        if span_matches_any_keyword(span_text, enabled_keywords) {
            return (false, normalized);
        }
    }
    // Final fallback: when normalization introduced redirection characters,
    // keywords can land outside every executable span, so re-scan the whole
    // normalized command. (This tail was previously duplicated verbatim in a
    // `saw_executable` branch whose two arms were identical; the branch and
    // the flag were dead weight and have been merged away.)
    if should_fallback_to_full_normalized_keyword_scan(cmd, cmd_for_spans)
        && span_matches_any_keyword(cmd_for_spans, enabled_keywords)
    {
        return (false, normalized);
    }
    (true, normalized)
}
#[cfg(test)]
mod tests {
    use super::*;

    // ---- quick-reject gating ------------------------------------------------

    // With no keywords we cannot prove safety, so the fast path must stay off.
    #[test]
    fn pack_aware_quick_reject_empty_keywords_is_conservative() {
        assert!(
            !pack_aware_quick_reject("ls -la", &[]),
            "empty keyword list must not allow skipping pack evaluation"
        );
        assert!(
            !pack_aware_quick_reject("git reset --hard", &[]),
            "empty keyword list must not allow skipping pack evaluation"
        );
    }

    // Keywords embedded inside larger tokens must not defeat the fast path.
    #[test]
    fn pack_aware_quick_reject_ignores_substring_matches() {
        let keywords: Vec<&str> = vec!["git", "rm", "docker"];
        assert!(
            pack_aware_quick_reject("cat .gitignore", &keywords),
            "substring in filename should not trigger keyword gating"
        );
        assert!(
            pack_aware_quick_reject("echo digit", &keywords),
            "substring in a larger token should not trigger keyword gating"
        );
    }

    // Real word-boundary occurrences (including path-prefixed) must gate.
    #[test]
    fn pack_aware_quick_reject_keeps_word_boundary_matches() {
        let keywords: Vec<&str> = vec!["git"];
        assert!(
            !pack_aware_quick_reject("git status", &keywords),
            "word boundary keyword should prevent quick-reject"
        );
        assert!(
            !pack_aware_quick_reject("/usr/bin/git status", &keywords),
            "absolute path to git should still prevent quick-reject"
        );
    }

    // Quoting plus an attached redirection is a known obfuscation pattern.
    #[test]
    fn pack_aware_quick_reject_does_not_skip_attached_redirection_bypass() {
        let keywords: Vec<&str> = vec!["git"];
        assert!(
            !pack_aware_quick_reject(r#""git">/dev/null reset --hard"#, &keywords),
            "quoted command words with attached redirections must still trigger pack evaluation"
        );
    }

    // A keyword inside quoted assignment data is not executable and should
    // keep the fast path.
    #[test]
    fn pack_aware_quick_reject_keeps_variable_assignment_data_fast_path() {
        let keywords: Vec<&str> = vec!["rm"];
        assert!(
            pack_aware_quick_reject(r#"VAR='rm -rf /'; echo "$VAR""#, &keywords),
            "safe variable assignments should not lose the quick-reject fast path"
        );
    }

    // Regression sweep: common `rm` invocations must always reach the packs.
    #[test]
    fn pack_aware_quick_reject_rm_commands_not_rejected() {
        let keywords: Vec<&str> = vec!["rm"];
        assert!(
            !pack_aware_quick_reject("rm -rf build", &keywords),
            "rm -rf build should NOT be quick-rejected"
        );
        assert!(
            !pack_aware_quick_reject("rm -rf /tmp/foo", &keywords),
            "rm -rf /tmp/foo should NOT be quick-rejected"
        );
        assert!(
            !pack_aware_quick_reject(r#"rm -rf "$TMPDIR/foo""#, &keywords),
            "rm -rf \"$TMPDIR/foo\" should NOT be quick-rejected"
        );
        assert!(
            !pack_aware_quick_reject(r#"rm -r -f "$TMPDIR/foo""#, &keywords),
            "rm -r -f \"$TMPDIR/foo\" should NOT be quick-rejected"
        );
        assert!(
            !pack_aware_quick_reject(r#"rm --recursive --force "$TMPDIR/foo""#, &keywords),
            "rm --recursive --force \"$TMPDIR/foo\" should NOT be quick-rejected"
        );
        assert!(
            !pack_aware_quick_reject("rm -rf src", &keywords),
            "rm -rf src should NOT be quick-rejected"
        );
        assert!(
            !pack_aware_quick_reject("rm -rf target", &keywords),
            "rm -rf target should NOT be quick-rejected"
        );
        assert!(
            !pack_aware_quick_reject("rm -rf dist", &keywords),
            "rm -rf dist should NOT be quick-rejected"
        );
        assert!(
            !pack_aware_quick_reject("rm -rf node_modules", &keywords),
            "rm -rf node_modules should NOT be quick-rejected"
        );
        assert!(
            !pack_aware_quick_reject("rm -rf foo", &keywords),
            "rm -rf foo should NOT be quick-rejected"
        );
    }

    // Same sweep but with keywords collected from the real "core" category.
    #[test]
    fn full_flow_core_category_rm_commands_blocked() {
        let mut enabled = HashSet::new();
        enabled.insert("core".to_string());
        let keywords = REGISTRY.collect_enabled_keywords(&enabled);
        assert!(
            keywords.contains(&"rm"),
            "Keywords should include 'rm' from core.filesystem. Got: {keywords:?}"
        );
        assert!(
            !pack_aware_quick_reject("rm -rf build", &keywords),
            "rm -rf build should NOT be quick-rejected with core keywords"
        );
        assert!(
            !pack_aware_quick_reject("rm -rf src", &keywords),
            "rm -rf src should NOT be quick-rejected with core keywords"
        );
        assert!(
            !pack_aware_quick_reject("rm -rf target", &keywords),
            "rm -rf target should NOT be quick-rejected with core keywords"
        );
        assert!(
            !pack_aware_quick_reject("rm -rf dist", &keywords),
            "rm -rf dist should NOT be quick-rejected with core keywords"
        );
        assert!(
            !pack_aware_quick_reject("rm -rf node_modules", &keywords),
            "rm -rf node_modules should NOT be quick-rejected with core keywords"
        );
        assert!(
            !pack_aware_quick_reject("rm -rf foo", &keywords),
            "rm -rf foo should NOT be quick-rejected with core keywords"
        );
    }

    // Multi-word keywords must tolerate whitespace variations.
    #[test]
    fn pack_aware_quick_reject_handles_multiword_keywords_with_extra_space() {
        let keywords: Vec<&str> = vec!["gcloud storage"];
        assert!(
            !pack_aware_quick_reject("gcloud storage rm gs://bucket", &keywords),
            "multi-word keywords should match even with extra whitespace"
        );
    }

    // The keyword index's candidate mask must also see whitespace-varied
    // multi-word keywords.
    #[test]
    fn enabled_keyword_index_matches_multiword_keyword_with_extra_space() {
        let mut enabled = HashSet::new();
        enabled.insert("storage.gcs".to_string());
        let ordered = REGISTRY.expand_enabled_ordered(&enabled);
        let index = REGISTRY
            .build_enabled_keyword_index(&ordered)
            .expect("keyword index should build for small pack set");
        let mask = index.candidate_pack_mask("gcloud storage rm gs://bucket");
        let pack_idx = ordered
            .iter()
            .position(|id| id == "storage.gcs")
            .expect("storage.gcs should be present in ordered list");
        assert_eq!(
            (mask >> pack_idx) & 1,
            1,
            "candidate mask should include storage.gcs when whitespace varies"
        );
    }

    // ---- pack ordering and attribution --------------------------------------

    // Pin every pack's tier so reordering regressions are caught explicitly.
    #[test]
    fn pack_tier_ordering() {
        assert_eq!(PackRegistry::pack_tier("core.git"), 1);
        assert_eq!(PackRegistry::pack_tier("core.filesystem"), 1);
        assert_eq!(PackRegistry::pack_tier("storage.s3"), 1);
        assert_eq!(PackRegistry::pack_tier("remote.rsync"), 1);
        assert_eq!(PackRegistry::pack_tier("system.disk"), 2);
        assert_eq!(PackRegistry::pack_tier("system.permissions"), 2);
        assert_eq!(PackRegistry::pack_tier("infrastructure.terraform"), 3);
        assert_eq!(PackRegistry::pack_tier("cloud.aws"), 4);
        assert_eq!(PackRegistry::pack_tier("apigateway.aws"), 4);
        assert_eq!(PackRegistry::pack_tier("dns.cloudflare"), 4);
        assert_eq!(PackRegistry::pack_tier("dns.route53"), 4);
        assert_eq!(PackRegistry::pack_tier("dns.generic"), 4);
        assert_eq!(PackRegistry::pack_tier("platform.github"), 4);
        assert_eq!(PackRegistry::pack_tier("cdn.cloudflare_workers"), 4);
        assert_eq!(PackRegistry::pack_tier("loadbalancer.nginx"), 4);
        assert_eq!(PackRegistry::pack_tier("kubernetes.kubectl"), 5);
        assert_eq!(PackRegistry::pack_tier("containers.docker"), 6);
        assert_eq!(PackRegistry::pack_tier("database.postgresql"), 7);
        assert_eq!(PackRegistry::pack_tier("backup.borg"), 7);
        assert_eq!(PackRegistry::pack_tier("backup.rclone"), 7);
        assert_eq!(PackRegistry::pack_tier("backup.restic"), 7);
        assert_eq!(PackRegistry::pack_tier("backup.velero"), 7);
        assert_eq!(PackRegistry::pack_tier("messaging.kafka"), 7);
        assert_eq!(PackRegistry::pack_tier("search.elasticsearch"), 7);
        assert_eq!(PackRegistry::pack_tier("package_managers"), 8);
        assert_eq!(PackRegistry::pack_tier("strict_git"), 9);
        assert_eq!(PackRegistry::pack_tier("cicd.github_actions"), 10);
        assert_eq!(PackRegistry::pack_tier("cicd.gitlab_ci"), 10);
        assert_eq!(PackRegistry::pack_tier("cicd.jenkins"), 10);
        assert_eq!(PackRegistry::pack_tier("cicd.circleci"), 10);
        assert_eq!(PackRegistry::pack_tier("email.ses"), 10);
        assert_eq!(PackRegistry::pack_tier("featureflags.launchdarkly"), 10);
        assert_eq!(PackRegistry::pack_tier("secrets.vault"), 10);
        assert_eq!(PackRegistry::pack_tier("monitoring.splunk"), 10);
        assert_eq!(PackRegistry::pack_tier("payment.stripe"), 10);
        // Unknown packs fall into the last tier.
        assert_eq!(PackRegistry::pack_tier("unknown.pack"), 11);
    }

    // Ordering must not depend on HashSet iteration order.
    #[test]
    fn expand_enabled_ordered_is_deterministic() {
        let mut enabled = HashSet::new();
        enabled.insert("containers.docker".to_string());
        enabled.insert("kubernetes.kubectl".to_string());
        enabled.insert("core.git".to_string());
        enabled.insert("database.postgresql".to_string());
        let first_run = REGISTRY.expand_enabled_ordered(&enabled);
        for _ in 0..10 {
            let run = REGISTRY.expand_enabled_ordered(&enabled);
            assert_eq!(
                run, first_run,
                "expand_enabled_ordered should produce identical results across runs"
            );
        }
    }

    // Lower-tier packs must come before higher-tier packs.
    #[test]
    fn expand_enabled_ordered_respects_tier_ordering() {
        let mut enabled = HashSet::new();
        enabled.insert("containers.docker".to_string());
        enabled.insert("kubernetes.kubectl".to_string());
        enabled.insert("core.git".to_string());
        enabled.insert("database.postgresql".to_string());
        let ordered = REGISTRY.expand_enabled_ordered(&enabled);
        let core_pos = ordered.iter().position(|id| id == "core.git");
        let docker_pos = ordered.iter().position(|id| id == "containers.docker");
        let pg_pos = ordered.iter().position(|id| id == "database.postgresql");
        assert!(
            core_pos.is_some() && docker_pos.is_some() && pg_pos.is_some(),
            "All packs should be present"
        );
        assert!(
            core_pos.unwrap() < docker_pos.unwrap(),
            "core.git should come before containers.docker"
        );
        assert!(
            docker_pos.unwrap() < pg_pos.unwrap(),
            "containers.docker should come before database.postgresql"
        );
    }

    // Within a tier, packs are sorted lexicographically.
    #[test]
    fn expand_enabled_ordered_sorts_within_tier() {
        let mut enabled = HashSet::new();
        enabled.insert("core.git".to_string());
        enabled.insert("core.filesystem".to_string());
        let ordered = REGISTRY.expand_enabled_ordered(&enabled);
        let fs_pos = ordered.iter().position(|id| id == "core.filesystem");
        let git_pos = ordered.iter().position(|id| id == "core.git");
        assert!(
            fs_pos.is_some() && git_pos.is_some(),
            "Both core packs should be present"
        );
        assert!(
            fs_pos.unwrap() < git_pos.unwrap(),
            "core.filesystem should come before core.git (lexicographic)"
        );
    }

    // Attribution must be stable when multiple packs can match a command.
    #[test]
    fn check_command_attribution_is_deterministic() {
        let mut enabled = HashSet::new();
        enabled.insert("core.git".to_string());
        enabled.insert("strict_git".to_string());
        let cmd = "git reset --hard";
        let first_result = REGISTRY.check_command(cmd, &enabled);
        for _ in 0..10 {
            let result = REGISTRY.check_command(cmd, &enabled);
            assert_eq!(
                result.blocked, first_result.blocked,
                "Blocked status should be consistent"
            );
            assert_eq!(
                result.pack_id, first_result.pack_id,
                "Pack attribution should be consistent across runs"
            );
            assert_eq!(
                result.pattern_name, first_result.pattern_name,
                "Pattern name should be consistent across runs"
            );
        }
    }

    // When two packs match, attribution goes to the lower (higher-priority)
    // tier.
    #[test]
    fn check_command_prefers_higher_priority_pack() {
        let mut enabled = HashSet::new();
        enabled.insert("core.git".to_string());
        enabled.insert("strict_git".to_string());
        let cmd = "git reset --hard";
        let result = REGISTRY.check_command(cmd, &enabled);
        assert!(result.blocked, "Command should be blocked");
        assert_eq!(
            result.pack_id.as_deref(),
            Some("core.git"),
            "core.git (tier 1) should be attributed over strict_git (tier 9)"
        );
    }

    // ---- database packs -----------------------------------------------------

    // `IF EXISTS` must not launder a destructive DROP.
    #[test]
    fn database_packs_block_drop_with_if_exists() {
        let pg = database::postgresql::create_pack();
        assert!(
            pg.check("DROP TABLE IF EXISTS foo;").is_some(),
            "DROP TABLE IF EXISTS should be treated as destructive"
        );
        assert!(
            pg.check("DROP DATABASE IF EXISTS foo;").is_some(),
            "DROP DATABASE IF EXISTS should be treated as destructive"
        );
        let sqlite = database::sqlite::create_pack();
        assert!(
            sqlite.check("DROP TABLE IF EXISTS foo;").is_some(),
            "SQLite DROP TABLE IF EXISTS should be treated as destructive"
        );
    }

    #[test]
    fn database_postgresql_blocks_truncate_restart_identity() {
        let pg = database::postgresql::create_pack();
        assert!(
            pg.check("TRUNCATE TABLE foo RESTART IDENTITY;").is_some(),
            "TRUNCATE ... RESTART IDENTITY permanently deletes rows and should be blocked"
        );
    }

    // ---- category expansion and check results -------------------------------

    // Enabling a category ("containers") expands to all sub-packs, in order.
    #[test]
    fn category_expansion_is_ordered() {
        let mut enabled = HashSet::new();
        enabled.insert("containers".to_string());
        let ordered = REGISTRY.expand_enabled_ordered(&enabled);
        let has_docker = ordered.iter().any(|id| id == "containers.docker");
        let has_compose = ordered.iter().any(|id| id == "containers.compose");
        let has_podman = ordered.iter().any(|id| id == "containers.podman");
        assert!(
            has_docker && has_compose && has_podman,
            "Category expansion should include all sub-packs"
        );
        let compose_pos = ordered.iter().position(|id| id == "containers.compose");
        let docker_pos = ordered.iter().position(|id| id == "containers.docker");
        let podman_pos = ordered.iter().position(|id| id == "containers.podman");
        assert!(
            compose_pos.unwrap() < docker_pos.unwrap(),
            "compose should come before docker"
        );
        assert!(
            docker_pos.unwrap() < podman_pos.unwrap(),
            "docker should come before podman"
        );
    }

    #[test]
    fn check_command_returns_pattern_name() {
        let mut enabled = HashSet::new();
        enabled.insert("containers.docker".to_string());
        let cmd = "docker system prune";
        let result = REGISTRY.check_command(cmd, &enabled);
        assert!(result.blocked, "docker system prune should be blocked");
        assert_eq!(
            result.pack_id.as_deref(),
            Some("containers.docker"),
            "Should be attributed to containers.docker"
        );
        assert!(
            result.pattern_name.is_some() || result.reason.is_some(),
            "Blocked result should have pattern metadata"
        );
    }

    #[test]
    fn destructive_match_contains_metadata() {
        let docker_pack = REGISTRY
            .get("containers.docker")
            .expect("docker pack exists");
        let matched = docker_pack.matches_destructive("docker system prune");
        assert!(matched.is_some(), "docker system prune should match");
        let m = matched.unwrap();
        assert!(!m.reason.is_empty(), "reason should not be empty");
    }

    // Normalization must neither panic nor loop on adversarial input.
    #[test]
    fn normalize_command_handles_pathological_input() {
        let pathological = "//////////////////_(rm";
        let result = normalize_command(pathological);
        assert_eq!(result.as_ref(), pathological);
        let long_slashes = "/".repeat(1000) + "rm";
        let result2 = normalize_command(&long_slashes);
        assert!(!result2.is_empty());
        let with_nulls = "///\0\0/\0\0/\0\0//\0\0/\0[";
        let result3 = normalize_command(with_nulls);
        assert_eq!(result3.as_ref(), with_nulls);
    }

    // ---- Severity / DecisionMode --------------------------------------------

    #[test]
    fn severity_default_modes() {
        assert_eq!(Severity::Critical.default_mode(), DecisionMode::Deny);
        assert_eq!(Severity::High.default_mode(), DecisionMode::Deny);
        assert_eq!(Severity::Medium.default_mode(), DecisionMode::Warn);
        assert_eq!(Severity::Low.default_mode(), DecisionMode::Log);
    }

    // `blocks_by_default` must always agree with `default_mode().blocks()`.
    #[test]
    fn severity_blocks_by_default_consistency() {
        assert!(Severity::Critical.blocks_by_default());
        assert!(Severity::High.blocks_by_default());
        assert!(!Severity::Medium.blocks_by_default());
        assert!(!Severity::Low.blocks_by_default());
        for severity in [
            Severity::Critical,
            Severity::High,
            Severity::Medium,
            Severity::Low,
        ] {
            assert_eq!(
                severity.blocks_by_default(),
                severity.default_mode().blocks(),
                "blocks_by_default should match default_mode().blocks() for {severity:?}"
            );
        }
    }

    #[test]
    fn decision_mode_blocks() {
        assert!(DecisionMode::Deny.blocks(), "Deny should block");
        assert!(!DecisionMode::Warn.blocks(), "Warn should not block");
        assert!(!DecisionMode::Log.blocks(), "Log should not block");
    }

    #[test]
    fn severity_labels() {
        assert_eq!(Severity::Critical.label(), "critical");
        assert_eq!(Severity::High.label(), "high");
        assert_eq!(Severity::Medium.label(), "medium");
        assert_eq!(Severity::Low.label(), "low");
    }

    #[test]
    fn decision_mode_labels() {
        assert_eq!(DecisionMode::Deny.label(), "deny");
        assert_eq!(DecisionMode::Warn.label(), "warn");
        assert_eq!(DecisionMode::Log.label(), "log");
    }

    // Blocked results must carry severity + decision mode metadata.
    #[test]
    fn check_result_includes_severity() {
        let mut enabled = HashSet::new();
        enabled.insert("core.git".to_string());
        let cmd = "git reset --hard";
        let result = REGISTRY.check_command(cmd, &enabled);
        assert!(result.blocked, "git reset --hard should be blocked");
        assert!(
            result.severity.is_some(),
            "Blocked result should include severity"
        );
        assert!(
            result.decision_mode.is_some(),
            "Blocked result should include decision_mode"
        );
        let severity = result.severity.unwrap();
        let mode = result.decision_mode.unwrap();
        assert!(severity.blocks_by_default());
        assert!(mode.blocks());
    }

    #[test]
    fn allowed_result_no_severity() {
        let result = CheckResult::allowed();
        assert!(!result.blocked);
        assert!(result.severity.is_none());
        assert!(result.decision_mode.is_none());
    }

    #[test]
    fn destructive_match_includes_severity() {
        let docker_pack = REGISTRY
            .get("containers.docker")
            .expect("docker pack exists");
        let matched = docker_pack.matches_destructive("docker system prune");
        assert!(matched.is_some(), "docker system prune should match");
        let m = matched.unwrap();
        assert_eq!(m.severity, Severity::High);
    }

    #[test]
    fn severity_default() {
        let default: Severity = Severity::default();
        assert_eq!(default, Severity::High);
    }

    #[test]
    fn decision_mode_default() {
        let default: DecisionMode = DecisionMode::default();
        assert_eq!(default, DecisionMode::Deny);
    }

    // ---- severity regression tests ------------------------------------------

    // These named core.git rules must stay Critical.
    #[test]
    fn severity_regression_git_critical_rules() {
        let git_pack = REGISTRY
            .get("core.git")
            .expect("core.git pack should exist");
        let critical_rules = [
            "reset-hard",
            "clean-force",
            "push-force-long",
            "push-force-short",
            "stash-clear",
        ];
        for rule_name in critical_rules {
            let pattern = git_pack
                .destructive_patterns
                .iter()
                .find(|p| p.name == Some(rule_name));
            assert!(
                pattern.is_some(),
                "Rule {rule_name} should exist in core.git"
            );
            let pattern = pattern.unwrap();
            assert_eq!(
                pattern.severity,
                Severity::Critical,
                "Rule {rule_name} in core.git should be Critical severity"
            );
        }
    }

    #[test]
    fn severity_regression_filesystem_critical_rules() {
        let fs_pack = REGISTRY
            .get("core.filesystem")
            .expect("core.filesystem pack should exist");
        let pattern = fs_pack
            .destructive_patterns
            .iter()
            .find(|p| p.name == Some("rm-rf-root-home"))
            .expect("rm-rf-root-home rule should exist");
        assert_eq!(
            pattern.severity,
            Severity::Critical,
            "rm-rf-root-home should be Critical severity (most dangerous)"
        );
    }

    // These rules must block by default (High or Critical).
    #[test]
    fn severity_regression_git_high_rules() {
        let git_pack = REGISTRY
            .get("core.git")
            .expect("core.git pack should exist");
        let high_or_above_rules = [
            "checkout-discard",
            "checkout-ref-discard",
            "restore-worktree",
            "restore-worktree-explicit",
            "reset-merge",
        ];
        for rule_name in high_or_above_rules {
            let pattern = git_pack
                .destructive_patterns
                .iter()
                .find(|p| p.name == Some(rule_name));
            assert!(
                pattern.is_some(),
                "Rule {rule_name} should exist in core.git"
            );
            let pattern = pattern.unwrap();
            assert!(
                pattern.severity.blocks_by_default(),
                "Rule {rule_name} in core.git should block by default (High or Critical)"
            );
        }
    }

    // Everything in core packs blocks by default except an explicit Medium
    // allowlist (recoverable operations).
    #[test]
    fn core_rules_have_appropriate_severity() {
        let medium_patterns = [
            ("core.git", "branch-force-delete"),
            ("core.git", "stash-drop"),
        ];
        for pack_id in ["core.git", "core.filesystem"] {
            let pack = REGISTRY.get(pack_id).expect("Pack should exist");
            for pattern in &pack.destructive_patterns {
                let name = pattern.name.unwrap_or("<unnamed>");
                let is_expected_medium = medium_patterns
                    .iter()
                    .any(|(pid, pname)| *pid == pack_id && *pname == name);
                if is_expected_medium {
                    assert!(
                        matches!(pattern.severity, Severity::Medium),
                        "Core pack rule {pack_id}:{name} should be Medium severity (recoverable)"
                    );
                } else {
                    assert!(
                        pattern.severity.blocks_by_default(),
                        "Core pack rule {pack_id}:{name} should block by default"
                    );
                }
            }
        }
    }

    // ---- normalize_command --------------------------------------------------

    mod normalization_tests {
        use super::*;

        #[test]
        fn preserves_plain_git_command() {
            assert_eq!(normalize_command("git status"), "git status");
        }

        #[test]
        fn preserves_plain_rm_command() {
            assert_eq!(normalize_command("rm -rf /tmp/foo"), "rm -rf /tmp/foo");
        }

        // Well-known binary directories are stripped from the command word.
        #[test]
        fn strips_usr_bin_git() {
            assert_eq!(normalize_command("/usr/bin/git status"), "git status");
        }

        #[test]
        fn strips_usr_local_bin_git() {
            assert_eq!(
                normalize_command("/usr/local/bin/git checkout -b feature"),
                "git checkout -b feature"
            );
        }

        #[test]
        fn strips_bin_rm() {
            assert_eq!(
                normalize_command("/bin/rm -rf /tmp/test"),
                "rm -rf /tmp/test"
            );
        }

        #[test]
        fn strips_usr_bin_rm() {
            assert_eq!(normalize_command("/usr/bin/rm file.txt"), "rm file.txt");
        }

        #[test]
        fn strips_sbin_path() {
            assert_eq!(normalize_command("/sbin/rm foo"), "rm foo");
        }

        #[test]
        fn strips_usr_sbin_path() {
            assert_eq!(normalize_command("/usr/sbin/rm bar"), "rm bar");
        }

        // Path stripping applies only to the command word, not to arguments.
        #[test]
        fn preserves_command_with_path_arguments() {
            assert_eq!(
                normalize_command("git add /usr/bin/something"),
                "git add /usr/bin/something"
            );
        }

        #[test]
        fn handles_empty_string() {
            assert_eq!(normalize_command(""), "");
        }

        // Quotes around the *executed* command word are removed.
        #[test]
        fn strips_quotes_from_executed_git_command_word() {
            assert_eq!(
                normalize_command("\"git\" reset --hard"),
                "git reset --hard"
            );
        }

        #[test]
        fn strips_quotes_from_executed_rm_command_word() {
            assert_eq!(normalize_command("\"rm\" -rf /etc"), "rm -rf /etc");
        }

        #[test]
        fn strips_quotes_from_executed_absolute_path_command_word() {
            assert_eq!(
                normalize_command("\"/usr/bin/git\" reset --hard"),
                "git reset --hard"
            );
        }

        #[test]
        fn strips_quotes_after_separators() {
            assert_eq!(
                normalize_command("echo hi; \"rm\" -rf /etc"),
                "echo hi; rm -rf /etc"
            );
        }

        #[test]
        fn strips_quotes_after_wrappers_and_options() {
            assert_eq!(
                normalize_command("sudo -u root \"rm\" -rf /etc"),
                "rm -rf /etc"
            );
        }

        // Quotes around *data* arguments are preserved.
        #[test]
        fn preserves_quotes_for_safe_commands() {
            assert_eq!(
                normalize_command("echo \"rm\" -rf /etc"),
                "echo \"rm\" -rf /etc"
            );
        }

        #[test]
        fn does_not_strip_quotes_for_command_query_mode() {
            assert_eq!(
                normalize_command("command -v \"git\""),
                "command -v \"git\""
            );
        }

        #[test]
        fn strips_quotes_inside_subshell_segments() {
            assert_eq!(normalize_command("( \"rm\" -rf /etc )"), "( rm -rf /etc )");
        }

        // Backslash-newline line continuations are rejoined.
        #[test]
        fn handles_line_continuation_split() {
            assert_eq!(
                normalize_command("git re\\\nset --hard"),
                "git reset --hard"
            );
        }
    }

    // Every safe and destructive regex in every registered pack must compile;
    // failures are collected so one report lists them all.
    #[test]
    fn all_pack_patterns_compile() {
        let mut errors: Vec<String> = Vec::new();
        for pack_id in REGISTRY.all_pack_ids() {
            let pack = REGISTRY.get(pack_id).expect("pack must exist");
            for (idx, pattern) in pack.safe_patterns.iter().enumerate() {
                if let Err(e) =
                    crate::packs::regex_engine::CompiledRegex::new(pattern.regex.as_str())
                {
                    errors.push(format!(
                        "Pack '{}' safe pattern '{}' (index {}) failed to compile: {}\n Pattern: {}",
                        pack_id,
                        pattern.name,
                        idx,
                        e,
                        pattern.regex.as_str()
                    ));
                }
            }
            for (idx, pattern) in pack.destructive_patterns.iter().enumerate() {
                let pattern_name = pattern.name.unwrap_or("<unnamed>");
                if let Err(e) =
                    crate::packs::regex_engine::CompiledRegex::new(pattern.regex.as_str())
                {
                    errors.push(format!(
                        "Pack '{}' destructive pattern '{}' (index {}) failed to compile: {}\n Pattern: {}",
                        pack_id,
                        pattern_name,
                        idx,
                        e,
                        pattern.regex.as_str()
                    ));
                }
            }
        }
        assert!(
            errors.is_empty(),
            "Found {} invalid regex pattern(s):\n\n{}",
            errors.len(),
            errors.join("\n\n")
        );
    }
}