// destructive_command_guard 0.5.6

#![allow(clippy::too_many_lines)]
//! Pack system for modular command blocking.
//!
//! This module provides the infrastructure for organizing patterns into "packs"
//! that can be enabled or disabled based on user configuration.
//!
//! # Pack Hierarchy
//!
//! Packs are organized in a two-level hierarchy:
//! - Category (e.g., "database", "kubernetes")
//! - Sub-pack (e.g., "database.postgresql", "kubernetes.kubectl")
//!
//! Enabling a category enables all its sub-packs. Sub-packs can be individually
//! disabled even if their parent category is enabled.

pub mod apigateway;
pub mod backup;
pub mod cdn;
pub mod cicd;
pub mod cloud;
pub mod containers;
pub mod core;
pub mod database;
pub mod dns;
pub mod email;
pub mod external;
pub mod featureflags;
pub mod infrastructure;
pub mod kubernetes;
pub mod loadbalancer;
pub mod messaging;
pub mod monitoring;
pub mod package_managers;
pub mod payment;
pub mod platform;
pub mod regex_engine;
pub mod remote;
pub mod safe;
pub mod search;
pub mod secrets;
pub mod storage;
pub mod strict_git;
pub mod system;

// Testing infrastructure
pub mod test_helpers;
#[cfg(test)]
mod test_template;

pub use crate::normalize::normalize_command;
use memchr::memmem;
use regex_engine::LazyCompiledRegex;
use serde::Serialize;
use smallvec::SmallVec;
use std::collections::{HashMap, HashSet};
use std::sync::{LazyLock, OnceLock};

/// Unique identifier for a pack (e.g., "core", "database.postgresql").
pub type PackId = String;

/// Severity level for destructive patterns.
///
/// Severity determines the default decision mode and allowlisting behavior:
/// - **Critical**: Always block. These are irreversible, high-confidence detections.
/// - **High**: Block by default, but allowlistable by rule ID.
/// - **Medium**: Warn by default (log + continue), blockable via config.
/// - **Low**: Log only (for history/learning), warnable/blockable via config.
///
/// The derived `Default` is [`Severity::High`]. Because of the
/// `rename_all = "lowercase"` serde attribute, variant names are serialized
/// in lowercase (e.g. `critical`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum Severity {
    /// Always block. Irreversible operations with high confidence.
    /// Examples: `rm -rf /`, `git reset --hard`, `DROP DATABASE`.
    Critical,

    /// Block by default, allowlistable by rule ID.
    /// Examples: `git push --force`, `docker system prune`.
    ///
    /// This is the `Default` variant.
    #[default]
    High,

    /// Warn by default (stderr warning, but allow execution).
    /// Examples: context-dependent patterns, lower-confidence detections.
    Medium,

    /// Log only (silent, for history and learning).
    /// Examples: advisory patterns, patterns under evaluation.
    Low,
}

impl Severity {
    /// Map this severity onto its default [`DecisionMode`].
    ///
    /// `Critical` and `High` deny, `Medium` warns, `Low` only logs.
    #[must_use]
    pub const fn default_mode(&self) -> DecisionMode {
        match *self {
            Self::Low => DecisionMode::Log,
            Self::Medium => DecisionMode::Warn,
            Self::High | Self::Critical => DecisionMode::Deny,
        }
    }

    /// Whether a match at this severity blocks the command by default.
    ///
    /// True for `Critical` and `High`, false for `Medium` and `Low`.
    #[must_use]
    pub const fn blocks_by_default(&self) -> bool {
        match *self {
            Self::Critical | Self::High => true,
            Self::Medium | Self::Low => false,
        }
    }

    /// Lowercase, human-readable name of this severity.
    #[must_use]
    pub const fn label(&self) -> &'static str {
        match *self {
            Self::Low => "low",
            Self::Medium => "medium",
            Self::High => "high",
            Self::Critical => "critical",
        }
    }
}

/// How a matched pattern is acted upon.
///
/// The derived `Default` is [`DecisionMode::Deny`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum DecisionMode {
    /// Block the command (output JSON deny, print warning).
    #[default]
    Deny,

    /// Warn and prompt (print warning to stderr, emit JSON "ask" decision).
    Warn,

    /// Log only (silent allow, record for history).
    Log,
}

impl DecisionMode {
    /// Whether this mode stops the command from executing.
    ///
    /// Only [`DecisionMode::Deny`] blocks; `Warn` and `Log` do not.
    #[must_use]
    pub const fn blocks(&self) -> bool {
        match *self {
            Self::Deny => true,
            Self::Warn | Self::Log => false,
        }
    }

    /// Lowercase, human-readable name of this mode.
    #[must_use]
    pub const fn label(&self) -> &'static str {
        match *self {
            Self::Log => "log",
            Self::Warn => "warn",
            Self::Deny => "deny",
        }
    }
}

/// Platform specifier for platform-specific suggestions.
///
/// Some safer alternatives only work on specific operating systems.
/// Use [`Platform::All`] (the derived `Default`) when the suggestion applies
/// to every platform.
///
/// Because of the `rename_all = "lowercase"` serde attribute, variant names
/// are serialized in lowercase (e.g. `macos`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum Platform {
    /// Suggestion works on all platforms.
    #[default]
    All,
    /// Linux-specific suggestion.
    Linux,
    /// macOS-specific suggestion.
    MacOS,
    /// Windows-specific suggestion.
    Windows,
    /// BSD-specific suggestion (FreeBSD, OpenBSD, NetBSD).
    Bsd,
}

impl Platform {
    /// Whether this platform specifier covers the OS the binary was compiled for.
    ///
    /// The answer is fixed at compile time via `cfg!(target_os = ...)`;
    /// [`Platform::All`] always matches.
    #[must_use]
    pub const fn matches_current(&self) -> bool {
        match *self {
            Self::All => true,
            Self::Linux => cfg!(target_os = "linux"),
            Self::MacOS => cfg!(target_os = "macos"),
            Self::Windows => cfg!(target_os = "windows"),
            Self::Bsd => cfg!(any(
                target_os = "freebsd",
                target_os = "openbsd",
                target_os = "netbsd"
            )),
        }
    }

    /// Lowercase, human-readable name of this platform.
    #[must_use]
    pub const fn label(&self) -> &'static str {
        match *self {
            Self::Bsd => "bsd",
            Self::Windows => "windows",
            Self::MacOS => "macos",
            Self::Linux => "linux",
            Self::All => "all",
        }
    }
}

/// A safer command alternative for a destructive pattern.
///
/// `PatternSuggestion` provides users with actionable alternatives when a command
/// is blocked. Each suggestion includes the command to use, why it's safer,
/// and which platform it applies to.
///
/// All text fields are `&'static str`, so suggestions can be stored in
/// `&'static [PatternSuggestion]` tables (see `DestructivePattern::suggestions`).
///
/// Note: This is distinct from `crate::suggestions::Suggestion`, which is used
/// for the runtime suggestion registry with categorized guidance types.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct PatternSuggestion {
    /// The safer command alternative.
    ///
    /// Can include placeholders like `{path}` or `{file}` that should be
    /// replaced with the actual arguments from the blocked command.
    pub command: &'static str,

    /// Brief explanation of why this alternative is safer.
    pub description: &'static str,

    /// Platform this suggestion applies to.
    /// `Platform::All` (default) means it works everywhere.
    pub platform: Platform,
}

impl PatternSuggestion {
    /// Build a suggestion that is valid on every platform (`Platform::All`).
    #[must_use]
    pub const fn new(command: &'static str, description: &'static str) -> Self {
        Self::with_platform(command, description, Platform::All)
    }

    /// Build a suggestion restricted to the given `platform`.
    #[must_use]
    pub const fn with_platform(
        command: &'static str,
        description: &'static str,
        platform: Platform,
    ) -> Self {
        Self {
            platform,
            description,
            command,
        }
    }
}

/// A safe pattern that, when matched, allows the command immediately.
///
/// `Debug` is implemented by hand for this type and prints the regex source
/// string (via `regex.as_str()`) together with `name`.
pub struct SafePattern {
    /// Lazily-compiled regex pattern.
    pub regex: LazyCompiledRegex,
    /// Debug name for the pattern.
    pub name: &'static str,
}

impl std::fmt::Debug for SafePattern {
    /// Render as `SafePattern { pattern, name }`, showing the regex source text.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("SafePattern");
        out.field("pattern", &self.regex.as_str());
        out.field("name", &self.name);
        out.finish()
    }
}

/// A destructive pattern that, when matched, blocks the command.
///
/// `Debug` is implemented by hand for this type and prints the regex source
/// string (via `regex.as_str()`) alongside the metadata fields.
pub struct DestructivePattern {
    /// Lazily-compiled regex pattern.
    pub regex: LazyCompiledRegex,
    /// Human-readable explanation of why this command is blocked.
    pub reason: &'static str,
    /// Optional pattern name for debugging and allowlisting.
    pub name: Option<&'static str>,
    /// Severity level (determines default decision mode).
    pub severity: Severity,
    /// Detailed explanation of why this pattern is dangerous.
    /// Should explain consequences and suggest alternatives.
    /// This is more verbose than `reason` and intended for verbose output modes.
    pub explanation: Option<&'static str>,
    /// Safer command alternatives to suggest when this pattern matches.
    /// Each suggestion includes the command, why it's safer, and which platforms it applies to.
    /// An empty slice means no alternatives are offered.
    pub suggestions: &'static [PatternSuggestion],
}

impl std::fmt::Debug for DestructivePattern {
    /// Render every field, showing the regex as its source text under `pattern`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("DestructivePattern");
        out.field("pattern", &self.regex.as_str());
        out.field("reason", &self.reason);
        out.field("name", &self.name);
        out.field("severity", &self.severity);
        out.field("explanation", &self.explanation);
        out.field("suggestions", &self.suggestions);
        out.finish()
    }
}

/// Macro to create a safe pattern with compile-time name checking.
///
/// Usage: `safe_pattern!("name", "regex")`. Both arguments must be literals;
/// anything else is rejected at macro-expansion time.
///
/// Expands to a `$crate::packs::SafePattern` struct literal. The pattern is
/// lazily compiled on first use, not at construction time.
#[macro_export]
macro_rules! safe_pattern {
    // `$name` is the debug name, `$re` the regex source text.
    ($name:literal, $re:literal) => {
        $crate::packs::SafePattern {
            regex: $crate::packs::regex_engine::LazyCompiledRegex::new($re),
            name: $name,
        }
    };
}

/// Macro to create a destructive pattern with reason.
///
/// Expands to a `$crate::packs::DestructivePattern` struct literal.
/// The pattern is lazily compiled on first use, not at construction time.
///
/// The severity argument is a bare variant identifier of
/// `$crate::packs::Severity` (e.g. `Critical`), not a path or expression.
///
/// # Variants
///
/// - `destructive_pattern!("regex", "reason")` - unnamed, default High severity
/// - `destructive_pattern!("name", "regex", "reason")` - named, default High severity
/// - `destructive_pattern!("name", "regex", "reason", Critical)` - named with explicit severity
/// - `destructive_pattern!("name", "regex", "reason", Critical, "explanation")` - with explanation
/// - `destructive_pattern!("name", "regex", "reason", Critical, "explanation", &[...])` - with suggestions
#[macro_export]
macro_rules! destructive_pattern {
    // Unnamed pattern, default severity (High)
    ($re:literal, $reason:literal) => {
        $crate::packs::DestructivePattern {
            regex: $crate::packs::regex_engine::LazyCompiledRegex::new($re),
            reason: $reason,
            name: None,
            severity: $crate::packs::Severity::High,
            explanation: None,
            suggestions: &[],
        }
    };
    // Named pattern, default severity (High)
    ($name:literal, $re:literal, $reason:literal) => {
        $crate::packs::DestructivePattern {
            regex: $crate::packs::regex_engine::LazyCompiledRegex::new($re),
            reason: $reason,
            name: Some($name),
            severity: $crate::packs::Severity::High,
            explanation: None,
            suggestions: &[],
        }
    };
    // Named pattern with explicit severity
    ($name:literal, $re:literal, $reason:literal, $severity:ident) => {
        $crate::packs::DestructivePattern {
            regex: $crate::packs::regex_engine::LazyCompiledRegex::new($re),
            reason: $reason,
            name: Some($name),
            severity: $crate::packs::Severity::$severity,
            explanation: None,
            suggestions: &[],
        }
    };
    // Named pattern with explicit severity and explanation
    ($name:literal, $re:literal, $reason:literal, $severity:ident, $explanation:literal) => {
        $crate::packs::DestructivePattern {
            regex: $crate::packs::regex_engine::LazyCompiledRegex::new($re),
            reason: $reason,
            name: Some($name),
            severity: $crate::packs::Severity::$severity,
            explanation: Some($explanation),
            suggestions: &[],
        }
    };
    // Named pattern with explicit severity, explanation, and suggestions.
    // `$suggestions` is an arbitrary expression; it is assigned directly to
    // the `suggestions` field, so it must have that field's type.
    ($name:literal, $re:literal, $reason:literal, $severity:ident, $explanation:literal, $suggestions:expr) => {
        $crate::packs::DestructivePattern {
            regex: $crate::packs::regex_engine::LazyCompiledRegex::new($re),
            reason: $reason,
            name: Some($name),
            severity: $crate::packs::Severity::$severity,
            explanation: Some($explanation),
            suggestions: $suggestions,
        }
    };
}

/// A pack of patterns for a specific category of commands.
///
/// The first six fields are supplied by the pack's constructor. The last
/// three (`keyword_matcher`, `safe_regex_set`, `safe_regex_set_is_complete`)
/// are derived acceleration data: `Pack::new` leaves them as `None` / `false`
/// and `PackEntry::get_pack()` fills them in when the pack is first built.
#[derive(Debug)]
pub struct Pack {
    /// Unique identifier (e.g., "database.postgresql").
    pub id: PackId,

    /// Human-readable name (e.g., `PostgreSQL`).
    pub name: &'static str,

    /// Description of what this pack protects against.
    pub description: &'static str,

    /// Keywords for quick-reject filtering (e.g., `["psql", "dropdb", "DROP"]`).
    /// Commands without any of these keywords skip pattern matching for this pack.
    /// An empty list means the pack's patterns are always checked.
    pub keywords: &'static [&'static str],

    /// Safe patterns (whitelist) - checked first.
    pub safe_patterns: Vec<SafePattern>,

    /// Destructive patterns (blacklist) - checked if no safe pattern matches.
    pub destructive_patterns: Vec<DestructivePattern>,

    /// Pre-built Aho-Corasick automaton for O(n) keyword matching.
    /// Set to `None` in pack constructors; `PackEntry::get_pack()` builds it
    /// from `keywords` when the list is non-empty.
    pub keyword_matcher: Option<aho_corasick::AhoCorasick>,

    /// Pre-built `RegexSet` for O(n) safe pattern matching.
    /// Allows checking all safe patterns in a single pass. Built by
    /// `PackEntry::get_pack()` when the pack is instantiated. Only includes
    /// patterns that can use the linear-time regex engine (no
    /// lookahead/lookbehind). Stays `None` if there are no such patterns or
    /// if the set fails to compile.
    pub safe_regex_set: Option<regex::RegexSet>,

    /// True if `safe_regex_set` covers ALL safe patterns (no backtracking patterns exist).
    /// When true and the `RegexSet` misses, we can skip individual pattern checks.
    pub safe_regex_set_is_complete: bool,
}

impl Pack {
    /// Create a new pack with the given patterns.
    ///
    /// This constructor initializes the lazy fields (`keyword_matcher`, `safe_regex_set`,
    /// `safe_regex_set_is_complete`) to their default values. These are populated
    /// during pack registration by `PackEntry::get_pack()`.
    #[must_use]
    pub const fn new(
        id: PackId,
        name: &'static str,
        description: &'static str,
        keywords: &'static [&'static str],
        safe_patterns: Vec<SafePattern>,
        destructive_patterns: Vec<DestructivePattern>,
    ) -> Self {
        Self {
            id,
            name,
            description,
            keywords,
            safe_patterns,
            destructive_patterns,
            keyword_matcher: None,
            safe_regex_set: None,
            safe_regex_set_is_complete: false,
        }
    }

    /// Check if a command contains any of this pack's keywords.
    /// Returns false if the command doesn't contain any keywords (quick reject).
    ///
    /// Uses an Aho-Corasick automaton for O(n) matching when available (built
    /// by the registry during pack registration). Falls back to sequential
    /// memchr-based search if the automaton isn't built.
    #[must_use]
    pub fn might_match(&self, cmd: &str) -> bool {
        if self.keywords.is_empty() {
            return true; // No keywords = always check patterns
        }

        // Use Aho-Corasick automaton if available (O(n) regardless of keyword count).
        if let Some(ref ac) = self.keyword_matcher {
            if ac.is_match(cmd) {
                return true;
            }

            if !self
                .keywords
                .iter()
                .any(|kw| keyword_contains_whitespace(kw))
            {
                return false;
            }

            return self
                .keywords
                .iter()
                .any(|kw| keyword_contains_whitespace(kw) && keyword_matches_substring(cmd, kw));
        }

        // Fallback: sequential memchr-based search (O(k * n) where k = keyword count).
        self.keywords
            .iter()
            .any(|kw| keyword_matches_substring(cmd, kw))
    }

    /// Check if a command matches any safe pattern.
    ///
    /// Uses `RegexSet` for O(n) matching when available (fast path).
    /// Falls back to individual pattern checks for backtracking patterns.
    #[must_use]
    pub fn matches_safe(&self, cmd: &str) -> bool {
        // Fast path: use RegexSet if available
        if let Some(ref set) = self.safe_regex_set {
            if set.is_match(cmd) {
                return true;
            }
            // If RegexSet covers all patterns and missed, no match
            if self.safe_regex_set_is_complete {
                return false;
            }
        }

        // Fallback: check patterns individually
        // This handles: no RegexSet, RegexSet compilation failed, or backtracking patterns
        self.safe_patterns.iter().any(|p| p.regex.is_match(cmd))
    }

    /// Deadline-aware safe pattern matching.
    ///
    /// Like [`matches_safe`], but polls the deadline between individual
    /// backtracking-engine pattern evaluations. Returns `None` (no match) if
    /// the deadline expires mid-scan, letting the caller fail-open.
    #[must_use]
    pub fn matches_safe_with_deadline(
        &self,
        cmd: &str,
        deadline: Option<&crate::perf::Deadline>,
    ) -> bool {
        if let Some(ref set) = self.safe_regex_set {
            if set.is_match(cmd) {
                return true;
            }
            if self.safe_regex_set_is_complete {
                return false;
            }
        }

        for p in &self.safe_patterns {
            if deadline.is_some_and(crate::perf::Deadline::is_exceeded) {
                return false;
            }
            if p.regex.is_match(cmd) {
                return true;
            }
        }
        false
    }

    /// Check if a command matches any destructive pattern.
    /// Returns the matched pattern's reason, name, severity, and explanation if found.
    #[must_use]
    pub fn matches_destructive(&self, cmd: &str) -> Option<DestructiveMatch> {
        self.destructive_patterns
            .iter()
            .find(|p| p.regex.is_match(cmd))
            .map(|p| DestructiveMatch {
                reason: p.reason,
                name: p.name,
                severity: p.severity,
                explanation: p.explanation,
            })
    }

    fn matches_destructive_named_by(
        &self,
        cmd: &str,
        predicate: impl Fn(Option<&'static str>) -> bool,
    ) -> Option<DestructiveMatch> {
        self.destructive_patterns
            .iter()
            .filter(|p| predicate(p.name))
            .find(|p| p.regex.is_match(cmd))
            .map(|p| DestructiveMatch {
                reason: p.reason,
                name: p.name,
                severity: p.severity,
                explanation: p.explanation,
            })
    }

    /// Check a command against this pack.
    /// Returns Some(DestructiveMatch) if blocked, None if allowed.
    ///
    /// Compound commands and pipelines are split into segments and each
    /// segment is evaluated independently. This prevents a safe pattern
    /// matching one segment (e.g. `docker ps`) from shielding a destructive
    /// pattern in another (`docker system prune`).
    #[must_use]
    pub fn check(&self, cmd: &str) -> Option<DestructiveMatch> {
        let segments = crate::packs::split_command_segments(cmd);
        if segments.len() > 1 {
            for seg in &segments {
                if let Some(m) = self.check_single(seg) {
                    return Some(m);
                }
            }
            // Also check the whole command so patterns that legitimately
            // span segments still match.
        }

        self.check_single(cmd)
    }

    fn check_single(&self, cmd: &str) -> Option<DestructiveMatch> {
        // Quick reject if no keywords match
        if !self.might_match(cmd) {
            return None;
        }

        if self.id == "core.filesystem" {
            if let Some(matched) = self.matches_destructive_named_by(
                cmd,
                crate::packs::core::filesystem::is_pre_rm_propagation_rule,
            ) {
                return Some(matched);
            }

            match crate::packs::core::filesystem::parse_rm_command(cmd) {
                crate::packs::core::filesystem::RmParseDecision::Allow => return None,
                crate::packs::core::filesystem::RmParseDecision::Deny(hit) => {
                    return Some(DestructiveMatch {
                        reason: hit.reason,
                        name: Some(hit.pattern_name),
                        severity: hit.severity,
                        explanation: None,
                    });
                }
                crate::packs::core::filesystem::RmParseDecision::NoMatch => {}
            }
        }

        // Check safe patterns first (whitelist)
        if self.matches_safe(cmd) {
            return None;
        }

        // Check destructive patterns (blacklist)
        self.matches_destructive(cmd)
    }
}

/// Information about a matched destructive pattern.
///
/// Produced by `Pack::matches_destructive` / `Pack::check`. All text fields
/// are `'static` because they are copied from the statically defined pattern.
#[derive(Debug, Clone)]
pub struct DestructiveMatch {
    /// Human-readable explanation of why this command is blocked.
    pub reason: &'static str,
    /// Optional pattern name for debugging and allowlisting.
    pub name: Option<&'static str>,
    /// Severity level of the matched pattern.
    pub severity: Severity,
    /// Detailed explanation of why this pattern is dangerous.
    /// More verbose than `reason`, intended for explain/verbose output modes.
    /// Falls back to `reason` when not provided.
    /// NOTE(review): the fallback happens in consumers of this struct, not
    /// here; this field is simply `None` when no explanation was given.
    pub explanation: Option<&'static str>,
}

/// Result of checking a command against all packs.
///
/// `CheckResult::allowed()` leaves every optional field as `None`; the
/// `blocked` / `matched` constructors fill all of them except
/// `pattern_name`, which stays `None` for unnamed patterns.
#[derive(Debug)]
pub struct CheckResult {
    /// Whether the command should be blocked (based on severity and mode).
    /// A matched pattern does not imply `true`: warn/log modes leave this `false`.
    pub blocked: bool,
    /// The reason for blocking/warning (if matched).
    pub reason: Option<String>,
    /// Which pack matched (if matched).
    pub pack_id: Option<PackId>,
    /// The name of the pattern that matched (if available).
    pub pattern_name: Option<String>,
    /// Severity of the matched pattern (if matched).
    pub severity: Option<Severity>,
    /// Decision mode applied (if matched).
    pub decision_mode: Option<DecisionMode>,
}

impl CheckResult {
    /// Build the "nothing matched" result: not blocked, all details `None`.
    #[must_use]
    pub const fn allowed() -> Self {
        Self {
            blocked: false,
            reason: None,
            pack_id: None,
            pattern_name: None,
            severity: None,
            decision_mode: None,
        }
    }

    /// Build a result for a matched pattern, recording its identity and severity.
    ///
    /// The decision mode is `severity.default_mode()`, and the `blocked` flag
    /// is true only when that mode blocks. Despite the name, the result is
    /// therefore not blocked for severities whose default mode is warn or log.
    #[must_use]
    pub fn blocked(
        reason: &str,
        pack_id: &str,
        pattern_name: Option<&str>,
        severity: Severity,
    ) -> Self {
        let mode = severity.default_mode();
        let is_blocking = mode.blocks();
        Self {
            blocked: is_blocking,
            reason: Some(reason.to_owned()),
            pack_id: Some(pack_id.to_owned()),
            pattern_name: pattern_name.map(str::to_owned),
            severity: Some(severity),
            decision_mode: Some(mode),
        }
    }

    /// Build a result for a matched pattern (may be blocked, warned, or logged
    /// depending on severity).
    ///
    /// Alias for [`Self::blocked`]; both produce the same value.
    #[must_use]
    pub fn matched(
        reason: &str,
        pack_id: &str,
        pattern_name: Option<&str>,
        severity: Severity,
    ) -> Self {
        Self::blocked(reason, pack_id, pattern_name, severity)
    }
}

/// Static pack metadata for lazy initialization.
///
/// This allows the registry to access pack IDs and keywords without
/// instantiating the full pack (avoiding pattern vector allocations).
///
/// `PackEntry::new` is `const`, so entries can live in a `static` table; the
/// actual `Pack` is created at most once, on the first `get_pack()` call.
pub struct PackEntry {
    /// Pack ID (e.g., "core.git", "database.postgresql").
    pub id: &'static str,
    /// Keywords for quick-reject filtering.
    pub keywords: &'static [&'static str],
    /// Function to build the full pack (called lazily).
    builder: fn() -> Pack,
    /// Cached pack instance (built on first access).
    instance: OnceLock<Pack>,
}

impl PackEntry {
    /// Construct an entry from its static metadata and a builder function.
    ///
    /// Nothing is built here; the cache cell starts empty.
    pub const fn new(
        id: &'static str,
        keywords: &'static [&'static str],
        builder: fn() -> Pack,
    ) -> Self {
        Self {
            instance: OnceLock::new(),
            builder,
            keywords,
            id,
        }
    }

    /// Return the pack, building and caching it on the first call.
    ///
    /// After calling the builder, this fills in the pack's acceleration data
    /// unless the builder already provided it:
    /// - an Aho-Corasick automaton over the pack's keywords, and
    /// - a `RegexSet` over the safe patterns that do not need the
    ///   backtracking engine, plus a flag recording whether that set covers
    ///   every safe pattern.
    ///
    /// # Panics
    ///
    /// Panics if the pack's keywords are not valid patterns for the Aho-Corasick automaton.
    /// This should be guaranteed by the static pack definitions and tests.
    pub fn get_pack(&self) -> &Pack {
        self.instance.get_or_init(|| {
            let mut built = (self.builder)();

            // Keyword automaton for the quick-reject check.
            if built.keyword_matcher.is_none() && !built.keywords.is_empty() {
                let automaton = aho_corasick::AhoCorasick::new(built.keywords)
                    .expect("pack keywords should be valid patterns");
                built.keyword_matcher = Some(automaton);
            }

            // RegexSet fast path for safe patterns.
            if built.safe_regex_set.is_none() && !built.safe_patterns.is_empty() {
                // Keep only sources the linear-time engine can handle.
                let linear_sources: Vec<&str> = built
                    .safe_patterns
                    .iter()
                    .filter_map(|p| {
                        let source = p.regex.as_str();
                        (!regex_engine::needs_backtracking_engine(source)).then_some(source)
                    })
                    .collect();

                // Complete only when no pattern was filtered out.
                built.safe_regex_set_is_complete =
                    linear_sources.len() == built.safe_patterns.len();

                // A failed compile leaves the set as `None`; matching then
                // falls back to per-pattern checks.
                if !linear_sources.is_empty() {
                    built.safe_regex_set = regex::RegexSet::new(linear_sources).ok();
                }
            }

            built
        })
    }

    /// Keyword-only pre-check that does not instantiate the pack.
    ///
    /// Returns `true` when the keyword list is empty, when any keyword occurs
    /// in `cmd` as a raw byte substring, or when a whitespace-containing
    /// keyword matches via `keyword_matches_substring`. The search is
    /// sequential because the Aho-Corasick automaton only exists on the
    /// instantiated pack.
    pub fn might_match(&self, cmd: &str) -> bool {
        if self.keywords.is_empty() {
            return true; // No keywords = always check patterns
        }

        let haystack = cmd.as_bytes();
        let raw_hit = self
            .keywords
            .iter()
            .any(|kw| memmem::find(haystack, kw.as_bytes()).is_some());

        raw_hit
            || self
                .keywords
                .iter()
                .filter(|kw| keyword_contains_whitespace(kw))
                .any(|kw| keyword_matches_substring(cmd, kw))
    }

    /// Test helper: has `get_pack()` already populated the cache?
    #[cfg(test)]
    pub fn is_built(&self) -> bool {
        self.instance.get().is_some()
    }
}

/// Registry of all available packs.
///
/// Holds references to `'static` pack entries together with two lookup
/// tables. All fields are private; access goes through the registry's methods.
pub struct PackRegistry {
    /// All registered pack entries (metadata + lazy instances).
    entries: Vec<&'static PackEntry>,

    /// Pack IDs organized by category for hierarchical enablement.
    categories: HashMap<String, Vec<&'static str>>,

    /// Index for fast pack lookup by ID.
    /// NOTE(review): the `usize` is presumably a position in `entries` — confirm
    /// against the constructor.
    index: HashMap<&'static str, usize>,
}

/// Precomputed keyword index for a specific enabled pack set.
///
/// Built once per config load and reused for each command evaluation, this
/// allows the evaluator to:
/// - Compute a conservative candidate pack set via a single global substring scan.
/// - Avoid repeated per-pack `might_match()` scans when iterating packs.
///
/// Isomorphism constraint: candidate selection must be a **superset** of the
/// legacy per-pack `PackEntry::might_match()` semantics (raw substring matches).
///
/// Pack sets are represented as `u128` bitmasks.
/// NOTE(review): presumably one bit per enabled pack, which would cap the
/// enabled set at 128 packs — confirm against the builder.
#[derive(Debug)]
pub struct EnabledKeywordIndex {
    /// Number of packs covered by this index (returned by `pack_count()`).
    pack_count: usize,
    /// Mask meaning "every pack is a candidate"; scans stop once it is reached.
    full_mask: u128,
    /// Packs that are always candidates; the starting value of every scan.
    /// A non-zero value makes `has_any_keyword` return `true` unconditionally.
    always_check_mask: u128,
    /// Automaton over the indexed keywords, if one was built.
    keyword_matcher: Option<aho_corasick::AhoCorasick>,
    /// Pack mask for each automaton pattern, indexed by the pattern index
    /// reported by the automaton.
    keyword_pack_masks: Vec<u128>,
    /// Keywords that are additionally tested with `keyword_matches_substring`.
    whitespace_keywords: Vec<&'static str>,
    /// Pack mask for each entry of `whitespace_keywords` (same order).
    whitespace_pack_masks: Vec<u128>,
}

impl EnabledKeywordIndex {
    /// Number of packs covered by this index.
    #[must_use]
    pub const fn pack_count(&self) -> usize {
        self.pack_count
    }

    /// Fast check: could any pack be a candidate for this command?
    ///
    /// Returns `true` when there are always-check packs, when the AC
    /// automaton finds at least one keyword substring in `cmd`, or when a
    /// whitespace keyword matches. This is cheaper than
    /// `candidate_pack_mask` because it short-circuits on the first hit and
    /// skips mask bookkeeping.
    ///
    /// When this returns `false`, `candidate_pack_mask` returns 0 and the
    /// more expensive `pack_aware_quick_reject` can be skipped entirely.
    #[inline]
    #[must_use]
    pub fn has_any_keyword(&self, cmd: &str) -> bool {
        if self.always_check_mask != 0 {
            return true;
        }

        if self
            .keyword_matcher
            .as_ref()
            .is_some_and(|ac| ac.is_match(cmd))
        {
            return true;
        }

        self.whitespace_keywords
            .iter()
            .any(|keyword| keyword_matches_substring(cmd, keyword))
    }

    /// Compute the mask of packs whose keywords occur in `cmd`.
    ///
    /// Starts from the always-check packs, adds every pack whose keyword the
    /// automaton finds, then adds packs whose whitespace keyword matches.
    /// Scanning stops early once the mask equals `full_mask`.
    ///
    /// The whitespace pass runs whether or not an automaton is present, so
    /// the result always agrees with [`Self::has_any_keyword`].
    ///
    /// # Panics
    ///
    /// Panics if the automaton reports a pattern index that has no entry in
    /// `keyword_pack_masks`; the two are expected to be built together.
    #[inline]
    #[must_use]
    pub fn candidate_pack_mask(&self, cmd: &str) -> u128 {
        let mut mask = self.always_check_mask;

        if let Some(ac) = &self.keyword_matcher {
            // Overlapping iteration is required to preserve the legacy substring
            // semantics: if "git" is a keyword and the command contains "gitlab",
            // we must include packs keyed on "git" even if a longer keyword also matches.
            for m in ac.find_overlapping_iter(cmd) {
                mask |= self.keyword_pack_masks[m.pattern().as_usize()];
                if mask == self.full_mask {
                    break;
                }
            }
        }

        // Not gated on the automaton: a missing automaton must not silently
        // drop whitespace-keyword packs from the candidate set.
        if mask != self.full_mask {
            let whitespace_entries = self
                .whitespace_keywords
                .iter()
                .zip(self.whitespace_pack_masks.iter());
            for (keyword, pack_mask) in whitespace_entries {
                if keyword_matches_substring(cmd, keyword) {
                    mask |= *pack_mask;
                    if mask == self.full_mask {
                        break;
                    }
                }
            }
        }

        mask
    }
}

/// Static pack entries - metadata is available without instantiating packs.
/// Packs are built lazily on first access.
///
/// Each entry pairs a pack ID with its quick-reject keywords and the
/// `create_pack` function for that pack. `PackRegistry::new` derives a pack's
/// category from the part of the ID before the first `.` and indexes entries
/// by their position in this array. The array length in the type must be kept
/// in sync with the number of entries below.
static PACK_ENTRIES: [PackEntry; 85] = [
    PackEntry::new("core.git", &["git"], core::git::create_pack),
    PackEntry::new(
        "core.filesystem",
        // `find` and `/find` are required so the quick-reject filter does
        // NOT drop `find ... -delete` invocations. Without them the pack
        // never runs and the find-delete-root-home / find-delete-general
        // rules can't fire — the historical bypass that motivated those
        // rules. See `core::filesystem::create_pack` for the matching
        // keyword list inside the pack.
        //
        // `unlink` and `/unlink` enable single-file destruction coverage
        // via the POSIX unlink(2) primitive (rule unlink-root-home /
        // unlink-general). Without these, `unlink /etc/passwd` quietly
        // bypasses dcg.
        //
        // `truncate` and `/truncate` enable in-place file-content
        // destruction via `truncate -s 0` (zero) and `-s -N` (shrink).
        // Without these, `truncate -s 0 /etc/passwd` silently empties
        // the file.
        //
        // `shred` and `/shred` enable overwrite-and-unlink coverage
        // (rule shred-unlink-root-home / shred-unlink-general). Without
        // these, `shred -fzu /etc/passwd` silently destroys the file
        // beyond forensic recovery.
        //
        // `tar` and `/tar` enable detection of `tar --remove-files
        // <sensitive-source>`, which is bytewise-equivalent to `rm -rf`
        // on the source tree (after archiving). Without these, agents
        // bypass dcg by switching from `rm -rf /etc` to
        // `tar --remove-files -cf /dev/null /etc`.
        //
        // `dd` and `/dd` enable file-level overwrite detection via
        // `dd if=/dev/zero of=<sensitive-file>` and similar — the
        // truncate-equivalent for files. Device-level dd (`of=/dev/sda`)
        // is system.disk's territory; this pack's dd rules exclude the
        // /dev path family entirely.
        //
        // `mv` and `/mv` enable detection of the cross-segment bypass
        // `mv /etc /tmp/x && rm -rf /tmp/x` — each segment is allowed
        // individually but the pair destroys /etc. The mv rule blocks
        // any mv whose source (or destination) is a sensitive path.
        //
        // `cp`, `ln`, and `rsync` (plus their `/`-prefixed forms) cover the
        // first phase-1 propagation variants: sensitive source
        // copied/symlinked/synced into a temp path, followed by a forced
        // recursive temp deletion.
        //
        // `>/`, `> /`, `>~`, `> ~`, `>$`, `> $`, `>"`, `> "`, `>'`, `> '`,
        // `&>`, `>&`, `>|`, `1>`, `2>` are the Bash output-redirect
        // quick-reject keywords for the `redirect-truncate-root-home` rule —
        // `> /etc/passwd` and its variants (quoted targets, numbered FDs,
        // `>|` force-overwrite, `&>` / `>&` combined stdout+stderr) truncate
        // the target file to zero bytes, the same destructive primitive as
        // `truncate -s 0`. Append (`>>`) is intentionally excluded by
        // negative lookbehind in the destructive regex; a command using
        // `>>` can still hit these keywords (e.g. `>> /x` contains `> /`)
        // and trigger the pack — quick-reject is by design overly broad
        // and the regex does the disambiguation.
        &[
            "rm",
            "/rm",
            "find",
            "/find",
            "unlink",
            "/unlink",
            "truncate",
            "/truncate",
            "shred",
            "/shred",
            "tar",
            "/tar",
            "dd",
            "/dd",
            "mv",
            "/mv",
            "cp",
            "/cp",
            "ln",
            "/ln",
            "rsync",
            "/rsync",
            ">/",
            "> /",
            ">~",
            "> ~",
            ">$",
            "> $",
            ">\"",
            "> \"",
            ">'",
            "> '",
            "&>",
            ">&",
            ">|",
            "1>",
            "2>",
        ],
        core::filesystem::create_pack,
    ),
    PackEntry::new("storage.s3", &["s3", "s3api"], storage::s3::create_pack),
    PackEntry::new(
        "storage.gcs",
        &["gsutil", "gcloud"],
        storage::gcs::create_pack,
    ),
    PackEntry::new("storage.minio", &["mc"], storage::minio::create_pack),
    PackEntry::new(
        "storage.azure_blob",
        &["az storage", "azcopy"],
        storage::azure_blob::create_pack,
    ),
    PackEntry::new("remote.rsync", &["rsync"], remote::rsync::create_pack),
    PackEntry::new(
        "remote.ssh",
        &["ssh", "ssh-keygen", "ssh-add", "ssh-agent", "ssh-keyscan"],
        remote::ssh::create_pack,
    ),
    PackEntry::new("remote.scp", &["scp"], remote::scp::create_pack),
    PackEntry::new(
        "cicd.github_actions",
        &["gh"],
        cicd::github_actions::create_pack,
    ),
    PackEntry::new(
        "cicd.gitlab_ci",
        &["glab", "gitlab-runner"],
        cicd::gitlab_ci::create_pack,
    ),
    PackEntry::new(
        "cicd.jenkins",
        &["jenkins-cli", "jenkins", "doDelete"],
        cicd::jenkins::create_pack,
    ),
    PackEntry::new("cicd.circleci", &["circleci"], cicd::circleci::create_pack),
    PackEntry::new("secrets.vault", &["vault"], secrets::vault::create_pack),
    PackEntry::new(
        "secrets.aws_secrets",
        &["aws", "secretsmanager", "ssm"],
        secrets::aws_secrets::create_pack,
    ),
    PackEntry::new(
        "secrets.onepassword",
        &["op"],
        secrets::onepassword::create_pack,
    ),
    PackEntry::new(
        "secrets.doppler",
        &["doppler"],
        secrets::doppler::create_pack,
    ),
    PackEntry::new("platform.github", &["gh"], platform::github::create_pack),
    PackEntry::new(
        "platform.gitlab",
        &["glab", "gitlab-rails", "gitlab-rake"],
        platform::gitlab::create_pack,
    ),
    PackEntry::new(
        "platform.railway",
        &[
            "railway",
            "backboard.railway.app",
            "backboard.railway.com",
            "railway.app/graphql",
            "railway.com/graphql",
            "projectDelete",
            "projectScheduleDelete",
            "environmentDelete",
            "serviceDelete",
            "volumeDelete",
            "volumeInstanceDelete",
            "volumeInstanceBackupDelete",
            "volumeInstanceBackupRestore",
            "volumeInstanceBackupScheduleUpdate",
            "volumeInstanceUpdate",
            "variableDelete",
            "variableUpsert",
            "variableCollectionUpsert",
            "deploymentRemove",
            "deploymentStop",
        ],
        platform::railway::create_pack,
    ),
    PackEntry::new("platform.modal", &["modal"], platform::modal::create_pack),
    PackEntry::new(
        "dns.cloudflare",
        &[
            "wrangler",
            "cloudflare",
            "api.cloudflare.com",
            "dns-records",
        ],
        dns::cloudflare::create_pack,
    ),
    PackEntry::new(
        "dns.route53",
        &["aws", "route53"],
        dns::route53::create_pack,
    ),
    PackEntry::new(
        "dns.generic",
        &["nsupdate", "dig", "host", "nslookup"],
        dns::generic::create_pack,
    ),
    PackEntry::new("email.ses", &["ses", "sesv2"], email::ses::create_pack),
    PackEntry::new(
        "email.sendgrid",
        &["sendgrid", "api.sendgrid.com"],
        email::sendgrid::create_pack,
    ),
    PackEntry::new(
        "email.mailgun",
        &["mailgun", "api.mailgun.net"],
        email::mailgun::create_pack,
    ),
    PackEntry::new(
        "email.postmark",
        &["postmark", "api.postmarkapp.com"],
        email::postmark::create_pack,
    ),
    PackEntry::new(
        "featureflags.flipt",
        &["flipt"],
        featureflags::flipt::create_pack,
    ),
    PackEntry::new(
        "featureflags.launchdarkly",
        &["ldcli", "launchdarkly"],
        featureflags::launchdarkly::create_pack,
    ),
    PackEntry::new(
        "featureflags.split",
        &["split", "api.split.io"],
        featureflags::split::create_pack,
    ),
    PackEntry::new(
        "featureflags.unleash",
        &["unleash"],
        featureflags::unleash::create_pack,
    ),
    PackEntry::new(
        "loadbalancer.haproxy",
        &["haproxy", "socat"],
        loadbalancer::haproxy::create_pack,
    ),
    PackEntry::new(
        "loadbalancer.nginx",
        &["nginx", "/etc/nginx"],
        loadbalancer::nginx::create_pack,
    ),
    PackEntry::new(
        "loadbalancer.traefik",
        &["traefik", "ingressroute"],
        loadbalancer::traefik::create_pack,
    ),
    PackEntry::new(
        "loadbalancer.elb",
        &[
            "elbv2",
            "delete-load-balancer",
            "delete-target-group",
            "deregister-targets",
            "delete-listener",
            "delete-rule",
            "deregister-instances-from-load-balancer",
        ],
        loadbalancer::elb::create_pack,
    ),
    PackEntry::new(
        "monitoring.splunk",
        &["splunk"],
        monitoring::splunk::create_pack,
    ),
    PackEntry::new(
        "monitoring.datadog",
        &["datadog-ci", "datadoghq", "datadog"],
        monitoring::datadog::create_pack,
    ),
    PackEntry::new(
        "monitoring.pagerduty",
        &["pd", "pagerduty", "api.pagerduty.com"],
        monitoring::pagerduty::create_pack,
    ),
    PackEntry::new(
        "monitoring.newrelic",
        &["newrelic", "api.newrelic.com", "graphql"],
        monitoring::newrelic::create_pack,
    ),
    PackEntry::new(
        "monitoring.prometheus",
        &[
            "promtool",
            "grafana-cli",
            "/api/v1/admin/tsdb/delete_series",
            "delete_series",
            "/api/dashboards",
            "/api/datasources",
            "/api/alert-notifications",
            "/etc/prometheus",
            "rules.d",
            "prometheusrule",
            "servicemonitor",
            "podmonitor",
        ],
        monitoring::prometheus::create_pack,
    ),
    PackEntry::new(
        "payment.stripe",
        &["stripe", "api.stripe.com"],
        payment::stripe::create_pack,
    ),
    PackEntry::new(
        "payment.braintree",
        &[
            "braintree",
            "braintreegateway.com",
            "braintree.",
            "gateway.customer.",
            "gateway.merchant_account.",
            "gateway.payment_method.",
            "gateway.subscription.",
        ],
        payment::braintree::create_pack,
    ),
    PackEntry::new(
        "payment.square",
        &[
            "square",
            "api.squareup.com",
            "connect.squareup.com",
            "connect.squareupsandbox.com",
        ],
        payment::square::create_pack,
    ),
    PackEntry::new(
        "messaging.kafka",
        &[
            "kafka-topics",
            "kafka-consumer-groups",
            "kafka-configs",
            "kafka-acls",
            "kafka-delete-records",
            "rpk",
        ],
        messaging::kafka::create_pack,
    ),
    PackEntry::new(
        "messaging.rabbitmq",
        &["rabbitmqadmin", "rabbitmqctl"],
        messaging::rabbitmq::create_pack,
    ),
    PackEntry::new("messaging.nats", &["nats"], messaging::nats::create_pack),
    PackEntry::new(
        "messaging.sqs_sns",
        &["aws", "sqs", "sns"],
        messaging::sqs_sns::create_pack,
    ),
    PackEntry::new(
        "search.elasticsearch",
        &[
            "elasticsearch",
            "9200",
            "_search",
            "_cluster",
            "_cat",
            "_doc",
            "_all",
            "_delete_by_query",
        ],
        search::elasticsearch::create_pack,
    ),
    PackEntry::new(
        "search.opensearch",
        &[
            "opensearch",
            "9200",
            "_search",
            "_cluster",
            "_cat",
            "_doc",
            "_all",
            "_delete_by_query",
        ],
        search::opensearch::create_pack,
    ),
    PackEntry::new(
        "search.algolia",
        &["algolia", "algoliasearch"],
        search::algolia::create_pack,
    ),
    PackEntry::new(
        "search.meilisearch",
        &["meili", "meilisearch", "7700", "/indexes", "/keys"],
        search::meilisearch::create_pack,
    ),
    PackEntry::new("backup.borg", &["borg"], backup::borg::create_pack),
    PackEntry::new("backup.rclone", &["rclone"], backup::rclone::create_pack),
    PackEntry::new("backup.restic", &["restic"], backup::restic::create_pack),
    PackEntry::new("backup.velero", &["velero"], backup::velero::create_pack),
    PackEntry::new(
        "database.postgresql",
        &[
            "psql",
            "dropdb",
            "createdb",
            "pg_dump",
            "pg_restore",
            "DROP",
            "TRUNCATE",
            "DELETE",
        ],
        database::postgresql::create_pack,
    ),
    PackEntry::new(
        "database.mysql",
        &["mysql", "mysqldump", "DROP", "TRUNCATE", "DELETE"],
        database::mysql::create_pack,
    ),
    PackEntry::new(
        "database.mongodb",
        &[
            "mongo",
            "mongosh",
            "mongodump",
            "mongorestore",
            "dropDatabase",
            "dropCollection",
        ],
        database::mongodb::create_pack,
    ),
    PackEntry::new(
        "database.redis",
        &["redis-cli", "FLUSHALL", "FLUSHDB", "DEBUG"],
        database::redis::create_pack,
    ),
    PackEntry::new(
        "database.sqlite",
        &["sqlite3", "DROP", "DELETE", "TRUNCATE"],
        database::sqlite::create_pack,
    ),
    PackEntry::new(
        "database.supabase",
        &[
            "supabase",
            "db reset",
            "db push",
            "migration repair",
            "migration down",
            "migration squash",
            "functions delete",
            "secrets unset",
            "storage rm",
            "projects delete",
            "orgs delete",
            "branches delete",
            "domains delete",
            "vanity-subdomains",
            "sso remove",
            "network-restrictions",
            "config push",
            "stop --no-backup",
        ],
        database::supabase::create_pack,
    ),
    PackEntry::new(
        "containers.docker",
        &["docker"],
        containers::docker::create_pack,
    ),
    PackEntry::new(
        "containers.compose",
        &["docker-compose", "docker compose"],
        containers::compose::create_pack,
    ),
    PackEntry::new(
        "containers.podman",
        &["podman"],
        containers::podman::create_pack,
    ),
    PackEntry::new(
        "kubernetes.kubectl",
        &["kubectl"],
        kubernetes::kubectl::create_pack,
    ),
    PackEntry::new("kubernetes.helm", &["helm"], kubernetes::helm::create_pack),
    PackEntry::new(
        "kubernetes.kustomize",
        &["kustomize"],
        kubernetes::kustomize::create_pack,
    ),
    PackEntry::new("cloud.aws", &["aws"], cloud::aws::create_pack),
    PackEntry::new(
        "cloud.gcp",
        &["gcloud", "gsutil", "bq"],
        cloud::gcp::create_pack,
    ),
    PackEntry::new("cloud.azure", &["az"], cloud::azure::create_pack),
    PackEntry::new(
        "cdn.cloudflare_workers",
        &["wrangler"],
        cdn::cloudflare_workers::create_pack,
    ),
    PackEntry::new("cdn.fastly", &["fastly"], cdn::fastly::create_pack),
    PackEntry::new(
        "cdn.cloudfront",
        &["cloudfront"],
        cdn::cloudfront::create_pack,
    ),
    PackEntry::new(
        "apigateway.aws",
        &["aws", "apigateway", "apigatewayv2"],
        apigateway::aws::create_pack,
    ),
    PackEntry::new(
        "apigateway.kong",
        &["kong", "deck", "8001"],
        apigateway::kong::create_pack,
    ),
    PackEntry::new(
        "apigateway.apigee",
        &["apigee", "apigeecli"],
        apigateway::apigee::create_pack,
    ),
    PackEntry::new(
        "infrastructure.terraform",
        &["terraform", "tofu"],
        infrastructure::terraform::create_pack,
    ),
    PackEntry::new(
        "infrastructure.ansible",
        &["ansible", "ansible-playbook"],
        infrastructure::ansible::create_pack,
    ),
    PackEntry::new(
        "infrastructure.pulumi",
        &["pulumi"],
        infrastructure::pulumi::create_pack,
    ),
    PackEntry::new(
        "system.disk",
        &[
            "dd",
            "mkfs",
            "mkswap",
            "fdisk",
            "parted",
            "wipefs",
            "mdadm",
            "btrfs",
            "dmsetup",
            "nbd-client",
            "pvremove",
            "vgremove",
            "lvremove",
            "vgreduce",
            "lvreduce",
            "lvresize",
            "pvmove",
            "lvconvert",
        ],
        system::disk::create_pack,
    ),
    PackEntry::new(
        "system.permissions",
        &["chmod", "chown", "setfacl"],
        system::permissions::create_pack,
    ),
    PackEntry::new(
        "system.services",
        &["systemctl", "service"],
        system::services::create_pack,
    ),
    PackEntry::new("strict_git", &["git"], strict_git::create_pack),
    PackEntry::new(
        "package_managers",
        &[
            "npm", "yarn", "pnpm", "pip", "cargo", "gem", "composer", "go",
        ],
        package_managers::create_pack,
    ),
];

impl PackRegistry {
    /// Collect all keywords from enabled packs.
    ///
    /// This is a **metadata-only** operation - does not instantiate packs.
    /// Keywords are accessed from static `PackEntry` metadata.
    ///
    /// Category IDs in `enabled_packs` are expanded to their sub-packs, and
    /// IDs that are not registered contribute nothing. Packs are visited in
    /// the deterministic order of `expand_enabled_ordered`, so the returned
    /// list is stable from run to run. Each keyword appears once, at the
    /// position of its first occurrence.
    #[must_use]
    pub fn collect_enabled_keywords(&self, enabled_packs: &HashSet<String>) -> Vec<&'static str> {
        let mut seen: HashSet<&'static str> = HashSet::new();
        let mut keywords = Vec::new();

        // Iterating the raw expanded `HashSet` would make the keyword order
        // depend on hash iteration order, which varies between runs.
        for pack_id in self.expand_enabled_ordered(enabled_packs) {
            let Some(&idx) = self.index.get(pack_id.as_str()) else {
                continue;
            };

            // Deduplicate while appending: `insert` returns `false` for a
            // keyword an earlier pack already contributed.
            keywords.extend(
                self.entries[idx]
                    .keywords
                    .iter()
                    .copied()
                    .filter(|kw| seen.insert(*kw)),
            );
        }

        keywords
    }

    /// Build a registry over every built-in pack in `PACK_ENTRIES`.
    ///
    /// This is a **metadata-only** operation: only IDs are read here, and no
    /// pack is instantiated until it is requested through `get()`.
    #[must_use]
    pub fn new() -> Self {
        // Pack ID -> position in `PACK_ENTRIES`.
        let index: HashMap<&'static str, usize> = PACK_ENTRIES
            .iter()
            .enumerate()
            .map(|(position, entry)| (entry.id, position))
            .collect();

        // Category -> member pack IDs, in declaration order. The category is
        // the text before the first '.' (e.g. "database" for
        // "database.postgresql"); an ID without a dot is its own category.
        let mut categories: HashMap<String, Vec<&'static str>> = HashMap::new();
        for entry in &PACK_ENTRIES {
            let category = match entry.id.split_once('.') {
                Some((prefix, _rest)) => prefix,
                None => entry.id,
            };
            categories
                .entry(category.to_owned())
                .or_default()
                .push(entry.id);
        }

        Self {
            entries: PACK_ENTRIES.iter().collect(),
            categories,
            index,
        }
    }

    /// Get the number of registered packs.
    ///
    /// Counts registry entries only; no pack is instantiated.
    #[must_use]
    pub fn pack_count(&self) -> usize {
        self.entries.len()
    }

    /// Look up a pack by its exact ID.
    ///
    /// The pack is instantiated lazily on first access. Returns `None` when
    /// `id` is not a registered pack ID.
    #[must_use]
    pub fn get(&self, id: &str) -> Option<&Pack> {
        let &position = self.index.get(id)?;
        Some(self.entries[position].get_pack())
    }

    /// List the ID of every registered pack, in registry entry order.
    ///
    /// This is a **metadata-only** operation - does not instantiate packs.
    #[must_use]
    pub fn all_pack_ids(&self) -> Vec<&'static str> {
        let mut ids = Vec::with_capacity(self.entries.len());
        for entry in self.entries.iter() {
            ids.push(entry.id);
        }
        ids
    }

    /// Get all categories.
    ///
    /// The names are the keys of the category map, so their order is
    /// unspecified; sort the result if a stable order is needed.
    #[must_use]
    pub fn all_categories(&self) -> Vec<&String> {
        self.categories.keys().collect()
    }

    /// List the pack IDs that belong to `category`.
    ///
    /// Returns an empty list when the category is unknown.
    /// This is a **metadata-only** operation - does not instantiate packs.
    #[must_use]
    pub fn packs_in_category(&self, category: &str) -> Vec<&'static str> {
        match self.categories.get(category) {
            Some(members) => members.clone(),
            None => Vec::new(),
        }
    }

    /// Expand enabled IDs so that an enabled category also enables its sub-packs.
    ///
    /// Every ID from `enabled` is kept in the result as-is, whether it names a
    /// category, a specific pack, or nothing known to the registry. For each
    /// ID that names a category, all pack IDs in that category are added.
    ///
    /// This is a **metadata-only** operation - does not instantiate packs.
    #[must_use]
    pub fn expand_enabled(&self, enabled: &HashSet<String>) -> HashSet<String> {
        // Start from the caller's IDs themselves (they may be specific packs).
        let mut expanded: HashSet<String> = enabled.iter().cloned().collect();

        // Then add the members of every ID that is a category name.
        for members in enabled.iter().filter_map(|id| self.categories.get(id)) {
            expanded.extend(members.iter().map(|member| (*member).to_owned()));
        }

        expanded
    }

    /// Expand enabled pack IDs and return them in a deterministic order.
    ///
    /// `check_command` relies on this ordering for consistent attribution
    /// when several packs could match the same command. IDs that are not
    /// registered packs (including bare category names) are dropped. The
    /// remaining IDs are ordered by priority tier:
    ///
    /// 0. **Tier 0 (safe)**: `safe.*` packs - safe patterns checked first to whitelist
    /// 1. **Tier 1 (core/storage/remote)**: `core.*`, `storage.*`, `remote.*` packs - most fundamental protections
    /// 2. **Tier 2 (system)**: `system.*` - disk, permissions, services
    /// 3. **Tier 3 (infrastructure)**: `infrastructure.*` - terraform, ansible, pulumi
    /// 4. **Tier 4 (apigateway/cloud/dns/platform/cdn/loadbalancer)**: `apigateway.*`, `cloud.*`, `dns.*`, `platform.*`, `cdn.*`, `loadbalancer.*`
    /// 5. **Tier 5 (kubernetes)**: `kubernetes.*` - kubectl, helm, kustomize
    /// 6. **Tier 6 (containers)**: `containers.*` - docker, compose, podman
    /// 7. **Tier 7 (database/search/messaging/backup)**: `database.*`, `search.*`, `messaging.*`, `backup.*`
    /// 8. **Tier 8 (`package_managers`)**: package manager protections
    /// 9. **Tier 9 (`strict_git`)**: extra git paranoia
    /// 10. **Tier 10 (services)**: `cicd.*`, `email.*`, `featureflags.*`, `secrets.*`, `monitoring.*`, `payment.*`
    /// 11. **Tier 11**: any other category
    ///
    /// Within each tier, packs are sorted lexicographically by ID.
    #[must_use]
    pub fn expand_enabled_ordered(&self, enabled: &HashSet<String>) -> Vec<String> {
        // Keep only IDs the registry actually knows as packs.
        let mut ordered: Vec<String> = Vec::new();
        for id in self.expand_enabled(enabled) {
            if self.index.contains_key(id.as_str()) {
                ordered.push(id);
            }
        }

        // Tuple comparison gives "tier first, then ID" in one step.
        ordered.sort_by(|lhs, rhs| (Self::pack_tier(lhs), lhs).cmp(&(Self::pack_tier(rhs), rhs)));

        ordered
    }

    /// Map a pack ID to its evaluation tier (lower = evaluated earlier).
    ///
    /// The tier depends only on the category, i.e. the part of the ID before
    /// the first `.`; an ID without a dot is treated as its own category.
    /// Safe packs (tier 0) come first so their safe patterns can whitelist a
    /// command before other packs' destructive patterns match. Categories
    /// not listed here fall into the last tier (11).
    fn pack_tier(pack_id: &str) -> u8 {
        let category = pack_id
            .split_once('.')
            .map_or(pack_id, |(prefix, _rest)| prefix);

        match category {
            "safe" => 0,
            "core" | "remote" | "storage" => 1,
            "system" => 2,
            "infrastructure" => 3,
            "apigateway" | "cdn" | "cloud" | "dns" | "loadbalancer" | "platform" => 4,
            "kubernetes" => 5,
            "containers" => 6,
            "backup" | "database" | "messaging" | "search" => 7,
            "package_managers" => 8,
            "strict_git" => 9,
            // CI/CD, email, feature flags, secrets, monitoring and payment tooling.
            "cicd" | "email" | "featureflags" | "monitoring" | "payment" | "secrets" => 10,
            // Unknown categories go last.
            _ => 11,
        }
    }

    /// Check a command against all enabled packs.
    ///
    /// Packs are evaluated in the deterministic order produced by
    /// `expand_enabled_ordered`, which keeps attribution consistent when
    /// several packs could match.
    ///
    /// # Evaluation order
    ///
    /// The command is first split with `split_command_segments`. When that
    /// yields more than one segment, each segment is checked on its own and
    /// the first blocking result is returned. Segment results that do not
    /// block are discarded. The full command is then checked once.
    ///
    /// Each individual check uses a two-pass approach:
    /// 1. **Safe patterns pass**: Check safe patterns across ALL enabled packs.
    ///    If any pack's safe pattern matches, the input is allowed immediately.
    ///    This enables "safe" packs (like `safe.cleanup`) to whitelist commands
    ///    that would otherwise be blocked by other packs.
    /// 2. **Destructive patterns pass**: Check destructive patterns across all packs.
    ///    The first matching destructive pattern determines the result.
    ///
    /// Returns a `CheckResult` containing:
    /// - `blocked`: whether the command should be blocked (based on severity)
    /// - `reason`: the human-readable explanation (if matched)
    /// - `pack_id`: which pack matched (if matched)
    /// - `pattern_name`: the specific pattern that matched (if available)
    /// - `severity`: the severity level of the matched pattern
    /// - `decision_mode`: the decision mode applied (deny/warn/log)
    #[must_use]
    pub fn check_command(&self, cmd: &str, enabled_packs: &HashSet<String>) -> CheckResult {
        // Category IDs become their sub-packs, in deterministic order.
        let ordered_packs = self.expand_enabled_ordered(enabled_packs);

        // Checking segments separately means a safe pattern that matches one
        // segment cannot shield a destructive pattern in another.
        let segments = split_command_segments(cmd);
        if segments.len() > 1 {
            let first_block = segments
                .iter()
                .map(|seg| self.check_command_single(seg, &ordered_packs))
                .find(|outcome| outcome.blocked);
            if let Some(outcome) = first_block {
                return outcome;
            }
        }

        // Patterns that legitimately span the whole input (heredoc-aware,
        // multi-segment pipeline patterns) still get a chance to fire here.
        self.check_command_single(cmd, &ordered_packs)
    }

    /// Run the two-pass safe/destructive check on one piece of input.
    ///
    /// `ordered_packs` must already be expanded and ordered; IDs that the
    /// registry cannot resolve are skipped. Returns `CheckResult::allowed()`
    /// when a safe pattern matches or when no destructive pattern matches.
    fn check_command_single(&self, cmd: &str, ordered_packs: &[String]) -> CheckResult {
        // Resolve and pre-filter packs once, so `might_match` is not called
        // again for each pass.
        let mut candidates: Vec<(&String, &Pack)> = Vec::new();
        for pack_id in ordered_packs {
            let Some(pack) = self.get(pack_id) else {
                continue;
            };
            if pack.might_match(cmd) {
                candidates.push((pack_id, pack));
            }
        }

        // Pass 1: a safe pattern in ANY candidate pack allows the command
        // immediately. This lets "safe" packs (like `safe.cleanup`) whitelist
        // commands across pack boundaries.
        if candidates.iter().any(|(_, pack)| pack.matches_safe(cmd)) {
            return CheckResult::allowed();
        }

        // Pass 2: the first destructive match, in pack order, decides.
        for (pack_id, pack) in &candidates {
            let Some(hit) = pack.matches_destructive(cmd) else {
                continue;
            };
            return CheckResult::matched(hit.reason, pack_id, hit.name, hit.severity);
        }

        CheckResult::allowed()
    }

    /// Describe every registered pack, marking which ones are enabled.
    ///
    /// A pack counts as enabled when its ID is in the expansion of `enabled`
    /// (see `expand_enabled`). The result is sorted by pack ID.
    ///
    /// Note: This instantiates packs to get pattern counts. For metadata-only
    /// listing (e.g., just IDs and enabled status), use `all_pack_ids()` instead.
    #[must_use]
    pub fn list_packs(&self, enabled: &HashSet<String>) -> Vec<PackInfo> {
        let active = self.expand_enabled(enabled);
        let mut listing = Vec::with_capacity(self.entries.len());

        for entry in self.entries.iter() {
            let pack = entry.get_pack();
            listing.push(PackInfo {
                id: pack.id.clone(),
                name: pack.name,
                description: pack.description,
                enabled: active.contains(&pack.id),
                safe_pattern_count: pack.safe_patterns.len(),
                destructive_pattern_count: pack.destructive_patterns.len(),
            });
        }

        // Sort by ID for consistent output.
        listing.sort_by(|lhs, rhs| lhs.id.cmp(&rhs.id));
        listing
    }

    /// Look up the static entry for a pack ID (metadata only, no pack instantiation).
    ///
    /// Returns `None` when `id` is not a registered pack ID.
    #[must_use]
    pub fn get_entry(&self, id: &str) -> Option<&PackEntry> {
        let &position = self.index.get(id)?;
        Some(self.entries[position])
    }

    /// Build an [`EnabledKeywordIndex`] for a precomputed ordered pack list.
    ///
    /// This is intended to run once per config load; callers reuse the returned
    /// index for each command evaluation.
    ///
    /// Bit `i` of every mask in the index stands for `ordered_packs[i]`. A pack
    /// with no keywords is recorded in the always-check mask. An ID without a
    /// registry entry is skipped and never gets its bit set. Keywords that
    /// contain whitespace are recorded in the whitespace table and also in
    /// the automaton's pattern list.
    ///
    /// Returns `None` if the ordered pack list exceeds the fixed bitset budget
    /// (currently 128 packs), or if the keyword automaton cannot be built. In
    /// either case callers should fall back to the legacy per-pack
    /// `might_match()` filtering.
    #[must_use]
    pub fn build_enabled_keyword_index(
        &self,
        ordered_packs: &[String],
    ) -> Option<EnabledKeywordIndex> {
        /// OR `bit` into the mask stored for `kw`, appending a new
        /// keyword/mask pair the first time `kw` is seen.
        fn register(
            kw: &'static str,
            bit: u128,
            slots: &mut HashMap<&'static str, usize>,
            keywords: &mut Vec<&'static str>,
            masks: &mut Vec<u128>,
        ) {
            if let Some(&slot) = slots.get(kw) {
                masks[slot] |= bit;
            } else {
                slots.insert(kw, keywords.len());
                keywords.push(kw);
                masks.push(bit);
            }
        }

        let pack_count = ordered_packs.len();
        if pack_count > 128 {
            return None;
        }

        // `1 << 128` would overflow a u128, so the full-width case is explicit.
        let full_mask = match pack_count {
            128 => u128::MAX,
            n => (1u128 << n) - 1,
        };

        let mut always_check_mask: u128 = 0;

        // Keywords fed to the automaton, with one pack mask per pattern.
        let mut literal_slots: HashMap<&'static str, usize> = HashMap::new();
        let mut patterns: Vec<&'static str> = Vec::new();
        let mut keyword_pack_masks: Vec<u128> = Vec::new();

        // Keywords containing whitespace, with one pack mask per keyword.
        let mut whitespace_slots: HashMap<&'static str, usize> = HashMap::new();
        let mut whitespace_keywords: Vec<&'static str> = Vec::new();
        let mut whitespace_pack_masks: Vec<u128> = Vec::new();

        for (pack_idx, pack_id) in ordered_packs.iter().enumerate() {
            let Some(entry) = self.get_entry(pack_id.as_str()) else {
                continue;
            };

            let bit = 1u128 << pack_idx;

            // No keywords means nothing to reject on: always a candidate.
            if entry.keywords.is_empty() {
                always_check_mask |= bit;
                continue;
            }

            for &kw in entry.keywords.iter().filter(|kw| !kw.is_empty()) {
                if keyword_contains_whitespace(kw) {
                    register(
                        kw,
                        bit,
                        &mut whitespace_slots,
                        &mut whitespace_keywords,
                        &mut whitespace_pack_masks,
                    );
                }

                register(
                    kw,
                    bit,
                    &mut literal_slots,
                    &mut patterns,
                    &mut keyword_pack_masks,
                );
            }
        }

        // A construction failure aborts the whole index (`?` on the `Option`).
        let keyword_matcher = if patterns.is_empty() {
            None
        } else {
            Some(aho_corasick::AhoCorasick::new(patterns).ok()?)
        };

        Some(EnabledKeywordIndex {
            pack_count,
            full_mask,
            always_check_mask,
            keyword_matcher,
            keyword_pack_masks,
            whitespace_keywords,
            whitespace_pack_masks,
        })
    }
}

// `Default` is provided so the registry can be built wherever a `Default`
// bound is required; it is exactly equivalent to calling `PackRegistry::new()`.
impl Default for PackRegistry {
    fn default() -> Self {
        Self::new()
    }
}

/// Information about a pack for display.
///
/// A plain summary record: the pack's identity and descriptive text, whether
/// it is enabled, and how many patterns of each kind it carries.
#[derive(Debug)]
pub struct PackInfo {
    /// Pack ID (a [`PackId`], e.g. "core" or "database.postgresql").
    pub id: PackId,
    /// Human-readable name.
    pub name: &'static str,
    /// Human-readable description of the pack.
    pub description: &'static str,
    /// Whether the pack is enabled.
    pub enabled: bool,
    /// Number of safe patterns in the pack.
    pub safe_pattern_count: usize,
    /// Number of destructive patterns in the pack.
    pub destructive_pattern_count: usize,
}

/// Global pack registry (lazily initialized).
///
/// The registry is constructed with [`PackRegistry::new`] the first time this
/// static is dereferenced and is shared for the rest of the process.
pub static REGISTRY: LazyLock<PackRegistry> = LazyLock::new(PackRegistry::new);

// =============================================================================
// External Pack Runtime Storage
// =============================================================================

/// Runtime storage for external packs loaded from YAML files.
///
/// External packs are loaded once at startup based on config.packs.custom_paths
/// and stored here for evaluation alongside built-in packs.
///
/// All fields are private; the store is populated by [`load_external_packs`]
/// and read through the accessor methods on this type.
pub struct ExternalPackStore {
    /// Loaded packs keyed by pack ID.
    packs: HashMap<String, Pack>,
    /// Keywords from all external packs (for quick rejection).
    /// `load_external_packs` only pushes a keyword that is not already present,
    /// so this list holds no duplicates.
    keywords: Vec<&'static str>,
    /// Warnings from pack loading (for diagnostics), one formatted message per
    /// pack file that failed to load.
    warnings: Vec<String>,
}

impl ExternalPackStore {
    /// Create an empty store.
    fn new() -> Self {
        Self {
            packs: HashMap::new(),
            keywords: Vec::new(),
            warnings: Vec::new(),
        }
    }

    /// Get a pack by ID.
    #[must_use]
    pub fn get(&self, id: &str) -> Option<&Pack> {
        self.packs.get(id)
    }

    /// Get all pack IDs.
    pub fn pack_ids(&self) -> impl Iterator<Item = &String> {
        self.packs.keys()
    }

    /// Iterate over all packs with their IDs.
    pub fn iter_packs(&self) -> impl Iterator<Item = (&String, &Pack)> {
        self.packs.iter()
    }

    /// Get all keywords from external packs.
    #[must_use]
    pub fn keywords(&self) -> &[&'static str] {
        &self.keywords
    }

    /// Get warnings from pack loading.
    #[must_use]
    pub fn warnings(&self) -> &[String] {
        &self.warnings
    }

    /// Check if any external packs are loaded.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.packs.is_empty()
    }

    /// Get the number of loaded packs.
    #[must_use]
    pub fn len(&self) -> usize {
        self.packs.len()
    }

    /// Check a command against all external packs.
    ///
    /// Returns the first match found, or None if no patterns match.
    #[must_use]
    pub fn check_command(&self, cmd: &str, enabled_ids: &HashSet<String>) -> Option<CheckResult> {
        // Check safe patterns first (across all enabled external packs)
        for (id, pack) in &self.packs {
            if !enabled_ids.contains(id) {
                continue;
            }
            if pack.matches_safe(cmd) {
                return Some(CheckResult::allowed());
            }
        }

        // Check destructive patterns
        for (id, pack) in &self.packs {
            if !enabled_ids.contains(id) {
                continue;
            }
            if let Some(matched) = pack.matches_destructive(cmd) {
                return Some(CheckResult {
                    blocked: true,
                    reason: Some(matched.reason.to_string()),
                    pack_id: Some(id.clone()),
                    pattern_name: matched.name.map(ToString::to_string),
                    severity: Some(matched.severity),
                    decision_mode: Some(matched.severity.default_mode()),
                });
            }
        }

        None
    }

    /// Check a command against all external packs, returning full match info.
    ///
    /// This is like `check_command` but returns additional details (explanation)
    /// needed for building `PatternMatch` in main.rs.
    #[must_use]
    pub fn check_command_with_details(
        &self,
        cmd: &str,
        enabled_ids: &HashSet<String>,
    ) -> Option<ExternalCheckResult> {
        // Check safe patterns first (across all enabled external packs)
        for (id, pack) in &self.packs {
            if !enabled_ids.contains(id) {
                continue;
            }
            if pack.matches_safe(cmd) {
                return Some(ExternalCheckResult {
                    blocked: false,
                    reason: None,
                    pack_id: None,
                    pattern_name: None,
                    severity: None,
                    decision_mode: None,
                    explanation: None,
                });
            }
        }

        // Check destructive patterns
        for (id, pack) in &self.packs {
            if !enabled_ids.contains(id) {
                continue;
            }
            if let Some(matched) = pack.matches_destructive(cmd) {
                return Some(ExternalCheckResult {
                    blocked: true,
                    reason: Some(matched.reason.to_string()),
                    pack_id: Some(id.clone()),
                    pattern_name: matched.name.map(ToString::to_string),
                    severity: Some(matched.severity),
                    decision_mode: Some(matched.severity.default_mode()),
                    explanation: matched.explanation.map(ToString::to_string),
                });
            }
        }

        None
    }
}

/// Extended result from external pack checking (includes explanation).
///
/// Produced by `ExternalPackStore::check_command_with_details`. When a safe
/// pattern matched, `blocked` is `false` and every optional field is `None`;
/// when a destructive pattern matched, `blocked` is `true` and the optional
/// fields describe that pattern.
#[derive(Debug)]
pub struct ExternalCheckResult {
    /// Whether the command should be blocked.
    pub blocked: bool,
    /// The reason for blocking (if matched).
    pub reason: Option<String>,
    /// Which pack matched (if matched).
    pub pack_id: Option<PackId>,
    /// The name of the pattern that matched (if available).
    pub pattern_name: Option<String>,
    /// Severity of the matched pattern (if matched).
    pub severity: Option<Severity>,
    /// Decision mode applied (if matched); filled from the matched severity's
    /// default mode.
    pub decision_mode: Option<DecisionMode>,
    /// Detailed explanation (if matched and available).
    pub explanation: Option<String>,
}

/// Global storage for external packs (initialized once at startup).
///
/// Filled by the first call to `load_external_packs`; until then
/// `get_external_packs` returns `None`.
static EXTERNAL_PACKS: OnceLock<ExternalPackStore> = OnceLock::new();

/// Load external packs from the given file paths into the global store.
///
/// Intended to be called once at startup, after the configuration has been
/// read. The store is initialized by the first call only: later calls ignore
/// their `paths` argument and return the store that already exists.
///
/// # Arguments
///
/// * `paths` - Expanded file paths (after glob/tilde expansion)
///
/// # Returns
///
/// Reference to the external pack store.
pub fn load_external_packs(paths: &[String]) -> &'static ExternalPackStore {
    EXTERNAL_PACKS.get_or_init(|| {
        let mut store = ExternalPackStore::new();

        if !paths.is_empty() {
            let loader = external::ExternalPackLoader::from_paths(paths);
            let outcome = loader.load_all_deduped();

            // Record every load failure as a human-readable diagnostic.
            store
                .warnings
                .extend(outcome.warnings.into_iter().map(|failure| {
                    format!(
                        "Failed to load external pack from {}: {}",
                        failure.path.display(),
                        failure.error
                    )
                }));

            // Convert each loaded definition into a runtime pack and register it.
            for loaded in outcome.packs {
                let pack_id = loaded.id.clone();
                let pack = loaded.pack.into_pack();

                // Keep the shared keyword list free of duplicates.
                for kw in pack.keywords {
                    if store.keywords.contains(kw) {
                        continue;
                    }
                    store.keywords.push(kw);
                }

                store.packs.insert(pack_id, pack);
            }
        }

        store
    })
}

/// Get the external pack store (returns None if not yet initialized).
///
/// This never triggers loading; it only reports whether `load_external_packs`
/// has already populated the global store.
#[must_use]
pub fn get_external_packs() -> Option<&'static ExternalPackStore> {
    EXTERNAL_PACKS.get()
}

/// Pre-compiled finders for core quick rejection (git/rm).
///
/// Substring finder for the literal `"git"`, built on first access.
// NOTE(review): `dead_code` is allowed, presumably because nothing in the
// crate references this finder at the moment — confirm before removing it.
#[allow(dead_code)]
static GIT_FINDER: LazyLock<memmem::Finder<'static>> = LazyLock::new(|| memmem::Finder::new("git"));
/// Substring finder for the literal `"rm"`, built on first access.
// NOTE(review): same `dead_code` allowance as `GIT_FINDER` — confirm it is still needed.
#[allow(dead_code)]
static RM_FINDER: LazyLock<memmem::Finder<'static>> = LazyLock::new(|| memmem::Finder::new("rm"));

/// Returns `true` for bytes that can be part of a "word": ASCII letters,
/// ASCII digits and the underscore.
#[inline]
const fn is_word_byte(byte: u8) -> bool {
    matches!(byte, b'_' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z')
}

/// Returns `true` when the keyword contains at least one ASCII whitespace byte.
#[inline]
fn keyword_contains_whitespace(keyword: &str) -> bool {
    keyword.as_bytes().iter().any(u8::is_ascii_whitespace)
}

/// Substring-level keyword test without word-boundary enforcement.
///
/// An empty keyword never matches. A keyword without whitespace is searched
/// as a literal byte sequence; a keyword containing whitespace is matched
/// part by part, so any run of ASCII whitespace in the haystack may separate
/// the parts.
#[inline]
fn keyword_matches_substring(haystack: &str, keyword: &str) -> bool {
    match keyword {
        "" => false,
        spaced if keyword_contains_whitespace(spaced) => {
            keyword_matches_with_whitespace(haystack, spaced, false)
        }
        literal => memmem::find(haystack.as_bytes(), literal.as_bytes()).is_some(),
    }
}

fn split_keyword_parts(keyword: &str) -> SmallVec<[&str; 4]> {
    let mut parts: SmallVec<[&str; 4]> = SmallVec::new();
    let mut start: Option<usize> = None;

    for (idx, byte) in keyword.bytes().enumerate() {
        if byte.is_ascii_whitespace() {
            if let Some(part_start) = start.take() {
                parts.push(&keyword[part_start..idx]);
            }
        } else if start.is_none() {
            start = Some(idx);
        }
    }

    if let Some(part_start) = start {
        parts.push(&keyword[part_start..]);
    }

    parts
}

fn keyword_matches_with_whitespace(
    haystack: &str,
    keyword: &str,
    enforce_boundaries: bool,
) -> bool {
    let parts = split_keyword_parts(keyword);
    if parts.is_empty() {
        return false;
    }

    let hay = haystack.as_bytes();
    let first = parts[0].as_bytes();
    if first.len() > hay.len() {
        return false;
    }

    let first_is_word = first.first().is_some_and(|b| is_word_byte(*b));
    let last = parts[parts.len() - 1].as_bytes();
    let last_is_word = last.last().is_some_and(|b| is_word_byte(*b));
    let mut offset = 0;

    while let Some(pos) = memmem::find(&hay[offset..], first) {
        let start = offset + pos;
        if enforce_boundaries && first_is_word {
            let start_ok = start == 0 || !is_word_byte(hay[start.saturating_sub(1)]);
            if !start_ok {
                offset = start + 1;
                continue;
            }
        }

        let mut idx = start + first.len();
        let mut matched = true;
        for part in parts.iter().skip(1) {
            let mut ws = idx;
            while ws < hay.len() && hay[ws].is_ascii_whitespace() {
                ws += 1;
            }
            if ws == idx {
                matched = false;
                break;
            }
            idx = ws;

            let part_bytes = part.as_bytes();
            if idx + part_bytes.len() > hay.len() || &hay[idx..idx + part_bytes.len()] != part_bytes
            {
                matched = false;
                break;
            }
            idx += part_bytes.len();
        }

        if matched && enforce_boundaries && last_is_word {
            let end_ok = idx == hay.len() || !is_word_byte(hay[idx]);
            if !end_ok {
                matched = false;
            }
        }

        if matched {
            return true;
        }

        offset = start + 1;
    }

    false
}

#[inline]
fn keyword_matches_span(span_text: &str, keyword: &str) -> bool {
    if keyword.is_empty() {
        return false;
    }

    if keyword_contains_whitespace(keyword) {
        return keyword_matches_with_whitespace(span_text, keyword, true);
    }

    let haystack = span_text.as_bytes();
    let needle = keyword.as_bytes();
    if needle.len() > haystack.len() {
        return false;
    }

    let first_is_word = needle.first().is_some_and(|b| is_word_byte(*b));
    let last_is_word = needle.last().is_some_and(|b| is_word_byte(*b));
    let mut offset = 0;

    while let Some(pos) = memmem::find(&haystack[offset..], needle) {
        let start = offset + pos;
        let end = start + needle.len();
        let start_ok =
            !first_is_word || start == 0 || !is_word_byte(haystack[start.saturating_sub(1)]);
        let end_ok = !last_is_word || end == haystack.len() || !is_word_byte(haystack[end]);

        if start_ok && end_ok {
            return true;
        }

        offset = start + 1;
    }

    false
}

/// Returns `true` as soon as one of `enabled_keywords` matches `span_text`
/// according to `keyword_matches_span`.
#[inline]
fn span_matches_any_keyword(span_text: &str, enabled_keywords: &[&str]) -> bool {
    for keyword in enabled_keywords {
        if keyword_matches_span(span_text, keyword) {
            return true;
        }
    }
    false
}

/// Decide whether the keyword scan must also run over the full normalized
/// command instead of only its executable spans.
#[inline]
fn should_fallback_to_full_normalized_keyword_scan(normalized: &str) -> bool {
    // Span-only matching looks at executable spans, but some keywords live
    // outside of them: `> /` (used by `redirect-truncate-root-home`) belongs
    // to a redirect target, so the destructive rule would never get a chance
    // to fire. Any `>` or `<` byte therefore forces the full scan. This also
    // covers path-prefix normalization that leaves a redirect glued to the
    // command word (`/usr/bin/cat>file` → `cat>file`).
    //
    // Append (`>>`) and read (`<`) redirects trigger the fallback as well:
    // the destructive regex's own negative lookbehind rejects append, and no
    // rule currently keys on read redirects, so the only cost is one extra
    // keyword pass that finds nothing.
    let has_redirect_byte = normalized.contains(|ch: char| matches!(ch, '>' | '<'));

    // An unquoted pipeline operator forces the full scan too.
    has_redirect_byte || contains_shell_pipeline_operator(normalized)
}

/// Returns `true` when `command` contains a pipe (`|` or `|&`) outside of
/// quotes.
///
/// Backslash-escaped bytes (outside single quotes) are skipped, quoted text
/// is ignored, and the logical-or operator `||` does not count as a pipeline.
#[inline]
fn contains_shell_pipeline_operator(command: &str) -> bool {
    let bytes = command.as_bytes();
    let len = bytes.len();
    let (mut single_quoted, mut double_quoted) = (false, false);
    let mut pos = 0usize;

    while pos < len {
        let advance = match bytes[pos] {
            // Escape: skip the backslash together with the escaped byte.
            b'\\' if !single_quoted && pos + 1 < len => 2,
            b'\'' if !double_quoted => {
                single_quoted = !single_quoted;
                1
            }
            b'"' if !single_quoted => {
                double_quoted = !double_quoted;
                1
            }
            // Everything inside quotes is data.
            _ if single_quoted || double_quoted => 1,
            // `||` is a logical operator, not a pipeline.
            b'|' if bytes.get(pos + 1) == Some(&b'|') => 2,
            b'|' => return true,
            _ => 1,
        };
        pos += advance;
    }

    false
}

/// Pack-aware quick-reject filter.
///
/// Answers whether pack evaluation can be skipped for `cmd` because it
/// contains none of the keywords of the enabled packs. Use this function
/// (rather than a "git"/"rm"-only check) whenever non-core packs are enabled.
///
/// This is a convenience wrapper around
/// `pack_aware_quick_reject_with_normalized` that discards the normalized
/// command.
///
/// # Performance
///
/// Uses SIMD-accelerated substring search via memchr as a fast prefilter,
/// then applies token-aware checks inside executable spans (via context
/// classification) to avoid substring false triggers.
///
/// # Arguments
///
/// * `cmd` - The command string to check
/// * `enabled_keywords` - Keywords from all enabled packs (from `PackRegistry::collect_enabled_keywords`)
///
/// # Returns
///
/// `true` if the command contains NO keywords (safe to skip pack checking)
/// `false` if the command contains at least one keyword (must check packs)
#[inline]
#[must_use]
pub fn pack_aware_quick_reject(cmd: &str, enabled_keywords: &[&str]) -> bool {
    let (should_reject, _normalized) =
        pack_aware_quick_reject_with_normalized(cmd, enabled_keywords);
    should_reject
}

/// Split a command into segments on shell sequence/pipeline separators.
///
/// The separators are `;`, `&&`, `||`, `|`, `|&`, bare `&`, and `\n`.
/// Separators inside single or double quotes do not split, and
/// backslash-escaped separators outside single quotes are skipped. Callers
/// that need patterns spanning a pipeline should evaluate the full command
/// after evaluating the returned segments.
///
/// Splitting closes a compound-command bypass class: a safe pattern matching
/// one segment (e.g. `docker ps`) must NOT silence a destructive pattern in a
/// different segment (`docker system prune`). Evaluating each segment on its
/// own means a safe prefix or suffix cannot shield a destructive segment.
///
/// When no separator or executable embedded shell form is present, the whole
/// (trimmed) command is returned as a single element. Command substitutions,
/// process substitutions, and backticks are returned before their enclosing
/// segment so destructive inner commands cannot hide behind a safe outer
/// command.
#[must_use]
pub fn split_command_segments(cmd: &str) -> Vec<&str> {
    let mut segments = Vec::new();
    collect_command_segments(cmd, 0, cmd.len(), 0, true, &mut segments);

    // Fall back to the whole command when the scan produced nothing.
    if segments.is_empty() {
        match cmd.trim() {
            "" => {}
            whole => segments.push(whole),
        }
    }

    segments
}

/// Maximum nesting depth `collect_command_segments` will recurse into.
/// Beyond this depth the remaining range is no longer scanned for nested
/// substitutions. The `find_matching_*` helpers take no depth argument, so
/// this limit does not apply to their recursion.
const MAX_SEGMENT_RECURSION: usize = 64;

/// Scan `cmd[start..end]` and append command segments to `segments`.
///
/// The scan tracks single/double quoting and backslash escapes. Embedded
/// shell forms (`$(( … ))`, `$( … )`, `<( … )` / `>( … )`, and backticks) are
/// scanned recursively; because the recursion happens while the enclosing
/// segment is still open, inner segments are pushed before the segment that
/// contains them.
///
/// * `recursion_depth` - current nesting level; once it exceeds
///   `MAX_SEGMENT_RECURSION` the range is pushed as-is (if plain segments are
///   being emitted) and not scanned further.
/// * `emit_plain_segments` - when `false` (used for arithmetic expansions),
///   the text of this range is not pushed; only segments found in nested
///   substitutions that recurse with `true` are.
fn collect_command_segments<'a>(
    cmd: &'a str,
    start: usize,
    end: usize,
    recursion_depth: usize,
    emit_plain_segments: bool,
    segments: &mut Vec<&'a str>,
) {
    // Depth guard: stop descending and keep the range as one opaque segment.
    if recursion_depth > MAX_SEGMENT_RECURSION {
        if emit_plain_segments {
            push_trimmed_segment(cmd, start, end, segments);
        }
        return;
    }

    let bytes = cmd.as_bytes();
    let mut segment_start = start;
    let mut i = start;
    let mut in_single = false;
    let mut in_double = false;

    while i < end {
        let b = bytes[i];

        // A backslash outside single quotes escapes the next byte: skip both.
        if b == b'\\' && !in_single && i + 1 < end {
            i += 2;
            continue;
        }
        // Quote toggles; a quote character inside the other quote kind is literal.
        if b == b'\'' && !in_double {
            in_single = !in_single;
            i += 1;
            continue;
        }
        if b == b'"' && !in_single {
            in_double = !in_double;
            i += 1;
            continue;
        }

        // `$(( … ))`: must be tried before `$(`. The interior is scanned with
        // plain segments suppressed. If no closing `))` is found, control
        // falls through to the `$(` check below.
        if !in_single && b == b'$' && i + 2 < end && bytes[i + 1] == b'(' && bytes[i + 2] == b'(' {
            if let Some(close) = find_matching_arithmetic_expansion(cmd, i + 3, end) {
                collect_command_segments(cmd, i + 3, close, recursion_depth + 1, false, segments);
                i = close + 2;
                continue;
            }
        }

        // `$( … )`: command substitution, also recognized inside double quotes.
        if !in_single && b == b'$' && i + 1 < end && bytes[i + 1] == b'(' {
            if let Some(close) = find_matching_command_substitution(cmd, i + 2, end) {
                collect_command_segments(cmd, i + 2, close, recursion_depth + 1, true, segments);
                i = close + 1;
                continue;
            }
        }

        // `<( … )` / `>( … )`: process substitution, only outside any quotes.
        if !in_single
            && !in_double
            && matches!(b, b'<' | b'>')
            && i + 1 < end
            && bytes[i + 1] == b'('
        {
            if let Some(close) = find_matching_command_substitution(cmd, i + 2, end) {
                collect_command_segments(cmd, i + 2, close, recursion_depth + 1, true, segments);
                i = close + 1;
                continue;
            }
        }

        // Backtick substitution, also recognized inside double quotes.
        if !in_single && b == b'`' {
            if let Some(close) = find_matching_backtick(cmd, i + 1, end) {
                collect_command_segments(cmd, i + 1, close, recursion_depth + 1, true, segments);
                i = close + 1;
                continue;
            }
        }

        // Separators inside quotes are data and never split.
        if in_single || in_double {
            i += 1;
            continue;
        }

        // Width in bytes of the separator starting at `i`, if there is one:
        // `;` and newline (1), `&` (1) or `&&` (2) unless the ampersand is
        // part of a redirection, `|` (1) or `||` / `|&` (2).
        let split_width: Option<usize> = if b == b';' || b == b'\n' {
            Some(1)
        } else if b == b'&' {
            if is_redirection_ampersand(bytes, i) {
                None
            } else {
                Some(usize::from(bytes.get(i + 1) == Some(&b'&')) + 1)
            }
        } else if b == b'|' {
            Some(usize::from(matches!(bytes.get(i + 1), Some(&b'|') | Some(&b'&'))) + 1)
        } else {
            None
        };

        // Close the current segment at the separator and start the next one after it.
        if let Some(width) = split_width {
            if emit_plain_segments {
                push_trimmed_segment(cmd, segment_start, i, segments);
            }
            i += width;
            segment_start = i;
            continue;
        }

        i += 1;
    }

    // Trailing segment after the last separator (or the whole range if none was found).
    if emit_plain_segments {
        push_trimmed_segment(cmd, segment_start, end, segments);
    }
}

/// Push `cmd[start..end]`, with surrounding whitespace trimmed, onto
/// `segments`. Ranges that are empty after trimming are dropped.
fn push_trimmed_segment<'a>(cmd: &'a str, start: usize, end: usize, segments: &mut Vec<&'a str>) {
    match cmd[start..end].trim() {
        "" => {}
        trimmed => segments.push(trimmed),
    }
}

/// Returns `true` when the `&` at `index` belongs to a redirection rather
/// than acting as a command separator: it is directly followed by `>`
/// (`&>`), or directly preceded by `<` or `>` (`<&`, `>&`).
fn is_redirection_ampersand(bytes: &[u8], index: usize) -> bool {
    let followed_by_gt = bytes.get(index + 1) == Some(&b'>');
    let preceded_by_angle = index > 0 && matches!(bytes.get(index - 1), Some(b'<' | b'>'));
    followed_by_gt || preceded_by_angle
}

/// Find the `)` that closes a command or process substitution whose body
/// starts at `start`, looking no further than `end`.
///
/// Quoted text and backslash escapes are skipped. Nested backticks,
/// arithmetic expansions, command substitutions and (outside double quotes)
/// process substitutions are skipped as a whole, so their closing delimiters
/// are not mistaken for ours. A `)` inside double quotes is ignored. Returns
/// `None` when no closing parenthesis is found.
fn find_matching_command_substitution(cmd: &str, start: usize, end: usize) -> Option<usize> {
    let bytes = cmd.as_bytes();
    let opens_paren_at = |at: usize| at < end && bytes[at] == b'(';
    let mut single_quoted = false;
    let mut double_quoted = false;
    let mut pos = start;

    while pos < end {
        let byte = bytes[pos];

        // Escapes, quote toggles, and single-quoted data.
        match byte {
            b'\\' if !single_quoted && pos + 1 < end => {
                pos += 2;
                continue;
            }
            b'\'' if !double_quoted => {
                single_quoted = !single_quoted;
                pos += 1;
                continue;
            }
            b'"' if !single_quoted => {
                double_quoted = !double_quoted;
                pos += 1;
                continue;
            }
            _ if single_quoted => {
                pos += 1;
                continue;
            }
            _ => {}
        }

        // Position right after a nested construct starting here, if any.
        let resume_at = match byte {
            b'`' => find_matching_backtick(cmd, pos + 1, end).map(|close| close + 1),
            b'$' if opens_paren_at(pos + 1) => {
                // Prefer `$(( … ))`; fall back to `$( … )` when it does not close.
                let arithmetic = if opens_paren_at(pos + 2) {
                    find_matching_arithmetic_expansion(cmd, pos + 3, end).map(|close| close + 2)
                } else {
                    None
                };
                arithmetic.or_else(|| {
                    find_matching_command_substitution(cmd, pos + 2, end).map(|close| close + 1)
                })
            }
            b'<' | b'>' if !double_quoted && opens_paren_at(pos + 1) => {
                find_matching_command_substitution(cmd, pos + 2, end).map(|close| close + 1)
            }
            _ => None,
        };
        if let Some(next) = resume_at {
            pos = next;
            continue;
        }

        if byte == b')' && !double_quoted {
            return Some(pos);
        }

        pos += 1;
    }

    None
}

/// Find the closing `))` of an arithmetic expansion whose body starts at
/// `start`, looking no further than `end`.
///
/// Returns the index of the first `)` of the closing pair. Quoted text and
/// backslash escapes are skipped, nested backticks and `$( … )` are skipped
/// as a whole, and plain parentheses inside the expression are balanced
/// before a closing pair is accepted. Returns `None` when no closing pair is
/// found.
fn find_matching_arithmetic_expansion(cmd: &str, start: usize, end: usize) -> Option<usize> {
    let bytes = cmd.as_bytes();
    let mut open_parens = 0usize;
    let mut single_quoted = false;
    let mut double_quoted = false;
    let mut pos = start;

    while pos < end {
        let byte = bytes[pos];

        // Escapes, quote toggles, and quoted data.
        match byte {
            b'\\' if !single_quoted && pos + 1 < end => {
                pos += 2;
                continue;
            }
            b'\'' if !double_quoted => {
                single_quoted = !single_quoted;
                pos += 1;
                continue;
            }
            b'"' if !single_quoted => {
                double_quoted = !double_quoted;
                pos += 1;
                continue;
            }
            _ if single_quoted || double_quoted => {
                pos += 1;
                continue;
            }
            _ => {}
        }

        // Closing delimiter of a nested construct starting here, if any.
        let nested_close = match byte {
            b'`' => find_matching_backtick(cmd, pos + 1, end),
            b'$' if pos + 1 < end && bytes[pos + 1] == b'(' => {
                find_matching_command_substitution(cmd, pos + 2, end)
            }
            _ => None,
        };
        if let Some(close) = nested_close {
            pos = close + 1;
            continue;
        }

        if byte == b'(' {
            open_parens += 1;
        } else if byte == b')' {
            if open_parens > 0 {
                open_parens -= 1;
            } else if pos + 1 < end && bytes[pos + 1] == b')' {
                return Some(pos);
            }
        }

        pos += 1;
    }

    None
}

/// Find the next unescaped backtick in `cmd[start..end]`.
///
/// A backslash skips the byte that follows it (when there is one inside the
/// range). Returns the index of the closing backtick, or `None` if the range
/// contains none.
fn find_matching_backtick(cmd: &str, start: usize, end: usize) -> Option<usize> {
    let bytes = cmd.as_bytes();
    let mut pos = start;

    while pos < end {
        let byte = bytes[pos];
        if byte == b'`' {
            return Some(pos);
        }
        let escapes_next = byte == b'\\' && pos + 1 < end;
        pos += if escapes_next { 2 } else { 1 };
    }

    None
}

/// Quick-reject check that also hands back the normalized command for reuse.
///
/// Returns `(should_reject, normalized_command)` where:
/// - `should_reject = true` means no keywords were found, so pack evaluation can be skipped
/// - `should_reject = false` means keywords were found (or none were supplied), so packs must be checked
/// - `normalized_command` is the normalized form (can be reused for pack evaluation)
///
/// When `should_reject = true` because the fast substring prefilter found
/// nothing and the command has no quoting/escape characters, the result is
/// `Cow::Borrowed(cmd)`: normalization was never computed.
#[inline]
#[must_use]
pub fn pack_aware_quick_reject_with_normalized<'a>(
    cmd: &'a str,
    enabled_keywords: &[&str],
) -> (bool, std::borrow::Cow<'a, str>) {
    // Be conservative without keywords: a pack may have empty/incorrect
    // keywords, so force evaluation instead of silently allowing everything.
    if enabled_keywords.is_empty() {
        return (false, normalize_command(cmd));
    }

    // Cheap prefilter on the raw command: literal keyword bytes first, then
    // whitespace-tolerant matching for keywords that contain whitespace.
    let raw = cmd.as_bytes();
    let literal_hit = enabled_keywords
        .iter()
        .any(|keyword| memmem::find(raw, keyword.as_bytes()).is_some());
    let prefilter_hit = literal_hit
        || enabled_keywords.iter().any(|keyword| {
            keyword_contains_whitespace(keyword) && keyword_matches_substring(cmd, keyword)
        });

    if !prefilter_hit {
        // Backslashes and quotes can hide a keyword (g\it -> git, g'i't -> git),
        // so only skip normalization when none of them are present.
        let may_be_obfuscated = raw.iter().any(|&b| matches!(b, b'\\' | b'\'' | b'"'));
        if !may_be_obfuscated {
            // The caller won't need the normalized form since we're rejecting.
            return (true, std::borrow::Cow::Borrowed(cmd));
        }
    }

    // Gate on a normalized view so harmless quoting or path prefixes on
    // *executed command words* don't cause false skips.
    //
    // Example: `" /usr/bin/git" reset --hard` should NOT quick-reject.
    let normalized = normalize_command(cmd);
    let cmd_for_spans = normalized.as_ref();

    let spans = crate::context::classify_command(cmd_for_spans);
    for span in spans.executable_spans() {
        let span_text = span.text(cmd_for_spans);
        if !span_text.is_empty() && span_matches_any_keyword(span_text, enabled_keywords) {
            return (false, normalized);
        }
    }

    // Bash output redirects keep their target outside the executable span, so
    // the span-only gate misses keywords like `> /` (used by
    // redirect-truncate-root-home). The fallback re-scans the full normalized
    // command for any enabled keyword; it also covers path-prefix
    // normalization that glues a redirect to the command word
    // (`/usr/bin/cat>file` → `cat>file`). The fallback only widens *which
    // string* gets scanned, not what counts as a match, and it applies whether
    // or not any executable span was seen.
    let keyword_outside_spans = should_fallback_to_full_normalized_keyword_scan(cmd_for_spans)
        && span_matches_any_keyword(cmd_for_spans, enabled_keywords);

    // No keyword anywhere means pack checking is safe to skip.
    (!keyword_outside_spans, normalized)
}

#[cfg(test)]
mod tests {
    use super::*;

    // With no keywords at all, the gate must never allow skipping evaluation.
    #[test]
    fn pack_aware_quick_reject_empty_keywords_is_conservative() {
        for cmd in ["ls -la", "git reset --hard"] {
            assert!(
                !pack_aware_quick_reject(cmd, &[]),
                "empty keyword list must not allow skipping pack evaluation"
            );
        }
    }

    // Keywords embedded in larger tokens must not defeat the quick-reject.
    #[test]
    fn pack_aware_quick_reject_ignores_substring_matches() {
        let keywords = ["git", "rm", "docker"];
        let cases = [
            (
                "cat .gitignore",
                "substring in filename should not trigger keyword gating",
            ),
            (
                "echo digit",
                "substring in a larger token should not trigger keyword gating",
            ),
        ];

        for (cmd, why) in cases {
            assert!(pack_aware_quick_reject(cmd, &keywords), "{why}");
        }
    }

    // A keyword standing as its own command word must force pack evaluation.
    #[test]
    fn pack_aware_quick_reject_keeps_word_boundary_matches() {
        let keywords = ["git"];
        let cases = [
            (
                "git status",
                "word boundary keyword should prevent quick-reject",
            ),
            (
                "/usr/bin/git status",
                "absolute path to git should still prevent quick-reject",
            ),
        ];

        for (cmd, why) in cases {
            assert!(!pack_aware_quick_reject(cmd, &keywords), "{why}");
        }
    }

    // A quoted command word with a redirection glued to it must still be gated.
    #[test]
    fn pack_aware_quick_reject_does_not_skip_attached_redirection_bypass() {
        let keywords = ["git"];
        let cmd = r#""git">/dev/null reset --hard"#;

        let rejected = pack_aware_quick_reject(cmd, &keywords);

        assert!(
            !rejected,
            "quoted command words with attached redirections must still trigger pack evaluation"
        );
    }

    // Payloads piped into a shell must be gated; quoted pipe characters are plain data.
    #[test]
    fn pack_aware_quick_reject_does_not_skip_piped_code_payload() {
        let keywords = ["rm"];
        // (command, expected quick-reject result, failure message)
        let cases = [
            (
                "echo rm -rf / | sh",
                false,
                "pipeline payloads must still trigger pack evaluation",
            ),
            (
                "echo rm -rf / |& sh",
                false,
                "stderr-merged pipeline payloads must still trigger pack evaluation",
            ),
            (
                r#"echo "rm -rf / | sh""#,
                true,
                "quoted pipe characters are data",
            ),
        ];

        for (cmd, expect_reject, why) in cases {
            assert!(
                pack_aware_quick_reject(cmd, &keywords) == expect_reject,
                "{why}"
            );
        }
    }

    // A keyword that only appears as quoted assignment data keeps the fast path.
    #[test]
    fn pack_aware_quick_reject_keeps_variable_assignment_data_fast_path() {
        let keywords = ["rm"];
        let cmd = r#"VAR='rm -rf /'; echo "$VAR""#;

        let rejected = pack_aware_quick_reject(cmd, &keywords);

        assert!(
            rejected,
            "safe variable assignments should not lose the quick-reject fast path"
        );
    }

    /// Regression test: rm commands should NOT be quick-rejected regardless of target directory.
    /// Bug git_safety_guard-nwu: "rm -rf build" was incorrectly allowed while "rm -rf src" was blocked.
    #[test]
    fn pack_aware_quick_reject_rm_commands_not_rejected() {
        let keywords = ["rm"];

        // Every one of these must reach pack evaluation, whatever the target is.
        let commands = [
            "rm -rf build",
            "rm -rf /tmp/foo",
            r#"rm -rf "$TMPDIR/foo""#,
            r#"rm -r -f "$TMPDIR/foo""#,
            r#"rm --recursive --force "$TMPDIR/foo""#,
            "rm -rf src",
            "rm -rf target",
            "rm -rf dist",
            "rm -rf node_modules",
            "rm -rf foo",
        ];

        for cmd in commands {
            assert!(
                !pack_aware_quick_reject(cmd, &keywords),
                "{cmd} should NOT be quick-rejected"
            );
        }
    }

    /// Regression test for bug git_safety_guard-nwu (build directory removals were
    /// incorrectly allowed): walks the whole flow from the "core" category through
    /// keyword collection to the quick-reject decision.
    #[test]
    fn full_flow_core_category_rm_commands_blocked() {
        // Mirror the default config, where enabled_pack_ids() returns {"core"}.
        let enabled: HashSet<String> = HashSet::from(["core".to_string()]);
        let keywords = REGISTRY.collect_enabled_keywords(&enabled);

        // core.filesystem is expected to contribute the "rm" keyword.
        assert!(
            keywords.contains(&"rm"),
            "Keywords should include 'rm' from core.filesystem. Got: {keywords:?}"
        );

        // With those keywords, no rm command may be quick-rejected.
        for cmd in [
            "rm -rf build",
            "rm -rf src",
            "rm -rf target",
            "rm -rf dist",
            "rm -rf node_modules",
            "rm -rf foo",
        ] {
            assert!(
                !pack_aware_quick_reject(cmd, &keywords),
                "{cmd} should NOT be quick-rejected with core keywords"
            );
        }
    }

    /// Checks splitting on `;`, `&&`, `||`, `|`, `|&` and `&`, including a
    /// trailing `&` that must not produce an extra segment.
    #[test]
    fn split_command_segments_handles_basic_separators() {
        // (command line, expected segments)
        let cases: [(&str, Vec<&str>); 8] = [
            ("docker ps", vec!["docker ps"]),
            (
                "docker ps; docker logs x",
                vec!["docker ps", "docker logs x"],
            ),
            (
                "docker ps && docker logs x",
                vec!["docker ps", "docker logs x"],
            ),
            ("a || b", vec!["a", "b"]),
            ("docker ps | grep nginx", vec!["docker ps", "grep nginx"]),
            ("docker ps |& grep nginx", vec!["docker ps", "grep nginx"]),
            (
                "docker ps & docker logs foo",
                vec!["docker ps", "docker logs foo"],
            ),
            ("docker logs foo &", vec!["docker logs foo"]),
        ];

        for (input, expected) in cases {
            assert_eq!(split_command_segments(input), expected);
        }
    }

    /// An `&` that belongs to a redirection (`2>&1`, `>&2`, `&>`, `&>>`, `<&0`)
    /// is not a command separator; real separators around it still split.
    #[test]
    fn split_command_segments_does_not_split_redirection_ampersands() {
        // (command line, expected segments)
        let cases: [(&str, Vec<&str>); 5] = [
            (
                "command 2>&1 | tee log.txt",
                vec!["command 2>&1", "tee log.txt"],
            ),
            ("echo x >&2", vec!["echo x >&2"]),
            ("make &> build.log", vec!["make &> build.log"]),
            (
                "make &>> build.log && echo done",
                vec!["make &>> build.log", "echo done"],
            ),
            ("cat <&0; echo done", vec!["cat <&0", "echo done"]),
        ];

        for (input, expected) in cases {
            assert_eq!(split_command_segments(input), expected);
        }
    }

    /// Separators that are quoted or backslash-escaped are literal text, so each
    /// of these inputs must come back as one unsplit segment.
    #[test]
    fn split_command_segments_respects_quotes_and_escapes() {
        let unsplit_inputs = [
            // Separators inside double or single quotes.
            r#"echo "a; b && c || d""#,
            "echo 'a; b && c'",
            // Escaped separators outside single quotes.
            r"echo a\; b",
            r"echo a\| b",
            r"echo a\& b",
        ];

        for literal in unsplit_inputs {
            assert_eq!(split_command_segments(literal), vec![literal]);
        }
    }

    /// Commands nested in `$(...)`, backticks, `<(...)` and `>(...)` are listed
    /// ahead of the command that contains them, innermost first. Quoted
    /// process-substitution text, single-quoted `$(...)` and `$((...))` yield
    /// no inner segment.
    #[test]
    fn split_command_segments_extracts_command_substitutions() {
        let assert_segments = |command: &str, expected: Vec<&str>| {
            assert_eq!(split_command_segments(command), expected);
        };

        // `$(...)` and backtick command substitution.
        assert_segments(
            "echo $(docker system prune -a --volumes)",
            vec![
                "docker system prune -a --volumes",
                "echo $(docker system prune -a --volumes)",
            ],
        );
        assert_segments(
            r#"echo "$(op item delete "Prod Secret")""#,
            vec![
                r#"op item delete "Prod Secret""#,
                r#"echo "$(op item delete "Prod Secret")""#,
            ],
        );
        assert_segments(
            "echo `velero backup delete nightly`",
            vec![
                "velero backup delete nightly",
                "echo `velero backup delete nightly`",
            ],
        );

        // Nested substitutions are reported innermost first.
        assert_segments(
            r#"echo "$(echo "$(op item delete Prod)")""#,
            vec![
                "op item delete Prod",
                r#"echo "$(op item delete Prod)""#,
                r#"echo "$(echo "$(op item delete Prod)")""#,
            ],
        );

        // Unquoted process substitution, input and output forms.
        assert_segments(
            "cat <(docker system prune -a --volumes)",
            vec![
                "docker system prune -a --volumes",
                "cat <(docker system prune -a --volumes)",
            ],
        );
        assert_segments(
            "cat >(docker system prune -a --volumes)",
            vec![
                "docker system prune -a --volumes",
                "cat >(docker system prune -a --volumes)",
            ],
        );

        // Double-quoted process-substitution syntax stays a single segment.
        let quoted_input = r#"echo "<(docker system prune -a --volumes)""#;
        assert_segments(quoted_input, vec![quoted_input]);
        let quoted_output = r#"echo ">(docker system prune -a --volumes)""#;
        assert_segments(quoted_output, vec![quoted_output]);

        // ... also when the quoted text sits inside a real `$(...)`.
        assert_segments(
            r#"echo "$(printf "%s" "<(docker system prune -a --volumes)")""#,
            vec![
                r#"printf "%s" "<(docker system prune -a --volumes)""#,
                r#"echo "$(printf "%s" "<(docker system prune -a --volumes)")""#,
            ],
        );

        // Single-quoted `$(...)` and `$((...))` produce no inner segment.
        let single_quoted = "echo '$(docker system prune)'";
        assert_segments(single_quoted, vec![single_quoted]);
        let arithmetic = "echo $((rm -rf /))";
        assert_segments(arithmetic, vec![arithmetic]);
    }

    /// Double-quoted `<(...)` / `>(...)` text must be masked by
    /// `sanitize_for_pattern_matching` and ignored by the docker pack, while
    /// unquoted process substitution must still be detected.
    #[test]
    fn quoted_process_substitution_literals_are_masked_before_pack_matching() {
        let docker = crate::packs::containers::docker::create_pack();

        // Sanitized form of each quoted literal, keyed by its redirection operator.
        let masked_literals = [
            ("<", r#"echo "<(docker system prune -a --volumes)""#),
            (">", r#"echo ">(docker system prune -a --volumes)""#),
        ]
        .map(|(op, command)| (op, crate::context::sanitize_for_pattern_matching(command)));

        for (op, masked) in &masked_literals {
            assert!(
                !masked.as_ref().contains("docker system prune"),
                "literal {op}(...) text inside double quotes must be masked before pack matching"
            );
        }
        for (op, masked) in &masked_literals {
            assert!(
                docker.check(masked.as_ref()).is_none(),
                "literal {op}(...) text inside double quotes must not be treated as executable"
            );
        }

        // Unquoted process substitution is checked on the raw command.
        let real_substitutions = [
            (
                "cat <(docker system prune -a --volumes)",
                "real process substitution must still expose destructive inner commands",
            ),
            (
                "cat >(docker system prune -a --volumes)",
                "real output process substitution must still expose destructive inner commands",
            ),
        ];
        for (command, why) in real_substitutions {
            assert!(docker.check(command).is_some(), "{why}");
        }
    }

    /// A read-only outer command must not hide a destructive command placed
    /// inside a `$(...)` substitution (1Password, Velero and Jenkins packs).
    #[test]
    fn safe_outer_command_with_destructive_substitution_is_blocked() {
        let op_command = r#"op item get $(op item delete "Prod Secret")"#;
        let onepassword = crate::packs::secrets::onepassword::create_pack();
        let op_hit = onepassword.check(op_command);
        assert!(
            op_hit.is_some(),
            "safe op item get must not hide destructive op item delete in command substitution"
        );

        let velero_command = "velero backup get $(velero backup delete nightly)";
        let velero = crate::packs::backup::velero::create_pack();
        let velero_hit = velero.check(velero_command);
        assert!(
            velero_hit.is_some(),
            "safe velero backup get must not hide destructive velero backup delete"
        );

        let jenkins_command = "curl -X GET https://jenkins.example/api/json \
                               $(curl -X POST https://jenkins.example/job/my-job/doDelete)";
        let jenkins = crate::packs::cicd::jenkins::create_pack();
        let jenkins_hit = jenkins.check(jenkins_command);
        assert!(
            jenkins_hit.is_some(),
            "safe Jenkins GET must not hide destructive doDelete POST"
        );
    }

    /// End-to-end regression for the compound-command bypass class: a harmless
    /// first segment must not stop the docker pack from flagging a destructive
    /// later segment.
    #[test]
    fn compound_command_with_safe_prefix_and_destructive_suffix_is_blocked() {
        // The pack's own `check` is exercised on the raw command because that
        // is what the evaluator ultimately calls.
        let pack = crate::packs::containers::docker::create_pack();

        // (compound command, failure message)
        let scenarios = [
            (
                "docker ps; docker system prune -a --volumes",
                "safe prefix (`docker ps`) must not short-circuit destructive \
                 `docker system prune` in the same compound command",
            ),
            (
                "docker ps && docker system prune -a --volumes",
                "`&&`-joined compound command with safe prefix must still block",
            ),
            (
                "echo ok; docker system prune -a --volumes",
                "unrelated safe prefix must not bypass destructive suffix",
            ),
        ];

        for (command, why) in scenarios {
            assert!(pack.check(command).is_some(), "{why}");
        }
    }

    /// Regression for a critical bypass class such as
    /// `docker ps; docker system prune -a --volumes`.
    #[test]
    fn pack_aware_quick_reject_does_not_skip_compound_command_with_keyword_in_later_segment() {
        // Failure mode being guarded against: the cheap substring check sees
        // `docker` and does not return early, but if only `docker ps` is
        // classified as an executable span and the trailing
        // `docker system prune …` is misclassified (e.g. as data, because the
        // separator detection is off by one), the span gate never sees the
        // destructive keyword (`prune`). The whole command would then be
        // quick-rejected, pack evaluation skipped, and the destructive second
        // segment let through.
        //
        // Contract enforced here: whenever an enabled keyword occurs anywhere
        // in the command's bytes, quick-reject must NOT return true. Deciding
        // safe vs. destructive is left to the later pack evaluation and is not
        // this test's concern.
        let keywords: Vec<&str> = vec!["docker", "prune", "rmi", "volume"];

        // (compound command, failure message)
        let scenarios = [
            (
                "docker ps; docker system prune -a --volumes",
                "quick-reject must not skip evaluation for compound command \
                 containing destructive second segment",
            ),
            (
                "docker ps && docker system prune -a --volumes",
                "quick-reject must not skip evaluation for `&&`-joined compound \
                 command with destructive second segment",
            ),
            (
                "echo hi; docker system prune -a --volumes",
                "quick-reject must not skip evaluation when only the second \
                 segment references a pack keyword",
            ),
        ];

        for (command, why) in scenarios {
            assert!(!pack_aware_quick_reject(command, &keywords), "{why}");
        }
    }

    /// Multi-word keywords must still match when the command separates the
    /// words with more than one space.
    #[test]
    fn pack_aware_quick_reject_handles_multiword_keywords_with_extra_space() {
        let keywords: Vec<&str> = vec![
            "gcloud storage",
            "gcloud alpha storage",
            "gcloud beta storage",
        ];

        // (command, failure message)
        let scenarios = [
            (
                "gcloud   storage rm gs://bucket",
                "multi-word keywords should match even with extra whitespace",
            ),
            (
                "gcloud alpha   storage rm gs://bucket",
                "release-track multi-word keywords should match even with extra whitespace",
            ),
            (
                "gcloud beta storage rm gs://bucket",
                "beta release-track multi-word keywords should not be quick-rejected",
            ),
        ];

        for (command, why) in scenarios {
            assert!(!pack_aware_quick_reject(command, &keywords), "{why}");
        }
    }

    /// The keyword index must select `storage.azure_blob` as a candidate even
    /// when the command has extra spaces between the keyword's words.
    #[test]
    fn enabled_keyword_index_matches_multiword_keyword_with_extra_space() {
        let enabled: HashSet<String> = HashSet::from(["storage.azure_blob".to_string()]);
        let ordered = REGISTRY.expand_enabled_ordered(&enabled);

        let index = REGISTRY
            .build_enabled_keyword_index(&ordered)
            .expect("keyword index should build for small pack set");
        let mask = index.candidate_pack_mask("az   storage blob delete");

        // The mask is tested at the pack's position in the ordered list.
        let pack_idx = ordered
            .iter()
            .position(|id| id == "storage.azure_blob")
            .expect("storage.azure_blob should be present in ordered list");
        let pack_bit = (mask >> pack_idx) & 1;

        assert_eq!(
            pack_bit, 1,
            "candidate mask should include storage.azure_blob when whitespace varies"
        );
    }

    /// gcloud-wide flags placed between `gcloud` (or a release track) and
    /// `storage` must not defeat either keyword gate for `storage.gcs`.
    #[test]
    fn storage_gcs_keyword_gate_handles_gcloud_wide_flags() {
        let enabled: HashSet<String> = HashSet::from(["storage.gcs".to_string()]);

        // Gate 1: quick-reject with the collected keywords.
        let keywords = REGISTRY.collect_enabled_keywords(&enabled);
        let quick_reject_cases = [
            (
                "gcloud --project prod storage rm gs://bucket",
                "gcloud-wide flags before storage must not quick-reject storage.gcs",
            ),
            (
                "gcloud alpha --project prod storage rm gs://bucket",
                "release-track flags before storage must not quick-reject storage.gcs",
            ),
        ];
        for (command, why) in quick_reject_cases {
            assert!(!pack_aware_quick_reject(command, &keywords), "{why}");
        }

        // Gate 2: candidate mask produced by the enabled-keyword index.
        let ordered = REGISTRY.expand_enabled_ordered(&enabled);
        let index = REGISTRY
            .build_enabled_keyword_index(&ordered)
            .expect("keyword index should build for small pack set");
        let pack_idx = ordered
            .iter()
            .position(|id| id == "storage.gcs")
            .expect("storage.gcs should be present in ordered list");
        let mask = index.candidate_pack_mask("gcloud --project prod storage rm gs://bucket");
        let pack_bit = (mask >> pack_idx) & 1;

        assert_eq!(
            pack_bit, 1,
            "candidate mask should include storage.gcs for flag-interleaved gcloud storage"
        );
    }

    /// With every registered pack enabled, commands that mention no pack
    /// keyword must make `has_any_keyword` return false.
    #[test]
    fn has_any_keyword_returns_false_for_unrelated_command() {
        let enabled = REGISTRY
            .all_pack_ids()
            .into_iter()
            .map(String::from)
            .collect::<HashSet<String>>();
        let ordered = REGISTRY.expand_enabled_ordered(&enabled);
        let index = REGISTRY
            .build_enabled_keyword_index(&ordered)
            .expect("should build index");

        // (command, failure message)
        let unrelated = [
            ("ls -la", "ls -la has no pack keywords"),
            ("echo hello world", "echo has no pack keywords"),
            ("cd /tmp", "cd has no pack keywords"),
        ];
        for (command, why) in unrelated {
            assert!(!index.has_any_keyword(command), "{why}");
        }
    }

    /// With every registered pack enabled, commands that start with a pack
    /// keyword must make `has_any_keyword` return true.
    #[test]
    fn has_any_keyword_returns_true_for_matching_command() {
        let enabled = REGISTRY
            .all_pack_ids()
            .into_iter()
            .map(String::from)
            .collect::<HashSet<String>>();
        let ordered = REGISTRY.expand_enabled_ordered(&enabled);
        let index = REGISTRY
            .build_enabled_keyword_index(&ordered)
            .expect("should build index");

        // (command, failure message)
        let matching = [
            ("git status", "git is a keyword"),
            ("rm -rf /tmp/foo", "rm is a keyword"),
            ("docker ps", "docker is a keyword"),
        ];
        for (command, why) in matching {
            assert!(index.has_any_keyword(command), "{why}");
        }
    }

    /// `has_any_keyword` must be at least as permissive as `candidate_pack_mask`:
    /// whenever the mask is nonzero for a command, `has_any_keyword` must be
    /// true for that command as well.
    #[test]
    fn has_any_keyword_consistent_with_candidate_mask() {
        let enabled: HashSet<String> = REGISTRY
            .all_pack_ids()
            .into_iter()
            .map(String::from)
            .collect();
        let ordered = REGISTRY.expand_enabled_ordered(&enabled);
        let index = REGISTRY
            .build_enabled_keyword_index(&ordered)
            .expect("should build index");

        let cmds = [
            "ls -la",
            "git status",
            "echo hello",
            "rm -rf /",
            "docker ps",
            "kubectl get pods",
            "cd /tmp",
            "terraform plan",
        ];

        for cmd in cmds {
            // Both queries run for every command, so each is exercised even
            // when nothing is asserted about its result.
            let mask = index.candidate_pack_mask(cmd);
            let has = index.has_any_keyword(cmd);

            // Only the nonzero-mask direction is asserted. A zero mask puts no
            // constraint on has_any_keyword: it is true when ANY pack might
            // need checking, which per the original note can still be the
            // case when always_check_mask is set.
            if mask != 0 {
                assert!(
                    has,
                    "has_any_keyword should be true when mask is nonzero for {cmd:?}"
                );
            }
        }
    }

    /// Test that `pack_tier` returns correct tiers for all known categories.
    ///
    /// The expectations live in one table so that a failure names the pack
    /// whose tier is wrong.
    #[test]
    fn pack_tier_ordering() {
        // (pack ID, expected tier), grouped by tier.
        let expectations = [
            // Tier 1: core, plus the storage and remote packs listed here.
            ("core.git", 1),
            ("core.filesystem", 1),
            ("storage.s3", 1),
            ("remote.rsync", 1),
            // Tier 2: system.
            ("system.disk", 2),
            ("system.permissions", 2),
            // Tier 3: infrastructure.
            ("infrastructure.terraform", 3),
            // Tier 4: cloud, API gateway, DNS, platform, CDN, load balancer.
            ("cloud.aws", 4),
            ("apigateway.aws", 4),
            ("dns.cloudflare", 4),
            ("dns.route53", 4),
            ("dns.generic", 4),
            ("platform.github", 4),
            ("cdn.cloudflare_workers", 4),
            ("loadbalancer.nginx", 4),
            // Tier 5: Kubernetes.
            ("kubernetes.kubectl", 5),
            // Tier 6: containers.
            ("containers.docker", 6),
            // Tier 7: database, backup, messaging, search.
            ("database.postgresql", 7),
            ("backup.borg", 7),
            ("backup.rclone", 7),
            ("backup.restic", 7),
            ("backup.velero", 7),
            ("messaging.kafka", 7),
            ("search.elasticsearch", 7),
            // Tier 8: package managers.
            ("package_managers", 8),
            // Tier 9: strict git.
            ("strict_git", 9),
            // Tier 10: CI/CD, email, feature flags, secrets, monitoring, payment.
            ("cicd.github_actions", 10),
            ("cicd.gitlab_ci", 10),
            ("cicd.jenkins", 10),
            ("cicd.circleci", 10),
            ("email.ses", 10),
            ("featureflags.launchdarkly", 10),
            ("secrets.vault", 10),
            ("monitoring.splunk", 10),
            ("payment.stripe", 10),
            // Tier 11: anything unknown.
            ("unknown.pack", 11),
        ];

        for (pack_id, expected_tier) in expectations {
            assert_eq!(
                PackRegistry::pack_tier(pack_id),
                expected_tier,
                "unexpected tier for pack {pack_id:?}"
            );
        }
    }

    /// Test that `expand_enabled_ordered` yields the same pack order on every call.
    #[test]
    fn expand_enabled_ordered_is_deterministic() {
        let enabled: HashSet<String> = [
            "containers.docker",
            "kubernetes.kubectl",
            "core.git",
            "database.postgresql",
        ]
        .into_iter()
        .map(String::from)
        .collect();

        // The first expansion is the reference; ten further runs must equal it.
        let first_run = REGISTRY.expand_enabled_ordered(&enabled);
        let reruns = std::iter::repeat_with(|| REGISTRY.expand_enabled_ordered(&enabled)).take(10);

        for run in reruns {
            assert_eq!(
                run, first_run,
                "expand_enabled_ordered should produce identical results across runs"
            );
        }
    }

    /// Test that `expand_enabled_ordered` sorts by tier then lexicographically.
    ///
    /// All four enabled packs sit in different tiers, so the expected order is
    /// core.git, kubernetes.kubectl, containers.docker, database.postgresql.
    #[test]
    fn expand_enabled_ordered_respects_tier_ordering() {
        let mut enabled = HashSet::new();
        enabled.insert("containers.docker".to_string()); // tier 6
        enabled.insert("kubernetes.kubectl".to_string()); // tier 5
        enabled.insert("core.git".to_string()); // tier 1
        enabled.insert("database.postgresql".to_string()); // tier 7

        let ordered = REGISTRY.expand_enabled_ordered(&enabled);

        // Find positions of every enabled pack, including kubernetes.kubectl.
        let core_pos = ordered.iter().position(|id| id == "core.git");
        let kubectl_pos = ordered.iter().position(|id| id == "kubernetes.kubectl");
        let docker_pos = ordered.iter().position(|id| id == "containers.docker");
        let pg_pos = ordered.iter().position(|id| id == "database.postgresql");

        let (Some(core_pos), Some(kubectl_pos), Some(docker_pos), Some(pg_pos)) =
            (core_pos, kubectl_pos, docker_pos, pg_pos)
        else {
            panic!("All packs should be present");
        };

        // Core (tier 1) should come before Kubernetes (tier 5)
        assert!(
            core_pos < kubectl_pos,
            "core.git should come before kubernetes.kubectl"
        );

        // Kubernetes (tier 5) should come before containers (tier 6)
        assert!(
            kubectl_pos < docker_pos,
            "kubernetes.kubectl should come before containers.docker"
        );

        // Core (tier 1) should come before containers (tier 6)
        assert!(
            core_pos < docker_pos,
            "core.git should come before containers.docker"
        );

        // Containers (tier 6) should come before database (tier 7)
        assert!(
            docker_pos < pg_pos,
            "containers.docker should come before database.postgresql"
        );
    }

    /// Test that packs sharing a tier come out of `expand_enabled_ordered` in
    /// lexicographic order.
    #[test]
    fn expand_enabled_ordered_sorts_within_tier() {
        let enabled: HashSet<String> =
            HashSet::from(["core.git".to_string(), "core.filesystem".to_string()]);

        let ordered = REGISTRY.expand_enabled_ordered(&enabled);

        let filesystem = ordered.iter().position(|id| id == "core.filesystem");
        let git = ordered.iter().position(|id| id == "core.git");
        let (Some(filesystem), Some(git)) = (filesystem, git) else {
            panic!("Both core packs should be present");
        };

        // "filesystem" sorts before "git".
        assert!(
            filesystem < git,
            "core.filesystem should come before core.git (lexicographic)"
        );
    }

    /// Key regression test for deterministic pack evaluation: `check_command`
    /// must report the same verdict and attribution every time it is called.
    #[test]
    fn check_command_attribution_is_deterministic() {
        // Both core.git and strict_git are enabled; when a git command matches
        // both, core.git should always win (lower tier).
        let enabled: HashSet<String> = ["core.git", "strict_git"]
            .into_iter()
            .map(String::from)
            .collect();
        let cmd = "git reset --hard";

        // Reference result that ten further evaluations are compared against.
        let first_result = REGISTRY.check_command(cmd, &enabled);

        (0..10).for_each(|_| {
            let result = REGISTRY.check_command(cmd, &enabled);
            assert_eq!(
                result.blocked, first_result.blocked,
                "Blocked status should be consistent"
            );
            assert_eq!(
                result.pack_id, first_result.pack_id,
                "Pack attribution should be consistent across runs"
            );
            assert_eq!(
                result.pattern_name, first_result.pattern_name,
                "Pattern name should be consistent across runs"
            );
        });
    }

    /// Test that a command matched by several packs is attributed to the one
    /// with the higher priority.
    #[test]
    fn check_command_prefers_higher_priority_pack() {
        // core.git is tier 1, strict_git is tier 9.
        let enabled: HashSet<String> =
            HashSet::from(["core.git".to_string(), "strict_git".to_string()]);

        let outcome = REGISTRY.check_command("git reset --hard", &enabled);

        assert!(outcome.blocked, "Command should be blocked");
        let attributed_pack = outcome.pack_id.as_deref();
        assert_eq!(
            attributed_pack,
            Some("core.git"),
            "core.git (tier 1) should be attributed over strict_git (tier 9)"
        );
    }

    /// `DROP ... IF EXISTS` statements must be flagged by the PostgreSQL and
    /// SQLite packs.
    #[test]
    fn database_packs_block_drop_with_if_exists() {
        let pg = database::postgresql::create_pack();
        let pg_statements = [
            (
                "DROP TABLE IF EXISTS foo;",
                "DROP TABLE IF EXISTS should be treated as destructive",
            ),
            (
                "DROP DATABASE IF EXISTS foo;",
                "DROP DATABASE IF EXISTS should be treated as destructive",
            ),
        ];
        for (statement, why) in pg_statements {
            assert!(pg.check(statement).is_some(), "{why}");
        }

        let sqlite = database::sqlite::create_pack();
        let sqlite_hit = sqlite.check("DROP TABLE IF EXISTS foo;");
        assert!(
            sqlite_hit.is_some(),
            "SQLite DROP TABLE IF EXISTS should be treated as destructive"
        );
    }

    /// `TRUNCATE ... RESTART IDENTITY` must be flagged by the PostgreSQL pack.
    #[test]
    fn database_postgresql_blocks_truncate_restart_identity() {
        let statement = "TRUNCATE TABLE foo RESTART IDENTITY;";
        let hit = database::postgresql::create_pack().check(statement);

        assert!(
            hit.is_some(),
            "TRUNCATE ... RESTART IDENTITY permanently deletes rows and should be blocked"
        );
    }

    /// Test that enabling a category expands to all of its sub-packs, in order.
    #[test]
    fn category_expansion_is_ordered() {
        // "containers" is a category; it expands to docker, compose, podman.
        let enabled: HashSet<String> = HashSet::from(["containers".to_string()]);

        let ordered = REGISTRY.expand_enabled_ordered(&enabled);

        // Every sub-pack of the category has to show up in the expansion.
        let compose = ordered.iter().position(|id| id == "containers.compose");
        let docker = ordered.iter().position(|id| id == "containers.docker");
        let podman = ordered.iter().position(|id| id == "containers.podman");
        let (Some(compose), Some(docker), Some(podman)) = (compose, docker, podman) else {
            panic!("Category expansion should include all sub-packs");
        };

        // Expected lexicographic order: compose < docker < podman.
        assert!(compose < docker, "compose should come before docker");
        assert!(docker < podman, "docker should come before podman");
    }

    /// Test that a blocked `check_command` result carries pattern metadata
    /// (`pattern_name` when available, otherwise at least a `reason`).
    #[test]
    fn check_command_returns_pattern_name() {
        let enabled: HashSet<String> = HashSet::from(["containers.docker".to_string()]);

        // `docker system prune` is expected to hit a destructive docker pattern.
        let outcome = REGISTRY.check_command("docker system prune", &enabled);

        assert!(outcome.blocked, "docker system prune should be blocked");
        assert_eq!(
            outcome.pack_id.as_deref(),
            Some("containers.docker"),
            "Should be attributed to containers.docker"
        );

        // pattern_name may be None for an unnamed pattern, so either field
        // counts as metadata.
        let has_metadata = outcome.pattern_name.is_some() || outcome.reason.is_some();
        assert!(has_metadata, "Blocked result should have pattern metadata");
    }

    /// Test that the `DestructiveMatch` for `docker system prune` has a
    /// non-empty reason.
    #[test]
    fn destructive_match_contains_metadata() {
        let docker_pack = REGISTRY
            .get("containers.docker")
            .expect("docker pack exists");

        let hit = docker_pack
            .matches_destructive("docker system prune")
            .expect("docker system prune should match");

        // Only `reason` is checked; `name` may or may not be set depending on
        // the pack definition.
        assert!(!hit.reason.is_empty(), "reason should not be empty");
    }

    /// Regression test for git_safety_guard-hcj: regex backtracking panic.
    ///
    /// Inputs with long runs of `/` can push fancy-regex past its backtrack
    /// limit. `normalize_command` has to fail open in that case, handing back
    /// the original command instead of panicking.
    #[test]
    fn normalize_command_handles_pathological_input() {
        // Fuzzer-found input that used to panic. It does not match the expected
        // /path/to/bin/rm shape, so it must come back unchanged.
        let fuzz_slashes = "//////////////////_(rm";
        assert_eq!(normalize_command(fuzz_slashes).as_ref(), fuzz_slashes);

        // A thousand slashes followed by "rm": the exact output is not pinned
        // down, it only has to come back without crashing.
        let mut many_slashes = "/".repeat(1000);
        many_slashes.push_str("rm");
        let normalized = normalize_command(&many_slashes);
        assert!(!normalized.is_empty());

        // Fuzzer-found input containing null bytes, also expected unchanged.
        let fuzz_nulls = "///\0\0/\0\0/\0\0//\0\0/\0[";
        assert_eq!(normalize_command(fuzz_nulls).as_ref(), fuzz_nulls);
    }

    // =========================================================================
    // Severity taxonomy tests (git_safety_guard-1gt.3.1)
    // =========================================================================

    /// Pins the `Severity` -> `DecisionMode` default mapping:
    /// Critical and High deny, Medium warns, Low only logs.
    #[test]
    fn severity_default_modes() {
        let expected_modes = [
            (Severity::Critical, DecisionMode::Deny),
            (Severity::High, DecisionMode::Deny),
            (Severity::Medium, DecisionMode::Warn),
            (Severity::Low, DecisionMode::Log),
        ];

        for (severity, expected) in expected_modes {
            assert_eq!(severity.default_mode(), expected);
        }
    }

    /// Checks `Severity::blocks_by_default` two ways: against hard-coded
    /// expectations, and against `default_mode().blocks()` for every variant.
    #[test]
    fn severity_blocks_by_default_consistency() {
        const ALL_SEVERITIES: [Severity; 4] = [
            Severity::Critical,
            Severity::High,
            Severity::Medium,
            Severity::Low,
        ];

        // Hard-coded expectations: only the two highest levels block.
        assert!(Severity::Critical.blocks_by_default());
        assert!(Severity::High.blocks_by_default());
        assert!(!Severity::Medium.blocks_by_default());
        assert!(!Severity::Low.blocks_by_default());

        // The shortcut must agree with DecisionMode::blocks() on the default mode.
        for severity in ALL_SEVERITIES {
            let via_shortcut = severity.blocks_by_default();
            let via_mode = severity.default_mode().blocks();
            assert_eq!(
                via_shortcut, via_mode,
                "blocks_by_default should match default_mode().blocks() for {severity:?}"
            );
        }
    }

    /// Only `DecisionMode::Deny` blocks; `Warn` and `Log` do not.
    #[test]
    fn decision_mode_blocks() {
        let cases = [
            (DecisionMode::Deny, true, "Deny should block"),
            (DecisionMode::Warn, false, "Warn should not block"),
            (DecisionMode::Log, false, "Log should not block"),
        ];

        for (mode, should_block, message) in cases {
            assert!(mode.blocks() == should_block, "{message}");
        }
    }

    /// Each `Severity` variant exposes its lowercase name through `label()`.
    #[test]
    fn severity_labels() {
        let expected_labels = [
            (Severity::Critical, "critical"),
            (Severity::High, "high"),
            (Severity::Medium, "medium"),
            (Severity::Low, "low"),
        ];

        for (severity, label) in expected_labels {
            assert_eq!(severity.label(), label);
        }
    }

    /// Each `DecisionMode` variant exposes its lowercase name through `label()`.
    #[test]
    fn decision_mode_labels() {
        let expected_labels = [
            (DecisionMode::Deny, "deny"),
            (DecisionMode::Warn, "warn"),
            (DecisionMode::Log, "log"),
        ];

        for (mode, label) in expected_labels {
            assert_eq!(mode.label(), label);
        }
    }

    /// A blocked `CheckResult` must carry both `severity` and `decision_mode`,
    /// and both must be of the blocking kind.
    #[test]
    fn check_result_includes_severity() {
        let enabled = HashSet::from(["core.git".to_string()]);

        let outcome = REGISTRY.check_command("git reset --hard", &enabled);
        assert!(outcome.blocked, "git reset --hard should be blocked");

        let severity = outcome
            .severity
            .expect("Blocked result should include severity");
        let mode = outcome
            .decision_mode
            .expect("Blocked result should include decision_mode");

        // Whatever severity the matched rule carries, it has to be one that
        // blocks by default, and the resolved mode has to block as well.
        assert!(severity.blocks_by_default());
        assert!(mode.blocks());
    }

    /// `CheckResult::allowed()` is not blocked and leaves `severity` and
    /// `decision_mode` unset.
    #[test]
    fn allowed_result_no_severity() {
        let allowed = CheckResult::allowed();

        assert!(!allowed.blocked);
        // No rule matched, so there is no severity or mode to report.
        assert!(allowed.severity.is_none());
        assert!(allowed.decision_mode.is_none());
    }

    /// The `DestructiveMatch` for `docker system prune` reports a severity,
    /// and that severity is `High`.
    #[test]
    fn destructive_match_includes_severity() {
        let pack = REGISTRY
            .get("containers.docker")
            .expect("docker pack exists");

        let hit = pack
            .matches_destructive("docker system prune")
            .expect("docker system prune should match");

        // High is the default severity.
        assert_eq!(hit.severity, Severity::High);
    }

    /// `Severity::default()` is `High`.
    #[test]
    fn severity_default() {
        assert_eq!(Severity::default(), Severity::High);
    }

    /// `DecisionMode::default()` is `Deny`.
    #[test]
    fn decision_mode_default() {
        assert_eq!(DecisionMode::default(), DecisionMode::Deny);
    }

    // =========================================================================
    // Severity regression tests (git_safety_guard-1gt.3.2)
    // =========================================================================
    // These tests prevent accidental severity drift for high-impact rules.
    // Changing severity of a rule changes its blocking behavior (critical/high block,
    // medium warns, low logs). Unintentional changes could let dangerous commands through.

    /// Guards against severity drift: the listed `core.git` rules must exist
    /// and must stay at `Severity::Critical`.
    #[test]
    fn severity_regression_git_critical_rules() {
        // These rules should ALWAYS be Critical - they're the most dangerous.
        const MUST_BE_CRITICAL: [&str; 5] = [
            "reset-hard",
            "clean-force",
            "push-force-long",
            "push-force-short",
            "stash-clear",
        ];

        let git_pack = REGISTRY
            .get("core.git")
            .expect("core.git pack should exist");

        for rule_name in MUST_BE_CRITICAL {
            // Look the rule up by name; a missing rule is itself a regression.
            let Some(rule) = git_pack
                .destructive_patterns
                .iter()
                .find(|p| p.name == Some(rule_name))
            else {
                panic!("Rule {rule_name} should exist in core.git");
            };

            assert_eq!(
                rule.severity,
                Severity::Critical,
                "Rule {rule_name} in core.git should be Critical severity"
            );
        }
    }

    /// Guards against severity drift: `rm-rf-root-home` in `core.filesystem`
    /// must exist and must stay at `Severity::Critical`.
    #[test]
    fn severity_regression_filesystem_critical_rules() {
        let fs_pack = REGISTRY
            .get("core.filesystem")
            .expect("core.filesystem pack should exist");

        // rm -rf on root/home is the most dangerous possible command.
        let rule = fs_pack
            .destructive_patterns
            .iter()
            .find(|p| matches!(p.name, Some("rm-rf-root-home")))
            .expect("rm-rf-root-home rule should exist");

        assert_eq!(
            rule.severity,
            Severity::Critical,
            "rm-rf-root-home should be Critical severity (most dangerous)"
        );
    }

    /// Guards against accidental downgrades: the listed `core.git` rules must
    /// exist and must still block by default (High or Critical).
    #[test]
    fn severity_regression_git_high_rules() {
        // These should be at least High (blocking by default).
        const MUST_BLOCK: [&str; 5] = [
            "checkout-discard",
            "checkout-ref-discard",
            "restore-worktree",
            "restore-worktree-explicit",
            "reset-merge",
        ];

        let git_pack = REGISTRY
            .get("core.git")
            .expect("core.git pack should exist");

        for rule_name in MUST_BLOCK {
            // Look the rule up by name; a missing rule is itself a regression.
            let Some(rule) = git_pack
                .destructive_patterns
                .iter()
                .find(|p| p.name == Some(rule_name))
            else {
                panic!("Rule {rule_name} should exist in core.git");
            };

            assert!(
                rule.severity.blocks_by_default(),
                "Rule {rule_name} in core.git should block by default (High or Critical)"
            );
        }
    }

    /// Documents the expected severity distribution of the core packs.
    ///
    /// Every destructive pattern in `core.git` and `core.filesystem` must block
    /// by default (Critical/High), except for an explicit allow-list of
    /// recoverable operations that are expected to be exactly Medium.
    #[test]
    fn core_rules_have_appropriate_severity() {
        // (pack, rule) pairs that should be Medium (recoverable operations).
        let medium_patterns = [
            ("core.git", "branch-force-delete"), // Recoverable via reflog
            ("core.git", "stash-drop"),          // Recoverable via fsck
        ];

        for pack_id in ["core.git", "core.filesystem"] {
            let pack = REGISTRY.get(pack_id).expect("Pack should exist");

            for pattern in &pack.destructive_patterns {
                // Unnamed patterns can never be on the Medium list, so they
                // fall through to the "must block" check below.
                let name = pattern.name.unwrap_or("<unnamed>");

                if medium_patterns.contains(&(pack_id, name)) {
                    assert!(
                        pattern.severity == Severity::Medium,
                        "Core pack rule {pack_id}:{name} should be Medium severity (recoverable)"
                    );
                    continue;
                }

                assert!(
                    pattern.severity.blocks_by_default(),
                    "Core pack rule {pack_id}:{name} should block by default"
                );
            }
        }
    }

    mod normalization_tests {
        use super::*;

        #[test]
        fn preserves_plain_git_command() {
            assert_eq!(normalize_command("git status"), "git status");
        }

        #[test]
        fn preserves_plain_rm_command() {
            assert_eq!(normalize_command("rm -rf /tmp/foo"), "rm -rf /tmp/foo");
        }

        #[test]
        fn strips_usr_bin_git() {
            assert_eq!(normalize_command("/usr/bin/git status"), "git status");
        }

        #[test]
        fn strips_usr_local_bin_git() {
            assert_eq!(
                normalize_command("/usr/local/bin/git checkout -b feature"),
                "git checkout -b feature"
            );
        }

        #[test]
        fn strips_bin_rm() {
            assert_eq!(
                normalize_command("/bin/rm -rf /tmp/test"),
                "rm -rf /tmp/test"
            );
        }

        #[test]
        fn strips_usr_bin_rm() {
            assert_eq!(normalize_command("/usr/bin/rm file.txt"), "rm file.txt");
        }

        #[test]
        fn strips_sbin_path() {
            assert_eq!(normalize_command("/sbin/rm foo"), "rm foo");
        }

        #[test]
        fn strips_usr_sbin_path() {
            assert_eq!(normalize_command("/usr/sbin/rm bar"), "rm bar");
        }

        #[test]
        fn preserves_command_with_path_arguments() {
            assert_eq!(
                normalize_command("git add /usr/bin/something"),
                "git add /usr/bin/something"
            );
        }

        #[test]
        fn handles_empty_string() {
            assert_eq!(normalize_command(""), "");
        }

        #[test]
        fn strips_quotes_from_executed_git_command_word() {
            assert_eq!(
                normalize_command("\"git\" reset --hard"),
                "git reset --hard"
            );
        }

        #[test]
        fn strips_quotes_from_executed_rm_command_word() {
            assert_eq!(normalize_command("\"rm\" -rf /etc"), "rm -rf /etc");
        }

        #[test]
        fn strips_quotes_from_executed_absolute_path_command_word() {
            assert_eq!(
                normalize_command("\"/usr/bin/git\" reset --hard"),
                "git reset --hard"
            );
        }

        #[test]
        fn strips_quotes_after_separators() {
            assert_eq!(
                normalize_command("echo hi; \"rm\" -rf /etc"),
                "echo hi; rm -rf /etc"
            );
        }

        #[test]
        fn strips_quotes_after_wrappers_and_options() {
            assert_eq!(
                normalize_command("sudo -u root \"rm\" -rf /etc"),
                "rm -rf /etc"
            );
        }

        #[test]
        fn preserves_quotes_for_safe_commands() {
            // Safe commands like echo should preserve argument quotes to avoid false positives
            assert_eq!(
                normalize_command("echo \"rm\" -rf /etc"),
                "echo \"rm\" -rf /etc"
            );
        }

        #[test]
        fn does_not_strip_quotes_for_command_query_mode() {
            assert_eq!(
                normalize_command("command -v \"git\""),
                "command -v \"git\""
            );
        }

        #[test]
        fn strips_quotes_inside_subshell_segments() {
            assert_eq!(normalize_command("( \"rm\" -rf /etc )"), "( rm -rf /etc )");
        }

        #[test]
        fn handles_line_continuation_split() {
            // "re\\\nset" -> "reset"
            assert_eq!(
                normalize_command("git re\\\nset --hard"),
                "git reset --hard"
            );
        }
    }

    /// Compiles every safe and destructive pattern of every registered pack.
    ///
    /// Pack regexes are compiled lazily, so an invalid pattern would otherwise
    /// only surface at runtime on first use. All failures are collected and
    /// reported together instead of stopping at the first one.
    ///
    /// Related to git_safety_guard-64dc.3 (pattern validity validation).
    #[test]
    fn all_pack_patterns_compile() {
        let mut errors: Vec<String> = Vec::new();

        for pack_id in REGISTRY.all_pack_ids() {
            let pack = REGISTRY.get(pack_id).expect("pack must exist");

            // Safe patterns: keep one message per pattern that fails to compile.
            errors.extend(
                pack.safe_patterns
                    .iter()
                    .enumerate()
                    .filter_map(|(idx, pattern)| {
                        crate::packs::regex_engine::CompiledRegex::new(pattern.regex.as_str())
                            .err()
                            .map(|e| {
                                format!(
                                    "Pack '{}' safe pattern '{}' (index {}) failed to compile: {}\n  Pattern: {}",
                                    pack_id,
                                    pattern.name,
                                    idx,
                                    e,
                                    pattern.regex.as_str()
                                )
                            })
                    }),
            );

            // Destructive patterns: same check; unnamed patterns get a placeholder.
            errors.extend(
                pack.destructive_patterns
                    .iter()
                    .enumerate()
                    .filter_map(|(idx, pattern)| {
                        let pattern_name = pattern.name.unwrap_or("<unnamed>");
                        crate::packs::regex_engine::CompiledRegex::new(pattern.regex.as_str())
                            .err()
                            .map(|e| {
                                format!(
                                    "Pack '{}' destructive pattern '{}' (index {}) failed to compile: {}\n  Pattern: {}",
                                    pack_id,
                                    pattern_name,
                                    idx,
                                    e,
                                    pattern.regex.as_str()
                                )
                            })
                    }),
            );
        }

        assert!(
            errors.is_empty(),
            "Found {} invalid regex pattern(s):\n\n{}",
            errors.len(),
            errors.join("\n\n")
        );
    }

    /// Tests for the registry plumbing that turns a set of enabled pack or
    /// category IDs into expanded pack lists, keyword lists and keyword indexes.
    mod pack_enable_disable_plumbing {
        use super::*;
        use std::collections::HashSet;

        /// Builds the owned ID set that the registry APIs under test accept.
        fn enabled_set(ids: &[&str]) -> HashSet<String> {
            ids.iter().map(|id| (*id).to_string()).collect()
        }

        /// With nothing enabled, the ordered expansion contains no core pack.
        #[test]
        fn core_not_in_expand_when_not_explicitly_enabled() {
            let enabled: HashSet<String> = HashSet::new();
            let ordered = REGISTRY.expand_enabled_ordered(&enabled);
            assert!(
                !ordered.iter().any(|id| id.starts_with("core")),
                "core packs should NOT be in expand_enabled_ordered when not in enabled set \
                 (core is added by PacksConfig::enabled_pack_ids, not by expand_enabled)"
            );
        }

        /// Enabling the `database` category enables its sub-packs and nothing
        /// from other categories.
        #[test]
        fn category_expands_to_all_subpacks() {
            let enabled = enabled_set(&["database"]);
            let expanded = REGISTRY.expand_enabled(&enabled);
            assert!(expanded.contains("database.postgresql"));
            assert!(expanded.contains("database.redis"));
            assert!(expanded.contains("database.mongodb"));
            assert!(expanded.contains("database.sqlite"));
            assert!(
                !expanded.contains("containers.docker"),
                "database category should not include containers packs"
            );
        }

        /// Enabling one sub-pack does not pull in its siblings.
        #[test]
        fn specific_subpack_only_enables_that_pack() {
            let enabled = enabled_set(&["database.postgresql"]);
            let expanded = REGISTRY.expand_enabled(&enabled);
            assert!(expanded.contains("database.postgresql"));
            assert!(
                !expanded.contains("database.redis"),
                "enabling database.postgresql should not enable database.redis"
            );
        }

        /// Enabling both a category and one of its sub-packs lists that
        /// sub-pack only once in the ordered expansion.
        #[test]
        fn duplicate_enabled_entries_deduplicated() {
            let enabled = enabled_set(&["database", "database.postgresql"]);
            let expanded = REGISTRY.expand_enabled(&enabled);
            let ordered = REGISTRY.expand_enabled_ordered(&expanded);
            let pg_count = ordered
                .iter()
                .filter(|id| *id == "database.postgresql")
                .count();
            assert_eq!(pg_count, 1, "postgresql should appear exactly once");
        }

        /// IDs that do not name a registered pack are dropped from the
        /// ordered expansion.
        #[test]
        fn nonexistent_pack_id_filtered_from_ordered() {
            let enabled = enabled_set(&["nonexistent.fake_pack"]);
            let ordered = REGISTRY.expand_enabled_ordered(&enabled);
            assert!(
                !ordered.contains(&"nonexistent.fake_pack".to_string()),
                "non-existent pack IDs should be filtered from ordered results"
            );
        }

        /// Keywords come only from the enabled pack, not from its siblings.
        #[test]
        fn keywords_collected_only_from_enabled_packs() {
            let enabled = enabled_set(&["database.postgresql"]);
            let keywords = REGISTRY.collect_enabled_keywords(&enabled);
            assert!(
                keywords.contains(&"psql"),
                "postgresql keywords should include psql"
            );
            assert!(
                keywords.contains(&"dropdb"),
                "postgresql keywords should include dropdb"
            );
            assert!(
                !keywords.contains(&"redis-cli"),
                "redis keywords should not be present when only postgresql is enabled"
            );
        }

        /// The collected keyword list contains no repeated entries.
        #[test]
        fn keywords_deduplicated_across_packs() {
            let enabled = enabled_set(&["database"]);
            let keywords = REGISTRY.collect_enabled_keywords(&enabled);

            // A set built from the list is shorter than the list exactly when
            // the list holds at least one duplicate.
            let unique: HashSet<_> = keywords.iter().collect();
            assert_eq!(
                unique.len(),
                keywords.len(),
                "keywords should be deduplicated"
            );
        }

        /// No enabled packs means no keywords.
        #[test]
        fn empty_enabled_set_produces_no_keywords() {
            let enabled: HashSet<String> = HashSet::new();
            let keywords = REGISTRY.collect_enabled_keywords(&enabled);
            assert!(
                keywords.is_empty(),
                "no keywords should be collected when no packs are enabled"
            );
        }

        /// A keyword index is produced when at least one pack is enabled.
        #[test]
        fn keyword_index_built_from_enabled_packs() {
            let enabled = enabled_set(&["database.postgresql"]);
            let ordered = REGISTRY.expand_enabled_ordered(&enabled);
            let index = REGISTRY.build_enabled_keyword_index(&ordered);
            assert!(
                index.is_some(),
                "keyword index should be built for non-empty enabled set"
            );
        }

        /// Two expansions of the same enabled set produce the same order.
        #[test]
        fn expand_enabled_ordered_deterministic() {
            let enabled = enabled_set(&["database", "containers", "core"]);

            let run1 = REGISTRY.expand_enabled_ordered(&enabled);
            let run2 = REGISTRY.expand_enabled_ordered(&enabled);
            assert_eq!(run1, run2, "ordering should be deterministic");
        }

        /// The first core pack is ordered ahead of the first database pack.
        ///
        /// Both categories are enabled explicitly, so a missing position is
        /// treated as a failure instead of letting the test pass without
        /// checking anything.
        #[test]
        fn tier_ordering_core_before_database() {
            let enabled = enabled_set(&["core", "database"]);
            let ordered = REGISTRY.expand_enabled_ordered(&enabled);

            let core_pos = ordered
                .iter()
                .position(|id| id.starts_with("core."))
                .expect("enabling the core category should yield at least one core.* pack");
            let db_pos = ordered
                .iter()
                .position(|id| id.starts_with("database."))
                .expect("enabling the database category should yield at least one database.* pack");

            assert!(
                core_pos < db_pos,
                "core packs (tier 1) should come before database packs (tier 7)"
            );
        }

        /// `packs_in_category("database")` lists the database sub-packs.
        #[test]
        fn packs_in_category_returns_subpacks() {
            let db_packs = REGISTRY.packs_in_category("database");
            assert!(
                db_packs.contains(&"database.postgresql"),
                "database category should contain postgresql"
            );
            assert!(
                db_packs.contains(&"database.redis"),
                "database category should contain redis"
            );
        }

        /// An unknown category yields an empty list.
        #[test]
        fn packs_in_nonexistent_category_returns_empty() {
            let packs = REGISTRY.packs_in_category("nonexistent_category");
            assert!(packs.is_empty());
        }

        /// Every entry in `PACK_ENTRIES` declares at least one keyword.
        #[test]
        fn all_registered_packs_have_nonempty_keywords() {
            for entry in &PACK_ENTRIES {
                assert!(
                    !entry.keywords.is_empty(),
                    "pack {} has no keywords — it can never be activated",
                    entry.id
                );
            }
        }

        /// Every entry in `PACK_ENTRIES` can be fetched from the registry, and
        /// the fetched pack has the entry's ID and a non-empty name.
        #[test]
        fn all_registered_packs_instantiate_successfully() {
            for entry in &PACK_ENTRIES {
                let pack = REGISTRY.get(entry.id).unwrap_or_else(|| {
                    panic!("pack {} should be retrievable from registry", entry.id)
                });
                assert_eq!(pack.id, entry.id);
                assert!(!pack.name.is_empty());
            }
        }
    }

    /// With a zero-length deadline, `matches_safe_with_deadline` must report
    /// "no safe match" for a command that the first safe pattern is written
    /// to accept.
    #[test]
    fn matches_safe_with_deadline_returns_false_when_expired() {
        // Hand-built pack: two look-ahead safe patterns, no destructive ones,
        // and no precomputed matcher or regex set.
        let pack = Pack {
            id: String::from("test.deadline"),
            name: "test",
            description: "test",
            keywords: &["rm"],
            safe_patterns: vec![
                safe_pattern!("bt_safe_1", r"(?=.*safe_target)rm\s+--dry-run"),
                safe_pattern!("bt_safe_2", r"(?=.*other_target)rm\s+--interactive"),
            ],
            destructive_patterns: vec![],
            keyword_matcher: None,
            safe_regex_set: None,
            safe_regex_set_is_complete: false,
        };

        // Zero budget: the deadline has no time left from the start.
        let expired = crate::perf::Deadline::new(std::time::Duration::ZERO);
        let matched = pack.matches_safe_with_deadline("rm --dry-run safe_target", Some(&expired));

        assert!(
            !matched,
            "Expired deadline should cause safe match to return false"
        );
    }

    /// With a generous deadline, `matches_safe_with_deadline` finds the safe
    /// pattern match as usual.
    #[test]
    fn matches_safe_with_deadline_matches_when_budget_available() {
        // Hand-built pack with a single look-ahead safe pattern.
        let pack = Pack {
            id: String::from("test.deadline"),
            name: "test",
            description: "test",
            keywords: &["rm"],
            safe_patterns: vec![safe_pattern!(
                "bt_safe_1",
                r"(?=.*safe_target)rm\s+--dry-run"
            )],
            destructive_patterns: vec![],
            keyword_matcher: None,
            safe_regex_set: None,
            safe_regex_set_is_complete: false,
        };

        // Ten seconds is far more than a single match should need.
        let roomy = crate::perf::Deadline::new(std::time::Duration::from_secs(10));
        let matched = pack.matches_safe_with_deadline("rm --dry-run safe_target", Some(&roomy));

        assert!(matched, "Should find safe match when deadline is generous");
    }

    /// Passing `None` as the deadline must give the same answer as the plain
    /// `matches_safe` call for the same command.
    #[test]
    fn matches_safe_with_no_deadline_behaves_like_matches_safe() {
        // Hand-built pack with a single look-ahead safe pattern.
        let pack = Pack {
            id: String::from("test.deadline"),
            name: "test",
            description: "test",
            keywords: &["rm"],
            safe_patterns: vec![safe_pattern!(
                "bt_safe_1",
                r"(?=.*safe_target)rm\s+--dry-run"
            )],
            destructive_patterns: vec![],
            keyword_matcher: None,
            safe_regex_set: None,
            safe_regex_set_is_complete: false,
        };

        let cmd = "rm --dry-run safe_target";
        let plain = pack.matches_safe(cmd);
        let without_deadline = pack.matches_safe_with_deadline(cmd, None);

        assert_eq!(
            plain, without_deadline,
            "No deadline should behave identically to matches_safe"
        );
    }
}