enact-core 0.0.2

//! Enforcement - Kernel-owned limits, quotas, and rate limiting
//!
//! This module provides the enforcement layer that ensures executions
//! respect their resource boundaries. All limit enforcement happens
//! in the kernel, not in providers.
//!
//! ## Design Principles
//!
//! 1. **Kernel Owns Enforcement**: Providers are dumb adapters
//! 2. **Hard Limits**: Quota exceeded = execution halts immediately
//! 3. **Deterministic**: Same limits → same enforcement behavior
//! 4. **Observable**: All enforcement decisions are logged or emitted as events
//!
//! ## Key Components
//!
//! - `UsageTracker`: Tracks resource consumption per execution
//! - `EnforcementPolicy`: Defines limits and enforcement rules
//! - `EnforcementResult`: Outcome of limit checks
//!
//! @see docs/feat-03-limits-quotas.md

use super::error::{ExecutionError, ExecutionErrorCategory};
use super::ids::{ExecutionId, StepId, TenantId};
use crate::context::ResourceLimits;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::atomic::{AtomicU32, AtomicU64, Ordering};
use std::sync::Arc;
use std::time::{Duration, Instant};
use tokio::sync::RwLock;

// =============================================================================
// Usage Tracking
// =============================================================================

/// Tracks resource usage for a single execution.
///
/// Every counter is an atomic, so the recording methods take `&self`; the
/// middleware in this file shares one instance per execution behind an `Arc`.
/// Only `last_activity` sits behind an async lock, because it stores an
/// `Instant` rather than an integer.
#[derive(Debug)]
pub struct ExecutionUsage {
    /// Execution this usage record belongs to
    pub execution_id: ExecutionId,
    /// Tenant that owns the execution
    pub tenant_id: TenantId,
    /// Number of steps executed (incremented by `record_step`)
    pub steps: AtomicU32,
    /// Total input tokens consumed (32-bit counter)
    pub input_tokens: AtomicU32,
    /// Total output tokens consumed (32-bit counter)
    pub output_tokens: AtomicU32,
    /// Instant at which this record was created; basis for wall-time measurement
    pub started_at: Instant,
    /// Instant of the most recent `touch()`; basis for idle-time measurement
    pub last_activity: RwLock<Instant>,
    // === Long-running execution tracking ===
    /// Number of dynamically discovered steps (StepSource::Discovered)
    pub discovered_steps: AtomicU32,
    /// Current discovery chain depth (how deep in the discovery tree)
    pub discovery_depth: AtomicU32,
    /// Maximum discovery depth reached during execution
    pub max_discovery_depth_reached: AtomicU32,
    /// Cumulative cost in whole cents (USD * 100), kept as an integer so it
    /// can live in an atomic
    pub cost_cents: AtomicU64,
}

impl ExecutionUsage {
    /// Create a new usage tracker for an execution.
    ///
    /// All counters start at zero; `started_at` and `last_activity` are both
    /// set to the moment of construction.
    pub fn new(execution_id: ExecutionId, tenant_id: TenantId) -> Self {
        let now = Instant::now();
        Self {
            execution_id,
            tenant_id,
            steps: AtomicU32::new(0),
            input_tokens: AtomicU32::new(0),
            output_tokens: AtomicU32::new(0),
            started_at: now,
            last_activity: RwLock::new(now),
            discovered_steps: AtomicU32::new(0),
            discovery_depth: AtomicU32::new(0),
            max_discovery_depth_reached: AtomicU32::new(0),
            cost_cents: AtomicU64::new(0),
        }
    }

    /// Add `delta` to a 32-bit counter, pinning at `u32::MAX` instead of wrapping.
    ///
    /// A wrapped counter would read as "almost no usage" and silently defeat
    /// the limit it feeds, so saturation is the safe failure mode.
    fn saturating_bump(counter: &AtomicU32, delta: u32) {
        // The closure always returns `Some`, so `fetch_update` cannot fail.
        let _ = counter.fetch_update(Ordering::SeqCst, Ordering::SeqCst, |value| {
            Some(value.saturating_add(delta))
        });
    }

    /// Record step execution
    pub fn record_step(&self) {
        self.steps.fetch_add(1, Ordering::SeqCst);
    }

    /// Record a discovered step (dynamically added to DAG)
    pub fn record_discovered_step(&self) {
        self.discovered_steps.fetch_add(1, Ordering::SeqCst);
    }

    /// Record token usage.
    ///
    /// Both counters saturate at `u32::MAX` rather than wrapping around.
    pub fn record_tokens(&self, input: u32, output: u32) {
        Self::saturating_bump(&self.input_tokens, input);
        Self::saturating_bump(&self.output_tokens, output);
    }

    /// Record cost in USD (converted to cents for storage).
    ///
    /// The amount is rounded to the nearest cent; plain truncation would turn
    /// e.g. `0.29` (which is `28.999…` after multiplying by 100) into 28
    /// cents. Non-finite and non-positive amounts are ignored. Amounts below
    /// half a cent still round to zero because storage is in whole cents.
    pub fn record_cost_usd(&self, cost_usd: f64) {
        if !cost_usd.is_finite() || cost_usd <= 0.0 {
            return;
        }
        // Float-to-int `as` saturates, so an absurdly large amount becomes
        // `u64::MAX` here and the saturating add below keeps it there.
        let cents = (cost_usd * 100.0).round() as u64;
        let _ = self
            .cost_cents
            .fetch_update(Ordering::SeqCst, Ordering::SeqCst, |total| {
                Some(total.saturating_add(cents))
            });
    }

    /// Push discovery depth (entering a discovered step).
    ///
    /// Also raises the recorded maximum depth when the new depth exceeds it.
    pub fn push_discovery_depth(&self) {
        let new_depth = self
            .discovery_depth
            .fetch_add(1, Ordering::SeqCst)
            .saturating_add(1);
        // `fetch_max` performs the compare-and-raise atomically; a separate
        // load followed by a store could let a concurrent, shallower push
        // overwrite a deeper maximum.
        self.max_discovery_depth_reached
            .fetch_max(new_depth, Ordering::SeqCst);
    }

    /// Pop discovery depth (exiting a discovered step).
    ///
    /// A pop at depth zero is ignored instead of wrapping to `u32::MAX`,
    /// which would make every later depth check fail.
    pub fn pop_discovery_depth(&self) {
        let _ = self
            .discovery_depth
            .fetch_update(Ordering::SeqCst, Ordering::SeqCst, |depth| {
                depth.checked_sub(1)
            });
    }

    /// Update last activity timestamp
    pub async fn touch(&self) {
        let mut last = self.last_activity.write().await;
        *last = Instant::now();
    }

    /// Get current step count
    pub fn step_count(&self) -> u32 {
        self.steps.load(Ordering::SeqCst)
    }

    /// Get discovered step count
    pub fn discovered_step_count(&self) -> u32 {
        self.discovered_steps.load(Ordering::SeqCst)
    }

    /// Get current discovery depth
    pub fn current_discovery_depth(&self) -> u32 {
        self.discovery_depth.load(Ordering::SeqCst)
    }

    /// Get total token count (input + output), saturating at `u32::MAX`.
    pub fn total_tokens(&self) -> u32 {
        let input = self.input_tokens.load(Ordering::SeqCst);
        let output = self.output_tokens.load(Ordering::SeqCst);
        input.saturating_add(output)
    }

    /// Get cumulative cost in USD
    pub fn cost_usd(&self) -> f64 {
        self.cost_cents.load(Ordering::SeqCst) as f64 / 100.0
    }

    /// Get wall clock duration since this tracker was created
    pub fn wall_time(&self) -> Duration {
        self.started_at.elapsed()
    }

    /// Get wall time in milliseconds (saturating at `u64::MAX`)
    pub fn wall_time_ms(&self) -> u64 {
        u64::try_from(self.wall_time().as_millis()).unwrap_or(u64::MAX)
    }

    /// Get idle duration (time since last activity)
    pub async fn idle_duration(&self) -> Duration {
        let last = self.last_activity.read().await;
        last.elapsed()
    }

    /// Get idle duration in whole seconds
    pub async fn idle_seconds(&self) -> u64 {
        self.idle_duration().await.as_secs()
    }
}

/// Serializable snapshot of execution usage.
///
/// A plain-data copy of [`ExecutionUsage`] taken at one point in time; built
/// through the `From<&ExecutionUsage>` conversion.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UsageSnapshot {
    /// Execution ID rendered as a string
    pub execution_id: String,
    /// Tenant ID rendered as a string
    pub tenant_id: String,
    /// Steps executed so far
    pub steps: u32,
    /// Input tokens consumed so far
    pub input_tokens: u32,
    /// Output tokens consumed so far
    pub output_tokens: u32,
    /// Sum of `input_tokens` and `output_tokens`
    pub total_tokens: u32,
    /// Milliseconds elapsed since the execution's usage tracking started
    pub wall_time_ms: u64,
    // Long-running execution metrics
    /// Dynamically discovered steps recorded so far
    pub discovered_steps: u32,
    /// Discovery depth at the time of the snapshot
    pub discovery_depth: u32,
    /// Deepest discovery depth reached so far
    pub max_discovery_depth: u32,
    /// Cumulative cost in USD (stored cents divided by 100)
    pub cost_usd: f64,
}

impl From<&ExecutionUsage> for UsageSnapshot {
    /// Copy the current counter values out of a live [`ExecutionUsage`].
    ///
    /// Each counter is loaded individually, so the snapshot is not a single
    /// atomic view across all fields. `total_tokens` is derived from the two
    /// token values read here and saturates at `u32::MAX` instead of
    /// overflowing.
    fn from(usage: &ExecutionUsage) -> Self {
        let input = usage.input_tokens.load(Ordering::SeqCst);
        let output = usage.output_tokens.load(Ordering::SeqCst);
        Self {
            execution_id: usage.execution_id.as_str().to_string(),
            tenant_id: usage.tenant_id.as_str().to_string(),
            steps: usage.steps.load(Ordering::SeqCst),
            input_tokens: input,
            output_tokens: output,
            // Plain `+` would panic in debug builds and wrap in release builds.
            total_tokens: input.saturating_add(output),
            wall_time_ms: usage.wall_time_ms(),
            discovered_steps: usage.discovered_steps.load(Ordering::SeqCst),
            discovery_depth: usage.discovery_depth.load(Ordering::SeqCst),
            max_discovery_depth: usage.max_discovery_depth_reached.load(Ordering::SeqCst),
            cost_usd: usage.cost_usd(),
        }
    }
}

// =============================================================================
// Enforcement Results
// =============================================================================

/// Result of an enforcement check.
///
/// `Warning` still lets the operation proceed (`is_allowed()` returns `true`
/// for it); only `Blocked` stops the operation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EnforcementResult {
    /// Operation is allowed to proceed
    Allowed,
    /// Operation is blocked due to limit exceeded
    Blocked(EnforcementViolation),
    /// Operation is allowed but near limit (warning)
    Warning(EnforcementWarning),
}

impl EnforcementResult {
    /// `true` for every outcome that lets the operation run, i.e. both
    /// `Allowed` and `Warning`.
    pub fn is_allowed(&self) -> bool {
        // The enum has exactly three variants, so "not blocked" is the same
        // set as "allowed or warning".
        !matches!(self, Self::Blocked(_))
    }

    /// `true` only when the operation must not proceed.
    pub fn is_blocked(&self) -> bool {
        matches!(self, Self::Blocked(_))
    }

    /// Build the [`ExecutionError`] for a blocked result; `None` for
    /// `Allowed` and `Warning`.
    pub fn to_error(&self) -> Option<ExecutionError> {
        if let Self::Blocked(violation) = self {
            Some(violation.to_error())
        } else {
            None
        }
    }
}

/// Type of enforcement violation.
///
/// Also used as the kind of limit referenced by an [`EnforcementWarning`].
/// The `Display` impl renders each variant as a snake_case code (for example
/// `step_limit`), which `EnforcementViolation::to_error` uses as the error
/// code.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum ViolationType {
    /// Maximum steps exceeded
    StepLimit,
    /// Maximum tokens exceeded
    TokenLimit,
    /// Wall clock timeout exceeded
    WallTimeLimit,
    /// Memory limit exceeded
    MemoryLimit,
    /// Concurrent execution limit exceeded
    ConcurrencyLimit,
    /// Rate limit exceeded
    RateLimit,
    /// Network access denied in air-gapped mode
    NetworkViolation,
    // === Long-running execution controls ===
    /// Maximum discovered steps exceeded (agentic DAG)
    DiscoveredStepLimit,
    /// Discovery chain depth exceeded (prevents infinite discovery)
    DiscoveryDepthLimit,
    /// Cost threshold exceeded (USD-based alerting)
    CostThreshold,
    /// No activity for too long (idle timeout)
    IdleTimeout,
    /// Agent repeating same methodology (semantic loop)
    SameStepLoop,
}

impl std::fmt::Display for ViolationType {
    /// Render the variant as its stable snake_case code.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the label first, then emit it with a single write.
        let code = match self {
            Self::StepLimit => "step_limit",
            Self::TokenLimit => "token_limit",
            Self::WallTimeLimit => "wall_time_limit",
            Self::MemoryLimit => "memory_limit",
            Self::ConcurrencyLimit => "concurrency_limit",
            Self::RateLimit => "rate_limit",
            Self::NetworkViolation => "network_violation",
            Self::DiscoveredStepLimit => "discovered_step_limit",
            Self::DiscoveryDepthLimit => "discovery_depth_limit",
            Self::CostThreshold => "cost_threshold",
            Self::IdleTimeout => "idle_timeout",
            Self::SameStepLoop => "same_step_loop",
        };
        f.write_str(code)
    }
}

/// Details of an enforcement violation.
///
/// `current` and `limit` are in the unit of the limit that was hit; the
/// checks in this file pass step counts, token counts, milliseconds, seconds
/// or cents depending on `violation_type`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct EnforcementViolation {
    /// Type of violation
    pub violation_type: ViolationType,
    /// Current value
    pub current: u64,
    /// Limit value
    pub limit: u64,
    /// Human-readable message (generated by `EnforcementViolation::new`)
    pub message: String,
}

impl EnforcementViolation {
    /// Create a new violation and generate its human-readable message.
    ///
    /// The message has the form `"<type> exceeded: <current> / <limit> (<pct>%)"`.
    /// When `limit` is zero the percentage is omitted: the division would
    /// yield infinity or NaN, and the saturating float-to-int cast would
    /// print a meaningless `4294967295%`.
    pub fn new(violation_type: ViolationType, current: u64, limit: u64) -> Self {
        let message = if limit == 0 {
            format!("{} exceeded: {} / {}", violation_type, current, limit)
        } else {
            format!(
                "{} exceeded: {} / {} ({}%)",
                violation_type,
                current,
                limit,
                (current as f64 / limit as f64 * 100.0) as u32
            )
        };
        Self {
            violation_type,
            current,
            limit,
            message,
        }
    }

    /// Convert to an [`ExecutionError`].
    ///
    /// The violation type's display string becomes the error code, and
    /// `current`, `limit` and the type are attached as JSON details.
    pub fn to_error(&self) -> ExecutionError {
        // Exhaustive on purpose: adding a `ViolationType` variant must force
        // a decision about its error category here.
        let category = match self.violation_type {
            ViolationType::WallTimeLimit => ExecutionErrorCategory::Timeout,
            // Rate limits are retryable
            ViolationType::RateLimit => ExecutionErrorCategory::LlmError,
            // Non-retryable policy
            ViolationType::NetworkViolation => ExecutionErrorCategory::PolicyViolation,
            ViolationType::StepLimit
            | ViolationType::TokenLimit
            | ViolationType::MemoryLimit
            | ViolationType::ConcurrencyLimit
            | ViolationType::DiscoveredStepLimit
            | ViolationType::DiscoveryDepthLimit
            | ViolationType::CostThreshold
            | ViolationType::IdleTimeout
            | ViolationType::SameStepLoop => ExecutionErrorCategory::QuotaExceeded,
        };

        ExecutionError::new(category, self.message.clone())
            .with_code(self.violation_type.to_string())
            .with_details(serde_json::json!({
                "current": self.current,
                "limit": self.limit,
                "violation_type": self.violation_type.to_string(),
            }))
    }
}

/// Warning about approaching limits.
///
/// Carried by `EnforcementResult::Warning`; the operation is still allowed.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct EnforcementWarning {
    /// Type of limit being approached
    pub warning_type: ViolationType,
    /// Current usage as a percentage of the limit, computed by
    /// `EnforcementWarning::new` (nominally 0-100)
    pub usage_percent: u32,
    /// Human-readable message
    pub message: String,
}

impl EnforcementWarning {
    /// Create a new warning and generate its human-readable message.
    ///
    /// `usage_percent` is `current / limit` as a truncated percentage. A zero
    /// `limit` is handled explicitly, because the float division would yield
    /// infinity and the cast would then report `u32::MAX` percent: zero usage
    /// of a zero limit is reported as 0%, any usage of a zero limit as 100%.
    pub fn new(warning_type: ViolationType, current: u64, limit: u64) -> Self {
        let percent = if limit == 0 {
            if current == 0 {
                0
            } else {
                100
            }
        } else {
            (current as f64 / limit as f64 * 100.0) as u32
        };
        let message = format!("{} at {}%: {} / {}", warning_type, percent, current, limit);
        Self {
            warning_type,
            usage_percent: percent,
            message,
        }
    }
}

// =============================================================================
// Enforcement Policy
// =============================================================================

/// Configuration for enforcement behavior.
///
/// Held by `EnforcementMiddleware` and consulted by its limit checks.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnforcementPolicy {
    /// Warning threshold (percentage of limit, 0-100); checks return a
    /// warning once usage reaches this percentage without exceeding the limit
    pub warning_threshold: u32,
    /// Whether to emit events on warnings
    pub emit_warning_events: bool,
    /// Whether to emit events on blocks
    pub emit_block_events: bool,
    /// Grace period for timeouts (milliseconds), added to the wall-time limit
    /// before an execution is blocked
    pub timeout_grace_ms: u64,
}

impl Default for EnforcementPolicy {
    /// Defaults: warn at 80% of a limit, emit both warning and block events,
    /// and allow a one-second grace period on timeouts.
    fn default() -> Self {
        const WARN_AT_PERCENT: u32 = 80;
        const TIMEOUT_GRACE_MS: u64 = 1000;

        Self {
            warning_threshold: WARN_AT_PERCENT,
            emit_warning_events: true,
            emit_block_events: true,
            timeout_grace_ms: TIMEOUT_GRACE_MS,
        }
    }
}

// =============================================================================
// Long-Running Execution Policy
// =============================================================================

/// Policy configuration for long-running agentic executions
///
/// These controls prevent runaway costs, infinite discovery loops, and idle
/// executions from consuming resources in the Agentic DAG model.
///
/// Every field is optional; `None` disables the corresponding check.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LongRunningExecutionPolicy {
    /// Maximum number of dynamically discovered steps before intervention
    /// (Steps with StepSource::Discovered)
    pub max_discovered_steps: Option<u32>,
    /// Maximum depth of discovery chains (prevents infinite discovery)
    /// e.g., agent discovers step A, which discovers step B, which discovers C...
    pub max_discovery_depth: Option<u32>,
    /// Alert threshold for cumulative cost in USD.
    /// The cost check in this file returns `Blocked` once the recorded cost
    /// reaches this value.
    /// NOTE(review): the original comment says execution "pauses for
    /// approval"; that handling is not visible in this file - confirm.
    pub cost_alert_threshold_usd: Option<f64>,
    /// Maximum time without activity before idle timeout (seconds)
    pub idle_timeout_seconds: Option<u64>,
    /// Maximum repetitions of same methodology before loop detection
    /// (the `standard()` preset uses 3)
    pub max_same_step_repetitions: Option<u32>,
}

impl Default for LongRunningExecutionPolicy {
    fn default() -> Self {
        Self::standard()
    }
}

impl LongRunningExecutionPolicy {
    /// Standard preset - balanced limits for typical long-running executions
    /// - Discovered steps: 50
    /// - Discovery depth: 5
    /// - Cost alert: $5.00 USD
    /// - Idle timeout: 30 minutes
    /// - Same-step repetitions: 3
    pub fn standard() -> Self {
        Self {
            max_discovered_steps: Some(50),
            max_discovery_depth: Some(5),
            cost_alert_threshold_usd: Some(5.0),
            idle_timeout_seconds: Some(30 * 60),
            max_same_step_repetitions: Some(3),
        }
    }

    /// Extended preset - higher limits for complex, supervised workflows
    /// - Discovered steps: 300
    /// - Discovery depth: 10
    /// - Cost alert: $50.00 USD
    /// - Idle timeout: 4 hours
    /// - Same-step repetitions: 5
    pub fn extended() -> Self {
        Self {
            max_discovered_steps: Some(300),
            max_discovery_depth: Some(10),
            cost_alert_threshold_usd: Some(50.0),
            idle_timeout_seconds: Some(4 * 60 * 60),
            max_same_step_repetitions: Some(5),
        }
    }

    /// Unlimited preset - no discovery limits, but requires cost monitoring
    /// - No step/depth/repetition limits
    /// - Cost alert: $100.00 USD (mandatory safety net)
    /// - Idle timeout: 24 hours
    pub fn unlimited() -> Self {
        Self {
            // Only the two safety nets are set; everything else stays off.
            cost_alert_threshold_usd: Some(100.0),
            idle_timeout_seconds: Some(24 * 60 * 60),
            ..Self::disabled()
        }
    }

    /// Disabled - every long-running control is switched off (use with caution)
    pub fn disabled() -> Self {
        Self {
            max_discovered_steps: None,
            max_discovery_depth: None,
            cost_alert_threshold_usd: None,
            idle_timeout_seconds: None,
            max_same_step_repetitions: None,
        }
    }
}

// =============================================================================
// Enforcement Middleware
// =============================================================================

/// Enforcement middleware for checking limits before operations.
///
/// Keeps one shared [`ExecutionUsage`] per registered execution plus a count
/// of registered executions per tenant, and evaluates limits against them.
#[derive(Debug)]
pub struct EnforcementMiddleware {
    /// Active executions and their usage
    executions: RwLock<HashMap<ExecutionId, Arc<ExecutionUsage>>>,
    /// Active execution count per tenant
    tenant_executions: RwLock<HashMap<TenantId, AtomicU32>>,
    /// Global rate limiter state.
    /// Marked `dead_code` because no method visible in this part of the file
    /// reads it.
    #[allow(dead_code)]
    rate_limiter: RwLock<RateLimiterState>,
    /// Enforcement policy
    policy: EnforcementPolicy,
}

impl EnforcementMiddleware {
    /// Build a middleware that uses the default [`EnforcementPolicy`].
    pub fn new() -> Self {
        let policy = EnforcementPolicy::default();
        Self::with_policy(policy)
    }

    /// Build a middleware with a caller-supplied policy and empty tracking state.
    pub fn with_policy(policy: EnforcementPolicy) -> Self {
        let executions = RwLock::new(HashMap::new());
        let tenant_executions = RwLock::new(HashMap::new());
        let rate_limiter = RwLock::new(RateLimiterState::new());

        Self {
            executions,
            tenant_executions,
            rate_limiter,
            policy,
        }
    }

    /// Whether warning events should be emitted when limits are near
    pub fn emit_warning_events_enabled(&self) -> bool {
        self.policy.emit_warning_events
    }

    /// Register a new execution and return its shared usage tracker.
    ///
    /// A fresh [`ExecutionUsage`] is stored under `execution_id` and the
    /// tenant's active-execution count is incremented.
    ///
    /// If `execution_id` was already registered, the old tracker is replaced
    /// and the count of the tenant that owned it is decremented first, so a
    /// repeated registration does not inflate the tenant's concurrency count
    /// permanently.
    pub async fn register_execution(
        &self,
        execution_id: ExecutionId,
        tenant_id: TenantId,
    ) -> Arc<ExecutionUsage> {
        let usage = Arc::new(ExecutionUsage::new(execution_id.clone(), tenant_id.clone()));

        // Register in executions map, remembering any tracker we displaced
        let replaced = {
            let mut executions = self.executions.write().await;
            executions.insert(execution_id, Arc::clone(&usage))
        };

        // Update tenant execution counts
        {
            let mut tenant_execs = self.tenant_executions.write().await;

            // The displaced tracker was counted when it was registered; it
            // can no longer be unregistered, so release its slot here.
            if let Some(previous) = replaced {
                if let Some(count) = tenant_execs.get(&previous.tenant_id) {
                    count.fetch_sub(1, Ordering::SeqCst);
                }
            }

            tenant_execs
                .entry(tenant_id)
                .or_insert_with(|| AtomicU32::new(0))
                .fetch_add(1, Ordering::SeqCst);
        }

        usage
    }

    /// Stop tracking an execution and release its tenant's concurrency slot.
    ///
    /// Does nothing when `execution_id` is not registered.
    pub async fn unregister_execution(&self, execution_id: &ExecutionId) {
        // The write guard is a temporary and is released at the end of this
        // statement, before the tenant map is locked.
        let removed = self.executions.write().await.remove(execution_id);

        let tenant_id = match removed {
            Some(usage) => usage.tenant_id.clone(),
            None => return,
        };

        let tenant_execs = self.tenant_executions.read().await;
        if let Some(active) = tenant_execs.get(&tenant_id) {
            active.fetch_sub(1, Ordering::SeqCst);
        }
    }

    /// Look up the shared usage tracker for an execution, if it is registered.
    pub async fn get_usage(&self, execution_id: &ExecutionId) -> Option<Arc<ExecutionUsage>> {
        let guard = self.executions.read().await;
        guard.get(execution_id).map(Arc::clone)
    }

    /// Take a serializable snapshot of an execution's usage, if it is registered.
    pub async fn get_usage_snapshot(&self, execution_id: &ExecutionId) -> Option<UsageSnapshot> {
        let usage = self.get_usage(execution_id).await?;
        Some(UsageSnapshot::from(usage.as_ref()))
    }

    /// Decide whether one more step may start for `execution_id`.
    ///
    /// The step about to start is counted, so `steps so far + 1` is compared
    /// with `limits.max_steps`. Returns `Blocked` when that projected count
    /// exceeds the limit, `Warning` when it reaches the policy's warning
    /// threshold (a percentage of the limit), and `Allowed` otherwise.
    /// Executions that are not registered are not tracked and are always
    /// `Allowed`.
    pub async fn check_step_allowed(
        &self,
        execution_id: &ExecutionId,
        limits: &ResourceLimits,
    ) -> EnforcementResult {
        let usage = if let Some(tracked) = self.get_usage(execution_id).await {
            tracked
        } else {
            // No tracking = allowed
            return EnforcementResult::Allowed;
        };

        let limit = limits.max_steps as u64;
        let projected = usage.step_count() as u64 + 1;

        if projected > limit {
            let violation = EnforcementViolation::new(ViolationType::StepLimit, projected, limit);
            return EnforcementResult::Blocked(violation);
        }

        let used_percent = (projected as f64 / limit as f64 * 100.0) as u32;
        if used_percent < self.policy.warning_threshold {
            return EnforcementResult::Allowed;
        }

        let warning = EnforcementWarning::new(ViolationType::StepLimit, projected, limit);
        EnforcementResult::Warning(warning)
    }

    /// Decide whether consuming `additional_tokens` more tokens stays within
    /// `limits.max_tokens`.
    ///
    /// Compares `tokens used so far + additional_tokens` with the limit:
    /// `Blocked` when the total exceeds it, `Warning` when it reaches the
    /// policy's warning threshold percentage, `Allowed` otherwise. Executions
    /// that are not registered are always `Allowed`.
    pub async fn check_tokens_allowed(
        &self,
        execution_id: &ExecutionId,
        limits: &ResourceLimits,
        additional_tokens: u32,
    ) -> EnforcementResult {
        let usage = if let Some(tracked) = self.get_usage(execution_id).await {
            tracked
        } else {
            return EnforcementResult::Allowed;
        };

        let limit = limits.max_tokens as u64;
        let projected = usage.total_tokens() as u64 + additional_tokens as u64;

        if projected > limit {
            let violation = EnforcementViolation::new(ViolationType::TokenLimit, projected, limit);
            return EnforcementResult::Blocked(violation);
        }

        let used_percent = (projected as f64 / limit as f64 * 100.0) as u32;
        if used_percent < self.policy.warning_threshold {
            return EnforcementResult::Allowed;
        }

        let warning = EnforcementWarning::new(ViolationType::TokenLimit, projected, limit);
        EnforcementResult::Warning(warning)
    }

    /// Check if wall time is within limits.
    ///
    /// The execution is `Blocked` only once elapsed time exceeds
    /// `limits.max_wall_time_ms` plus the policy's `timeout_grace_ms`. The
    /// violation and the warning percentage are both reported against the
    /// nominal limit (without the grace period), so a warning can show more
    /// than 100% while the execution is inside the grace window. Executions
    /// that are not registered are always `Allowed`.
    pub async fn check_wall_time_allowed(
        &self,
        execution_id: &ExecutionId,
        limits: &ResourceLimits,
    ) -> EnforcementResult {
        let usage = match self.get_usage(execution_id).await {
            Some(u) => u,
            None => return EnforcementResult::Allowed,
        };

        let current = usage.wall_time_ms();
        let limit = limits.max_wall_time_ms;

        // Add grace period. Saturate rather than overflow: a limit at or near
        // `u64::MAX` (an "effectively unlimited" setting) would otherwise
        // panic in debug builds and wrap to a tiny limit in release builds.
        let effective_limit = limit.saturating_add(self.policy.timeout_grace_ms);

        if current > effective_limit {
            return EnforcementResult::Blocked(EnforcementViolation::new(
                ViolationType::WallTimeLimit,
                current,
                limit,
            ));
        }

        let percent = (current as f64 / limit as f64 * 100.0) as u32;
        if percent >= self.policy.warning_threshold {
            return EnforcementResult::Warning(EnforcementWarning::new(
                ViolationType::WallTimeLimit,
                current,
                limit,
            ));
        }

        EnforcementResult::Allowed
    }

    /// Decide whether `tenant_id` may start another execution.
    ///
    /// `Allowed` when no `max_concurrent_executions` limit is configured or
    /// when the tenant's registered-execution count is below it; `Blocked`
    /// otherwise. The violation reports `count + 1` to include the execution
    /// that was about to start. This check never produces a warning.
    pub async fn check_concurrency_allowed(
        &self,
        tenant_id: &TenantId,
        limits: &ResourceLimits,
    ) -> EnforcementResult {
        let limit = match limits.max_concurrent_executions {
            Some(max) => max as u64,
            // No limit set
            None => return EnforcementResult::Allowed,
        };

        let active = {
            let counts = self.tenant_executions.read().await;
            counts
                .get(tenant_id)
                .map_or(0, |count| count.load(Ordering::SeqCst)) as u64
        };

        if active < limit {
            return EnforcementResult::Allowed;
        }

        // +1 for the execution we're about to start
        let violation = EnforcementViolation::new(ViolationType::ConcurrencyLimit, active + 1, limit);
        EnforcementResult::Blocked(violation)
    }

    /// Run the wall-time, step and token checks before starting a step.
    ///
    /// Checks run in that order and the first `Blocked` result is returned
    /// immediately, skipping the remaining checks. If nothing blocks, the
    /// first warning (in the same order) is returned; otherwise `Allowed`.
    /// The token check is made with zero additional tokens.
    pub async fn check_all_limits(
        &self,
        execution_id: &ExecutionId,
        limits: &ResourceLimits,
    ) -> EnforcementResult {
        let wall = self.check_wall_time_allowed(execution_id, limits).await;
        if wall.is_blocked() {
            return wall;
        }

        let steps = self.check_step_allowed(execution_id, limits).await;
        if steps.is_blocked() {
            return steps;
        }

        let tokens = self.check_tokens_allowed(execution_id, limits, 0).await;
        if tokens.is_blocked() {
            return tokens;
        }

        // Nothing blocked: surface the highest-priority warning, if any.
        if let EnforcementResult::Warning(_) = wall {
            return wall;
        }
        if let EnforcementResult::Warning(_) = steps {
            return steps;
        }
        if let EnforcementResult::Warning(_) = tokens {
            return tokens;
        }

        EnforcementResult::Allowed
    }

    /// Count one completed step for the execution and refresh its activity
    /// timestamp. Unregistered executions are ignored.
    pub async fn record_step(&self, execution_id: &ExecutionId) {
        let usage = match self.get_usage(execution_id).await {
            Some(tracked) => tracked,
            None => return,
        };
        usage.record_step();
        usage.touch().await;
    }

    /// Add input/output token counts to the execution's usage and refresh its
    /// activity timestamp. Unregistered executions are ignored.
    pub async fn record_tokens(&self, execution_id: &ExecutionId, input: u32, output: u32) {
        let usage = match self.get_usage(execution_id).await {
            Some(tracked) => tracked,
            None => return,
        };
        usage.record_tokens(input, output);
        usage.touch().await;
    }

    /// Count one dynamically discovered step for the execution and refresh
    /// its activity timestamp. Unregistered executions are ignored.
    pub async fn record_discovered_step(&self, execution_id: &ExecutionId) {
        let usage = match self.get_usage(execution_id).await {
            Some(tracked) => tracked,
            None => return,
        };
        usage.record_discovered_step();
        usage.touch().await;
    }

    /// Add a cost in USD to the execution's running total and refresh its
    /// activity timestamp. Unregistered executions are ignored.
    pub async fn record_cost(&self, execution_id: &ExecutionId, cost_usd: f64) {
        let usage = match self.get_usage(execution_id).await {
            Some(tracked) => tracked,
            None => return,
        };
        usage.record_cost_usd(cost_usd);
        usage.touch().await;
    }

    /// Enter one level of discovery for the execution (a discovered step's
    /// sub-execution). Unregistered executions are ignored; the activity
    /// timestamp is not touched.
    pub async fn push_discovery_depth(&self, execution_id: &ExecutionId) {
        match self.get_usage(execution_id).await {
            Some(usage) => usage.push_discovery_depth(),
            None => {}
        }
    }

    /// Leave one level of discovery for the execution (a discovered step's
    /// sub-execution). Unregistered executions are ignored; the activity
    /// timestamp is not touched.
    pub async fn pop_discovery_depth(&self, execution_id: &ExecutionId) {
        match self.get_usage(execution_id).await {
            Some(usage) => usage.pop_discovery_depth(),
            None => {}
        }
    }

    // =========================================================================
    // Long-Running Execution Checks
    // =========================================================================

    /// Decide whether one more step may be discovered for `execution_id`.
    ///
    /// Compares `discovered steps so far + 1` with
    /// `policy.max_discovered_steps`: `Blocked` when the projected count
    /// exceeds it, `Warning` when it reaches the enforcement policy's warning
    /// threshold percentage, `Allowed` otherwise. Also `Allowed` when the
    /// limit is not configured or the execution is not registered.
    pub async fn check_discovered_step_limit(
        &self,
        execution_id: &ExecutionId,
        policy: &LongRunningExecutionPolicy,
    ) -> EnforcementResult {
        let limit = match policy.max_discovered_steps {
            Some(max) => max as u64,
            None => return EnforcementResult::Allowed,
        };

        let usage = if let Some(tracked) = self.get_usage(execution_id).await {
            tracked
        } else {
            return EnforcementResult::Allowed;
        };

        // +1 for step we're about to discover
        let projected = usage.discovered_step_count() as u64 + 1;

        if projected > limit {
            let violation =
                EnforcementViolation::new(ViolationType::DiscoveredStepLimit, projected, limit);
            return EnforcementResult::Blocked(violation);
        }

        let used_percent = (projected as f64 / limit as f64 * 100.0) as u32;
        if used_percent < self.policy.warning_threshold {
            return EnforcementResult::Allowed;
        }

        let warning = EnforcementWarning::new(ViolationType::DiscoveredStepLimit, projected, limit);
        EnforcementResult::Warning(warning)
    }

    /// Decide whether the execution may go one discovery level deeper.
    ///
    /// Compares `current depth + 1` with `policy.max_discovery_depth` and
    /// returns `Blocked` when the projected depth exceeds it. This check never
    /// warns: the result is either `Blocked` or `Allowed`. Also `Allowed`
    /// when the limit is not configured or the execution is not registered.
    pub async fn check_discovery_depth_limit(
        &self,
        execution_id: &ExecutionId,
        policy: &LongRunningExecutionPolicy,
    ) -> EnforcementResult {
        let limit = match policy.max_discovery_depth {
            Some(max) => max as u64,
            None => return EnforcementResult::Allowed,
        };

        let usage = if let Some(tracked) = self.get_usage(execution_id).await {
            tracked
        } else {
            return EnforcementResult::Allowed;
        };

        // +1 for depth we're about to enter
        let projected = usage.current_discovery_depth() as u64 + 1;

        if projected <= limit {
            return EnforcementResult::Allowed;
        }

        let violation =
            EnforcementViolation::new(ViolationType::DiscoveryDepthLimit, projected, limit);
        EnforcementResult::Blocked(violation)
    }

    /// Check if cost threshold has been exceeded.
    ///
    /// Returns `Blocked` once the execution's recorded cost reaches
    /// `policy.cost_alert_threshold_usd` (the comparison is `>=`), `Warning`
    /// when it reaches the enforcement policy's warning threshold percentage,
    /// and `Allowed` otherwise. Also `Allowed` when no threshold is
    /// configured or the execution is not registered.
    ///
    /// The violation and warning carry values in cents. The threshold is
    /// rounded to the nearest cent for reporting; truncating would report
    /// e.g. a 1.15 USD threshold as 114 cents.
    pub async fn check_cost_threshold(
        &self,
        execution_id: &ExecutionId,
        policy: &LongRunningExecutionPolicy,
    ) -> EnforcementResult {
        let threshold = match policy.cost_alert_threshold_usd {
            Some(t) => t,
            None => return EnforcementResult::Allowed,
        };

        let usage = match self.get_usage(execution_id).await {
            Some(u) => u,
            None => return EnforcementResult::Allowed,
        };

        let current_cents = usage.cost_cents.load(Ordering::SeqCst);
        let current_usd = current_cents as f64 / 100.0;
        // Reporting value only; the decision below compares in USD.
        let limit_cents = (threshold * 100.0).round() as u64;

        if current_usd >= threshold {
            return EnforcementResult::Blocked(EnforcementViolation::new(
                ViolationType::CostThreshold,
                current_cents,
                limit_cents,
            ));
        }

        let percent = (current_usd / threshold * 100.0) as u32;
        if percent >= self.policy.warning_threshold {
            return EnforcementResult::Warning(EnforcementWarning::new(
                ViolationType::CostThreshold,
                current_cents,
                limit_cents,
            ));
        }

        EnforcementResult::Allowed
    }

    /// Decide whether the execution has been idle for too long.
    ///
    /// Idle time is the whole number of seconds since the execution's last
    /// recorded activity. Returns `Blocked` once it reaches
    /// `policy.idle_timeout_seconds`, `Warning` once it reaches the
    /// enforcement policy's warning threshold percentage of that timeout, and
    /// `Allowed` otherwise. Also `Allowed` when no timeout is configured or
    /// the execution is not registered.
    pub async fn check_idle_timeout(
        &self,
        execution_id: &ExecutionId,
        policy: &LongRunningExecutionPolicy,
    ) -> EnforcementResult {
        let timeout_secs = match policy.idle_timeout_seconds {
            Some(secs) => secs,
            None => return EnforcementResult::Allowed,
        };

        let usage = if let Some(tracked) = self.get_usage(execution_id).await {
            tracked
        } else {
            return EnforcementResult::Allowed;
        };

        let idle_secs = usage.idle_seconds().await;

        if idle_secs >= timeout_secs {
            let violation =
                EnforcementViolation::new(ViolationType::IdleTimeout, idle_secs, timeout_secs);
            return EnforcementResult::Blocked(violation);
        }

        // Warn once idle time reaches the configured percentage of the timeout.
        let used_percent = (idle_secs as f64 / timeout_secs as f64 * 100.0) as u32;
        if used_percent < self.policy.warning_threshold {
            return EnforcementResult::Allowed;
        }

        let warning = EnforcementWarning::new(ViolationType::IdleTimeout, idle_secs, timeout_secs);
        EnforcementResult::Warning(warning)
    }

    /// Run every long-running execution check for `execution_id`.
    ///
    /// Order: cost threshold, discovery depth, discovered-step count, idle
    /// timeout. The first `Blocked` result is returned immediately, skipping
    /// the remaining checks. If nothing blocks, the first warning among the
    /// cost, discovered-step and idle checks (in that order) is returned;
    /// otherwise `Allowed`.
    pub async fn check_long_running_limits(
        &self,
        execution_id: &ExecutionId,
        policy: &LongRunningExecutionPolicy,
    ) -> EnforcementResult {
        // Cost first: most critical for runaway costs.
        let cost = self.check_cost_threshold(execution_id, policy).await;
        if cost.is_blocked() {
            return cost;
        }

        // Depth next: prevents infinite discovery.
        let depth = self.check_discovery_depth_limit(execution_id, policy).await;
        if depth.is_blocked() {
            return depth;
        }

        let discovered = self.check_discovered_step_limit(execution_id, policy).await;
        if discovered.is_blocked() {
            return discovered;
        }

        let idle = self.check_idle_timeout(execution_id, policy).await;
        if idle.is_blocked() {
            return idle;
        }

        // Nothing blocked: surface the highest-priority warning, if any.
        if let EnforcementResult::Warning(_) = cost {
            return cost;
        }
        if let EnforcementResult::Warning(_) = discovered {
            return discovered;
        }
        if let EnforcementResult::Warning(_) = idle {
            return idle;
        }

        EnforcementResult::Allowed
    }
}

impl Default for EnforcementMiddleware {
    fn default() -> Self {
        Self::new()
    }
}

// =============================================================================
// Rate Limiter
// =============================================================================

/// Rate limiter state using token bucket algorithm.
///
/// Holds one [`TokenBucket`] per map key.
#[derive(Debug)]
struct RateLimiterState {
    /// Token buckets keyed by a `String` (presumably the provider name —
    /// confirm against callers).
    // `dead_code` is allowed here: in this section of the file the field is
    // only ever initialised, never read.
    #[allow(dead_code)]
    provider_tokens: HashMap<String, TokenBucket>,
}

impl RateLimiterState {
    /// Create a state with no token buckets registered.
    fn new() -> Self {
        let provider_tokens = HashMap::default();
        Self { provider_tokens }
    }
}

/// Token bucket for rate limiting.
///
/// `tokens` is the live balance that callers draw from. `max_tokens` and
/// `refill_rate` are plain values set at construction. `last_refill` records
/// the instant at which tokens were last credited to the balance.
#[derive(Debug)]
struct TokenBucket {
    /// Current token count
    tokens: AtomicU64,
    /// Maximum tokens (bucket size); refills are capped at this value
    max_tokens: u64,
    /// Tokens added per second
    refill_rate: u64,
    /// Last refill timestamp; elapsed time since this instant determines how
    /// many tokens the next refill credits
    last_refill: RwLock<Instant>,
}

impl TokenBucket {
    /// Create a new token bucket that starts full.
    ///
    /// * `max_tokens` - capacity of the bucket; also the initial token count.
    /// * `refill_rate` - tokens credited per second of elapsed time.
    #[allow(dead_code)]
    fn new(max_tokens: u64, refill_rate: u64) -> Self {
        Self {
            tokens: AtomicU64::new(max_tokens),
            max_tokens,
            refill_rate,
            last_refill: RwLock::new(Instant::now()),
        }
    }

    /// Try to take `count` tokens from the bucket.
    ///
    /// First credits the tokens earned since the last refill (capped at
    /// `max_tokens`), then attempts the withdrawal. Returns `true` and deducts
    /// `count` when enough tokens are available; returns `false` and leaves
    /// the balance untouched otherwise.
    ///
    /// Both the refill and the withdrawal are single atomic read-modify-write
    /// operations. A separate load followed by a store/subtract would let
    /// concurrent callers overdraw the bucket (wrapping the unsigned counter)
    /// or let a refill overwrite a concurrent withdrawal.
    #[allow(dead_code)]
    async fn try_acquire(&self, count: u64) -> bool {
        // Refill tokens based on elapsed time.
        {
            // The write lock serialises refills so elapsed time is credited once.
            let mut last = self.last_refill.write().await;
            let elapsed = last.elapsed();
            // Float-to-integer `as` casts saturate, so this cannot wrap.
            let new_tokens = (elapsed.as_secs_f64() * self.refill_rate as f64) as u64;
            // Only advance the timestamp when at least one whole token was
            // earned; otherwise sub-token intervals would never accumulate.
            if new_tokens > 0 {
                // The closure always returns `Some`, so this update cannot fail.
                let _ = self
                    .tokens
                    .fetch_update(Ordering::SeqCst, Ordering::SeqCst, |current| {
                        // Saturate before capping: `new_tokens` may be as large
                        // as `u64::MAX`.
                        Some(current.saturating_add(new_tokens).min(self.max_tokens))
                    });
                *last = Instant::now();
            }
        }

        // Withdraw atomically: `checked_sub` yields `None` when the balance is
        // too low, which aborts the update and leaves the counter unchanged.
        self.tokens
            .fetch_update(Ordering::SeqCst, Ordering::SeqCst, |current| {
                current.checked_sub(count)
            })
            .is_ok()
    }
}

// =============================================================================
// Step Timeout Guard
// =============================================================================

/// Guard for enforcing step timeouts.
///
/// Records a start instant when created and compares the time elapsed since
/// then against `timeout`. The guard is passive: its methods only report
/// state, so callers have to poll it (for example via `check`).
pub struct StepTimeoutGuard {
    /// Step being timed; included in the timeout error produced by `check`
    step_id: StepId,
    /// Maximum duration allowed for the step
    timeout: Duration,
    /// Instant at which the guard was created
    started_at: Instant,
}

impl StepTimeoutGuard {
    /// Start timing `step_id` now, allowing it `timeout` before it counts as timed out.
    pub fn new(step_id: StepId, timeout: Duration) -> Self {
        let started_at = Instant::now();
        Self {
            step_id,
            timeout,
            started_at,
        }
    }

    /// Whether strictly more than `timeout` has elapsed since the guard was created.
    pub fn is_timed_out(&self) -> bool {
        self.elapsed() > self.timeout
    }

    /// Time left before the timeout is reached; zero once it has passed.
    pub fn remaining(&self) -> Duration {
        let spent = self.elapsed();
        self.timeout.saturating_sub(spent)
    }

    /// Time elapsed since the guard was created.
    pub fn elapsed(&self) -> Duration {
        self.started_at.elapsed()
    }

    /// Return `Ok(())` while within the timeout.
    ///
    /// # Errors
    ///
    /// Once the guard has timed out, returns the error built by
    /// `ExecutionError::timeout` with this guard's step id attached via
    /// `with_step_id`.
    #[allow(clippy::result_large_err)]
    pub fn check(&self) -> Result<(), ExecutionError> {
        if !self.is_timed_out() {
            return Ok(());
        }

        let message = format!(
            "Step {} timed out after {:?}",
            self.step_id, self.timeout
        );
        Err(ExecutionError::timeout(message).with_step_id(self.step_id.clone()))
    }
}

// =============================================================================
// Tests
// =============================================================================

#[cfg(test)]
mod tests {
    use super::*;

    /// Step and token counters report what was recorded on them.
    #[tokio::test]
    async fn test_usage_tracking() {
        let tracker = ExecutionUsage::new(
            ExecutionId::new(),
            TenantId::from("tenant_test123456789012345"),
        );

        // Two recorded steps -> a step count of 2.
        for _ in 0..2 {
            tracker.record_step();
        }
        assert_eq!(tracker.step_count(), 2);

        // 100 input + 50 output tokens -> 150 in total.
        tracker.record_tokens(100, 50);
        assert_eq!(tracker.total_tokens(), 150);
    }

    /// With `max_steps: 5`, five steps pass the check and the sixth is blocked.
    #[tokio::test]
    async fn test_step_limit_enforcement() {
        let limits = ResourceLimits {
            max_steps: 5,
            max_tokens: 1000,
            max_wall_time_ms: 60000,
            max_memory_mb: None,
            max_concurrent_executions: None,
        };

        let enforcer = EnforcementMiddleware::new();
        let execution = ExecutionId::new();
        let tenant = TenantId::from("tenant_test123456789012345");
        let usage = enforcer
            .register_execution(execution.clone(), tenant)
            .await;

        // Each of the first five steps is checked, then recorded.
        for _step in 1..=5 {
            let verdict = enforcer.check_step_allowed(&execution, &limits).await;
            assert!(verdict.is_allowed(), "Step should be allowed");
            usage.record_step();
        }

        // The budget is used up, so the next check must refuse.
        let verdict = enforcer.check_step_allowed(&execution, &limits).await;
        assert!(verdict.is_blocked(), "Step should be blocked");
    }

    /// A token request is blocked when it would push usage past `max_tokens`
    /// and allowed when it stays within it.
    #[tokio::test]
    async fn test_token_limit_enforcement() {
        let limits = ResourceLimits {
            max_steps: 100,
            max_tokens: 100,
            max_wall_time_ms: 60000,
            max_memory_mb: None,
            max_concurrent_executions: None,
        };

        let enforcer = EnforcementMiddleware::new();
        let execution = ExecutionId::new();
        let tenant = TenantId::from("tenant_test123456789012345");
        let usage = enforcer
            .register_execution(execution.clone(), tenant)
            .await;

        // 50 input + 30 output = 80 of the 100 permitted tokens already used.
        usage.record_tokens(50, 30);

        // 80 + 25 would go over the limit.
        let over_budget = enforcer.check_tokens_allowed(&execution, &limits, 25).await;
        assert!(
            over_budget.is_blocked(),
            "Should be blocked when exceeding limit"
        );

        // 80 + 10 stays under the limit.
        let within_budget = enforcer.check_tokens_allowed(&execution, &limits, 10).await;
        assert!(within_budget.is_allowed(), "Should be allowed within limit");
    }

    /// Checking a step close to the limit yields a warning instead of a plain allow.
    #[tokio::test]
    async fn test_warning_threshold() {
        let limits = ResourceLimits {
            max_steps: 10,
            max_tokens: 1000,
            max_wall_time_ms: 60000,
            max_memory_mb: None,
            max_concurrent_executions: None,
        };

        let enforcer = EnforcementMiddleware::with_policy(EnforcementPolicy {
            warning_threshold: 80,
            ..Default::default()
        });
        let execution = ExecutionId::new();
        let tenant = TenantId::from("tenant_test123456789012345");
        let usage = enforcer
            .register_execution(execution.clone(), tenant)
            .await;

        // Record 7 of the 10 permitted steps.
        (0..7).for_each(|_| usage.record_step());

        // The check below is for the 8th step, which the test expects to hit
        // the 80% warning threshold.
        let verdict = enforcer.check_step_allowed(&execution, &limits).await;
        assert!(matches!(verdict, EnforcementResult::Warning(_)));
    }

    /// A guard reports OK before its timeout and an error after it.
    #[test]
    fn test_step_timeout_guard() {
        let timeout = Duration::from_millis(100);
        let guard = StepTimeoutGuard::new(StepId::new(), timeout);

        // Freshly created: still within the 100 ms budget.
        assert!(!guard.is_timed_out());
        assert!(guard.check().is_ok());

        // Wait 150 ms, i.e. past the timeout.
        std::thread::sleep(timeout + Duration::from_millis(50));

        assert!(guard.is_timed_out());
        assert!(guard.check().is_err());
    }

    /// With a limit of two concurrent executions, a tenant with two registered
    /// executions is blocked until one of them is unregistered.
    #[tokio::test]
    async fn test_concurrency_limit() {
        let enforcer = EnforcementMiddleware::new();
        let tenant = TenantId::from("tenant_test123456789012345");

        let limits = ResourceLimits {
            max_steps: 100,
            max_tokens: 1000,
            max_wall_time_ms: 60000,
            max_memory_mb: None,
            max_concurrent_executions: Some(2),
        };

        // Fill the tenant's two slots.
        let first = ExecutionId::new();
        let second = ExecutionId::new();
        for id in [&first, &second] {
            enforcer
                .register_execution(id.clone(), tenant.clone())
                .await;
        }

        // A third execution must be refused.
        let while_full = enforcer.check_concurrency_allowed(&tenant, &limits).await;
        assert!(while_full.is_blocked());

        // Free one slot.
        enforcer.unregister_execution(&first).await;

        // There is room again.
        let after_release = enforcer.check_concurrency_allowed(&tenant, &limits).await;
        assert!(after_release.is_allowed());
    }

    /// A `NetworkViolation` converts to a fatal, non-retryable policy-violation error.
    #[test]
    fn test_network_violation_type() {
        let error = EnforcementViolation::new(ViolationType::NetworkViolation, 0, 0).to_error();

        assert_eq!(error.category, ExecutionErrorCategory::PolicyViolation);
        assert!(!error.is_retryable());
        assert!(error.is_fatal());
    }

    /// `NetworkViolation` displays as the snake_case string `network_violation`.
    #[test]
    fn test_violation_type_display_network() {
        let rendered = ViolationType::NetworkViolation.to_string();
        assert_eq!(rendered, "network_violation");
    }
}