// escher-execution-engine 0.1.2

//! ExecutionEngine - Main entry point for command execution
//!
//! Provides high-level API with state management and concurrency control.

use crate::config::ExecutionConfig;
use crate::errors::{ExecutionError, Result};
use crate::events::EventHandler;
use crate::executor::Executor;
use crate::types::{
    ExecutionRequest, ExecutionResult, ExecutionState, ExecutionStatus, ExecutionSummary,
};
use once_cell::sync::OnceCell;
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::{RwLock, Semaphore};
use tokio_util::sync::CancellationToken;
use uuid::Uuid;

/// Process-wide engine instance. It is written at most once, by
/// `ExecutionEngine::init_global_with_handler`, and read by
/// `ExecutionEngine::global`.
static INSTANCE: OnceCell<ExecutionEngine> = OnceCell::new();

/// Main execution engine
///
/// Thread-safe, async execution engine with:
/// - Semaphore-based concurrency limiting
/// - In-memory state management
/// - Event emission support
/// - Automatic cleanup (optional)
///
/// The state map, token map, semaphore and executor are all held behind
/// `Arc`, so a clone of the engine shares them with the original; only
/// `config` is copied by value.
#[derive(Clone)]
pub struct ExecutionEngine {
    /// Configuration the engine was built with (validated in `new`).
    config: ExecutionConfig,
    /// Per-execution state, keyed by the ID of the originating request.
    executions: Arc<RwLock<HashMap<Uuid, Arc<RwLock<ExecutionState>>>>>,
    /// One cancellation token per execution; `cancel` triggers the matching token.
    cancellation_tokens: Arc<RwLock<HashMap<Uuid, CancellationToken>>>,
    /// Handler installed through `with_event_handler`, if any.
    event_handler: Option<Arc<dyn EventHandler>>,
    /// Created with `config.max_concurrent_executions` permits; every
    /// in-flight execution holds one permit until its background task ends.
    semaphore: Arc<Semaphore>,
    /// Component that actually runs commands and reads/writes execution logs.
    executor: Arc<Executor>,
}

impl ExecutionEngine {
    /// Build an engine and install it as the process-wide singleton,
    /// optionally attaching an event handler first.
    ///
    /// The global engine always runs serially: whatever value `config` carries
    /// for `max_concurrent_executions` is replaced by `1` (a warning is logged
    /// when that actually changes the value).
    ///
    /// # Errors
    /// - Any error returned by [`ExecutionEngine::new`] for the adjusted config.
    /// - `ExecutionError::Internal` if a global engine was already installed.
    pub fn init_global_with_handler(
        mut config: ExecutionConfig,
        handler: Option<Arc<dyn EventHandler>>,
    ) -> Result<&'static ExecutionEngine> {
        // The singleton is serial by design, regardless of what was requested.
        let requested = config.max_concurrent_executions;
        if requested != 1 {
            tracing::warn!(
                "Overriding max_concurrent_executions from {} to 1 for global singleton",
                requested
            );
            config.max_concurrent_executions = 1;
        }

        let base = ExecutionEngine::new(config)?;
        let engine = match handler {
            Some(h) => base.with_event_handler(h),
            None => base,
        };

        // `set` refuses to overwrite an existing value; report that as an error.
        if INSTANCE.set(engine).is_err() {
            return Err(ExecutionError::Internal(
                "ExecutionEngine already initialized".to_string(),
            ));
        }

        Ok(INSTANCE.get().expect("ExecutionEngine just initialized"))
    }

    /// Initialize the global singleton instance without an event handler
    ///
    /// Thin wrapper that forwards to `init_global_with_handler` with `None` as
    /// the handler, so the same rules apply: the engine can be initialized only
    /// once, and `max_concurrent_executions = 1` is enforced for serial
    /// execution safety.
    ///
    /// # Errors
    /// Returns whatever `init_global_with_handler` returns for the same `config`.
    pub fn init_global(config: ExecutionConfig) -> Result<&'static ExecutionEngine> {
        Self::init_global_with_handler(config, None)
    }

    /// Get reference to the global singleton instance
    ///
    /// # Panics
    /// Panics if the singleton has not been installed yet, i.e. neither
    /// `init_global` nor `init_global_with_handler` has completed successfully.
    pub fn global() -> &'static ExecutionEngine {
        INSTANCE.get().expect("ExecutionEngine not initialized")
    }

    /// Construct an engine from `config`.
    ///
    /// The engine starts with no registered executions, no event handler, and a
    /// semaphore holding `config.max_concurrent_executions` permits.
    ///
    /// # Errors
    /// Returns `ExecutionError::InvalidConfig` when `config.validate()` rejects
    /// the configuration.
    pub fn new(config: ExecutionConfig) -> Result<Self> {
        // Reject bad configuration before building anything from it.
        config.validate().map_err(ExecutionError::InvalidConfig)?;

        let executor = Arc::new(Executor::new(config.clone()));
        let permits = config.max_concurrent_executions;

        Ok(Self {
            semaphore: Arc::new(Semaphore::new(permits)),
            executor,
            executions: Arc::new(RwLock::new(HashMap::new())),
            cancellation_tokens: Arc::new(RwLock::new(HashMap::new())),
            event_handler: None,
            config,
        })
    }

    /// Attach an event handler (builder style: consumes and returns the engine).
    ///
    /// The handler is stored on the engine and a fresh `Executor` is built from
    /// the engine's config with the same handler attached, replacing the
    /// previous executor.
    pub fn with_event_handler(mut self, handler: Arc<dyn EventHandler>) -> Self {
        // The executor needs its own handle to the handler, so rebuild it.
        let rebuilt = Executor::new(self.config.clone()).with_event_handler(Arc::clone(&handler));
        self.executor = Arc::new(rebuilt);
        self.event_handler = Some(handler);
        self
    }

    /// Execute a command asynchronously
    ///
    /// Reserves a concurrency slot, registers the execution's state and
    /// cancellation token under `request.id`, and spawns a background task that
    /// runs the command. Returns the execution ID as soon as the task has been
    /// spawned; use `get_status()`, `get_result()` or `wait_for_completion()`
    /// to follow its progress.
    ///
    /// This call never waits for a slot: if none is free it fails immediately.
    ///
    /// # Errors
    /// - `ExecutionError::ConcurrencyLimitReached` when all
    ///   `max_concurrent_executions` slots are in use. Nothing is registered in
    ///   that case, so a rejected request leaves no state or token behind.
    /// - `ExecutionError::Internal` if the semaphore has been closed.
    pub async fn execute(&self, request: ExecutionRequest) -> Result<Uuid> {
        let execution_id = request.id;

        // Reserve the slot first, atomically and without waiting. Checking
        // `available_permits()` and then awaiting `acquire_owned()` would let a
        // concurrent caller take the last permit in between, turning this call
        // into an unbounded wait instead of an immediate error.
        let permit = match self.semaphore.clone().try_acquire_owned() {
            Ok(permit) => permit,
            Err(tokio::sync::TryAcquireError::NoPermits) => {
                return Err(ExecutionError::ConcurrencyLimitReached(
                    self.config.max_concurrent_executions,
                ));
            }
            Err(tokio::sync::TryAcquireError::Closed) => {
                return Err(ExecutionError::Internal("Semaphore closed".to_string()));
            }
        };

        // Only register state once the slot is secured, so a rejected request
        // never shows up in `list_executions()` / `running_count()`.
        let cancel_token = CancellationToken::new();
        let state = Arc::new(RwLock::new(ExecutionState::new(request.clone())));

        {
            let mut executions = self.executions.write().await;
            executions.insert(execution_id, state.clone());
        }
        {
            let mut tokens = self.cancellation_tokens.write().await;
            tokens.insert(execution_id, cancel_token.clone());
        }

        // Spawn background execution task
        let executor = self.executor.clone();

        tokio::spawn(async move {
            // Execute command
            let result = executor.execute(request, state.clone(), cancel_token).await;

            // Write logs if successful (best effort: a log failure must not
            // change the outcome of the execution itself)
            if let Ok(ref exec_result) = result {
                let _ = executor.write_logs(execution_id, exec_result).await;
            }

            // Release the concurrency slot only after the logs are written
            drop(permit);

            // Note: We keep the state in memory for later retrieval
            // Cleanup task will remove old executions based on retention policy

            result
        });

        Ok(execution_id)
    }

    /// Get current status of an execution
    ///
    /// # Errors
    /// Returns `ExecutionError::NotFound` if no execution with this ID is
    /// registered in memory.
    pub async fn get_status(&self, execution_id: Uuid) -> Result<ExecutionStatus> {
        // Clone the per-execution handle and release the registry lock before
        // awaiting the state lock (same approach as `cancel`), so waiting on one
        // execution's state never keeps the whole registry read-locked.
        let state = {
            let executions = self.executions.read().await;
            executions
                .get(&execution_id)
                .cloned()
                .ok_or(ExecutionError::NotFound(execution_id))?
        };

        // Bind before returning so the read guard is dropped ahead of `state`.
        let status = state.read().await.status;
        Ok(status)
    }

    /// Get execution result (returns error if not complete)
    ///
    /// # Errors
    /// - `ExecutionError::NotFound` if no execution with this ID is registered.
    /// - `ExecutionError::Internal` if the execution has not reached a terminal
    ///   status yet.
    pub async fn get_result(&self, execution_id: Uuid) -> Result<ExecutionResult> {
        // Clone the per-execution handle and release the registry lock before
        // awaiting the state lock (same approach as `cancel`), so waiting on one
        // execution's state never keeps the whole registry read-locked.
        let state = {
            let executions = self.executions.read().await;
            executions
                .get(&execution_id)
                .cloned()
                .ok_or(ExecutionError::NotFound(execution_id))?
        };

        let state_lock = state.read().await;

        if !state_lock.status.is_terminal() {
            return Err(ExecutionError::Internal(format!(
                "Execution {} is still running (status: {:?})",
                execution_id, state_lock.status
            )));
        }

        Ok(state_lock.to_result())
    }

    /// Block (asynchronously) until the execution reaches a terminal status,
    /// then return its result.
    ///
    /// The status is polled every 100 ms. There is no upper bound on the wait;
    /// callers that need one should wrap this future in their own timeout.
    ///
    /// # Errors
    /// Propagates any error from `get_status` or `get_result`, e.g.
    /// `ExecutionError::NotFound` for an unknown ID.
    pub async fn wait_for_completion(&self, execution_id: Uuid) -> Result<ExecutionResult> {
        let poll_interval = std::time::Duration::from_millis(100);

        // Keep polling while the execution is still in a non-terminal status.
        while !self.get_status(execution_id).await?.is_terminal() {
            tokio::time::sleep(poll_interval).await;
        }

        self.get_result(execution_id).await
    }

    /// Cancel a running execution
    ///
    /// Triggers the execution's cancellation token. This only requests
    /// cancellation; the method returns without waiting for the execution to
    /// reach a terminal status.
    ///
    /// # Errors
    /// - `ExecutionError::NotFound` if no execution with this ID is registered.
    /// - `ExecutionError::Internal` if the execution is already in a terminal
    ///   status, or if no cancellation token is stored for it.
    pub async fn cancel(&self, execution_id: Uuid) -> Result<()> {
        // Look up the execution and keep only a handle to its state, so the
        // registry lock is released before the state lock is awaited.
        let state = {
            let executions = self.executions.read().await;
            executions
                .get(&execution_id)
                .cloned()
                .ok_or(ExecutionError::NotFound(execution_id))?
        };

        // Check if already terminal
        {
            let state_lock = state.read().await;
            if state_lock.status.is_terminal() {
                return Err(ExecutionError::Internal(format!(
                    "Cannot cancel execution {} - already in terminal state: {:?}",
                    execution_id, state_lock.status
                )));
            }
        }

        // Fetch the token. `ok_or_else` builds the error message only when the
        // token is actually missing instead of allocating it on every call.
        let cancel_token = {
            let tokens = self.cancellation_tokens.read().await;
            tokens.get(&execution_id).cloned().ok_or_else(|| {
                ExecutionError::Internal(format!(
                    "Cancellation token not found for execution {}",
                    execution_id
                ))
            })?
        };

        // Trigger cancellation
        cancel_token.cancel();

        Ok(())
    }

    /// Summarize every execution currently held in memory, newest first.
    ///
    /// Each summary carries the execution's ID, status and start time.
    /// `duration` is `Some` only once the execution has a completion time; a
    /// completion time earlier than the start time yields a zero duration.
    pub async fn list_executions(&self) -> Vec<ExecutionSummary> {
        let registry = self.executions.read().await;
        let mut summaries = Vec::with_capacity(registry.len());

        for (execution_id, shared_state) in registry.iter() {
            let snapshot = shared_state.read().await;

            // `to_std` fails for negative spans; treat those as zero.
            let duration = snapshot
                .completed_at
                .map(|finished| (finished - snapshot.started_at).to_std().unwrap_or_default());

            summaries.push(ExecutionSummary {
                id: *execution_id,
                status: snapshot.status,
                started_at: snapshot.started_at,
                duration,
            });
        }

        // Descending by start time (stable, like the comparator form).
        summaries.sort_by_key(|summary| std::cmp::Reverse(summary.started_at));

        summaries
    }

    /// Count executions that have not finished yet.
    ///
    /// Despite the name, this includes executions whose status is `Pending` as
    /// well as those that are `Running`.
    pub async fn running_count(&self) -> usize {
        let registry = self.executions.read().await;
        let mut active = 0;

        for shared_state in registry.values() {
            let status = shared_state.read().await.status;
            if [ExecutionStatus::Running, ExecutionStatus::Pending].contains(&status) {
                active += 1;
            }
        }

        active
    }

    /// Number of executions currently registered in memory, in any status.
    pub async fn total_count(&self) -> usize {
        self.executions.read().await.len()
    }

    /// Read logs for an execution
    ///
    /// Delegates directly to the executor's `read_logs`; the returned text and
    /// any error come from there. The engine's in-memory state is not consulted.
    pub async fn read_logs(&self, execution_id: Uuid) -> Result<String> {
        self.executor.read_logs(execution_id).await
    }

    /// Get configuration
    ///
    /// Returns a borrow of the configuration this engine was constructed with.
    pub fn config(&self) -> &ExecutionConfig {
        &self.config
    }

    /// Get available semaphore permits (concurrency slots)
    ///
    /// The semaphore is created with `max_concurrent_executions` permits and
    /// every in-flight execution holds one until its background task finishes,
    /// so this is the number of executions that could be started right now.
    /// The value is a snapshot and may change immediately after it is read.
    pub fn available_permits(&self) -> usize {
        self.semaphore.available_permits()
    }

    /// Clean up old executions based on retention policy
    ///
    /// Removes executions based on:
    /// 1. Age: Older than `execution_retention_secs`
    /// 2. Count: Exceeds `max_in_memory_executions`
    ///
    /// The policy itself is implemented by `crate::cleanup::cleanup_old_executions`;
    /// this method only hands it the state map, the cancellation-token map and
    /// the two limits from the engine's config.
    ///
    /// Returns the number of executions removed.
    pub async fn cleanup_old_executions(&self) -> usize {
        crate::cleanup::cleanup_old_executions(
            &self.executions,
            &self.cancellation_tokens,
            self.config.execution_retention_secs,
            self.config.max_in_memory_executions,
        )
        .await
    }

    /// Drop a single execution, and its cancellation token, from memory.
    ///
    /// The token is only discarded, not triggered.
    ///
    /// # Errors
    /// Returns `ExecutionError::NotFound` if no execution with this ID is
    /// registered.
    pub async fn remove_execution(&self, execution_id: Uuid) -> Result<()> {
        if !crate::cleanup::remove_execution(&self.executions, execution_id).await {
            return Err(ExecutionError::NotFound(execution_id));
        }

        // The state is gone; discard the matching token as well.
        self.cancellation_tokens.write().await.remove(&execution_id);
        Ok(())
    }

    /// Spawn the periodic cleanup task, if enabled.
    ///
    /// Does nothing unless `enable_auto_cleanup` is set in the config. When
    /// enabled, a background task calls `cleanup_old_executions` on a 5-minute
    /// interval and logs how many executions were removed whenever that number
    /// is non-zero. With tokio's `interval` the first tick completes
    /// immediately, so the first pass runs as soon as the task is scheduled.
    ///
    /// Takes the engine as `Arc<Self>`: the spawned task owns that `Arc` and
    /// loops without an exit condition, so it keeps the engine alive for as
    /// long as the task itself runs.
    pub fn start_cleanup_task(self: Arc<Self>) {
        if self.config.enable_auto_cleanup {
            let engine = self;

            tokio::spawn(async move {
                let period = std::time::Duration::from_secs(300); // 5 minutes
                let mut ticker = tokio::time::interval(period);

                loop {
                    ticker.tick().await;

                    match engine.cleanup_old_executions().await {
                        0 => {}
                        removed => {
                            tracing::info!("Cleanup task removed {} old executions", removed);
                        }
                    }
                }
            });
        }
    }
}

// ============================================================================
// Tests
// ============================================================================

#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::Command;
    use std::collections::HashMap;
    use std::time::Duration;

    /// Upper bound for any single wait in these tests, so a stuck execution
    /// fails the test instead of hanging the whole suite.
    const WAIT_LIMIT: Duration = Duration::from_secs(10);

    /// Build a request that runs `command` through bash with a 5 s timeout.
    fn shell_request(command: &str) -> ExecutionRequest {
        ExecutionRequest {
            id: Uuid::new_v4(),
            command: Command::Shell {
                command: command.to_string(),
                shell: "bash".to_string(),
            },
            env: HashMap::new(),
            working_dir: None,
            timeout_ms: Some(5000),
            output_log_path: None,
            metadata: Default::default(),
        }
    }

    /// Short command that finishes almost immediately.
    fn create_test_request() -> ExecutionRequest {
        shell_request("echo 'test'")
    }

    /// Command that keeps its concurrency slot busy for about two seconds.
    fn long_running_request() -> ExecutionRequest {
        ExecutionRequest {
            timeout_ms: Some(10000),
            ..shell_request("sleep 2")
        }
    }

    /// Wait for `id` to finish, failing the test if it takes longer than
    /// `WAIT_LIMIT` or if the result cannot be fetched.
    async fn wait_until_done(engine: &ExecutionEngine, id: Uuid) -> ExecutionResult {
        tokio::time::timeout(WAIT_LIMIT, engine.wait_for_completion(id))
            .await
            .expect("execution did not finish within WAIT_LIMIT")
            .expect("execution result should be available")
    }

    #[tokio::test]
    async fn test_engine_creation() {
        let engine = ExecutionEngine::new(ExecutionConfig::default());
        assert!(engine.is_ok());
    }

    #[tokio::test]
    async fn test_engine_invalid_config() {
        let config = ExecutionConfig {
            max_concurrent_executions: 0, // Invalid
            ..Default::default()
        };

        let engine = ExecutionEngine::new(config);
        assert!(engine.is_err());
    }

    #[tokio::test]
    async fn test_engine_execute_simple() {
        let engine = ExecutionEngine::new(ExecutionConfig::default()).unwrap();

        let execution_id = engine.execute(create_test_request()).await.unwrap();

        // Wait for the real completion rather than sleeping a fixed amount.
        wait_until_done(&engine, execution_id).await;

        let status = engine.get_status(execution_id).await.unwrap();
        assert_eq!(status, ExecutionStatus::Completed);
    }

    #[tokio::test]
    async fn test_engine_wait_for_completion() {
        let engine = ExecutionEngine::new(ExecutionConfig::default()).unwrap();

        let execution_id = engine.execute(create_test_request()).await.unwrap();

        let result = wait_until_done(&engine, execution_id).await;
        assert_eq!(result.status, ExecutionStatus::Completed);
        assert_eq!(result.exit_code, 0);
    }

    #[tokio::test]
    async fn test_engine_get_result_before_complete() {
        let engine = ExecutionEngine::new(ExecutionConfig::default()).unwrap();

        let execution_id = engine.execute(shell_request("sleep 1")).await.unwrap();

        // Try to get result immediately (should fail)
        let result = engine.get_result(execution_id).await;
        assert!(result.is_err());
    }

    #[tokio::test]
    async fn test_engine_list_executions() {
        let engine = ExecutionEngine::new(ExecutionConfig::default()).unwrap();

        // Execute multiple commands
        let id1 = engine.execute(create_test_request()).await.unwrap();
        let id2 = engine.execute(create_test_request()).await.unwrap();

        wait_until_done(&engine, id1).await;
        wait_until_done(&engine, id2).await;

        let list = engine.list_executions().await;
        assert_eq!(list.len(), 2);
        assert!(list.iter().any(|summary| summary.id == id1));
        assert!(list.iter().any(|summary| summary.id == id2));
    }

    #[tokio::test]
    async fn test_engine_running_count() {
        let engine = ExecutionEngine::new(ExecutionConfig::default()).unwrap();

        assert_eq!(engine.running_count().await, 0);

        // Execute a long-running command
        let _id = engine.execute(long_running_request()).await.unwrap();

        // Give the background task a moment to start, then check the count.
        tokio::time::sleep(Duration::from_millis(100)).await;
        assert_eq!(engine.running_count().await, 1);
    }

    #[tokio::test]
    async fn test_engine_concurrency_limit() {
        let config = ExecutionConfig {
            max_concurrent_executions: 2,
            ..Default::default()
        };
        let engine = ExecutionEngine::new(config).unwrap();

        // Start 2 long-running commands (at limit). Each slot is taken before
        // `execute` returns, so no sleep is needed before the third attempt.
        let _id1 = engine.execute(long_running_request()).await.unwrap();
        let _id2 = engine.execute(long_running_request()).await.unwrap();

        // Try to execute a 3rd command (should fail)
        let result = engine.execute(create_test_request()).await;
        assert!(matches!(
            result,
            Err(ExecutionError::ConcurrencyLimitReached(2))
        ));
    }

    #[tokio::test]
    async fn test_engine_available_permits() {
        let config = ExecutionConfig {
            max_concurrent_executions: 5,
            ..Default::default()
        };
        let engine = ExecutionEngine::new(config).unwrap();

        assert_eq!(engine.available_permits(), 5);

        // A long-running command holds its slot for the whole check, which
        // makes the expected permit count deterministic.
        let _id = engine.execute(long_running_request()).await.unwrap();

        assert_eq!(engine.available_permits(), 4);
    }

    #[tokio::test]
    async fn test_engine_not_found() {
        let engine = ExecutionEngine::new(ExecutionConfig::default()).unwrap();

        let fake_id = Uuid::new_v4();
        let result = engine.get_status(fake_id).await;

        assert!(matches!(result, Err(ExecutionError::NotFound(_))));
    }
}