//! Service-level container lifecycle management
use crate::container_supervisor::{ContainerSupervisor, SupervisedState, SupervisorEvent};
use crate::cron_scheduler::CronScheduler;
use crate::dependency::{
DependencyConditionChecker, DependencyGraph, DependencyWaiter, WaitResult,
};
use crate::error::{AgentError, Result};
use crate::health::{HealthCallback, HealthChecker, HealthMonitor, HealthState};
use crate::init::InitOrchestrator;
use crate::job::{JobExecution, JobExecutionId, JobExecutor, JobTrigger};
use crate::overlay_manager::OverlayManager;
use crate::proxy_manager::ProxyManager;
use crate::runtime::{Container, ContainerId, ContainerState, Runtime};
use std::collections::HashMap;
use std::net::SocketAddr;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::{RwLock, Semaphore};
use zlayer_observability::logs::LogEntry;
use zlayer_overlay::DnsServer;
use zlayer_proxy::{StreamRegistry, StreamService};
use zlayer_spec::{
effective_pull_policy, DependsSpec, HealthCheck, Protocol, PullPolicy, ResourceType,
ServiceSpec,
};
/// Service instance manages a single service's containers
pub struct ServiceInstance {
pub service_name: String,
pub spec: ServiceSpec,
runtime: Arc<dyn Runtime + Send + Sync>,
containers: tokio::sync::RwLock<std::collections::HashMap<ContainerId, Container>>,
/// Overlay network manager for container networking (optional, not needed for Docker runtime)
overlay_manager: Option<Arc<RwLock<OverlayManager>>>,
/// Proxy manager for updating backend health (optional)
proxy_manager: Option<Arc<ProxyManager>>,
/// DNS server for service discovery (optional)
dns_server: Option<Arc<DnsServer>>,
/// Shared health states map so callbacks can update ServiceManager-level health
health_states: Option<Arc<RwLock<HashMap<String, HealthState>>>>,
/// Most recently observed image digest after a successful pull. Used by
/// `upsert_service` to detect drift on `:latest`/`Newer` redeploys without
/// requiring callers to track digest state externally. Wrapped in a
/// `RwLock` so `&self` methods (`scale_to`) can update it.
last_pulled_digest: tokio::sync::RwLock<Option<String>>,
}
impl ServiceInstance {
/// Create a new service instance
pub fn new(
service_name: String,
spec: ServiceSpec,
runtime: Arc<dyn Runtime + Send + Sync>,
overlay_manager: Option<Arc<RwLock<OverlayManager>>>,
) -> Self {
Self {
service_name,
spec,
runtime,
containers: tokio::sync::RwLock::new(std::collections::HashMap::new()),
overlay_manager,
proxy_manager: None,
dns_server: None,
health_states: None,
last_pulled_digest: tokio::sync::RwLock::new(None),
}
}
/// Create a new service instance with proxy manager for health-aware load balancing
pub fn with_proxy(
service_name: String,
spec: ServiceSpec,
runtime: Arc<dyn Runtime + Send + Sync>,
overlay_manager: Option<Arc<RwLock<OverlayManager>>>,
proxy_manager: Arc<ProxyManager>,
) -> Self {
Self {
service_name,
spec,
runtime,
containers: tokio::sync::RwLock::new(std::collections::HashMap::new()),
overlay_manager,
proxy_manager: Some(proxy_manager),
dns_server: None,
health_states: None,
last_pulled_digest: tokio::sync::RwLock::new(None),
}
}
/// Builder method to add DNS server for service discovery
#[must_use]
pub fn with_dns(mut self, dns_server: Arc<DnsServer>) -> Self {
self.dns_server = Some(dns_server);
self
}
/// Set the DNS server for service discovery
pub fn set_dns_server(&mut self, dns_server: Arc<DnsServer>) {
self.dns_server = Some(dns_server);
}
/// Set the proxy manager for health-aware load balancing
pub fn set_proxy_manager(&mut self, proxy_manager: Arc<ProxyManager>) {
self.proxy_manager = Some(proxy_manager);
}
/// Set the shared health states map so health callbacks can bridge state back to `ServiceManager`
pub fn set_health_states(&mut self, states: Arc<RwLock<HashMap<String, HealthState>>>) {
self.health_states = Some(states);
}
/// Get the last observed image digest (after the most recent successful
/// pull). Returns `None` when no pull has happened yet, when the runtime
/// does not expose digests, or when no matching `ImageInfo` was found.
pub async fn last_pulled_digest(&self) -> Option<String> {
self.last_pulled_digest.read().await.clone()
}
/// Pull the service image using `effective_pull_policy` (so a default
/// `IfNotPresent` on a `:latest` tag auto-upgrades to `Newer`) and refresh
/// the cached digest from `Runtime::list_images` when the runtime exposes
/// it. Returns the digest observed after the pull, when known.
///
/// `Never` skips the pull entirely; the cached digest is returned
/// unchanged.
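///
/// A sketch of the policy resolution this method relies on (illustrative,
/// mirroring the call made below):
///
/// ```ignore
/// // `IfNotPresent` on a `:latest` image resolves to `Newer`, so the pull
/// // runs on redeploy and the cached digest is refreshed afterwards.
/// let effective = effective_pull_policy(&spec.image.name, spec.image.pull_policy);
/// if !matches!(effective, PullPolicy::Never) {
///     // pull_image_with_policy(...) is invoked, then list_images() is
///     // consulted (best-effort) to record the post-pull digest.
/// }
/// ```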
async fn pull_and_refresh_digest(&self) -> Result<Option<String>> {
let image_str = self.spec.image.name.to_string();
let effective = effective_pull_policy(&self.spec.image.name, self.spec.image.pull_policy);
if matches!(effective, PullPolicy::Never) {
return Ok(self.last_pulled_digest.read().await.clone());
}
self.runtime
.pull_image_with_policy(&image_str, effective, None)
.await
.map_err(|e| AgentError::PullFailed {
image: self.spec.image.name.to_string(),
reason: e.to_string(),
})?;
// Best-effort: try to discover the resolved digest via list_images.
// Runtimes that don't support introspection (Unsupported) leave the
// cached digest unchanged; drift detection then falls back to "always
// recreate on PullPolicy::Always, never recreate on PullPolicy::Newer
// when no digests are known".
let new_digest = match self.runtime.list_images().await {
Ok(images) => images
.into_iter()
.find(|info| info.reference == image_str)
.and_then(|info| info.digest),
Err(e) => {
tracing::debug!(
image = %image_str,
error = %e,
"list_images unavailable; cannot record post-pull digest"
);
None
}
};
if let Some(ref digest) = new_digest {
*self.last_pulled_digest.write().await = Some(digest.clone());
}
Ok(new_digest)
}
/// Scale to the desired number of replicas
///
/// This method uses short-lived locks to avoid blocking concurrent operations.
/// I/O operations (pull, create, start, stop, remove) are performed without
/// holding the containers lock to allow other operations to proceed.
///
/// # Errors
/// Returns an error if image pull, container creation, or container lifecycle operations fail.
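///
/// # Example
///
/// A minimal sketch (illustrative; assumes `instance` is an already-constructed
/// `ServiceInstance`):
///
/// ```ignore
/// // Scale up to three replicas: containers 1..=3 are created and started.
/// instance.scale_to(3).await?;
/// // Scale back down to one: replicas 2 and 3 are stopped and removed.
/// instance.scale_to(1).await?;
/// assert_eq!(instance.replica_count().await, 1);
/// ```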
#[allow(clippy::too_many_lines, clippy::cast_possible_truncation)]
pub async fn scale_to(&self, replicas: u32) -> Result<()> {
// Phase 1: Determine current state (short read lock)
let current_replicas = { self.containers.read().await.len() as u32 }; // Lock released here
// Phase 1b: Pull image up front so a redeploy on `:latest` (which lands
// here with replicas == current_replicas in the steady state) actually
// refreshes the cached digest. We skip the pull when scaling strictly
// down (no new containers needed) and when policy is `Never`. Cached
// layers make this cheap when nothing changed.
let effective = effective_pull_policy(&self.spec.image.name, self.spec.image.pull_policy);
if replicas >= current_replicas && !matches!(effective, PullPolicy::Never) {
let _ = self.pull_and_refresh_digest().await?;
}
// Phase 2: Scale up - create new containers (no lock held during I/O)
if replicas > current_replicas {
for i in current_replicas..replicas {
let id = ContainerId {
service: self.service_name.clone(),
replica: i + 1,
};
// Create container (no lock needed - I/O operation)
//
// RouteToPeer must propagate unchanged: the scheduler uses it
// to re-place the workload on a capable peer, and wrapping it
// in `CreateFailed` would hide the signal and mark the service
// dead instead of rescheduling it. All other errors are
// normalised to `CreateFailed` for upstream handling.
self.runtime
.create_container(&id, &self.spec)
.await
.map_err(|e| match e {
AgentError::RouteToPeer { .. } => e,
other => AgentError::CreateFailed {
id: id.to_string(),
reason: other.to_string(),
},
})?;
// Run init actions with error policy enforcement (no lock needed)
let init_orchestrator = InitOrchestrator::with_error_policy(
id.clone(),
self.spec.init.clone(),
self.spec.errors.clone(),
);
init_orchestrator.run().await?;
// Start container (no lock needed - I/O operation)
self.runtime
.start_container(&id)
.await
.map_err(|e| AgentError::StartFailed {
id: id.to_string(),
reason: e.to_string(),
})?;
// Get container PID with retries (may not be immediately available)
let mut container_pid = None;
for attempt in 1..=5u32 {
match self.runtime.get_container_pid(&id).await {
Ok(Some(pid)) => {
container_pid = Some(pid);
break;
}
Ok(None) if attempt < 5 => {
tracing::debug!(container = %id, attempt, "PID not available yet, retrying");
tokio::time::sleep(std::time::Duration::from_millis(200)).await;
}
Ok(None) => {
tracing::warn!(container = %id, "Container PID unavailable after 5 attempts");
}
Err(e) => {
tracing::warn!(container = %id, attempt, error = %e, "Failed to get PID");
if attempt < 5 {
tokio::time::sleep(std::time::Duration::from_millis(200)).await;
}
}
}
}
// Verify the container is still running before attempting
// overlay attach. If the init process crashed during start
// (bad image, missing libs, failed mount), the PID above is
// now dead and every `ip link set ... netns {pid}` will
// return a cryptic RTNETLINK error. Surface the real cause
// from the container's log tail instead of the cascade.
if container_pid.is_some() {
let alive = match self.runtime.container_state(&id).await {
Ok(
ContainerState::Running
| ContainerState::Pending
| ContainerState::Initializing,
) => true,
Ok(state) => {
tracing::warn!(
container = %id,
?state,
"container exited before overlay attach could run"
);
false
}
Err(e) => {
// State query failed — don't block the attach on
// it. The overlay manager's own cleanup-on-error
// path now handles the dead-PID case cleanly.
tracing::warn!(
container = %id,
error = %e,
"container state query failed before overlay attach, proceeding"
);
true
}
};
if !alive {
let log_tail = self.runtime.container_logs(&id, 40).await.ok().map_or_else(
|| " <log read failed>".to_string(),
|entries| {
if entries.is_empty() {
" <no log output>".to_string()
} else {
entries
.into_iter()
.map(|e| format!(" {}", e.message))
.collect::<Vec<_>>()
.join("\n")
}
},
);
return Err(AgentError::StartFailed {
id: id.to_string(),
reason: format!("container exited during startup:\n{log_tail}"),
});
}
}
// Attach to overlay network if manager is available.
//
// Linux uses the container PID to enter the netns and attach a
// veth. Windows has no PID-addressable netns — the HCN namespace
// GUID (obtained from `get_container_namespace_id`) is used
// instead, and the endpoint's IP has already been populated by
// `EndpointAttachment::create_overlay` during container creation.
// We simply register that IP with the slice allocator so host
// accounting stays in sync.
let overlay_ip = if let Some(overlay) = &self.overlay_manager {
let overlay_guard = overlay.read().await;
#[cfg(target_os = "windows")]
let attach_result: Option<std::net::IpAddr> = {
let _ = container_pid; // unused on Windows
match self.runtime.get_container_namespace_id(&id).await {
Ok(Some(ns_id)) => {
let ip_override =
self.runtime.get_container_ip(&id).await.ok().flatten();
let dns_server = overlay_guard.dns_server_addr().map(|sa| sa.ip());
let dns_domain =
overlay_guard.dns_domain().map(ToString::to_string);
match overlay_guard
.attach_container_hcn(
ns_id,
&self.service_name,
ip_override,
true,
dns_server,
dns_domain,
)
.await
{
Ok(ip) => Some(ip),
Err(e) => {
tracing::warn!(
container = %id,
error = %e,
"HCN overlay attach failed"
);
None
}
}
}
Ok(None) => {
tracing::debug!(
container = %id,
"skipping HCN overlay attach - no namespace id available"
);
None
}
Err(e) => {
tracing::warn!(
container = %id,
error = %e,
"failed to fetch HCN namespace id"
);
None
}
}
};
#[cfg(not(target_os = "windows"))]
let attach_result: Option<std::net::IpAddr> = {
if let Some(pid) = container_pid {
match overlay_guard
.attach_container(pid, &self.service_name, true)
.await
{
Ok(ip) => Some(ip),
Err(e) => {
tracing::warn!(
container = %id,
error = %e,
"failed to attach container to overlay network"
);
None
}
}
} else {
// No PID available (e.g. WASM runtime) - skip overlay attachment
tracing::debug!(
container = %id,
"skipping overlay attachment - no PID available"
);
None
}
};
if let Some(ip) = attach_result {
tracing::info!(
container = %id,
overlay_ip = %ip,
"attached container to overlay network"
);
// Register DNS for service discovery
if let Some(dns) = &self.dns_server {
// Register service-level hostname: {service}.service.local
let service_hostname = format!("{}.service.local", self.service_name);
// Register replica-specific hostname: {replica}.{service}.service.local
let replica_hostname =
format!("{}.{}.service.local", id.replica, self.service_name);
match dns.add_record(&service_hostname, ip).await {
Ok(()) => tracing::debug!(
hostname = %service_hostname,
ip = %ip,
"registered DNS for service"
),
Err(e) => tracing::warn!(
hostname = %service_hostname,
error = %e,
"failed to register DNS for service"
),
}
// Also register replica-specific entry
if let Err(e) = dns.add_record(&replica_hostname, ip).await {
tracing::warn!(
hostname = %replica_hostname,
error = %e,
"failed to register replica DNS"
);
} else {
tracing::debug!(
hostname = %replica_hostname,
ip = %ip,
"registered DNS for replica"
);
}
}
Some(ip)
} else {
None
}
} else {
None
};
// If overlay failed, try the container runtime's own IP as fallback
let effective_ip = if overlay_ip.is_none() {
match self.runtime.get_container_ip(&id).await {
Ok(Some(ip)) => {
tracing::info!(
container = %id,
ip = %ip,
"using runtime container IP for proxy (overlay unavailable)"
);
Some(ip)
}
Ok(None) => {
tracing::warn!(
container = %id,
"no container IP available from runtime, proxy routing will be unavailable"
);
None
}
Err(e) => {
tracing::warn!(
container = %id,
error = %e,
"failed to get container IP from runtime"
);
None
}
}
} else {
overlay_ip
};
tracing::info!(
container = %id,
service = %self.service_name,
overlay_ip = ?overlay_ip,
effective_ip = ?effective_ip,
"Container IP resolution complete"
);
// Query port override from the runtime.
// On macOS sandbox, each container is assigned a unique port since
// all processes share the host network (no network namespaces).
// The runtime passes the port to the process via the PORT env var.
let port_override = match self.runtime.get_container_port_override(&id).await {
Ok(Some(port)) => {
tracing::info!(
container = %id,
port = port,
"runtime assigned dynamic port override for this container"
);
Some(port)
}
Ok(None) => None,
Err(e) => {
tracing::warn!(
container = %id,
error = %e,
"failed to query port override from runtime, using spec port"
);
None
}
};
// Start health monitoring and store handle (no lock needed during start)
let health_monitor_handle = {
let mut check = self.spec.health.check.clone();
// Resolve Tcp { port: 0 } ("use first endpoint") to the actual
// port the container is listening on. With mac-sandbox, each
// replica gets a unique assigned port via port_override.
if let HealthCheck::Tcp { ref mut port } = check {
if *port == 0 {
*port = port_override.unwrap_or_else(|| {
self.spec
.endpoints
.iter()
.find(|ep| {
matches!(
ep.protocol,
Protocol::Http | Protocol::Https | Protocol::Websocket
)
})
.map_or(8080, zlayer_spec::EndpointSpec::target_port)
});
}
}
let start_grace = self
.spec
.health
.start_grace
.unwrap_or(Duration::from_secs(5));
let check_timeout = self.spec.health.timeout.unwrap_or(Duration::from_secs(5));
let interval = self.spec.health.interval.unwrap_or(Duration::from_secs(10));
let retries = self.spec.health.retries;
let checker = HealthChecker::new(check, effective_ip);
let mut monitor = HealthMonitor::new(id.clone(), checker, interval, retries)
.with_start_grace(start_grace)
.with_check_timeout(check_timeout);
// Create health callback to update proxy backend health if proxy is configured
// and we have an overlay IP for this container
if let (Some(proxy), Some(ip)) = (&self.proxy_manager, effective_ip) {
let proxy = Arc::clone(proxy);
let service_name = self.service_name.clone();
// Get the container's target port, using the runtime override if present.
// On macOS sandbox, port_override gives each replica a unique port
// so the proxy can distinguish backends sharing 127.0.0.1.
let port = port_override.unwrap_or_else(|| {
self.spec
.endpoints
.iter()
.find(|ep| {
matches!(
ep.protocol,
Protocol::Http | Protocol::Https | Protocol::Websocket
)
})
.map_or(8080, zlayer_spec::EndpointSpec::target_port)
});
let backend_addr = SocketAddr::new(ip, port);
// Register backend with load balancer so proxy can route to it.
// This must happen before the health callback is created, because
// update_backend_health only updates *existing* backends.
proxy.add_backend(&self.service_name, backend_addr).await;
let health_states_opt = self.health_states.clone();
let svc_name_for_states = self.service_name.clone();
let health_callback: HealthCallback =
Arc::new(move |container_id: ContainerId, is_healthy: bool| {
let proxy = Arc::clone(&proxy);
let service_name = service_name.clone();
tracing::info!(
container = %container_id,
service = %service_name,
backend = %backend_addr,
healthy = is_healthy,
"health status changed, updating proxy backend"
);
// Spawn a task to update the proxy (callback is sync, proxy update is async)
tokio::spawn(async move {
proxy
.update_backend_health(
&service_name,
backend_addr,
is_healthy,
)
.await;
});
// Bridge health state back to ServiceManager's health_states map
if let Some(ref health_states) = health_states_opt {
let states = Arc::clone(health_states);
let svc = svc_name_for_states.clone();
tokio::spawn(async move {
let state = if is_healthy {
HealthState::Healthy
} else {
HealthState::Unhealthy {
failures: 0,
reason: "health check failed".into(),
}
};
states.write().await.insert(svc, state);
});
}
});
monitor = monitor.with_callback(health_callback);
}
monitor.start()
};
// Update state (short write lock)
{
let mut containers = self.containers.write().await;
containers.insert(
id.clone(),
Container {
id: id.clone(),
state: ContainerState::Running,
pid: None,
task: None,
overlay_ip: effective_ip,
health_monitor: Some(health_monitor_handle),
port_override,
},
);
} // Lock released here
}
}
// Phase 3: Scale down - remove containers (short write lock per removal)
if replicas < current_replicas {
for i in replicas..current_replicas {
let id = ContainerId {
service: self.service_name.clone(),
replica: i + 1,
};
// Remove from state first and get the container to abort health monitor (short write lock)
let removed_container = {
let mut containers = self.containers.write().await;
containers.remove(&id)
}; // Lock released here
// Then perform cleanup (no lock held - I/O operations)
if let Some(container) = removed_container {
// Abort the health monitor task if it exists
if let Some(handle) = container.health_monitor {
handle.abort();
}
// Remove DNS records for this container
if let Some(dns) = &self.dns_server {
// Remove replica-specific DNS entry
let replica_hostname =
format!("{}.{}.service.local", id.replica, self.service_name);
if let Err(e) = dns.remove_record(&replica_hostname).await {
tracing::warn!(
hostname = %replica_hostname,
error = %e,
"failed to remove replica DNS record"
);
} else {
tracing::debug!(
hostname = %replica_hostname,
"removed replica DNS record"
);
}
// Note: We don't remove the service-level hostname here because
// other replicas may still be using it. The service-level record
// should be cleaned up when the entire service is removed.
}
// Stop container
self.runtime
.stop_container(&id, Duration::from_secs(30))
.await?;
// Sync volumes to S3 before removal (no-op if not configured)
if let Err(e) = self.runtime.sync_container_volumes(&id).await {
tracing::warn!(
container = %id,
error = %e,
"failed to sync volumes before removal"
);
}
// Remove container
self.runtime.remove_container(&id).await?;
}
}
}
Ok(())
}
/// Get current number of replicas
pub async fn replica_count(&self) -> usize {
self.containers.read().await.len()
}
/// Get all container IDs
pub async fn container_ids(&self) -> Vec<ContainerId> {
self.containers.read().await.keys().cloned().collect()
}
/// Get read access to the containers map
///
/// This allows callers to access container overlay IPs and other metadata
/// without copying the entire map.
pub fn containers(
&self,
) -> &tokio::sync::RwLock<std::collections::HashMap<ContainerId, Container>> {
&self.containers
}
/// Check if this service instance has an overlay manager configured
pub fn has_overlay_manager(&self) -> bool {
self.overlay_manager.is_some()
}
/// Check if this service instance has a proxy manager configured
pub fn has_proxy_manager(&self) -> bool {
self.proxy_manager.is_some()
}
/// Check if this service instance has a DNS server configured
pub fn has_dns_server(&self) -> bool {
self.dns_server.is_some()
}
}
/// Service manager for multiple services
pub struct ServiceManager {
runtime: Arc<dyn Runtime + Send + Sync>,
services: tokio::sync::RwLock<std::collections::HashMap<String, ServiceInstance>>,
scale_semaphore: Arc<Semaphore>,
/// Overlay network manager for container networking
overlay_manager: Option<Arc<RwLock<OverlayManager>>>,
/// Stream registry for L4 proxy route registration (TCP/UDP)
stream_registry: Option<Arc<StreamRegistry>>,
/// Proxy manager for health-aware load balancing (hyper-based proxy)
proxy_manager: Option<Arc<ProxyManager>>,
/// DNS server for service discovery
dns_server: Option<Arc<DnsServer>>,
/// Deployment name (used for generating hostnames)
deployment_name: Option<String>,
/// Health states for dependency condition checking
health_states: Arc<RwLock<HashMap<String, HealthState>>>,
/// Job executor for run-to-completion workloads
job_executor: Option<Arc<JobExecutor>>,
/// Cron scheduler for time-based job triggers
cron_scheduler: Option<Arc<CronScheduler>>,
/// Container supervisor for crash/panic policy enforcement
container_supervisor: Option<Arc<ContainerSupervisor>>,
}
// ---------------------------------------------------------------------------
// ServiceManagerBuilder
// ---------------------------------------------------------------------------
/// Builder for constructing a [`ServiceManager`] with optional subsystems.
///
/// Prefer using `ServiceManager::builder(runtime)` to start building.
///
/// # Example
///
/// ```ignore
/// let manager = ServiceManager::builder(runtime)
/// .overlay_manager(om)
/// .proxy_manager(proxy)
/// .deployment_name("prod")
/// .build();
/// ```
pub struct ServiceManagerBuilder {
runtime: Arc<dyn Runtime + Send + Sync>,
overlay_manager: Option<Arc<RwLock<OverlayManager>>>,
proxy_manager: Option<Arc<ProxyManager>>,
stream_registry: Option<Arc<StreamRegistry>>,
dns_server: Option<Arc<DnsServer>>,
deployment_name: Option<String>,
job_executor: Option<Arc<JobExecutor>>,
cron_scheduler: Option<Arc<CronScheduler>>,
container_supervisor: Option<Arc<ContainerSupervisor>>,
}
impl ServiceManagerBuilder {
/// Create a new builder with the required runtime.
pub fn new(runtime: Arc<dyn Runtime + Send + Sync>) -> Self {
Self {
runtime,
overlay_manager: None,
proxy_manager: None,
stream_registry: None,
dns_server: None,
deployment_name: None,
job_executor: None,
cron_scheduler: None,
container_supervisor: None,
}
}
/// Set the overlay network manager for container networking.
#[must_use]
pub fn overlay_manager(mut self, om: Arc<RwLock<OverlayManager>>) -> Self {
self.overlay_manager = Some(om);
self
}
/// Set the proxy manager for health-aware load balancing.
#[must_use]
pub fn proxy_manager(mut self, pm: Arc<ProxyManager>) -> Self {
self.proxy_manager = Some(pm);
self
}
/// Set the stream registry for TCP/UDP L4 proxy route registration.
#[must_use]
pub fn stream_registry(mut self, sr: Arc<StreamRegistry>) -> Self {
self.stream_registry = Some(sr);
self
}
/// Set the DNS server for service discovery.
#[must_use]
pub fn dns_server(mut self, dns: Arc<DnsServer>) -> Self {
self.dns_server = Some(dns);
self
}
/// Set the deployment name (used for hostname generation).
#[must_use]
pub fn deployment_name(mut self, name: impl Into<String>) -> Self {
self.deployment_name = Some(name.into());
self
}
/// Set the job executor for run-to-completion workloads.
#[must_use]
pub fn job_executor(mut self, je: Arc<JobExecutor>) -> Self {
self.job_executor = Some(je);
self
}
/// Set the cron scheduler for time-based job triggers.
#[must_use]
pub fn cron_scheduler(mut self, cs: Arc<CronScheduler>) -> Self {
self.cron_scheduler = Some(cs);
self
}
/// Set the container supervisor for crash/panic policy enforcement.
#[must_use]
pub fn container_supervisor(mut self, cs: Arc<ContainerSupervisor>) -> Self {
self.container_supervisor = Some(cs);
self
}
/// Consume the builder and produce a fully-wired [`ServiceManager`].
///
/// Logs warnings for missing recommended subsystems (proxy,
/// `stream_registry`, `container_supervisor`, `deployment_name`).
pub fn build(self) -> ServiceManager {
if self.proxy_manager.is_none() {
tracing::warn!("ServiceManager built without proxy_manager");
}
if self.stream_registry.is_none() {
tracing::warn!("ServiceManager built without stream_registry");
}
if self.container_supervisor.is_none() {
tracing::warn!("ServiceManager built without container_supervisor");
}
if self.deployment_name.is_none() {
tracing::warn!("ServiceManager built without deployment_name");
}
ServiceManager {
runtime: self.runtime,
services: tokio::sync::RwLock::new(std::collections::HashMap::new()),
scale_semaphore: Arc::new(Semaphore::new(10)),
overlay_manager: self.overlay_manager,
stream_registry: self.stream_registry,
proxy_manager: self.proxy_manager,
dns_server: self.dns_server,
deployment_name: self.deployment_name,
health_states: Arc::new(RwLock::new(HashMap::new())),
job_executor: self.job_executor,
cron_scheduler: self.cron_scheduler,
container_supervisor: self.container_supervisor,
}
}
}
impl ServiceManager {
/// Create a [`ServiceManagerBuilder`] for constructing a `ServiceManager`.
///
/// This is the preferred way to construct a `ServiceManager` since v0.2.0.
///
/// # Example
///
/// ```ignore
/// let manager = ServiceManager::builder(runtime)
/// .overlay_manager(om)
/// .proxy_manager(proxy)
/// .build();
/// ```
pub fn builder(runtime: Arc<dyn Runtime + Send + Sync>) -> ServiceManagerBuilder {
ServiceManagerBuilder::new(runtime)
}
/// Create a new service manager
#[deprecated(since = "0.2.0", note = "use ServiceManager::builder() instead")]
pub fn new(runtime: Arc<dyn Runtime + Send + Sync>) -> Self {
Self {
runtime,
services: tokio::sync::RwLock::new(std::collections::HashMap::new()),
scale_semaphore: Arc::new(Semaphore::new(10)), // Max 10 concurrent scaling operations
overlay_manager: None,
stream_registry: None,
proxy_manager: None,
dns_server: None,
deployment_name: None,
health_states: Arc::new(RwLock::new(HashMap::new())),
job_executor: None,
cron_scheduler: None,
container_supervisor: None,
}
}
/// Create a service manager with overlay network support
#[deprecated(since = "0.2.0", note = "use ServiceManager::builder() instead")]
pub fn with_overlay(
runtime: Arc<dyn Runtime + Send + Sync>,
overlay_manager: Arc<RwLock<OverlayManager>>,
) -> Self {
Self {
runtime,
services: tokio::sync::RwLock::new(std::collections::HashMap::new()),
scale_semaphore: Arc::new(Semaphore::new(10)),
overlay_manager: Some(overlay_manager),
stream_registry: None,
proxy_manager: None,
dns_server: None,
deployment_name: None,
health_states: Arc::new(RwLock::new(HashMap::new())),
job_executor: None,
cron_scheduler: None,
container_supervisor: None,
}
}
/// Create a fully-configured service manager with overlay and proxy support
#[deprecated(since = "0.2.0", note = "use ServiceManager::builder() instead")]
pub fn with_full_config(
runtime: Arc<dyn Runtime + Send + Sync>,
overlay_manager: Arc<RwLock<OverlayManager>>,
deployment_name: String,
) -> Self {
Self {
runtime,
services: tokio::sync::RwLock::new(std::collections::HashMap::new()),
scale_semaphore: Arc::new(Semaphore::new(10)),
overlay_manager: Some(overlay_manager),
stream_registry: None,
proxy_manager: None,
dns_server: None,
deployment_name: Some(deployment_name),
health_states: Arc::new(RwLock::new(HashMap::new())),
job_executor: None,
cron_scheduler: None,
container_supervisor: None,
}
}
/// Get the health states map for external monitoring
pub fn health_states(&self) -> Arc<RwLock<HashMap<String, HealthState>>> {
Arc::clone(&self.health_states)
}
/// Update health state for a service
pub async fn update_health_state(&self, service_name: &str, state: HealthState) {
let mut states = self.health_states.write().await;
states.insert(service_name.to_string(), state);
}
/// Set the deployment name (used for generating hostnames)
#[deprecated(since = "0.2.0", note = "use ServiceManager::builder() instead")]
pub fn set_deployment_name(&mut self, name: String) {
self.deployment_name = Some(name);
}
/// Set the stream registry for L4 proxy integration (TCP/UDP)
#[deprecated(since = "0.2.0", note = "use ServiceManager::builder() instead")]
pub fn set_stream_registry(&mut self, registry: Arc<StreamRegistry>) {
self.stream_registry = Some(registry);
}
/// Builder pattern: add stream registry for L4 proxy integration
#[deprecated(since = "0.2.0", note = "use ServiceManager::builder() instead")]
#[must_use]
pub fn with_stream_registry(mut self, registry: Arc<StreamRegistry>) -> Self {
self.stream_registry = Some(registry);
self
}
/// Get the stream registry (if configured)
pub fn stream_registry(&self) -> Option<&Arc<StreamRegistry>> {
self.stream_registry.as_ref()
}
/// Set the overlay manager for container networking
#[deprecated(since = "0.2.0", note = "use ServiceManager::builder() instead")]
pub fn set_overlay_manager(&mut self, manager: Arc<RwLock<OverlayManager>>) {
self.overlay_manager = Some(manager);
}
/// Set the proxy manager for health-aware load balancing
#[deprecated(since = "0.2.0", note = "use ServiceManager::builder() instead")]
pub fn set_proxy_manager(&mut self, proxy: Arc<ProxyManager>) {
self.proxy_manager = Some(proxy);
}
/// Builder pattern: add proxy manager for health-aware load balancing
#[deprecated(since = "0.2.0", note = "use ServiceManager::builder() instead")]
#[must_use]
pub fn with_proxy_manager(mut self, proxy: Arc<ProxyManager>) -> Self {
self.proxy_manager = Some(proxy);
self
}
/// Get the proxy manager (if configured)
pub fn proxy_manager(&self) -> Option<&Arc<ProxyManager>> {
self.proxy_manager.as_ref()
}
/// Set the DNS server for service discovery
#[deprecated(since = "0.2.0", note = "use ServiceManager::builder() instead")]
pub fn set_dns_server(&mut self, dns: Arc<DnsServer>) {
self.dns_server = Some(dns);
}
/// Builder pattern: add DNS server for service discovery
#[deprecated(since = "0.2.0", note = "use ServiceManager::builder() instead")]
#[must_use]
pub fn with_dns_server(mut self, dns: Arc<DnsServer>) -> Self {
self.dns_server = Some(dns);
self
}
/// Get the DNS server (if configured)
pub fn dns_server(&self) -> Option<&Arc<DnsServer>> {
self.dns_server.as_ref()
}
/// Set the job executor for run-to-completion workloads
#[deprecated(since = "0.2.0", note = "use ServiceManager::builder() instead")]
pub fn set_job_executor(&mut self, executor: Arc<JobExecutor>) {
self.job_executor = Some(executor);
}
/// Set the cron scheduler for time-based job triggers
#[deprecated(since = "0.2.0", note = "use ServiceManager::builder() instead")]
pub fn set_cron_scheduler(&mut self, scheduler: Arc<CronScheduler>) {
self.cron_scheduler = Some(scheduler);
}
/// Builder pattern: add job executor
#[deprecated(since = "0.2.0", note = "use ServiceManager::builder() instead")]
#[must_use]
pub fn with_job_executor(mut self, executor: Arc<JobExecutor>) -> Self {
self.job_executor = Some(executor);
self
}
/// Builder pattern: add cron scheduler
#[deprecated(since = "0.2.0", note = "use ServiceManager::builder() instead")]
#[must_use]
pub fn with_cron_scheduler(mut self, scheduler: Arc<CronScheduler>) -> Self {
self.cron_scheduler = Some(scheduler);
self
}
/// Get the job executor (if configured)
pub fn job_executor(&self) -> Option<&Arc<JobExecutor>> {
self.job_executor.as_ref()
}
/// Get the cron scheduler (if configured)
pub fn cron_scheduler(&self) -> Option<&Arc<CronScheduler>> {
self.cron_scheduler.as_ref()
}
/// Set the container supervisor for crash/panic policy enforcement
#[deprecated(since = "0.2.0", note = "use ServiceManager::builder() instead")]
pub fn set_container_supervisor(&mut self, supervisor: Arc<ContainerSupervisor>) {
self.container_supervisor = Some(supervisor);
}
/// Builder pattern: add container supervisor
#[deprecated(since = "0.2.0", note = "use ServiceManager::builder() instead")]
#[must_use]
pub fn with_container_supervisor(mut self, supervisor: Arc<ContainerSupervisor>) -> Self {
self.container_supervisor = Some(supervisor);
self
}
/// Get the container supervisor (if configured)
pub fn container_supervisor(&self) -> Option<&Arc<ContainerSupervisor>> {
self.container_supervisor.as_ref()
}
/// Start the container supervisor background task
///
/// This spawns a background task that monitors containers for crashes
/// and enforces the `on_panic` error policy.
///
/// # Errors
/// Returns an error if no container supervisor is configured.
///
/// # Returns
/// A `JoinHandle` for the supervisor task.
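///
/// # Example
///
/// Illustrative usage (assumes the manager was built with a `ContainerSupervisor`):
///
/// ```ignore
/// let handle = manager.start_container_supervisor()?;
/// // ... later, during shutdown:
/// manager.shutdown_container_supervisor();
/// handle.await.ok();
/// ```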
pub fn start_container_supervisor(&self) -> Result<tokio::task::JoinHandle<()>> {
let supervisor = self.container_supervisor.as_ref().ok_or_else(|| {
AgentError::Configuration("Container supervisor not configured".to_string())
})?;
let supervisor = Arc::clone(supervisor);
Ok(tokio::spawn(async move {
supervisor.run_loop().await;
}))
}
/// Shutdown the container supervisor
pub fn shutdown_container_supervisor(&self) {
if let Some(supervisor) = &self.container_supervisor {
supervisor.shutdown();
}
}
/// Get the supervised state of a container
pub async fn get_container_supervised_state(
&self,
container_id: &ContainerId,
) -> Option<SupervisedState> {
if let Some(supervisor) = &self.container_supervisor {
supervisor.get_state(container_id).await
} else {
None
}
}
/// Get supervisor events receiver
///
/// Note: This can only be called once; the receiver is moved to the caller.
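///
/// # Example
///
/// Illustrative (assumes a supervisor is configured):
///
/// ```ignore
/// if let Some(mut events) = manager.take_supervisor_events().await {
///     while let Some(event) = events.recv().await {
///         tracing::info!(?event, "supervisor event");
///     }
/// }
/// ```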
pub async fn take_supervisor_events(
&self,
) -> Option<tokio::sync::mpsc::Receiver<SupervisorEvent>> {
if let Some(supervisor) = &self.container_supervisor {
supervisor.take_event_receiver().await
} else {
None
}
}
// ==================== Dependency Orchestration ====================
/// Deploy multiple services respecting their dependency order
///
/// This method:
/// 1. Builds a dependency graph from the services
/// 2. Validates no cycles exist
/// 3. Computes topological order (services with no deps first)
/// 4. For each service in order, waits for dependencies then starts the service
///
/// # Arguments
/// * `services` - Map of service name to service specification
///
/// # Errors
/// - Returns `AgentError::InvalidSpec` if there are cyclic dependencies
/// - Returns `AgentError::DependencyTimeout` if a dependency times out with `on_timeout`: fail
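///
/// # Example
///
/// A sketch, assuming `db_spec` and `api_spec` are `ServiceSpec`s where the API
/// declares a dependency on the database:
///
/// ```ignore
/// let mut services = HashMap::new();
/// services.insert("db".to_string(), db_spec);
/// services.insert("api".to_string(), api_spec); // depends on "db"
/// // "db" starts first; "api" waits for its dependency conditions before starting.
/// manager.deploy_with_dependencies(services).await?;
/// ```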
pub async fn deploy_with_dependencies(
&self,
services: HashMap<String, ServiceSpec>,
) -> Result<()> {
if services.is_empty() {
return Ok(());
}
// Build dependency graph
let graph = DependencyGraph::build(&services)?;
tracing::info!(
service_count = services.len(),
"Starting deployment with dependency ordering"
);
// Get startup order
let order = graph.startup_order();
tracing::debug!(order = ?order, "Computed startup order");
// Start services in dependency order
for service_name in order {
let service_spec = services
.get(service_name)
.ok_or_else(|| AgentError::Internal(format!("Service {service_name} not found")))?;
// Wait for dependencies first
if !service_spec.depends.is_empty() {
tracing::info!(
service = %service_name,
dependency_count = service_spec.depends.len(),
"Waiting for dependencies"
);
self.wait_for_dependencies(service_name, &service_spec.depends)
.await?;
}
// Register and start service
tracing::info!(service = %service_name, "Starting service");
Box::pin(self.upsert_service(service_name.clone(), service_spec.clone())).await?;
// Get the desired replica count from scale config
let replicas = match &service_spec.scale {
zlayer_spec::ScaleSpec::Fixed { replicas } => *replicas,
zlayer_spec::ScaleSpec::Adaptive { min, .. } => *min, // Start with min replicas
zlayer_spec::ScaleSpec::Manual => 1, // Default to 1 for manual scaling
};
self.scale_service(service_name, replicas).await?;
// Mark service as started in health states (Unknown until health check runs)
self.update_health_state(service_name, HealthState::Unknown)
.await;
tracing::info!(
service = %service_name,
replicas = replicas,
"Service started"
);
}
tracing::info!(service_count = services.len(), "Deployment complete");
Ok(())
}
/// Wait for all dependencies of a service to be satisfied
///
/// # Arguments
/// * `service` - Name of the service waiting for dependencies
/// * `deps` - Slice of dependency specifications
///
/// # Errors
/// Returns `AgentError::DependencyTimeout` if any dependency with `on_timeout`: fail times out
async fn wait_for_dependencies(&self, service: &str, deps: &[DependsSpec]) -> Result<()> {
let condition_checker = DependencyConditionChecker::new(
Arc::clone(&self.runtime),
Arc::clone(&self.health_states),
None,
);
let waiter = DependencyWaiter::new(condition_checker);
let results = waiter.wait_for_all(deps).await?;
// Check results for failures
for result in results {
match result {
WaitResult::TimedOutFail {
service: dep_service,
condition,
timeout,
} => {
return Err(AgentError::DependencyTimeout {
service: service.to_string(),
dependency: dep_service,
condition: format!("{condition:?}"),
timeout,
});
}
WaitResult::TimedOutWarn {
service: dep_service,
condition,
} => {
tracing::warn!(
service = %service,
dependency = %dep_service,
condition = ?condition,
"Dependency timed out but continuing"
);
}
WaitResult::TimedOutContinue | WaitResult::Satisfied => {
// Continue silently
}
}
}
Ok(())
}
/// Check if all dependencies for a service are currently satisfied
///
/// This is a one-shot check (no waiting). Useful for pre-flight validation.
///
/// # Errors
/// Returns an error if a dependency check fails unexpectedly.
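///
/// # Example
///
/// Illustrative pre-flight check (assumes `spec` is the service's `ServiceSpec`):
///
/// ```ignore
/// if !manager.check_dependencies(&spec.depends).await? {
///     tracing::warn!("dependencies not yet satisfied; deferring start");
/// }
/// ```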
pub async fn check_dependencies(&self, deps: &[DependsSpec]) -> Result<bool> {
let condition_checker = DependencyConditionChecker::new(
Arc::clone(&self.runtime),
Arc::clone(&self.health_states),
None,
);
for dep in deps {
if !condition_checker.check(dep).await? {
return Ok(false);
}
}
Ok(true)
}
/// Add or update a workload (service, job, or cron)
///
/// This method handles different resource types appropriately:
/// - **Service**: Traditional long-running containers with scaling and health checks
/// - **Job**: Run-to-completion workloads triggered on-demand (stores spec for later)
/// - **Cron**: Scheduled run-to-completion workloads (registers with cron scheduler)
///
/// # Errors
/// Returns an error if service creation, scaling, or cron registration fails.
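///
/// # Example
///
/// Illustrative (assumes `manager` and a long-running `ServiceSpec` in `web_spec`):
///
/// ```ignore
/// // First call registers the service; a later call with the same name updates
/// // the spec and, under Always/Newer pull policies, may trigger a rolling
/// // recreate when the image digest has drifted.
/// manager.upsert_service("web".to_string(), web_spec.clone()).await?;
/// manager.scale_service("web", 2).await?;
/// ```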
#[allow(clippy::too_many_lines)]
pub async fn upsert_service(&self, name: String, spec: ServiceSpec) -> Result<()> {
match spec.rtype {
ResourceType::Service => {
// Long-running service: create/update instance
let mut services = self.services.write().await;
if let Some(instance) = services.get_mut(&name) {
// Update existing service. We need to:
// 1. Update the in-memory spec (so future scale-ups use the new image).
// 2. Honour the effective pull policy. For Never/IfNotPresent (after
// effective resolution) we noop. For Always/Newer we pull, compare
// digests, and trigger a rolling recreate when drift is observed.
instance.spec = spec.clone();
if let Some(dns) = &self.dns_server {
instance.set_dns_server(Arc::clone(dns));
}
let effective = effective_pull_policy(&spec.image.name, spec.image.pull_policy);
let old_digest = instance.last_pulled_digest().await;
let current_replicas =
u32::try_from(instance.replica_count().await).unwrap_or(u32::MAX);
drop(services); // Release write lock before pull / scale (which take their own locks).
match effective {
PullPolicy::Never | PullPolicy::IfNotPresent => {
// No pull, no recreate. Drift is silently ignored when the
// user has explicitly opted into "do not refresh" semantics.
tracing::debug!(
service = %name,
policy = ?effective,
"service unchanged on re-deploy (effective pull policy skips refresh)"
);
}
PullPolicy::Always | PullPolicy::Newer => {
// Pull (this updates the cached digest as a side-effect).
// We need a read guard to keep the instance alive while
// calling its &self method.
let services_ro = self.services.read().await;
let new_digest = if let Some(inst) = services_ro.get(&name) {
inst.pull_and_refresh_digest().await?
} else {
// The service vanished between our write-lock release
// and read-lock acquisition (race with remove_service).
// Treat this as a no-op; the caller will see the removal.
tracing::warn!(
service = %name,
"service removed during upsert; skipping drift recreate"
);
return Ok(());
};
drop(services_ro);
// Decide whether to recreate. Always forces a recreate.
// Newer recreates only when the digest actually changed.
// When digests are unknown (runtime doesn't expose them),
// we can't observe drift safely under Newer, so no-op.
let should_recreate = match effective {
PullPolicy::Always => true,
PullPolicy::Newer => match (&old_digest, &new_digest) {
(Some(old), Some(new)) => old != new,
_ => false,
},
_ => false,
};
if should_recreate && current_replicas > 0 {
tracing::info!(
service = %name,
policy = ?effective,
old_digest = ?old_digest,
new_digest = ?new_digest,
replicas = current_replicas,
"image drift detected; performing rolling recreate"
);
self.scale_service(&name, 0).await?;
self.scale_service(&name, current_replicas).await?;
tracing::info!(
service = %name,
new_digest = ?new_digest,
"service recreated with refreshed image"
);
} else {
tracing::debug!(
service = %name,
policy = ?effective,
old_digest = ?old_digest,
new_digest = ?new_digest,
"service up to date; no recreate required"
);
}
}
}
return Ok(());
}
// Create new service with proxy manager for health-aware load balancing
let overlay = self.overlay_manager.as_ref().map(Arc::clone);
let mut instance = if let Some(proxy) = &self.proxy_manager {
ServiceInstance::with_proxy(
name.clone(),
spec,
self.runtime.clone(),
overlay,
Arc::clone(proxy),
)
} else {
ServiceInstance::new(name.clone(), spec, self.runtime.clone(), overlay)
};
// Set DNS server if configured
if let Some(dns) = &self.dns_server {
instance.set_dns_server(Arc::clone(dns));
}
// Wire shared health states so callbacks bridge back to ServiceManager
instance.set_health_states(Arc::clone(&self.health_states));
// Register HTTP routes via proxy manager
if let Some(proxy) = &self.proxy_manager {
proxy.add_service(&name, &instance.spec).await;
}
// Register TCP/UDP endpoints in stream registry
if let Some(stream_registry) = &self.stream_registry {
for endpoint in &instance.spec.endpoints {
let svc = StreamService::new(
name.clone(),
Vec::new(), // No backends yet; added on scale-up
);
match endpoint.protocol {
Protocol::Tcp => {
stream_registry.register_tcp(endpoint.port, svc);
tracing::debug!(
service = %name,
port = endpoint.port,
"Registered TCP stream route"
);
}
Protocol::Udp => {
stream_registry.register_udp(endpoint.port, svc);
tracing::debug!(
service = %name,
port = endpoint.port,
"Registered UDP stream route"
);
}
_ => {} // HTTP routes handled by proxy manager
}
}
}
services.insert(name, instance);
}
ResourceType::Job => {
// Job: Just store the spec for later triggering
// Jobs don't start containers immediately; they're triggered on-demand
if let Some(executor) = &self.job_executor {
executor.register_job(&name, spec).await;
tracing::info!(job = %name, "Registered job spec");
} else {
tracing::warn!(
job = %name,
"Job executor not configured, storing as service for reference"
);
// Fallback: store as service instance for reference
let mut services = self.services.write().await;
let overlay = self.overlay_manager.as_ref().map(Arc::clone);
let mut instance = if let Some(proxy) = &self.proxy_manager {
ServiceInstance::with_proxy(
name.clone(),
spec,
self.runtime.clone(),
overlay,
Arc::clone(proxy),
)
} else {
ServiceInstance::new(name.clone(), spec, self.runtime.clone(), overlay)
};
// Set DNS server if configured
if let Some(dns) = &self.dns_server {
instance.set_dns_server(Arc::clone(dns));
}
services.insert(name, instance);
}
}
ResourceType::Cron => {
// Cron: Register with the cron scheduler
if let Some(scheduler) = &self.cron_scheduler {
scheduler.register(&name, &spec).await?;
tracing::info!(cron = %name, "Registered cron job with scheduler");
} else {
return Err(AgentError::Configuration(format!(
"Cron scheduler not configured for cron job '{name}'"
)));
}
}
}
Ok(())
}
/// Update backend addresses via `ProxyManager` after scaling
async fn update_proxy_backends(&self, service_name: &str, addrs: Vec<SocketAddr>) {
if let Some(proxy) = &self.proxy_manager {
proxy.update_backends(service_name, addrs).await;
}
}
/// Update backend addresses in the `StreamRegistry` for TCP/UDP endpoints after scaling
///
/// For containers with a port override (macOS sandbox), the addresses already
/// carry the runtime-assigned port. In that case, the container listens on the
/// override port for all traffic, so we use the address port directly. For
/// containers without a port override (Linux, VMs), we reconstruct addresses
/// using the endpoint's declared port, since each container has its own IP
/// and can bind any port independently.
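///
/// # Example
///
/// A minimal sketch of the reconstruction step (illustrative IPs and ports
/// only; not compiled as a doc test):
/// ```ignore
/// use std::net::SocketAddr;
///
/// // Two replicas reachable on their overlay IPs at the primary HTTP port.
/// let addrs: Vec<SocketAddr> = vec![
///     "10.0.0.2:8080".parse().unwrap(),
///     "10.0.0.3:8080".parse().unwrap(),
/// ];
/// // No override: rebuild each address with the TCP endpoint's target port.
/// let tcp: Vec<SocketAddr> = addrs
///     .iter()
///     .map(|a| SocketAddr::new(a.ip(), 5432))
///     .collect();
/// assert_eq!(tcp[0], "10.0.0.2:5432".parse::<SocketAddr>().unwrap());
/// // Override (macOS sandbox): replicas share one IP with distinct runtime
/// // ports, so `addrs` is forwarded to the stream registry unchanged.
/// ```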
fn update_stream_backends(&self, spec: &ServiceSpec, addrs: &[SocketAddr]) {
let Some(stream_registry) = &self.stream_registry else {
return;
};
// Detect per-container port overrides: if any address uses a port other
// than the primary spec endpoint's port, the runtime assigned its own
// ports and the addresses should be used as-is.
let primary_spec_port = spec
.endpoints
.iter()
.find(|ep| {
matches!(
ep.protocol,
Protocol::Http | Protocol::Https | Protocol::Websocket
)
})
.map_or(8080, zlayer_spec::EndpointSpec::target_port);
let has_port_overrides = addrs.iter().any(|addr| addr.port() != primary_spec_port);
for endpoint in &spec.endpoints {
match endpoint.protocol {
Protocol::Tcp => {
let tcp_backends: Vec<SocketAddr> = if has_port_overrides {
// Port overrides active (macOS sandbox): the container listens
// on its assigned port for all traffic. Use addresses as-is.
addrs.to_vec()
} else {
// Normal case: each container has its own IP, construct
// addresses using the TCP endpoint's container target port.
addrs
.iter()
.map(|addr| SocketAddr::new(addr.ip(), endpoint.target_port()))
.collect()
};
stream_registry.update_tcp_backends(endpoint.port, tcp_backends);
tracing::debug!(
endpoint = %endpoint.name,
port = endpoint.port,
backend_count = addrs.len(),
"Updated TCP stream backends"
);
}
Protocol::Udp => {
let udp_backends: Vec<SocketAddr> = if has_port_overrides {
addrs.to_vec()
} else {
addrs
.iter()
.map(|addr| SocketAddr::new(addr.ip(), endpoint.target_port()))
.collect()
};
stream_registry.update_udp_backends(endpoint.port, udp_backends);
tracing::debug!(
endpoint = %endpoint.name,
port = endpoint.port,
backend_count = addrs.len(),
"Updated UDP stream backends"
);
}
_ => {} // HTTP endpoints handled by update_proxy_backends
}
}
}
/// Scale a service to desired replica count
///
/// # Errors
/// Returns an error if the service is not found or scaling fails.
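///
/// # Example
///
/// A minimal sketch (assumes an already constructed `manager` with a
/// deployed `"api"` service; not compiled as a doc test):
/// ```ignore
/// manager.scale_service("api", 3).await?;
/// assert_eq!(manager.service_replica_count("api").await?, 3);
/// // Scaling down removes the highest-numbered replicas and unregisters
/// // them from the container supervisor.
/// manager.scale_service("api", 1).await?;
/// ```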
#[allow(clippy::cast_possible_truncation)]
pub async fn scale_service(&self, name: &str, replicas: u32) -> Result<()> {
let _permit = self.scale_semaphore.acquire().await;
let services = self.services.read().await;
let instance = services.get(name).ok_or_else(|| AgentError::NotFound {
container: name.to_string(),
reason: "service not found".to_string(),
})?;
// Get current replica count before scaling
let current_replicas = instance.replica_count().await as u32;
// Perform the scaling operation
instance.scale_to(replicas).await?;
// After scaling, refresh proxy backends with the containers' current
// addresses (overlay IPs from collect_backend_addrs, or runtime-assigned
// port overrides when replicas share the host network on macOS).
let addrs = self.collect_backend_addrs(instance, replicas).await;
// Update HTTP backends via ProxyManager
if self.proxy_manager.is_some() && !addrs.is_empty() {
self.update_proxy_backends(name, addrs.clone()).await;
}
// Update TCP/UDP backends in StreamRegistry
if self.stream_registry.is_some() {
self.update_stream_backends(&instance.spec, &addrs);
}
// Register new containers with supervisor for crash monitoring
if let Some(supervisor) = &self.container_supervisor {
// For scale-up, register new containers
if replicas > current_replicas {
for i in current_replicas..replicas {
let container_id = ContainerId {
service: name.to_string(),
replica: i + 1,
};
supervisor.supervise(&container_id, &instance.spec).await;
}
}
// For scale-down, unregister removed containers
if replicas < current_replicas {
for i in replicas..current_replicas {
let container_id = ContainerId {
service: name.to_string(),
replica: i + 1,
};
supervisor.unsupervise(&container_id).await;
}
}
}
Ok(())
}
/// Collect backend addresses for a service's containers
///
/// This queries the service instance's containers for their overlay network
/// IP addresses and constructs backend addresses using those IPs with the
/// service's endpoint port.
///
/// If a container has a `port_override` (e.g., macOS sandbox where all
/// containers share the host network), that port is used instead of the
/// spec-declared endpoint port. This allows multiple replicas on the same
/// IP (`127.0.0.1`) to be distinguished by port.
async fn collect_backend_addrs(
&self,
instance: &ServiceInstance,
_replicas: u32, // No longer needed - we iterate containers directly
) -> Vec<SocketAddr> {
let mut addrs = Vec::new();
// Get the primary container target port (first HTTP endpoint) as the default
let spec_port = instance
.spec
.endpoints
.iter()
.find(|ep| {
matches!(
ep.protocol,
Protocol::Http | Protocol::Https | Protocol::Websocket
)
})
.map_or(8080, zlayer_spec::EndpointSpec::target_port);
// Collect backend addresses from containers with overlay IPs
let containers = instance.containers().read().await;
for container in containers.values() {
if let Some(ip) = container.overlay_ip {
// Use the runtime-assigned port override if present (macOS sandbox),
// otherwise fall back to the spec-declared endpoint port.
let port = container.port_override.unwrap_or(spec_port);
addrs.push(SocketAddr::new(ip, port));
}
}
// If no overlay IPs are available (e.g. Docker runtime or failed overlay
// attachments), log a warning but do not fall back to localhost in production.
if addrs.is_empty() && !containers.is_empty() {
tracing::warn!(
service = %instance.service_name,
container_count = containers.len(),
"no overlay IPs available for backends - containers may not be reachable via proxy"
);
}
addrs
}
/// Get service replica count
///
/// # Errors
/// Returns an error if the service is not found.
pub async fn service_replica_count(&self, name: &str) -> Result<usize> {
let services = self.services.read().await;
let instance = services.get(name).ok_or_else(|| AgentError::NotFound {
container: name.to_string(),
reason: "service not found".to_string(),
})?;
Ok(instance.replica_count().await)
}
/// Remove a workload (service, job, or cron)
///
/// This method handles cleanup for different resource types:
/// - **Service**: Unregisters proxy routes, supervisor, and removes from service map
/// - **Job**: Unregisters from job executor
/// - **Cron**: Unregisters from cron scheduler
///
/// # Errors
/// Returns an error if the service cannot be removed or scale-down fails.
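///
/// # Example
///
/// A minimal sketch of tearing a service down (hypothetical `manager`
/// wiring; not compiled as a doc test):
/// ```ignore
/// manager.remove_service("api").await?;
/// assert!(!manager.list_services().await.contains(&"api".to_string()));
/// ```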
pub async fn remove_service(&self, name: &str) -> Result<()> {
// Try to unregister from cron scheduler first
if let Some(scheduler) = &self.cron_scheduler {
scheduler.unregister(name).await;
}
// Try to unregister from job executor
if let Some(executor) = &self.job_executor {
executor.unregister_job(name).await;
}
// Unregister stream routes (TCP/UDP) from the stream registry
if let Some(stream_registry) = &self.stream_registry {
// Need to get the service spec to know which ports to unregister
let services = self.services.read().await;
if let Some(instance) = services.get(name) {
for endpoint in &instance.spec.endpoints {
match endpoint.protocol {
Protocol::Tcp => {
let _ = stream_registry.unregister_tcp(endpoint.port);
tracing::debug!(
service = %name,
port = endpoint.port,
"Unregistered TCP stream route"
);
}
Protocol::Udp => {
let _ = stream_registry.unregister_udp(endpoint.port);
tracing::debug!(
service = %name,
port = endpoint.port,
"Unregistered UDP stream route"
);
}
_ => {} // HTTP routes are handled by the proxy manager
}
}
}
drop(services); // Release read lock
}
// Unregister containers from the supervisor
if let Some(supervisor) = &self.container_supervisor {
let containers = self.get_service_containers(name).await;
for container_id in containers {
supervisor.unsupervise(&container_id).await;
}
tracing::debug!(service = %name, "Unregistered containers from supervisor");
}
// Clean up DNS records for the service
if let Some(dns) = &self.dns_server {
// Remove the service-level DNS entry
let service_hostname = format!("{name}.service.local");
if let Err(e) = dns.remove_record(&service_hostname).await {
tracing::warn!(
hostname = %service_hostname,
error = %e,
"failed to remove service DNS record"
);
} else {
tracing::debug!(
hostname = %service_hostname,
"removed service DNS record"
);
}
// Also remove any remaining replica-specific DNS entries
let services = self.services.read().await;
if let Some(instance) = services.get(name) {
let containers = instance.containers().read().await;
for id in containers.keys() {
let replica_hostname = format!("{}.{}.service.local", id.replica, name);
if let Err(e) = dns.remove_record(&replica_hostname).await {
tracing::warn!(
hostname = %replica_hostname,
error = %e,
"failed to remove replica DNS record during service removal"
);
}
}
}
drop(services); // Release read lock before write lock
}
// Remove from services map (may or may not exist depending on rtype)
let mut services = self.services.write().await;
if services.remove(name).is_some() {
tracing::debug!(service = %name, "Removed service from manager");
}
Ok(())
}
/// Introspect service infrastructure wiring.
/// Returns (`has_overlay`, `has_proxy`, `has_dns`), or `None` if the service is not found.
pub async fn service_infrastructure(&self, name: &str) -> Option<(bool, bool, bool)> {
let services = self.services.read().await;
services.get(name).map(|i| {
(
i.has_overlay_manager(),
i.has_proxy_manager(),
i.has_dns_server(),
)
})
}
/// List all services
pub async fn list_services(&self) -> Vec<String> {
self.services.read().await.keys().cloned().collect()
}
/// Get logs for a service, aggregated from all container replicas.
///
/// # Arguments
/// * `service_name` - Name of the service to fetch logs for
/// * `tail` - Number of lines to return per container (0 = all)
/// * `instance` - Optional specific instance (container ID suffix like "1", "2")
///
/// # Errors
/// Returns an error if the service or instance is not found.
///
/// # Returns
/// Structured log entries from all (or specific) container replicas. Each
/// entry has its `service` and `deployment` fields populated when available.
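///
/// # Example
///
/// A minimal sketch (assumes a deployed `"api"` service; not compiled as a
/// doc test):
/// ```ignore
/// // Last 100 lines from every replica.
/// let all = manager.get_service_logs("api", 100, None).await?;
/// // Last 100 lines from replica 2 only.
/// let one = manager.get_service_logs("api", 100, Some("2")).await?;
/// assert!(one.iter().all(|entry| entry.service.as_deref() == Some("api")));
/// ```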
pub async fn get_service_logs(
&self,
service_name: &str,
tail: usize,
instance: Option<&str>,
) -> Result<Vec<LogEntry>> {
let container_ids = self.get_service_containers(service_name).await;
if container_ids.is_empty() {
return Err(AgentError::NotFound {
container: service_name.to_string(),
reason: "no containers found for service".to_string(),
});
}
// If a specific instance is requested, filter to just that one
let target_ids: Vec<&ContainerId> = if let Some(inst) = instance {
if let Ok(replica_num) = inst.parse::<u32>() {
container_ids
.iter()
.filter(|id| id.replica == replica_num)
.collect()
} else {
// Fall back to a substring match against the full container ID string
container_ids
.iter()
.filter(|id| id.to_string().contains(inst))
.collect()
}
} else {
container_ids.iter().collect()
};
if target_ids.is_empty() {
return Err(AgentError::NotFound {
container: format!("{}/{}", service_name, instance.unwrap_or("?")),
reason: "instance not found".to_string(),
});
}
let mut all_entries: Vec<LogEntry> = Vec::new();
for id in &target_ids {
match self.runtime.container_logs(id, tail).await {
Ok(mut entries) => {
// Populate service and deployment metadata on each entry
for entry in &mut entries {
if entry.service.is_none() {
entry.service = Some(service_name.to_string());
}
if entry.deployment.is_none() {
entry.deployment.clone_from(&self.deployment_name);
}
}
all_entries.extend(entries);
}
Err(e) => {
tracing::warn!(
service = service_name,
container = %id,
error = %e,
"Failed to read container logs"
);
}
}
}
Ok(all_entries)
}
/// Get all container IDs for a specific service
///
/// Returns an empty vector if the service doesn't exist.
///
/// # Arguments
/// * `service_name` - Name of the service to query
///
/// # Returns
/// Vector of `ContainerId`s, one per replica of the service
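///
/// # Example
///
/// A minimal sketch (assumes a deployed `"api"` service; not compiled as a
/// doc test):
/// ```ignore
/// for id in manager.get_service_containers("api").await {
///     println!("replica {} -> {}", id.replica, id);
/// }
/// ```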
pub async fn get_service_containers(&self, service_name: &str) -> Vec<ContainerId> {
let services = self.services.read().await;
if let Some(instance) = services.get(service_name) {
instance.container_ids().await
} else {
Vec::new()
}
}
/// Execute a command inside a running container for a service
///
/// Picks a specific replica if provided, otherwise uses the first available container.
///
/// # Arguments
/// * `service_name` - Name of the service
/// * `replica` - Optional replica number to target
/// * `cmd` - Command and arguments to execute
///
/// # Errors
/// Returns an error if the service or replica is not found, or if exec fails.
///
/// # Panics
/// Panics if no replica is specified and the container list is unexpectedly empty
/// after the emptiness check (should not happen in practice).
///
/// # Returns
/// Tuple of (`exit_code`, stdout, stderr)
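///
/// # Example
///
/// A minimal sketch of a one-off command against replica 1 (hypothetical
/// `manager` wiring; not compiled as a doc test):
/// ```ignore
/// let cmd = vec!["cat".to_string(), "/etc/hostname".to_string()];
/// let (exit_code, stdout, stderr) = manager
///     .exec_in_container("api", Some(1), &cmd)
///     .await?;
/// assert_eq!(exit_code, 0, "exec failed: {stderr}");
/// println!("{stdout}");
/// ```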
pub async fn exec_in_container(
&self,
service_name: &str,
replica: Option<u32>,
cmd: &[String],
) -> Result<(i32, String, String)> {
let container_ids = self.get_service_containers(service_name).await;
if container_ids.is_empty() {
return Err(AgentError::NotFound {
container: service_name.to_string(),
reason: "no containers found for service".to_string(),
});
}
// Pick the target container
let target = if let Some(rep) = replica {
container_ids
.into_iter()
.find(|cid| cid.replica == rep)
.ok_or_else(|| AgentError::NotFound {
container: format!("{service_name}-rep-{rep}"),
reason: format!("replica {rep} not found for service"),
})?
} else {
// No replica specified: use the first available container
container_ids.into_iter().next().unwrap()
};
self.runtime.exec(&target, cmd).await
}
// ==================== Job Management ====================
/// Trigger a job execution
///
/// # Arguments
/// * `name` - Name of the registered job
/// * `trigger` - How the job was triggered (endpoint, cli, etc.)
///
/// # Returns
/// The execution ID for tracking the job
///
/// # Errors
/// - Returns error if job executor is not configured
/// - Returns error if the job is not registered
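///
/// # Example
///
/// A minimal sketch of triggering a registered job and inspecting its
/// execution (assumes a registered `"backup"` job; not compiled as a doc
/// test):
/// ```ignore
/// let exec_id = manager.trigger_job("backup", JobTrigger::Cli).await?;
/// if let Some(execution) = manager.get_job_execution(&exec_id).await {
///     assert_eq!(execution.job_name, "backup");
/// }
/// ```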
pub async fn trigger_job(&self, name: &str, trigger: JobTrigger) -> Result<JobExecutionId> {
let executor = self
.job_executor
.as_ref()
.ok_or_else(|| AgentError::Configuration("Job executor not configured".to_string()))?;
let spec = executor
.get_job_spec(name)
.await
.ok_or_else(|| AgentError::NotFound {
container: name.to_string(),
reason: "job not registered".to_string(),
})?;
executor.trigger(name, &spec, trigger).await
}
/// Get the status of a job execution
///
/// # Arguments
/// * `id` - The execution ID returned from `trigger_job`
///
/// # Returns
/// The job execution details, or None if not found
pub async fn get_job_execution(&self, id: &JobExecutionId) -> Option<JobExecution> {
if let Some(executor) = &self.job_executor {
executor.get_execution(id).await
} else {
None
}
}
/// List all executions for a specific job
///
/// # Arguments
/// * `name` - Name of the job
///
/// # Returns
/// Vector of job executions for the specified job
pub async fn list_job_executions(&self, name: &str) -> Vec<JobExecution> {
if let Some(executor) = &self.job_executor {
executor.list_executions(name).await
} else {
Vec::new()
}
}
/// Cancel a running job execution
///
/// # Arguments
/// * `id` - The execution ID to cancel
///
/// # Errors
/// Returns error if job executor is not configured or if cancellation fails
pub async fn cancel_job(&self, id: &JobExecutionId) -> Result<()> {
let executor = self
.job_executor
.as_ref()
.ok_or_else(|| AgentError::Configuration("Job executor not configured".to_string()))?;
executor.cancel(id).await
}
// ==================== Cron Management ====================
/// Manually trigger a cron job (outside of its schedule)
///
/// # Arguments
/// * `name` - Name of the cron job
///
/// # Returns
/// The execution ID for tracking the triggered job
///
/// # Errors
/// Returns error if cron scheduler is not configured or job not found
pub async fn trigger_cron(&self, name: &str) -> Result<JobExecutionId> {
let scheduler = self.cron_scheduler.as_ref().ok_or_else(|| {
AgentError::Configuration("Cron scheduler not configured".to_string())
})?;
scheduler.trigger_now(name).await
}
/// Enable or disable a cron job
///
/// # Arguments
/// * `name` - Name of the cron job
/// * `enabled` - Whether to enable or disable the job
pub async fn set_cron_enabled(&self, name: &str, enabled: bool) {
if let Some(scheduler) = &self.cron_scheduler {
scheduler.set_enabled(name, enabled).await;
}
}
/// List all registered cron jobs
pub async fn list_cron_jobs(&self) -> Vec<crate::cron_scheduler::CronJobInfo> {
if let Some(scheduler) = &self.cron_scheduler {
scheduler.list_jobs().await
} else {
Vec::new()
}
}
/// Start the cron scheduler background task
///
/// This spawns a background task that checks for due cron jobs every second.
/// Returns a `JoinHandle` that can be used to wait for the scheduler to stop.
///
/// # Errors
/// Returns error if cron scheduler is not configured
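///
/// # Example
///
/// A minimal sketch of the scheduler lifecycle (hypothetical shutdown flow;
/// not compiled as a doc test):
/// ```ignore
/// let handle = manager.start_cron_scheduler()?;
/// // ... agent runs; due cron jobs are triggered in the background ...
/// manager.shutdown_cron();
/// let _ = handle.await;
/// ```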
pub fn start_cron_scheduler(&self) -> Result<tokio::task::JoinHandle<()>> {
let scheduler = self.cron_scheduler.as_ref().ok_or_else(|| {
AgentError::Configuration("Cron scheduler not configured".to_string())
})?;
let scheduler: Arc<CronScheduler> = Arc::clone(scheduler);
Ok(tokio::spawn(async move {
scheduler.run_loop().await;
}))
}
/// Shutdown the cron scheduler
pub fn shutdown_cron(&self) {
if let Some(scheduler) = &self.cron_scheduler {
scheduler.shutdown();
}
}
}
#[cfg(test)]
#[allow(deprecated)]
mod tests {
use super::*;
use crate::runtime::MockRuntime;
#[tokio::test]
async fn test_service_manager() {
let runtime: Arc<dyn Runtime + Send + Sync> = Arc::new(MockRuntime::new());
let manager = ServiceManager::new(runtime);
// Add service
let spec = mock_spec();
Box::pin(manager.upsert_service("test".to_string(), spec))
.await
.unwrap();
// Scale up
manager.scale_service("test", 3).await.unwrap();
// Check count
let count = manager.service_replica_count("test").await.unwrap();
assert_eq!(count, 3);
// List services
let services = manager.list_services().await;
assert_eq!(services, vec!["test".to_string()]);
}
#[tokio::test]
async fn test_service_manager_basic_lifecycle() {
let runtime: Arc<dyn Runtime + Send + Sync> = Arc::new(MockRuntime::new());
let manager = ServiceManager::new(runtime);
// Add service with HTTP endpoint
let spec = mock_spec();
Box::pin(manager.upsert_service("api".to_string(), spec))
.await
.unwrap();
// Scale up
manager.scale_service("api", 2).await.unwrap();
// Check count
let count = manager.service_replica_count("api").await.unwrap();
assert_eq!(count, 2);
// Remove service
manager.remove_service("api").await.unwrap();
// Verify service is gone
let services = manager.list_services().await;
assert!(!services.contains(&"api".to_string()));
}
#[tokio::test]
async fn test_service_manager_with_full_config() {
use tokio::sync::RwLock;
let runtime: Arc<dyn Runtime + Send + Sync> = Arc::new(MockRuntime::new());
// Create a mock overlay manager (skip actual network setup)
let overlay_manager = Arc::new(RwLock::new(
OverlayManager::new("test-deployment".to_string())
.await
.unwrap(),
));
let manager =
ServiceManager::with_full_config(runtime, overlay_manager, "prod".to_string());
// Add service
let spec = mock_spec();
Box::pin(manager.upsert_service("web".to_string(), spec))
.await
.unwrap();
// Verify service is registered
let services = manager.list_services().await;
assert!(services.contains(&"web".to_string()));
}
fn mock_spec() -> ServiceSpec {
serde_yaml::from_str::<zlayer_spec::DeploymentSpec>(
r"
version: v1
deployment: test
services:
test:
rtype: service
image:
name: test:latest
endpoints:
- name: http
protocol: http
port: 8080
scale:
mode: fixed
replicas: 1
",
)
.unwrap()
.services
.remove("test")
.unwrap()
}
/// Helper to create a `ServiceSpec` with dependencies
fn mock_spec_with_deps(deps: Vec<DependsSpec>) -> ServiceSpec {
let mut spec = mock_spec();
spec.depends = deps;
spec
}
/// Helper to create a `DependsSpec`
fn dep(
service: &str,
condition: zlayer_spec::DependencyCondition,
timeout_ms: u64,
on_timeout: zlayer_spec::TimeoutAction,
) -> DependsSpec {
DependsSpec {
service: service.to_string(),
condition,
timeout: Some(Duration::from_millis(timeout_ms)),
on_timeout,
}
}
#[tokio::test]
async fn test_deploy_with_dependencies_no_deps() {
let runtime: Arc<dyn Runtime + Send + Sync> = Arc::new(MockRuntime::new());
let manager = ServiceManager::new(runtime);
// Services with no dependencies
let mut services = HashMap::new();
services.insert("a".to_string(), mock_spec());
services.insert("b".to_string(), mock_spec());
// Should deploy both without issue
Box::pin(manager.deploy_with_dependencies(services))
.await
.unwrap();
// Both services should be registered
let service_list = manager.list_services().await;
assert_eq!(service_list.len(), 2);
}
#[tokio::test]
async fn test_deploy_with_dependencies_linear() {
let runtime: Arc<dyn Runtime + Send + Sync> = Arc::new(MockRuntime::new());
let manager = ServiceManager::new(runtime);
// A -> B -> C (A depends on B, B depends on C)
// All use "started" condition which is satisfied when container is running
let mut services = HashMap::new();
services.insert("c".to_string(), mock_spec());
services.insert(
"b".to_string(),
mock_spec_with_deps(vec![dep(
"c",
zlayer_spec::DependencyCondition::Started,
5000,
zlayer_spec::TimeoutAction::Fail,
)]),
);
services.insert(
"a".to_string(),
mock_spec_with_deps(vec![dep(
"b",
zlayer_spec::DependencyCondition::Started,
5000,
zlayer_spec::TimeoutAction::Fail,
)]),
);
// Should deploy in order: c, b, a
Box::pin(manager.deploy_with_dependencies(services))
.await
.unwrap();
// All services should be registered
let service_list = manager.list_services().await;
assert_eq!(service_list.len(), 3);
}
#[tokio::test]
async fn test_deploy_with_dependencies_cycle_detection() {
let runtime: Arc<dyn Runtime + Send + Sync> = Arc::new(MockRuntime::new());
let manager = ServiceManager::new(runtime);
// A -> B -> A (cycle)
let mut services = HashMap::new();
services.insert(
"a".to_string(),
mock_spec_with_deps(vec![dep(
"b",
zlayer_spec::DependencyCondition::Started,
5000,
zlayer_spec::TimeoutAction::Fail,
)]),
);
services.insert(
"b".to_string(),
mock_spec_with_deps(vec![dep(
"a",
zlayer_spec::DependencyCondition::Started,
5000,
zlayer_spec::TimeoutAction::Fail,
)]),
);
// Should fail with cycle detection
let result = Box::pin(manager.deploy_with_dependencies(services)).await;
assert!(result.is_err());
let err = result.unwrap_err().to_string();
assert!(err.contains("Cyclic dependency"));
}
#[tokio::test]
async fn test_deploy_with_dependencies_timeout_continue() {
let runtime: Arc<dyn Runtime + Send + Sync> = Arc::new(MockRuntime::new());
let manager = ServiceManager::new(runtime);
// A depends on B (healthy), but B never becomes healthy
// Using continue action, so it should proceed anyway
let mut services = HashMap::new();
services.insert("b".to_string(), mock_spec());
services.insert(
"a".to_string(),
mock_spec_with_deps(vec![dep(
"b",
zlayer_spec::DependencyCondition::Healthy, // B won't pass healthy check
100, // Short timeout
zlayer_spec::TimeoutAction::Continue, // But continue anyway
)]),
);
// Should deploy both despite timeout
Box::pin(manager.deploy_with_dependencies(services))
.await
.unwrap();
let service_list = manager.list_services().await;
assert_eq!(service_list.len(), 2);
}
#[tokio::test]
async fn test_deploy_with_dependencies_timeout_warn() {
let runtime: Arc<dyn Runtime + Send + Sync> = Arc::new(MockRuntime::new());
let manager = ServiceManager::new(runtime);
// A depends on B (healthy), but B never becomes healthy
// Using warn action, so it should proceed with a warning
let mut services = HashMap::new();
services.insert("b".to_string(), mock_spec());
services.insert(
"a".to_string(),
mock_spec_with_deps(vec![dep(
"b",
zlayer_spec::DependencyCondition::Healthy,
100,
zlayer_spec::TimeoutAction::Warn,
)]),
);
// Should deploy both despite timeout (with warning)
Box::pin(manager.deploy_with_dependencies(services))
.await
.unwrap();
let service_list = manager.list_services().await;
assert_eq!(service_list.len(), 2);
}
#[tokio::test]
async fn test_deploy_with_dependencies_timeout_fail() {
let runtime: Arc<dyn Runtime + Send + Sync> = Arc::new(MockRuntime::new());
let manager = ServiceManager::new(runtime);
// A depends on B (healthy), but B never becomes healthy
// Using fail action, so deployment should fail
let mut services = HashMap::new();
services.insert("b".to_string(), mock_spec());
services.insert(
"a".to_string(),
mock_spec_with_deps(vec![dep(
"b",
zlayer_spec::DependencyCondition::Healthy,
100,
zlayer_spec::TimeoutAction::Fail,
)]),
);
// Should fail: B starts but never becomes healthy, and A uses the Fail action
let result = Box::pin(manager.deploy_with_dependencies(services)).await;
assert!(result.is_err());
// B should be started (it has no deps), but A should fail
let err = result.unwrap_err().to_string();
assert!(err.contains("Dependency timeout"));
}
#[tokio::test]
async fn test_check_dependencies_all_satisfied() {
let runtime: Arc<dyn Runtime + Send + Sync> = Arc::new(MockRuntime::new());
let manager = ServiceManager::new(runtime);
// Mark a service as healthy
manager
.update_health_state("db", HealthState::Healthy)
.await;
let deps = vec![DependsSpec {
service: "db".to_string(),
condition: zlayer_spec::DependencyCondition::Healthy,
timeout: Some(Duration::from_secs(60)),
on_timeout: zlayer_spec::TimeoutAction::Fail,
}];
let satisfied = manager.check_dependencies(&deps).await.unwrap();
assert!(satisfied);
}
#[tokio::test]
async fn test_check_dependencies_not_satisfied() {
let runtime: Arc<dyn Runtime + Send + Sync> = Arc::new(MockRuntime::new());
let manager = ServiceManager::new(runtime);
// Service not healthy (no state set = Unknown)
let deps = vec![DependsSpec {
service: "db".to_string(),
condition: zlayer_spec::DependencyCondition::Healthy,
timeout: Some(Duration::from_secs(60)),
on_timeout: zlayer_spec::TimeoutAction::Fail,
}];
let satisfied = manager.check_dependencies(&deps).await.unwrap();
assert!(!satisfied);
}
#[tokio::test]
async fn test_health_state_tracking() {
let runtime: Arc<dyn Runtime + Send + Sync> = Arc::new(MockRuntime::new());
let manager = ServiceManager::new(runtime);
// Update health states
manager
.update_health_state("db", HealthState::Healthy)
.await;
manager
.update_health_state("cache", HealthState::Unknown)
.await;
// Verify states
let states = manager.health_states();
let states_read = states.read().await;
assert!(matches!(states_read.get("db"), Some(HealthState::Healthy)));
assert!(matches!(
states_read.get("cache"),
Some(HealthState::Unknown)
));
}
// ==================== Job/Cron Integration Tests ====================
fn mock_job_spec() -> ServiceSpec {
serde_yaml::from_str::<zlayer_spec::DeploymentSpec>(
r"
version: v1
deployment: test
services:
backup:
rtype: job
image:
name: backup:latest
",
)
.unwrap()
.services
.remove("backup")
.unwrap()
}
fn mock_cron_spec() -> ServiceSpec {
serde_yaml::from_str::<zlayer_spec::DeploymentSpec>(
r#"
version: v1
deployment: test
services:
cleanup:
rtype: cron
schedule: "0 0 * * * * *"
image:
name: cleanup:latest
"#,
)
.unwrap()
.services
.remove("cleanup")
.unwrap()
}
#[tokio::test]
async fn test_service_manager_with_job_executor() {
let runtime: Arc<dyn Runtime + Send + Sync> = Arc::new(MockRuntime::new());
let job_executor = Arc::new(JobExecutor::new(runtime.clone()));
let manager = ServiceManager::new(runtime).with_job_executor(job_executor);
// Register job
let job_spec = mock_job_spec();
Box::pin(manager.upsert_service("backup".to_string(), job_spec))
.await
.unwrap();
// Trigger job
let exec_id = manager
.trigger_job("backup", JobTrigger::Cli)
.await
.unwrap();
// Give job time to start
tokio::time::sleep(Duration::from_millis(50)).await;
// Check execution exists
let execution = manager.get_job_execution(&exec_id).await;
assert!(execution.is_some());
assert_eq!(execution.unwrap().job_name, "backup");
}
#[tokio::test]
async fn test_service_manager_with_cron_scheduler() {
let runtime: Arc<dyn Runtime + Send + Sync> = Arc::new(MockRuntime::new());
let job_executor = Arc::new(JobExecutor::new(runtime.clone()));
let cron_scheduler = Arc::new(CronScheduler::new(job_executor));
let manager = ServiceManager::new(runtime).with_cron_scheduler(cron_scheduler);
// Register cron job
let cron_spec = mock_cron_spec();
Box::pin(manager.upsert_service("cleanup".to_string(), cron_spec))
.await
.unwrap();
// List cron jobs
let cron_jobs = manager.list_cron_jobs().await;
assert_eq!(cron_jobs.len(), 1);
assert_eq!(cron_jobs[0].name, "cleanup");
assert!(cron_jobs[0].enabled);
}
#[tokio::test]
async fn test_service_manager_trigger_cron() {
let runtime: Arc<dyn Runtime + Send + Sync> = Arc::new(MockRuntime::new());
let job_executor = Arc::new(JobExecutor::new(runtime.clone()));
let cron_scheduler = Arc::new(CronScheduler::new(job_executor.clone()));
let manager = ServiceManager::new(runtime)
.with_job_executor(job_executor)
.with_cron_scheduler(cron_scheduler);
// Register cron job
let cron_spec = mock_cron_spec();
Box::pin(manager.upsert_service("cleanup".to_string(), cron_spec))
.await
.unwrap();
// Manually trigger the cron job
let exec_id = manager.trigger_cron("cleanup").await.unwrap();
assert!(!exec_id.0.is_empty());
}
#[tokio::test]
async fn test_service_manager_enable_disable_cron() {
let runtime: Arc<dyn Runtime + Send + Sync> = Arc::new(MockRuntime::new());
let job_executor = Arc::new(JobExecutor::new(runtime.clone()));
let cron_scheduler = Arc::new(CronScheduler::new(job_executor));
let manager = ServiceManager::new(runtime).with_cron_scheduler(cron_scheduler);
// Register cron job
let cron_spec = mock_cron_spec();
Box::pin(manager.upsert_service("cleanup".to_string(), cron_spec))
.await
.unwrap();
// Initially enabled
let cron_jobs = manager.list_cron_jobs().await;
assert!(cron_jobs[0].enabled);
// Disable
manager.set_cron_enabled("cleanup", false).await;
let cron_jobs = manager.list_cron_jobs().await;
assert!(!cron_jobs[0].enabled);
// Re-enable
manager.set_cron_enabled("cleanup", true).await;
let cron_jobs = manager.list_cron_jobs().await;
assert!(cron_jobs[0].enabled);
}
#[tokio::test]
async fn test_service_manager_remove_cleans_up_job() {
let runtime: Arc<dyn Runtime + Send + Sync> = Arc::new(MockRuntime::new());
let job_executor = Arc::new(JobExecutor::new(runtime.clone()));
let manager = ServiceManager::new(runtime).with_job_executor(job_executor.clone());
// Register job
let job_spec = mock_job_spec();
Box::pin(manager.upsert_service("backup".to_string(), job_spec))
.await
.unwrap();
// Verify job is registered
let spec = job_executor.get_job_spec("backup").await;
assert!(spec.is_some());
// Remove job
manager.remove_service("backup").await.unwrap();
// Verify job is unregistered
let spec = job_executor.get_job_spec("backup").await;
assert!(spec.is_none());
}
#[tokio::test]
async fn test_service_manager_remove_cleans_up_cron() {
let runtime: Arc<dyn Runtime + Send + Sync> = Arc::new(MockRuntime::new());
let job_executor = Arc::new(JobExecutor::new(runtime.clone()));
let cron_scheduler = Arc::new(CronScheduler::new(job_executor));
let manager = ServiceManager::new(runtime).with_cron_scheduler(cron_scheduler.clone());
// Register cron job
let cron_spec = mock_cron_spec();
Box::pin(manager.upsert_service("cleanup".to_string(), cron_spec))
.await
.unwrap();
// Verify cron job is registered
assert_eq!(cron_scheduler.job_count().await, 1);
// Remove cron job
manager.remove_service("cleanup").await.unwrap();
// Verify cron job is unregistered
assert_eq!(cron_scheduler.job_count().await, 0);
}
#[tokio::test]
async fn test_service_manager_job_without_executor() {
let runtime: Arc<dyn Runtime + Send + Sync> = Arc::new(MockRuntime::new());
let manager = ServiceManager::new(runtime);
// Try to trigger job without executor configured
let result = manager.trigger_job("nonexistent", JobTrigger::Cli).await;
assert!(result.is_err());
assert!(result.unwrap_err().to_string().contains("not configured"));
}
#[tokio::test]
async fn test_service_manager_cron_without_scheduler() {
let runtime: Arc<dyn Runtime + Send + Sync> = Arc::new(MockRuntime::new());
let manager = ServiceManager::new(runtime);
// Try to register cron job without scheduler configured
let cron_spec = mock_cron_spec();
let result = Box::pin(manager.upsert_service("cleanup".to_string(), cron_spec)).await;
assert!(result.is_err());
assert!(result.unwrap_err().to_string().contains("not configured"));
}
#[tokio::test]
async fn test_service_manager_list_job_executions() {
let runtime: Arc<dyn Runtime + Send + Sync> = Arc::new(MockRuntime::new());
let job_executor = Arc::new(JobExecutor::new(runtime.clone()));
let manager = ServiceManager::new(runtime).with_job_executor(job_executor);
// Register job
let job_spec = mock_job_spec();
Box::pin(manager.upsert_service("backup".to_string(), job_spec))
.await
.unwrap();
// Trigger job twice
manager
.trigger_job("backup", JobTrigger::Cli)
.await
.unwrap();
manager
.trigger_job("backup", JobTrigger::Scheduler)
.await
.unwrap();
// Give jobs time to start
tokio::time::sleep(Duration::from_millis(50)).await;
// List executions
let executions = manager.list_job_executions("backup").await;
assert_eq!(executions.len(), 2);
}
// ==================== Container Supervisor Integration Tests ====================
#[tokio::test]
async fn test_service_manager_with_supervisor() {
let runtime: Arc<dyn Runtime + Send + Sync> = Arc::new(MockRuntime::new());
let supervisor = Arc::new(ContainerSupervisor::new(runtime.clone()));
let manager = ServiceManager::new(runtime).with_container_supervisor(supervisor.clone());
// Add service
let spec = mock_spec();
Box::pin(manager.upsert_service("api".to_string(), spec))
.await
.unwrap();
// Scale up - containers should be registered with supervisor
manager.scale_service("api", 2).await.unwrap();
// Verify containers are supervised
assert_eq!(supervisor.supervised_count().await, 2);
// Scale down - containers should be unregistered
manager.scale_service("api", 1).await.unwrap();
assert_eq!(supervisor.supervised_count().await, 1);
// Remove service - remaining containers should be unregistered
manager.remove_service("api").await.unwrap();
assert_eq!(supervisor.supervised_count().await, 0);
}
#[tokio::test]
async fn test_service_manager_supervisor_state() {
let runtime: Arc<dyn Runtime + Send + Sync> = Arc::new(MockRuntime::new());
let supervisor = Arc::new(ContainerSupervisor::new(runtime.clone()));
let manager = ServiceManager::new(runtime).with_container_supervisor(supervisor);
// Add and scale service
let spec = mock_spec();
Box::pin(manager.upsert_service("web".to_string(), spec))
.await
.unwrap();
manager.scale_service("web", 1).await.unwrap();
// Check supervised state
let container_id = ContainerId {
service: "web".to_string(),
replica: 1,
};
let state = manager.get_container_supervised_state(&container_id).await;
assert_eq!(state, Some(SupervisedState::Running));
}
#[tokio::test]
async fn test_service_manager_start_supervisor() {
let runtime: Arc<dyn Runtime + Send + Sync> = Arc::new(MockRuntime::new());
let supervisor = Arc::new(ContainerSupervisor::new(runtime.clone()));
let manager = ServiceManager::new(runtime).with_container_supervisor(supervisor.clone());
// Start the supervisor
let handle = manager.start_container_supervisor().unwrap();
// Give it time to start
tokio::time::sleep(Duration::from_millis(50)).await;
assert!(supervisor.is_running());
// Shutdown
manager.shutdown_container_supervisor();
// Wait for it to stop
tokio::time::timeout(Duration::from_secs(1), handle)
.await
.unwrap()
.unwrap();
assert!(!supervisor.is_running());
}
#[tokio::test]
async fn test_service_manager_supervisor_not_configured() {
let runtime: Arc<dyn Runtime + Send + Sync> = Arc::new(MockRuntime::new());
let manager = ServiceManager::new(runtime);
// Try to start supervisor without configuring it
let result = manager.start_container_supervisor();
assert!(result.is_err());
assert!(result.unwrap_err().to_string().contains("not configured"));
}
// ==================== Stream Registry Integration Tests ====================
fn mock_tcp_spec() -> ServiceSpec {
serde_yaml::from_str::<zlayer_spec::DeploymentSpec>(
r"
version: v1
deployment: test
services:
database:
rtype: service
image:
name: postgres:latest
endpoints:
- name: postgresql
protocol: tcp
port: 5432
scale:
mode: fixed
replicas: 1
",
)
.unwrap()
.services
.remove("database")
.unwrap()
}
fn mock_udp_spec() -> ServiceSpec {
serde_yaml::from_str::<zlayer_spec::DeploymentSpec>(
r"
version: v1
deployment: test
services:
dns:
rtype: service
image:
name: dns:latest
endpoints:
- name: dns
protocol: udp
port: 53
scale:
mode: fixed
replicas: 1
",
)
.unwrap()
.services
.remove("dns")
.unwrap()
}
fn mock_mixed_spec() -> ServiceSpec {
serde_yaml::from_str::<zlayer_spec::DeploymentSpec>(
r"
version: v1
deployment: test
services:
mixed:
rtype: service
image:
name: mixed:latest
endpoints:
- name: http
protocol: http
port: 8080
- name: grpc
protocol: tcp
port: 9000
- name: metrics
protocol: udp
port: 8125
scale:
mode: fixed
replicas: 1
",
)
.unwrap()
.services
.remove("mixed")
.unwrap()
}
#[tokio::test]
async fn test_service_manager_with_stream_registry_tcp() {
let runtime: Arc<dyn Runtime + Send + Sync> = Arc::new(MockRuntime::new());
let stream_registry = Arc::new(StreamRegistry::new());
let mut manager = ServiceManager::new(runtime);
manager.set_stream_registry(stream_registry.clone());
manager.set_deployment_name("test".to_string());
// Add TCP-only service
let spec = mock_tcp_spec();
Box::pin(manager.upsert_service("database".to_string(), spec))
.await
.unwrap();
// Verify TCP route was registered
assert_eq!(stream_registry.tcp_count(), 1);
assert!(stream_registry.tcp_ports().contains(&5432));
// Remove service and verify cleanup
manager.remove_service("database").await.unwrap();
assert_eq!(stream_registry.tcp_count(), 0);
}
#[tokio::test]
async fn test_service_manager_with_stream_registry_udp() {
let runtime: Arc<dyn Runtime + Send + Sync> = Arc::new(MockRuntime::new());
let stream_registry = Arc::new(StreamRegistry::new());
let mut manager = ServiceManager::new(runtime);
manager.set_stream_registry(stream_registry.clone());
manager.set_deployment_name("test".to_string());
// Add UDP-only service
let spec = mock_udp_spec();
Box::pin(manager.upsert_service("dns".to_string(), spec))
.await
.unwrap();
// Verify UDP route was registered
assert_eq!(stream_registry.udp_count(), 1);
assert!(stream_registry.udp_ports().contains(&53));
// Remove service and verify cleanup
manager.remove_service("dns").await.unwrap();
assert_eq!(stream_registry.udp_count(), 0);
}
#[tokio::test]
async fn test_service_manager_with_stream_registry_mixed() {
let runtime: Arc<dyn Runtime + Send + Sync> = Arc::new(MockRuntime::new());
let stream_registry = Arc::new(StreamRegistry::new());
let mut manager = ServiceManager::new(runtime);
manager.set_stream_registry(stream_registry.clone());
manager.set_deployment_name("test".to_string());
// Add mixed service (HTTP + TCP + UDP)
let spec = mock_mixed_spec();
Box::pin(manager.upsert_service("mixed".to_string(), spec))
.await
.unwrap();
// Verify stream routes were registered
assert_eq!(stream_registry.tcp_count(), 1); // TCP: 9000
assert_eq!(stream_registry.udp_count(), 1); // UDP: 8125
assert!(stream_registry.tcp_ports().contains(&9000));
assert!(stream_registry.udp_ports().contains(&8125));
// Remove service and verify stream cleanup
manager.remove_service("mixed").await.unwrap();
assert_eq!(stream_registry.tcp_count(), 0);
assert_eq!(stream_registry.udp_count(), 0);
}
#[tokio::test]
async fn test_service_manager_stream_registry_builder() {
let runtime: Arc<dyn Runtime + Send + Sync> = Arc::new(MockRuntime::new());
let stream_registry = Arc::new(StreamRegistry::new());
// Test builder pattern
let manager = ServiceManager::new(runtime).with_stream_registry(stream_registry.clone());
// Verify stream registry is accessible
assert!(manager.stream_registry().is_some());
}
#[tokio::test]
async fn test_tcp_service_without_stream_registry() {
let runtime: Arc<dyn Runtime + Send + Sync> = Arc::new(MockRuntime::new());
// Manager without stream registry
let mut manager = ServiceManager::new(runtime);
manager.set_deployment_name("test".to_string());
// Add TCP service - should log warning but not fail
let spec = mock_tcp_spec();
Box::pin(manager.upsert_service("database".to_string(), spec))
.await
.unwrap();
// No stream registry to check, but service should be tracked
let services = manager.list_services().await;
assert!(services.contains(&"database".to_string()));
}
}