// mecha10_runtime/runtime.rs

1//! Main runtime for Mecha10 nodes
2
3use crate::config::{LogFormat, RestartPolicy, RuntimeConfig};
4use crate::health::HealthChecker;
5use crate::launcher::Launcher;
6use crate::lifecycle::{LifecycleManager, ModeConfig};
7use crate::logging::init_logging;
8use crate::node::NodeRunner;
9use crate::shutdown::{wait_for_signal, ShutdownHandle};
10use crate::supervisor::Supervisor;
11use anyhow::Result;
12use std::collections::HashMap;
13use std::sync::Arc;
14use std::time::Duration;
15
16/// Main runtime for supervising and running nodes
17pub struct Runtime {
18    config: RuntimeConfig,
19    health_checker: Arc<HealthChecker>,
20    supervisor: Arc<Supervisor>,
21    launcher: Option<Arc<Launcher>>,
22    lifecycle: Option<LifecycleManager>,
23    shutdown: ShutdownHandle,
24}
25
26impl Runtime {
27    /// Create a new runtime builder
28    pub fn builder() -> RuntimeBuilder {
29        RuntimeBuilder::default()
30    }
31
32    /// Run a single node until completion or shutdown
33    pub async fn run_node(&mut self, name: &str, node: Box<dyn NodeRunner>) -> Result<()> {
34        tracing::info!("Starting runtime for node: {}", name);
35
36        let handle = self.supervisor.launch_node(node);
37
38        // Wait for either node completion or shutdown signal
39        tokio::select! {
40            result = handle.wait() => {
41                result?;
42                tracing::info!("Node {} completed", name);
43            }
44            _ = wait_for_signal() => {
45                tracing::info!("Shutdown signal received");
46                self.shutdown.shutdown();
47
48                // Wait for graceful shutdown with timeout
49                tokio::time::timeout(
50                    self.config.shutdown_timeout,
51                    self.wait_shutdown()
52                ).await??;
53            }
54        }
55
56        Ok(())
57    }
58
59    /// Run multiple nodes until all complete or shutdown
60    pub async fn run_nodes(&mut self, nodes: Vec<Box<dyn NodeRunner>>) -> Result<()> {
61        tracing::info!("Starting runtime for {} nodes", nodes.len());
62
63        let handles = self.supervisor.launch_nodes(nodes);
64
65        // Wait for either all nodes to complete or shutdown signal
66        tokio::select! {
67            _ = async {
68                for handle in handles {
69                    let _ = handle.wait().await;
70                }
71            } => {
72                tracing::info!("All nodes completed");
73            }
74            _ = wait_for_signal() => {
75                tracing::info!("Shutdown signal received");
76                self.shutdown.shutdown();
77
78                // Wait for graceful shutdown with timeout
79                tokio::time::timeout(
80                    self.config.shutdown_timeout,
81                    self.wait_shutdown()
82                ).await??;
83            }
84        }
85
86        Ok(())
87    }
88
89    /// Run with launcher service for dynamic node management
90    pub async fn run_with_launcher(&mut self) -> Result<()> {
91        let _launcher = self
92            .launcher
93            .as_ref()
94            .ok_or_else(|| anyhow::anyhow!("Launcher not enabled. Use builder.with_launcher(true)"))?;
95
96        tracing::info!("Starting runtime with launcher service");
97
98        // Wait for shutdown signal
99        wait_for_signal().await;
100        tracing::info!("Shutdown signal received");
101        self.shutdown.shutdown();
102
103        // Wait for graceful shutdown with timeout
104        tokio::time::timeout(self.config.shutdown_timeout, self.wait_shutdown()).await??;
105
106        Ok(())
107    }
108
109    /// Run with lifecycle management
110    ///
111    /// This starts the runtime with lifecycle management enabled. The lifecycle manager
112    /// will handle mode transitions and start/stop nodes accordingly.
113    ///
114    /// The runtime will:
115    /// 1. Start nodes for the default mode
116    /// 2. Wait for shutdown signal
117    /// 3. Gracefully shut down all nodes
118    ///
119    /// Mode transitions should be triggered externally via the lifecycle manager's
120    /// `change_mode()` method.
121    pub async fn run_with_lifecycle(&mut self) -> Result<()> {
122        let lifecycle = self
123            .lifecycle
124            .as_mut()
125            .ok_or_else(|| anyhow::anyhow!("Lifecycle not enabled. Use builder.with_lifecycle()"))?;
126
127        tracing::info!("Starting runtime with lifecycle management");
128
129        // Start nodes for default mode
130        let default_mode = lifecycle.current_mode().to_string();
131        tracing::info!("Initializing default mode: {}", default_mode);
132
133        match lifecycle.change_mode(&default_mode).await {
134            Ok(result) => {
135                tracing::info!("Default mode initialized: started {} nodes", result.nodes_started.len());
136            }
137            Err(e) => {
138                tracing::error!("Failed to initialize default mode: {}", e);
139                return Err(e);
140            }
141        }
142
143        // Wait for shutdown signal
144        wait_for_signal().await;
145        tracing::info!("Shutdown signal received");
146        self.shutdown.shutdown();
147
148        // Wait for graceful shutdown with timeout
149        tokio::time::timeout(self.config.shutdown_timeout, self.wait_shutdown()).await??;
150
151        Ok(())
152    }
153
154    /// Wait for all nodes to shut down
155    async fn wait_shutdown(&self) -> Result<()> {
156        // Health checker will track when nodes complete shutdown
157        loop {
158            let statuses = self.health_checker.check_all().await;
159            if statuses.is_empty() {
160                break;
161            }
162            tokio::time::sleep(Duration::from_millis(100)).await;
163        }
164        Ok(())
165    }
166
167    /// Get the health checker
168    pub fn health_checker(&self) -> &Arc<HealthChecker> {
169        &self.health_checker
170    }
171
172    /// Get the supervisor
173    pub fn supervisor(&self) -> &Arc<Supervisor> {
174        &self.supervisor
175    }
176
177    /// Get the launcher (if enabled)
178    pub fn launcher(&self) -> Option<&Arc<Launcher>> {
179        self.launcher.as_ref()
180    }
181
182    /// Get the lifecycle manager (if enabled)
183    pub fn lifecycle(&self) -> Option<&LifecycleManager> {
184        self.lifecycle.as_ref()
185    }
186
187    /// Get mutable lifecycle manager (if enabled)
188    pub fn lifecycle_mut(&mut self) -> Option<&mut LifecycleManager> {
189        self.lifecycle.as_mut()
190    }
191
192    /// Get the shutdown handle
193    pub fn shutdown(&self) -> &ShutdownHandle {
194        &self.shutdown
195    }
196}
197
198/// Builder for creating a runtime with custom configuration
199#[derive(Default)]
200pub struct RuntimeBuilder {
201    config: RuntimeConfig,
202    enable_launcher: bool,
203    lifecycle_config: Option<(HashMap<String, ModeConfig>, String)>,
204}
205
206impl RuntimeBuilder {
207    /// Set log level (trace, debug, info, warn, error)
208    pub fn log_level(mut self, level: impl Into<String>) -> Self {
209        self.config.log_level = level.into();
210        self
211    }
212
213    /// Set log format
214    pub fn log_format(mut self, format: LogFormat) -> Self {
215        self.config.log_format = format;
216        self
217    }
218
219    /// Set restart policy
220    pub fn restart_policy(mut self, policy: RestartPolicy) -> Self {
221        self.config.restart_policy = policy;
222        self
223    }
224
225    /// Set health check interval
226    pub fn health_check_interval(mut self, interval: Duration) -> Self {
227        self.config.health_check_interval = interval;
228        self
229    }
230
231    /// Set graceful shutdown timeout
232    pub fn shutdown_timeout(mut self, timeout: Duration) -> Self {
233        self.config.shutdown_timeout = timeout;
234        self
235    }
236
237    /// Enable launcher service for dynamic node management
238    pub fn with_launcher(mut self, enable: bool) -> Self {
239        self.enable_launcher = enable;
240        self
241    }
242
243    /// Enable lifecycle management with mode configuration
244    ///
245    /// # Arguments
246    ///
247    /// * `modes` - Map of mode name to mode configuration
248    /// * `default_mode` - Initial mode to start in
249    pub fn with_lifecycle(mut self, modes: HashMap<String, ModeConfig>, default_mode: String) -> Self {
250        self.lifecycle_config = Some((modes, default_mode));
251        self
252    }
253
254    /// Build the runtime
255    pub fn build(self) -> Runtime {
256        // Initialize logging
257        init_logging(&self.config.log_level, self.config.log_format.clone());
258
259        // Create shutdown handle
260        let shutdown = ShutdownHandle::new();
261
262        // Create health checker
263        let health_checker = Arc::new(HealthChecker::new(self.config.health_check_interval));
264
265        // Create supervisor
266        let supervisor = Arc::new(Supervisor::new(
267            health_checker.clone(),
268            shutdown.clone(),
269            self.config.restart_policy.clone(),
270        ));
271
272        // Create launcher if enabled
273        let launcher = if self.enable_launcher {
274            Some(Arc::new(Launcher::new(supervisor.clone())))
275        } else {
276            None
277        };
278
279        // Create lifecycle manager if enabled
280        let lifecycle = if let Some((modes, default_mode)) = self.lifecycle_config {
281            Some(LifecycleManager::new(
282                supervisor.clone() as Arc<dyn crate::lifecycle::SupervisorTrait>,
283                modes,
284                default_mode,
285            ))
286        } else {
287            None
288        };
289
290        Runtime {
291            config: self.config,
292            health_checker,
293            supervisor,
294            launcher,
295            lifecycle,
296            shutdown,
297        }
298    }
299}