mecha10_cli/services/
process.rs

1#![allow(dead_code)]
2
3//! Process service for managing child processes
4//!
5//! This service provides process lifecycle management for CLI commands,
6//! particularly for the `dev` and `run` commands that need to manage
7//! multiple node processes.
8//!
9//! This is a thin wrapper around mecha10-runtime's ProcessManager,
10//! delegating core process management to the runtime layer.
11
12use anyhow::{Context, Result};
13use mecha10_runtime::ProcessManager;
14use std::collections::HashMap;
15use std::path::Path;
16use std::process::{Command, Stdio};
17
/// Process service for managing child processes.
///
/// Owns a single `mecha10_runtime::ProcessManager` and forwards tracking,
/// status and shutdown requests to it. On top of that it adds CLI-side
/// conveniences: binary path resolution, several `spawn_*` variants and
/// `cargo build` helpers.
///
/// # Examples
///
/// ```rust,ignore
/// use mecha10_cli::services::ProcessService;
///
/// # async fn example() -> anyhow::Result<()> {
/// let mut service = ProcessService::new();
///
/// // Spawn a node process
/// service.spawn_node("camera_driver", "target/debug/camera_driver", &[])?;
///
/// // Check status
/// let status = service.get_status();
/// println!("Running processes: {:?}", status);
///
/// // Stop a specific process
/// service.stop("camera_driver")?;
///
/// // Cleanup all processes
/// service.cleanup();
/// # Ok(())
/// # }
/// ```
pub struct ProcessService {
    /// Runtime process manager that every tracking/stop/status call is
    /// forwarded to.
    manager: ProcessManager,
}
51
52impl ProcessService {
53    /// Create a new process service
54    pub fn new() -> Self {
55        Self {
56            manager: ProcessManager::new(),
57        }
58    }
59
60    /// Track dependency relationship for a node
61    ///
62    /// # Arguments
63    ///
64    /// * `node` - Name of the node
65    /// * `dependencies` - List of nodes this node depends on
66    pub fn track_dependency(&mut self, node: &str, dependencies: Vec<String>) {
67        for dep in dependencies {
68            self.manager.add_dependency(node.to_string(), dep);
69        }
70    }
71
    /// Get the order in which nodes should be stopped.
    ///
    /// Returns whatever `ProcessManager::shutdown_order` computes. The
    /// intended ordering is reverse dependency order:
    /// - Dependent (high-level) nodes first
    /// - Then the (low-level) nodes they depend on
    ///
    /// so that a node is not stopped while other nodes still depend on it.
    /// NOTE(review): the ordering itself is implemented in the runtime crate
    /// and is not visible from this file.
    pub fn get_shutdown_order(&self) -> Vec<String> {
        self.manager.shutdown_order()
    }
84
85    /// Check if we're in framework development mode
86    ///
87    /// Framework dev mode is detected by:
88    /// 1. MECHA10_FRAMEWORK_PATH environment variable
89    /// 2. Existence of .cargo/config.toml with patches
90    pub fn is_framework_dev_mode() -> bool {
91        // Check MECHA10_FRAMEWORK_PATH
92        if std::env::var("MECHA10_FRAMEWORK_PATH").is_ok() {
93            return true;
94        }
95
96        // Check if .cargo/config.toml exists (indicates framework dev)
97        std::path::Path::new(".cargo/config.toml").exists()
98    }
99
100    /// Find globally installed binary for a node
101    ///
102    /// Searches in:
103    /// 1. ~/.cargo/bin/{node_name}
104    /// 2. ~/.mecha10/bin/{node_name}
105    ///
106    /// # Arguments
107    ///
108    /// * `node_name` - Name of the node (e.g., "simulation-bridge")
109    ///
110    /// # Returns
111    ///
112    /// Path to the binary if found, None otherwise
113    pub fn find_global_binary(node_name: &str) -> Option<std::path::PathBuf> {
114        if let Some(home) = dirs::home_dir() {
115            // Try ~/.cargo/bin/ first
116            let cargo_bin = home.join(".cargo/bin").join(node_name);
117            if cargo_bin.exists() && cargo_bin.is_file() {
118                return Some(cargo_bin);
119            }
120
121            // Try ~/.mecha10/bin/
122            let mecha10_bin = home.join(".mecha10/bin").join(node_name);
123            if mecha10_bin.exists() && mecha10_bin.is_file() {
124                return Some(mecha10_bin);
125            }
126        }
127
128        None
129    }
130
131    /// Resolve binary path for a node with smart resolution
132    ///
133    /// Resolution strategy:
134    /// 1. If framework dev mode: use local build (target/debug or target/release)
135    /// 2. If global binary exists: use global binary
136    /// 3. Fallback: use local build path
137    ///
138    /// # Arguments
139    ///
140    /// * `node_name` - Name of the node
141    /// * `is_monorepo_node` - Whether this is a framework node
142    /// * `project_name` - Name of the project
143    ///
144    /// # Returns
145    ///
146    /// Path to the binary to execute
147    pub fn resolve_node_binary(node_name: &str, is_monorepo_node: bool, project_name: &str) -> String {
148        // Framework dev mode: always use local builds
149        if Self::is_framework_dev_mode() {
150            return Self::get_local_binary_path(node_name, is_monorepo_node, project_name);
151        }
152
153        // For monorepo (framework) nodes, check for global installation
154        if is_monorepo_node {
155            if let Some(global_path) = Self::find_global_binary(node_name) {
156                return global_path.to_string_lossy().to_string();
157            }
158        }
159
160        // Fallback to local build
161        Self::get_local_binary_path(node_name, is_monorepo_node, project_name)
162    }
163
164    /// Get local binary path (in target/ directory)
165    fn get_local_binary_path(node_name: &str, is_monorepo_node: bool, project_name: &str) -> String {
166        if is_monorepo_node {
167            // Monorepo nodes run via the project binary with 'node' subcommand
168            format!("target/release/{}", project_name)
169        } else {
170            // Local nodes have their own binary
171            format!("target/release/{}", node_name)
172        }
173    }
174
175    /// Resolve path to mecha10-node-runner binary
176    ///
177    /// Resolution strategy:
178    /// 1. Framework dev mode: $MECHA10_FRAMEWORK_PATH/target/release/mecha10-node-runner
179    /// 2. Global installation: ~/.cargo/bin/mecha10-node-runner
180    /// 3. Fallback: "mecha10-node-runner" (rely on PATH)
181    ///
182    /// # Returns
183    ///
184    /// Path to the mecha10-node-runner binary
185    pub fn resolve_node_runner_path() -> String {
186        // Framework dev mode: use framework's target directory
187        if let Ok(framework_path) = std::env::var("MECHA10_FRAMEWORK_PATH") {
188            let framework_binary = std::path::PathBuf::from(&framework_path).join("target/release/mecha10-node-runner");
189
190            if framework_binary.exists() {
191                return framework_binary.to_string_lossy().to_string();
192            }
193
194            // Try debug build if release not available
195            let framework_binary_debug =
196                std::path::PathBuf::from(&framework_path).join("target/debug/mecha10-node-runner");
197
198            if framework_binary_debug.exists() {
199                return framework_binary_debug.to_string_lossy().to_string();
200            }
201        }
202
203        // Try global installation
204        if let Some(global_path) = Self::find_global_binary("mecha10-node-runner") {
205            return global_path.to_string_lossy().to_string();
206        }
207
208        // Fallback: rely on PATH
209        "mecha10-node-runner".to_string()
210    }
211
212    /// Spawn a node process
213    ///
214    /// # Arguments
215    ///
216    /// * `name` - Name to identify the process
217    /// * `binary_path` - Path to the binary to execute
218    /// * `args` - Command-line arguments
219    ///
220    /// # Errors
221    ///
222    /// Returns an error if the process cannot be spawned
223    pub fn spawn_node(&mut self, name: &str, binary_path: &str, args: &[&str]) -> Result<u32> {
224        let child = Command::new(binary_path)
225            .args(args)
226            .stdout(Stdio::inherit())
227            .stderr(Stdio::inherit())
228            .spawn()
229            .with_context(|| format!("Failed to spawn process: {}", binary_path))?;
230
231        let pid = child.id();
232        self.manager.track(name.to_string(), child);
233
234        Ok(pid)
235    }
236
237    /// Spawn a process with output capture
238    ///
239    /// # Arguments
240    ///
241    /// * `name` - Name to identify the process
242    /// * `binary_path` - Path to the binary to execute
243    /// * `args` - Command-line arguments
244    ///
245    /// # Errors
246    ///
247    /// Returns an error if the process cannot be spawned
248    pub fn spawn_with_output(&mut self, name: &str, binary_path: &str, args: &[&str]) -> Result<u32> {
249        let child = Command::new(binary_path)
250            .args(args)
251            .stdout(Stdio::piped())
252            .stderr(Stdio::piped())
253            .spawn()
254            .with_context(|| format!("Failed to spawn process: {}", binary_path))?;
255
256        let pid = child.id();
257        self.manager.track(name.to_string(), child);
258
259        Ok(pid)
260    }
261
262    /// Spawn a process with custom environment variables
263    ///
264    /// # Arguments
265    ///
266    /// * `name` - Name to identify the process
267    /// * `binary_path` - Path to the binary to execute
268    /// * `args` - Command-line arguments
269    /// * `env` - Environment variables to set
270    pub fn spawn_with_env(
271        &mut self,
272        name: &str,
273        binary_path: &str,
274        args: &[&str],
275        env: HashMap<String, String>,
276    ) -> Result<u32> {
277        // Create logs directory if it doesn't exist
278        let logs_dir = std::path::PathBuf::from("logs");
279        if !logs_dir.exists() {
280            std::fs::create_dir_all(&logs_dir)?;
281        }
282
283        // Create log file for this process
284        let log_file_path = logs_dir.join(format!("{}.log", name));
285        let log_file = std::fs::OpenOptions::new()
286            .create(true)
287            .append(true)
288            .open(&log_file_path)
289            .with_context(|| format!("Failed to create log file: {}", log_file_path.display()))?;
290
291        // Clone for stderr
292        let log_file_stderr = log_file.try_clone().context("Failed to clone log file handle")?;
293
294        let mut cmd = Command::new(binary_path);
295        cmd.args(args)
296            .envs(&env)
297            .stdout(log_file) // Redirect stdout to log file
298            .stderr(log_file_stderr); // Redirect stderr to log file
299
300        // On Unix: Create new process group to prevent terminal signals from reaching child processes
301        // This ensures Ctrl+C in the terminal only affects the main CLI process, not node-runner
302        #[cfg(unix)]
303        {
304            use std::os::unix::process::CommandExt;
305            cmd.process_group(0); // 0 = create new process group with same ID as child PID
306        }
307
308        let child = cmd
309            .spawn()
310            .with_context(|| format!("Failed to spawn process: {}", binary_path))?;
311
312        let pid = child.id();
313        self.manager.track(name.to_string(), child);
314
315        Ok(pid)
316    }
317
318    /// Spawn a process in a specific working directory
319    ///
320    /// # Arguments
321    ///
322    /// * `name` - Name to identify the process
323    /// * `binary_path` - Path to the binary to execute
324    /// * `args` - Command-line arguments
325    /// * `working_dir` - Working directory for the process
326    pub fn spawn_in_dir(
327        &mut self,
328        name: &str,
329        binary_path: &str,
330        args: &[&str],
331        working_dir: impl AsRef<Path>,
332    ) -> Result<u32> {
333        let child = Command::new(binary_path)
334            .args(args)
335            .current_dir(working_dir)
336            .stdout(Stdio::inherit())
337            .stderr(Stdio::inherit())
338            .spawn()
339            .with_context(|| format!("Failed to spawn process: {}", binary_path))?;
340
341        let pid = child.id();
342        self.manager.track(name.to_string(), child);
343
344        Ok(pid)
345    }
346
347    /// Spawn multiple node processes from a list
348    ///
349    /// # Arguments
350    ///
351    /// * `nodes` - Vec of (name, binary_path, args) tuples
352    ///
353    /// # Returns
354    ///
355    /// HashMap of node names to PIDs
356    pub fn spawn_nodes(&mut self, nodes: Vec<(&str, &str, Vec<&str>)>) -> Result<HashMap<String, u32>> {
357        let mut pids = HashMap::new();
358
359        for (name, binary_path, args) in nodes {
360            match self.spawn_node(name, binary_path, &args) {
361                Ok(pid) => {
362                    pids.insert(name.to_string(), pid);
363                }
364                Err(e) => {
365                    eprintln!("Failed to spawn {}: {}", name, e);
366                }
367            }
368        }
369
370        Ok(pids)
371    }
372
373    /// Get status of all processes
374    ///
375    /// Returns a HashMap mapping process names to status strings
376    pub fn get_status(&mut self) -> HashMap<String, String> {
377        use mecha10_runtime::ProcessStatus;
378
379        self.manager
380            .status_all()
381            .into_iter()
382            .map(|(name, status)| {
383                let status_str = match status {
384                    ProcessStatus::Running => "running".to_string(),
385                    ProcessStatus::Exited(code) => format!("exited (code: {})", code),
386                    ProcessStatus::Error => "error".to_string(),
387                };
388                (name, status_str)
389            })
390            .collect()
391    }
392
    /// Get the number of tracked processes.
    ///
    /// Returns the process manager's `len()`.
    pub fn count(&self) -> usize {
        self.manager.len()
    }
397
    /// Check whether no processes are being tracked.
    ///
    /// Returns the process manager's `is_empty()`, i.e. `true` when nothing
    /// is tracked.
    pub fn is_empty(&self) -> bool {
        self.manager.is_empty()
    }
402
403    /// Stop a specific process by name
404    ///
405    /// # Arguments
406    ///
407    /// * `name` - Name of the process to stop
408    ///
409    /// # Errors
410    ///
411    /// Returns an error if the process is not found
412    ///
413    /// Note: This uses a default 10-second timeout for graceful shutdown.
414    /// Use stop_with_timeout() for custom timeout.
415    pub fn stop(&mut self, name: &str) -> Result<()> {
416        self.manager.stop_graceful(name, std::time::Duration::from_secs(10))
417    }
418
    /// Stop a process, allowing `timeout` for a graceful shutdown.
    ///
    /// Forwards to `ProcessManager::stop_graceful`.
    /// NOTE(review): the original documentation states that this tries a
    /// graceful shutdown (SIGTERM on Unix) and force kills after the timeout;
    /// that behaviour lives in the runtime crate and is not visible here.
    ///
    /// # Arguments
    ///
    /// * `name` - Name of the process to stop
    /// * `timeout` - How long to wait for graceful shutdown
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `ProcessManager::stop_graceful`
    pub fn stop_with_timeout(&mut self, name: &str, timeout: std::time::Duration) -> Result<()> {
        self.manager.stop_graceful(name, timeout)
    }
434
    /// Force kill a process by name.
    ///
    /// Forwards to `ProcessManager::force_kill`.
    ///
    /// # Arguments
    ///
    /// * `name` - Name of the process to kill
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `ProcessManager::force_kill`
    pub fn force_kill(&mut self, name: &str) -> Result<()> {
        self.manager.force_kill(name)
    }
443
    /// Stop all tracked processes.
    ///
    /// Forwards to `ProcessManager::shutdown_all`.
    /// NOTE(review): the original documentation states that the runtime
    /// handles dependency-based shutdown ordering, graceful shutdown with a
    /// timeout and a force-kill fallback; none of that is visible in this file.
    pub fn cleanup(&mut self) {
        self.manager.shutdown_all();
    }
453
    /// Check if a process is running.
    ///
    /// Forwards to `ProcessManager::is_running`. Takes `&mut self` because
    /// the manager's method requires mutable access.
    ///
    /// # Arguments
    ///
    /// * `name` - Name of the process to check
    pub fn is_running(&mut self, name: &str) -> bool {
        self.manager.is_running(name)
    }
462
    /// Get mutable access to the underlying `ProcessManager`.
    ///
    /// Useful for advanced operations this wrapper does not expose, or when
    /// migrating existing code that talks to the runtime's manager directly.
    pub fn manager(&mut self) -> &mut ProcessManager {
        &mut self.manager
    }
470
471    /// Build a node binary if needed
472    ///
473    /// Helper method to build a specific node package
474    ///
475    /// # Arguments
476    ///
477    /// * `node_name` - Name of the node to build
478    /// * `release` - Whether to build in release mode
479    pub fn build_node(&self, node_name: &str, release: bool) -> Result<()> {
480        let mut cmd = Command::new("cargo");
481        cmd.arg("build");
482
483        if release {
484            cmd.arg("--release");
485        }
486
487        cmd.arg("--bin").arg(node_name);
488
489        let output = cmd
490            .output()
491            .with_context(|| format!("Failed to build node: {}", node_name))?;
492
493        if !output.status.success() {
494            let stderr = String::from_utf8_lossy(&output.stderr);
495            return Err(anyhow::anyhow!("Build failed for {}: {}", node_name, stderr));
496        }
497
498        Ok(())
499    }
500
501    /// Build all nodes in the workspace
502    ///
503    /// # Arguments
504    ///
505    /// * `release` - Whether to build in release mode
506    pub fn build_all(&self, release: bool) -> Result<()> {
507        let mut cmd = Command::new("cargo");
508        cmd.arg("build");
509
510        if release {
511            cmd.arg("--release");
512        }
513
514        cmd.arg("--all");
515
516        let output = cmd.output().context("Failed to build workspace")?;
517
518        if !output.status.success() {
519            let stderr = String::from_utf8_lossy(&output.stderr);
520            return Err(anyhow::anyhow!("Build failed: {}", stderr));
521        }
522
523        Ok(())
524    }
525
526    /// Build a binary from the framework monorepo
527    ///
528    /// This builds a binary from the framework path (MECHA10_FRAMEWORK_PATH).
529    /// Used for binaries like `mecha10-node-runner` that exist in the monorepo
530    /// but need to be built when running from a generated project.
531    ///
532    /// # Arguments
533    ///
534    /// * `package_name` - Name of the package to build (e.g., "mecha10-node-runner")
535    /// * `release` - Whether to build in release mode
536    ///
537    /// # Returns
538    ///
539    /// Ok(()) on success, or error if build fails or framework path not set
540    pub fn build_from_framework(&self, package_name: &str, release: bool) -> Result<()> {
541        // Get framework path from environment
542        let framework_path = std::env::var("MECHA10_FRAMEWORK_PATH")
543            .context("MECHA10_FRAMEWORK_PATH not set - cannot build from framework")?;
544
545        let mut cmd = Command::new("cargo");
546        cmd.arg("build");
547
548        if release {
549            cmd.arg("--release");
550        }
551
552        cmd.arg("-p").arg(package_name);
553        cmd.current_dir(&framework_path);
554
555        let output = cmd
556            .output()
557            .with_context(|| format!("Failed to build package from framework: {}", package_name))?;
558
559        if !output.status.success() {
560            let stderr = String::from_utf8_lossy(&output.stderr);
561            return Err(anyhow::anyhow!(
562                "Build failed for {} (from framework): {}",
563                package_name,
564                stderr
565            ));
566        }
567
568        Ok(())
569    }
570
571    /// Build only packages needed by the current project (smart selective build)
572    ///
573    /// For generated projects, this just builds the project binary.
574    /// Cargo automatically builds only the dependencies actually used.
575    /// With .cargo/config.toml patches, this rebuilds framework packages from source.
576    ///
577    /// # Arguments
578    ///
579    /// * `release` - Whether to build in release mode
580    ///
581    /// # Returns
582    ///
583    /// Ok(()) on success, or error if build fails
584    pub fn build_project_packages(&self, release: bool) -> Result<()> {
585        use crate::types::ProjectConfig;
586
587        // Load project config
588        let config_path = std::path::Path::new("mecha10.json");
589        if !config_path.exists() {
590            // Fallback to build_all if no project config
591            return self.build_all(release);
592        }
593
594        // Parse config to get project name
595        let config_content = std::fs::read_to_string(config_path)?;
596        let config: ProjectConfig = serde_json::from_str(&config_content)?;
597
598        // Build just the project binary
599        // Cargo will automatically:
600        // 1. Resolve dependencies from Cargo.toml
601        // 2. Apply .cargo/config.toml patches (framework dev mode)
602        // 3. Build only the dependencies actually used
603        // 4. Use incremental compilation for unchanged code
604        let mut cmd = Command::new("cargo");
605        cmd.arg("build");
606
607        if release {
608            cmd.arg("--release");
609        }
610
611        // Build the project binary - Cargo handles the rest
612        cmd.arg("--bin").arg(&config.name);
613
614        let output = cmd.output().context("Failed to build project")?;
615
616        if !output.status.success() {
617            let stderr = String::from_utf8_lossy(&output.stderr);
618            return Err(anyhow::anyhow!("Build failed: {}", stderr));
619        }
620
621        Ok(())
622    }
623
624    /// Restart a specific process
625    ///
626    /// Stops the process if running and starts it again
627    ///
628    /// # Arguments
629    ///
630    /// * `name` - Name of the process
631    /// * `binary_path` - Path to the binary
632    /// * `args` - Command-line arguments
633    pub fn restart(&mut self, name: &str, binary_path: &str, args: &[&str]) -> Result<u32> {
634        // Stop if running
635        if self.is_running(name) {
636            self.stop(name)?;
637            // Give it a moment to shutdown
638            std::thread::sleep(std::time::Duration::from_millis(100));
639        }
640
641        // Start again
642        self.spawn_node(name, binary_path, args)
643    }
644
645    /// Restart all processes
646    ///
647    /// # Arguments
648    ///
649    /// * `nodes` - Vec of (name, binary_path, args) tuples
650    pub fn restart_all(&mut self, nodes: Vec<(&str, &str, Vec<&str>)>) -> Result<HashMap<String, u32>> {
651        // Stop all
652        self.cleanup();
653
654        // Small delay for cleanup
655        std::thread::sleep(std::time::Duration::from_millis(500));
656
657        // Start all
658        self.spawn_nodes(nodes)
659    }
660
661    /// Spawn a node using mecha10-node-runner
662    ///
663    /// This is the simplified spawning method for Phase 2+ of Node Lifecycle Architecture.
664    /// It delegates all complexity (binary resolution, model pulling, env setup) to node-runner.
665    ///
666    /// # Arguments
667    ///
668    /// * `node_name` - Name of the node to run
669    ///
670    /// # Returns
671    ///
672    /// Process ID of the spawned node-runner instance
673    ///
674    /// # Errors
675    ///
676    /// Returns an error if the node-runner cannot be spawned
677    ///
678    /// # Configuration
679    ///
680    /// The node-runner reads configuration from the node's config file (e.g., `configs/nodes/{node_name}.json`)
681    /// and supports the following runtime settings:
682    ///
683    /// ```json
684    /// {
685    ///   "runtime": {
686    ///     "restart_policy": "on-failure",  // never, on-failure, always
687    ///     "max_retries": 3,
688    ///     "backoff_secs": 1
689    ///   },
690    ///   "depends_on": ["camera", "lidar"],
691    ///   "startup_timeout_secs": 30
692    /// }
693    /// ```
694    ///
695    /// To enable dependency checking, use: `mecha10-node-runner --wait-for-deps <node-name>`
696    pub fn spawn_node_runner(&mut self, node_name: &str) -> Result<u32> {
697        // Simply spawn: mecha10-node-runner <node-name>
698        // The node-runner handles:
699        // - Binary path resolution (monorepo vs local)
700        // - Model pulling (if node needs AI models)
701        // - Environment setup
702        // - Log redirection
703        // - Restart policies (from config)
704        // - Health monitoring
705        // - Dependency checking (if --wait-for-deps enabled)
706        // - Actual node execution
707
708        // Resolve path to mecha10-node-runner binary
709        let runner_path = Self::resolve_node_runner_path();
710
711        self.spawn_with_env(
712            node_name,
713            &runner_path,
714            &[node_name],
715            HashMap::new(), // Runner gets env from Context
716        )
717    }
718}
719
/// `ProcessService::default()` is identical to `ProcessService::new()`.
impl Default for ProcessService {
    fn default() -> Self {
        Self::new()
    }
}
725
/// Intentionally empty `Drop`: the service performs no teardown of its own.
impl Drop for ProcessService {
    fn drop(&mut self) {
        // Nothing to do here. The `manager` field is dropped right after this
        // body runs; per the original note, ProcessManager's own Drop is
        // expected to handle graceful shutdown (not verifiable from this file).
    }
}