duende_test/
harness.rs

1//! Daemon test harness.
2//!
3//! # Toyota Way: Built-in Quality (品質の作り込み)
4//! Quality cannot be inspected in; it must be built in.
5
6use std::time::{Duration, Instant};
7
8use duende_core::types::HealthCheck;
9use duende_core::{Daemon, DaemonStatus, HealthStatus, Signal};
10use duende_platform::{DaemonHandle, NativeAdapter, Platform, PlatformAdapter, detect_platform};
11
12use crate::chaos::ChaosConfig;
13use crate::error::{Result, TestError};
14
/// Test harness for daemon lifecycle testing.
///
/// Bundles the platform under test, an optional chaos-injection
/// configuration, and the platform adapter used to spawn daemons. Construct
/// one with [`DaemonTestHarness::new`] or [`DaemonTestHarness::builder`].
pub struct DaemonTestHarness {
    /// Platform reported by [`DaemonTestHarness::platform`].
    platform: Platform,
    /// Chaos configuration cloned into each spawned handle; `None` when unset.
    chaos: Option<ChaosConfig>,
    /// Adapter used by `spawn` to launch daemons.
    adapter: Box<dyn PlatformAdapter>,
}
21
22impl DaemonTestHarness {
23    /// Creates a new test harness builder.
24    #[must_use]
25    pub fn builder() -> DaemonTestHarnessBuilder {
26        DaemonTestHarnessBuilder::default()
27    }
28
29    /// Creates a new test harness with default settings.
30    #[must_use]
31    pub fn new() -> Self {
32        Self::builder().build()
33    }
34
35    /// Spawns a daemon for testing.
36    ///
37    /// # Errors
38    /// Returns an error if spawning fails.
39    pub async fn spawn(&self, daemon: impl Daemon + 'static) -> Result<TestDaemonHandle> {
40        let handle = self.adapter.spawn(Box::new(daemon)).await?;
41
42        Ok(TestDaemonHandle {
43            inner: handle,
44            chaos: self.chaos.clone(),
45            // Create own adapter for health/signal operations
46            adapter: Box::new(NativeAdapter::new()),
47        })
48    }
49
50    /// Returns the platform being tested.
51    #[must_use]
52    pub const fn platform(&self) -> Platform {
53        self.platform
54    }
55}
56
57impl Default for DaemonTestHarness {
58    fn default() -> Self {
59        Self::new()
60    }
61}
62
/// Builder for [`DaemonTestHarness`].
///
/// Every setting is optional; unset values are resolved in
/// [`DaemonTestHarnessBuilder::build`].
#[derive(Default)]
pub struct DaemonTestHarnessBuilder {
    /// Explicit platform override; when `None`, `build` calls `detect_platform`.
    platform: Option<Platform>,
    /// Chaos configuration to attach to the harness; `None` when unset.
    chaos: Option<ChaosConfig>,
}
69
70impl DaemonTestHarnessBuilder {
71    /// Sets the platform to test on.
72    #[must_use]
73    pub const fn with_platform(mut self, platform: Platform) -> Self {
74        self.platform = Some(platform);
75        self
76    }
77
78    /// Enables chaos injection.
79    #[must_use]
80    pub fn with_chaos(mut self, config: ChaosConfig) -> Self {
81        self.chaos = Some(config);
82        self
83    }
84
85    /// Builds the test harness.
86    #[must_use]
87    pub fn build(self) -> DaemonTestHarness {
88        let platform = self.platform.unwrap_or_else(detect_platform);
89
90        // For now, always use native adapter for testing
91        let adapter: Box<dyn PlatformAdapter> = Box::new(NativeAdapter::new());
92
93        DaemonTestHarness {
94            platform,
95            chaos: self.chaos,
96            adapter,
97        }
98    }
99}
100
/// Handle to a daemon spawned for testing.
///
/// Wraps the platform-level [`DaemonHandle`] together with the chaos
/// configuration in effect and the adapter used to query status and deliver
/// signals.
pub struct TestDaemonHandle {
    /// Underlying platform handle; exposed via [`TestDaemonHandle::handle`].
    inner: DaemonHandle,
    /// Chaos configuration, if any; exposed via [`TestDaemonHandle::chaos`].
    chaos: Option<ChaosConfig>,
    /// Adapter used by `health_check` and `shutdown` for status and signal calls.
    adapter: Box<dyn PlatformAdapter>,
}
107
108impl TestDaemonHandle {
109    /// Performs a health check on the daemon.
110    ///
111    /// Checks:
112    /// 1. Process is running (via adapter status)
113    /// 2. Process is responsive (can receive signal 0)
114    /// 3. Optional: memory/CPU within limits
115    ///
116    /// # Errors
117    /// Returns an error if health check encounters an error.
118    pub async fn health_check(&self) -> Result<HealthStatus> {
119        let start = Instant::now();
120        let mut checks = Vec::new();
121
122        // Check 1: Process status via adapter
123        let status_result = self.adapter.status(&self.inner).await;
124        let process_running = match &status_result {
125            Ok(DaemonStatus::Running) => {
126                checks.push(HealthCheck {
127                    name: "process_status".to_string(),
128                    passed: true,
129                    message: Some("Process is running".to_string()),
130                });
131                true
132            }
133            Ok(status) => {
134                checks.push(HealthCheck {
135                    name: "process_status".to_string(),
136                    passed: false,
137                    message: Some(format!("Process status: {status:?}")),
138                });
139                false
140            }
141            Err(e) => {
142                checks.push(HealthCheck {
143                    name: "process_status".to_string(),
144                    passed: false,
145                    message: Some(format!("Failed to check status: {e}")),
146                });
147                false
148            }
149        };
150
151        // Check 2: Process has valid PID
152        let has_pid = self.inner.pid.is_some();
153        checks.push(HealthCheck {
154            name: "pid_valid".to_string(),
155            passed: has_pid,
156            message: self.inner.pid.map_or_else(
157                || Some("No PID assigned".to_string()),
158                |pid| Some(format!("PID: {pid}")),
159            ),
160        });
161
162        // Check 3: Optional Linux-specific checks via /proc
163        #[cfg(target_os = "linux")]
164        if let Some(pid) = self.inner.pid {
165            // Check if /proc/{pid} exists and is readable
166            let proc_path = format!("/proc/{pid}/stat");
167            let proc_exists = std::path::Path::new(&proc_path).exists();
168            checks.push(HealthCheck {
169                name: "proc_accessible".to_string(),
170                passed: proc_exists,
171                message: Some(if proc_exists {
172                    "Process info accessible via /proc".to_string()
173                } else {
174                    "Cannot access /proc info".to_string()
175                }),
176            });
177        }
178
179        let latency_ms = start.elapsed().as_millis() as u64;
180        let healthy = process_running && has_pid;
181
182        Ok(HealthStatus {
183            healthy,
184            checks,
185            latency_ms,
186            last_check_epoch_ms: std::time::SystemTime::now()
187                .duration_since(std::time::UNIX_EPOCH)
188                .map(|d| d.as_millis() as u64)
189                .unwrap_or(0),
190        })
191    }
192
193    /// Shuts down the daemon gracefully.
194    ///
195    /// # Shutdown sequence (Toyota Way: Jidoka - stop-on-error)
196    /// 1. Send SIGTERM for graceful shutdown
197    /// 2. Wait for process to exit (up to timeout)
198    /// 3. If still running, send SIGKILL
199    /// 4. Verify process terminated
200    ///
201    /// # Errors
202    /// Returns an error if shutdown fails completely.
203    pub async fn shutdown(&self, timeout: Duration) -> Result<()> {
204        let Some(pid) = self.inner.pid else {
205            return Ok(()); // No process to shutdown
206        };
207
208        tracing::info!(pid = pid, timeout = ?timeout, "initiating graceful shutdown");
209
210        // Step 1: Send SIGTERM for graceful shutdown
211        if let Err(e) = self.adapter.signal(&self.inner, Signal::Term).await {
212            tracing::warn!(pid = pid, error = %e, "failed to send SIGTERM, trying SIGKILL");
213            // Process might already be dead, continue
214        }
215
216        // Step 2: Wait for process to exit with polling
217        let poll_interval = Duration::from_millis(50);
218        let start = Instant::now();
219
220        loop {
221            match self.adapter.status(&self.inner).await {
222                Ok(DaemonStatus::Stopped | DaemonStatus::Failed(_)) => {
223                    tracing::info!(pid = pid, elapsed = ?start.elapsed(), "daemon stopped gracefully");
224                    return Ok(());
225                }
226                Ok(_) => {
227                    // Still running
228                    if start.elapsed() >= timeout {
229                        break; // Timeout reached
230                    }
231                    tokio::time::sleep(poll_interval).await;
232                }
233                Err(e) => {
234                    // Error checking status - process likely dead
235                    tracing::debug!(pid = pid, error = %e, "status check failed, assuming stopped");
236                    return Ok(());
237                }
238            }
239        }
240
241        // Step 3: Timeout reached, send SIGKILL
242        tracing::warn!(pid = pid, "graceful shutdown timed out, sending SIGKILL");
243        if let Err(e) = self.adapter.signal(&self.inner, Signal::Kill).await {
244            tracing::debug!(pid = pid, error = %e, "SIGKILL failed, process may be dead");
245            return Ok(()); // Process likely already dead
246        }
247
248        // Step 4: Final verification with short wait
249        tokio::time::sleep(Duration::from_millis(100)).await;
250        match self.adapter.status(&self.inner).await {
251            Ok(DaemonStatus::Stopped | DaemonStatus::Failed(_)) => {
252                tracing::info!(pid = pid, "daemon killed");
253                Ok(())
254            }
255            Ok(status) => {
256                tracing::error!(pid = pid, status = ?status, "daemon failed to terminate");
257                Err(TestError::Shutdown(format!(
258                    "daemon PID {pid} failed to terminate after SIGKILL"
259                )))
260            }
261            Err(_) => {
262                // Error checking status - process likely dead
263                Ok(())
264            }
265        }
266    }
267
    /// Borrows the underlying platform-level [`DaemonHandle`].
    #[must_use]
    pub const fn handle(&self) -> &DaemonHandle {
        &self.inner
    }
273
    /// Borrows the chaos configuration attached to this handle, or `None`
    /// when the handle was created without one.
    #[must_use]
    pub const fn chaos(&self) -> Option<&ChaosConfig> {
        self.chaos.as_ref()
    }
279}
280
281#[cfg(test)]
282mod tests {
283    use super::*;
284    use async_trait::async_trait;
285    use duende_core::{DaemonConfig, DaemonContext, DaemonId, DaemonMetrics, ExitReason};
286
    /// Mock daemon for testing.
    ///
    /// Its `Daemon` implementation below succeeds unconditionally in every
    /// lifecycle method.
    struct MockDaemon {
        /// Identifier returned by `Daemon::id`.
        id: DaemonId,
        /// Name returned by `Daemon::name`.
        name: String,
        /// Metrics instance returned by `Daemon::metrics`.
        metrics: DaemonMetrics,
    }
293
294    impl MockDaemon {
295        fn new(name: &str) -> Self {
296            Self {
297                id: DaemonId::new(),
298                name: name.to_string(),
299                metrics: DaemonMetrics::new(),
300            }
301        }
302    }
303
304    #[async_trait]
305    impl Daemon for MockDaemon {
306        fn id(&self) -> DaemonId {
307            self.id
308        }
309
310        fn name(&self) -> &str {
311            &self.name
312        }
313
314        async fn init(&mut self, _config: &DaemonConfig) -> duende_core::error::Result<()> {
315            Ok(())
316        }
317
318        async fn run(
319            &mut self,
320            _ctx: &mut DaemonContext,
321        ) -> duende_core::error::Result<ExitReason> {
322            Ok(ExitReason::Graceful)
323        }
324
325        async fn shutdown(&mut self, _timeout: Duration) -> duende_core::error::Result<()> {
326            Ok(())
327        }
328
329        async fn health_check(&self) -> HealthStatus {
330            HealthStatus::healthy(1)
331        }
332
333        fn metrics(&self) -> &DaemonMetrics {
334            &self.metrics
335        }
336    }
337
338    #[test]
339    fn test_harness_builder() {
340        let harness = DaemonTestHarness::builder()
341            .with_platform(Platform::Native)
342            .build();
343
344        assert_eq!(harness.platform(), Platform::Native);
345    }
346
347    #[test]
348    fn test_harness_default() {
349        let harness = DaemonTestHarness::default();
350        // Platform should be detected
351        let platform = harness.platform();
352        assert!(matches!(
353            platform,
354            Platform::Native | Platform::Linux | Platform::MacOS | Platform::Container
355        ));
356    }
357
358    #[test]
359    fn test_harness_new() {
360        let harness = DaemonTestHarness::new();
361        // Should work the same as default
362        let _ = harness.platform();
363    }
364
365    #[test]
366    fn test_harness_with_chaos() {
367        let harness = DaemonTestHarness::builder()
368            .with_chaos(ChaosConfig::default())
369            .build();
370
371        // Should have chaos config (harness doesn't expose it directly)
372        assert!(harness.chaos.is_some());
373    }
374
375    #[test]
376    fn test_builder_default() {
377        let builder = DaemonTestHarnessBuilder::default();
378        let harness = builder.build();
379        // Should work with all defaults
380        let _ = harness.platform();
381    }
382
383    #[tokio::test]
384    async fn test_test_daemon_handle_health_check_running() {
385        // Use our own PID which should be running
386        let pid = std::process::id();
387        let handle = TestDaemonHandle {
388            inner: DaemonHandle::native(pid),
389            chaos: None,
390            adapter: Box::new(NativeAdapter::new()),
391        };
392
393        let health = handle.health_check().await;
394        assert!(health.is_ok());
395        let status = health.expect("health status");
396        assert!(status.healthy, "Our own process should be healthy");
397        assert!(!status.checks.is_empty(), "Should have checks");
398    }
399
400    #[tokio::test]
401    async fn test_test_daemon_handle_health_check_not_running() {
402        // Use a very high PID that shouldn't exist
403        let handle = TestDaemonHandle {
404            inner: DaemonHandle::native(4000000),
405            chaos: None,
406            adapter: Box::new(NativeAdapter::new()),
407        };
408
409        let health = handle.health_check().await;
410        assert!(health.is_ok());
411        let status = health.expect("health status");
412        assert!(!status.healthy, "Non-existent process should be unhealthy");
413    }
414
415    #[tokio::test]
416    async fn test_test_daemon_handle_health_check_no_pid() {
417        let handle = TestDaemonHandle {
418            inner: DaemonHandle {
419                platform: Platform::Native,
420                pid: None,
421                id: "no-pid".to_string(),
422            },
423            chaos: None,
424            adapter: Box::new(NativeAdapter::new()),
425        };
426
427        let health = handle.health_check().await;
428        assert!(health.is_ok());
429        let status = health.expect("health status");
430        assert!(!status.healthy, "No PID should be unhealthy");
431    }
432
433    #[tokio::test]
434    async fn test_test_daemon_handle_shutdown_no_pid() {
435        // Shutdown with no PID should succeed (nothing to do)
436        let handle = TestDaemonHandle {
437            inner: DaemonHandle {
438                platform: Platform::Native,
439                pid: None,
440                id: "no-pid".to_string(),
441            },
442            chaos: None,
443            adapter: Box::new(NativeAdapter::new()),
444        };
445
446        let result = handle.shutdown(Duration::from_secs(1)).await;
447        assert!(result.is_ok());
448    }
449
450    #[tokio::test]
451    async fn test_test_daemon_handle_shutdown_nonexistent() {
452        // Shutdown of non-existent process should succeed
453        let handle = TestDaemonHandle {
454            inner: DaemonHandle::native(4000000),
455            chaos: None,
456            adapter: Box::new(NativeAdapter::new()),
457        };
458
459        let result = handle.shutdown(Duration::from_millis(100)).await;
460        assert!(result.is_ok());
461    }
462
463    #[test]
464    fn test_test_daemon_handle_accessors() {
465        let chaos = ChaosConfig::default();
466        let handle = TestDaemonHandle {
467            inner: DaemonHandle::native(12345),
468            chaos: Some(chaos),
469            adapter: Box::new(NativeAdapter::new()),
470        };
471
472        assert_eq!(handle.handle().pid, Some(12345));
473        assert!(handle.chaos().is_some());
474    }
475
476    #[test]
477    fn test_test_daemon_handle_no_chaos() {
478        let handle = TestDaemonHandle {
479            inner: DaemonHandle::native(12345),
480            chaos: None,
481            adapter: Box::new(NativeAdapter::new()),
482        };
483
484        assert!(handle.chaos().is_none());
485    }
486
487    #[tokio::test]
488    async fn test_mock_daemon_lifecycle() {
489        use duende_core::Daemon;
490
491        let mut daemon = MockDaemon::new("test-daemon");
492        assert_eq!(daemon.name(), "test-daemon");
493        assert!(!daemon.id().as_uuid().is_nil());
494
495        // Init should succeed
496        let config = DaemonConfig::new("test", "/bin/test");
497        let result = daemon.init(&config).await;
498        assert!(result.is_ok());
499
500        // Health check should return healthy
501        let health = daemon.health_check().await;
502        assert!(health.is_healthy());
503
504        // Shutdown should succeed
505        let result = daemon.shutdown(Duration::from_secs(5)).await;
506        assert!(result.is_ok());
507    }
508
509    #[test]
510    fn test_mock_daemon_metrics() {
511        use duende_core::Daemon;
512
513        let daemon = MockDaemon::new("metrics-test");
514        let metrics = daemon.metrics();
515
516        // Metrics should be accessible
517        assert_eq!(metrics.requests_total(), 0);
518    }
519}