// clnrm_core/backend/testcontainer.rs

1//! Testcontainers backend for containerized command execution
2//!
3//! Provides testcontainers-rs integration for hermetic, isolated execution
4//! with automatic container lifecycle management.
5
6use crate::backend::volume::{VolumeMount, VolumeValidator};
7use crate::backend::{Backend, Cmd, RunResult};
8use crate::error::{BackendError, Result};
9use crate::policy::Policy;
10use std::sync::Arc;
11use std::time::{Duration, Instant};
12use testcontainers::{core::ExecCommand, runners::SyncRunner, GenericImage, ImageExt};
13
14use tracing::{info, instrument, warn};
15
16/// Testcontainers backend for containerized execution
/// Testcontainers backend for containerized execution
///
/// Configured via builder-style `with_*` methods. `Clone` is cheap for the
/// shared pieces: the volume validator and determinism engine live behind
/// `Arc`, so clones share them rather than copying.
#[derive(Debug, Clone)]
pub struct TestcontainerBackend {
    /// Image name without the tag (e.g. "alpine")
    image_name: String,
    /// Image tag; defaults to "latest" when the image string has no tag
    image_tag: String,
    /// Policy whose settings are exported into the container as env vars
    policy: Policy,
    /// Command execution timeout (checked after the command returns)
    timeout: Duration,
    /// Container startup timeout
    startup_timeout: Duration,
    /// Environment variables to set in container
    env_vars: std::collections::HashMap<String, String>,
    /// Default command for the container (see `with_cmd`)
    default_command: Option<Vec<String>>,
    /// Bind mounts applied to the container at startup
    volume_mounts: Vec<VolumeMount>,
    /// Validator that security-checks every mount before it is accepted
    volume_validator: Arc<VolumeValidator>,
    /// Memory limit in MB
    /// NOTE(review): stored but not currently applied to the container request
    memory_limit: Option<u64>,
    /// CPU limit (number of CPUs)
    /// NOTE(review): stored but not currently applied to the container request
    cpu_limit: Option<f64>,
    /// Determinism engine for reproducible execution (seed, frozen clock, ports)
    determinism_engine: Option<Arc<crate::determinism::DeterminismEngine>>,
}
43
44impl TestcontainerBackend {
45    /// Create a new testcontainers backend
46    pub fn new(image: impl Into<String>) -> Result<Self> {
47        let image_str = image.into();
48
49        // Parse image name and tag
50        let (image_name, image_tag) = if let Some((name, tag)) = image_str.split_once(':') {
51            (name.to_string(), tag.to_string())
52        } else {
53            (image_str, "latest".to_string())
54        };
55
56        Ok(Self {
57            image_name,
58            image_tag,
59            policy: Policy::default(),
60            timeout: Duration::from_secs(30), // Reduced from 300s
61            startup_timeout: Duration::from_secs(10), // Reduced from 60s
62            env_vars: std::collections::HashMap::new(),
63            default_command: None,
64            volume_mounts: Vec::new(),
65            volume_validator: Arc::new(VolumeValidator::default()),
66            memory_limit: None,
67            cpu_limit: None,
68            determinism_engine: None,
69        })
70    }
71
72    /// Create with custom policy
73    pub fn with_policy(mut self, policy: Policy) -> Self {
74        self.policy = policy;
75        self
76    }
77
78    /// Create with custom execution timeout
79    pub fn with_timeout(mut self, timeout: Duration) -> Self {
80        self.timeout = timeout;
81        self
82    }
83
84    /// Create with custom startup timeout
85    pub fn with_startup_timeout(mut self, timeout: Duration) -> Self {
86        self.startup_timeout = timeout;
87        self
88    }
89
90    /// Check if the backend is running
91    pub fn is_running(&self) -> bool {
92        // For testcontainers, we consider the backend "running" if it can be created
93        // In a real implementation, this might check container status
94        true
95    }
96
97    /// Add environment variable to container
98    pub fn with_env(mut self, key: &str, val: &str) -> Self {
99        self.env_vars.insert(key.to_string(), val.to_string());
100        self
101    }
102
103    /// Set default command for container
104    pub fn with_cmd(mut self, cmd: Vec<String>) -> Self {
105        self.default_command = Some(cmd);
106        self
107    }
108
109    /// Add volume mount
110    ///
111    /// # Arguments
112    ///
113    /// * `host_path` - Path on the host system
114    /// * `container_path` - Path inside the container
115    /// * `read_only` - Whether mount is read-only
116    ///
117    /// # Errors
118    ///
119    /// Returns error if volume validation fails
120    pub fn with_volume(
121        mut self,
122        host_path: &str,
123        container_path: &str,
124        read_only: bool,
125    ) -> Result<Self> {
126        let mount = VolumeMount::new(host_path, container_path, read_only)?;
127        self.volume_validator.validate(&mount)?;
128        self.volume_mounts.push(mount);
129        Ok(self)
130    }
131
132    /// Add read-only volume mount
133    ///
134    /// Convenience method for adding read-only mounts
135    pub fn with_volume_ro(self, host_path: &str, container_path: &str) -> Result<Self> {
136        self.with_volume(host_path, container_path, true)
137    }
138
139    /// Set volume validator with custom whitelist
140    pub fn with_volume_validator(mut self, validator: VolumeValidator) -> Self {
141        self.volume_validator = Arc::new(validator);
142        self
143    }
144
145    /// Get volume mounts
146    pub fn volumes(&self) -> &[VolumeMount] {
147        &self.volume_mounts
148    }
149
150    /// Set memory limit in MB
151    pub fn with_memory_limit(mut self, limit_mb: u64) -> Self {
152        self.memory_limit = Some(limit_mb);
153        self
154    }
155
156    /// Set CPU limit (number of CPUs)
157    pub fn with_cpu_limit(mut self, cpus: f64) -> Self {
158        self.cpu_limit = Some(cpus);
159        self
160    }
161
162    /// Set determinism engine for reproducible execution
163    ///
164    /// # Arguments
165    /// * `engine` - DeterminismEngine with configured seed, clock freezing, etc.
166    pub fn with_determinism(mut self, engine: Arc<crate::determinism::DeterminismEngine>) -> Self {
167        self.determinism_engine = Some(engine);
168        self
169    }
170
171    /// Check if testcontainers is available
172    pub fn is_available() -> bool {
173        // For now, assume Docker is available if we can create a GenericImage
174        true
175    }
176
177    /// Validate OpenTelemetry instrumentation (if enabled)
178    ///
179    /// This method validates that OTel spans are created correctly during
180    /// container operations. Following core team standards:
181    /// - No .unwrap() or .expect()
182    /// - Sync method (dyn compatible)
183    /// - Returns Result<T, CleanroomError>
184    pub fn validate_otel_instrumentation(&self) -> Result<bool> {
185        // Check if OTel is initialized
186        use crate::telemetry::validation::is_otel_initialized;
187
188        if !is_otel_initialized() {
189            return Err(crate::error::CleanroomError::validation_error(
190                "OpenTelemetry is not initialized. Enable OTEL features and call init_otel()",
191            ));
192        }
193
194        // Basic validation - more comprehensive validation requires
195        // integration with in-memory span exporter
196        Ok(true)
197    }
198
199    /// Get OpenTelemetry validation status
200    pub fn otel_validation_enabled(&self) -> bool {
201        true
202    }
203
204    /// Execute command in container
205    #[instrument(name = "clnrm.container.exec", skip(self, cmd), fields(container.image = %self.image_name, container.tag = %self.image_tag, component = "container_backend"))]
206    fn execute_in_container(&self, cmd: &Cmd) -> Result<RunResult> {
207        let start_time = Instant::now();
208
209        info!(
210            "Starting container with image {}:{}",
211            self.image_name, self.image_tag
212        );
213
214        // Create a unique container ID for tracing
215        #[allow(unused_variables)]
216        let container_id = uuid::Uuid::new_v4().to_string();
217
218        {
219            use crate::telemetry::events;
220            use opentelemetry::global;
221            use opentelemetry::trace::{Span, Tracer, TracerProvider};
222
223            // Get current span and record container.start event
224            let tracer_provider = global::tracer_provider();
225            let mut span = tracer_provider
226                .tracer("clnrm-backend")
227                .start("clnrm.container.start");
228
229            events::record_container_start(
230                &mut span,
231                &format!("{}:{}", self.image_name, self.image_tag),
232                &container_id,
233            );
234            span.end();
235        }
236
237        // Docker availability will be checked by the container startup itself
238
239        // Create base image
240        let image = GenericImage::new(self.image_name.clone(), self.image_tag.clone());
241
242        // Build container request with all configurations
243        let mut container_request: testcontainers::core::ContainerRequest<
244            testcontainers::GenericImage,
245        > = image.into();
246
247        // Add environment variables from backend storage
248        for (key, value) in &self.env_vars {
249            container_request = container_request.with_env_var(key, value);
250        }
251
252        // Add environment variables from command
253        for (key, value) in &cmd.env {
254            container_request = container_request.with_env_var(key, value);
255        }
256
257        // Add policy environment variables
258        for (key, value) in self.policy.to_env() {
259            container_request = container_request.with_env_var(key, value);
260        }
261
262        // Add determinism environment variables
263        if let Some(ref engine) = self.determinism_engine {
264            // Set RANDOM env var for seeded random number generation
265            if engine.get_seed().is_some() {
266                // Use seed to generate initial RANDOM value
267                let random_value = match engine.next_u32() {
268                    Ok(val) => val,
269                    Err(e) => {
270                        warn!("Failed to generate random value from seed: {}", e);
271                        0
272                    }
273                };
274                container_request =
275                    container_request.with_env_var("RANDOM", random_value.to_string());
276            }
277
278            // Set FAKETIME env vars for clock freezing (requires libfaketime in container)
279            if let Some(frozen_clock) = engine.get_frozen_clock() {
280                container_request = container_request.with_env_var("FAKETIME", frozen_clock);
281                // LD_PRELOAD for libfaketime - assumes libfaketime.so.1 is in standard location
282                // Users must ensure libfaketime is installed in their container image
283                container_request = container_request.with_env_var(
284                    "LD_PRELOAD",
285                    "/usr/lib/x86_64-linux-gnu/faketime/libfaketime.so.1",
286                );
287                // Make faketime work in multi-threaded environments
288                container_request = container_request.with_env_var("FAKETIME_NO_CACHE", "1");
289            }
290
291            // Set CLEANROOM_ALLOWED_PORTS for deterministic port allocation
292            if engine.config().has_deterministic_ports() {
293                if let Ok(port_list) = engine.get_port_pool_env() {
294                    container_request =
295                        container_request.with_env_var("CLEANROOM_ALLOWED_PORTS", port_list);
296                }
297            }
298        }
299
300        // Add volume mounts from backend storage
301        for mount in &self.volume_mounts {
302            use testcontainers::core::{AccessMode, Mount};
303
304            let access_mode = if mount.is_read_only() {
305                AccessMode::ReadOnly
306            } else {
307                AccessMode::ReadWrite
308            };
309
310            let bind_mount = Mount::bind_mount(
311                mount.host_path().to_string_lossy().to_string(),
312                mount.container_path().to_string_lossy().to_string(),
313            )
314            .with_access_mode(access_mode);
315
316            container_request = container_request.with_mount(bind_mount);
317        }
318
319        // Set a default command to keep the container running
320        // Alpine containers exit immediately without a command
321        container_request = container_request.with_cmd(vec!["sleep", "3600"]);
322
323        // Set working directory if specified
324        if let Some(workdir) = &cmd.workdir {
325            container_request =
326                container_request.with_working_dir(workdir.to_string_lossy().to_string());
327        }
328
329        // Start container using SyncRunner with timeout monitoring
330        let container_start_time = Instant::now();
331        let container = container_request
332            .start()
333            .map_err(|e| {
334                let elapsed = container_start_time.elapsed();
335                if elapsed > Duration::from_secs(10) {
336                    warn!("Container startup took {}s, which is longer than expected. First pull of image may take time.", elapsed.as_secs());
337                }
338
339                BackendError::Runtime(format!(
340                    "Failed to start container with image '{}:{}' after {}s.\n\
341                    Possible causes:\n\
342                      - Docker daemon not running (try: docker ps)\n\
343                      - Image needs to be pulled (first run may take longer)\n\
344                      - Network issues preventing image pull\n\
345                    Try: Increase startup timeout or check Docker status\n\
346                    Original error: {}", 
347                    self.image_name, self.image_tag, elapsed.as_secs(), e
348                ))
349            })?;
350
351        info!("Container started successfully, executing command");
352
353        // Execute command - testcontainers expects Vec<&str> for exec
354        let cmd_args: Vec<&str> = std::iter::once(cmd.bin.as_str())
355            .chain(cmd.args.iter().map(|s| s.as_str()))
356            .collect();
357
358        #[allow(unused_variables)]
359        let cmd_string = format!("{} {}", cmd.bin, cmd.args.join(" "));
360
361        let exec_cmd = ExecCommand::new(cmd_args);
362        let mut exec_result = container
363            .exec(exec_cmd)
364            .map_err(|e| BackendError::Runtime(format!("Command execution failed: {}", e)))?;
365
366        let duration_ms = start_time.elapsed().as_millis() as u64;
367
368        info!("Command completed in {}ms", duration_ms);
369
370        // Extract output - SyncExecResult provides stdout() and stderr() as streams
371        use std::io::Read;
372        let mut stdout = String::new();
373        let mut stderr = String::new();
374
375        exec_result
376            .stdout()
377            .read_to_string(&mut stdout)
378            .map_err(|e| BackendError::Runtime(format!("Failed to read stdout: {}", e)))?;
379        exec_result
380            .stderr()
381            .read_to_string(&mut stderr)
382            .map_err(|e| BackendError::Runtime(format!("Failed to read stderr: {}", e)))?;
383
384        // Extract exit code with proper error handling
385        // testcontainers may return None if exit code is unavailable
386        #[allow(clippy::unnecessary_lazy_evaluations)] // Need closure for warn! macro
387        let exit_code = exec_result
388            .exit_code()
389            .map_err(|e| BackendError::Runtime(format!("Failed to get exit code: {}", e)))?
390            .unwrap_or_else(|| {
391                // Exit code unavailable - this can happen with certain container states
392                // Return -1 to indicate unknown/error state (POSIX convention for signal termination)
393                warn!("Exit code unavailable from container, defaulting to -1");
394                -1
395            }) as i32;
396
397        {
398            use crate::telemetry::events;
399            use opentelemetry::global;
400            use opentelemetry::trace::{Span, Tracer, TracerProvider};
401
402            // Record container.exec event
403            let tracer_provider = global::tracer_provider();
404            let mut exec_span = tracer_provider
405                .tracer("clnrm-backend")
406                .start("clnrm.container.exec");
407
408            events::record_container_exec(&mut exec_span, &cmd_string, exit_code);
409            exec_span.end();
410
411            // Record container.stop event
412            let mut stop_span = tracer_provider
413                .tracer("clnrm-backend")
414                .start("clnrm.container.stop");
415
416            events::record_container_stop(&mut stop_span, &container_id, exit_code);
417            stop_span.end();
418        }
419
420        Ok(RunResult {
421            exit_code,
422            stdout,
423            stderr,
424            duration_ms,
425            steps: Vec::new(),
426            redacted_env: Vec::new(),
427            backend: "testcontainers".to_string(),
428            concurrent: false,
429            step_order: Vec::new(),
430        })
431    }
432}
433
434impl Backend for TestcontainerBackend {
435    fn run_cmd(&self, cmd: Cmd) -> Result<RunResult> {
436        // Use synchronous execution with timeout
437        let start_time = Instant::now();
438
439        // Execute command with timeout
440        let result = self.execute_in_container(&cmd)?;
441
442        // Check if execution exceeded timeout
443        if start_time.elapsed() > self.timeout {
444            return Err(crate::error::CleanroomError::timeout_error(format!(
445                "Command execution timed out after {} seconds",
446                self.timeout.as_secs()
447            )));
448        }
449
450        Ok(result)
451    }
452
453    fn name(&self) -> &str {
454        "testcontainers"
455    }
456
457    fn is_available(&self) -> bool {
458        Self::is_available()
459    }
460
461    fn supports_hermetic(&self) -> bool {
462        true
463    }
464
465    fn supports_deterministic(&self) -> bool {
466        true
467    }
468}