Skip to main content

rch_common/
types.rs

1//! Common types used across RCH components.
2
3use crate::{CompilationKind, toolchain::ToolchainInfo};
4use rand::RngExt;
5use schemars::JsonSchema;
6use serde::{Deserialize, Serialize};
7use std::path::PathBuf;
8
9/// Unique identifier for a worker in the fleet.
10#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema)]
11pub struct WorkerId(pub String);
12
13impl WorkerId {
14    pub fn new(id: impl Into<String>) -> Self {
15        Self(id.into())
16    }
17
18    pub fn as_str(&self) -> &str {
19        &self.0
20    }
21}
22
23impl std::fmt::Display for WorkerId {
24    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
25        write!(f, "{}", self.0)
26    }
27}
28
29/// Status of a worker in the fleet.
30///
31/// State transitions:
32/// ```text
33/// HEALTHY ←→ DEGRADED (automatic based on response times)
34///    ↓           ↓
35///    ↓       UNREACHABLE (automatic on heartbeat failure)
36///    ↓
37/// DRAINING (via `rch workers drain`) - finishing current jobs
38///    ↓
39/// DRAINED (automatic when all jobs complete) - idle, ready to disable
40///    ↓
41/// DISABLED (via `rch workers disable`) - completely offline
42///
43/// Use `rch workers enable` to return from DRAINING/DRAINED/DISABLED → HEALTHY
44/// ```
45#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
46#[serde(rename_all = "snake_case")]
47pub enum WorkerStatus {
48    /// Worker is healthy and accepting jobs.
49    #[default]
50    Healthy,
51    /// Worker is responding slowly.
52    Degraded,
53    /// Worker failed to respond to heartbeat.
54    Unreachable,
55    /// Worker is not accepting new jobs (finishing current).
56    Draining,
57    /// Worker has finished draining (no active jobs, idle).
58    Drained,
59    /// Worker is manually disabled.
60    Disabled,
61}
62
63/// Circuit breaker state for a worker.
64#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
65#[serde(rename_all = "snake_case")]
66pub enum CircuitState {
67    /// Normal operation.
68    #[default]
69    Closed,
70    /// Circuit is open (short-circuit).
71    Open,
72    /// Circuit is half-open (probing).
73    HalfOpen,
74}
75
76/// Required runtime for command execution.
77#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default, JsonSchema)]
78#[serde(rename_all = "lowercase")]
79pub enum RequiredRuntime {
80    /// No specific runtime required (default).
81    #[default]
82    None,
83    /// Requires Rust toolchain.
84    Rust,
85    /// Requires Bun runtime.
86    Bun,
87    /// Requires Node.js runtime.
88    Node,
89}
90
91/// Per-command priority hint for worker selection.
92///
93/// This is an *input hint* from the caller (typically the hook) and should not
94/// change default behavior when omitted.
95#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default, JsonSchema)]
96#[serde(rename_all = "lowercase")]
97pub enum CommandPriority {
98    Low,
99    #[default]
100    Normal,
101    High,
102}
103
104impl std::fmt::Display for CommandPriority {
105    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
106        let value = match self {
107            Self::Low => "low",
108            Self::Normal => "normal",
109            Self::High => "high",
110        };
111        write!(f, "{}", value)
112    }
113}
114
115impl std::str::FromStr for CommandPriority {
116    type Err = ();
117
118    fn from_str(s: &str) -> Result<Self, Self::Err> {
119        match s.trim().to_lowercase().as_str() {
120            "low" => Ok(Self::Low),
121            "normal" => Ok(Self::Normal),
122            "high" => Ok(Self::High),
123            _ => Err(()),
124        }
125    }
126}
127
128/// Worker selection request sent from hook to daemon.
129#[derive(Debug, Clone, Serialize, Deserialize)]
130pub struct SelectionRequest {
131    /// Project identifier (usually directory name or hash).
132    pub project: String,
133    /// Full command being executed (optional, for active build tracking).
134    #[serde(default, skip_serializing_if = "Option::is_none")]
135    pub command: Option<String>,
136    /// Priority hint for this request.
137    #[serde(default)]
138    pub command_priority: CommandPriority,
139    /// Estimated CPU cores needed for this compilation.
140    pub estimated_cores: u32,
141    /// Preferred worker IDs (e.g., from project config).
142    #[serde(default)]
143    pub preferred_workers: Vec<WorkerId>,
144    /// Rust toolchain information for the project.
145    #[serde(default)]
146    pub toolchain: Option<ToolchainInfo>,
147    /// Required runtime for command execution.
148    #[serde(default)]
149    pub required_runtime: RequiredRuntime,
150    /// Classification decision latency in microseconds (for AGENTS.md compliance).
151    /// This tracks how long the 5-tier classification took on the hook side.
152    #[serde(default, skip_serializing_if = "Option::is_none")]
153    pub classification_duration_us: Option<u64>,
154    /// Process ID of the hook (for active build tracking).
155    #[serde(default, skip_serializing_if = "Option::is_none")]
156    pub hook_pid: Option<u32>,
157}
158
159/// Reason for worker selection result.
160///
161/// Provides context when no worker is available, enabling informative
162/// fallback messages in the hook.
163#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
164#[serde(rename_all = "snake_case")]
165pub enum SelectionReason {
166    /// Worker assigned successfully.
167    Success,
168    /// No workers configured in workers.toml.
169    NoWorkersConfigured,
170    /// All workers are unreachable (failed health checks).
171    AllWorkersUnreachable,
172    /// All workers have circuits open (after repeated failures).
173    AllCircuitsOpen,
174    /// All workers are at capacity (no available slots).
175    AllWorkersBusy,
176    /// Workers were present but none passed selection health thresholds.
177    NoWorkersPassedHealth,
178    /// All candidate workers failed hard preflight checks.
179    AllWorkersFailedPreflight,
180    /// All candidate workers failed repo convergence checks (repos missing/stale/failed).
181    AllWorkersFailedConvergence,
182    /// Workers exist but admission was blocked by concrete capacity/preflight reasons.
183    NoAdmissibleWorkers(String),
184    /// No workers match required tags or preferences.
185    NoMatchingWorkers,
186    /// No workers have the required runtime (e.g., Bun, Node).
187    NoWorkersWithRuntime(String),
188    /// Internal error during selection.
189    SelectionError(String),
190    /// Worker assigned via affinity pinning (recent successful build).
191    AffinityPinned,
192    /// Worker assigned via last-success fallback (all others unavailable).
193    AffinityFallback,
194}
195
196impl std::fmt::Display for SelectionReason {
197    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
198        match self {
199            Self::Success => write!(f, "worker assigned successfully"),
200            Self::NoWorkersConfigured => write!(f, "no workers configured"),
201            Self::AllWorkersUnreachable => write!(f, "all workers unreachable"),
202            Self::AllCircuitsOpen => write!(f, "all worker circuits open"),
203            Self::AllWorkersBusy => write!(f, "all workers at capacity"),
204            Self::NoWorkersPassedHealth => write!(f, "no workers passed health thresholds"),
205            Self::AllWorkersFailedPreflight => write!(f, "all workers failed preflight checks"),
206            Self::AllWorkersFailedConvergence => {
207                write!(f, "all workers failed repo convergence checks")
208            }
209            Self::NoAdmissibleWorkers(summary) => {
210                write!(f, "no admissible workers: {}", summary)
211            }
212            Self::NoMatchingWorkers => write!(f, "no matching workers found"),
213            Self::NoWorkersWithRuntime(rt) => write!(f, "no workers with {} installed", rt),
214            Self::SelectionError(e) => write!(f, "selection error: {}", e),
215            Self::AffinityPinned => write!(f, "worker assigned via affinity pinning"),
216            Self::AffinityFallback => write!(f, "worker assigned via last-success fallback"),
217        }
218    }
219}
220
221// ============================================================================
222// Worker Selection Strategy
223// ============================================================================
224
225/// Worker selection strategy determining how workers are chosen for jobs.
226#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
227#[serde(rename_all = "snake_case")]
228pub enum SelectionStrategy {
229    /// Legacy/manual control: prioritize worker.priority first, then break ties
230    /// using command-aware cache/speed hints.
231    Priority,
232    /// Select worker with highest SpeedScore.
233    /// Best for performance-critical builds with homogeneous workers.
234    Fastest,
235    /// Balance all factors: SpeedScore, load, health, cache affinity.
236    /// Default for general use across diverse worker pools.
237    #[default]
238    Balanced,
239    /// Prefer workers with warm caches for the project.
240    /// Best for incremental builds on large codebases.
241    CacheAffinity,
242    /// Weighted random selection favoring fast workers but ensuring fairness.
243    /// Prevents hot-spotting when many workers are available.
244    FairFastest,
245}
246
247impl std::fmt::Display for SelectionStrategy {
248    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
249        let name = match self {
250            Self::Priority => "priority",
251            Self::Fastest => "fastest",
252            Self::Balanced => "balanced",
253            Self::CacheAffinity => "cache_affinity",
254            Self::FairFastest => "fair_fastest",
255        };
256        write!(f, "{}", name)
257    }
258}
259
260impl std::str::FromStr for SelectionStrategy {
261    type Err = String;
262
263    fn from_str(s: &str) -> Result<Self, Self::Err> {
264        match s.trim().to_lowercase().as_str() {
265            "priority" => Ok(Self::Priority),
266            "fastest" => Ok(Self::Fastest),
267            "balanced" => Ok(Self::Balanced),
268            "cache_affinity" | "cache-affinity" | "cacheaffinity" => Ok(Self::CacheAffinity),
269            "fair_fastest" | "fair-fastest" | "fairfastest" => Ok(Self::FairFastest),
270            _ => Err(format!(
271                "unknown selection strategy '{}', expected one of: priority, fastest, balanced, cache_affinity, fair_fastest",
272                s
273            )),
274        }
275    }
276}
277
278/// Configuration for the worker selection algorithm.
279#[derive(Debug, Clone, Serialize, Deserialize)]
280pub struct SelectionConfig {
281    /// Selection strategy to use.
282    #[serde(default)]
283    pub strategy: SelectionStrategy,
284    /// Minimum success rate (0.0-1.0) for a worker to be considered.
285    #[serde(default = "default_min_success_rate")]
286    pub min_success_rate: f64,
287    /// Factor weights for the balanced strategy.
288    #[serde(default)]
289    pub weights: SelectionWeightConfig,
290    /// Fairness settings for fair_fastest strategy.
291    #[serde(default)]
292    pub fairness: FairnessConfig,
293    /// Cache affinity settings for project-to-worker pinning.
294    #[serde(default)]
295    pub affinity: AffinityConfig,
296
297    // Preflight health guards (bd-3eaa)
298    /// Maximum load-per-core threshold. Workers exceeding this are skipped.
299    /// Set to None to disable load-based filtering.
300    #[serde(default = "default_max_load_per_core")]
301    pub max_load_per_core: Option<f64>,
302    /// Minimum free disk space in GB. Workers below this are skipped.
303    /// Set to None to disable disk-based filtering.
304    #[serde(default = "default_min_free_gb")]
305    pub min_free_gb: Option<f64>,
306}
307
308impl Default for SelectionConfig {
309    fn default() -> Self {
310        Self {
311            strategy: SelectionStrategy::default(),
312            min_success_rate: default_min_success_rate(),
313            weights: SelectionWeightConfig::default(),
314            fairness: FairnessConfig::default(),
315            affinity: AffinityConfig::default(),
316            max_load_per_core: default_max_load_per_core(),
317            min_free_gb: default_min_free_gb(),
318        }
319    }
320}
321
/// Default for `SelectionConfig::min_success_rate`.
fn default_min_success_rate() -> f64 {
    const MIN_SUCCESS_RATE: f64 = 0.8;
    MIN_SUCCESS_RATE
}

/// Default for `SelectionConfig::max_load_per_core`.
fn default_max_load_per_core() -> Option<f64> {
    // Workers whose load/core exceeds 2.0 are skipped.
    const MAX_LOAD_PER_CORE: f64 = 2.0;
    Some(MAX_LOAD_PER_CORE)
}

/// Default for `SelectionConfig::min_free_gb`.
fn default_min_free_gb() -> Option<f64> {
    // Workers with less than 10 GB free are skipped.
    const MIN_FREE_GB: f64 = 10.0;
    Some(MIN_FREE_GB)
}
333
334/// Weight configuration for the balanced selection strategy.
335#[derive(Debug, Clone, Serialize, Deserialize)]
336pub struct SelectionWeightConfig {
337    /// Weight for SpeedScore (0.0-1.0).
338    #[serde(default = "default_weight_speedscore")]
339    pub speedscore: f64,
340    /// Weight for available slots (0.0-1.0).
341    #[serde(default = "default_weight_slots")]
342    pub slots: f64,
343    /// Weight for health/success rate (0.0-1.0).
344    #[serde(default = "default_weight_health")]
345    pub health: f64,
346    /// Weight for cache affinity (0.0-1.0).
347    #[serde(default = "default_weight_cache")]
348    pub cache: f64,
349    /// Weight for network latency (0.0-1.0).
350    #[serde(default = "default_weight_network")]
351    pub network: f64,
352    /// Weight for worker priority (0.0-1.0).
353    #[serde(default = "default_weight_priority")]
354    pub priority: f64,
355    /// Penalty multiplier for half-open circuit workers (0.0-1.0).
356    #[serde(default = "default_half_open_penalty")]
357    pub half_open_penalty: f64,
358}
359
360impl Default for SelectionWeightConfig {
361    fn default() -> Self {
362        Self {
363            speedscore: default_weight_speedscore(),
364            slots: default_weight_slots(),
365            health: default_weight_health(),
366            cache: default_weight_cache(),
367            network: default_weight_network(),
368            priority: default_weight_priority(),
369            half_open_penalty: default_half_open_penalty(),
370        }
371    }
372}
373
/// Default for `SelectionWeightConfig::speedscore`.
fn default_weight_speedscore() -> f64 {
    const WEIGHT: f64 = 0.5;
    WEIGHT
}

/// Default for `SelectionWeightConfig::slots`.
fn default_weight_slots() -> f64 {
    const WEIGHT: f64 = 0.4;
    WEIGHT
}

/// Default for `SelectionWeightConfig::health`.
fn default_weight_health() -> f64 {
    const WEIGHT: f64 = 0.3;
    WEIGHT
}

/// Default for `SelectionWeightConfig::cache`.
fn default_weight_cache() -> f64 {
    const WEIGHT: f64 = 0.2;
    WEIGHT
}

/// Default for `SelectionWeightConfig::network`.
fn default_weight_network() -> f64 {
    const WEIGHT: f64 = 0.1;
    WEIGHT
}

/// Default for `SelectionWeightConfig::priority`.
fn default_weight_priority() -> f64 {
    const WEIGHT: f64 = 0.1;
    WEIGHT
}

/// Default for `SelectionWeightConfig::half_open_penalty`.
fn default_half_open_penalty() -> f64 {
    const PENALTY: f64 = 0.5;
    PENALTY
}
395
396/// Fairness settings for the fair_fastest selection strategy.
397#[derive(Debug, Clone, Serialize, Deserialize)]
398pub struct FairnessConfig {
399    /// Lookback window in seconds for tracking recent selections.
400    #[serde(default = "default_fairness_lookback_secs")]
401    pub lookback_secs: u64,
402    /// Maximum consecutive selections for a single worker before penalty.
403    #[serde(default = "default_max_consecutive_selections")]
404    pub max_consecutive_selections: u32,
405}
406
407impl Default for FairnessConfig {
408    fn default() -> Self {
409        Self {
410            lookback_secs: default_fairness_lookback_secs(),
411            max_consecutive_selections: default_max_consecutive_selections(),
412        }
413    }
414}
415
/// Default for `FairnessConfig::lookback_secs`.
fn default_fairness_lookback_secs() -> u64 {
    // 5 minutes, expressed in seconds.
    const LOOKBACK_SECS: u64 = 300;
    LOOKBACK_SECS
}

/// Default for `FairnessConfig::max_consecutive_selections`.
fn default_max_consecutive_selections() -> u32 {
    const MAX_CONSECUTIVE: u32 = 3;
    MAX_CONSECUTIVE
}
423
424/// Cache affinity settings for project-to-worker pinning.
425#[derive(Debug, Clone, Serialize, Deserialize)]
426pub struct AffinityConfig {
427    /// Enable affinity pinning (pin projects to last successful worker).
428    #[serde(default = "default_affinity_enabled")]
429    pub enabled: bool,
430    /// Time window in minutes to pin a project to its last successful worker.
431    /// After this window expires, normal selection resumes.
432    #[serde(default = "default_affinity_pin_minutes")]
433    pub pin_minutes: u64,
434    /// Enable last-success fallback when no workers match selection criteria.
435    /// If enabled, the daemon will attempt the last successful worker for a project
436    /// when all other workers are unavailable or fail selection.
437    #[serde(default = "default_last_success_fallback")]
438    pub enable_last_success_fallback: bool,
439    /// Minimum success rate for the last-success worker to be used as fallback.
440    /// Must be lower than min_success_rate to be meaningful as a fallback.
441    #[serde(default = "default_fallback_min_success_rate")]
442    pub fallback_min_success_rate: f64,
443}
444
445impl Default for AffinityConfig {
446    fn default() -> Self {
447        Self {
448            enabled: default_affinity_enabled(),
449            pin_minutes: default_affinity_pin_minutes(),
450            enable_last_success_fallback: default_last_success_fallback(),
451            fallback_min_success_rate: default_fallback_min_success_rate(),
452        }
453    }
454}
455
/// Default for `AffinityConfig::enabled`.
fn default_affinity_enabled() -> bool {
    const ENABLED: bool = true;
    ENABLED
}

/// Default for `AffinityConfig::pin_minutes`.
fn default_affinity_pin_minutes() -> u64 {
    // One hour, expressed in minutes.
    const PIN_MINUTES: u64 = 60;
    PIN_MINUTES
}

/// Default for `AffinityConfig::enable_last_success_fallback`.
fn default_last_success_fallback() -> bool {
    const ENABLED: bool = true;
    ENABLED
}

/// Default for `AffinityConfig::fallback_min_success_rate`.
fn default_fallback_min_success_rate() -> f64 {
    // More lenient than the normal min_success_rate (0.8).
    const FALLBACK_RATE: f64 = 0.5;
    FALLBACK_RATE
}
471
472/// Details about a selected worker for remote compilation.
473#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
474pub struct SelectedWorker {
475    /// Selected worker ID.
476    pub id: WorkerId,
477    /// Host address for SSH.
478    pub host: String,
479    /// SSH user.
480    pub user: String,
481    /// Path to SSH identity file.
482    pub identity_file: String,
483    /// Number of slots available on this worker after reservation.
484    pub slots_available: u32,
485    /// Worker's speed score (0-100).
486    pub speed_score: f64,
487}
488
489/// Worker selection response from daemon to hook.
490#[derive(Debug, Clone, Serialize, Deserialize)]
491pub struct SelectionResponse {
492    /// Selected worker details, if available.
493    pub worker: Option<SelectedWorker>,
494    /// Reason for the selection result.
495    pub reason: SelectionReason,
496    /// Optional build ID assigned by the daemon (for active build tracking).
497    #[serde(default, skip_serializing_if = "Option::is_none")]
498    pub build_id: Option<u64>,
499}
500
501/// Request to release reserved worker slots.
502#[derive(Debug, Clone, Serialize, Deserialize)]
503pub struct ReleaseRequest {
504    /// ID of the worker to release slots on.
505    pub worker_id: WorkerId,
506    /// Number of slots to release.
507    pub slots: u32,
508    /// Optional build ID to mark complete.
509    #[serde(default, skip_serializing_if = "Option::is_none")]
510    pub build_id: Option<u64>,
511    /// Optional exit code for the build (used to finalize active build tracking).
512    #[serde(default, skip_serializing_if = "Option::is_none")]
513    pub exit_code: Option<i32>,
514    /// Optional total duration in milliseconds (pipeline or execution).
515    ///
516    /// If omitted, the daemon will compute duration from the active build's start time.
517    #[serde(default, skip_serializing_if = "Option::is_none")]
518    pub duration_ms: Option<u64>,
519    /// Optional bytes transferred during the pipeline (upload + artifact download).
520    #[serde(default, skip_serializing_if = "Option::is_none")]
521    pub bytes_transferred: Option<u64>,
522    /// Optional per-phase timing breakdown for the build pipeline.
523    #[serde(default, skip_serializing_if = "Option::is_none")]
524    pub timing: Option<CommandTimingBreakdown>,
525}
526
527/// Build execution phase for daemon heartbeat tracking.
528#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
529#[serde(rename_all = "snake_case")]
530pub enum BuildHeartbeatPhase {
531    /// Uploading sources/dependencies to the worker.
532    SyncUp,
533    /// Running the remote compilation/test command.
534    Execute,
535    /// Downloading build artifacts from the worker.
536    SyncDown,
537    /// Build is completing or finalizing.
538    Finalize,
539}
540
541/// Request to update liveness/progress for an active build.
542#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
543pub struct BuildHeartbeatRequest {
544    /// Active build identifier assigned by the daemon.
545    pub build_id: u64,
546    /// Worker executing the build.
547    pub worker_id: WorkerId,
548    /// Hook process ID sending the heartbeat.
549    #[serde(default, skip_serializing_if = "Option::is_none")]
550    pub hook_pid: Option<u32>,
551    /// Remote file containing the process-group leader PID for cancellation.
552    #[serde(default, skip_serializing_if = "Option::is_none")]
553    pub remote_pgid_file: Option<String>,
554    /// Current build execution phase.
555    pub phase: BuildHeartbeatPhase,
556    /// Optional human-readable progress detail.
557    #[serde(default, skip_serializing_if = "Option::is_none")]
558    pub detail: Option<String>,
559    /// Optional monotonic progress counter.
560    ///
561    /// The hook increments this when it observes meaningful progress
562    /// (phase transitions, new output, or richer progress signals).
563    #[serde(default, skip_serializing_if = "Option::is_none")]
564    pub progress_counter: Option<u64>,
565    /// Optional progress estimate in [0,100].
566    #[serde(default, skip_serializing_if = "Option::is_none")]
567    pub progress_percent: Option<f64>,
568}
569
570/// Configuration for a remote worker.
571#[derive(Debug, Clone, Serialize, Deserialize)]
572pub struct WorkerConfig {
573    /// Unique identifier for this worker.
574    pub id: WorkerId,
575    /// SSH hostname or IP address.
576    pub host: String,
577    /// SSH username.
578    pub user: String,
579    /// Path to SSH private key.
580    pub identity_file: String,
581    /// Total CPU slots available on this worker.
582    pub total_slots: u32,
583    /// Priority for worker selection (higher = preferred).
584    #[serde(default = "default_priority")]
585    pub priority: u32,
586    /// Optional tags for filtering.
587    #[serde(default)]
588    pub tags: Vec<String>,
589}
590
/// Default for `WorkerConfig::priority`.
fn default_priority() -> u32 {
    const PRIORITY: u32 = 100;
    PRIORITY
}
594
595impl Default for WorkerConfig {
596    fn default() -> Self {
597        Self {
598            id: WorkerId::new("default-worker"),
599            host: "localhost".to_string(),
600            user: "user".to_string(),
601            identity_file: "~/.ssh/id_rsa".to_string(),
602            total_slots: 4,
603            priority: default_priority(),
604            tags: Vec::new(),
605        }
606    }
607}
608
609/// Runtime capabilities detected on a worker.
610///
611/// These are probed during health checks and cached for routing decisions.
612/// Commands requiring specific runtimes (e.g., `bun test`) can be routed
613/// only to workers with the corresponding capability.
614#[derive(Debug, Clone, Default, Serialize, Deserialize, JsonSchema)]
615pub struct WorkerCapabilities {
616    /// Rust compiler version (from `rustc --version`).
617    #[serde(default, skip_serializing_if = "Option::is_none")]
618    pub rustc_version: Option<String>,
619    /// Bun runtime version (from `bun --version`).
620    #[serde(default, skip_serializing_if = "Option::is_none")]
621    pub bun_version: Option<String>,
622    /// Node.js version (from `node --version`).
623    #[serde(default, skip_serializing_if = "Option::is_none")]
624    pub node_version: Option<String>,
625    /// npm version (from `npm --version`).
626    #[serde(default, skip_serializing_if = "Option::is_none")]
627    pub npm_version: Option<String>,
628
629    // Health metrics (bd-3eaa)
630    /// Number of CPU cores on the worker.
631    #[serde(default, skip_serializing_if = "Option::is_none")]
632    pub num_cpus: Option<u32>,
633    /// 1-minute load average.
634    #[serde(default, skip_serializing_if = "Option::is_none")]
635    pub load_avg_1: Option<f64>,
636    /// 5-minute load average.
637    #[serde(default, skip_serializing_if = "Option::is_none")]
638    pub load_avg_5: Option<f64>,
639    /// 15-minute load average.
640    #[serde(default, skip_serializing_if = "Option::is_none")]
641    pub load_avg_15: Option<f64>,
642    /// Free disk space in GB (on /tmp or build directory).
643    #[serde(default, skip_serializing_if = "Option::is_none")]
644    pub disk_free_gb: Option<f64>,
645    /// Total disk space in GB.
646    #[serde(default, skip_serializing_if = "Option::is_none")]
647    pub disk_total_gb: Option<f64>,
648    /// Canonical path-topology preflight status (`/data/projects` + `/dp` alias).
649    ///
650    /// `Some(false)` indicates a hard preflight failure that should exclude this
651    /// worker from remote scheduling until a subsequent successful revalidation.
652    #[serde(default, skip_serializing_if = "Option::is_none")]
653    pub projects_root_ok: Option<bool>,
654    /// Machine-readable failure reason for path-topology preflight.
655    #[serde(default, skip_serializing_if = "Option::is_none")]
656    pub projects_root_issue: Option<String>,
657    /// Unix timestamp (ms) when topology preflight last executed.
658    #[serde(default, skip_serializing_if = "Option::is_none")]
659    pub projects_root_checked_at_unix_ms: Option<i64>,
660}
661
662impl WorkerCapabilities {
663    /// Create new empty capabilities.
664    pub fn new() -> Self {
665        Self::default()
666    }
667
668    /// Create mock capabilities with Rust installed.
669    ///
670    /// Used by mock transport to simulate a worker with Rust toolchain.
671    pub fn mock_with_rust() -> Self {
672        Self {
673            rustc_version: Some("1.85.0-nightly (mock)".to_string()),
674            projects_root_ok: Some(true),
675            ..Self::default()
676        }
677    }
678
679    /// Check if this worker has Bun installed.
680    pub fn has_bun(&self) -> bool {
681        self.bun_version.is_some()
682    }
683
684    /// Check if this worker has Node.js installed.
685    pub fn has_node(&self) -> bool {
686        self.node_version.is_some()
687    }
688
689    /// Check if this worker has Rust installed.
690    pub fn has_rust(&self) -> bool {
691        self.rustc_version.is_some()
692    }
693
694    /// Calculate load per core (1-minute load average / num_cpus).
695    /// Returns None if metrics are unavailable.
696    pub fn load_per_core(&self) -> Option<f64> {
697        match (self.load_avg_1, self.num_cpus) {
698            (Some(load), Some(cpus)) if cpus > 0 => Some(load / cpus as f64),
699            _ => None,
700        }
701    }
702
703    /// Check if worker has high load (exceeds threshold).
704    /// Returns None if metrics unavailable (fail-open).
705    pub fn is_high_load(&self, max_load_per_core: f64) -> Option<bool> {
706        self.load_per_core().map(|lpc| lpc > max_load_per_core)
707    }
708
709    /// Check if worker has low disk space (below threshold).
710    /// Returns None if metrics unavailable (fail-open).
711    pub fn is_low_disk(&self, min_free_gb: f64) -> Option<bool> {
712        self.disk_free_gb.map(|free| free < min_free_gb)
713    }
714
715    /// Check if canonical path-topology preflight is healthy.
716    ///
717    /// Returns `None` when probe data is unavailable (fail-open behavior).
718    pub fn is_topology_healthy(&self) -> Option<bool> {
719        self.projects_root_ok
720    }
721}
722
723/// Path topology configuration.
724///
725/// Overrides the default canonical and alias project roots used by
726/// [`crate::path_topology::PathTopologyPolicy`].  This is useful on platforms
727/// such as macOS where SIP prevents using `/data/projects` and the user must
728/// choose a different root (e.g. `~/Projects`).
729///
730/// ```toml
731/// [path_topology]
732/// canonical_root = "/Users/me/Projects"
733/// alias_root = "/Users/me/p"
734/// ```
735#[derive(Debug, Clone, Default, Serialize, Deserialize)]
736pub struct PathTopologyConfig {
737    /// Canonical project root directory.
738    /// Defaults to `/data/projects` when absent.
739    #[serde(default)]
740    pub canonical_root: Option<String>,
741
742    /// Alias (symlink) root that points at the canonical root.
743    /// Defaults to `/dp` when absent.
744    #[serde(default)]
745    pub alias_root: Option<String>,
746}
747
748impl PathTopologyConfig {
749    /// Build a [`crate::path_topology::PathTopologyPolicy`] from this config,
750    /// falling back to the compiled-in defaults for any field left unset.
751    ///
752    /// Empty strings are treated as unset (fall back to defaults).
753    /// Tilde prefixes (`~/...`) are expanded to the user's home directory.
754    pub fn to_policy(&self) -> crate::path_topology::PathTopologyPolicy {
755        let canonical = self
756            .canonical_root
757            .as_deref()
758            .filter(|s| !s.is_empty())
759            .map(|s| shellexpand::tilde(s).into_owned())
760            .unwrap_or_else(|| crate::path_topology::DEFAULT_CANONICAL_PROJECT_ROOT.to_string());
761        let alias = self
762            .alias_root
763            .as_deref()
764            .filter(|s| !s.is_empty())
765            .map(|s| shellexpand::tilde(s).into_owned())
766            .unwrap_or_else(|| crate::path_topology::DEFAULT_ALIAS_PROJECT_ROOT.to_string());
767        crate::path_topology::PathTopologyPolicy::new(
768            PathBuf::from(canonical),
769            PathBuf::from(alias),
770        )
771    }
772}
773
774/// RCH configuration.
775#[derive(Debug, Clone, Default, Serialize, Deserialize)]
776pub struct RchConfig {
777    #[serde(default)]
778    pub general: GeneralConfig,
779    #[serde(default)]
780    pub compilation: CompilationConfig,
781    #[serde(default)]
782    pub transfer: TransferConfig,
783    #[serde(default)]
784    pub environment: EnvironmentConfig,
785    #[serde(default)]
786    pub circuit: CircuitBreakerConfig,
787    #[serde(default)]
788    pub output: OutputConfig,
789    #[serde(default)]
790    pub self_healing: SelfHealingConfig,
791    #[serde(default)]
792    pub self_test: SelfTestConfig,
793    /// Worker selection algorithm configuration.
794    #[serde(default)]
795    pub selection: SelectionConfig,
796    /// Execution allowlist configuration (bd-785w).
797    #[serde(default)]
798    pub execution: ExecutionConfig,
799    /// Worker health alerting configuration (daemon).
800    #[serde(default)]
801    pub alerts: AlertsConfig,
802    /// Fleet operation configuration (bd-rs7w.2).
803    #[serde(default)]
804    pub fleet: FleetConfig,
805    /// Path topology overrides for project root directories.
806    #[serde(default)]
807    pub path_topology: PathTopologyConfig,
808}
809
/// Worker health alerting configuration.
///
/// Fields missing from the deserialized input fall back to the per-field
/// serde defaults named in the attributes below.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlertsConfig {
    /// Enable or disable alert generation in the daemon.
    ///
    /// `Default::default()` sets this to `true`; deserialization falls back to
    /// `default_true` when the key is absent.
    #[serde(default = "default_true")]
    pub enabled: bool,
    /// Suppress duplicate alerts for this many seconds.
    ///
    /// Falls back to `default_alert_suppress_duplicates_secs` (300).
    #[serde(default = "default_alert_suppress_duplicates_secs")]
    pub suppress_duplicates_secs: u64,
    /// How long to keep a cleared alert visible (with state
    /// `cleared_pending_clean`) before removing it entirely. Prevents
    /// transient, self-healing circuit-breaker warnings from lingering on
    /// `rch status` long after the fleet returned to healthy (bd-3ogaz).
    ///
    /// Expressed in seconds; falls back to
    /// `default_alert_cleared_retention_secs` (300).
    #[serde(default = "default_alert_cleared_retention_secs")]
    pub cleared_retention_secs: u64,
    /// Webhook configuration for external notifications.
    ///
    /// `None` when no webhook section is configured.
    #[serde(default)]
    pub webhook: Option<WebhookConfig>,
}
829
830/// Webhook configuration for alert dispatch.
831#[derive(Debug, Clone, Default, Serialize, Deserialize)]
832pub struct WebhookConfig {
833    /// Webhook endpoint URL (e.g., Slack/Discord webhook URL).
834    pub url: Option<String>,
835    /// Secret for HMAC-SHA256 signing (optional).
836    #[serde(default)]
837    pub secret: Option<String>,
838    /// Timeout in seconds for webhook requests.
839    #[serde(default = "default_webhook_timeout_secs")]
840    pub timeout_secs: u64,
841    /// Number of retries on failure.
842    #[serde(default = "default_webhook_retry_count")]
843    pub retry_count: u32,
844    /// Events to send (empty = all events).
845    /// Supported: worker_offline, worker_degraded, circuit_open, all_workers_offline
846    #[serde(default)]
847    pub events: Vec<String>,
848}
849
850fn default_webhook_timeout_secs() -> u64 {
851    5
852}
853
854fn default_webhook_retry_count() -> u32 {
855    3
856}
857
/// Default duplicate-alert suppression window, in seconds (five minutes).
fn default_alert_suppress_duplicates_secs() -> u64 {
    5 * 60
}

/// Default retention of cleared alerts, in seconds (five minutes).
fn default_alert_cleared_retention_secs() -> u64 {
    5 * 60
}
865
866impl Default for AlertsConfig {
867    fn default() -> Self {
868        Self {
869            enabled: true,
870            suppress_duplicates_secs: default_alert_suppress_duplicates_secs(),
871            cleared_retention_secs: default_alert_cleared_retention_secs(),
872            webhook: None,
873        }
874    }
875}
876
/// Fleet operation configuration.
///
/// Controls timeouts, thresholds, and behavior for fleet management commands
/// like `rch fleet preflight`, `rch fleet status`, and `rch fleet deploy`.
///
/// Each field falls back to the `default_fleet_*` helper named in its serde
/// attribute when the key is absent from the deserialized input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FleetConfig {
    /// SSH connection timeout in seconds (default: 10).
    #[serde(default = "default_fleet_ssh_connect_timeout_secs")]
    pub ssh_connect_timeout_secs: u64,
    /// SSH command execution timeout in seconds (default: 30).
    #[serde(default = "default_fleet_ssh_command_timeout_secs")]
    pub ssh_command_timeout_secs: u64,
    /// Minimum required disk space in MB for worker health (default: 500).
    #[serde(default = "default_fleet_min_disk_space_mb")]
    pub min_disk_space_mb: u64,
    /// Maximum acceptable load average for worker health (default: 10.0).
    #[serde(default = "default_fleet_max_load_average")]
    pub max_load_average: f64,
    /// Maximum number of concurrent SSH connections (default: 10).
    #[serde(default = "default_fleet_max_concurrent_workers")]
    pub max_concurrent_workers: usize,
    /// Number of retry attempts for transient failures (default: 2).
    #[serde(default = "default_fleet_retry_count")]
    pub retry_count: u32,
    /// Delay between retries in milliseconds (default: 1000).
    #[serde(default = "default_fleet_retry_delay_ms")]
    pub retry_delay_ms: u64,
}
905
/// Default SSH connection timeout, in seconds.
fn default_fleet_ssh_connect_timeout_secs() -> u64 {
    10_u64
}

/// Default SSH command execution timeout, in seconds.
fn default_fleet_ssh_command_timeout_secs() -> u64 {
    30_u64
}

/// Default minimum free disk space, in megabytes.
fn default_fleet_min_disk_space_mb() -> u64 {
    500_u64
}

/// Default maximum acceptable load average.
fn default_fleet_max_load_average() -> f64 {
    10.0_f64
}

/// Default cap on concurrent SSH connections.
fn default_fleet_max_concurrent_workers() -> usize {
    10_usize
}

/// Default number of retries for transient failures.
fn default_fleet_retry_count() -> u32 {
    2_u32
}

/// Default delay between retries, in milliseconds (one second).
fn default_fleet_retry_delay_ms() -> u64 {
    1_000_u64
}
933
934impl Default for FleetConfig {
935    fn default() -> Self {
936        Self {
937            ssh_connect_timeout_secs: default_fleet_ssh_connect_timeout_secs(),
938            ssh_command_timeout_secs: default_fleet_ssh_command_timeout_secs(),
939            min_disk_space_mb: default_fleet_min_disk_space_mb(),
940            max_load_average: default_fleet_max_load_average(),
941            max_concurrent_workers: default_fleet_max_concurrent_workers(),
942            retry_count: default_fleet_retry_count(),
943            retry_delay_ms: default_fleet_retry_delay_ms(),
944        }
945    }
946}
947
948impl FleetConfig {
949    /// Get SSH connect timeout as Duration.
950    pub fn ssh_connect_timeout(&self) -> std::time::Duration {
951        std::time::Duration::from_secs(self.ssh_connect_timeout_secs)
952    }
953
954    /// Get SSH command timeout as Duration.
955    pub fn ssh_command_timeout(&self) -> std::time::Duration {
956        std::time::Duration::from_secs(self.ssh_command_timeout_secs)
957    }
958
959    /// Get retry delay as Duration.
960    pub fn retry_delay(&self) -> std::time::Duration {
961        std::time::Duration::from_millis(self.retry_delay_ms)
962    }
963}
964
/// General, project-wide RCH settings.
///
/// Keys missing from the deserialized input fall back to the serde defaults
/// named on each field (`default_true`, `default_log_level`,
/// `default_socket_path`), or to `false` for the two force flags.
// NOTE(review): nothing in this struct prevents `force_local` and
// `force_remote` from both being `true`; which one wins is decided elsewhere —
// confirm against the code that consumes these flags.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GeneralConfig {
    /// Whether RCH is enabled.
    #[serde(default = "default_true")]
    pub enabled: bool,
    /// Force local execution for this project (never offload), even if the command is classified.
    ///
    /// Intended for sensitive projects or cases where local state must be authoritative.
    #[serde(default)]
    pub force_local: bool,
    /// Force remote execution for this project (always attempt offload) when safe.
    ///
    /// This bypasses heuristic gating (e.g. confidence threshold overrides) but still respects
    /// structural safety checks and NEVER_INTERCEPT patterns.
    #[serde(default)]
    pub force_remote: bool,
    /// Log level (trace, debug, info, warn, error).
    ///
    /// Stored as a free-form string; this struct does not validate it.
    #[serde(default = "default_log_level")]
    pub log_level: String,
    /// Path to Unix socket for daemon communication.
    #[serde(default = "default_socket_path")]
    pub socket_path: String,
}
988
/// Environment variable passthrough configuration.
///
/// The derived `Default` (and a missing `allowlist` key) yields an empty
/// allowlist.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnvironmentConfig {
    /// Allowlist of environment variables to forward to remote workers.
    #[serde(default)]
    pub allowlist: Vec<String>,
}
996
997impl Default for GeneralConfig {
998    fn default() -> Self {
999        Self {
1000            enabled: true,
1001            force_local: false,
1002            force_remote: false,
1003            log_level: "info".to_string(),
1004            socket_path: default_socket_path(),
1005        }
1006    }
1007}
1008
/// Visibility level for hook output.
///
/// Serialized in lowercase (`"none"`, `"summary"`, `"verbose"`), matching the
/// strings produced by the `Display` implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default, JsonSchema)]
#[serde(rename_all = "lowercase")]
pub enum OutputVisibility {
    /// The default level; displayed and serialized as `none`.
    #[default]
    None,
    /// Displayed and serialized as `summary`.
    Summary,
    /// Displayed and serialized as `verbose`.
    Verbose,
}
1018
1019impl std::fmt::Display for OutputVisibility {
1020    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1021        let value = match self {
1022            OutputVisibility::None => "none",
1023            OutputVisibility::Summary => "summary",
1024            OutputVisibility::Verbose => "verbose",
1025        };
1026        write!(f, "{}", value)
1027    }
1028}
1029
1030impl std::str::FromStr for OutputVisibility {
1031    type Err = ();
1032
1033    fn from_str(s: &str) -> Result<Self, Self::Err> {
1034        match s.trim().to_lowercase().as_str() {
1035            "none" | "silent" | "quiet" => Ok(Self::None),
1036            "summary" | "short" => Ok(Self::Summary),
1037            "verbose" | "debug" => Ok(Self::Verbose),
1038            _ => Err(()),
1039        }
1040    }
1041}
1042
/// Color mode for remote command output.
///
/// Controls whether ANSI color codes are preserved when streaming
/// output from remote compilation commands.
///
/// Serialized in lowercase (`"always"`, `"auto"`, `"never"`); `Always` is the
/// default variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "lowercase")]
pub enum ColorMode {
    /// Force color output regardless of terminal detection.
    /// Sets CARGO_TERM_COLOR=always and similar environment variables.
    #[default]
    Always,
    /// Let the remote command detect terminal capabilities (may lose colors).
    Auto,
    /// Disable color output entirely.
    Never,
}
1059
1060impl std::fmt::Display for ColorMode {
1061    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1062        let value = match self {
1063            ColorMode::Always => "always",
1064            ColorMode::Auto => "auto",
1065            ColorMode::Never => "never",
1066        };
1067        write!(f, "{}", value)
1068    }
1069}
1070
1071impl std::str::FromStr for ColorMode {
1072    type Err = ();
1073
1074    fn from_str(s: &str) -> Result<Self, Self::Err> {
1075        match s.trim().to_lowercase().as_str() {
1076            "always" | "force" | "yes" | "true" => Ok(Self::Always),
1077            "auto" | "detect" => Ok(Self::Auto),
1078            "never" | "none" | "no" | "false" => Ok(Self::Never),
1079            _ => Err(()),
1080        }
1081    }
1082}
1083
1084fn default_color_mode() -> ColorMode {
1085    ColorMode::default()
1086}
1087
/// Hook output configuration.
///
/// Keys missing from the deserialized input fall back to
/// `default_output_visibility`, `false`, and `default_color_mode`
/// respectively.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OutputConfig {
    /// Output visibility level for hook messages.
    #[serde(default = "default_output_visibility")]
    pub visibility: OutputVisibility,
    /// Whether the first-run success message has been shown.
    #[serde(default)]
    pub first_run_complete: bool,
    /// Color mode for remote command output.
    /// Controls whether ANSI color codes are preserved in remote output.
    #[serde(default = "default_color_mode")]
    pub color_mode: ColorMode,
}
1102
1103impl Default for OutputConfig {
1104    fn default() -> Self {
1105        Self {
1106            visibility: OutputVisibility::None,
1107            first_run_complete: false,
1108            color_mode: ColorMode::default(),
1109        }
1110    }
1111}
1112
1113// ============================================================================
1114// Self-Healing Configuration
1115// ============================================================================
1116
/// Default cooldown between daemon auto-start attempts, in seconds.
fn default_autostart_cooldown_secs() -> u64 {
    30_u64
}

/// Default wait for the daemon socket after spawning, in seconds.
fn default_autostart_timeout_secs() -> u64 {
    3_u64
}
1124
/// Log level for self-healing diagnostic output.
///
/// Used to filter `tracing` events emitted by the self-healing subsystem
/// (hook auto-start, daemon hook-install). Read from
/// `RCH_SELF_HEALING_LOG_LEVEL=debug|info|warn|error` (case-insensitive).
///
/// Serialized in lowercase (`"debug"`, `"info"`, `"warn"`, `"error"`).
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum SelfHealingLogLevel {
    /// Serialized as `debug`.
    Debug,
    /// Serialized as `info`; the default level.
    #[default]
    Info,
    /// Serialized as `warn`. `from_env_str` also accepts `warning`.
    Warn,
    /// Serialized as `error`.
    Error,
}
1139
1140impl SelfHealingLogLevel {
1141    /// Parse a human-friendly level name. Returns `None` for unknown input.
1142    pub fn from_env_str(s: &str) -> Option<Self> {
1143        match s.trim().to_ascii_lowercase().as_str() {
1144            "debug" => Some(Self::Debug),
1145            "info" => Some(Self::Info),
1146            "warn" | "warning" => Some(Self::Warn),
1147            "error" => Some(Self::Error),
1148            _ => None,
1149        }
1150    }
1151}
1152
/// Self-healing behaviors for the hook and daemon.
///
/// Values loaded from configuration can be overridden from the environment
/// via `SelfHealingConfig::with_env_overrides`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SelfHealingConfig {
    /// Whether the hook should attempt to auto-start the daemon.
    #[serde(default = "default_true")]
    pub hook_starts_daemon: bool,
    /// Whether the daemon should auto-install Claude Code hooks on startup.
    #[serde(default = "default_true")]
    pub daemon_installs_hooks: bool,
    /// Cooldown between auto-start attempts in seconds (default: 30).
    #[serde(default = "default_autostart_cooldown_secs")]
    pub auto_start_cooldown_secs: u64,
    /// Time to wait for the daemon socket after spawning in seconds (default: 3).
    /// Also aliased as `daemon_start_timeout` for backwards compatibility.
    #[serde(
        default = "default_autostart_timeout_secs",
        alias = "daemon_start_timeout"
    )]
    pub auto_start_timeout_secs: u64,
    /// Log level for self-healing diagnostic events. Default: info.
    /// Override via `RCH_SELF_HEALING_LOG_LEVEL=debug|info|warn|error`.
    #[serde(default)]
    pub self_healing_log_level: SelfHealingLogLevel,
}
1177
1178impl Default for SelfHealingConfig {
1179    fn default() -> Self {
1180        Self {
1181            hook_starts_daemon: default_true(),
1182            daemon_installs_hooks: default_true(),
1183            auto_start_cooldown_secs: default_autostart_cooldown_secs(),
1184            auto_start_timeout_secs: default_autostart_timeout_secs(),
1185            self_healing_log_level: SelfHealingLogLevel::default(),
1186        }
1187    }
1188}
1189
1190impl SelfHealingConfig {
1191    /// Apply environment variable overrides to this config.
1192    ///
1193    /// Supported environment variables:
1194    /// - `RCH_NO_SELF_HEALING=1` - Master switch to disable all self-healing
1195    /// - `RCH_HOOK_STARTS_DAEMON=0|1` - Control hook auto-starting daemon
1196    /// - `RCH_DAEMON_INSTALLS_HOOKS=0|1` - Control daemon auto-installing hooks
1197    /// - `RCH_AUTO_START_TIMEOUT_SECS=<seconds>` - Max wait for daemon start
1198    /// - `RCH_AUTO_START_COOLDOWN_SECS=<seconds>` - Min time between auto-starts
1199    pub fn with_env_overrides(mut self) -> Self {
1200        // Master disable switch
1201        if let Ok(val) = std::env::var("RCH_NO_SELF_HEALING")
1202            && (val == "1" || val.eq_ignore_ascii_case("true"))
1203        {
1204            self.hook_starts_daemon = false;
1205            self.daemon_installs_hooks = false;
1206            return self;
1207        }
1208
1209        // Individual toggles
1210        if let Ok(val) = std::env::var("RCH_HOOK_STARTS_DAEMON") {
1211            self.hook_starts_daemon = val != "0" && !val.eq_ignore_ascii_case("false");
1212        }
1213        if let Ok(val) = std::env::var("RCH_DAEMON_INSTALLS_HOOKS") {
1214            self.daemon_installs_hooks = val != "0" && !val.eq_ignore_ascii_case("false");
1215        }
1216
1217        // Numeric settings
1218        if let Ok(val) = std::env::var("RCH_AUTO_START_TIMEOUT_SECS")
1219            && let Ok(secs) = val.parse()
1220        {
1221            self.auto_start_timeout_secs = secs;
1222        }
1223        if let Ok(val) = std::env::var("RCH_AUTO_START_COOLDOWN_SECS")
1224            && let Ok(secs) = val.parse()
1225        {
1226            self.auto_start_cooldown_secs = secs;
1227        }
1228        // br-4zf3p: log-level override.
1229        if let Ok(val) = std::env::var("RCH_SELF_HEALING_LOG_LEVEL") {
1230            if let Some(level) = SelfHealingLogLevel::from_env_str(&val) {
1231                self.self_healing_log_level = level;
1232            } else {
1233                tracing::warn!(
1234                    target: "rch::self_healing",
1235                    invalid = %val,
1236                    "RCH_SELF_HEALING_LOG_LEVEL has invalid value; using default=info"
1237                );
1238            }
1239        }
1240
1241        self
1242    }
1243}
1244
1245// ============================================================================
1246// Self-Test Configuration
1247// ============================================================================
1248
/// Action to take when a scheduled self-test fails.
///
/// Serialized in snake_case (`"alert"`, `"disable_worker"`,
/// `"alert_and_disable"`); `Alert` is the default.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum SelfTestFailureAction {
    /// Only emit an alert/log entry.
    #[default]
    Alert,
    /// Disable the failing worker automatically.
    DisableWorker,
    /// Alert and disable the worker.
    AlertAndDisable,
}
1261
/// Which workers to include in self-test runs.
///
/// The enum is `#[serde(untagged)]`: a plain string deserializes into `All`
/// and a sequence of worker ids into `List`. Use `SelfTestWorkers::resolve`
/// to turn either form into an optional explicit list.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(untagged)]
pub enum SelfTestWorkers {
    /// Special value "all" (or a single worker id string).
    All(String),
    /// Explicit worker list.
    List(Vec<WorkerId>),
}
1271
1272impl Default for SelfTestWorkers {
1273    fn default() -> Self {
1274        Self::All("all".to_string())
1275    }
1276}
1277
1278impl SelfTestWorkers {
1279    /// Resolve to an explicit list if configured, or None to indicate "all".
1280    pub fn resolve(&self) -> Option<Vec<WorkerId>> {
1281        match self {
1282            SelfTestWorkers::All(value) => {
1283                if value.eq_ignore_ascii_case("all") {
1284                    None
1285                } else {
1286                    Some(vec![WorkerId::new(value.clone())])
1287                }
1288            }
1289            SelfTestWorkers::List(list) => Some(list.clone()),
1290        }
1291    }
1292}
1293
/// Default number of retries for a failed self-test.
fn default_self_test_retry_count() -> u32 {
    3_u32
}

/// Default delay between self-test retries, as a duration string.
fn default_self_test_retry_delay() -> String {
    String::from("5m")
}

/// Scheduled self-tests are off unless explicitly enabled.
fn default_self_test_enabled() -> bool {
    false
}
1305
/// Self-test scheduling and behavior configuration.
///
/// Keys missing from the deserialized input fall back to the defaults noted
/// on each field.
// NOTE(review): `schedule` and `interval` are both optional and independent
// here; how the scheduler picks between them when both are set is not visible
// in this struct — confirm against the consumer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SelfTestConfig {
    /// Enable scheduled self-tests (default: `false`).
    #[serde(default = "default_self_test_enabled")]
    pub enabled: bool,
    /// Cron schedule (e.g., "0 3 * * *").
    #[serde(default)]
    pub schedule: Option<String>,
    /// Interval duration (e.g., "24h").
    #[serde(default)]
    pub interval: Option<String>,
    /// Which workers to test (default: "all").
    #[serde(default)]
    pub workers: SelfTestWorkers,
    /// Action on failure (default: `SelfTestFailureAction::Alert`).
    #[serde(default)]
    pub on_failure: SelfTestFailureAction,
    /// Retry count for failed tests (default: 3).
    #[serde(default = "default_self_test_retry_count")]
    pub retry_count: u32,
    /// Delay between retries, as a duration string (default: "5m").
    #[serde(default = "default_self_test_retry_delay")]
    pub retry_delay: String,
}
1331
1332impl Default for SelfTestConfig {
1333    fn default() -> Self {
1334        Self {
1335            enabled: default_self_test_enabled(),
1336            schedule: None,
1337            interval: None,
1338            workers: SelfTestWorkers::default(),
1339            on_failure: SelfTestFailureAction::default(),
1340            retry_count: default_self_test_retry_count(),
1341            retry_delay: default_self_test_retry_delay(),
1342        }
1343    }
1344}
1345
/// Circuit breaker configuration.
///
/// These thresholds are evaluated by `CircuitStats` (`should_open`,
/// `should_half_open`, `should_close`, `can_probe`). Each field falls back to
/// the `default_circuit_*` helper named in its serde attribute.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CircuitBreakerConfig {
    /// Consecutive failures to open circuit.
    ///
    /// The circuit is eligible to open once the consecutive failure count
    /// reaches this value (the comparison is `>=`).
    #[serde(default = "default_circuit_failure_threshold")]
    pub failure_threshold: u32,
    /// Consecutive successes to close circuit.
    #[serde(default = "default_circuit_success_threshold")]
    pub success_threshold: u32,
    /// Error rate threshold (0.0-1.0) to open circuit.
    ///
    /// Only considered once the rolling window holds at least 5 samples.
    #[serde(default = "default_circuit_error_rate_threshold")]
    pub error_rate_threshold: f64,
    /// Rolling window size in seconds.
    // NOTE(review): `CircuitStats` does not read this field itself; the window
    // is cleared by callers through `CircuitStats::reset_window` — confirm the
    // caller uses this value as the reset period.
    #[serde(default = "default_circuit_window_secs")]
    pub window_secs: u64,
    /// Cooldown duration before half-open (seconds).
    #[serde(default = "default_circuit_open_cooldown_secs")]
    pub open_cooldown_secs: u64,
    /// Max concurrent probes in half-open.
    #[serde(default = "default_circuit_half_open_max_probes")]
    pub half_open_max_probes: u32,
}
1368
1369impl Default for CircuitBreakerConfig {
1370    fn default() -> Self {
1371        Self {
1372            failure_threshold: default_circuit_failure_threshold(),
1373            success_threshold: default_circuit_success_threshold(),
1374            error_rate_threshold: default_circuit_error_rate_threshold(),
1375            window_secs: default_circuit_window_secs(),
1376            open_cooldown_secs: default_circuit_open_cooldown_secs(),
1377            half_open_max_probes: default_circuit_half_open_max_probes(),
1378        }
1379    }
1380}
1381
/// Circuit breaker statistics for tracking state transitions.
///
/// Maintains a rolling window of successes and failures to determine
/// when the circuit should open, close, or enter half-open state.
///
/// Note that the derived `Default` zero-initializes the counters and
/// timestamps (so `last_state_change` is `0`) and takes `state` from
/// `CircuitState::default()`, whereas `CircuitStats::new` stamps
/// `last_state_change` with the current time.
#[derive(Debug, Clone, Default)]
pub struct CircuitStats {
    /// Current circuit state.
    state: CircuitState,
    /// Consecutive failure count (reset on success).
    consecutive_failures: u32,
    /// Consecutive success count in half-open state (reset on failure).
    consecutive_successes: u32,
    /// Total successes in the current rolling window.
    window_successes: u32,
    /// Total failures in the current rolling window.
    window_failures: u32,
    /// Timestamp when circuit entered Open state (epoch millis).
    opened_at: Option<u64>,
    /// Timestamp of last state change (epoch millis).
    last_state_change: u64,
    /// Current active probes in half-open state.
    active_probes: u32,
    /// Recent health check results (true=success, false=failure).
    /// Used for history visualization in status display.
    /// Oldest entry first; capped at `CIRCUIT_HISTORY_SIZE` entries.
    recent_results: Vec<bool>,
}
1408
/// Maximum number of recent results to keep for history visualization.
///
/// Once `CircuitStats::recent_results` holds this many entries, the oldest
/// entry is dropped before a new one is appended.
const CIRCUIT_HISTORY_SIZE: usize = 10;
1411
1412impl CircuitStats {
1413    /// Create new circuit stats in the closed state.
1414    pub fn new() -> Self {
1415        Self {
1416            state: CircuitState::Closed,
1417            consecutive_failures: 0,
1418            consecutive_successes: 0,
1419            window_successes: 0,
1420            window_failures: 0,
1421            opened_at: None,
1422            last_state_change: Self::now_millis(),
1423            active_probes: 0,
1424            recent_results: Vec::with_capacity(CIRCUIT_HISTORY_SIZE),
1425        }
1426    }
1427
1428    /// Get the current circuit state.
1429    pub fn state(&self) -> CircuitState {
1430        self.state
1431    }
1432
1433    /// Get the timestamp when the circuit opened (if open).
1434    pub fn opened_at(&self) -> Option<u64> {
1435        self.opened_at
1436    }
1437
1438    /// Get the timestamp of the last state change.
1439    pub fn last_state_change(&self) -> u64 {
1440        self.last_state_change
1441    }
1442
1443    /// Get the consecutive failure count.
1444    pub fn consecutive_failures(&self) -> u32 {
1445        self.consecutive_failures
1446    }
1447
1448    /// Get the error rate in the current window (0.0-1.0).
1449    pub fn error_rate(&self) -> f64 {
1450        let total = self.window_successes + self.window_failures;
1451        if total == 0 {
1452            return 0.0;
1453        }
1454        self.window_failures as f64 / total as f64
1455    }
1456
1457    /// Record a successful operation.
1458    ///
1459    /// In closed state: resets consecutive failures, increments window successes.
1460    /// In half-open state: increments consecutive successes.
1461    pub fn record_success(&mut self) {
1462        self.window_successes += 1;
1463        self.consecutive_failures = 0;
1464        self.push_result(true);
1465
1466        if self.state == CircuitState::HalfOpen {
1467            self.consecutive_successes += 1;
1468            if self.active_probes > 0 {
1469                self.active_probes -= 1;
1470            }
1471        }
1472    }
1473
1474    /// Record a failed operation.
1475    ///
1476    /// Increments consecutive failures and window failures.
1477    /// In half-open state, resets consecutive successes.
1478    pub fn record_failure(&mut self) {
1479        self.window_failures += 1;
1480        self.consecutive_failures += 1;
1481        self.push_result(false);
1482
1483        if self.state == CircuitState::HalfOpen {
1484            self.consecutive_successes = 0;
1485            if self.active_probes > 0 {
1486                self.active_probes -= 1;
1487            }
1488        }
1489    }
1490
1491    /// Check if the circuit should open based on config thresholds.
1492    ///
1493    /// Returns true if:
1494    /// - Consecutive failures exceed threshold, OR
1495    /// - Error rate exceeds threshold (with minimum sample size)
1496    pub fn should_open(&self, config: &CircuitBreakerConfig) -> bool {
1497        if self.state != CircuitState::Closed {
1498            return false;
1499        }
1500
1501        // Open if consecutive failures exceed threshold
1502        if self.consecutive_failures >= config.failure_threshold {
1503            return true;
1504        }
1505
1506        // Open if error rate exceeds threshold (with minimum 5 samples)
1507        let total = self.window_successes + self.window_failures;
1508        if total >= 5 && self.error_rate() >= config.error_rate_threshold {
1509            return true;
1510        }
1511
1512        false
1513    }
1514
1515    /// Check if the circuit should transition to half-open.
1516    ///
1517    /// Returns true if the circuit is open and the cooldown period has elapsed.
1518    pub fn should_half_open(&self, config: &CircuitBreakerConfig) -> bool {
1519        if self.state != CircuitState::Open {
1520            return false;
1521        }
1522
1523        let now = Self::now_millis();
1524        let cooldown_ms = config.open_cooldown_secs * 1000;
1525
1526        if let Some(opened_at) = self.opened_at {
1527            now.saturating_sub(opened_at) >= cooldown_ms
1528        } else {
1529            false
1530        }
1531    }
1532
1533    /// Check if the circuit should close.
1534    ///
1535    /// Returns true if in half-open state and consecutive successes
1536    /// meet or exceed the success threshold.
1537    pub fn should_close(&self, config: &CircuitBreakerConfig) -> bool {
1538        if self.state != CircuitState::HalfOpen {
1539            return false;
1540        }
1541
1542        self.consecutive_successes >= config.success_threshold
1543    }
1544
1545    /// Check if a probe request can be made in half-open state.
1546    ///
1547    /// Returns true if active probes are below the maximum allowed.
1548    pub fn can_probe(&self, config: &CircuitBreakerConfig) -> bool {
1549        if self.state != CircuitState::HalfOpen {
1550            return false;
1551        }
1552
1553        self.active_probes < config.half_open_max_probes
1554    }
1555
1556    /// Start a probe request in half-open state.
1557    ///
1558    /// Returns true if the probe was started, false if already at max probes.
1559    pub fn start_probe(&mut self, config: &CircuitBreakerConfig) -> bool {
1560        if !self.can_probe(config) {
1561            return false;
1562        }
1563        self.active_probes += 1;
1564        true
1565    }
1566
1567    /// Transition the circuit to open state.
1568    pub fn open(&mut self) {
1569        if self.state != CircuitState::Open {
1570            self.state = CircuitState::Open;
1571            self.opened_at = Some(Self::now_millis());
1572            self.last_state_change = Self::now_millis();
1573            self.consecutive_successes = 0;
1574            self.active_probes = 0;
1575        }
1576    }
1577
1578    /// Transition the circuit to half-open state.
1579    pub fn half_open(&mut self) {
1580        if self.state != CircuitState::HalfOpen {
1581            self.state = CircuitState::HalfOpen;
1582            self.last_state_change = Self::now_millis();
1583            self.consecutive_successes = 0;
1584            self.active_probes = 0;
1585        }
1586    }
1587
1588    /// Transition the circuit to closed state.
1589    pub fn close(&mut self) {
1590        if self.state != CircuitState::Closed {
1591            self.state = CircuitState::Closed;
1592            self.last_state_change = Self::now_millis();
1593            self.opened_at = None;
1594            self.consecutive_failures = 0;
1595            self.consecutive_successes = 0;
1596            self.active_probes = 0;
1597            // Reset the window on close
1598            self.window_successes = 0;
1599            self.window_failures = 0;
1600        }
1601    }
1602
1603    /// Reset the rolling window counters.
1604    ///
1605    /// Called periodically to ensure the window reflects recent activity.
1606    pub fn reset_window(&mut self) {
1607        self.window_successes = 0;
1608        self.window_failures = 0;
1609    }
1610
1611    /// Get recent health check results for history visualization.
1612    ///
1613    /// Returns a slice of recent results (true=success, false=failure),
1614    /// with the most recent result at the end.
1615    pub fn recent_results(&self) -> &[bool] {
1616        &self.recent_results
1617    }
1618
1619    /// Calculate seconds remaining until circuit auto-transitions to half-open.
1620    ///
1621    /// Returns None if circuit is not open or cooldown has already elapsed.
1622    pub fn recovery_remaining_secs(&self, config: &CircuitBreakerConfig) -> Option<u64> {
1623        if self.state != CircuitState::Open {
1624            return None;
1625        }
1626
1627        let now = Self::now_millis();
1628        let cooldown_ms = config.open_cooldown_secs * 1000;
1629
1630        if let Some(opened_at) = self.opened_at {
1631            let elapsed_ms = now.saturating_sub(opened_at);
1632            if elapsed_ms < cooldown_ms {
1633                return Some((cooldown_ms - elapsed_ms) / 1000);
1634            }
1635        }
1636        None
1637    }
1638
1639    /// Push a result to the history, maintaining the maximum size.
1640    fn push_result(&mut self, success: bool) {
1641        if self.recent_results.len() >= CIRCUIT_HISTORY_SIZE {
1642            self.recent_results.remove(0);
1643        }
1644        self.recent_results.push(success);
1645    }
1646
1647    fn now_millis() -> u64 {
1648        std::time::SystemTime::now()
1649            .duration_since(std::time::UNIX_EPOCH)
1650            .map(|d| d.as_millis() as u64)
1651            .unwrap_or(0)
1652    }
1653}
1654
/// Compilation interception configuration: when to offload a command, how many
/// worker slots it is assumed to need, and how long it may run.
///
/// Keys missing from the deserialized input fall back to the serde default
/// helper named on each field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompilationConfig {
    /// Minimum confidence score to intercept (0.0-1.0).
    #[serde(default = "default_confidence")]
    pub confidence_threshold: f64,
    /// Skip interception if estimated local time < this (ms).
    #[serde(default = "default_min_local_time")]
    pub min_local_time_ms: u64,
    /// Minimum expected speedup ratio (local_time / remote_time) to offload.
    /// If predicted speedup < threshold, run locally. Default: 1.2 (20% faster).
    /// Set to 1.0 to always offload when other criteria are met.
    #[serde(default = "default_speedup_threshold")]
    pub remote_speedup_threshold: f64,
    /// Default slot estimate for build commands (cargo build, gcc, etc.).
    /// Defaults to 4.
    #[serde(default = "default_build_slots")]
    pub build_slots: u32,
    /// Default slot estimate for test commands (cargo test, nextest).
    /// Tests typically use more parallelism than builds. Defaults to 8.
    #[serde(default = "default_test_slots")]
    pub test_slots: u32,
    /// Default slot estimate for check/lint commands (cargo check, clippy).
    /// These are typically faster and use fewer resources. Defaults to 2.
    #[serde(default = "default_check_slots")]
    pub check_slots: u32,
    /// Timeout in seconds for build commands.
    /// Build commands (cargo build, gcc, etc.) typically complete faster than tests.
    /// Defaults to 300.
    #[serde(default = "default_build_timeout")]
    pub build_timeout_sec: u64,
    /// Timeout in seconds for test commands.
    /// Test commands often need longer timeouts due to test suite execution time.
    /// Defaults to 1800.
    #[serde(default = "default_test_timeout")]
    pub test_timeout_sec: u64,
    /// Timeout in seconds for bun commands (bun test, bun typecheck).
    /// Bun has known issues where its internal --timeout flag doesn't work for
    /// CPU-bound hangs, so we use a shorter external timeout by default.
    #[serde(default = "default_bun_timeout")]
    pub bun_timeout_sec: u64,
    /// Whether to wrap remote commands with external timeout protection.
    /// This prevents runaway/stuck processes from consuming worker slots indefinitely.
    /// Default: true. Set to false to disable timeout wrapping entirely.
    #[serde(default = "default_external_timeout_enabled")]
    pub external_timeout_enabled: bool,
}
1698
1699impl Default for CompilationConfig {
1700    fn default() -> Self {
1701        Self {
1702            confidence_threshold: 0.85,
1703            min_local_time_ms: 2000,
1704            remote_speedup_threshold: default_speedup_threshold(),
1705            build_slots: default_build_slots(),
1706            test_slots: default_test_slots(),
1707            check_slots: default_check_slots(),
1708            build_timeout_sec: default_build_timeout(),
1709            test_timeout_sec: default_test_timeout(),
1710            bun_timeout_sec: default_bun_timeout(),
1711            external_timeout_enabled: default_external_timeout_enabled(),
1712        }
1713    }
1714}
1715
/// Default slot estimate for build commands.
fn default_build_slots() -> u32 {
    const BUILD_SLOTS: u32 = 4;
    BUILD_SLOTS
}
1719
/// Default slot estimate for test commands.
fn default_test_slots() -> u32 {
    const TEST_SLOTS: u32 = 8;
    TEST_SLOTS
}
1723
/// Default slot estimate for check/lint commands.
fn default_check_slots() -> u32 {
    const CHECK_SLOTS: u32 = 2;
    CHECK_SLOTS
}
1727
/// Default timeout for build commands, in seconds: 5 minutes.
fn default_build_timeout() -> u64 {
    const SECS_PER_MINUTE: u64 = 60;
    5 * SECS_PER_MINUTE
}
1733
/// Default timeout for test commands, in seconds: 30 minutes.
fn default_test_timeout() -> u64 {
    const SECS_PER_MINUTE: u64 = 60;
    30 * SECS_PER_MINUTE
}
1739
/// Default timeout for bun commands, in seconds: 10 minutes.
///
/// Kept shorter than the test timeout because bun's internal `--timeout`
/// flag does not cover CPU-bound hangs.
fn default_bun_timeout() -> u64 {
    const SECS_PER_MINUTE: u64 = 60;
    10 * SECS_PER_MINUTE
}
1746
/// External timeout protection is on unless the config turns it off.
fn default_external_timeout_enabled() -> bool {
    const ENABLED_BY_DEFAULT: bool = true;
    ENABLED_BY_DEFAULT
}
1751
1752impl CompilationConfig {
1753    /// Returns the appropriate external timeout for the given compilation kind.
1754    ///
1755    /// - Bun commands get the shorter bun_timeout (to protect against known hang issues)
1756    /// - Test commands get the longer test_timeout
1757    /// - All other commands (builds, checks, clippy) get the build_timeout
1758    pub fn timeout_for_kind(&self, kind: Option<CompilationKind>) -> std::time::Duration {
1759        let secs = match kind {
1760            // Bun has known issues where internal timeout doesn't work for CPU hangs
1761            Some(CompilationKind::BunTest) | Some(CompilationKind::BunTypecheck) => {
1762                self.bun_timeout_sec
1763            }
1764            // Other test commands use the standard test timeout
1765            Some(CompilationKind::CargoTest) | Some(CompilationKind::CargoNextest) => {
1766                self.test_timeout_sec
1767            }
1768            // Builds, checks, clippy, and unknown use build timeout
1769            _ => self.build_timeout_sec,
1770        };
1771        std::time::Duration::from_secs(secs)
1772    }
1773
1774    /// Returns whether external timeout wrapping is enabled.
1775    pub fn external_timeout_enabled(&self) -> bool {
1776        self.external_timeout_enabled
1777    }
1778}
1779
/// Settings for moving project files to and from workers: compression,
/// exclusion patterns, SSH options, the remote base directory, retry policy,
/// artifact verification, and optional size/time/bandwidth limits.
///
/// Every field has a serde default; the optional limits are omitted from
/// serialized output when unset.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TransferConfig {
    /// zstd compression level (1-19).
    #[serde(default = "default_compression")]
    pub compression_level: u32,
    /// Patterns to exclude from transfer.
    #[serde(default = "default_excludes")]
    pub exclude_patterns: Vec<String>,
    /// SSH keepalive interval in seconds (`ssh -o ServerAliveInterval=<N>`).
    ///
    /// When unset, OpenSSH defaults apply (keepalive disabled).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub ssh_server_alive_interval_secs: Option<u64>,
    /// SSH ControlPersist idle timeout in seconds (`ssh -o ControlPersist=<N>s`).
    ///
    /// - Unset preserves OpenSSH defaults (ControlPersist enabled by default via mux).
    /// - `0` disables persistence (`ControlPersist=no`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub ssh_control_persist_secs: Option<u64>,
    /// Remote base directory for project sync and build execution.
    /// Must be an absolute path. Defaults to /tmp/rch.
    #[serde(default = "default_remote_base")]
    pub remote_base: String,
    /// Retry policy for transient network errors during transfer.
    #[serde(default)]
    pub retry: RetryConfig,
    /// Verify artifact integrity using blake3 hashes after transfer (bd-377q).
    ///
    /// When enabled, computes blake3 hashes of key artifacts (binaries, test outputs)
    /// on the worker after build, then verifies them locally after download.
    /// Disabled by default to avoid extra overhead.
    #[serde(default)]
    pub verify_artifacts: bool,
    /// Maximum file size (bytes) for artifact verification (bd-377q).
    ///
    /// Files larger than this limit are skipped during verification to avoid
    /// excessive I/O. Defaults to 100MB.
    #[serde(default = "default_verify_max_size")]
    pub verify_max_size_bytes: u64,

    // =========================================================================
    // Transfer Optimization (bd-3hho)
    // =========================================================================
    /// Maximum transfer size in MB before skipping remote execution.
    ///
    /// When set, runs `rsync --dry-run --stats` to estimate transfer size.
    /// If the estimated size exceeds this threshold, remote offload is skipped
    /// and the command runs locally. Set to `None` (default) to disable.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_transfer_mb: Option<u64>,

    /// Maximum estimated transfer time in milliseconds before skipping remote.
    ///
    /// Uses `estimated_bandwidth_bps` (or measured link speed) to calculate
    /// expected transfer time. If it exceeds this threshold, remote offload
    /// is skipped. Set to `None` (default) to disable.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_transfer_time_ms: Option<u64>,

    /// Bandwidth limit for rsync in KB/s.
    ///
    /// Passed as `rsync --bwlimit=<N>` to limit transfer speed and prevent
    /// network saturation. `None` (the default) or `0` means unlimited.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub bwlimit_kbps: Option<u64>,

    /// Estimated link bandwidth in bytes per second.
    ///
    /// Used for transfer time estimation when `max_transfer_time_ms` is set.
    /// If not provided, defaults to 10 MB/s (10485760 bytes/sec).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub estimated_bandwidth_bps: Option<u64>,

    // =========================================================================
    // Adaptive Compression (bd-243w)
    // =========================================================================
    /// Enable adaptive zstd compression based on transfer size (bd-243w).
    ///
    /// When enabled, automatically selects compression level based on estimated
    /// payload size:
    /// - < 10MB: level 1 (fast, minimal compression)
    /// - 10-200MB: level 3 (balanced, default)
    /// - > 200MB: level 7 (slower, better compression)
    ///
    /// When disabled (default), uses the fixed `compression_level` setting.
    #[serde(default)]
    pub adaptive_compression: bool,

    /// Minimum compression level for adaptive mode (bd-243w).
    ///
    /// Even in adaptive mode, compression level won't go below this.
    /// Defaults to 1.
    #[serde(default = "default_min_compression")]
    pub min_compression_level: u32,

    /// Maximum compression level for adaptive mode (bd-243w).
    ///
    /// Even in adaptive mode, compression level won't exceed this.
    /// Defaults to 9 (avoids CPU-intensive levels 10+).
    #[serde(default = "default_max_compression")]
    pub max_compression_level: u32,
}
1882
1883impl Default for TransferConfig {
1884    fn default() -> Self {
1885        Self {
1886            compression_level: 3,
1887            exclude_patterns: default_excludes(),
1888            ssh_server_alive_interval_secs: None,
1889            ssh_control_persist_secs: None,
1890            remote_base: default_remote_base(),
1891            retry: RetryConfig::default(),
1892            verify_artifacts: false,
1893            verify_max_size_bytes: default_verify_max_size(),
1894            // Transfer optimization (bd-3hho)
1895            max_transfer_mb: None,
1896            max_transfer_time_ms: None,
1897            bwlimit_kbps: None,
1898            estimated_bandwidth_bps: None,
1899            // Adaptive compression (bd-243w)
1900            adaptive_compression: false,
1901            min_compression_level: default_min_compression(),
1902            max_compression_level: default_max_compression(),
1903        }
1904    }
1905}
1906
1907impl TransferConfig {
1908    /// Select compression level based on estimated transfer size (bd-243w).
1909    ///
1910    /// When adaptive compression is enabled, selects an appropriate level
1911    /// based on payload size:
1912    /// - < 10MB: level 1 (fast, minimal compression)
1913    /// - 10-200MB: level 3 (balanced)
1914    /// - > 200MB: level 7 (better compression)
1915    ///
1916    /// The result is clamped between `min_compression_level` and `max_compression_level`.
1917    ///
1918    /// When adaptive compression is disabled, returns the fixed `compression_level`.
1919    pub fn select_compression_level(&self, estimated_bytes: Option<u64>) -> u32 {
1920        if !self.adaptive_compression {
1921            return self.compression_level;
1922        }
1923
1924        let Some(bytes) = estimated_bytes else {
1925            // No estimate available, use default
1926            return self.compression_level;
1927        };
1928
1929        // Size thresholds
1930        const SMALL_THRESHOLD: u64 = 10_000_000; // 10 MB (decimal)
1931        const LARGE_THRESHOLD: u64 = 200_000_000; // 200 MB (decimal)
1932
1933        let level = if bytes < SMALL_THRESHOLD {
1934            1 // Fast compression for small payloads
1935        } else if bytes < LARGE_THRESHOLD {
1936            3 // Balanced for medium payloads
1937        } else {
1938            7 // Better compression for large payloads
1939        };
1940
1941        // Clamp to configured bounds
1942        level.clamp(self.min_compression_level, self.max_compression_level)
1943    }
1944}
1945
1946// =============================================================================
1947// Execution Configuration (bd-785w)
1948// =============================================================================
1949
/// Command base names permitted for remote execution out of the box.
/// Aligned with the classifier's supported CompilationKind values.
fn default_execution_allowlist() -> Vec<String> {
    const ALLOWED: &[&str] = &[
        // Rust
        "cargo",
        "rustc",
        // cargo-nextest
        "nextest",
        // C/C++
        "gcc",
        "g++",
        "clang",
        "clang++",
        "cc",
        "c++",
        // Build systems
        "make",
        "cmake",
        "ninja",
        "meson",
        // Bun
        "bun",
    ];

    ALLOWED.iter().map(|name| String::from(*name)).collect()
}
1975
/// Execution allowlist configuration for remote builds (bd-785w).
///
/// Controls which command base names are permitted for remote execution.
/// Commands not in the allowlist will fail-open to local execution.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExecutionConfig {
    /// Allowlist of command base names permitted for remote execution.
    ///
    /// Examples: "cargo", "rustc", "gcc", "bun"
    ///
    /// If a classified compilation command's base name is not in this list,
    /// RCH will fail-open to local execution with a clear reason.
    ///
    /// An empty allowlist disables all remote execution (local only).
    ///
    /// Entries are compared ASCII case-insensitively by `is_allowed`.
    /// When the key is absent from the config, the built-in default list is used.
    #[serde(default = "default_execution_allowlist")]
    pub allowlist: Vec<String>,
}
1993
1994impl Default for ExecutionConfig {
1995    fn default() -> Self {
1996        Self {
1997            allowlist: default_execution_allowlist(),
1998        }
1999    }
2000}
2001
2002impl ExecutionConfig {
2003    /// Check if a command base name is allowed for remote execution.
2004    pub fn is_allowed(&self, command_base: &str) -> bool {
2005        self.allowlist
2006            .iter()
2007            .any(|allowed| allowed.eq_ignore_ascii_case(command_base))
2008    }
2009}
2010
2011// =============================================================================
2012// Retry Configuration (bd-x1ek)
2013// =============================================================================
2014
/// Default number of attempts, counting the initial one.
fn default_retry_max_attempts() -> u32 {
    const MAX_ATTEMPTS: u32 = 3;
    MAX_ATTEMPTS
}
2018
/// Default initial delay between retries, in milliseconds.
fn default_retry_base_delay_ms() -> u64 {
    const BASE_DELAY_MS: u64 = 100;
    BASE_DELAY_MS
}
2022
/// Default cap on the backoff delay, in milliseconds (5 seconds).
fn default_retry_max_delay_ms() -> u64 {
    const MILLIS_PER_SEC: u64 = 1000;
    5 * MILLIS_PER_SEC
}
2026
/// Default jitter factor: 0.1, i.e. ±10% randomization of the delay.
fn default_retry_jitter_factor() -> f64 {
    const JITTER_FACTOR: f64 = 0.1;
    JITTER_FACTOR
}
2030
/// Default overall retry budget, in milliseconds (30 seconds).
fn default_retry_total_timeout_ms() -> u64 {
    const MILLIS_PER_SEC: u64 = 1000;
    30 * MILLIS_PER_SEC
}
2034
/// Configuration for retry behavior on transient errors.
///
/// Used by rsync and SSH operations to recover from transient network failures
/// without blocking too long or retrying non-recoverable errors.
///
/// Defaults: 3 attempts, 100 ms base delay, 5000 ms delay cap, 0.1 jitter,
/// 30000 ms total budget.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RetryConfig {
    /// Maximum number of retry attempts (1 = no retries, just initial attempt).
    #[serde(default = "default_retry_max_attempts")]
    pub max_attempts: u32,

    /// Initial delay between retries in milliseconds.
    #[serde(default = "default_retry_base_delay_ms")]
    pub base_delay_ms: u64,

    /// Maximum delay between retries in milliseconds (caps exponential backoff).
    ///
    /// The cap is applied before jitter, so a jittered delay can exceed it
    /// by up to `jitter_factor`.
    #[serde(default = "default_retry_max_delay_ms")]
    pub max_delay_ms: u64,

    /// Jitter factor (0.0 - 1.0) to randomize delay.
    /// A value of 0.1 means ±10% randomization.
    /// Jitter is only applied when this value is greater than 0.0.
    #[serde(default = "default_retry_jitter_factor")]
    pub jitter_factor: f64,

    /// Total timeout for all retry attempts in milliseconds.
    /// After this, retries stop even if max_attempts not reached.
    #[serde(default = "default_retry_total_timeout_ms")]
    pub total_timeout_ms: u64,
}
2063
2064impl Default for RetryConfig {
2065    fn default() -> Self {
2066        Self {
2067            max_attempts: default_retry_max_attempts(),
2068            base_delay_ms: default_retry_base_delay_ms(),
2069            max_delay_ms: default_retry_max_delay_ms(),
2070            jitter_factor: default_retry_jitter_factor(),
2071            total_timeout_ms: default_retry_total_timeout_ms(),
2072        }
2073    }
2074}
2075
2076impl RetryConfig {
2077    /// Create a config that disables retries (single attempt only).
2078    pub fn no_retry() -> Self {
2079        Self {
2080            max_attempts: 1,
2081            ..Default::default()
2082        }
2083    }
2084
2085    /// Calculate delay for a given attempt number (0-indexed).
2086    ///
2087    /// Uses exponential backoff with optional jitter.
2088    pub fn delay_for_attempt(&self, attempt: u32) -> std::time::Duration {
2089        if attempt == 0 {
2090            return std::time::Duration::ZERO;
2091        }
2092
2093        // Exponential backoff: base * 2^attempt
2094        let base = self.base_delay_ms as f64;
2095        let delay = base * (2.0_f64.powi(attempt as i32 - 1));
2096        let capped = delay.min(self.max_delay_ms as f64);
2097
2098        // Apply jitter: delay ± (delay * jitter_factor)
2099        let jittered = if self.jitter_factor > 0.0 {
2100            let jitter_range = capped * self.jitter_factor;
2101            let jitter = (rand::rng().random::<f64>() * 2.0 - 1.0) * jitter_range;
2102            (capped + jitter).max(0.0)
2103        } else {
2104            capped
2105        };
2106
2107        std::time::Duration::from_millis(jittered as u64)
2108    }
2109
2110    /// Check if we should retry given elapsed time since start.
2111    pub fn should_retry(&self, attempt: u32, elapsed: std::time::Duration) -> bool {
2112        if attempt >= self.max_attempts {
2113            return false;
2114        }
2115        elapsed.as_millis() < self.total_timeout_ms as u128
2116    }
2117}
2118
/// Remote base directory used for project transfers when none is configured.
pub fn default_remote_base() -> String {
    String::from("/tmp/rch")
}
2123
2124/// Validate and normalize a remote base path.
2125/// Returns an error if the path is not absolute or contains traversal sequences.
2126pub fn validate_remote_base(path: &str) -> Result<String, String> {
2127    // Expand tilde for user convenience
2128    let expanded = shellexpand::tilde(path).into_owned();
2129
2130    // Check for absolute path
2131    if !expanded.starts_with('/') {
2132        return Err(format!(
2133            "remote_base must be an absolute path, got: {}",
2134            path
2135        ));
2136    }
2137
2138    // Check for path traversal attempts
2139    if expanded.contains("..") {
2140        return Err(format!(
2141            "remote_base must not contain path traversal (..): {}",
2142            path
2143        ));
2144    }
2145
2146    // Normalize: remove trailing slashes
2147    let normalized = expanded.trim_end_matches('/').to_string();
2148
2149    // Safety checks
2150    if normalized.is_empty() || normalized == "/" {
2151        return Err("remote_base cannot be the root directory (safety restriction)".to_string());
2152    }
2153
2154    // Enforce at least 2 levels deep (e.g. /tmp/rch, /home/user/rch)
2155    // /tmp -> depth 1 (unsafe for recursive delete)
2156    // /tmp/rch -> depth 2 (safe)
2157    let components: Vec<&str> = normalized.split('/').filter(|c| !c.is_empty()).collect();
2158    if components.len() < 2 {
2159        return Err(format!(
2160            "remote_base must be at least 2 levels deep (e.g. /tmp/rch), got: {}",
2161            normalized
2162        ));
2163    }
2164
2165    Ok(normalized)
2166}
2167
/// Default value `3`; by its name, the circuit-breaker failure threshold
/// (the consuming config is outside this section).
fn default_circuit_failure_threshold() -> u32 {
    const FAILURE_THRESHOLD: u32 = 3;
    FAILURE_THRESHOLD
}
2171
/// Default value `2`; by its name, the circuit-breaker success threshold
/// (the consuming config is outside this section).
fn default_circuit_success_threshold() -> u32 {
    const SUCCESS_THRESHOLD: u32 = 2;
    SUCCESS_THRESHOLD
}
2175
/// Default value `0.5`; by its name, the circuit-breaker error-rate threshold
/// (the consuming config is outside this section).
fn default_circuit_error_rate_threshold() -> f64 {
    const ERROR_RATE_THRESHOLD: f64 = 0.5;
    ERROR_RATE_THRESHOLD
}
2179
/// Default value `60`; by its name, the circuit-breaker window in seconds
/// (the consuming config is outside this section).
fn default_circuit_window_secs() -> u64 {
    const WINDOW_SECS: u64 = 60;
    WINDOW_SECS
}
2183
/// Default value `30`; by its name, the open-circuit cooldown in seconds
/// (the consuming config is outside this section).
fn default_circuit_open_cooldown_secs() -> u64 {
    const OPEN_COOLDOWN_SECS: u64 = 30;
    OPEN_COOLDOWN_SECS
}
2187
/// Default value `1`; by its name, the probe limit while a circuit is half-open
/// (the consuming config is outside this section).
fn default_circuit_half_open_max_probes() -> u32 {
    const HALF_OPEN_MAX_PROBES: u32 = 1;
    HALF_OPEN_MAX_PROBES
}
2191
/// Always returns `true`; presumably a serde default helper for boolean
/// fields declared outside this section.
fn default_true() -> bool {
    const ON: bool = true;
    ON
}
2195
/// Default log level name: `"info"`.
fn default_log_level() -> String {
    String::from("info")
}
2199
2200pub fn default_socket_path() -> String {
2201    // Prefer XDG_RUNTIME_DIR if available (per-user runtime directory).
2202    if let Ok(runtime_dir) = std::env::var("XDG_RUNTIME_DIR")
2203        && !runtime_dir.trim().is_empty()
2204    {
2205        let path = PathBuf::from(runtime_dir).join("rch.sock");
2206        return path.to_string_lossy().to_string();
2207    }
2208
2209    // Fallback to ~/.cache/rch/rch.sock (persistent across reboots).
2210    if let Some(cache_dir) = dirs::cache_dir() {
2211        let rch_cache = cache_dir.join("rch");
2212        let _ = std::fs::create_dir_all(&rch_cache);
2213        return rch_cache.join("rch.sock").to_string_lossy().to_string();
2214    }
2215
2216    // Last resort: /tmp/rch.sock
2217    "/tmp/rch.sock".to_string()
2218}
2219
2220#[allow(dead_code)]
2221fn default_output_visibility() -> OutputVisibility {
2222    OutputVisibility::None
2223}
2224
/// Default minimum confidence score required to intercept a command.
fn default_confidence() -> f64 {
    const CONFIDENCE_THRESHOLD: f64 = 0.85;
    CONFIDENCE_THRESHOLD
}
2228
/// Default estimated local time, in milliseconds, below which interception is skipped.
fn default_min_local_time() -> u64 {
    const MILLIS_PER_SEC: u64 = 1000;
    2 * MILLIS_PER_SEC
}
2232
/// Default speedup ratio required before offloading: 1.2 (20% faster).
///
/// Deliberately conservative, so builds that would be only marginally
/// faster remotely after transfer overhead stay local.
fn default_speedup_threshold() -> f64 {
    const SPEEDUP_THRESHOLD: f64 = 1.2;
    SPEEDUP_THRESHOLD
}
2239
/// Default fixed zstd compression level.
fn default_compression() -> u32 {
    const COMPRESSION_LEVEL: u32 = 3;
    COMPRESSION_LEVEL
}
2243
/// Default size ceiling for artifact verification: 100 MiB, in bytes.
fn default_verify_max_size() -> u64 {
    const MIB: u64 = 1024 * 1024;
    100 * MIB
}
2248
/// Lower bound on the compression level chosen in adaptive mode.
fn default_min_compression() -> u32 {
    const MIN_LEVEL: u32 = 1;
    MIN_LEVEL
}
2253
/// Upper bound on the compression level chosen in adaptive mode.
fn default_max_compression() -> u32 {
    const MAX_LEVEL: u32 = 9;
    MAX_LEVEL
}
2258
/// Built-in transfer exclusion patterns: build outputs, dependency and
/// framework caches, local runtime state, coverage reports, and files that
/// commonly hold credentials or secrets.
fn default_excludes() -> Vec<String> {
    const PATTERNS: &[&str] = &[
        // Rust build artifacts
        "target/",
        "*.rlib",
        "*.rmeta",
        // Git objects (large, regenerated on clone)
        ".git/objects/",
        // Node.js / Bun dependencies (massive, reinstalled on worker)
        "node_modules/",
        // Bun cache and runtime files
        ".bun/",
        // npm / pnpm caches
        ".npm/",
        ".pnpm-store/",
        // Common build output directories
        "dist/",
        "build/",
        // Framework-specific caches
        ".next/",
        ".nuxt/",
        ".turbo/",
        ".parcel-cache/",
        // Beads runtime state (local issue DB + lock/temp files); not needed remotely.
        ".beads/",
        // Coverage reports (generated during tests)
        "coverage/",
        ".nyc_output/",
        // Credentials and secrets (must never be transferred to workers)
        ".cargo/credentials",
        ".cargo/credentials.toml",
        ".env",
        ".env.*",
        "*.pem",
        "*.key",
        "credentials.json",
        "secrets.json",
        "secrets.yaml",
        "secrets.yml",
        "secrets.toml",
        "secrets.env",
        "secrets.txt",
        "secrets.conf",
        "secrets.cfg",
        "secrets.properties",
        "secrets.xml",
    ];

    PATTERNS.iter().map(|pattern| String::from(*pattern)).collect()
}
2307
2308// ============================================================================
2309// Build History Types
2310// ============================================================================
2311
/// Location where a build was executed.
///
/// Serialized in snake_case (`local`, `remote`); `Local` is the default.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum BuildLocation {
    /// Build executed locally.
    #[default]
    Local,
    /// Build executed on a remote worker.
    Remote,
}
2322
/// Worker health snapshot captured when a cancellation completes.
///
/// No field carries a serde default, so all of them must be present when
/// deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BuildCancellationWorkerHealth {
    /// Worker status at cancellation completion.
    pub status: String,
    /// Worker speed score at cancellation completion.
    pub speed_score: f64,
    /// Slots currently used on the worker.
    pub used_slots: u32,
    /// Slots currently available on the worker.
    pub available_slots: u32,
    /// Normalized pressure state.
    pub pressure_state: String,
    /// Stable pressure reason code.
    pub pressure_reason_code: String,
}
2339
/// Structured cancellation metadata attached to cancelled build records.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BuildCancellationMetadata {
    /// Stable cancellation operation identifier.
    pub operation_id: String,
    /// Cancellation origin (for example `user`, `timeout`).
    pub origin: String,
    /// Stable reason code for cancellation.
    pub reason_code: String,
    /// Ordered cancellation state path.
    /// Deserializes to an empty list when the key is absent.
    #[serde(default)]
    pub decision_path: Vec<String>,
    /// Highest escalation stage reached (`term`, `remote_kill`, `sigkill`).
    pub escalation_stage: String,
    /// Number of escalation transitions taken.
    pub escalation_count: u32,
    /// Whether remote kill was attempted.
    pub remote_kill_attempted: bool,
    /// Whether cleanup completed successfully.
    pub cleanup_ok: bool,
    /// Whether active build history was successfully claimed/cancelled.
    pub history_cancelled: bool,
    /// Final cancellation state.
    pub final_state: String,
    /// Worker health snapshot at completion.
    /// Optional on input and omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub worker_health: Option<BuildCancellationWorkerHealth>,
}
2368
/// Record of a completed build.
///
/// Has the same fields as `BuildRecordInput` plus the assigned `id`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BuildRecord {
    /// Unique build identifier.
    pub id: u64,
    /// When the build started (ISO 8601 timestamp).
    pub started_at: String,
    /// When the build completed (ISO 8601 timestamp).
    pub completed_at: String,
    /// Project identifier (usually directory name or hash).
    pub project_id: String,
    /// Worker that executed the build (None if local).
    pub worker_id: Option<String>,
    /// Full command executed.
    pub command: String,
    /// Exit code (0 = success).
    pub exit_code: i32,
    /// Duration in milliseconds.
    pub duration_ms: u64,
    /// Build location (local or remote).
    pub location: BuildLocation,
    /// Bytes transferred (if remote).
    pub bytes_transferred: Option<u64>,
    /// Optional timing breakdown for the pipeline.
    /// Deserializes to `None` when the key is absent.
    #[serde(default)]
    pub timing: Option<CommandTimingBreakdown>,
    /// Structured cancellation metadata for cancelled builds.
    /// Optional on input and omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cancellation: Option<BuildCancellationMetadata>,
}
2399
/// Input payload for recording a completed build.
///
/// Carries every `BuildRecord` field except `id`, which is supplied to
/// `into_record`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BuildRecordInput {
    /// When the build started (ISO 8601 timestamp).
    pub started_at: String,
    /// When the build completed (ISO 8601 timestamp).
    pub completed_at: String,
    /// Project identifier (usually directory name or hash).
    pub project_id: String,
    /// Worker that executed the build (None if local).
    pub worker_id: Option<String>,
    /// Full command executed.
    pub command: String,
    /// Exit code (0 = success).
    pub exit_code: i32,
    /// Duration in milliseconds.
    pub duration_ms: u64,
    /// Build location (local or remote).
    pub location: BuildLocation,
    /// Bytes transferred (if remote).
    pub bytes_transferred: Option<u64>,
    /// Optional timing breakdown for the pipeline.
    /// Deserializes to `None` when the key is absent.
    #[serde(default)]
    pub timing: Option<CommandTimingBreakdown>,
    /// Structured cancellation metadata for cancelled builds.
    /// Optional on input and omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cancellation: Option<BuildCancellationMetadata>,
}
2428
2429impl BuildRecordInput {
2430    /// Convert input payload into a build record with a supplied ID.
2431    pub fn into_record(self, id: u64) -> BuildRecord {
2432        BuildRecord {
2433            id,
2434            started_at: self.started_at,
2435            completed_at: self.completed_at,
2436            project_id: self.project_id,
2437            worker_id: self.worker_id,
2438            command: self.command,
2439            exit_code: self.exit_code,
2440            duration_ms: self.duration_ms,
2441            location: self.location,
2442            bytes_transferred: self.bytes_transferred,
2443            timing: self.timing,
2444            cancellation: self.cancellation,
2445        }
2446    }
2447}
2448
/// Aggregate statistics for build history.
///
/// The derived `Default` yields all-zero statistics.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct BuildStats {
    /// Total number of builds in history.
    pub total_builds: usize,
    /// Number of successful builds (exit_code == 0).
    pub success_count: usize,
    /// Number of failed builds (exit_code != 0).
    pub failure_count: usize,
    /// Number of remote builds.
    pub remote_count: usize,
    /// Number of local builds.
    pub local_count: usize,
    /// Average build duration in milliseconds.
    pub avg_duration_ms: u64,
}
2465
/// Saved time statistics from remote builds.
///
/// Tracks estimated time savings from offloading builds to remote workers.
/// Uses local build history to estimate what remote builds would have taken locally.
///
/// The derived `Default` yields all-zero statistics.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct SavedTimeStats {
    /// Total time spent on remote builds (milliseconds).
    pub total_remote_duration_ms: u64,
    /// Estimated total time if those builds ran locally (milliseconds).
    pub estimated_local_duration_ms: u64,
    /// Time saved by offloading (milliseconds).
    /// Computed as max(0, estimated_local - remote).
    pub time_saved_ms: u64,
    /// Number of remote builds included in calculation.
    pub builds_counted: usize,
    /// Average speedup factor (local_estimate / remote_duration).
    /// A value of 2.0 means remote builds are ~2x faster than local.
    pub avg_speedup: f64,
    /// Total time saved today (milliseconds).
    pub today_saved_ms: u64,
    /// Total time saved this week (milliseconds).
    pub week_saved_ms: u64,
}
2489
2490// ============================================================================
2491// Compilation Timing and Metrics
2492// ============================================================================
2493
2494use std::collections::VecDeque;
2495use std::time::{Duration, Instant};
2496
/// Breakdown of timing for each compilation phase.
///
/// Each duration is (de)serialized as an integer number of milliseconds via
/// the `duration_millis` module, so sub-millisecond precision is not kept.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct CompilationTimingBreakdown {
    /// Time to sync source files to worker.
    #[serde(with = "duration_millis")]
    pub rsync_up: Duration,
    /// Time for cargo build on worker.
    #[serde(with = "duration_millis")]
    pub remote_build: Duration,
    /// Time to sync artifacts back.
    #[serde(with = "duration_millis")]
    pub rsync_down: Duration,
    /// Total end-to-end latency.
    #[serde(with = "duration_millis")]
    pub total: Duration,
}
2513
2514/// Timing breakdown for a single command pipeline.
2515///
2516/// Uses optional durations so missing phases serialize as `null`.
2517#[derive(Debug, Clone, Default, Serialize, Deserialize)]
2518pub struct CommandTimingBreakdown {
2519    /// Time spent classifying the command.
2520    #[serde(with = "option_duration_millis")]
2521    pub classify: Option<Duration>,
2522    /// Time spent selecting a worker.
2523    #[serde(with = "option_duration_millis")]
2524    pub select: Option<Duration>,
2525    /// Time spent syncing source files to the worker.
2526    #[serde(with = "option_duration_millis")]
2527    pub sync_up: Option<Duration>,
2528    /// Time spent executing the remote command.
2529    #[serde(with = "option_duration_millis")]
2530    pub exec: Option<Duration>,
2531    /// Time spent syncing artifacts back.
2532    #[serde(with = "option_duration_millis")]
2533    pub sync_down: Option<Duration>,
2534    /// Time spent on cleanup (slot release, bookkeeping).
2535    #[serde(with = "option_duration_millis")]
2536    pub cleanup: Option<Duration>,
2537    /// Total end-to-end time for the pipeline.
2538    #[serde(with = "option_duration_millis")]
2539    pub total: Option<Duration>,
2540}
2541
2542mod duration_millis {
2543    use serde::{Deserialize, Deserializer, Serialize, Serializer};
2544    use std::time::Duration;
2545
2546    pub fn serialize<S>(duration: &Duration, serializer: S) -> Result<S::Ok, S::Error>
2547    where
2548        S: Serializer,
2549    {
2550        duration.as_millis().serialize(serializer)
2551    }
2552
2553    pub fn deserialize<'de, D>(deserializer: D) -> Result<Duration, D::Error>
2554    where
2555        D: Deserializer<'de>,
2556    {
2557        let millis = u64::deserialize(deserializer)?;
2558        Ok(Duration::from_millis(millis))
2559    }
2560}
2561
/// Comprehensive metrics for a single compilation.
///
/// Combines identification (project, worker, timestamp), the per-phase timing
/// breakdown, transfer volume, and the outcome of one compilation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompilationMetrics {
    /// Project identifier (usually directory name or hash).
    pub project_id: String,
    /// Worker that performed the compilation.
    pub worker_id: String,
    /// When the compilation occurred.
    pub timestamp: chrono::DateTime<chrono::Utc>,
    /// Timing breakdown for each phase.
    pub timing: CompilationTimingBreakdown,
    /// Local build time for comparison (if available).
    ///
    /// Serialized as integer milliseconds, or `null` when absent.
    // NOTE(review): because this field goes through `with`, the key presumably
    // has to be present (possibly `null`) when deserializing, unlike `speedup`
    // below — confirm against serde's handling of missing `Option` fields.
    #[serde(with = "option_duration_millis")]
    pub local_build_time: Option<Duration>,
    /// Speedup ratio (local_time / remote_time).
    pub speedup: Option<f64>,
    /// Number of files synced to worker.
    pub files_synced: u64,
    /// Total bytes transferred.
    pub bytes_transferred: u64,
    /// Exit code from compilation.
    pub exit_code: i32,
    /// Whether compilation succeeded.
    pub success: bool,
}
2587
2588mod option_duration_millis {
2589    use serde::{Deserialize, Deserializer, Serialize, Serializer};
2590    use std::time::Duration;
2591
2592    pub fn serialize<S>(duration: &Option<Duration>, serializer: S) -> Result<S::Ok, S::Error>
2593    where
2594        S: Serializer,
2595    {
2596        match duration {
2597            Some(d) => Some(d.as_millis() as u64).serialize(serializer),
2598            None => Option::<u64>::None.serialize(serializer),
2599        }
2600    }
2601
2602    pub fn deserialize<'de, D>(deserializer: D) -> Result<Option<Duration>, D::Error>
2603    where
2604        D: Deserializer<'de>,
2605    {
2606        let opt: Option<u64> = Option::deserialize(deserializer)?;
2607        Ok(opt.map(Duration::from_millis))
2608    }
2609}
2610
2611impl CompilationMetrics {
2612    /// Calculate speedup based on local vs remote build time.
2613    pub fn calculate_speedup(&mut self) {
2614        if let Some(local) = self.local_build_time
2615            && self.timing.total.as_millis() > 0
2616        {
2617            self.speedup = Some(local.as_secs_f64() / self.timing.total.as_secs_f64());
2618        }
2619    }
2620
2621    /// Check if remote compilation was beneficial (faster than local).
2622    pub fn is_beneficial(&self) -> bool {
2623        self.speedup.map(|s| s > 1.0).unwrap_or(false)
2624    }
2625}
2626
2627impl Default for CompilationMetrics {
2628    fn default() -> Self {
2629        Self {
2630            project_id: String::new(),
2631            worker_id: String::new(),
2632            timestamp: chrono::Utc::now(),
2633            timing: CompilationTimingBreakdown::default(),
2634            local_build_time: None,
2635            speedup: None,
2636            files_synced: 0,
2637            bytes_transferred: 0,
2638            exit_code: 0,
2639            success: true,
2640        }
2641    }
2642}
2643
/// Timer for tracking compilation phases.
///
/// Use this to measure the duration of each phase of remote compilation.
///
/// Phases are measured back to back: ending the upload or build phase also
/// starts the clock for the next one. A phase whose `end_*` method is never
/// called is reported as a zero duration by `finish`.
///
/// # Example
/// ```ignore
/// let mut timer = CompilationTimer::new("my-project", "worker-1");
/// // ... rsync source files ...
/// timer.end_rsync_up();
/// // ... run cargo build ...
/// timer.end_remote_build();
/// // ... rsync artifacts back ...
/// timer.end_rsync_down();
/// let metrics = timer.finish(0, 100, 1_000_000);
/// ```
#[derive(Debug)]
pub struct CompilationTimer {
    // Identifier copied into the resulting metrics.
    project_id: String,
    // Worker identifier copied into the resulting metrics.
    worker_id: String,
    // Instant the timer was created; the end-to-end total is measured from here.
    start: Instant,
    // Instant the phase currently being measured began.
    phase_start: Instant,
    // Recorded phase durations; `None` until the matching `end_*` call.
    rsync_up: Option<Duration>,
    remote_build: Option<Duration>,
    rsync_down: Option<Duration>,
}
2669
2670impl CompilationTimer {
2671    /// Create a new timer for a compilation.
2672    pub fn new(project_id: &str, worker_id: &str) -> Self {
2673        let now = Instant::now();
2674        Self {
2675            project_id: project_id.to_string(),
2676            worker_id: worker_id.to_string(),
2677            start: now,
2678            phase_start: now,
2679            rsync_up: None,
2680            remote_build: None,
2681            rsync_down: None,
2682        }
2683    }
2684
2685    /// Mark the end of the rsync upload phase.
2686    pub fn end_rsync_up(&mut self) {
2687        self.rsync_up = Some(self.phase_start.elapsed());
2688        self.phase_start = Instant::now();
2689        tracing::info!(
2690            rsync_up_ms = %self.rsync_up.unwrap().as_millis(),
2691            "TIMING: rsync_up completed"
2692        );
2693    }
2694
2695    /// Mark the end of the remote build phase.
2696    pub fn end_remote_build(&mut self) {
2697        self.remote_build = Some(self.phase_start.elapsed());
2698        self.phase_start = Instant::now();
2699        tracing::info!(
2700            remote_build_ms = %self.remote_build.unwrap().as_millis(),
2701            "TIMING: remote_build completed"
2702        );
2703    }
2704
2705    /// Mark the end of the rsync download phase.
2706    pub fn end_rsync_down(&mut self) {
2707        self.rsync_down = Some(self.phase_start.elapsed());
2708        tracing::info!(
2709            rsync_down_ms = %self.rsync_down.unwrap().as_millis(),
2710            "TIMING: rsync_down completed"
2711        );
2712    }
2713
2714    /// Finish timing and produce metrics.
2715    pub fn finish(self, exit_code: i32, files: u64, bytes: u64) -> CompilationMetrics {
2716        let total = self.start.elapsed();
2717        tracing::info!(
2718            total_ms = %total.as_millis(),
2719            exit_code = %exit_code,
2720            "TIMING: compilation completed"
2721        );
2722
2723        CompilationMetrics {
2724            project_id: self.project_id,
2725            worker_id: self.worker_id,
2726            timestamp: chrono::Utc::now(),
2727            timing: CompilationTimingBreakdown {
2728                rsync_up: self.rsync_up.unwrap_or_default(),
2729                remote_build: self.remote_build.unwrap_or_default(),
2730                rsync_down: self.rsync_down.unwrap_or_default(),
2731                total,
2732            },
2733            local_build_time: None,
2734            speedup: None,
2735            files_synced: files,
2736            bytes_transferred: bytes,
2737            exit_code,
2738            success: exit_code == 0,
2739        }
2740    }
2741}
2742
/// Aggregator for compilation metrics with statistics.
///
/// Keeps a bounded, oldest-first history of `CompilationMetrics` from which
/// summary statistics are derived.
#[derive(Debug)]
pub struct MetricsAggregator {
    // Recorded metrics, oldest at the front.
    history: VecDeque<CompilationMetrics>,
    // Size limit applied when recording a new entry.
    max_history: usize,
}
2749
2750impl MetricsAggregator {
2751    /// Create a new aggregator with a maximum history size.
2752    pub fn new(max_history: usize) -> Self {
2753        Self {
2754            history: VecDeque::with_capacity(max_history),
2755            max_history,
2756        }
2757    }
2758
2759    /// Record a new compilation's metrics.
2760    pub fn record(&mut self, metrics: CompilationMetrics) {
2761        if self.history.len() >= self.max_history {
2762            self.history.pop_front();
2763        }
2764        self.history.push_back(metrics);
2765    }
2766
2767    /// Get the average speedup across all recorded compilations.
2768    pub fn average_speedup(&self) -> Option<f64> {
2769        let speedups: Vec<f64> = self.history.iter().filter_map(|m| m.speedup).collect();
2770
2771        if speedups.is_empty() {
2772            None
2773        } else {
2774            Some(speedups.iter().sum::<f64>() / speedups.len() as f64)
2775        }
2776    }
2777
2778    /// Get the p50 (median) total compilation time.
2779    pub fn p50_total_time(&self) -> Option<Duration> {
2780        self.percentile_total_time(0.50)
2781    }
2782
2783    /// Get the p95 total compilation time.
2784    pub fn p95_total_time(&self) -> Option<Duration> {
2785        self.percentile_total_time(0.95)
2786    }
2787
2788    /// Get the p99 total compilation time.
2789    pub fn p99_total_time(&self) -> Option<Duration> {
2790        self.percentile_total_time(0.99)
2791    }
2792
2793    /// Get a percentile of total compilation times.
2794    ///
2795    /// Uses the "inclusive" percentile method (matching numpy.percentile, Excel PERCENTILE.INC):
2796    /// index = (n - 1) * percentile, where n is the number of samples.
2797    fn percentile_total_time(&self, percentile: f64) -> Option<Duration> {
2798        let mut times: Vec<_> = self.history.iter().map(|m| m.timing.total).collect();
2799
2800        if times.is_empty() {
2801            return None;
2802        }
2803
2804        times.sort();
2805        // Use inclusive percentile: index = (n - 1) * p
2806        // For p50 with 100 items: (99 * 0.50) = 49.5 -> 49 (true median position)
2807        // For p99 with 100 items: (99 * 0.99) = 98.01 -> 98
2808        let idx = (((times.len() - 1) as f64 * percentile).round() as usize).min(times.len() - 1);
2809        Some(times[idx])
2810    }
2811
2812    /// Get the success rate as a percentage.
2813    pub fn success_rate(&self) -> f64 {
2814        if self.history.is_empty() {
2815            return 100.0;
2816        }
2817        let successes = self.history.iter().filter(|m| m.success).count();
2818        (successes as f64 / self.history.len() as f64) * 100.0
2819    }
2820
2821    /// Get the number of recorded compilations.
2822    pub fn count(&self) -> usize {
2823        self.history.len()
2824    }
2825
2826    /// Get all recorded metrics.
2827    pub fn metrics(&self) -> &VecDeque<CompilationMetrics> {
2828        &self.history
2829    }
2830
2831    /// Clear all recorded metrics.
2832    pub fn clear(&mut self) {
2833        self.history.clear();
2834    }
2835}
2836
2837impl Default for MetricsAggregator {
2838    fn default() -> Self {
2839        Self::new(1000)
2840    }
2841}
2842
2843#[cfg(test)]
2844mod tests {
2845    use super::*;
2846    use crate::test_guard;
2847
    /// `CircuitState::default()` must be `Closed`.
    #[test]
    fn test_circuit_state_default() {
        let _guard = test_guard!();
        assert_eq!(CircuitState::default(), CircuitState::Closed);
    }
2853
    /// Pins every default of `CircuitBreakerConfig` so an accidental change to
    /// one of them is caught.
    #[test]
    fn test_circuit_config_defaults() {
        let _guard = test_guard!();
        let config = CircuitBreakerConfig::default();
        assert_eq!(config.failure_threshold, 3);
        assert_eq!(config.success_threshold, 2);
        assert_eq!(config.error_rate_threshold, 0.5);
        assert_eq!(config.window_secs, 60);
        assert_eq!(config.open_cooldown_secs, 30);
        assert_eq!(config.half_open_max_probes, 1);
    }
2865
    /// A default `RchConfig` must carry a `circuit` section whose
    /// `failure_threshold` is 3.
    #[test]
    fn test_rch_config_has_circuit_defaults() {
        let _guard = test_guard!();
        let config = RchConfig::default();
        assert_eq!(config.circuit.failure_threshold, 3);
    }
2872
    /// A `CircuitBreakerConfig` with all-non-default values must survive a
    /// JSON encode/decode cycle field by field.
    #[test]
    fn test_circuit_config_serde_roundtrip() {
        let _guard = test_guard!();
        // Values deliberately differ from the defaults so a field that was
        // silently dropped and re-defaulted would be noticed.
        let config = CircuitBreakerConfig {
            failure_threshold: 5,
            success_threshold: 3,
            error_rate_threshold: 0.75,
            window_secs: 120,
            open_cooldown_secs: 45,
            half_open_max_probes: 2,
        };
        let json = serde_json::to_string(&config).unwrap();
        let parsed: CircuitBreakerConfig = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed.failure_threshold, 5);
        assert_eq!(parsed.success_threshold, 3);
        assert_eq!(parsed.error_rate_threshold, 0.75);
        assert_eq!(parsed.window_secs, 120);
        assert_eq!(parsed.open_cooldown_secs, 45);
        assert_eq!(parsed.half_open_max_probes, 2);
    }
2893
2894    // ========================================================================
2895    // WorkerCapabilities Tests
2896    // ========================================================================
2897
    /// A default `WorkerCapabilities` has no runtime versions and no
    /// projects-root check information.
    #[test]
    fn test_worker_capabilities_default() {
        let _guard = test_guard!();
        let caps = WorkerCapabilities::default();
        assert!(caps.rustc_version.is_none());
        assert!(caps.bun_version.is_none());
        assert!(caps.node_version.is_none());
        assert!(caps.npm_version.is_none());
        assert!(caps.projects_root_ok.is_none());
        assert!(caps.projects_root_issue.is_none());
        assert!(caps.projects_root_checked_at_unix_ms.is_none());
    }
2910
    /// `WorkerCapabilities::new()` reports no runtimes and an unknown
    /// topology health.
    #[test]
    fn test_worker_capabilities_new() {
        let _guard = test_guard!();
        let caps = WorkerCapabilities::new();
        assert!(caps.rustc_version.is_none());
        assert!(!caps.has_rust());
        assert!(!caps.has_bun());
        assert!(!caps.has_node());
        assert!(caps.is_topology_healthy().is_none());
    }
2921
    /// `has_rust()` flips from false to true once `rustc_version` is set.
    #[test]
    fn test_worker_capabilities_has_rust() {
        let _guard = test_guard!();
        let mut caps = WorkerCapabilities::new();
        assert!(!caps.has_rust());

        caps.rustc_version = Some("rustc 1.76.0 (07dca489a 2024-02-04)".to_string());
        assert!(caps.has_rust());
    }
2931
    /// `has_bun()` flips from false to true once `bun_version` is set.
    #[test]
    fn test_worker_capabilities_has_bun() {
        let _guard = test_guard!();
        let mut caps = WorkerCapabilities::new();
        assert!(!caps.has_bun());

        caps.bun_version = Some("1.0.25".to_string());
        assert!(caps.has_bun());
    }
2941
    /// `has_node()` flips from false to true once `node_version` is set.
    #[test]
    fn test_worker_capabilities_has_node() {
        let _guard = test_guard!();
        let mut caps = WorkerCapabilities::new();
        assert!(!caps.has_node());

        caps.node_version = Some("v20.11.0".to_string());
        assert!(caps.has_node());
    }
2951
    /// With every runtime version populated at once, all three `has_*`
    /// helpers report true.
    #[test]
    fn test_worker_capabilities_multiple_runtimes() {
        let _guard = test_guard!();
        let caps = WorkerCapabilities {
            rustc_version: Some("rustc 1.76.0".to_string()),
            bun_version: Some("1.0.25".to_string()),
            node_version: Some("v20.11.0".to_string()),
            npm_version: Some("10.2.4".to_string()),
            ..Default::default()
        };

        assert!(caps.has_rust());
        assert!(caps.has_bun());
        assert!(caps.has_node());
    }
2967
    /// Empty capabilities serialize to the empty JSON object and parse back
    /// with no runtimes detected.
    #[test]
    fn test_worker_capabilities_serialization_empty() {
        let _guard = test_guard!();
        let caps = WorkerCapabilities::new();
        let json = serde_json::to_string(&caps).unwrap();
        // Empty capabilities should serialize to {}
        assert_eq!(json, "{}");

        // And deserialize back correctly
        let parsed: WorkerCapabilities = serde_json::from_str(&json).unwrap();
        assert!(!parsed.has_rust());
        assert!(!parsed.has_bun());
        assert!(!parsed.has_node());
    }
2982
    /// Populated versions appear in the JSON, unset ones are omitted, and the
    /// parsed value reports the same runtimes.
    #[test]
    fn test_worker_capabilities_serialization_with_versions() {
        let _guard = test_guard!();
        let caps = WorkerCapabilities {
            rustc_version: Some("rustc 1.76.0-nightly".to_string()),
            bun_version: Some("1.0.25".to_string()),
            node_version: None,
            npm_version: None,
            ..Default::default()
        };

        let json = serde_json::to_string(&caps).unwrap();
        assert!(json.contains("rustc_version"));
        assert!(json.contains("rustc 1.76.0-nightly"));
        assert!(json.contains("bun_version"));
        assert!(json.contains("1.0.25"));
        // None fields should be skipped
        assert!(!json.contains("node_version"));
        assert!(!json.contains("npm_version"));

        let parsed: WorkerCapabilities = serde_json::from_str(&json).unwrap();
        assert!(parsed.has_rust());
        assert!(parsed.has_bun());
        assert!(!parsed.has_node());
    }
3008
    /// JSON that names only `bun_version` must parse, leaving the other
    /// runtimes undetected.
    #[test]
    fn test_worker_capabilities_deserialization_partial() {
        let _guard = test_guard!();
        // Deserialize JSON with only some fields
        let json = r#"{"bun_version": "1.0.0"}"#;
        let caps: WorkerCapabilities = serde_json::from_str(json).unwrap();

        assert!(!caps.has_rust());
        assert!(caps.has_bun());
        assert!(!caps.has_node());
        assert_eq!(caps.bun_version, Some("1.0.0".to_string()));
    }
3021
    /// A clone must match the original on every field compared here,
    /// including the projects-root check information.
    #[test]
    fn test_worker_capabilities_clone() {
        let _guard = test_guard!();
        // Mix of `Some` and `None` so both cases are exercised.
        let caps = WorkerCapabilities {
            rustc_version: Some("1.76.0".to_string()),
            bun_version: None,
            node_version: Some("v20".to_string()),
            npm_version: None,
            projects_root_ok: Some(false),
            projects_root_issue: Some("alias_missing".to_string()),
            projects_root_checked_at_unix_ms: Some(123),
            ..Default::default()
        };

        let cloned = caps.clone();
        assert_eq!(cloned.rustc_version, caps.rustc_version);
        assert_eq!(cloned.bun_version, caps.bun_version);
        assert_eq!(cloned.node_version, caps.node_version);
        assert_eq!(cloned.npm_version, caps.npm_version);
        assert_eq!(cloned.projects_root_ok, caps.projects_root_ok);
        assert_eq!(cloned.projects_root_issue, caps.projects_root_issue);
        assert_eq!(
            cloned.projects_root_checked_at_unix_ms,
            caps.projects_root_checked_at_unix_ms
        );
    }
3048
    /// `is_topology_healthy()` mirrors `projects_root_ok` through all three
    /// states: unknown, healthy, unhealthy.
    #[test]
    fn test_worker_capabilities_topology_health_helper() {
        let _guard = test_guard!();
        let mut caps = WorkerCapabilities::new();
        assert_eq!(caps.is_topology_healthy(), None);

        caps.projects_root_ok = Some(true);
        assert_eq!(caps.is_topology_healthy(), Some(true));

        caps.projects_root_ok = Some(false);
        assert_eq!(caps.is_topology_healthy(), Some(false));
    }
3061
3062    #[test]
3063    fn test_selection_reason_serialization() {
3064        let _guard = test_guard!();
3065        // Test that all variants serialize to snake_case
3066        assert_eq!(
3067            serde_json::to_string(&SelectionReason::Success).unwrap(),
3068            "\"success\""
3069        );
3070        assert_eq!(
3071            serde_json::to_string(&SelectionReason::NoWorkersConfigured).unwrap(),
3072            "\"no_workers_configured\""
3073        );
3074        assert_eq!(
3075            serde_json::to_string(&SelectionReason::AllWorkersUnreachable).unwrap(),
3076            "\"all_workers_unreachable\""
3077        );
3078        assert_eq!(
3079            serde_json::to_string(&SelectionReason::AllCircuitsOpen).unwrap(),
3080            "\"all_circuits_open\""
3081        );
3082        assert_eq!(
3083            serde_json::to_string(&SelectionReason::AllWorkersBusy).unwrap(),
3084            "\"all_workers_busy\""
3085        );
3086        assert_eq!(
3087            serde_json::to_string(&SelectionReason::NoWorkersPassedHealth).unwrap(),
3088            "\"no_workers_passed_health\""
3089        );
3090        assert_eq!(
3091            serde_json::to_string(&SelectionReason::AllWorkersFailedPreflight).unwrap(),
3092            "\"all_workers_failed_preflight\""
3093        );
3094        assert_eq!(
3095            serde_json::to_string(&SelectionReason::AllWorkersFailedConvergence).unwrap(),
3096            "\"all_workers_failed_convergence\""
3097        );
3098        assert_eq!(
3099            serde_json::to_string(&SelectionReason::NoAdmissibleWorkers(
3100                "critical_pressure=1,insufficient_slots=1".to_string()
3101            ))
3102            .unwrap(),
3103            "{\"no_admissible_workers\":\"critical_pressure=1,insufficient_slots=1\"}"
3104        );
3105        assert_eq!(
3106            serde_json::to_string(&SelectionReason::NoMatchingWorkers).unwrap(),
3107            "\"no_matching_workers\""
3108        );
3109    }
3110
    /// The JSON for `SelectionError` must contain both the variant tag and the
    /// carried message.
    #[test]
    fn test_selection_reason_with_error() {
        let _guard = test_guard!();
        let reason = SelectionReason::SelectionError("test error".to_string());
        let json = serde_json::to_string(&reason).unwrap();
        assert!(json.contains("selection_error"));
        assert!(json.contains("test error"));
    }
3119
3120    #[test]
3121    fn test_selection_reason_deserialization() {
3122        let _guard = test_guard!();
3123        assert_eq!(
3124            serde_json::from_str::<SelectionReason>("\"success\"").unwrap(),
3125            SelectionReason::Success
3126        );
3127        assert_eq!(
3128            serde_json::from_str::<SelectionReason>("\"all_workers_busy\"").unwrap(),
3129            SelectionReason::AllWorkersBusy
3130        );
3131        assert_eq!(
3132            serde_json::from_str::<SelectionReason>("\"no_workers_passed_health\"").unwrap(),
3133            SelectionReason::NoWorkersPassedHealth
3134        );
3135        assert_eq!(
3136            serde_json::from_str::<SelectionReason>("\"all_workers_failed_preflight\"").unwrap(),
3137            SelectionReason::AllWorkersFailedPreflight
3138        );
3139        assert_eq!(
3140            serde_json::from_str::<SelectionReason>("\"all_workers_failed_convergence\"").unwrap(),
3141            SelectionReason::AllWorkersFailedConvergence
3142        );
3143        assert_eq!(
3144            serde_json::from_str::<SelectionReason>(
3145                "{\"no_admissible_workers\":\"critical_pressure=1\"}"
3146            )
3147            .unwrap(),
3148            SelectionReason::NoAdmissibleWorkers("critical_pressure=1".to_string())
3149        );
3150    }
3151
3152    #[test]
3153    fn test_selection_reason_display() {
3154        let _guard = test_guard!();
3155        assert_eq!(
3156            SelectionReason::Success.to_string(),
3157            "worker assigned successfully"
3158        );
3159        assert_eq!(
3160            SelectionReason::NoWorkersConfigured.to_string(),
3161            "no workers configured"
3162        );
3163        assert_eq!(
3164            SelectionReason::AllWorkersUnreachable.to_string(),
3165            "all workers unreachable"
3166        );
3167        assert_eq!(
3168            SelectionReason::AllWorkersBusy.to_string(),
3169            "all workers at capacity"
3170        );
3171        assert_eq!(
3172            SelectionReason::NoWorkersPassedHealth.to_string(),
3173            "no workers passed health thresholds"
3174        );
3175        assert_eq!(
3176            SelectionReason::AllWorkersFailedPreflight.to_string(),
3177            "all workers failed preflight checks"
3178        );
3179        assert_eq!(
3180            SelectionReason::AllWorkersFailedConvergence.to_string(),
3181            "all workers failed repo convergence checks"
3182        );
3183        assert_eq!(
3184            SelectionReason::NoAdmissibleWorkers(
3185                "critical_pressure=1,insufficient_slots=1".to_string()
3186            )
3187            .to_string(),
3188            "no admissible workers: critical_pressure=1,insufficient_slots=1"
3189        );
3190        assert_eq!(
3191            SelectionReason::SelectionError("oops".to_string()).to_string(),
3192            "selection error: oops"
3193        );
3194    }
3195
    /// A response that carries a worker serializes the success reason and the
    /// worker id as plain JSON strings.
    #[test]
    fn test_selection_response_with_worker() {
        let _guard = test_guard!();
        let response = SelectionResponse {
            worker: Some(SelectedWorker {
                id: WorkerId::new("test"),
                host: "localhost".to_string(),
                user: "user".to_string(),
                identity_file: "~/.ssh/id_rsa".to_string(),
                slots_available: 8,
                speed_score: 75.0,
            }),
            reason: SelectionReason::Success,
            build_id: None,
        };

        let json = serde_json::to_string(&response).unwrap();
        assert!(json.contains("\"reason\":\"success\""));
        // The worker id shows up as a bare string value.
        assert!(json.contains("\"id\":\"test\""));
    }
3216
    /// A response without a worker serializes `worker` as an explicit `null`
    /// alongside the failure reason.
    #[test]
    fn test_selection_response_without_worker() {
        let _guard = test_guard!();
        let response = SelectionResponse {
            worker: None,
            reason: SelectionReason::AllWorkersBusy,
            build_id: None,
        };

        let json = serde_json::to_string(&response).unwrap();
        assert!(json.contains("\"worker\":null"));
        assert!(json.contains("\"reason\":\"all_workers_busy\""));
    }
3230
    /// A full `SelectionResponse` survives a JSON encode/decode cycle with the
    /// worker id, host, slot count, and reason intact.
    #[test]
    fn test_selection_response_roundtrip() {
        let _guard = test_guard!();
        let original = SelectionResponse {
            worker: Some(SelectedWorker {
                id: WorkerId::new("worker-1"),
                host: "192.168.1.100".to_string(),
                user: "ubuntu".to_string(),
                identity_file: "/path/to/key".to_string(),
                slots_available: 16,
                speed_score: 90.5,
            }),
            reason: SelectionReason::Success,
            build_id: None,
        };

        let json = serde_json::to_string(&original).unwrap();
        let parsed: SelectionResponse = serde_json::from_str(&json).unwrap();

        assert!(parsed.worker.is_some());
        let worker = parsed.worker.unwrap();
        assert_eq!(worker.id.as_str(), "worker-1");
        assert_eq!(worker.host, "192.168.1.100");
        assert_eq!(worker.slots_available, 16);
        assert_eq!(parsed.reason, SelectionReason::Success);
    }
3257
    /// `BuildHeartbeatPhase::SyncUp` serializes as `"sync_up"` and parses back
    /// to the same variant.
    #[test]
    fn test_build_heartbeat_phase_roundtrip() {
        let _guard = test_guard!();
        let json = serde_json::to_string(&BuildHeartbeatPhase::SyncUp).unwrap();
        assert_eq!(json, "\"sync_up\"");
        let parsed: BuildHeartbeatPhase = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed, BuildHeartbeatPhase::SyncUp);
    }
3266
    /// A fully populated `BuildHeartbeatRequest` survives a JSON encode/decode
    /// cycle on the fields compared here.
    #[test]
    fn test_build_heartbeat_request_roundtrip() {
        let _guard = test_guard!();
        let request = BuildHeartbeatRequest {
            build_id: 42,
            worker_id: WorkerId::new("worker-a"),
            hook_pid: Some(12345),
            remote_pgid_file: Some("/tmp/rch/project/hash/.rch-run/42.pgid".to_string()),
            phase: BuildHeartbeatPhase::Execute,
            detail: Some("Compiling crates".to_string()),
            progress_counter: Some(7),
            progress_percent: Some(42.5),
        };

        let json = serde_json::to_string(&request).unwrap();
        let parsed: BuildHeartbeatRequest = serde_json::from_str(&json).unwrap();
        // `hook_pid` and `detail` are set above but not compared below.
        assert_eq!(parsed.build_id, 42);
        assert_eq!(parsed.worker_id.as_str(), "worker-a");
        assert_eq!(
            parsed.remote_pgid_file,
            Some("/tmp/rch/project/hash/.rch-run/42.pgid".to_string())
        );
        assert_eq!(parsed.phase, BuildHeartbeatPhase::Execute);
        assert_eq!(parsed.progress_counter, Some(7));
        assert_eq!(parsed.progress_percent, Some(42.5));
    }
3293
3294    // CircuitStats tests
3295
    /// Fresh `CircuitStats` start closed, with no failures, a zero error rate,
    /// and no open timestamp.
    #[test]
    fn test_circuit_stats_new() {
        let _guard = test_guard!();
        let stats = CircuitStats::new();
        assert_eq!(stats.state(), CircuitState::Closed);
        assert_eq!(stats.consecutive_failures(), 0);
        assert_eq!(stats.error_rate(), 0.0);
        assert!(stats.opened_at().is_none());
    }
3305
    /// Recording only successes keeps the error rate at zero and the
    /// consecutive-failure count at zero.
    #[test]
    fn test_circuit_stats_record_success() {
        let _guard = test_guard!();
        let mut stats = CircuitStats::new();
        stats.record_success();
        stats.record_success();
        assert_eq!(stats.error_rate(), 0.0);
        assert_eq!(stats.consecutive_failures(), 0);
    }
3315
    /// Recording only failures counts each one as consecutive and yields an
    /// error rate of exactly 1.0.
    #[test]
    fn test_circuit_stats_record_failure() {
        let _guard = test_guard!();
        let mut stats = CircuitStats::new();
        stats.record_failure();
        stats.record_failure();
        assert_eq!(stats.consecutive_failures(), 2);
        assert_eq!(stats.error_rate(), 1.0); // 2 failures, 0 successes
    }
3325
    /// With two successes and one failure the error rate is one third,
    /// compared with a 0.01 tolerance because it is a floating-point value.
    #[test]
    fn test_circuit_stats_error_rate() {
        let _guard = test_guard!();
        let mut stats = CircuitStats::new();
        stats.record_success();
        stats.record_success();
        stats.record_failure();
        // 1 failure / 3 total = 0.333...
        assert!((stats.error_rate() - 0.333).abs() < 0.01);
    }
3336
    /// With the default configuration, `should_open` stays false for the
    /// first two failures in a row and turns true on the third.
    #[test]
    fn test_circuit_stats_should_open_consecutive_failures() {
        let _guard = test_guard!();
        let mut stats = CircuitStats::new();
        let config = CircuitBreakerConfig::default(); // threshold = 3

        stats.record_failure();
        assert!(!stats.should_open(&config));

        stats.record_failure();
        assert!(!stats.should_open(&config));

        stats.record_failure();
        assert!(stats.should_open(&config)); // 3 consecutive failures
    }
3352
    /// `should_open` stays false after four mixed results and turns true once
    /// a fifth result brings the tally to three failures out of five.
    // NOTE(review): the fifth result is also the third failure in a row, which
    // equals the default `failure_threshold` of 3 asserted in
    // `test_circuit_config_defaults`. The final assertion may therefore pass
    // via the consecutive-failure rule rather than the error-rate rule —
    // confirm, and consider raising `failure_threshold` in this config.
    #[test]
    fn test_circuit_stats_should_open_error_rate() {
        let _guard = test_guard!();
        let mut stats = CircuitStats::new();
        let config = CircuitBreakerConfig {
            error_rate_threshold: 0.5,
            ..Default::default()
        };

        // Need at least 5 samples
        stats.record_success();
        stats.record_success();
        stats.record_failure();
        stats.record_failure();
        assert!(!stats.should_open(&config)); // Only 4 samples

        stats.record_failure(); // 5 samples: 2 success, 3 failures = 60% error rate
        assert!(stats.should_open(&config));
    }
3372
3373    #[test]
3374    fn test_circuit_stats_success_resets_consecutive_failures() {
3375        let _guard = test_guard!();
3376        let mut stats = CircuitStats::new();
3377        stats.record_failure();
3378        stats.record_failure();
3379        assert_eq!(stats.consecutive_failures(), 2);
3380
3381        stats.record_success();
3382        assert_eq!(stats.consecutive_failures(), 0);
3383    }
3384
3385    #[test]
3386    fn test_circuit_stats_open_transition() {
3387        let _guard = test_guard!();
3388        let mut stats = CircuitStats::new();
3389        stats.open();
3390
3391        assert_eq!(stats.state(), CircuitState::Open);
3392        assert!(stats.opened_at().is_some());
3393    }
3394
3395    #[test]
3396    fn test_circuit_stats_half_open_transition() {
3397        let _guard = test_guard!();
3398        let mut stats = CircuitStats::new();
3399        stats.open();
3400        stats.half_open();
3401
3402        assert_eq!(stats.state(), CircuitState::HalfOpen);
3403    }
3404
3405    #[test]
3406    fn test_circuit_stats_close_transition() {
3407        let _guard = test_guard!();
3408        let mut stats = CircuitStats::new();
3409        stats.open();
3410        stats.half_open();
3411        stats.close();
3412
3413        assert_eq!(stats.state(), CircuitState::Closed);
3414        assert!(stats.opened_at().is_none());
3415        assert_eq!(stats.consecutive_failures(), 0);
3416    }
3417
3418    #[test]
3419    fn test_circuit_stats_should_close() {
3420        let _guard = test_guard!();
3421        let mut stats = CircuitStats::new();
3422        let config = CircuitBreakerConfig {
3423            success_threshold: 2,
3424            ..Default::default()
3425        };
3426
3427        stats.open();
3428        stats.half_open();
3429
3430        assert!(!stats.should_close(&config)); // 0 successes
3431
3432        stats.record_success();
3433        assert!(!stats.should_close(&config)); // 1 success
3434
3435        stats.record_success();
3436        assert!(stats.should_close(&config)); // 2 successes
3437    }
3438
3439    #[test]
3440    fn test_circuit_stats_can_probe() {
3441        let _guard = test_guard!();
3442        let mut stats = CircuitStats::new();
3443        let config = CircuitBreakerConfig {
3444            half_open_max_probes: 1,
3445            ..Default::default()
3446        };
3447
3448        // Can't probe when closed
3449        assert!(!stats.can_probe(&config));
3450
3451        stats.open();
3452        // Can't probe when open
3453        assert!(!stats.can_probe(&config));
3454
3455        stats.half_open();
3456        // Can probe when half-open
3457        assert!(stats.can_probe(&config));
3458
3459        // Start a probe
3460        assert!(stats.start_probe(&config));
3461        // Can't start another probe
3462        assert!(!stats.can_probe(&config));
3463        assert!(!stats.start_probe(&config));
3464    }
3465
3466    #[test]
3467    fn test_circuit_stats_probe_completion() {
3468        let _guard = test_guard!();
3469        let mut stats = CircuitStats::new();
3470        let config = CircuitBreakerConfig {
3471            half_open_max_probes: 1,
3472            ..Default::default()
3473        };
3474
3475        stats.open();
3476        stats.half_open();
3477        stats.start_probe(&config);
3478
3479        // Probe completes with success
3480        stats.record_success();
3481
3482        // Can start another probe
3483        assert!(stats.can_probe(&config));
3484    }
3485
3486    #[test]
3487    fn test_circuit_stats_failure_in_half_open() {
3488        let _guard = test_guard!();
3489        let mut stats = CircuitStats::new();
3490        let config = CircuitBreakerConfig {
3491            success_threshold: 2,
3492            ..Default::default()
3493        };
3494
3495        stats.open();
3496        stats.half_open();
3497
3498        stats.record_success();
3499        assert_eq!(stats.consecutive_failures(), 0);
3500
3501        // Failure resets consecutive successes
3502        stats.record_failure();
3503        assert!(!stats.should_close(&config));
3504
3505        // Need 2 more successes
3506        stats.record_success();
3507        stats.record_success();
3508        assert!(stats.should_close(&config));
3509    }
3510
3511    #[test]
3512    fn test_circuit_stats_reset_window() {
3513        let _guard = test_guard!();
3514        let mut stats = CircuitStats::new();
3515        stats.record_success();
3516        stats.record_failure();
3517
3518        assert!(stats.error_rate() > 0.0);
3519
3520        stats.reset_window();
3521        assert_eq!(stats.error_rate(), 0.0);
3522    }
3523
3524    #[test]
3525    fn test_circuit_state_transitions_are_deterministic() {
3526        let _guard = test_guard!();
3527        let config = CircuitBreakerConfig::default();
3528        let mut stats = CircuitStats::new();
3529
3530        // Start closed
3531        assert_eq!(stats.state(), CircuitState::Closed);
3532
3533        // Cause failures to open
3534        for _ in 0..3 {
3535            stats.record_failure();
3536        }
3537        assert!(stats.should_open(&config));
3538        stats.open();
3539        assert_eq!(stats.state(), CircuitState::Open);
3540
3541        // Transition to half-open after cooldown would be checked
3542        stats.half_open();
3543        assert_eq!(stats.state(), CircuitState::HalfOpen);
3544
3545        // Successes to close
3546        for _ in 0..2 {
3547            stats.record_success();
3548        }
3549        assert!(stats.should_close(&config));
3550        stats.close();
3551        assert_eq!(stats.state(), CircuitState::Closed);
3552    }
3553
3554    // ========================================================================
3555    // Compilation Timing Tests
3556    // ========================================================================
3557
3558    fn make_test_metrics(
3559        speedup: Option<f64>,
3560        total_secs: u64,
3561        success: bool,
3562    ) -> CompilationMetrics {
3563        CompilationMetrics {
3564            project_id: "test-project".to_string(),
3565            worker_id: "worker-1".to_string(),
3566            timestamp: chrono::Utc::now(),
3567            timing: CompilationTimingBreakdown {
3568                rsync_up: Duration::from_millis(100),
3569                remote_build: Duration::from_millis(800),
3570                rsync_down: Duration::from_millis(100),
3571                total: Duration::from_secs(total_secs),
3572            },
3573            local_build_time: speedup.map(|s| Duration::from_secs_f64(total_secs as f64 * s)),
3574            speedup,
3575            files_synced: 100,
3576            bytes_transferred: 1_000_000,
3577            exit_code: if success { 0 } else { 1 },
3578            success,
3579        }
3580    }
3581
3582    #[test]
3583    fn test_compilation_timing_breakdown_default() {
3584        let _guard = test_guard!();
3585        let timing = CompilationTimingBreakdown::default();
3586        assert_eq!(timing.rsync_up, Duration::ZERO);
3587        assert_eq!(timing.remote_build, Duration::ZERO);
3588        assert_eq!(timing.rsync_down, Duration::ZERO);
3589        assert_eq!(timing.total, Duration::ZERO);
3590    }
3591
3592    #[test]
3593    fn test_compilation_timing_breakdown_serialization() {
3594        let _guard = test_guard!();
3595        let timing = CompilationTimingBreakdown {
3596            rsync_up: Duration::from_millis(100),
3597            remote_build: Duration::from_millis(2000),
3598            rsync_down: Duration::from_millis(50),
3599            total: Duration::from_millis(2150),
3600        };
3601
3602        let json = serde_json::to_string(&timing).unwrap();
3603        assert!(json.contains("100")); // rsync_up
3604        assert!(json.contains("2000")); // remote_build
3605        assert!(json.contains("50")); // rsync_down
3606        assert!(json.contains("2150")); // total
3607
3608        let parsed: CompilationTimingBreakdown = serde_json::from_str(&json).unwrap();
3609        assert_eq!(parsed.rsync_up, timing.rsync_up);
3610        assert_eq!(parsed.remote_build, timing.remote_build);
3611        assert_eq!(parsed.rsync_down, timing.rsync_down);
3612        assert_eq!(parsed.total, timing.total);
3613    }
3614
3615    #[test]
3616    fn test_command_timing_breakdown_serialization() {
3617        let _guard = test_guard!();
3618        let timing = CommandTimingBreakdown {
3619            classify: Some(Duration::from_millis(2)),
3620            select: None,
3621            sync_up: Some(Duration::from_millis(120)),
3622            exec: Some(Duration::from_millis(900)),
3623            sync_down: None,
3624            cleanup: Some(Duration::from_millis(5)),
3625            total: Some(Duration::from_millis(1027)),
3626        };
3627
3628        let json = serde_json::to_string(&timing).unwrap();
3629        assert!(json.contains("\"classify\":2"));
3630        assert!(json.contains("\"select\":null"));
3631        assert!(json.contains("\"sync_up\":120"));
3632        assert!(json.contains("\"exec\":900"));
3633        assert!(json.contains("\"sync_down\":null"));
3634        assert!(json.contains("\"cleanup\":5"));
3635        assert!(json.contains("\"total\":1027"));
3636
3637        let parsed: CommandTimingBreakdown = serde_json::from_str(&json).unwrap();
3638        assert_eq!(parsed.classify, timing.classify);
3639        assert_eq!(parsed.select, timing.select);
3640        assert_eq!(parsed.sync_up, timing.sync_up);
3641        assert_eq!(parsed.exec, timing.exec);
3642        assert_eq!(parsed.sync_down, timing.sync_down);
3643        assert_eq!(parsed.cleanup, timing.cleanup);
3644        assert_eq!(parsed.total, timing.total);
3645    }
3646
3647    #[test]
3648    fn test_compilation_metrics_calculate_speedup() {
3649        let _guard = test_guard!();
3650        let mut metrics = CompilationMetrics {
3651            timing: CompilationTimingBreakdown {
3652                total: Duration::from_secs(10),
3653                ..Default::default()
3654            },
3655            local_build_time: Some(Duration::from_secs(30)),
3656            ..Default::default()
3657        };
3658
3659        metrics.calculate_speedup();
3660
3661        assert!(metrics.speedup.is_some());
3662        let speedup = metrics.speedup.unwrap();
3663        assert!(
3664            (speedup - 3.0).abs() < 0.01,
3665            "Expected 3.0x speedup, got {}",
3666            speedup
3667        );
3668    }
3669
3670    #[test]
3671    fn test_compilation_metrics_is_beneficial() {
3672        let _guard = test_guard!();
3673        let mut metrics = CompilationMetrics::default();
3674
3675        // No speedup calculated
3676        assert!(!metrics.is_beneficial());
3677
3678        // Speedup > 1 is beneficial
3679        metrics.speedup = Some(1.5);
3680        assert!(metrics.is_beneficial());
3681
3682        // Speedup < 1 is not beneficial
3683        metrics.speedup = Some(0.8);
3684        assert!(!metrics.is_beneficial());
3685
3686        // Speedup = 1 is not beneficial (exactly same speed)
3687        metrics.speedup = Some(1.0);
3688        assert!(!metrics.is_beneficial());
3689    }
3690
3691    #[test]
3692    fn test_compilation_metrics_serialization() {
3693        let _guard = test_guard!();
3694        let metrics = make_test_metrics(Some(2.5), 10, true);
3695        let json = serde_json::to_string(&metrics).unwrap();
3696
3697        assert!(json.contains("test-project"));
3698        assert!(json.contains("worker-1"));
3699        assert!(json.contains("2.5")); // speedup
3700
3701        let parsed: CompilationMetrics = serde_json::from_str(&json).unwrap();
3702        assert_eq!(parsed.project_id, "test-project");
3703        assert_eq!(parsed.worker_id, "worker-1");
3704        assert!(parsed.success);
3705    }
3706
3707    #[test]
3708    fn test_compilation_timer_new() {
3709        let _guard = test_guard!();
3710        let timer = CompilationTimer::new("my-project", "my-worker");
3711        assert_eq!(timer.project_id, "my-project");
3712        assert_eq!(timer.worker_id, "my-worker");
3713        assert!(timer.rsync_up.is_none());
3714        assert!(timer.remote_build.is_none());
3715        assert!(timer.rsync_down.is_none());
3716    }
3717
3718    #[test]
3719    fn test_compilation_timer_phases() {
3720        let _guard = test_guard!();
3721        let mut timer = CompilationTimer::new("test", "worker");
3722
3723        // Simulate rsync up
3724        std::thread::sleep(Duration::from_millis(5));
3725        timer.end_rsync_up();
3726        assert!(timer.rsync_up.is_some());
3727        assert!(timer.rsync_up.unwrap() >= Duration::from_millis(5));
3728
3729        // Simulate remote build
3730        std::thread::sleep(Duration::from_millis(10));
3731        timer.end_remote_build();
3732        assert!(timer.remote_build.is_some());
3733        assert!(timer.remote_build.unwrap() >= Duration::from_millis(10));
3734
3735        // Simulate rsync down
3736        std::thread::sleep(Duration::from_millis(5));
3737        timer.end_rsync_down();
3738        assert!(timer.rsync_down.is_some());
3739        assert!(timer.rsync_down.unwrap() >= Duration::from_millis(5));
3740
3741        // Finish
3742        let metrics = timer.finish(0, 50, 500_000);
3743        assert_eq!(metrics.project_id, "test");
3744        assert_eq!(metrics.worker_id, "worker");
3745        assert!(metrics.success);
3746        assert!(metrics.timing.total >= Duration::from_millis(20));
3747        assert_eq!(metrics.files_synced, 50);
3748        assert_eq!(metrics.bytes_transferred, 500_000);
3749    }
3750
3751    #[test]
3752    fn test_metrics_aggregator_new() {
3753        let _guard = test_guard!();
3754        let agg = MetricsAggregator::new(100);
3755        assert_eq!(agg.count(), 0);
3756        assert!(agg.average_speedup().is_none());
3757    }
3758
3759    #[test]
3760    fn test_metrics_aggregator_record() {
3761        let _guard = test_guard!();
3762        let mut agg = MetricsAggregator::new(3);
3763
3764        // Record first metric
3765        agg.record(make_test_metrics(Some(2.0), 10, true));
3766        assert_eq!(agg.count(), 1);
3767
3768        // Record up to max
3769        agg.record(make_test_metrics(Some(3.0), 20, true));
3770        agg.record(make_test_metrics(Some(4.0), 30, true));
3771        assert_eq!(agg.count(), 3);
3772
3773        // Exceed max should drop oldest
3774        agg.record(make_test_metrics(Some(5.0), 40, true));
3775        assert_eq!(agg.count(), 3);
3776    }
3777
3778    #[test]
3779    fn test_metrics_aggregator_average_speedup() {
3780        let _guard = test_guard!();
3781        let mut agg = MetricsAggregator::new(100);
3782
3783        // Empty case
3784        assert!(agg.average_speedup().is_none());
3785
3786        // Add metrics with speedups 1, 2, 3, 4, 5 -> average = 3
3787        for i in 1..=5 {
3788            agg.record(make_test_metrics(Some(i as f64), 10, true));
3789        }
3790
3791        let avg = agg.average_speedup().unwrap();
3792        assert!((avg - 3.0).abs() < 0.01, "Expected 3.0, got {}", avg);
3793    }
3794
3795    #[test]
3796    fn test_metrics_aggregator_average_speedup_with_none() {
3797        let _guard = test_guard!();
3798        let mut agg = MetricsAggregator::new(100);
3799
3800        // Mix of Some and None speedups
3801        agg.record(make_test_metrics(Some(2.0), 10, true));
3802        agg.record(make_test_metrics(None, 10, true)); // No speedup
3803        agg.record(make_test_metrics(Some(4.0), 10, true));
3804
3805        // Average should only consider metrics with speedup
3806        let avg = agg.average_speedup().unwrap();
3807        assert!((avg - 3.0).abs() < 0.01, "Expected 3.0, got {}", avg);
3808    }
3809
3810    #[test]
3811    fn test_metrics_aggregator_percentiles() {
3812        let _guard = test_guard!();
3813        let mut agg = MetricsAggregator::new(100);
3814
3815        // Add metrics with total times 1s, 2s, 3s, ..., 10s
3816        for i in 1..=10 {
3817            agg.record(make_test_metrics(Some(1.0), i, true));
3818        }
3819
3820        // p50 (median) should be around 5s
3821        let p50 = agg.p50_total_time().unwrap();
3822        assert!(p50 >= Duration::from_secs(5) && p50 <= Duration::from_secs(6));
3823
3824        // p95 should be around 9s or 10s
3825        let p95 = agg.p95_total_time().unwrap();
3826        assert!(p95 >= Duration::from_secs(9) && p95 <= Duration::from_secs(10));
3827
3828        // p99 should be 10s
3829        let p99 = agg.p99_total_time().unwrap();
3830        assert!(p99 >= Duration::from_secs(9));
3831    }
3832
3833    #[test]
3834    fn test_metrics_aggregator_success_rate() {
3835        let _guard = test_guard!();
3836        let mut agg = MetricsAggregator::new(100);
3837
3838        // Empty case
3839        assert_eq!(agg.success_rate(), 100.0);
3840
3841        // All successful
3842        agg.record(make_test_metrics(Some(1.0), 10, true));
3843        agg.record(make_test_metrics(Some(1.0), 10, true));
3844        assert_eq!(agg.success_rate(), 100.0);
3845
3846        // Add a failure (2 success, 1 failure = 66.67%)
3847        agg.record(make_test_metrics(Some(1.0), 10, false));
3848        let rate = agg.success_rate();
3849        assert!((rate - 66.67).abs() < 1.0, "Expected ~66.67%, got {}", rate);
3850    }
3851
3852    #[test]
3853    fn test_metrics_aggregator_clear() {
3854        let _guard = test_guard!();
3855        let mut agg = MetricsAggregator::new(100);
3856        agg.record(make_test_metrics(Some(1.0), 10, true));
3857        agg.record(make_test_metrics(Some(2.0), 20, true));
3858
3859        assert_eq!(agg.count(), 2);
3860
3861        agg.clear();
3862        assert_eq!(agg.count(), 0);
3863        assert!(agg.average_speedup().is_none());
3864    }
3865
3866    // CompilationConfig timeout tests
3867
3868    #[test]
3869    fn test_compilation_config_default_timeouts() {
3870        let _guard = test_guard!();
3871        let config = CompilationConfig::default();
3872        // Default build timeout: 5 minutes
3873        assert_eq!(config.build_timeout_sec, 300);
3874        // Default test timeout: 30 minutes
3875        assert_eq!(config.test_timeout_sec, 1800);
3876    }
3877
3878    #[test]
3879    fn test_compilation_config_timeout_for_test_kinds() {
3880        let _guard = test_guard!();
3881        let config = CompilationConfig::default();
3882
3883        // Test commands should get test_timeout_sec
3884        assert_eq!(
3885            config.timeout_for_kind(Some(crate::CompilationKind::CargoTest)),
3886            std::time::Duration::from_secs(1800)
3887        );
3888        assert_eq!(
3889            config.timeout_for_kind(Some(crate::CompilationKind::CargoNextest)),
3890            std::time::Duration::from_secs(1800)
3891        );
3892        assert_eq!(
3893            config.timeout_for_kind(Some(crate::CompilationKind::BunTest)),
3894            std::time::Duration::from_secs(config.bun_timeout_sec)
3895        );
3896    }
3897
3898    #[test]
3899    fn test_compilation_config_timeout_for_build_kinds() {
3900        let _guard = test_guard!();
3901        let config = CompilationConfig::default();
3902
3903        // Build/check commands should get build_timeout_sec
3904        assert_eq!(
3905            config.timeout_for_kind(Some(crate::CompilationKind::CargoBuild)),
3906            std::time::Duration::from_secs(300)
3907        );
3908        assert_eq!(
3909            config.timeout_for_kind(Some(crate::CompilationKind::CargoCheck)),
3910            std::time::Duration::from_secs(300)
3911        );
3912        assert_eq!(
3913            config.timeout_for_kind(Some(crate::CompilationKind::CargoClippy)),
3914            std::time::Duration::from_secs(300)
3915        );
3916        assert_eq!(
3917            config.timeout_for_kind(None),
3918            std::time::Duration::from_secs(300)
3919        );
3920    }
3921
3922    #[test]
3923    fn test_compilation_config_custom_timeouts() {
3924        let _guard = test_guard!();
3925        let config = CompilationConfig {
3926            build_timeout_sec: 600, // 10 minutes
3927            test_timeout_sec: 3600, // 1 hour
3928            ..Default::default()
3929        };
3930
3931        assert_eq!(
3932            config.timeout_for_kind(Some(crate::CompilationKind::CargoBuild)),
3933            std::time::Duration::from_secs(600)
3934        );
3935        assert_eq!(
3936            config.timeout_for_kind(Some(crate::CompilationKind::CargoTest)),
3937            std::time::Duration::from_secs(3600)
3938        );
3939    }
3940
3941    #[test]
3942    fn test_compilation_config_speedup_threshold_default() {
3943        let _guard = test_guard!();
3944        let config = CompilationConfig::default();
3945        // Default speedup threshold: 1.2 (20% faster required)
3946        assert!((config.remote_speedup_threshold - 1.2).abs() < 0.001);
3947    }
3948
3949    #[test]
3950    fn test_compilation_config_speedup_threshold_custom() {
3951        let _guard = test_guard!();
3952        let config = CompilationConfig {
3953            remote_speedup_threshold: 1.5, // Require 50% faster
3954            ..Default::default()
3955        };
3956        assert!((config.remote_speedup_threshold - 1.5).abs() < 0.001);
3957    }
3958
3959    #[test]
3960    fn test_compilation_config_speedup_threshold_no_minimum() {
3961        let _guard = test_guard!();
3962        // Setting to 1.0 means "always offload when other criteria met"
3963        let config = CompilationConfig {
3964            remote_speedup_threshold: 1.0,
3965            ..Default::default()
3966        };
3967        assert!((config.remote_speedup_threshold - 1.0).abs() < 0.001);
3968    }
3969
3970    #[test]
3971    fn test_compilation_config_min_local_time_ms_default() {
3972        let _guard = test_guard!();
3973        let config = CompilationConfig::default();
3974        // Default min_local_time_ms: 2000ms
3975        assert_eq!(config.min_local_time_ms, 2000);
3976    }
3977
3978    #[test]
3979    fn test_compilation_config_serde_roundtrip_speedup_threshold() {
3980        let _guard = test_guard!();
3981        let config = CompilationConfig {
3982            remote_speedup_threshold: 2.5,
3983            min_local_time_ms: 5000,
3984            ..Default::default()
3985        };
3986        let json = serde_json::to_string(&config).unwrap();
3987        let parsed: CompilationConfig = serde_json::from_str(&json).unwrap();
3988        assert!((parsed.remote_speedup_threshold - 2.5).abs() < 0.001);
3989        assert_eq!(parsed.min_local_time_ms, 5000);
3990    }
3991
3992    // ========================================================================
3993    // validate_remote_base Tests
3994    // ========================================================================
3995
3996    #[test]
3997    fn test_validate_remote_base_absolute_path() {
3998        let _guard = test_guard!();
3999        assert_eq!(validate_remote_base("/tmp/rch").unwrap(), "/tmp/rch");
4000        assert_eq!(
4001            validate_remote_base("/var/rch-builds").unwrap(),
4002            "/var/rch-builds"
4003        );
4004        assert_eq!(
4005            validate_remote_base("/home/builder/.rch").unwrap(),
4006            "/home/builder/.rch"
4007        );
4008    }
4009
4010    #[test]
4011    fn test_validate_remote_base_tilde_expansion() {
4012        let _guard = test_guard!();
4013        // Tilde expansion should work
4014        let result = validate_remote_base("~/rch");
4015        assert!(result.is_ok());
4016        let path = result.unwrap();
4017        assert!(
4018            path.starts_with('/'),
4019            "Path should be absolute after expansion: {}",
4020            path
4021        );
4022        assert!(!path.contains('~'), "Tilde should be expanded: {}", path);
4023    }
4024
4025    #[test]
4026    fn test_validate_remote_base_rejects_relative_path() {
4027        let _guard = test_guard!();
4028        let result = validate_remote_base("tmp/rch");
4029        assert!(result.is_err());
4030        assert!(result.unwrap_err().contains("absolute path"));
4031    }
4032
4033    #[test]
4034    fn test_validate_remote_base_rejects_path_traversal() {
4035        let _guard = test_guard!();
4036        let result = validate_remote_base("/tmp/../etc/rch");
4037        assert!(result.is_err());
4038        assert!(result.unwrap_err().contains("path traversal"));
4039
4040        let result = validate_remote_base("/tmp/rch/../other");
4041        assert!(result.is_err());
4042        assert!(result.unwrap_err().contains("path traversal"));
4043    }
4044
4045    #[test]
4046    fn test_validate_remote_base_normalizes_trailing_slash() {
4047        let _guard = test_guard!();
4048        assert_eq!(validate_remote_base("/tmp/rch/").unwrap(), "/tmp/rch");
4049        assert_eq!(validate_remote_base("/tmp/rch///").unwrap(), "/tmp/rch");
4050    }
4051
4052    #[test]
4053    fn test_validate_remote_base_root_path() {
4054        let _guard = test_guard!();
4055        // Root path should be rejected
4056        let result = validate_remote_base("/");
4057        assert!(result.is_err());
4058        assert!(result.unwrap_err().contains("root directory"));
4059    }
4060
4061    #[test]
4062    fn test_validate_remote_base_rejects_top_level() {
4063        let _guard = test_guard!();
4064        // Top-level directories should be rejected (depth 1)
4065        let result = validate_remote_base("/tmp");
4066        assert!(result.is_err());
4067        assert!(result.unwrap_err().contains("at least 2 levels deep"));
4068
4069        let result = validate_remote_base("/home");
4070        assert!(result.is_err());
4071
4072        // Depth 2 should be allowed
4073        assert_eq!(validate_remote_base("/tmp/rch").unwrap(), "/tmp/rch");
4074    }
4075
4076    #[test]
4077    fn test_default_remote_base() {
4078        let _guard = test_guard!();
4079        assert_eq!(default_remote_base(), "/tmp/rch");
4080    }
4081
4082    #[test]
4083    fn test_transfer_config_default_has_remote_base() {
4084        let _guard = test_guard!();
4085        let config = TransferConfig::default();
4086        assert_eq!(config.remote_base, "/tmp/rch");
4087    }
4088
4089    // ========================================================================
4090    // RetryConfig Tests (bd-x1ek)
4091    // ========================================================================
4092
4093    #[test]
4094    fn test_retry_config_default() {
4095        let _guard = test_guard!();
4096        let config = RetryConfig::default();
4097        assert_eq!(config.max_attempts, 3);
4098        assert_eq!(config.base_delay_ms, 100);
4099        assert_eq!(config.max_delay_ms, 5000);
4100        assert_eq!(config.jitter_factor, 0.1);
4101        assert_eq!(config.total_timeout_ms, 30000);
4102    }
4103
4104    #[test]
4105    fn test_retry_config_no_retry() {
4106        let _guard = test_guard!();
4107        let config = RetryConfig::no_retry();
4108        assert_eq!(config.max_attempts, 1);
4109    }
4110
4111    #[test]
4112    fn test_retry_config_delay_for_attempt_zero() {
4113        let _guard = test_guard!();
4114        let config = RetryConfig::default();
4115        assert_eq!(config.delay_for_attempt(0), std::time::Duration::ZERO);
4116    }
4117
4118    #[test]
4119    fn test_retry_config_delay_for_attempt_exponential() {
4120        let _guard = test_guard!();
4121        let config = RetryConfig {
4122            base_delay_ms: 100,
4123            max_delay_ms: 10000,
4124            jitter_factor: 0.0, // No jitter for deterministic test
4125            ..Default::default()
4126        };
4127
4128        // attempt 1: 100 * 2^0 = 100ms
4129        assert_eq!(
4130            config.delay_for_attempt(1),
4131            std::time::Duration::from_millis(100)
4132        );
4133        // attempt 2: 100 * 2^1 = 200ms
4134        assert_eq!(
4135            config.delay_for_attempt(2),
4136            std::time::Duration::from_millis(200)
4137        );
4138        // attempt 3: 100 * 2^2 = 400ms
4139        assert_eq!(
4140            config.delay_for_attempt(3),
4141            std::time::Duration::from_millis(400)
4142        );
4143    }
4144
4145    #[test]
4146    fn test_retry_config_delay_capped_at_max() {
4147        let _guard = test_guard!();
4148        let config = RetryConfig {
4149            base_delay_ms: 1000,
4150            max_delay_ms: 2000,
4151            jitter_factor: 0.0,
4152            ..Default::default()
4153        };
4154
4155        // attempt 2: 1000 * 2^1 = 2000ms (at cap)
4156        assert_eq!(
4157            config.delay_for_attempt(2),
4158            std::time::Duration::from_millis(2000)
4159        );
4160        // attempt 3: 1000 * 2^2 = 4000ms, capped to 2000ms
4161        assert_eq!(
4162            config.delay_for_attempt(3),
4163            std::time::Duration::from_millis(2000)
4164        );
4165    }
4166
4167    #[test]
4168    fn test_retry_config_should_retry() {
4169        let _guard = test_guard!();
4170        let config = RetryConfig {
4171            max_attempts: 3,
4172            total_timeout_ms: 1000,
4173            ..Default::default()
4174        };
4175
4176        // Before max attempts and within timeout
4177        assert!(config.should_retry(0, std::time::Duration::from_millis(0)));
4178        assert!(config.should_retry(1, std::time::Duration::from_millis(500)));
4179        assert!(config.should_retry(2, std::time::Duration::from_millis(900)));
4180
4181        // At max attempts
4182        assert!(!config.should_retry(3, std::time::Duration::from_millis(0)));
4183
4184        // Past timeout
4185        assert!(!config.should_retry(1, std::time::Duration::from_millis(1001)));
4186    }
4187
4188    #[test]
4189    fn test_retry_config_serde_roundtrip() {
4190        let _guard = test_guard!();
4191        let config = RetryConfig {
4192            max_attempts: 5,
4193            base_delay_ms: 200,
4194            max_delay_ms: 8000,
4195            jitter_factor: 0.2,
4196            total_timeout_ms: 60000,
4197        };
4198        let json = serde_json::to_string(&config).unwrap();
4199        let parsed: RetryConfig = serde_json::from_str(&json).unwrap();
4200        assert_eq!(parsed.max_attempts, 5);
4201        assert_eq!(parsed.base_delay_ms, 200);
4202        assert_eq!(parsed.max_delay_ms, 8000);
4203        assert_eq!(parsed.jitter_factor, 0.2);
4204        assert_eq!(parsed.total_timeout_ms, 60000);
4205    }
4206
4207    #[test]
4208    fn test_transfer_config_includes_retry() {
4209        let _guard = test_guard!();
4210        let config = TransferConfig::default();
4211        assert_eq!(config.retry.max_attempts, 3);
4212    }
4213
4214    // =========================================================================
4215    // ExecutionConfig Tests (bd-785w)
4216    // =========================================================================
4217
4218    #[test]
4219    fn test_execution_config_default_allowlist() {
4220        let _guard = test_guard!();
4221        let config = ExecutionConfig::default();
4222        // All supported compilers should be in the default allowlist
4223        assert!(config.is_allowed("cargo"));
4224        assert!(config.is_allowed("rustc"));
4225        assert!(config.is_allowed("gcc"));
4226        assert!(config.is_allowed("g++"));
4227        assert!(config.is_allowed("clang"));
4228        assert!(config.is_allowed("clang++"));
4229        assert!(config.is_allowed("make"));
4230        assert!(config.is_allowed("cmake"));
4231        assert!(config.is_allowed("ninja"));
4232        assert!(config.is_allowed("meson"));
4233        assert!(config.is_allowed("bun"));
4234        assert!(config.is_allowed("nextest"));
4235        assert!(config.is_allowed("cc"));
4236        assert!(config.is_allowed("c++"));
4237    }
4238
4239    #[test]
4240    fn test_execution_config_is_allowed_case_insensitive() {
4241        let _guard = test_guard!();
4242        let config = ExecutionConfig::default();
4243        // Should be case-insensitive
4244        assert!(config.is_allowed("CARGO"));
4245        assert!(config.is_allowed("Cargo"));
4246        assert!(config.is_allowed("GCC"));
4247        assert!(config.is_allowed("Gcc"));
4248    }
4249
4250    #[test]
4251    fn test_execution_config_is_not_allowed() {
4252        let _guard = test_guard!();
4253        let config = ExecutionConfig::default();
4254        // Unknown commands should not be allowed
4255        assert!(!config.is_allowed("python"));
4256        assert!(!config.is_allowed("npm"));
4257        assert!(!config.is_allowed("go"));
4258        assert!(!config.is_allowed(""));
4259    }
4260
4261    #[test]
4262    fn test_execution_config_empty_allowlist() {
4263        let _guard = test_guard!();
4264        let config = ExecutionConfig { allowlist: vec![] };
4265        // Empty allowlist should block everything
4266        assert!(!config.is_allowed("cargo"));
4267        assert!(!config.is_allowed("gcc"));
4268    }
4269
4270    #[test]
4271    fn test_execution_config_custom_allowlist() {
4272        let _guard = test_guard!();
4273        let config = ExecutionConfig {
4274            allowlist: vec!["cargo".to_string(), "custom_tool".to_string()],
4275        };
4276        assert!(config.is_allowed("cargo"));
4277        assert!(config.is_allowed("custom_tool"));
4278        assert!(!config.is_allowed("gcc"));
4279    }
4280
4281    #[test]
4282    fn test_execution_config_serde() {
4283        let _guard = test_guard!();
4284        let config = ExecutionConfig {
4285            allowlist: vec!["cargo".to_string(), "rustc".to_string()],
4286        };
4287        let json = serde_json::to_string(&config).unwrap();
4288        let parsed: ExecutionConfig = serde_json::from_str(&json).unwrap();
4289        assert_eq!(parsed.allowlist, config.allowlist);
4290    }
4291
4292    #[test]
4293    fn test_rch_config_includes_execution() {
4294        let _guard = test_guard!();
4295        let config = RchConfig::default();
4296        // RchConfig should include ExecutionConfig with default allowlist
4297        assert!(config.execution.is_allowed("cargo"));
4298    }
4299
4300    // ========================================================================
4301    // Adaptive Compression Tests (bd-243w)
4302    // ========================================================================
4303
4304    #[test]
4305    fn test_adaptive_compression_disabled_by_default() {
4306        let _guard = test_guard!();
4307        let config = TransferConfig::default();
4308        assert!(!config.adaptive_compression);
4309        // Should use fixed level when disabled
4310        assert_eq!(config.select_compression_level(Some(1_000_000)), 3);
4311        assert_eq!(config.select_compression_level(Some(500_000_000)), 3);
4312    }
4313
4314    #[test]
4315    fn test_adaptive_compression_small_payload() {
4316        let _guard = test_guard!();
4317        let config = TransferConfig {
4318            adaptive_compression: true,
4319            ..Default::default()
4320        };
4321        // < 10MB should use level 1
4322        assert_eq!(config.select_compression_level(Some(0)), 1);
4323        assert_eq!(config.select_compression_level(Some(1_000_000)), 1); // 1MB
4324        assert_eq!(config.select_compression_level(Some(9_999_999)), 1); // ~10MB
4325    }
4326
4327    #[test]
4328    fn test_adaptive_compression_medium_payload() {
4329        let _guard = test_guard!();
4330        let config = TransferConfig {
4331            adaptive_compression: true,
4332            ..Default::default()
4333        };
4334        // 10-200MB should use level 3
4335        assert_eq!(config.select_compression_level(Some(10_000_000)), 3); // 10MB
4336        assert_eq!(config.select_compression_level(Some(100_000_000)), 3); // 100MB
4337        assert_eq!(config.select_compression_level(Some(199_999_999)), 3); // ~200MB
4338    }
4339
4340    #[test]
4341    fn test_adaptive_compression_large_payload() {
4342        let _guard = test_guard!();
4343        let config = TransferConfig {
4344            adaptive_compression: true,
4345            ..Default::default()
4346        };
4347        // > 200MB should use level 7
4348        assert_eq!(config.select_compression_level(Some(200_000_000)), 7); // 200MB
4349        assert_eq!(config.select_compression_level(Some(500_000_000)), 7); // 500MB
4350        assert_eq!(config.select_compression_level(Some(1_000_000_000)), 7); // 1GB
4351    }
4352
4353    #[test]
4354    fn test_adaptive_compression_no_estimate() {
4355        let _guard = test_guard!();
4356        let config = TransferConfig {
4357            adaptive_compression: true,
4358            compression_level: 5,
4359            ..Default::default()
4360        };
4361        // No estimate should fallback to default level
4362        assert_eq!(config.select_compression_level(None), 5);
4363    }
4364
4365    #[test]
4366    fn test_adaptive_compression_respects_min_level() {
4367        let _guard = test_guard!();
4368        let config = TransferConfig {
4369            adaptive_compression: true,
4370            min_compression_level: 3,
4371            ..Default::default()
4372        };
4373        // Small payload would want level 1, but min is 3
4374        assert_eq!(config.select_compression_level(Some(1_000_000)), 3);
4375    }
4376
4377    #[test]
4378    fn test_adaptive_compression_respects_max_level() {
4379        let _guard = test_guard!();
4380        let config = TransferConfig {
4381            adaptive_compression: true,
4382            max_compression_level: 5,
4383            ..Default::default()
4384        };
4385        // Large payload would want level 7, but max is 5
4386        assert_eq!(config.select_compression_level(Some(500_000_000)), 5);
4387    }
4388
4389    #[test]
4390    fn test_adaptive_compression_serde_roundtrip() {
4391        let _guard = test_guard!();
4392        let config = TransferConfig {
4393            adaptive_compression: true,
4394            min_compression_level: 2,
4395            max_compression_level: 8,
4396            ..Default::default()
4397        };
4398        let json = serde_json::to_string(&config).unwrap();
4399        let parsed: TransferConfig = serde_json::from_str(&json).unwrap();
4400        assert!(parsed.adaptive_compression);
4401        assert_eq!(parsed.min_compression_level, 2);
4402        assert_eq!(parsed.max_compression_level, 8);
4403    }
4404
4405    // -------------------------------------------------------------------------
4406    // Self-Healing Config Tests (bd-59kg)
4407    // -------------------------------------------------------------------------
4408
4409    #[test]
4410    fn test_self_healing_config_defaults() {
4411        let _guard = test_guard!();
4412        // TEST START: SelfHealingConfig::default() returns expected values
4413        let config = SelfHealingConfig::default();
4414
4415        // Both self-healing behaviors enabled by default
4416        assert!(
4417            config.hook_starts_daemon,
4418            "hook_starts_daemon should be true by default"
4419        );
4420        assert!(
4421            config.daemon_installs_hooks,
4422            "daemon_installs_hooks should be true by default"
4423        );
4424
4425        // Timing defaults
4426        assert_eq!(
4427            config.auto_start_cooldown_secs, 30,
4428            "auto_start_cooldown_secs should default to 30"
4429        );
4430        assert_eq!(
4431            config.auto_start_timeout_secs, 3,
4432            "auto_start_timeout_secs should default to 3"
4433        );
4434        // TEST PASS: SelfHealingConfig defaults
4435    }
4436
4437    #[test]
4438    fn test_self_healing_config_serde_full() {
4439        let _guard = test_guard!();
4440        // TEST START: Full SelfHealingConfig serialization/deserialization
4441        let config = SelfHealingConfig {
4442            hook_starts_daemon: false,
4443            daemon_installs_hooks: false,
4444            auto_start_cooldown_secs: 60,
4445            auto_start_timeout_secs: 10,
4446            self_healing_log_level: SelfHealingLogLevel::Debug,
4447        };
4448
4449        let json = serde_json::to_string(&config).unwrap();
4450        assert!(json.contains("\"hook_starts_daemon\":false"));
4451        assert!(json.contains("\"daemon_installs_hooks\":false"));
4452        assert!(json.contains("\"auto_start_cooldown_secs\":60"));
4453        assert!(json.contains("\"auto_start_timeout_secs\":10"));
4454        assert!(json.contains("\"self_healing_log_level\":\"debug\""));
4455
4456        let parsed: SelfHealingConfig = serde_json::from_str(&json).unwrap();
4457        assert!(!parsed.hook_starts_daemon);
4458        assert!(!parsed.daemon_installs_hooks);
4459        assert_eq!(parsed.auto_start_cooldown_secs, 60);
4460        assert_eq!(parsed.auto_start_timeout_secs, 10);
4461        assert_eq!(parsed.self_healing_log_level, SelfHealingLogLevel::Debug);
4462        // TEST PASS: Full SelfHealingConfig serde
4463    }
4464
4465    // br-4zf3p: Tests for the self_healing_log_level field.
4466
4467    #[test]
4468    fn test_self_healing_log_level_default_is_info() {
4469        // TEST START: SelfHealingLogLevel::default() == Info
4470        let level = SelfHealingLogLevel::default();
4471        assert_eq!(level, SelfHealingLogLevel::Info);
4472        // Also verify the SelfHealingConfig default surfaces it.
4473        let config = SelfHealingConfig::default();
4474        assert_eq!(
4475            config.self_healing_log_level,
4476            SelfHealingLogLevel::Info,
4477            "config default should be Info"
4478        );
4479        // TEST PASS
4480    }
4481
4482    #[test]
4483    fn test_self_healing_log_level_serializes_lowercase() {
4484        // TEST START: serde uses lowercase variants
4485        let json = serde_json::to_string(&SelfHealingLogLevel::Warn).unwrap();
4486        assert_eq!(json, "\"warn\"");
4487        let parsed: SelfHealingLogLevel = serde_json::from_str("\"error\"").unwrap();
4488        assert_eq!(parsed, SelfHealingLogLevel::Error);
4489        // TEST PASS
4490    }
4491
4492    #[test]
4493    fn test_self_healing_log_level_from_env_str_accepts_known() {
4494        // TEST START: from_env_str parses each level (case-insensitive) and aliases
4495        assert_eq!(
4496            SelfHealingLogLevel::from_env_str("debug"),
4497            Some(SelfHealingLogLevel::Debug)
4498        );
4499        assert_eq!(
4500            SelfHealingLogLevel::from_env_str("DEBUG"),
4501            Some(SelfHealingLogLevel::Debug)
4502        );
4503        assert_eq!(
4504            SelfHealingLogLevel::from_env_str("info"),
4505            Some(SelfHealingLogLevel::Info)
4506        );
4507        assert_eq!(
4508            SelfHealingLogLevel::from_env_str("warn"),
4509            Some(SelfHealingLogLevel::Warn)
4510        );
4511        assert_eq!(
4512            SelfHealingLogLevel::from_env_str("warning"),
4513            Some(SelfHealingLogLevel::Warn),
4514            "warning should map to Warn"
4515        );
4516        assert_eq!(
4517            SelfHealingLogLevel::from_env_str("error"),
4518            Some(SelfHealingLogLevel::Error)
4519        );
4520        // Whitespace tolerance
4521        assert_eq!(
4522            SelfHealingLogLevel::from_env_str("  debug  "),
4523            Some(SelfHealingLogLevel::Debug)
4524        );
4525        // TEST PASS
4526    }
4527
4528    #[test]
4529    fn test_self_healing_log_level_from_env_str_rejects_unknown() {
4530        // TEST START: unknown values return None (caller falls back to default)
4531        assert!(SelfHealingLogLevel::from_env_str("banana").is_none());
4532        assert!(SelfHealingLogLevel::from_env_str("").is_none());
4533        assert!(SelfHealingLogLevel::from_env_str("trace").is_none());
4534        // TEST PASS
4535    }
4536
4537    #[test]
4538    fn test_self_healing_config_serde_partial_uses_defaults() {
4539        let _guard = test_guard!();
4540        // TEST START: Partial TOML/JSON uses defaults for missing fields
4541        let json = r#"{"hook_starts_daemon": false}"#;
4542        let config: SelfHealingConfig = serde_json::from_str(json).unwrap();
4543
4544        assert!(
4545            !config.hook_starts_daemon,
4546            "Explicit false should be parsed"
4547        );
4548        assert!(
4549            config.daemon_installs_hooks,
4550            "Missing field should use default (true)"
4551        );
4552        assert_eq!(
4553            config.auto_start_cooldown_secs, 30,
4554            "Missing field should use default (30)"
4555        );
4556        assert_eq!(
4557            config.auto_start_timeout_secs, 3,
4558            "Missing field should use default (3)"
4559        );
4560        // TEST PASS: Partial SelfHealingConfig uses defaults
4561    }
4562
4563    #[test]
4564    fn test_self_healing_config_toml_with_alias() {
4565        let _guard = test_guard!();
4566        // TEST START: daemon_start_timeout alias works for auto_start_timeout_secs
4567        let toml_str = r#"
4568            hook_starts_daemon = true
4569            daemon_installs_hooks = false
4570            auto_start_cooldown_secs = 45
4571            daemon_start_timeout = 7
4572        "#;
4573
4574        let config: SelfHealingConfig = toml::from_str(toml_str).unwrap();
4575        assert!(config.hook_starts_daemon);
4576        assert!(!config.daemon_installs_hooks);
4577        assert_eq!(config.auto_start_cooldown_secs, 45);
4578        assert_eq!(
4579            config.auto_start_timeout_secs, 7,
4580            "daemon_start_timeout alias should set auto_start_timeout_secs"
4581        );
4582        // TEST PASS: TOML alias works
4583    }
4584
4585    #[allow(unsafe_code)]
4586    mod self_healing_env_override_tests {
4587        use super::*;
4588        use crate::config::env_test_lock;
4589
4590        fn env_guard() -> std::sync::MutexGuard<'static, ()> {
4591            env_test_lock()
4592        }
4593
4594        fn set_env(key: &str, value: &str) {
4595            // SAFETY: Tests are serialized with env_guard().
4596            unsafe { std::env::set_var(key, value) };
4597        }
4598
4599        fn remove_env(key: &str) {
4600            // SAFETY: Tests are serialized with env_guard().
4601            unsafe { std::env::remove_var(key) };
4602        }
4603
4604        struct EnvVarGuard {
4605            key: &'static str,
4606            old: Option<String>,
4607        }
4608
4609        impl EnvVarGuard {
4610            fn set(key: &'static str, value: &str) -> Self {
4611                let old = std::env::var(key).ok();
4612                set_env(key, value);
4613                Self { key, old }
4614            }
4615        }
4616
4617        impl Drop for EnvVarGuard {
4618            fn drop(&mut self) {
4619                if let Some(old) = &self.old {
4620                    set_env(self.key, old);
4621                } else {
4622                    remove_env(self.key);
4623                }
4624            }
4625        }
4626
4627        #[test]
4628        fn test_self_healing_config_with_env_overrides_master_disable() {
4629            let _guard = test_guard!();
4630            let _guard = env_guard();
4631
4632            let _no_self_healing = EnvVarGuard::set("RCH_NO_SELF_HEALING", "1");
4633            let _hook_starts_daemon = EnvVarGuard::set("RCH_HOOK_STARTS_DAEMON", "1");
4634            let _daemon_installs_hooks = EnvVarGuard::set("RCH_DAEMON_INSTALLS_HOOKS", "1");
4635            let _cooldown = EnvVarGuard::set("RCH_AUTO_START_COOLDOWN_SECS", "99");
4636            let _timeout = EnvVarGuard::set("RCH_AUTO_START_TIMEOUT_SECS", "99");
4637
4638            let config = SelfHealingConfig::default().with_env_overrides();
4639            assert!(
4640                !config.hook_starts_daemon,
4641                "RCH_NO_SELF_HEALING should disable hook auto-start"
4642            );
4643            assert!(
4644                !config.daemon_installs_hooks,
4645                "RCH_NO_SELF_HEALING should disable daemon hook installation"
4646            );
4647
4648            // Master disable returns early; numeric overrides should not apply.
4649            assert_eq!(config.auto_start_cooldown_secs, 30);
4650            assert_eq!(config.auto_start_timeout_secs, 3);
4651        }
4652
4653        #[test]
4654        fn test_self_healing_config_with_env_overrides_toggles_and_numbers() {
4655            let _guard = test_guard!();
4656            let _guard = env_guard();
4657
4658            let _hook_starts_daemon = EnvVarGuard::set("RCH_HOOK_STARTS_DAEMON", "0");
4659            let _daemon_installs_hooks = EnvVarGuard::set("RCH_DAEMON_INSTALLS_HOOKS", "false");
4660            let _cooldown = EnvVarGuard::set("RCH_AUTO_START_COOLDOWN_SECS", "45");
4661            let _timeout = EnvVarGuard::set("RCH_AUTO_START_TIMEOUT_SECS", "7");
4662
4663            let config = SelfHealingConfig::default().with_env_overrides();
4664            assert!(!config.hook_starts_daemon);
4665            assert!(!config.daemon_installs_hooks);
4666            assert_eq!(config.auto_start_cooldown_secs, 45);
4667            assert_eq!(config.auto_start_timeout_secs, 7);
4668        }
4669
4670        #[test]
4671        fn test_self_healing_config_with_env_overrides_invalid_numbers_ignored() {
4672            let _guard = test_guard!();
4673            let _guard = env_guard();
4674
4675            let _cooldown = EnvVarGuard::set("RCH_AUTO_START_COOLDOWN_SECS", "not-a-number");
4676            let _timeout = EnvVarGuard::set("RCH_AUTO_START_TIMEOUT_SECS", "nope");
4677
4678            let config = SelfHealingConfig::default().with_env_overrides();
4679            assert_eq!(config.auto_start_cooldown_secs, 30);
4680            assert_eq!(config.auto_start_timeout_secs, 3);
4681        }
4682    }
4683
4684    // -------------------------------------------------------------------------
4685    // Preflight Health Guard Tests (bd-3eaa)
4686    // -------------------------------------------------------------------------
4687
4688    #[test]
4689    fn test_load_per_core_calculation() {
4690        let _guard = test_guard!();
4691        let mut caps = WorkerCapabilities::new();
4692        // No metrics -> None
4693        assert!(caps.load_per_core().is_none());
4694
4695        // Only load, no cpus -> None
4696        caps.load_avg_1 = Some(4.0);
4697        assert!(caps.load_per_core().is_none());
4698
4699        // Both available
4700        caps.num_cpus = Some(4);
4701        assert_eq!(caps.load_per_core(), Some(1.0)); // 4.0 / 4 = 1.0
4702
4703        // High load
4704        caps.load_avg_1 = Some(16.0);
4705        assert_eq!(caps.load_per_core(), Some(4.0)); // 16.0 / 4 = 4.0
4706    }
4707
4708    #[test]
4709    fn test_is_high_load() {
4710        let _guard = test_guard!();
4711        let mut caps = WorkerCapabilities::new();
4712        caps.load_avg_1 = Some(8.0);
4713        caps.num_cpus = Some(4);
4714        // load_per_core = 2.0
4715
4716        // Below threshold
4717        assert_eq!(caps.is_high_load(3.0), Some(false));
4718        // At threshold
4719        assert_eq!(caps.is_high_load(2.0), Some(false)); // equal is OK
4720        // Above threshold
4721        assert_eq!(caps.is_high_load(1.5), Some(true));
4722
4723        // No metrics -> None (fail-open)
4724        caps.load_avg_1 = None;
4725        assert!(caps.is_high_load(1.0).is_none());
4726    }
4727
4728    #[test]
4729    fn test_is_low_disk() {
4730        let _guard = test_guard!();
4731        let mut caps = WorkerCapabilities::new();
4732        caps.disk_free_gb = Some(15.0);
4733
4734        // Above threshold
4735        assert_eq!(caps.is_low_disk(10.0), Some(false));
4736        // At threshold
4737        assert_eq!(caps.is_low_disk(15.0), Some(false)); // equal is OK
4738        // Below threshold
4739        assert_eq!(caps.is_low_disk(20.0), Some(true));
4740
4741        // No metrics -> None (fail-open)
4742        caps.disk_free_gb = None;
4743        assert!(caps.is_low_disk(10.0).is_none());
4744    }
4745
4746    #[test]
4747    fn test_selection_config_preflight_defaults() {
4748        let _guard = test_guard!();
4749        let config = SelectionConfig::default();
4750        // Default thresholds
4751        assert_eq!(config.max_load_per_core, Some(2.0));
4752        assert_eq!(config.min_free_gb, Some(10.0));
4753    }
4754
4755    #[test]
4756    fn test_selection_config_preflight_serde() {
4757        let _guard = test_guard!();
4758        let config = SelectionConfig {
4759            max_load_per_core: Some(3.5),
4760            min_free_gb: Some(25.0),
4761            ..Default::default()
4762        };
4763        let json = serde_json::to_string(&config).unwrap();
4764        let parsed: SelectionConfig = serde_json::from_str(&json).unwrap();
4765        assert_eq!(parsed.max_load_per_core, Some(3.5));
4766        assert_eq!(parsed.min_free_gb, Some(25.0));
4767    }
4768
4769    #[test]
4770    fn test_worker_capabilities_health_metrics_serde() {
4771        let _guard = test_guard!();
4772        let caps = WorkerCapabilities {
4773            num_cpus: Some(8),
4774            load_avg_1: Some(1.5),
4775            load_avg_5: Some(2.0),
4776            load_avg_15: Some(1.8),
4777            disk_free_gb: Some(50.5),
4778            disk_total_gb: Some(100.0),
4779            ..Default::default()
4780        };
4781        let json = serde_json::to_string(&caps).unwrap();
4782        let parsed: WorkerCapabilities = serde_json::from_str(&json).unwrap();
4783        assert_eq!(parsed.num_cpus, Some(8));
4784        assert_eq!(parsed.load_avg_1, Some(1.5));
4785        assert_eq!(parsed.disk_free_gb, Some(50.5));
4786    }
4787}