Skip to main content

rch_common/errors/
catalog.rs

1//! Error Catalog for Remote Compilation Helper
2//!
3//! This module defines a comprehensive error catalog with unique error codes,
4//! categorized by subsystem. Each error includes:
5//! - A unique code (RCH-E001 through RCH-E999)
6//! - A human-readable message template
7//! - Remediation steps
8//! - Documentation links where applicable
9//!
10//! # Error Code Ranges
11//!
12//! | Range      | Category    | Description                          |
13//! |------------|-------------|--------------------------------------|
14//! | E001-E099  | Config      | Configuration and setup errors       |
15//! | E100-E199  | Network     | Network and SSH connectivity         |
16//! | E200-E299  | Worker      | Worker selection and management      |
17//! | E300-E399  | Build       | Compilation and build errors         |
18//! | E400-E499  | Transfer    | File transfer and sync errors        |
19//! | E500-E599  | Internal    | Internal/unexpected errors           |
20//!
21//! ## Extended Sub-Ranges (within existing categories)
22//!
23//! | Range      | Subcategory        | Description                           |
24//! |------------|--------------------|---------------------------------------|
25//! | E013-E018  | Config/PathDeps    | Path-dependency resolution errors     |
26//! | E019-E024  | Config/Closure     | Dependency-closure planner errors     |
27//! | E210-E219  | Worker/Storage     | Disk pressure and storage errors      |
28//! | E310-E319  | Build/Triage       | Process triage integration errors     |
29//! | E320-E325  | Build/Cancellation | Build cancellation lifecycle errors   |
30//!
31//! # Example
32//!
33//! ```rust
34//! use rch_common::errors::catalog::{ErrorCode, ErrorEntry};
35//!
36//! let error = ErrorCode::ConfigNotFound;
37//! let entry = error.entry();
38//!
39//! println!("Error {}: {}", entry.code, entry.message);
40//! for step in entry.remediation {
41//!     println!("  - {}", step);
42//! }
43//! ```
44
45use serde::{Deserialize, Serialize};
46use std::fmt;
47
48/// Error code enumeration covering all RCH error scenarios.
49///
50/// Each variant maps to a unique error code in the RCH-Exxx format.
51#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
52#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
53#[non_exhaustive]
54pub enum ErrorCode {
55    // =========================================================================
56    // Config Errors (E001-E099)
57    // =========================================================================
58    /// Configuration file not found
59    ConfigNotFound,
60    /// Configuration file could not be read
61    ConfigReadError,
62    /// Configuration file contains invalid TOML syntax
63    ConfigParseError,
64    /// Configuration contains invalid values
65    ConfigValidationError,
66    /// Environment variable has invalid value
67    ConfigEnvError,
68    /// Profile not found in configuration
69    ConfigProfileNotFound,
70    /// No workers configured
71    ConfigNoWorkers,
72    /// Worker configuration is invalid
73    ConfigInvalidWorker,
74    /// SSH key path is invalid or inaccessible
75    ConfigSshKeyError,
76    /// Socket path is invalid
77    ConfigSocketPathError,
78
79    // -- Path-Dependency Resolution (E013-E018) --
80    /// Cargo manifest parse failure during path-dependency resolution
81    PathDepManifestParseFailed,
82    /// Path dependency declared but target directory not found
83    PathDepMissing,
84    /// Cyclic path dependency detected
85    PathDepCyclic,
86    /// Path dependency violates canonical-root policy
87    PathDepPolicyViolation,
88    /// cargo metadata invocation failed
89    PathDepMetadataFailed,
90    /// cargo metadata output could not be parsed
91    PathDepMetadataParseFailed,
92
93    // -- Dependency-Closure Planner (E019-E024) --
94    /// Dependency closure plan computation failed
95    ClosurePlanFailed,
96    /// Closure entered fail-open state (unverifiable dependency data)
97    ClosureFailOpen,
98    /// High-risk path dependencies in closure
99    ClosureHighRisk,
100    /// Required closure data is missing or incomplete
101    ClosureMissingData,
102    /// Closure sync action ordering is non-deterministic
103    ClosureNonDeterministic,
104    /// Closure manifest fingerprint mismatch
105    ClosureFingerprintMismatch,
106
107    // =========================================================================
108    // Network Errors (E100-E199)
109    // =========================================================================
110    /// SSH connection failed
111    SshConnectionFailed,
112    /// SSH authentication failed
113    SshAuthFailed,
114    /// SSH key not found or invalid format
115    SshKeyError,
116    /// SSH known hosts verification failed
117    SshHostKeyError,
118    /// SSH command execution timed out
119    SshTimeout,
120    /// SSH session terminated unexpectedly
121    SshSessionDropped,
122    /// DNS resolution failed for worker host
123    NetworkDnsError,
124    /// Network unreachable
125    NetworkUnreachable,
126    /// Connection refused by remote host
127    NetworkConnectionRefused,
128    /// TCP connection timed out
129    NetworkTimeout,
130
131    // =========================================================================
132    // Worker Errors (E200-E299)
133    // =========================================================================
134    /// No workers available for selection
135    WorkerNoneAvailable,
136    /// All workers are unhealthy
137    WorkerAllUnhealthy,
138    /// Worker failed health check
139    WorkerHealthCheckFailed,
140    /// Worker self-test failed
141    WorkerSelfTestFailed,
142    /// Worker is at capacity
143    WorkerAtCapacity,
144    /// Worker missing required toolchain
145    WorkerMissingToolchain,
146    /// Worker state is inconsistent
147    WorkerStateError,
148    /// Worker circuit breaker is open
149    WorkerCircuitOpen,
150    /// Worker selection strategy failed
151    WorkerSelectionFailed,
152    /// Worker load query failed
153    WorkerLoadQueryFailed,
154
155    // -- Disk Pressure / Storage (E210-E219) --
156    /// Worker disk usage is critically high
157    WorkerDiskPressureCritical,
158    /// Worker disk usage is elevated (warning threshold)
159    WorkerDiskPressureWarning,
160    /// Worker disk pressure telemetry is stale or missing
161    WorkerTelemetryGap,
162    /// Worker disk I/O utilization is too high for scheduling
163    WorkerDiskIoHigh,
164    /// Worker memory pressure exceeds scheduling threshold
165    WorkerMemoryPressureHigh,
166    /// Disk reclaim/ballast eviction failed on worker
167    WorkerReclaimFailed,
168    /// Disk headroom estimation too low for build reservation
169    WorkerDiskHeadroomInsufficient,
170    /// Active build protection prevented reclaim operation
171    WorkerReclaimProtected,
172
173    // =========================================================================
174    // Build Errors (E300-E399)
175    // =========================================================================
176    /// Remote compilation failed
177    BuildCompilationFailed,
178    /// Build command not recognized
179    BuildUnknownCommand,
180    /// Build process was killed by signal
181    BuildKilledBySignal,
182    /// Build timed out
183    BuildTimeout,
184    /// Build output capture failed
185    BuildOutputError,
186    /// Remote working directory error
187    BuildWorkdirError,
188    /// Toolchain wrapper failed
189    BuildToolchainError,
190    /// Build environment setup failed
191    BuildEnvError,
192    /// Incremental build state corrupted
193    BuildIncrementalError,
194    /// Build artifact not found
195    BuildArtifactMissing,
196
197    // -- Process Triage (E310-E319) --
198    /// Process triage adapter binary unavailable or not installed
199    ProcessTriageAdapterUnavailable,
200    /// Process detector could not classify process with sufficient confidence
201    ProcessTriageDetectorUncertain,
202    /// Process triage action violates safe-action policy
203    ProcessTriagePolicyViolation,
204    /// Transport error communicating with process triage adapter
205    ProcessTriageTransportError,
206    /// Process triage executor encountered a runtime error
207    ProcessTriageExecutorError,
208    /// Process triage operation timed out
209    ProcessTriageTimeout,
210    /// Process triage returned partial or incomplete results
211    ProcessTriagePartialResult,
212    /// Invalid process triage request (malformed input)
213    ProcessTriageInvalidRequest,
214
215    // -- Cancellation (E320-E325) --
216    /// Graceful cancel signal dispatched
217    CancelGracefulSent,
218    /// Escalated to forced kill after timeout
219    CancelEscalatedKill,
220    /// Failed to kill remote process via SSH
221    CancelRemoteKillFailed,
222    /// Post-cancel cleanup encountered errors
223    CancelCleanupFailed,
224    /// Slots not properly released after cancel
225    CancelSlotLeak,
226    /// Cancellation exceeded policy time budget
227    CancelTimeoutExceeded,
228
229    // =========================================================================
230    // Transfer Errors (E400-E499)
231    // =========================================================================
232    /// Rsync transfer failed
233    TransferRsyncFailed,
234    /// File sync timed out
235    TransferTimeout,
236    /// Source files not found
237    TransferSourceMissing,
238    /// Destination path error
239    TransferDestError,
240    /// Insufficient disk space on worker
241    TransferDiskFull,
242    /// Permission denied during transfer
243    TransferPermissionDenied,
244    /// Transfer checksum mismatch
245    TransferChecksumError,
246    /// Binary download failed
247    TransferBinaryFailed,
248    /// Partial transfer detected
249    TransferIncomplete,
250    /// Transfer protocol error
251    TransferProtocolError,
252
253    // =========================================================================
254    // Internal Errors (E500-E599)
255    // =========================================================================
256    /// Daemon socket connection failed
257    InternalDaemonSocket,
258    /// Daemon protocol error
259    InternalDaemonProtocol,
260    /// Daemon not running
261    InternalDaemonNotRunning,
262    /// Inter-process communication error
263    InternalIpcError,
264    /// Unexpected internal state
265    InternalStateError,
266    /// Serialization/deserialization error
267    InternalSerdeError,
268    /// Hook execution failed
269    InternalHookError,
270    /// Metrics collection error
271    InternalMetricsError,
272    /// Logging system error
273    InternalLoggingError,
274    /// Update check failed
275    InternalUpdateError,
276}
277
278impl ErrorCode {
279    /// Returns the numeric error code (without prefix).
280    #[must_use]
281    pub const fn code_number(&self) -> u16 {
282        match self {
283            // Config (001-099)
284            Self::ConfigNotFound => 1,
285            Self::ConfigReadError => 2,
286            Self::ConfigParseError => 3,
287            Self::ConfigValidationError => 4,
288            Self::ConfigEnvError => 5,
289            Self::ConfigProfileNotFound => 6,
290            Self::ConfigNoWorkers => 7,
291            Self::ConfigInvalidWorker => 8,
292            Self::ConfigSshKeyError => 9,
293            Self::ConfigSocketPathError => 10,
294
295            // Path-Dependency (013-018)
296            Self::PathDepManifestParseFailed => 13,
297            Self::PathDepMissing => 14,
298            Self::PathDepCyclic => 15,
299            Self::PathDepPolicyViolation => 16,
300            Self::PathDepMetadataFailed => 17,
301            Self::PathDepMetadataParseFailed => 18,
302
303            // Dependency-Closure (019-024)
304            Self::ClosurePlanFailed => 19,
305            Self::ClosureFailOpen => 20,
306            Self::ClosureHighRisk => 21,
307            Self::ClosureMissingData => 22,
308            Self::ClosureNonDeterministic => 23,
309            Self::ClosureFingerprintMismatch => 24,
310
311            // Network (100-199)
312            Self::SshConnectionFailed => 100,
313            Self::SshAuthFailed => 101,
314            Self::SshKeyError => 102,
315            Self::SshHostKeyError => 103,
316            Self::SshTimeout => 104,
317            Self::SshSessionDropped => 105,
318            Self::NetworkDnsError => 106,
319            Self::NetworkUnreachable => 107,
320            Self::NetworkConnectionRefused => 108,
321            Self::NetworkTimeout => 109,
322
323            // Worker (200-299)
324            Self::WorkerNoneAvailable => 200,
325            Self::WorkerAllUnhealthy => 201,
326            Self::WorkerHealthCheckFailed => 202,
327            Self::WorkerSelfTestFailed => 203,
328            Self::WorkerAtCapacity => 204,
329            Self::WorkerMissingToolchain => 205,
330            Self::WorkerStateError => 206,
331            Self::WorkerCircuitOpen => 207,
332            Self::WorkerSelectionFailed => 208,
333            Self::WorkerLoadQueryFailed => 209,
334
335            // Disk Pressure / Storage (210-219)
336            Self::WorkerDiskPressureCritical => 210,
337            Self::WorkerDiskPressureWarning => 211,
338            Self::WorkerTelemetryGap => 212,
339            Self::WorkerDiskIoHigh => 213,
340            Self::WorkerMemoryPressureHigh => 214,
341            Self::WorkerReclaimFailed => 215,
342            Self::WorkerDiskHeadroomInsufficient => 216,
343            Self::WorkerReclaimProtected => 217,
344
345            // Build (300-399)
346            Self::BuildCompilationFailed => 300,
347            Self::BuildUnknownCommand => 301,
348            Self::BuildKilledBySignal => 302,
349            Self::BuildTimeout => 303,
350            Self::BuildOutputError => 304,
351            Self::BuildWorkdirError => 305,
352            Self::BuildToolchainError => 306,
353            Self::BuildEnvError => 307,
354            Self::BuildIncrementalError => 308,
355            Self::BuildArtifactMissing => 309,
356
357            // Process Triage (310-319)
358            Self::ProcessTriageAdapterUnavailable => 310,
359            Self::ProcessTriageDetectorUncertain => 311,
360            Self::ProcessTriagePolicyViolation => 312,
361            Self::ProcessTriageTransportError => 313,
362            Self::ProcessTriageExecutorError => 314,
363            Self::ProcessTriageTimeout => 315,
364            Self::ProcessTriagePartialResult => 316,
365            Self::ProcessTriageInvalidRequest => 317,
366
367            // Cancellation (320-325)
368            Self::CancelGracefulSent => 320,
369            Self::CancelEscalatedKill => 321,
370            Self::CancelRemoteKillFailed => 322,
371            Self::CancelCleanupFailed => 323,
372            Self::CancelSlotLeak => 324,
373            Self::CancelTimeoutExceeded => 325,
374
375            // Transfer (400-499)
376            Self::TransferRsyncFailed => 400,
377            Self::TransferTimeout => 401,
378            Self::TransferSourceMissing => 402,
379            Self::TransferDestError => 403,
380            Self::TransferDiskFull => 404,
381            Self::TransferPermissionDenied => 405,
382            Self::TransferChecksumError => 406,
383            Self::TransferBinaryFailed => 407,
384            Self::TransferIncomplete => 408,
385            Self::TransferProtocolError => 409,
386
387            // Internal (500-599)
388            Self::InternalDaemonSocket => 500,
389            Self::InternalDaemonProtocol => 501,
390            Self::InternalDaemonNotRunning => 502,
391            Self::InternalIpcError => 503,
392            Self::InternalStateError => 504,
393            Self::InternalSerdeError => 505,
394            Self::InternalHookError => 506,
395            Self::InternalMetricsError => 507,
396            Self::InternalLoggingError => 508,
397            Self::InternalUpdateError => 509,
398        }
399    }
400
401    /// Returns the formatted error code string (e.g., "RCH-E001").
402    #[must_use]
403    pub fn code_string(&self) -> String {
404        format!("RCH-E{:03}", self.code_number())
405    }
406
407    /// Returns the error category.
408    #[must_use]
409    pub const fn category(&self) -> ErrorCategory {
410        match self.code_number() {
411            1..=99 => ErrorCategory::Config,
412            100..=199 => ErrorCategory::Network,
413            200..=299 => ErrorCategory::Worker,
414            300..=399 => ErrorCategory::Build,
415            400..=499 => ErrorCategory::Transfer,
416            500..=599 => ErrorCategory::Internal,
417            _ => ErrorCategory::Internal,
418        }
419    }
420
421    /// Returns the full error entry with all metadata.
422    #[must_use]
423    pub fn entry(&self) -> ErrorEntry {
424        ErrorEntry {
425            code: self.code_string(),
426            category: self.category(),
427            message: self.message().to_string(),
428            remediation: self
429                .remediation()
430                .iter()
431                .map(|s| (*s).to_string())
432                .collect(),
433            doc_url: self.doc_url().map(String::from),
434        }
435    }
436
437    /// Returns the error message template.
438    #[must_use]
439    pub const fn message(&self) -> &'static str {
440        match self {
441            // Config
442            Self::ConfigNotFound => "Configuration file not found",
443            Self::ConfigReadError => "Failed to read configuration file",
444            Self::ConfigParseError => "Configuration file contains invalid TOML syntax",
445            Self::ConfigValidationError => "Configuration contains invalid values",
446            Self::ConfigEnvError => "Environment variable has invalid value",
447            Self::ConfigProfileNotFound => "Profile not found in configuration",
448            Self::ConfigNoWorkers => "No workers are configured",
449            Self::ConfigInvalidWorker => "Worker configuration is invalid",
450            Self::ConfigSshKeyError => "SSH key path is invalid or inaccessible",
451            Self::ConfigSocketPathError => "Socket path is invalid or inaccessible",
452
453            // Path-Dependency
454            Self::PathDepManifestParseFailed => {
455                "Cargo manifest parse failure during path-dependency resolution"
456            }
457            Self::PathDepMissing => "Path dependency declared but target directory not found",
458            Self::PathDepCyclic => "Cyclic path dependency detected in dependency graph",
459            Self::PathDepPolicyViolation => {
460                "Path dependency violates canonical-root topology policy"
461            }
462            Self::PathDepMetadataFailed => "cargo metadata invocation failed",
463            Self::PathDepMetadataParseFailed => "cargo metadata output could not be parsed",
464
465            // Dependency-Closure
466            Self::ClosurePlanFailed => "Dependency closure plan computation failed",
467            Self::ClosureFailOpen => {
468                "Dependency closure entered fail-open state due to unverifiable data"
469            }
470            Self::ClosureHighRisk => "High-risk path dependencies detected in closure plan",
471            Self::ClosureMissingData => "Required dependency closure data is missing or incomplete",
472            Self::ClosureNonDeterministic => "Closure sync action ordering is non-deterministic",
473            Self::ClosureFingerprintMismatch => "Closure manifest fingerprint mismatch detected",
474
475            // Network
476            Self::SshConnectionFailed => "SSH connection to worker failed",
477            Self::SshAuthFailed => "SSH authentication failed",
478            Self::SshKeyError => "SSH key not found or has invalid format",
479            Self::SshHostKeyError => "SSH host key verification failed",
480            Self::SshTimeout => "SSH command execution timed out",
481            Self::SshSessionDropped => "SSH session terminated unexpectedly",
482            Self::NetworkDnsError => "DNS resolution failed for worker host",
483            Self::NetworkUnreachable => "Network is unreachable",
484            Self::NetworkConnectionRefused => "Connection refused by remote host",
485            Self::NetworkTimeout => "TCP connection timed out",
486
487            // Worker
488            Self::WorkerNoneAvailable => "No workers available for selection",
489            Self::WorkerAllUnhealthy => "All configured workers are unhealthy",
490            Self::WorkerHealthCheckFailed => "Worker failed health check",
491            Self::WorkerSelfTestFailed => "Worker self-test failed",
492            Self::WorkerAtCapacity => "Worker is at maximum capacity",
493            Self::WorkerMissingToolchain => "Worker is missing required toolchain",
494            Self::WorkerStateError => "Worker state is inconsistent",
495            Self::WorkerCircuitOpen => "Worker circuit breaker is open",
496            Self::WorkerSelectionFailed => "Worker selection strategy failed",
497            Self::WorkerLoadQueryFailed => "Failed to query worker load",
498
499            // Disk Pressure / Storage
500            Self::WorkerDiskPressureCritical => "Worker disk usage is critically high",
501            Self::WorkerDiskPressureWarning => "Worker disk usage has exceeded warning threshold",
502            Self::WorkerTelemetryGap => "Worker disk pressure telemetry is stale or missing",
503            Self::WorkerDiskIoHigh => "Worker disk I/O utilization is too high for scheduling",
504            Self::WorkerMemoryPressureHigh => "Worker memory pressure exceeds scheduling threshold",
505            Self::WorkerReclaimFailed => "Disk reclaim operation failed on worker",
506            Self::WorkerDiskHeadroomInsufficient => {
507                "Insufficient disk headroom for build reservation"
508            }
509            Self::WorkerReclaimProtected => "Active build protection prevented reclaim operation",
510
511            // Build
512            Self::BuildCompilationFailed => "Remote compilation failed",
513            Self::BuildUnknownCommand => "Build command not recognized",
514            Self::BuildKilledBySignal => "Build process was killed by signal",
515            Self::BuildTimeout => "Build operation timed out",
516            Self::BuildOutputError => "Failed to capture build output",
517            Self::BuildWorkdirError => "Remote working directory error",
518            Self::BuildToolchainError => "Toolchain wrapper failed",
519            Self::BuildEnvError => "Build environment setup failed",
520            Self::BuildIncrementalError => "Incremental build state is corrupted",
521            Self::BuildArtifactMissing => "Build artifact not found",
522
523            // Process Triage
524            Self::ProcessTriageAdapterUnavailable => {
525                "Process triage adapter is unavailable or not installed"
526            }
527            Self::ProcessTriageDetectorUncertain => {
528                "Process detector could not classify with sufficient confidence"
529            }
530            Self::ProcessTriagePolicyViolation => {
531                "Process triage action violates safe-action policy"
532            }
533            Self::ProcessTriageTransportError => {
534                "Transport error communicating with process triage adapter"
535            }
536            Self::ProcessTriageExecutorError => {
537                "Process triage executor encountered a runtime error"
538            }
539            Self::ProcessTriageTimeout => "Process triage operation timed out",
540            Self::ProcessTriagePartialResult => {
541                "Process triage returned partial or incomplete results"
542            }
543            Self::ProcessTriageInvalidRequest => "Invalid process triage request",
544
545            // Cancellation
546            Self::CancelGracefulSent => "Graceful cancel signal dispatched",
547            Self::CancelEscalatedKill => "Escalated to forced kill after timeout",
548            Self::CancelRemoteKillFailed => "Failed to kill remote process via SSH",
549            Self::CancelCleanupFailed => "Post-cancel cleanup encountered errors",
550            Self::CancelSlotLeak => "Slots not properly released after cancel",
551            Self::CancelTimeoutExceeded => "Cancellation exceeded policy time budget",
552
553            // Transfer
554            Self::TransferRsyncFailed => "Rsync transfer failed",
555            Self::TransferTimeout => "File sync operation timed out",
556            Self::TransferSourceMissing => "Source files not found",
557            Self::TransferDestError => "Destination path error",
558            Self::TransferDiskFull => "Insufficient disk space on worker",
559            Self::TransferPermissionDenied => "Permission denied during file transfer",
560            Self::TransferChecksumError => "Transfer checksum mismatch",
561            Self::TransferBinaryFailed => "Binary download failed",
562            Self::TransferIncomplete => "Transfer completed partially",
563            Self::TransferProtocolError => "Transfer protocol error",
564
565            // Internal
566            Self::InternalDaemonSocket => "Failed to connect to daemon socket",
567            Self::InternalDaemonProtocol => "Daemon protocol error",
568            Self::InternalDaemonNotRunning => "RCH daemon is not running",
569            Self::InternalIpcError => "Inter-process communication error",
570            Self::InternalStateError => "Unexpected internal state",
571            Self::InternalSerdeError => "Serialization/deserialization error",
572            Self::InternalHookError => "Hook execution failed",
573            Self::InternalMetricsError => "Metrics collection error",
574            Self::InternalLoggingError => "Logging system error",
575            Self::InternalUpdateError => "Update check failed",
576        }
577    }
578
579    /// Returns remediation steps for this error.
580    #[must_use]
581    pub const fn remediation(&self) -> &'static [&'static str] {
582        match self {
583            // Config
584            Self::ConfigNotFound => &[
585                "Run 'rch init' to create a default configuration",
586                "Check if ~/.config/rch/config.toml exists",
587                "Set RCH_CONFIG_DIR environment variable to specify custom config directory",
588            ],
589            Self::ConfigReadError => &[
590                "Check file permissions on the configuration file",
591                "Verify the file is not corrupted",
592                "Ensure no other process has locked the file",
593            ],
594            Self::ConfigParseError => &[
595                "Run 'rch config validate' to identify syntax errors",
596                "Check TOML syntax at the indicated line",
597                "Ensure all strings are properly quoted",
598            ],
599            Self::ConfigValidationError => &[
600                "Run 'rch config validate' for detailed diagnostics",
601                "Check that all required fields are present",
602                "Verify values are within allowed ranges",
603            ],
604            Self::ConfigEnvError => &[
605                "Check the environment variable value format",
606                "Unset the variable to use config file defaults",
607                "See 'rch help env' for valid environment variables",
608            ],
609            Self::ConfigProfileNotFound => &[
610                "List available profiles with 'rch config profiles'",
611                "Create the profile in your configuration file",
612                "Check for typos in the profile name",
613            ],
614            Self::ConfigNoWorkers => &[
615                "Add at least one worker to your configuration",
616                "Run 'rch discover' to find available workers",
617                "Check the [workers] section in your config",
618            ],
619            Self::ConfigInvalidWorker => &[
620                "Verify worker hostname is correct",
621                "Check SSH username and key configuration",
622                "Ensure remote_base_dir is a valid path",
623            ],
624            Self::ConfigSshKeyError => &[
625                "Check that the SSH key file exists",
626                "Verify file permissions (should be 600)",
627                "Ensure the key format is valid (ssh-keygen -y -f KEY)",
628            ],
629            Self::ConfigSocketPathError => &[
630                "Check directory permissions for socket path",
631                "Ensure parent directory exists",
632                "Try using the default socket path",
633            ],
634
635            // Path-Dependency
636            Self::PathDepManifestParseFailed => &[
637                "Check Cargo.toml syntax with 'cargo verify-project'",
638                "Ensure all path-dependency Cargo.toml files are valid TOML",
639                "Run 'cargo metadata' manually to see detailed parse errors",
640            ],
641            Self::PathDepMissing => &[
642                "Verify the path in Cargo.toml [dependencies] exists on disk",
643                "Check for typos in the dependency path value",
644                "Ensure all workspace members are checked out",
645            ],
646            Self::PathDepCyclic => &[
647                "Review the path dependency graph for cycles",
648                "Run 'cargo metadata' to visualize the dependency tree",
649                "Break the cycle by restructuring crate boundaries",
650            ],
651            Self::PathDepPolicyViolation => &[
652                "Ensure all path dependencies are under the canonical root (/data/projects)",
653                "Check that paths resolve within allowed topology prefixes",
654                "Review the PathTopologyPolicy configuration",
655            ],
656            Self::PathDepMetadataFailed => &[
657                "Verify 'cargo' is installed and on PATH",
658                "Check that Cargo.toml is a valid project manifest",
659                "Try running 'cargo metadata --format-version=1' manually",
660            ],
661            Self::PathDepMetadataParseFailed => &[
662                "Run 'cargo metadata --format-version=1' and check JSON output",
663                "Ensure cargo version is recent enough for the workspace layout",
664                "Check for toolchain incompatibilities with rust-toolchain.toml",
665            ],
666
667            // Dependency-Closure
668            Self::ClosurePlanFailed => &[
669                "Check that all path dependencies are resolvable",
670                "Run 'cargo metadata' to verify dependency graph health",
671                "Review dependency closure planner logs for specific failures",
672            ],
673            Self::ClosureFailOpen => &[
674                "The transfer will proceed with project root only (fail-open semantics)",
675                "Check path dependency graph health to restore full closure",
676                "Review the fail-open reason in structured diagnostics output",
677            ],
678            Self::ClosureHighRisk => &[
679                "Review the high-risk dependencies flagged in the plan",
680                "Ensure all dependency paths are canonical and stable",
681                "Consider pinning dependency versions to reduce risk",
682            ],
683            Self::ClosureMissingData => &[
684                "Ensure Cargo.toml and Cargo.lock are present and valid",
685                "Check that all workspace members are accessible",
686                "Run 'cargo update' to regenerate lock file if needed",
687            ],
688            Self::ClosureNonDeterministic => &[
689                "Report this as a bug — closure ordering must be deterministic",
690                "Check for filesystem race conditions or concurrent modifications",
691                "Retry the operation to see if the ordering stabilizes",
692            ],
693            Self::ClosureFingerprintMismatch => &[
694                "A dependency manifest changed since the plan was computed",
695                "Recompute the closure plan to pick up the latest manifests",
696                "Check for concurrent modifications to Cargo.toml files",
697            ],
698
699            // Network
700            Self::SshConnectionFailed => &[
701                "Verify the worker host is reachable: ping <host>",
702                "Check that SSH service is running on the worker",
703                "Verify firewall allows SSH (port 22)",
704                "Try connecting manually: ssh <user>@<host>",
705            ],
706            Self::SshAuthFailed => &[
707                "Verify SSH key is in authorized_keys on the worker",
708                "Check SSH key passphrase if applicable",
709                "Ensure ssh-agent is running with key loaded",
710                "Try: ssh-add -l to list loaded keys",
711            ],
712            Self::SshKeyError => &[
713                "Check that the SSH key file exists at the configured path",
714                "Verify key file permissions are 600",
715                "Regenerate key if format is corrupted",
716            ],
717            Self::SshHostKeyError => &[
718                "Accept the host key: ssh <user>@<host> (confirm fingerprint)",
719                "Check known_hosts for conflicting entries",
720                "Update known_hosts_policy in config if appropriate",
721            ],
722            Self::SshTimeout => &[
723                "Check network connectivity to the worker",
724                "Increase timeout in configuration",
725                "Verify worker is not overloaded",
726            ],
727            Self::SshSessionDropped => &[
728                "Check network stability",
729                "Verify worker has not rebooted",
730                "Look for keepalive settings in SSH config",
731            ],
732            Self::NetworkDnsError => &[
733                "Verify worker hostname is correct",
734                "Check DNS server configuration",
735                "Try using IP address instead of hostname",
736            ],
737            Self::NetworkUnreachable => &[
738                "Check network connection on local machine",
739                "Verify VPN connection if required",
740                "Check routing to worker network",
741            ],
742            Self::NetworkConnectionRefused => &[
743                "Verify SSH service is running on worker",
744                "Check if worker firewall allows connections",
745                "Ensure correct port is being used",
746            ],
747            Self::NetworkTimeout => &[
748                "Check network latency to worker",
749                "Verify worker is responsive",
750                "Increase connection timeout in config",
751            ],
752
753            // Worker
754            Self::WorkerNoneAvailable => &[
755                "Configure at least one worker in config.toml",
756                "Run 'rch discover' to find available workers",
757                "Check that configured workers are enabled",
758            ],
759            Self::WorkerAllUnhealthy => &[
760                "Run 'rch doctor' to diagnose worker issues",
761                "Check individual worker connectivity",
762                "Review worker health check logs",
763            ],
764            Self::WorkerHealthCheckFailed => &[
765                "Verify SSH connectivity to worker",
766                "Check worker disk space and load",
767                "Review health check timeout settings",
768            ],
769            Self::WorkerSelfTestFailed => &[
770                "Run 'rch self-test --worker <name>' for details",
771                "Verify Rust toolchain on worker",
772                "Check worker has required dependencies",
773            ],
774            Self::WorkerAtCapacity => &[
775                "Wait for current builds to complete",
776                "Add more workers to distribute load",
777                "Increase max_concurrent_builds on worker",
778            ],
779            Self::WorkerMissingToolchain => &[
780                "Install required toolchain on worker",
781                "Run 'rustup show' on worker to verify",
782                "Update worker toolchain configuration",
783            ],
784            Self::WorkerStateError => &[
785                "Restart the RCH daemon: rchd restart",
786                "Check for stale lock files",
787                "Review daemon logs for details",
788            ],
789            Self::WorkerCircuitOpen => &[
790                "Wait for circuit breaker reset period",
791                "Check worker health manually",
792                "Review recent build failures on worker",
793            ],
794            Self::WorkerSelectionFailed => &[
795                "Verify at least one worker is healthy",
796                "Check selection strategy configuration",
797                "Review worker weights and priorities",
798            ],
799            Self::WorkerLoadQueryFailed => &[
800                "Verify SSH connectivity to worker",
801                "Check that load query command works on worker",
802                "Review timeout settings for load queries",
803            ],
804
805            // Disk Pressure / Storage
806            Self::WorkerDiskPressureCritical => &[
807                "Worker disk usage is above 95% — builds will not be scheduled here",
808                "Clean up old build caches: rch cache clean --worker <id>",
809                "Check disk usage on worker: ssh <worker> df -h",
810            ],
811            Self::WorkerDiskPressureWarning => &[
812                "Worker disk usage is above 80% — scheduling priority reduced",
813                "Consider cleaning old caches: rch cache clean --worker <id>",
814                "Monitor disk usage trend to prevent critical state",
815            ],
816            Self::WorkerTelemetryGap => &[
817                "Worker disk telemetry is stale — pressure assessment is unreliable",
818                "Check worker health: rch workers probe <id>",
819                "Verify telemetry collection is running on the worker",
820            ],
821            Self::WorkerDiskIoHigh => &[
822                "Worker disk I/O is saturated — builds may experience latency",
823                "Wait for current I/O-heavy operations to complete",
824                "Check for stuck or runaway processes: rch workers probe <id>",
825            ],
826            Self::WorkerMemoryPressureHigh => &[
827                "Worker memory pressure is high — scheduling priority reduced",
828                "Check for memory leaks or over-committed builds on the worker",
829                "Review worker slot count to prevent over-scheduling",
830            ],
831            Self::WorkerReclaimFailed => &[
832                "Disk space reclaim operation failed on the worker",
833                "Check worker filesystem health and permissions",
834                "Inspect both root and temp usage: ssh <worker> 'df -h / /tmp'",
835            ],
836            Self::WorkerDiskHeadroomInsufficient => &[
837                "Estimated build disk requirement exceeds available free space",
838                "Try a different worker with more headroom",
839                "Clean up old build artifacts to free space",
840            ],
841            Self::WorkerReclaimProtected => &[
842                "Active build artifacts were protected from reclaim",
843                "Wait for current builds to complete before retrying reclaim",
844                "Only idle cache entries are eligible for eviction",
845            ],
846
847            // Build
848            Self::BuildCompilationFailed => &[
849                "Review compilation errors in output",
850                "Verify code compiles locally first",
851                "Check for missing dependencies on worker",
852            ],
853            Self::BuildUnknownCommand => &[
854                "Check that the command is supported",
855                "Verify cargo/rustc version compatibility",
856                "Review RCH command pattern configuration",
857            ],
858            Self::BuildKilledBySignal => &[
859                "Check worker system logs for OOM killer",
860                "Review build memory requirements",
861                "Check if build was manually interrupted",
862            ],
863            Self::BuildTimeout => &[
864                "Increase build timeout in configuration",
865                "Check for infinite loops or hangs",
866                "Verify worker is not overloaded",
867            ],
868            Self::BuildOutputError => &[
869                "Check worker disk space",
870                "Verify PTY allocation settings",
871                "Review output buffer configuration",
872            ],
873            Self::BuildWorkdirError => &[
874                "Verify remote_base_dir is writable",
875                "Check directory permissions on worker",
876                "Ensure path does not contain special characters",
877            ],
878            Self::BuildToolchainError => &[
879                "Verify toolchain is installed on worker",
880                "Check rustup default toolchain",
881                "Review toolchain override settings",
882            ],
883            Self::BuildEnvError => &[
884                "Check environment variable configuration",
885                "Verify required environment is set on worker",
886                "Review shell initialization on worker",
887            ],
888            Self::BuildIncrementalError => &[
889                "Run 'cargo clean' on remote workspace",
890                "Delete incremental compilation cache",
891                "Try full rebuild with --release",
892            ],
893            Self::BuildArtifactMissing => &[
894                "Verify build completed successfully",
895                "Check artifact path configuration",
896                "Review build output for artifact location",
897            ],
898
899            // Process Triage
900            Self::ProcessTriageAdapterUnavailable => &[
901                "Ensure the process triage adapter binary is installed",
902                "Check PATH includes the adapter binary location",
903                "Verify the adapter version is compatible with this RCH version",
904            ],
905            Self::ProcessTriageDetectorUncertain => &[
906                "Process classification was inconclusive — no action taken",
907                "Review the process list manually for suspicious entries",
908                "Adjust detector confidence threshold if false negatives are common",
909            ],
910            Self::ProcessTriagePolicyViolation => &[
911                "The requested action is blocked by safe-action policy",
912                "Review the escalation level required for this action class",
913                "Use a lower-risk action class or request manual approval",
914            ],
915            Self::ProcessTriageTransportError => &[
916                "Communication with the process triage adapter failed",
917                "Verify the adapter process is running and responsive",
918                "Check for socket/pipe errors in adapter logs",
919            ],
920            Self::ProcessTriageExecutorError => &[
921                "The process triage executor encountered a runtime error",
922                "Check adapter logs for detailed error output",
923                "Verify the target process is still running",
924            ],
925            Self::ProcessTriageTimeout => &[
926                "Process triage operation exceeded the configured timeout",
927                "Increase timeout in ProcessTriageTimeoutPolicy if needed",
928                "Check for adapter hangs or system-level resource contention",
929            ],
930            Self::ProcessTriagePartialResult => &[
931                "Not all requested triage actions completed successfully",
932                "Review the partial result for which actions succeeded",
933                "Retry failed actions individually for better diagnostics",
934            ],
935            Self::ProcessTriageInvalidRequest => &[
936                "The process triage request is malformed or missing required fields",
937                "Validate request against the ProcessTriage contract schema",
938                "Check the contract schema version compatibility",
939            ],
940
941            // Cancellation
942            Self::CancelGracefulSent => &[
943                "Graceful cancellation signal (SIGTERM) was sent to the build process",
944                "The build should terminate within the grace period",
945                "Use force cancel if the process does not respond",
946            ],
947            Self::CancelEscalatedKill => &[
948                "Build did not respond to graceful cancel within the grace period",
949                "SIGKILL was sent to forcefully terminate the process",
950                "Check worker for orphaned processes if this occurs frequently",
951            ],
952            Self::CancelRemoteKillFailed => &[
953                "SSH kill command to the remote worker failed",
954                "Verify SSH connectivity to the worker",
955                "Check that the remote process PID is still valid",
956            ],
957            Self::CancelCleanupFailed => &[
958                "Post-cancellation cleanup did not complete successfully",
959                "Check worker disk space and permissions",
960                "Verify remote working directory state manually",
961            ],
962            Self::CancelSlotLeak => &[
963                "Worker slots were not properly released after cancellation",
964                "Check worker slot accounting for inconsistencies",
965                "Restart the daemon if slot leak persists",
966            ],
967            Self::CancelTimeoutExceeded => &[
968                "Cancellation did not complete within the policy time budget",
969                "The build may still be running on the worker",
970                "Consider increasing cancellation timeouts or force-cancelling",
971            ],
972
973            // Transfer
974            Self::TransferRsyncFailed => &[
975                "Verify rsync is installed on both ends",
976                "Check SSH connectivity to worker",
977                "Review rsync exclude patterns",
978            ],
979            Self::TransferTimeout => &[
980                "Increase transfer timeout in configuration",
981                "Check network bandwidth to worker",
982                "Consider using incremental sync",
983            ],
984            Self::TransferSourceMissing => &[
985                "Verify source files exist locally",
986                "Check file patterns in configuration",
987                "Review .rchignore exclusions",
988            ],
989            Self::TransferDestError => &[
990                "Check remote directory permissions and ownership",
991                "Verify remote_base_dir is valid",
992                "Ensure sufficient disk space on worker",
993            ],
994            Self::TransferDiskFull => &[
995                "Clean up old builds on worker",
996                "Check disk usage: df -h on worker",
997                "Increase disk allocation for worker",
998            ],
999            Self::TransferPermissionDenied => &[
1000                "Check ownership of /data/projects/<repo> on the worker",
1001                "Verify SSH user has write permissions",
1002                "Repair drift if needed: sudo chown -R <ssh-user>:<ssh-user> /data/projects/<repo>",
1003            ],
1004            Self::TransferChecksumError => &[
1005                "Retry the transfer",
1006                "Check for network issues",
1007                "Verify file integrity on source",
1008            ],
1009            Self::TransferBinaryFailed => &[
1010                "Check network connectivity",
1011                "Verify binary URL is accessible",
1012                "Try manual download to diagnose",
1013            ],
1014            Self::TransferIncomplete => &[
1015                "Retry the transfer operation",
1016                "Check for network interruptions",
1017                "Review transfer logs for details",
1018            ],
1019            Self::TransferProtocolError => &[
1020                "Verify rsync version compatibility",
1021                "Check SSH protocol settings",
1022                "Review transfer configuration",
1023            ],
1024
1025            // Internal
1026            Self::InternalDaemonSocket => &[
1027                "Start the daemon: rchd start",
1028                "Check socket path permissions",
1029                "Verify no stale socket file exists",
1030            ],
1031            Self::InternalDaemonProtocol => &[
1032                "Restart the daemon: rchd restart",
1033                "Check for version mismatch between rch and rchd",
1034                "Review daemon logs for details",
1035            ],
1036            Self::InternalDaemonNotRunning => &[
1037                "Start the daemon: rchd start",
1038                "Check if daemon crashed: journalctl -u rchd",
1039                "Verify daemon configuration",
1040            ],
1041            Self::InternalIpcError => &[
1042                "Restart the daemon",
1043                "Check system message queue limits",
1044                "Review logs for detailed error",
1045            ],
1046            Self::InternalStateError => &[
1047                "Restart the daemon",
1048                "Clear any lock files",
1049                "Report bug if persists",
1050            ],
1051            Self::InternalSerdeError => &[
1052                "Check for corrupted state files",
1053                "Clear cache and restart",
1054                "Report bug with reproduction steps",
1055            ],
1056            Self::InternalHookError => &[
1057                "Verify hook script exists and is executable",
1058                "Check hook script for errors",
1059                "Review hook timeout settings",
1060            ],
1061            Self::InternalMetricsError => &[
1062                "Check metrics file permissions",
1063                "Verify disk space for metrics",
1064                "Review metrics configuration",
1065            ],
1066            Self::InternalLoggingError => &[
1067                "Check log directory permissions",
1068                "Verify disk space for logs",
1069                "Review logging configuration",
1070            ],
1071            Self::InternalUpdateError => &[
1072                "Check network connectivity",
1073                "Verify update server is reachable",
1074                "Try manual update check",
1075            ],
1076        }
1077    }
1078
1079    /// Returns documentation URL for this error, if available.
1080    #[must_use]
1081    pub const fn doc_url(&self) -> Option<&'static str> {
1082        // Use specific doc pages for new sub-ranges when available
1083        match self {
1084            Self::PathDepManifestParseFailed
1085            | Self::PathDepMissing
1086            | Self::PathDepCyclic
1087            | Self::PathDepPolicyViolation
1088            | Self::PathDepMetadataFailed
1089            | Self::PathDepMetadataParseFailed => Some("https://rch.dev/docs/path-deps"),
1090
1091            Self::ClosurePlanFailed
1092            | Self::ClosureFailOpen
1093            | Self::ClosureHighRisk
1094            | Self::ClosureMissingData
1095            | Self::ClosureNonDeterministic
1096            | Self::ClosureFingerprintMismatch => Some("https://rch.dev/docs/dependency-closure"),
1097
1098            Self::WorkerDiskPressureCritical
1099            | Self::WorkerDiskPressureWarning
1100            | Self::WorkerTelemetryGap
1101            | Self::WorkerDiskIoHigh
1102            | Self::WorkerMemoryPressureHigh
1103            | Self::WorkerReclaimFailed
1104            | Self::WorkerDiskHeadroomInsufficient
1105            | Self::WorkerReclaimProtected => Some("https://rch.dev/docs/disk-pressure"),
1106
1107            Self::ProcessTriageAdapterUnavailable
1108            | Self::ProcessTriageDetectorUncertain
1109            | Self::ProcessTriagePolicyViolation
1110            | Self::ProcessTriageTransportError
1111            | Self::ProcessTriageExecutorError
1112            | Self::ProcessTriageTimeout
1113            | Self::ProcessTriagePartialResult
1114            | Self::ProcessTriageInvalidRequest => Some("https://rch.dev/docs/process-triage"),
1115
1116            Self::CancelGracefulSent
1117            | Self::CancelEscalatedKill
1118            | Self::CancelRemoteKillFailed
1119            | Self::CancelCleanupFailed
1120            | Self::CancelSlotLeak
1121            | Self::CancelTimeoutExceeded => Some("https://rch.dev/docs/cancellation"),
1122
1123            _ => match self.category() {
1124                ErrorCategory::Config => Some("https://rch.dev/docs/config"),
1125                ErrorCategory::Network => Some("https://rch.dev/docs/ssh"),
1126                ErrorCategory::Worker => Some("https://rch.dev/docs/workers"),
1127                ErrorCategory::Build => Some("https://rch.dev/docs/builds"),
1128                ErrorCategory::Transfer => Some("https://rch.dev/docs/sync"),
1129                ErrorCategory::Internal => Some("https://rch.dev/docs/troubleshooting"),
1130            },
1131        }
1132    }
1133
1134    /// Returns all error codes.
1135    #[must_use]
1136    pub const fn all() -> &'static [ErrorCode] {
1137        &[
1138            // Config
1139            Self::ConfigNotFound,
1140            Self::ConfigReadError,
1141            Self::ConfigParseError,
1142            Self::ConfigValidationError,
1143            Self::ConfigEnvError,
1144            Self::ConfigProfileNotFound,
1145            Self::ConfigNoWorkers,
1146            Self::ConfigInvalidWorker,
1147            Self::ConfigSshKeyError,
1148            Self::ConfigSocketPathError,
1149            // Path-Dependency
1150            Self::PathDepManifestParseFailed,
1151            Self::PathDepMissing,
1152            Self::PathDepCyclic,
1153            Self::PathDepPolicyViolation,
1154            Self::PathDepMetadataFailed,
1155            Self::PathDepMetadataParseFailed,
1156            // Dependency-Closure
1157            Self::ClosurePlanFailed,
1158            Self::ClosureFailOpen,
1159            Self::ClosureHighRisk,
1160            Self::ClosureMissingData,
1161            Self::ClosureNonDeterministic,
1162            Self::ClosureFingerprintMismatch,
1163            // Network
1164            Self::SshConnectionFailed,
1165            Self::SshAuthFailed,
1166            Self::SshKeyError,
1167            Self::SshHostKeyError,
1168            Self::SshTimeout,
1169            Self::SshSessionDropped,
1170            Self::NetworkDnsError,
1171            Self::NetworkUnreachable,
1172            Self::NetworkConnectionRefused,
1173            Self::NetworkTimeout,
1174            // Worker
1175            Self::WorkerNoneAvailable,
1176            Self::WorkerAllUnhealthy,
1177            Self::WorkerHealthCheckFailed,
1178            Self::WorkerSelfTestFailed,
1179            Self::WorkerAtCapacity,
1180            Self::WorkerMissingToolchain,
1181            Self::WorkerStateError,
1182            Self::WorkerCircuitOpen,
1183            Self::WorkerSelectionFailed,
1184            Self::WorkerLoadQueryFailed,
1185            // Disk Pressure / Storage
1186            Self::WorkerDiskPressureCritical,
1187            Self::WorkerDiskPressureWarning,
1188            Self::WorkerTelemetryGap,
1189            Self::WorkerDiskIoHigh,
1190            Self::WorkerMemoryPressureHigh,
1191            Self::WorkerReclaimFailed,
1192            Self::WorkerDiskHeadroomInsufficient,
1193            Self::WorkerReclaimProtected,
1194            // Build
1195            Self::BuildCompilationFailed,
1196            Self::BuildUnknownCommand,
1197            Self::BuildKilledBySignal,
1198            Self::BuildTimeout,
1199            Self::BuildOutputError,
1200            Self::BuildWorkdirError,
1201            Self::BuildToolchainError,
1202            Self::BuildEnvError,
1203            Self::BuildIncrementalError,
1204            Self::BuildArtifactMissing,
1205            // Process Triage
1206            Self::ProcessTriageAdapterUnavailable,
1207            Self::ProcessTriageDetectorUncertain,
1208            Self::ProcessTriagePolicyViolation,
1209            Self::ProcessTriageTransportError,
1210            Self::ProcessTriageExecutorError,
1211            Self::ProcessTriageTimeout,
1212            Self::ProcessTriagePartialResult,
1213            Self::ProcessTriageInvalidRequest,
1214            // Cancellation
1215            Self::CancelGracefulSent,
1216            Self::CancelEscalatedKill,
1217            Self::CancelRemoteKillFailed,
1218            Self::CancelCleanupFailed,
1219            Self::CancelSlotLeak,
1220            Self::CancelTimeoutExceeded,
1221            // Transfer
1222            Self::TransferRsyncFailed,
1223            Self::TransferTimeout,
1224            Self::TransferSourceMissing,
1225            Self::TransferDestError,
1226            Self::TransferDiskFull,
1227            Self::TransferPermissionDenied,
1228            Self::TransferChecksumError,
1229            Self::TransferBinaryFailed,
1230            Self::TransferIncomplete,
1231            Self::TransferProtocolError,
1232            // Internal
1233            Self::InternalDaemonSocket,
1234            Self::InternalDaemonProtocol,
1235            Self::InternalDaemonNotRunning,
1236            Self::InternalIpcError,
1237            Self::InternalStateError,
1238            Self::InternalSerdeError,
1239            Self::InternalHookError,
1240            Self::InternalMetricsError,
1241            Self::InternalLoggingError,
1242            Self::InternalUpdateError,
1243        ]
1244    }
1245}
1246
1247impl fmt::Display for ErrorCode {
1248    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1249        write!(f, "{}: {}", self.code_string(), self.message())
1250    }
1251}
1252
/// Error category for grouping related errors.
///
/// Each variant owns one band of error codes, noted on the variant. Because of
/// `rename_all = "lowercase"`, serde reads and writes the variants as lowercase
/// strings (`Config` is serialized as `"config"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, schemars::JsonSchema)]
#[serde(rename_all = "lowercase")]
pub enum ErrorCategory {
    /// Configuration and setup errors (E001-E099)
    ///
    /// Also hosts the path-dependency (E013-E018) and dependency-closure
    /// (E019-E024) sub-ranges.
    Config,
    /// Network and SSH connectivity errors (E100-E199)
    Network,
    /// Worker selection and management errors (E200-E299)
    ///
    /// Also hosts the disk pressure / storage sub-range (E210-E219).
    Worker,
    /// Compilation and build errors (E300-E399)
    ///
    /// Also hosts the process triage (E310-E319) and build cancellation
    /// (E320-E325) sub-ranges.
    Build,
    /// File transfer and sync errors (E400-E499)
    Transfer,
    /// Internal/unexpected errors (E500-E599)
    Internal,
}
1270
1271impl ErrorCategory {
1272    /// Returns a human-readable name for the category.
1273    #[must_use]
1274    pub const fn name(&self) -> &'static str {
1275        match self {
1276            Self::Config => "Configuration",
1277            Self::Network => "Network",
1278            Self::Worker => "Worker",
1279            Self::Build => "Build",
1280            Self::Transfer => "Transfer",
1281            Self::Internal => "Internal",
1282        }
1283    }
1284
1285    /// Returns a short description of the category.
1286    #[must_use]
1287    pub const fn description(&self) -> &'static str {
1288        match self {
1289            Self::Config => "Configuration file and environment setup issues",
1290            Self::Network => "SSH connectivity and network communication issues",
1291            Self::Worker => "Remote worker selection, health, and management issues",
1292            Self::Build => "Remote compilation and build process issues",
1293            Self::Transfer => "File synchronization and transfer issues",
1294            Self::Internal => "Internal errors that may indicate bugs",
1295        }
1296    }
1297}
1298
1299impl fmt::Display for ErrorCategory {
1300    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1301        write!(f, "{}", self.name())
1302    }
1303}
1304
/// Complete error entry with all metadata.
///
/// This is the owned, serializable view of one catalogue entry, as returned by
/// `ErrorCode::entry()`. All text is stored as owned `String`s, so the entry
/// can be serialized and deserialized with serde.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ErrorEntry {
    /// Error code string (e.g., "RCH-E001")
    pub code: String,
    /// Error category
    pub category: ErrorCategory,
    /// Human-readable error message
    pub message: String,
    /// Steps to remediate the error, in the order they are numbered by
    /// `format_full`
    pub remediation: Vec<String>,
    /// Documentation URL, if available
    pub doc_url: Option<String>,
}
1319
1320impl ErrorEntry {
1321    /// Formats the error for display with full remediation steps.
1322    #[must_use]
1323    pub fn format_full(&self) -> String {
1324        let mut output = format!("[{}] {}\n\n", self.code, self.message);
1325
1326        if !self.remediation.is_empty() {
1327            output.push_str("Remediation steps:\n");
1328            for (i, step) in self.remediation.iter().enumerate() {
1329                output.push_str(&format!("  {}. {}\n", i + 1, step));
1330            }
1331        }
1332
1333        if let Some(url) = &self.doc_url {
1334            output.push_str(&format!("\nFor more information: {}\n", url));
1335        }
1336
1337        output
1338    }
1339
1340    /// Formats the error as a single line.
1341    #[must_use]
1342    pub fn format_brief(&self) -> String {
1343        format!("[{}] {}", self.code, self.message)
1344    }
1345}
1346
1347impl fmt::Display for ErrorEntry {
1348    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1349        write!(f, "{}", self.format_brief())
1350    }
1351}
1352
#[cfg(test)]
mod tests {
    use super::*;

    // =========================================================================
    // Catalog Invariants
    // =========================================================================

    /// No two catalog entries may share a numeric code.
    #[test]
    fn test_error_code_numbers_are_unique() {
        let mut seen = std::collections::HashSet::new();
        for code in ErrorCode::all() {
            let num = code.code_number();
            assert!(
                seen.insert(num),
                "Duplicate error code number: {} for {:?}",
                num,
                code
            );
        }
    }

    /// Code strings use the `RCH-E` prefix followed by the zero-padded number.
    #[test]
    fn test_error_code_format() {
        let expected = [
            // Lowest code of each documented category range.
            (ErrorCode::ConfigNotFound, "RCH-E001"),
            (ErrorCode::SshConnectionFailed, "RCH-E100"),
            (ErrorCode::WorkerNoneAvailable, "RCH-E200"),
            (ErrorCode::BuildCompilationFailed, "RCH-E300"),
            (ErrorCode::TransferRsyncFailed, "RCH-E400"),
            (ErrorCode::InternalDaemonSocket, "RCH-E500"),
            // Lowest code of each extended sub-range, cancellation included.
            (ErrorCode::PathDepManifestParseFailed, "RCH-E013"),
            (ErrorCode::ClosurePlanFailed, "RCH-E019"),
            (ErrorCode::WorkerDiskPressureCritical, "RCH-E210"),
            (ErrorCode::ProcessTriageAdapterUnavailable, "RCH-E310"),
            (ErrorCode::CancelGracefulSent, "RCH-E320"),
        ];

        for (code, code_string) in expected {
            assert_eq!(
                code.code_string(),
                code_string,
                "Unexpected code string for {:?}",
                code
            );
        }
    }

    /// One representative code per category reports that category.
    #[test]
    fn test_error_categories() {
        let expected = [
            (ErrorCode::ConfigNotFound, ErrorCategory::Config),
            (ErrorCode::SshConnectionFailed, ErrorCategory::Network),
            (ErrorCode::WorkerNoneAvailable, ErrorCategory::Worker),
            (ErrorCode::BuildCompilationFailed, ErrorCategory::Build),
            (ErrorCode::TransferRsyncFailed, ErrorCategory::Transfer),
            (ErrorCode::InternalDaemonSocket, ErrorCategory::Internal),
        ];

        for (code, category) in expected {
            assert_eq!(
                code.category(),
                category,
                "Unexpected category for {:?}",
                code
            );
        }
    }

    /// Every catalog entry carries a non-empty message.
    #[test]
    fn test_all_errors_have_message() {
        for code in ErrorCode::all() {
            let msg = code.message();
            assert!(!msg.is_empty(), "Error {:?} has empty message", code);
        }
    }

    /// Every catalog entry carries at least one remediation step.
    #[test]
    fn test_all_errors_have_remediation() {
        for code in ErrorCode::all() {
            let steps = code.remediation();
            assert!(
                !steps.is_empty(),
                "Error {:?} has no remediation steps",
                code
            );
        }
    }

    // =========================================================================
    // Serialization and Formatting
    // =========================================================================

    /// An `ErrorEntry` survives a JSON roundtrip with its code and category intact.
    #[test]
    fn test_error_entry_serialization() {
        let entry = ErrorCode::ConfigNotFound.entry();
        let json = serde_json::to_string(&entry).expect("serialization failed");
        assert!(json.contains("RCH-E001"));
        assert!(json.contains("config"));

        let parsed: ErrorEntry = serde_json::from_str(&json).expect("deserialization failed");
        assert_eq!(parsed.code, "RCH-E001");
        assert_eq!(parsed.category, ErrorCategory::Config);
    }

    /// An `ErrorCode` serializes to its SCREAMING_SNAKE_CASE name and parses back.
    #[test]
    fn test_error_code_serialization() {
        let code = ErrorCode::SshAuthFailed;
        let json = serde_json::to_string(&code).expect("serialization failed");
        assert_eq!(json, "\"SSH_AUTH_FAILED\"");

        let parsed: ErrorCode = serde_json::from_str(&json).expect("deserialization failed");
        assert_eq!(parsed, ErrorCode::SshAuthFailed);
    }

    /// The full format includes the bracketed code, the message and the remediation list.
    #[test]
    fn test_format_full() {
        let formatted = ErrorCode::ConfigNotFound.entry().format_full();

        let expected_fragments = [
            "[RCH-E001]",
            "Configuration file not found",
            "Remediation steps:",
            "rch init",
        ];
        for fragment in expected_fragments {
            assert!(
                formatted.contains(fragment),
                "format_full output is missing {:?}:\n{}",
                fragment,
                formatted
            );
        }
    }

    /// The brief format is exactly `[<code>] <message>`.
    #[test]
    fn test_format_brief() {
        let formatted = ErrorCode::ConfigNotFound.entry().format_brief();

        assert_eq!(formatted, "[RCH-E001] Configuration file not found");
    }

    /// `Display` for codes mentions code and message; categories print their label.
    #[test]
    fn test_display_implementations() {
        let display = ErrorCode::ConfigNotFound.to_string();
        assert!(display.contains("RCH-E001"));
        assert!(display.contains("Configuration file not found"));

        assert_eq!(ErrorCategory::Config.to_string(), "Configuration");
    }

    /// Every code's number lies inside the range documented for its category.
    #[test]
    fn test_category_ranges() {
        for code in ErrorCode::all() {
            let num = code.code_number();
            let category = code.category();
            // Half-open ranges; Config starts at 1 because the documented
            // range is E001-E099, so a code numbered 0 must be rejected.
            let range = match category {
                ErrorCategory::Config => 1..100,
                ErrorCategory::Network => 100..200,
                ErrorCategory::Worker => 200..300,
                ErrorCategory::Build => 300..400,
                ErrorCategory::Transfer => 400..500,
                ErrorCategory::Internal => 500..600,
            };
            assert!(
                range.contains(&num),
                "{:?} ({:?}) has number {} but should be {}-{}",
                code,
                category,
                num,
                range.start,
                range.end - 1
            );
        }
    }

    // =========================================================================
    // Contract Tests — Code Stability (bd-vvmd.6.1)
    // =========================================================================

    /// Contract test: path-dependency error codes are stable across versions.
    #[test]
    fn test_path_dep_error_codes_stable() {
        let pinned = [
            (ErrorCode::PathDepManifestParseFailed, 13),
            (ErrorCode::PathDepMissing, 14),
            (ErrorCode::PathDepCyclic, 15),
            (ErrorCode::PathDepPolicyViolation, 16),
            (ErrorCode::PathDepMetadataFailed, 17),
            (ErrorCode::PathDepMetadataParseFailed, 18),
        ];

        for (code, number) in pinned {
            assert_eq!(
                code.code_number(),
                number,
                "{:?} must keep its published code number",
                code
            );
        }
    }

    /// Contract test: dependency-closure error codes are stable.
    #[test]
    fn test_closure_error_codes_stable() {
        let pinned = [
            (ErrorCode::ClosurePlanFailed, 19),
            (ErrorCode::ClosureFailOpen, 20),
            (ErrorCode::ClosureHighRisk, 21),
            (ErrorCode::ClosureMissingData, 22),
            (ErrorCode::ClosureNonDeterministic, 23),
            (ErrorCode::ClosureFingerprintMismatch, 24),
        ];

        for (code, number) in pinned {
            assert_eq!(
                code.code_number(),
                number,
                "{:?} must keep its published code number",
                code
            );
        }
    }

    /// Contract test: disk pressure/storage error codes are stable.
    #[test]
    fn test_disk_pressure_error_codes_stable() {
        let pinned = [
            (ErrorCode::WorkerDiskPressureCritical, 210),
            (ErrorCode::WorkerDiskPressureWarning, 211),
            (ErrorCode::WorkerTelemetryGap, 212),
            (ErrorCode::WorkerDiskIoHigh, 213),
            (ErrorCode::WorkerMemoryPressureHigh, 214),
            (ErrorCode::WorkerReclaimFailed, 215),
            (ErrorCode::WorkerDiskHeadroomInsufficient, 216),
            (ErrorCode::WorkerReclaimProtected, 217),
        ];

        for (code, number) in pinned {
            assert_eq!(
                code.code_number(),
                number,
                "{:?} must keep its published code number",
                code
            );
        }
    }

    /// Contract test: process triage error codes are stable.
    #[test]
    fn test_process_triage_error_codes_stable() {
        let pinned = [
            (ErrorCode::ProcessTriageAdapterUnavailable, 310),
            (ErrorCode::ProcessTriageDetectorUncertain, 311),
            (ErrorCode::ProcessTriagePolicyViolation, 312),
            (ErrorCode::ProcessTriageTransportError, 313),
            (ErrorCode::ProcessTriageExecutorError, 314),
            (ErrorCode::ProcessTriageTimeout, 315),
            (ErrorCode::ProcessTriagePartialResult, 316),
            (ErrorCode::ProcessTriageInvalidRequest, 317),
        ];

        for (code, number) in pinned {
            assert_eq!(
                code.code_number(),
                number,
                "{:?} must keep its published code number",
                code
            );
        }
    }

    /// Contract test: cancellation error codes are stable.
    #[test]
    fn test_cancellation_error_codes_stable() {
        let pinned = [
            (ErrorCode::CancelGracefulSent, 320),
            (ErrorCode::CancelEscalatedKill, 321),
            (ErrorCode::CancelRemoteKillFailed, 322),
            (ErrorCode::CancelCleanupFailed, 323),
            (ErrorCode::CancelSlotLeak, 324),
            (ErrorCode::CancelTimeoutExceeded, 325),
        ];

        for (code, number) in pinned {
            assert_eq!(
                code.code_number(),
                number,
                "{:?} must keep its published code number",
                code
            );
        }
    }

    /// Contract test: new error codes belong to correct categories.
    #[test]
    fn test_new_error_codes_correct_categories() {
        let expected = [
            // Path-dep and closure are in Config range (E001-E099)
            (ErrorCode::PathDepCyclic, ErrorCategory::Config),
            (ErrorCode::ClosurePlanFailed, ErrorCategory::Config),
            // Disk pressure in Worker range (E200-E299)
            (ErrorCode::WorkerDiskPressureCritical, ErrorCategory::Worker),
            (ErrorCode::WorkerReclaimProtected, ErrorCategory::Worker),
            // Process triage in Build range (E300-E399)
            (ErrorCode::ProcessTriageTimeout, ErrorCategory::Build),
            (ErrorCode::ProcessTriageInvalidRequest, ErrorCategory::Build),
            // Cancellation in Build range (E300-E399)
            (ErrorCode::CancelGracefulSent, ErrorCategory::Build),
            (ErrorCode::CancelTimeoutExceeded, ErrorCategory::Build),
        ];

        for (code, category) in expected {
            assert_eq!(
                code.category(),
                category,
                "Unexpected category for {:?}",
                code
            );
        }
    }

    /// Contract test: all new error codes have doc URLs pointing to correct sections.
    #[test]
    fn test_new_error_codes_doc_urls() {
        let expected = [
            (ErrorCode::PathDepCyclic, "https://rch.dev/docs/path-deps"),
            (
                ErrorCode::ClosureFailOpen,
                "https://rch.dev/docs/dependency-closure",
            ),
            (
                ErrorCode::WorkerDiskPressureCritical,
                "https://rch.dev/docs/disk-pressure",
            ),
            (
                ErrorCode::ProcessTriageTimeout,
                "https://rch.dev/docs/process-triage",
            ),
            (
                ErrorCode::CancelGracefulSent,
                "https://rch.dev/docs/cancellation",
            ),
        ];

        for (code, url) in expected {
            assert_eq!(
                code.doc_url(),
                Some(url),
                "Unexpected doc URL for {:?}",
                code
            );
        }
    }

    /// Contract test: new error codes roundtrip through JSON serialization.
    #[test]
    fn test_new_error_codes_json_roundtrip() {
        // One representative per extended sub-range, cancellation included.
        let new_codes = [
            ErrorCode::PathDepManifestParseFailed,
            ErrorCode::ClosurePlanFailed,
            ErrorCode::WorkerDiskPressureCritical,
            ErrorCode::ProcessTriageAdapterUnavailable,
            ErrorCode::CancelGracefulSent,
        ];

        for code in new_codes {
            let json = serde_json::to_string(&code).expect("serialization failed");
            let parsed: ErrorCode = serde_json::from_str(&json).expect("deserialization failed");
            assert_eq!(parsed, code, "Roundtrip failed for {:?}", code);

            // Entry should also roundtrip
            let entry = code.entry();
            let entry_json = serde_json::to_string(&entry).expect("entry serialization failed");
            let parsed_entry: ErrorEntry =
                serde_json::from_str(&entry_json).expect("entry deserialization failed");
            assert_eq!(parsed_entry.code, code.code_string());
        }
    }

    /// Contract test: total error code count is as expected (guards against accidental removal).
    #[test]
    fn test_total_error_code_count() {
        // 10 config + 12 path-dep/closure + 10 network + 10 worker + 8 storage
        // + 10 build + 8 process-triage + 6 cancellation + 10 transfer + 10 internal = 94
        const MIN_EXPECTED_CODES: usize = 94;

        let total = ErrorCode::all().len();
        assert!(
            total >= MIN_EXPECTED_CODES,
            "Expected at least 94 error codes (was {}); did a code get accidentally removed?",
            total,
        );
    }
}