Skip to main content

pe_core/
error.rs

1//! Error taxonomy — complete set of errors the library produces.
2//! Based on Group 7 of the pre-plan.
3
4use thiserror::Error;
5
/// Unified error type for the library, organised by the subsystem that raises it.
///
/// Marked `#[non_exhaustive]`: code matching on it from another crate must
/// include a wildcard arm.
#[derive(Error, Debug, Clone)]
#[non_exhaustive]
pub enum PeError {
    // === Graph errors ===
    /// The graph hit its recursion limit of `limit` supersteps.
    #[error("Graph recursion limit reached ({limit} supersteps)")]
    GraphRecursion { limit: u32 },

    /// Graph-level interrupt, carrying the stated `reason`.
    #[error("Graph interrupt: {reason}")]
    GraphInterrupt { reason: String },

    /// Node-level interrupt, carrying the stated `reason`.
    #[error("Node interrupt: {reason}")]
    NodeInterrupt { reason: String },

    /// A state update was invalid; `details` describes the problem.
    #[error("Invalid state update: {details}")]
    InvalidUpdate { details: String },

    /// The named channel was never written.
    #[error("Channel '{channel}' was never written")]
    EmptyChannel { channel: String },

    /// The graph structure is invalid; `details` describes the problem.
    #[error("Invalid graph structure: {details}")]
    GraphValue { details: String },

    /// The named node has no path from START.
    #[error("Node '{node}' is unreachable — no path from START")]
    UnreachableNode { node: String },

    /// The graph was invoked with no input.
    #[error("Graph invoked with no input")]
    EmptyInput,

    /// Multiple subgraphs conflict; `details` describes the conflict.
    #[error("Multiple subgraphs conflict: {details}")]
    MultipleSubgraphs { details: String },

    // === Agent errors ===
    /// No agent with this id was found in the registry.
    #[error("Agent '{agent_id}' not found in registry")]
    AgentNotFound { agent_id: String },

    /// The agent could not be reached.
    #[error("Agent '{agent_id}' is unreachable: {reason}")]
    AgentUnreachable {
        /// Id of the agent that could not be reached.
        agent_id: String,
        /// Explanation included in the message.
        reason: String,
    },

    /// A handoff cycle was detected; `path` is included in the message.
    #[error("Handoff cycle detected: {path}")]
    HandoffCycle { path: String },

    /// The maximum number of handoffs (`max`) was exceeded.
    #[error("Max handoffs ({max}) exceeded")]
    MaxHandoffs { max: u32 },

    // === Boundary errors ===
    /// Permission was denied for the named action.
    #[error("Permission denied: {action}")]
    PermissionDenied { action: String },

    /// Policy denied use of a tool.
    #[error("Tool '{tool}' denied by policy: {reason}")]
    ToolDenied {
        /// Name of the denied tool.
        tool: String,
        /// Explanation included in the message.
        reason: String,
    },

    /// Rules blocked communication with the named target.
    #[error("Communication with '{target}' blocked by rules")]
    CommunicationBlocked { target: String },

    /// Approval for an action was denied.
    #[error("Approval denied for '{action}': {reason}")]
    ApprovalDenied {
        /// The action for which approval was denied.
        action: String,
        /// Explanation included in the message.
        reason: String,
    },

    /// An action requires approval, but no approval resolver is configured.
    #[error("Approval required for '{action}', but no approval resolver is configured")]
    ApprovalRequired { action: String },

    /// A guardrail was violated.
    #[error("Guardrail violated: {guardrail} — {details}")]
    GuardrailViolation {
        /// Name of the violated guardrail.
        guardrail: String,
        /// Description of the violation.
        details: String,
    },

    /// A write to a destination violated write governance.
    #[error("Write governance violation on '{destination}': {reason}")]
    WriteGovernanceViolation {
        /// Destination of the attempted write.
        destination: String,
        /// Explanation included in the message.
        reason: String,
    },

    /// Inspection of a root was denied.
    #[error("Inspection denied for '{root}': {reason}")]
    InspectionDenied {
        /// Root for which inspection was denied.
        root: String,
        /// Explanation included in the message.
        reason: String,
    },

    /// Inspection of a path failed.
    #[error("Inspection failed for '{path}': {reason}")]
    InspectionFailed {
        /// Path whose inspection failed.
        path: String,
        /// Explanation included in the message.
        reason: String,
    },

    // === Tool errors ===
    /// A tool failed while executing.
    #[error("Tool '{tool}' execution failed: {reason}")]
    ToolExecution {
        /// Name of the tool that failed.
        tool: String,
        /// Explanation included in the message.
        reason: String,
    },

    /// No tool with this name was found in the registry.
    #[error("Tool '{tool}' not found in registry")]
    ToolNotFound { tool: String },

    /// A tool with this name is already registered.
    #[error("Tool '{tool}' is already registered")]
    ToolAlreadyRegistered { tool: String },

    // === LLM errors ===
    /// The LLM provider reported an error; `details` describes it.
    #[error("LLM provider error: {details}")]
    LlmProvider { details: String },

    /// Authentication or authorization was rejected (HTTP 401/403).
    /// Never retryable: a bad API key will not start working on a later attempt.
    #[error("LLM auth failed: {details}")]
    LlmAuth { details: String },

    /// The provider rate limited the request (HTTP 429). Retryable with backoff.
    #[error("LLM rate limited: {details}")]
    LlmRateLimit { details: String },

    /// The LLM provider returned no response.
    #[error("LLM provider returned no response")]
    LlmEmpty,

    /// The mock provider's response queue is exhausted.
    #[error("MockProvider response queue exhausted — add more responses with .respond_with()")]
    MockProviderExhausted,

    /// An embedding had a different dimension than expected.
    #[error("Embedding dimension mismatch: expected {expected}, got {actual}")]
    EmbeddingDimension {
        /// The dimension that was expected.
        expected: usize,
        /// The dimension that was actually received.
        actual: usize,
    },

    /// Structured output could not be parsed; `details` describes the failure.
    #[error("Structured output parse failed: {details}")]
    StructuredOutput { details: String },

    // === Storage errors ===
    /// A storage operation failed; `details` describes the failure.
    #[error("Storage error: {details}")]
    Storage { details: String },

    /// No checkpoint was found for the given thread id.
    #[error("Checkpoint not found for thread '{thread_id}'")]
    CheckpointNotFound { thread_id: String },

    // === Bus / Negotiation errors ===
    /// A bus error occurred; `details` describes it.
    #[error("Bus error: {details}")]
    BusError { details: String },

    /// A negotiation failed for the stated `reason`.
    #[error("Negotiation failed: {reason}")]
    NegotiationFailed { reason: String },

    /// A negotiation was interrupted for the stated `reason`.
    #[error("Negotiation interrupted: {reason}")]
    NegotiationInterrupted { reason: String },

    // === Execution errors ===
    /// Execution timed out; `seconds` is rendered with two decimal places.
    #[error("Execution timeout after {seconds:.2}s")]
    Timeout { seconds: f64 },

    /// An execution budget was exceeded; `budget_type` names the budget.
    #[error("Execution budget exceeded: {budget_type}")]
    BudgetExceeded { budget_type: String },

    // === Agent construction ===
    /// The agent is invalid; the payload is the message text.
    #[error("Invalid agent: {0}")]
    InvalidAgent(String),

    /// Loading a manifest failed; the payload is the message text.
    #[error("Manifest load failed: {0}")]
    ManifestLoad(String),

    /// Parsing a manifest failed; the payload is the message text.
    #[error("Manifest parse failed: {0}")]
    ManifestParse(String),

    // === Delegation errors ===
    /// A delegation would form a cycle.
    #[error("Cyclic delegation detected: {chain:?} → {attempted}")]
    CyclicDelegation {
        /// The delegation chain, rendered with `Debug` formatting in the message.
        chain: Vec<String>,
        /// The delegation that was attempted.
        attempted: String,
    },

    // === Resume errors ===
    /// The resume point in the checkpoint differs from the one in the command.
    #[error("Resume point mismatch: expected '{expected}', got '{actual}'")]
    ResumePointMismatch {
        /// The resume point stored in the checkpoint.
        expected: String,
        /// The resume point provided by the caller.
        actual: String,
    },

    /// A node expected human input on resume, but none was provided.
    #[error("Missing human input — node expected HumanInput on resume but none was provided")]
    MissingHumanInput,

    // === Matrix errors ===
    /// A matrix had a different dimension than expected.
    #[error("Matrix dimension mismatch: expected {expected}, got {actual}")]
    MatrixDimensionMismatch {
        /// The dimension that was expected.
        expected: usize,
        /// The dimension that was actually received.
        actual: usize,
    },

    /// The named node was not found in the transition matrix.
    #[error("Node '{node}' not found in transition matrix")]
    NodeNotFound { node: String },

    /// A tensor constraint was violated.
    #[error("Tensor constraint {constraint} violated: {detail}")]
    ConstraintViolation {
        /// Identifier of the violated constraint.
        constraint: String,
        /// Description of the violation.
        detail: String,
    },

    /// Tensor composition failed for the stated `reason`.
    #[error("Tensor composition failed: {reason}")]
    CompositionError { reason: String },

    /// A payload was invalid for the stated `reason`.
    #[error("Invalid payload: {reason}")]
    InvalidPayload { reason: String },

    // === Cognitive errors ===
    /// The cognitive budget ran out — lobes consumed all allocated tokens/time.
    #[error("Cognitive budget exhausted for agent '{agent_id}': used {tokens_used}/{limit} tokens")]
    CognitiveBudgetExhausted {
        /// Id of the agent whose budget was exhausted.
        agent_id: String,
        /// Number of tokens used.
        tokens_used: u32,
        /// The token limit.
        limit: u32,
    },

    /// A lobe vetoed the output, which blocks execution.
    #[error("Cognitive veto by lobe '{lobe}' for agent '{agent_id}': {reason}")]
    CognitiveSignalVeto {
        /// Id of the agent whose output was vetoed.
        agent_id: String,
        /// Name of the lobe that issued the veto.
        lobe: String,
        /// Explanation included in the message.
        reason: String,
    },

    /// A lobe could not be activated or failed while processing.
    ///
    /// This denotes a structural failure, not a transient one. A lobe that
    /// hits a transient problem (e.g., model unavailable) should report
    /// [`PeError::LlmProvider`] or [`PeError::Timeout`] rather than this variant.
    #[error("Lobe activation failed for '{lobe}': {reason}")]
    LobeActivationFailed {
        /// Name of the lobe that failed.
        lobe: String,
        /// Explanation included in the message.
        reason: String,
    },

    // === Internal ===
    /// An internal error; `details` describes it.
    #[error("Internal error: {details}")]
    Internal { details: String },
}
213
214impl PeError {
215    /// Whether this error is retryable by a retry policy.
216    ///
217    /// Transient failures (timeouts, LLM provider errors, tool execution
218    /// errors) are retryable. Structural errors (invalid graph, permission
219    /// denied, missing input) are not.
220    pub fn is_retryable(&self) -> bool {
221        matches!(
222            self,
223            PeError::Timeout { .. }
224                | PeError::LlmProvider { .. }
225                | PeError::LlmRateLimit { .. }
226                | PeError::LlmEmpty
227                | PeError::ToolExecution { .. }
228                | PeError::Storage { .. }
229        )
230        // Note: LlmAuth is NOT retryable — bad credentials never succeed on retry
231    }
232
233    /// Whether this error represents a transient condition that may resolve later.
234    ///
235    /// Transient errors are temporary — the same operation might succeed on retry
236    /// or after a delay. Permanent errors indicate structural problems that will
237    /// never resolve without code/config changes.
238    ///
239    /// `is_retryable()` is a subset of `is_transient()`: all retryable errors are
240    /// transient, but some transient errors (like `BusError`) may not benefit from
241    /// immediate retry.
242    pub fn is_transient(&self) -> bool {
243        matches!(
244            self,
245            PeError::Timeout { .. }
246                | PeError::LlmProvider { .. }
247                | PeError::LlmRateLimit { .. }
248                | PeError::LlmEmpty
249                | PeError::ToolExecution { .. }
250                | PeError::Storage { .. }
251                | PeError::BusError { .. }
252                | PeError::NegotiationFailed { .. }
253                | PeError::MockProviderExhausted
254        )
255    }
256}
257
#[cfg(test)]
mod tests {
    use super::*;

    /// Number of `PeError` variants. Must equal the number of arms in
    /// `variant_name`; bump it whenever an arm is added there.
    const VARIANT_COUNT: usize = 54;

    /// Returns the variant name of `error` via an exhaustive `match`.
    ///
    /// There is intentionally no wildcard arm. `#[non_exhaustive]` does not
    /// relax exhaustiveness checking inside the defining crate, so adding a
    /// variant to `PeError` makes this function fail to compile until it is
    /// updated. When adding an arm, also add a sample of the new variant to
    /// one of the lists below and bump `VARIANT_COUNT`.
    fn variant_name(error: &PeError) -> &'static str {
        match error {
            PeError::GraphRecursion { .. } => "GraphRecursion",
            PeError::GraphInterrupt { .. } => "GraphInterrupt",
            PeError::NodeInterrupt { .. } => "NodeInterrupt",
            PeError::InvalidUpdate { .. } => "InvalidUpdate",
            PeError::EmptyChannel { .. } => "EmptyChannel",
            PeError::GraphValue { .. } => "GraphValue",
            PeError::UnreachableNode { .. } => "UnreachableNode",
            PeError::EmptyInput => "EmptyInput",
            PeError::MultipleSubgraphs { .. } => "MultipleSubgraphs",
            PeError::AgentNotFound { .. } => "AgentNotFound",
            PeError::AgentUnreachable { .. } => "AgentUnreachable",
            PeError::HandoffCycle { .. } => "HandoffCycle",
            PeError::MaxHandoffs { .. } => "MaxHandoffs",
            PeError::PermissionDenied { .. } => "PermissionDenied",
            PeError::ToolDenied { .. } => "ToolDenied",
            PeError::CommunicationBlocked { .. } => "CommunicationBlocked",
            PeError::ApprovalDenied { .. } => "ApprovalDenied",
            PeError::ApprovalRequired { .. } => "ApprovalRequired",
            PeError::GuardrailViolation { .. } => "GuardrailViolation",
            PeError::WriteGovernanceViolation { .. } => "WriteGovernanceViolation",
            PeError::InspectionDenied { .. } => "InspectionDenied",
            PeError::InspectionFailed { .. } => "InspectionFailed",
            PeError::ToolExecution { .. } => "ToolExecution",
            PeError::ToolNotFound { .. } => "ToolNotFound",
            PeError::ToolAlreadyRegistered { .. } => "ToolAlreadyRegistered",
            PeError::LlmProvider { .. } => "LlmProvider",
            PeError::LlmAuth { .. } => "LlmAuth",
            PeError::LlmRateLimit { .. } => "LlmRateLimit",
            PeError::LlmEmpty => "LlmEmpty",
            PeError::MockProviderExhausted => "MockProviderExhausted",
            PeError::EmbeddingDimension { .. } => "EmbeddingDimension",
            PeError::StructuredOutput { .. } => "StructuredOutput",
            PeError::Storage { .. } => "Storage",
            PeError::CheckpointNotFound { .. } => "CheckpointNotFound",
            PeError::BusError { .. } => "BusError",
            PeError::NegotiationFailed { .. } => "NegotiationFailed",
            PeError::NegotiationInterrupted { .. } => "NegotiationInterrupted",
            PeError::Timeout { .. } => "Timeout",
            PeError::BudgetExceeded { .. } => "BudgetExceeded",
            PeError::InvalidAgent(_) => "InvalidAgent",
            PeError::ManifestLoad(_) => "ManifestLoad",
            PeError::ManifestParse(_) => "ManifestParse",
            PeError::CyclicDelegation { .. } => "CyclicDelegation",
            PeError::ResumePointMismatch { .. } => "ResumePointMismatch",
            PeError::MissingHumanInput => "MissingHumanInput",
            PeError::MatrixDimensionMismatch { .. } => "MatrixDimensionMismatch",
            PeError::NodeNotFound { .. } => "NodeNotFound",
            PeError::ConstraintViolation { .. } => "ConstraintViolation",
            PeError::CompositionError { .. } => "CompositionError",
            PeError::InvalidPayload { .. } => "InvalidPayload",
            PeError::CognitiveBudgetExhausted { .. } => "CognitiveBudgetExhausted",
            PeError::CognitiveSignalVeto { .. } => "CognitiveSignalVeto",
            PeError::LobeActivationFailed { .. } => "LobeActivationFailed",
            PeError::Internal { .. } => "Internal",
        }
    }

    /// Samples expected to be both transient and retryable.
    fn transient_and_retryable() -> Vec<PeError> {
        vec![
            PeError::Timeout { seconds: 30.0 },
            PeError::LlmProvider {
                details: "rate limited".into(),
            },
            PeError::LlmRateLimit {
                details: "too many requests".into(),
            },
            PeError::LlmEmpty,
            PeError::ToolExecution {
                tool: "search".into(),
                reason: "network error".into(),
            },
            PeError::Storage {
                details: "connection refused".into(),
            },
        ]
    }

    /// Samples expected to be transient but not retryable.
    fn transient_not_retryable() -> Vec<PeError> {
        vec![
            PeError::BusError {
                details: "disconnected".into(),
            },
            PeError::NegotiationFailed {
                reason: "timeout".into(),
            },
            PeError::MockProviderExhausted,
        ]
    }

    /// Samples expected to be permanent: neither transient nor retryable.
    fn permanent() -> Vec<PeError> {
        vec![
            // Bad credentials never succeed on retry, so auth failures are permanent.
            PeError::LlmAuth {
                details: "invalid api key".into(),
            },
            PeError::GraphRecursion { limit: 25 },
            PeError::GraphInterrupt {
                reason: "paused".into(),
            },
            PeError::NodeInterrupt {
                reason: "paused".into(),
            },
            PeError::InvalidUpdate {
                details: "bad field".into(),
            },
            PeError::EmptyChannel {
                channel: "msgs".into(),
            },
            PeError::GraphValue {
                details: "no edges".into(),
            },
            PeError::UnreachableNode {
                node: "orphan".into(),
            },
            PeError::EmptyInput,
            PeError::MultipleSubgraphs {
                details: "conflict".into(),
            },
            PeError::AgentNotFound {
                agent_id: "x".into(),
            },
            PeError::AgentUnreachable {
                agent_id: "x".into(),
                reason: "no bus".into(),
            },
            PeError::HandoffCycle {
                path: "a->b->a".into(),
            },
            PeError::MaxHandoffs { max: 10 },
            PeError::PermissionDenied {
                action: "write".into(),
            },
            PeError::ToolDenied {
                tool: "rm".into(),
                reason: "blocked".into(),
            },
            PeError::CommunicationBlocked {
                target: "agent-b".into(),
            },
            PeError::ApprovalDenied {
                action: "tool:shell_exec".into(),
                reason: "host denied".into(),
            },
            PeError::ApprovalRequired {
                action: "tool:shell_exec".into(),
            },
            PeError::GuardrailViolation {
                guardrail: "safety".into(),
                details: "toxic".into(),
            },
            PeError::WriteGovernanceViolation {
                destination: "db".into(),
                reason: "denied".into(),
            },
            PeError::InspectionDenied {
                root: ".".into(),
                reason: "blocked".into(),
            },
            PeError::InspectionFailed {
                path: "missing.txt".into(),
                reason: "not found".into(),
            },
            PeError::ToolNotFound {
                tool: "missing".into(),
            },
            PeError::ToolAlreadyRegistered {
                tool: "search".into(),
            },
            PeError::EmbeddingDimension {
                expected: 768,
                actual: 512,
            },
            PeError::StructuredOutput {
                details: "parse fail".into(),
            },
            PeError::CheckpointNotFound {
                thread_id: "t1".into(),
            },
            PeError::NegotiationInterrupted {
                reason: "timeout".into(),
            },
            PeError::BudgetExceeded {
                budget_type: "tokens".into(),
            },
            PeError::InvalidAgent("bad config".into()),
            PeError::ManifestLoad("not found".into()),
            PeError::ManifestParse("invalid yaml".into()),
            PeError::CyclicDelegation {
                chain: vec!["a".into(), "b".into()],
                attempted: "a".into(),
            },
            PeError::ResumePointMismatch {
                expected: "x".into(),
                actual: "y".into(),
            },
            PeError::MissingHumanInput,
            PeError::MatrixDimensionMismatch {
                expected: 3,
                actual: 5,
            },
            PeError::NodeNotFound {
                node: "ghost".into(),
            },
            PeError::ConstraintViolation {
                constraint: "C4".into(),
                detail: "sum > 1".into(),
            },
            PeError::CompositionError {
                reason: "incompatible".into(),
            },
            PeError::InvalidPayload {
                reason: "negative probability".into(),
            },
            PeError::CognitiveBudgetExhausted {
                agent_id: "agent-1".into(),
                tokens_used: 2000,
                limit: 2000,
            },
            PeError::CognitiveSignalVeto {
                agent_id: "agent-1".into(),
                lobe: "safety".into(),
                reason: "dangerous".into(),
            },
            PeError::LobeActivationFailed {
                lobe: "critic".into(),
                reason: "model unavailable".into(),
            },
            PeError::Internal {
                details: "bug".into(),
            },
        ]
    }

    /// Pairs every error in `errors` with the same expected classification.
    fn tagged(
        errors: Vec<PeError>,
        transient: bool,
        retryable: bool,
    ) -> impl Iterator<Item = (PeError, bool, bool)> {
        errors
            .into_iter()
            .map(move |error| (error, transient, retryable))
    }

    /// Classification table with one sample per `PeError` variant.
    ///
    /// Each row is `(error, expected_transient, expected_retryable)`.
    /// Completeness is enforced by `variant_name` (compile time) together with
    /// `test_classification_table_covers_every_variant_once` (run time).
    fn classify_all_variants() -> Vec<(PeError, bool, bool)> {
        tagged(transient_and_retryable(), true, true)
            .chain(tagged(transient_not_retryable(), true, false))
            .chain(tagged(permanent(), false, false))
            .collect()
    }

    #[test]
    fn test_classification_table_covers_every_variant_once() {
        let mut seen = std::collections::HashSet::new();
        for (error, _, _) in classify_all_variants() {
            let name = variant_name(&error);
            assert!(seen.insert(name), "Variant classified more than once: {name}");
        }
        assert_eq!(
            seen.len(),
            VARIANT_COUNT,
            "Classification table does not cover every PeError variant"
        );
    }

    #[test]
    fn test_is_transient_exhaustive() {
        for (error, expected_transient, _) in classify_all_variants() {
            assert_eq!(
                error.is_transient(),
                expected_transient,
                "Wrong is_transient() for: {error}"
            );
        }
    }

    #[test]
    fn test_is_retryable_exhaustive() {
        for (error, _, expected_retryable) in classify_all_variants() {
            assert_eq!(
                error.is_retryable(),
                expected_retryable,
                "Wrong is_retryable() for: {error}"
            );
        }
    }

    #[test]
    fn test_retryable_is_subset_of_transient() {
        for (error, _, _) in classify_all_variants() {
            if error.is_retryable() {
                assert!(
                    error.is_transient(),
                    "Retryable error should also be transient: {error}"
                );
            }
        }
    }

    /// Spot-checks the variants that are both transient and retryable, using
    /// payloads that differ from the ones in the classification table.
    #[test]
    fn test_transient_errors_are_retryable_or_bus() {
        let transient = vec![
            PeError::Timeout { seconds: 30.0 },
            PeError::LlmProvider {
                details: "500".into(),
            },
            PeError::LlmRateLimit {
                details: "429".into(),
            },
            PeError::LlmEmpty,
            PeError::ToolExecution {
                tool: "api".into(),
                reason: "timeout".into(),
            },
            PeError::Storage {
                details: "conn reset".into(),
            },
        ];
        for err in transient {
            assert!(err.is_transient(), "Expected transient: {err}");
            assert!(err.is_retryable(), "Expected retryable: {err}");
        }
    }

    #[test]
    fn test_permanent_errors_are_not_retryable() {
        let permanent = vec![
            PeError::GraphRecursion { limit: 25 },
            PeError::AgentNotFound {
                agent_id: "x".into(),
            },
            PeError::PermissionDenied {
                action: "write".into(),
            },
            PeError::Internal {
                details: "bug".into(),
            },
        ];
        for err in permanent {
            assert!(!err.is_transient(), "Expected permanent: {err}");
            assert!(!err.is_retryable(), "Expected not retryable: {err}");
        }
    }
}
681            assert!(!err.is_transient(), "Expected permanent: {err}");
682            assert!(!err.is_retryable(), "Expected not retryable: {err}");
683        }
684    }
685}