// polaris_graph/executor/error.rs

//! Error types for graph execution.

use crate::node::NodeId;
use crate::predicate::PredicateError;
use polaris_system::param::{AccessMode, ErrorContext};
use std::any::TypeId;
use std::fmt;
use std::sync::Arc;
use std::time::Duration;

11/// Errors that can occur during graph execution.
12///
13/// # Examples
14///
15/// ```
16/// use polaris_graph::ExecutionError;
17///
18/// let err = ExecutionError::EmptyGraph;
19/// assert_eq!(format!("{err}"), "graph has no entry point");
20///
21/// // Pattern matching on error variants
22/// match err {
23///     ExecutionError::EmptyGraph => { /* handle empty graph */ }
24///     ExecutionError::NodeNotFound(id) => { /* handle missing node */ }
25///     ExecutionError::SystemError(msg) => { /* handle system failure */ }
26///     _ => { /* handle other errors */ }
27/// }
28/// ```
29#[derive(Debug, Clone)]
30#[non_exhaustive]
31pub enum ExecutionError {
32    /// The graph has no entry point.
33    EmptyGraph,
34    /// A referenced node was not found in the graph.
35    NodeNotFound(NodeId),
36    /// No sequential edge found from the given node.
37    NoNextNode(NodeId),
38    /// A decision or loop node is missing its predicate.
39    MissingPredicate(NodeId),
40    /// A decision node is missing a branch target.
41    MissingBranch {
42        /// The node ID of the decision node.
43        node: NodeId,
44        /// Which branch is missing ("true" or "false").
45        branch: &'static str,
46    },
47    /// A system execution error occurred.
48    SystemError(Arc<str>),
49    /// A predicate evaluation error occurred.
50    PredicateError(PredicateError),
51    /// Maximum iterations exceeded in a loop.
52    MaxIterationsExceeded {
53        /// The loop node that exceeded iterations.
54        node: NodeId,
55        /// The maximum allowed iterations.
56        max: usize,
57    },
58    /// A loop node has no termination condition (neither predicate nor `max_iterations`).
59    NoTerminationCondition(NodeId),
60    /// A system execution timed out.
61    Timeout {
62        /// The node that timed out.
63        node: NodeId,
64        /// The timeout duration that was exceeded.
65        timeout: Duration,
66    },
67    /// Feature not yet implemented.
68    Unimplemented(&'static str),
69    /// Maximum recursion depth exceeded in nested control flow.
70    RecursionLimitExceeded {
71        /// The current depth when the limit was hit.
72        depth: usize,
73        /// The maximum allowed depth.
74        max: usize,
75    },
76    /// A switch node is missing its discriminator.
77    MissingDiscriminator(NodeId),
78    /// No matching case found in switch node and no default provided.
79    NoMatchingCase {
80        /// The switch node ID.
81        node: NodeId,
82        /// The discriminator value that didn't match any case.
83        key: &'static str,
84    },
85    /// An internal framework invariant was violated.
86    InternalError(String),
87    /// A middleware layer failed.
88    MiddlewareError {
89        /// Registered name of the middleware that failed.
90        middleware: String,
91        /// Description of the failure.
92        message: String,
93    },
94    /// Total graph execution time exceeded the configured limit.
95    GraphTimeout {
96        /// How long the graph ran before being stopped.
97        elapsed: Duration,
98        /// The configured maximum duration.
99        max: Duration,
100    },
101    /// A scope's `ContextPolicy` declared `forward_fresh::<T>()` for a resource
102    /// that has no registered factory anywhere in the parent hierarchy.
103    ScopeMissingFactory {
104        /// The scope node's name.
105        scope: &'static str,
106        /// The resource type that was missing a factory.
107        resource: &'static str,
108    },
109    /// A scope's `ContextPolicy` declared a per-resource crossing
110    /// (`forward::<T>()` or `fork::<T>()`) for a resource that does not exist
111    /// in the parent's local scope at execution time.
112    ScopeMissingResource {
113        /// The scope node's name.
114        scope: &'static str,
115        /// The resource type that was missing.
116        resource: &'static str,
117        /// Which verb declared the crossing — `"forward"` or `"fork"`.
118        action: &'static str,
119    },
120    /// A scope's `ContextPolicy` declared a per-resource crossing
121    /// (`forward::<T>()` or `fork::<T>()`) for a resource that exists in the
122    /// parent's local scope but could not be copied at scope entry because it
123    /// is currently held mutably (write-locked).
124    ///
125    /// Distinct from [`ScopeMissingResource`](Self::ScopeMissingResource),
126    /// which means the resource is genuinely absent. This variant means the
127    /// resource is present but momentarily unavailable.
128    ScopeResourceBusy {
129        /// The scope node's name.
130        scope: &'static str,
131        /// The resource type that was held mutably.
132        resource: &'static str,
133        /// Which verb declared the crossing — `"forward"` or `"fork"`.
134        action: &'static str,
135    },
136}
137
138impl fmt::Display for ExecutionError {
139    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
140        match self {
141            ExecutionError::EmptyGraph => write!(f, "graph has no entry point"),
142            ExecutionError::NodeNotFound(id) => write!(f, "node not found: {id}"),
143            ExecutionError::NoNextNode(id) => write!(f, "no sequential edge from node: {id}"),
144            ExecutionError::MissingPredicate(id) => {
145                write!(f, "missing predicate on node: {id}")
146            }
147            ExecutionError::MissingBranch { node, branch } => {
148                write!(f, "missing {branch} branch on decision node: {node}")
149            }
150            ExecutionError::SystemError(msg) => write!(f, "system error: {msg}"),
151            ExecutionError::PredicateError(err) => write!(f, "predicate error: {err}"),
152            ExecutionError::MaxIterationsExceeded { node, max } => {
153                write!(f, "max iterations ({max}) exceeded on loop node: {node}")
154            }
155            ExecutionError::NoTerminationCondition(id) => {
156                write!(f, "loop node has no termination condition: {id}")
157            }
158            ExecutionError::Timeout { node, timeout } => {
159                write!(f, "system timed out after {:?} on node: {node}", timeout)
160            }
161            ExecutionError::Unimplemented(feature) => {
162                write!(f, "feature not implemented: {feature}")
163            }
164            ExecutionError::RecursionLimitExceeded { depth, max } => {
165                write!(
166                    f,
167                    "recursion limit exceeded: depth {depth} exceeds max {max}"
168                )
169            }
170            ExecutionError::MissingDiscriminator(id) => {
171                write!(f, "missing discriminator on switch node: {id}")
172            }
173            ExecutionError::NoMatchingCase { node, key } => {
174                write!(f, "no matching case for key '{key}' on switch node: {node}")
175            }
176            ExecutionError::InternalError(msg) => write!(f, "internal error: {msg}"),
177            ExecutionError::MiddlewareError {
178                middleware,
179                message,
180            } => {
181                write!(f, "middleware '{middleware}' failed: {message}")
182            }
183            ExecutionError::GraphTimeout { elapsed, max } => {
184                write!(
185                    f,
186                    "graph execution timed out after {elapsed:?} (max: {max:?})"
187                )
188            }
189            ExecutionError::ScopeMissingFactory { scope, resource } => {
190                write!(
191                    f,
192                    "scope '{scope}' declared forward_fresh::<{resource}>() but no factory is registered (call Server::register_local::<{resource}>(...) before entering this scope)"
193                )
194            }
195            ExecutionError::ScopeMissingResource {
196                scope,
197                resource,
198                action,
199            } => {
200                write!(
201                    f,
202                    "scope '{scope}' declared {action}::<{resource}>() but the parent context has no local resource of that type"
203                )
204            }
205            ExecutionError::ScopeResourceBusy {
206                scope,
207                resource,
208                action,
209            } => {
210                write!(
211                    f,
212                    "scope '{scope}' declared {action}::<{resource}>() but {resource} is currently held mutably in the parent context and cannot be copied at scope entry"
213                )
214            }
215        }
216    }
217}
218
219impl std::error::Error for ExecutionError {
220    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
221        match self {
222            ExecutionError::PredicateError(err) => Some(err),
223            _ => None,
224        }
225    }
226}
227
228/// Errors that can occur during resource validation.
229///
230/// These errors are detected before graph execution starts, allowing
231/// early detection of missing resources that would cause runtime failures.
232///
233/// # Examples
234///
235/// ```
236/// use polaris_graph::ResourceValidationError;
237/// use polaris_graph::NodeId;
238/// use polaris_system::param::AccessMode;
239/// use std::any::TypeId;
240///
241/// let err = ResourceValidationError::MissingResource {
242///     node: NodeId::from_string("node_1"),
243///     system_name: "my_system",
244///     resource_type: "MyConfig",
245///     type_id: TypeId::of::<String>(),
246///     access_mode: AccessMode::Read,
247/// };
248///
249/// // Display provides a human-readable message
250/// let msg = format!("{err}");
251/// assert!(msg.contains("my_system"));
252/// assert!(msg.contains("MyConfig"));
253/// ```
254#[derive(Debug, Clone)]
255#[non_exhaustive]
256pub enum ResourceValidationError {
257    /// A required resource is missing from the context.
258    MissingResource {
259        /// The node ID of the system requiring the resource.
260        node: NodeId,
261        /// The name of the system.
262        system_name: &'static str,
263        /// The type name of the missing resource.
264        resource_type: &'static str,
265        /// The type ID of the missing resource.
266        type_id: TypeId,
267        /// The access mode (read or write).
268        access_mode: AccessMode,
269    },
270    /// A required output from a previous system is missing.
271    MissingOutput {
272        /// The node ID of the system requiring the output.
273        node: NodeId,
274        /// The name of the system.
275        system_name: &'static str,
276        /// The type name of the missing output.
277        output_type: &'static str,
278        /// The type ID of the missing output.
279        type_id: TypeId,
280    },
281    /// A scope's `ContextPolicy` declared `forward_fresh::<T>()` for a resource
282    /// that has no registered factory in the parent context or globals.
283    ///
284    /// Detected during [`GraphExecutor::validate_resources`] by walking the
285    /// parent chain via [`SystemContext::factory_fn_by_type_id`].
286    ///
287    /// [`GraphExecutor::validate_resources`]: super::GraphExecutor::validate_resources
288    /// [`SystemContext::factory_fn_by_type_id`]: polaris_system::param::SystemContext::factory_fn_by_type_id
289    ScopeMissingFactory {
290        /// The scope node ID.
291        scope: NodeId,
292        /// The scope node's name.
293        scope_name: &'static str,
294        /// The resource type that was missing a factory.
295        resource: &'static str,
296    },
297    /// A scope's `ContextPolicy` declared a per-resource crossing
298    /// (`forward::<T>()` or `fork::<T>()`) for a resource that is not
299    /// reachable from the parent context at validation time.
300    ///
301    /// Detected during [`GraphExecutor::validate_resources`] via
302    /// [`SystemContext::contains_resource_by_type_id`]. Mirrors the runtime
303    /// [`ExecutionError::ScopeMissingResource`] safety net for callers that
304    /// skip validation.
305    ///
306    /// [`GraphExecutor::validate_resources`]: super::GraphExecutor::validate_resources
307    /// [`SystemContext::contains_resource_by_type_id`]: polaris_system::param::SystemContext::contains_resource_by_type_id
308    /// [`ExecutionError::ScopeMissingResource`]: super::ExecutionError::ScopeMissingResource
309    ScopeMissingResource {
310        /// The scope node ID.
311        scope: NodeId,
312        /// The scope node's name.
313        scope_name: &'static str,
314        /// The resource type that was missing.
315        resource: &'static str,
316        /// Which verb declared the crossing — `"forward"` or `"fork"`.
317        action: &'static str,
318    },
319}
320
321impl fmt::Display for ResourceValidationError {
322    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
323        match self {
324            ResourceValidationError::MissingResource {
325                node,
326                system_name,
327                resource_type,
328                access_mode,
329                ..
330            } => {
331                let mode_str = match access_mode {
332                    AccessMode::Read => "read",
333                    AccessMode::Write => "write",
334                };
335                write!(
336                    f,
337                    "system '{system_name}' ({node}) requires {mode_str} access to missing resource: {resource_type}"
338                )
339            }
340            ResourceValidationError::MissingOutput {
341                node,
342                system_name,
343                output_type,
344                ..
345            } => {
346                write!(
347                    f,
348                    "system '{system_name}' ({node}) requires missing output: {output_type}"
349                )
350            }
351            ResourceValidationError::ScopeMissingFactory {
352                scope,
353                scope_name,
354                resource,
355            } => {
356                write!(
357                    f,
358                    "scope '{scope_name}' ({scope}) declared forward_fresh::<{resource}>() but no factory is registered for {resource} in the parent context or globals"
359                )
360            }
361            ResourceValidationError::ScopeMissingResource {
362                scope,
363                scope_name,
364                resource,
365                action,
366            } => {
367                write!(
368                    f,
369                    "scope '{scope_name}' ({scope}) declared {action}::<{resource}>() but {resource} is not reachable from the parent context"
370                )
371            }
372        }
373    }
374}
375
376impl std::error::Error for ResourceValidationError {}
377
/// Classification of the error that caused a system failure.
///
/// Used in [`CaughtError`] to distinguish error sources without parsing
/// message strings. The type is `Copy`, comparable and hashable, so it can be
/// used directly as a map or set key.
///
/// # Examples
///
/// ```
/// use polaris_graph::ErrorKind;
///
/// let kind = ErrorKind::Execution;
/// assert_eq!(format!("{kind}"), "execution");
///
/// let kind = ErrorKind::ParamResolution;
/// assert_eq!(format!("{kind}"), "param_resolution");
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ErrorKind {
    /// System returned `Err(SystemError::ExecutionError(...))`.
    Execution,
    /// System parameter resolution failed (`Err(SystemError::ParamError(...))`).
    ParamResolution,
}

402impl fmt::Display for ErrorKind {
403    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
404        match self {
405            ErrorKind::Execution => write!(f, "execution"),
406            ErrorKind::ParamResolution => write!(f, "param_resolution"),
407        }
408    }
409}
410
411/// Error context injected by the executor when routing to an error handler.
412///
413/// When a system fails and an error edge exists, the executor stores this
414/// in the outputs before routing to the handler node. Error handler systems
415/// read it via [`ErrOut<CaughtError>`](polaris_system::param::ErrOut).
416///
417/// # Fields
418///
419/// - `message` — The error message from the failed system
420/// - `system_name` — The name of the system that failed
421/// - `node_id` — The graph node ID of the failed system
422/// - `duration` — How long the system ran before failing
423/// - `kind` — Classification of the error source
424///
425/// # Example
426///
427/// ```
428/// use polaris_graph::CaughtError;
429/// use polaris_system::param::ErrOut;
430/// use polaris_system::system;
431///
432/// # #[derive(Default)]
433/// # struct RecoveryState;
434///
435/// #[system]
436/// async fn handle_error(error: ErrOut<CaughtError>) -> RecoveryState {
437///     tracing::error!("[{}] {}: {}", error.node_id, error.system_name, error.message);
438///     RecoveryState::default()
439/// }
440/// ```
441#[derive(Debug, Clone)]
442pub struct CaughtError {
443    /// The error message from the failed system.
444    pub message: Arc<str>,
445    /// The name of the system that failed.
446    pub system_name: &'static str,
447    /// The node ID of the failed system.
448    pub node_id: NodeId,
449    /// How long the system ran before failing.
450    pub duration: Duration,
451    /// Classification of the error source.
452    pub kind: ErrorKind,
453}
454
455impl fmt::Display for CaughtError {
456    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
457        write!(
458            f,
459            "system '{}' ({}) failed after {:?} [{}]: {}",
460            self.system_name, self.node_id, self.duration, self.kind, self.message
461        )
462    }
463}
464
465impl std::error::Error for CaughtError {}
466
467impl ErrorContext for CaughtError {}
468
469/// Internal result of executing a system with optional retry and timeout.
470pub(crate) enum SystemOutcome {
471    /// System completed successfully.
472    Ok(Box<dyn core::any::Any + Send + Sync>),
473    /// System failed with an error after all retry attempts.
474    Err(polaris_system::system::SystemError),
475    /// System timed out after all retry attempts.
476    Timeout,
477}