Skip to main content

meerkat_mob/
error.rs

1//! Error types for mob operations.
2
3use crate::ids::{AgentRuntimeId, FenceToken, FlowId, LoopId, MeerkatId, ProfileName, WorkRef};
4use crate::runtime::MobState;
5use crate::store::FrameAtomicOperation;
6use crate::validate::Diagnostic;
7use crate::{MobId, RunId, StepId};
8use meerkat_contracts::MobSpawnManyFailureCause;
9use meerkat_contracts::wire::supervisor_bridge::{BridgeRejectionCause, BridgeRejectionReply};
10
/// Runtime capability required from a seated mob member.
///
/// The [`Display`](std::fmt::Display) implementation renders each capability
/// as a snake_case identifier.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MobMemberCapability {
    /// Interaction-scoped injection used for autonomous console/RPC/flow turns.
    InteractionEventInjector,
}

impl std::fmt::Display for MobMemberCapability {
    /// Writes the snake_case identifier of the capability to `f`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Exhaustive on purpose: a new capability must pick its identifier here.
        match self {
            Self::InteractionEventInjector => f.write_str("interaction_event_injector"),
        }
    }
}
25
26/// Errors returned by mob operations.
27#[derive(Debug, thiserror::Error)]
28pub enum MobError {
29    /// The requested profile does not exist in the mob definition.
30    #[error("profile not found: {0}")]
31    ProfileNotFound(ProfileName),
32
33    /// The requested mob member does not exist in the roster.
34    ///
35    /// Renamed from `MeerkatNotFound` by DELETE_ME finding A2 + B8 as
36    /// part of the 0.6 identity-first cascade. The inner type remains
37    /// [`MeerkatId`] until the full A5 DSL-schema migration flips it to
38    /// [`AgentIdentity`](crate::ids::AgentIdentity); the rename lands
39    /// first so public error matching doesn't leak the legacy term.
40    #[error("mob member not found: {0}")]
41    MemberNotFound(MeerkatId),
42
43    /// A mob member with the given ID already exists.
44    ///
45    /// Renamed from `MeerkatAlreadyExists` by DELETE_ME finding A2 + B8.
46    #[error("mob member already exists: {0}")]
47    MemberAlreadyExists(MeerkatId),
48
49    /// The mob member's profile does not allow external turns.
50    #[error("mob member is not externally addressable: {0}")]
51    NotExternallyAddressable(MeerkatId),
52
53    /// The requested lifecycle state transition is invalid.
54    #[error("invalid state transition: {from} -> {to}")]
55    InvalidTransition { from: MobState, to: MobState },
56
57    /// A wiring operation failed.
58    #[error("wiring error: {0}")]
59    WiringError(String),
60
61    /// Supervisor rotation reached one or more remote members but did not
62    /// complete, so local supervisor authority stayed at the pre-rotation
63    /// epoch.
64    #[error(
65        "supervisor rotation incomplete: failed after {rotated_peer_count} remote peer(s) accepted attempted epoch {attempted_epoch}; local authority remains at epoch {previous_epoch}; rollback_succeeded={rollback_succeeded}; pending_authority_recorded={pending_authority_recorded}; pending_authority_process_local={pending_authority_process_local}; failure: {reason}"
66    )]
67    SupervisorRotationIncomplete {
68        previous_epoch: u64,
69        attempted_epoch: u64,
70        attempted_public_peer_id: String,
71        rotated_peer_count: usize,
72        rollback_succeeded: bool,
73        pending_authority_recorded: bool,
74        pending_authority_process_local: bool,
75        rollback_error: Option<String>,
76        reason: String,
77    },
78
79    /// A supervisor bridge command was rejected by the remote member.
80    #[error("bridge command rejected ({cause:?}): {reason}")]
81    BridgeCommandRejected {
82        cause: BridgeRejectionCause,
83        reason: String,
84    },
85
86    /// The member failed to restore durable session state and is broken until repaired.
87    #[error(
88        "member {member_id} failed to restore {}: {reason}",
89        format_member_restore_target(.session_id.as_ref())
90    )]
91    MemberRestoreFailed {
92        member_id: MeerkatId,
93        session_id: Option<meerkat_core::types::SessionId>,
94        reason: String,
95    },
96
97    /// Waiting for kickoff completion timed out.
98    #[error("kickoff wait timed out")]
99    KickoffWaitTimedOut { pending_member_ids: Vec<MeerkatId> },
100
101    /// Waiting for startup readiness timed out.
102    #[error("member ready wait timed out")]
103    ReadyWaitTimedOut { pending_member_ids: Vec<MeerkatId> },
104
105    /// The mob definition failed validation.
106    #[error("definition error: {}", format_diagnostics(.0))]
107    DefinitionError(Vec<Diagnostic>),
108
109    /// Referenced flow does not exist.
110    #[error("flow not found: {0}")]
111    FlowNotFound(FlowId),
112
113    /// Run failed with a reason.
114    #[error("flow failed for run {run_id}: {reason}")]
115    FlowFailed { run_id: RunId, reason: String },
116
117    /// Referenced run does not exist.
118    #[error("run not found: {0}")]
119    RunNotFound(RunId),
120
121    /// Run was canceled.
122    #[error("run canceled: {0}")]
123    RunCanceled(RunId),
124
125    /// Flow turn timed out while awaiting terminal transport outcome.
126    #[error("flow turn timed out")]
127    FlowTurnTimedOut,
128
129    /// A frame-aware flow exceeded its configured nesting depth.
130    #[error(
131        "loop '{loop_id}' would exceed max_frame_depth={max_frame_depth} (current depth={current_depth})"
132    )]
133    FrameDepthLimitExceeded {
134        loop_id: LoopId,
135        max_frame_depth: u32,
136        current_depth: u32,
137    },
138
139    /// The selected mob run store cannot provide frame-aware atomic persistence.
140    #[error("mob run store cannot atomically persist frame operation '{operation}'")]
141    FrameAtomicPersistenceUnavailable { operation: FrameAtomicOperation },
142
143    /// Spec revision compare-and-swap failed.
144    #[error("spec revision conflict for mob {mob_id}: expected {expected:?}, actual {actual}")]
145    SpecRevisionConflict {
146        mob_id: MobId,
147        expected: Option<u64>,
148        actual: u64,
149    },
150
151    /// Schema validation failed for a step output.
152    #[error("schema validation failed for step {step_id}: {message}")]
153    SchemaValidation { step_id: StepId, message: String },
154
155    /// Not enough targets to satisfy dispatch/collection policy.
156    #[error("insufficient targets for step {step_id}: required {required}, available {available}")]
157    InsufficientTargets {
158        step_id: StepId,
159        required: u8,
160        available: usize,
161    },
162
163    /// Topology policy denied a dispatch edge.
164    #[error("topology violation: {from_role} -> {to_role}")]
165    TopologyViolation {
166        from_role: ProfileName,
167        to_role: ProfileName,
168    },
169
170    /// A bridge accepted the delivery command but rejected the member input.
171    #[error("bridge delivery rejected ({cause}): {reason}")]
172    BridgeDeliveryRejected {
173        cause: meerkat_contracts::wire::supervisor_bridge::BridgeDeliveryRejectionCause,
174        reason: String,
175    },
176
177    /// Supervisor escalation happened.
178    #[error("supervisor escalation: {0}")]
179    SupervisorEscalation(String),
180
181    /// Operation is not supported for the member's runtime mode.
182    #[error("unsupported for runtime mode {mode}: {reason}")]
183    UnsupportedForMode {
184        mode: crate::MobRuntimeMode,
185        reason: String,
186    },
187
188    /// A member is missing a required runtime capability for the requested operation.
189    #[error("mob member {member_id} missing required capability {capability}: {context}")]
190    MissingMemberCapability {
191        member_id: MeerkatId,
192        capability: MobMemberCapability,
193        context: &'static str,
194    },
195
196    /// Operation blocked by reset barrier.
197    #[error("reset barrier active")]
198    ResetBarrier,
199
200    /// A storage operation failed.
201    #[error("storage error: {0}")]
202    StorageError(#[source] Box<dyn std::error::Error + Send + Sync>),
203
204    /// A session service operation failed.
205    #[error("session error: {0}")]
206    SessionError(#[from] meerkat_core::service::SessionError),
207
208    /// A comms operation failed.
209    #[error("comms error: {0}")]
210    CommsError(#[from] meerkat_core::comms::SendError),
211
212    /// A runtime-backed member turn reached an external callback boundary.
213    #[error("callback pending for session {session_id} on tool '{tool_name}'")]
214    CallbackPending {
215        session_id: meerkat_core::types::SessionId,
216        tool_name: String,
217        args: serde_json::Value,
218    },
219
220    /// The fence token does not match the member's current incarnation.
221    #[error("stale fence token for {runtime_id}: expected {expected}, got {actual}")]
222    StaleFenceToken {
223        runtime_id: AgentRuntimeId,
224        expected: FenceToken,
225        actual: FenceToken,
226    },
227
228    /// A caller supplied an event replay cursor beyond the store frontier.
229    #[error("stale mob event cursor: requested {after_cursor}, latest {latest_cursor}")]
230    StaleEventCursor {
231        after_cursor: u64,
232        latest_cursor: u64,
233    },
234
235    /// The referenced work unit does not exist.
236    #[error("work not found: {0}")]
237    WorkNotFound(WorkRef),
238
239    /// An internal error (unexpected state, logic errors).
240    #[error("internal error: {0}")]
241    Internal(String),
242}
243
244fn format_diagnostics(diagnostics: &[Diagnostic]) -> String {
245    diagnostics
246        .iter()
247        .map(|d| format!("{}: {}", d.code, d.message))
248        .collect::<Vec<_>>()
249        .join("; ")
250}
251
252fn format_member_restore_target(session_id: Option<&meerkat_core::types::SessionId>) -> String {
253    match session_id {
254        Some(session_id) => format!("session {session_id}"),
255        None => "runtime bridge state".to_string(),
256    }
257}
258
259impl From<Box<dyn std::error::Error + Send + Sync>> for MobError {
260    fn from(error: Box<dyn std::error::Error + Send + Sync>) -> Self {
261        Self::StorageError(error)
262    }
263}
264
265impl From<crate::store::MobStoreError> for MobError {
266    fn from(error: crate::store::MobStoreError) -> Self {
267        match error {
268            crate::store::MobStoreError::SpecRevisionConflict {
269                mob_id,
270                expected,
271                actual,
272            } => Self::SpecRevisionConflict {
273                mob_id,
274                expected,
275                actual,
276            },
277            crate::store::MobStoreError::FrameAtomicPersistenceUnavailable { operation } => {
278                Self::FrameAtomicPersistenceUnavailable { operation }
279            }
280            other => Self::StorageError(Box::new(other)),
281        }
282    }
283}
284
285impl From<BridgeRejectionReply> for MobError {
286    fn from(rejection: BridgeRejectionReply) -> Self {
287        let cause = rejection.typed_cause();
288        let reason = rejection.reason().to_string();
289        match cause {
290            Some(cause) => Self::BridgeCommandRejected { cause, reason },
291            None => Self::WiringError(reason),
292        }
293    }
294}
295
296impl MobError {
297    pub fn bridge_rejection_cause(&self) -> Option<BridgeRejectionCause> {
298        match self {
299            Self::BridgeCommandRejected { cause, .. } => Some(*cause),
300            _ => None,
301        }
302    }
303
304    /// Typed failure cause for per-member `mob/spawn_many` result rows.
305    ///
306    /// This match intentionally has no wildcard arm. Adding a new `MobError`
307    /// variant must update the public spawn-many failure projection instead of
308    /// silently collapsing into string-only error semantics.
309    pub fn spawn_many_failure_cause(&self) -> MobSpawnManyFailureCause {
310        match self {
311            Self::ProfileNotFound(_) => MobSpawnManyFailureCause::ProfileNotFound,
312            Self::MemberNotFound(_) => MobSpawnManyFailureCause::MemberNotFound,
313            Self::MemberAlreadyExists(_) => MobSpawnManyFailureCause::MemberAlreadyExists,
314            Self::NotExternallyAddressable(_) => MobSpawnManyFailureCause::NotExternallyAddressable,
315            Self::InvalidTransition { .. } => MobSpawnManyFailureCause::InvalidTransition,
316            Self::WiringError(_) => MobSpawnManyFailureCause::WiringError,
317            Self::SupervisorRotationIncomplete { .. } => MobSpawnManyFailureCause::WiringError,
318            Self::BridgeCommandRejected { .. } => MobSpawnManyFailureCause::BridgeCommandRejected,
319            Self::MemberRestoreFailed { .. } => MobSpawnManyFailureCause::MemberRestoreFailed,
320            Self::KickoffWaitTimedOut { .. } => MobSpawnManyFailureCause::KickoffWaitTimedOut,
321            Self::ReadyWaitTimedOut { .. } => MobSpawnManyFailureCause::ReadyWaitTimedOut,
322            Self::DefinitionError(_) => MobSpawnManyFailureCause::DefinitionError,
323            Self::FlowNotFound(_) => MobSpawnManyFailureCause::FlowNotFound,
324            Self::FlowFailed { .. } => MobSpawnManyFailureCause::FlowFailed,
325            Self::RunNotFound(_) => MobSpawnManyFailureCause::RunNotFound,
326            Self::RunCanceled(_) => MobSpawnManyFailureCause::RunCanceled,
327            Self::FlowTurnTimedOut => MobSpawnManyFailureCause::FlowTurnTimedOut,
328            Self::FrameDepthLimitExceeded { .. } => {
329                MobSpawnManyFailureCause::FrameDepthLimitExceeded
330            }
331            Self::FrameAtomicPersistenceUnavailable { .. } => {
332                MobSpawnManyFailureCause::FrameAtomicPersistenceUnavailable
333            }
334            Self::SpecRevisionConflict { .. } => MobSpawnManyFailureCause::SpecRevisionConflict,
335            Self::SchemaValidation { .. } => MobSpawnManyFailureCause::SchemaValidation,
336            Self::InsufficientTargets { .. } => MobSpawnManyFailureCause::InsufficientTargets,
337            Self::TopologyViolation { .. } => MobSpawnManyFailureCause::TopologyViolation,
338            Self::BridgeDeliveryRejected { .. } => MobSpawnManyFailureCause::BridgeDeliveryRejected,
339            Self::SupervisorEscalation(_) => MobSpawnManyFailureCause::SupervisorEscalation,
340            Self::UnsupportedForMode { .. } => MobSpawnManyFailureCause::UnsupportedForMode,
341            Self::MissingMemberCapability { .. } => {
342                MobSpawnManyFailureCause::MissingMemberCapability
343            }
344            Self::ResetBarrier => MobSpawnManyFailureCause::ResetBarrier,
345            Self::StorageError(_) => MobSpawnManyFailureCause::StorageError,
346            Self::SessionError(_) => MobSpawnManyFailureCause::SessionError,
347            Self::CommsError(_) => MobSpawnManyFailureCause::CommsError,
348            Self::CallbackPending { .. } => MobSpawnManyFailureCause::CallbackPending,
349            Self::StaleFenceToken { .. } => MobSpawnManyFailureCause::StaleFenceToken,
350            Self::StaleEventCursor { .. } => MobSpawnManyFailureCause::StaleEventCursor,
351            Self::WorkNotFound(_) => MobSpawnManyFailureCause::WorkNotFound,
352            Self::Internal(_) => MobSpawnManyFailureCause::Internal,
353        }
354    }
355}
356
#[cfg(test)]
mod tests {
    use super::*;
    use crate::validate::{Diagnostic, DiagnosticCode, DiagnosticSeverity};

    #[test]
    fn test_profile_not_found_display() {
        let err = MobError::ProfileNotFound(ProfileName::from("missing"));
        assert!(format!("{err}").contains("missing"));
    }

    /// DELETE_ME A2 + B8 regression: the `Meerkat*` variant prefix and
    /// the "meerkat" literal in error messages were renamed to
    /// identity-first terminology ("mob member"). This test pins both
    /// the display-string and the variant-construction shape so the
    /// 0.6 identity-first cascade cannot regress into legacy wording.
    #[test]
    fn member_not_found_and_already_exists_use_identity_first_display() {
        let not_found = MobError::MemberNotFound(MeerkatId::from("singer"));
        let already = MobError::MemberAlreadyExists(MeerkatId::from("singer"));
        let not_addressable = MobError::NotExternallyAddressable(MeerkatId::from("singer"));

        let msg_nf = format!("{not_found}");
        let msg_ae = format!("{already}");
        let msg_na = format!("{not_addressable}");

        assert_eq!(msg_nf, "mob member not found: singer");
        assert_eq!(msg_ae, "mob member already exists: singer");
        assert_eq!(msg_na, "mob member is not externally addressable: singer");

        // No legacy "meerkat" literal should appear in any of the
        // identity-first error displays.
        for msg in [&msg_nf, &msg_ae, &msg_na] {
            assert!(
                !msg.to_lowercase().contains("meerkat"),
                "identity-first mob errors must not carry legacy 'meerkat' wording: {msg}",
            );
        }
    }

    #[test]
    fn spawn_many_failure_cause_preserves_typed_mob_error_variant() {
        let profile_missing = MobError::ProfileNotFound(ProfileName::from("missing"));
        assert_eq!(
            profile_missing.spawn_many_failure_cause(),
            MobSpawnManyFailureCause::ProfileNotFound
        );

        let internal = MobError::Internal("unexpected".to_string());
        assert_eq!(
            internal.spawn_many_failure_cause(),
            MobSpawnManyFailureCause::Internal
        );
    }

    #[test]
    fn test_invalid_transition_display() {
        let err = MobError::InvalidTransition {
            from: MobState::Completed,
            to: MobState::Running,
        };
        let msg = format!("{err}");
        assert!(msg.contains("Completed"));
        assert!(msg.contains("Running"));
    }

    #[test]
    fn test_definition_error_display() {
        let err = MobError::DefinitionError(vec![
            Diagnostic {
                code: DiagnosticCode::MissingSkillRef,
                message: "skill 'foo' not found".to_string(),
                location: Some("profiles.worker.skills[0]".to_string()),
                severity: DiagnosticSeverity::Error,
            },
            Diagnostic {
                code: DiagnosticCode::EmptyProfiles,
                message: "no spawnable profiles".to_string(),
                location: Some("profiles".to_string()),
                severity: DiagnosticSeverity::Error,
            },
        ]);
        let msg = format!("{err}");
        assert!(msg.contains("missing_skill_ref"));
        assert!(msg.contains("empty_profiles"));
    }

    #[test]
    fn test_session_error_from() {
        let session_err = meerkat_core::service::SessionError::NotFound {
            id: meerkat_core::types::SessionId::new(),
        };
        let mob_err: MobError = session_err.into();
        assert!(matches!(mob_err, MobError::SessionError(_)));
    }

    #[test]
    fn test_comms_error_from() {
        let send_err = meerkat_core::comms::SendError::PeerNotFound("agent-1".to_string());
        let mob_err: MobError = send_err.into();
        assert!(matches!(mob_err, MobError::CommsError(_)));
    }

    #[test]
    fn test_storage_error() {
        let err = MobError::StorageError(Box::new(std::io::Error::new(
            std::io::ErrorKind::Other,
            "disk full",
        )));
        assert!(format!("{err}").contains("disk full"));
    }

    /// Constructs a representative value of the `MobError` variants and
    /// smoke-tests their `Display` output and spawn-many projection.
    ///
    /// NOTE(review): `BridgeDeliveryRejected` is still not constructed here
    /// because no `BridgeDeliveryRejectionCause` value is visible from this
    /// file; add one so the list really covers every variant.
    #[test]
    fn test_all_variants_exist() {
        let variants: Vec<MobError> = vec![
            MobError::ProfileNotFound(ProfileName::from("p")),
            MobError::MemberNotFound(MeerkatId::from("m")),
            MobError::MemberAlreadyExists(MeerkatId::from("m")),
            MobError::NotExternallyAddressable(MeerkatId::from("m")),
            MobError::InvalidTransition {
                from: MobState::Creating,
                to: MobState::Running,
            },
            MobError::WiringError("w".to_string()),
            MobError::SupervisorRotationIncomplete {
                previous_epoch: 1,
                attempted_epoch: 2,
                attempted_public_peer_id: "peer-next".to_string(),
                rotated_peer_count: 1,
                rollback_succeeded: false,
                pending_authority_recorded: true,
                pending_authority_process_local: false,
                rollback_error: Some("rollback failed".to_string()),
                reason: "remote failed".to_string(),
            },
            MobError::BridgeCommandRejected {
                cause: BridgeRejectionCause::NotBound,
                reason: "bind required".to_string(),
            },
            MobError::MemberRestoreFailed {
                member_id: MeerkatId::from("m"),
                session_id: Some(meerkat_core::types::SessionId::new()),
                reason: "restore failed".to_string(),
            },
            MobError::KickoffWaitTimedOut {
                pending_member_ids: vec![MeerkatId::from("m")],
            },
            MobError::ReadyWaitTimedOut {
                pending_member_ids: vec![MeerkatId::from("m")],
            },
            MobError::DefinitionError(vec![]),
            MobError::FlowNotFound(FlowId::from("f")),
            MobError::FlowFailed {
                run_id: RunId::new(),
                reason: "r".to_string(),
            },
            MobError::RunNotFound(RunId::new()),
            MobError::RunCanceled(RunId::new()),
            MobError::FlowTurnTimedOut,
            MobError::FrameDepthLimitExceeded {
                loop_id: LoopId::from("loop"),
                max_frame_depth: 1,
                current_depth: 1,
            },
            MobError::FrameAtomicPersistenceUnavailable {
                operation: FrameAtomicOperation::CasGrantNodeSlot,
            },
            MobError::SpecRevisionConflict {
                mob_id: MobId::from("mob"),
                expected: Some(2),
                actual: 3,
            },
            MobError::SchemaValidation {
                step_id: StepId::from("step"),
                message: "invalid".to_string(),
            },
            MobError::InsufficientTargets {
                step_id: StepId::from("step"),
                required: 2,
                available: 1,
            },
            MobError::TopologyViolation {
                from_role: ProfileName::from("lead"),
                to_role: ProfileName::from("worker"),
            },
            MobError::SupervisorEscalation("boom".to_string()),
            MobError::UnsupportedForMode {
                mode: crate::MobRuntimeMode::TurnDriven,
                reason: "autonomous host runtime required".to_string(),
            },
            MobError::MissingMemberCapability {
                member_id: MeerkatId::from("m"),
                capability: MobMemberCapability::InteractionEventInjector,
                context: "test",
            },
            MobError::ResetBarrier,
            MobError::StorageError(Box::new(std::io::Error::new(
                std::io::ErrorKind::Other,
                "e",
            ))),
            MobError::SessionError(meerkat_core::service::SessionError::PersistenceDisabled),
            MobError::CommsError(meerkat_core::comms::SendError::PeerOffline),
            MobError::CallbackPending {
                session_id: meerkat_core::types::SessionId::new(),
                tool_name: "tool".to_string(),
                args: serde_json::Value::Null,
            },
            MobError::StaleFenceToken {
                runtime_id: crate::ids::AgentRuntimeId::initial(crate::ids::AgentIdentity::from(
                    "m",
                )),
                expected: FenceToken::new(1),
                actual: FenceToken::new(0),
            },
            MobError::StaleEventCursor {
                after_cursor: 2,
                latest_cursor: 1,
            },
            MobError::WorkNotFound(WorkRef::new()),
            MobError::Internal("i".to_string()),
        ];

        // Every constructed variant must render a message and project to a
        // spawn-many failure cause without panicking.
        for variant in &variants {
            assert!(!variant.to_string().is_empty());
            let _ = variant.spawn_many_failure_cause();
        }
    }
}