Skip to main content

bb_compiler/
error.rs

1//! Compiler error taxonomies. `ValidationError` is exclusive to
2//! `validate`; `CompileError` covers everything else and wraps
3//! `ValidationError` via `From`.
4
/// Errors from `validate` (pass 1). One variant per
/// `docs/COMPILER.md` §4.1 rule.
///
/// Every payload is plain owned data (`String`, `Vec<String>`, `i64`),
/// so the type derives `Clone`, `PartialEq` and `Eq` in addition to
/// `Debug`. That lets callers keep a copy of a diagnostic and compare
/// errors directly (e.g. with `assert_eq!`).
//
// NOTE(review): the variants below are tagged Rule 1, 2, 3, 5, 6, 7 and 8;
// no variant is tagged Rule 4 — confirm against `docs/COMPILER.md` §4.1.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ValidationError {
    /// Rule 1 - unknown `(op_type, domain)` pair.
    UnknownOp {
        /// `NodeProto.name`.
        node_name: String,
        /// `NodeProto.op_type`.
        op_type: String,
        /// `NodeProto.domain`.
        domain: String,
    },

    /// Rule 2 - an input value name has no producer.
    DanglingInput {
        /// `NodeProto.name`.
        node_name: String,
        /// The dangling input value name.
        input_name: String,
    },

    /// Rule 3 - two ops claim to produce the same output value name.
    DuplicateOutput {
        /// The duplicated value name.
        value_name: String,
        /// First producer (`NodeProto.name`).
        node_a: String,
        /// Second producer (`NodeProto.name`).
        node_b: String,
    },

    /// Rule 5 - a function input has no matching `ValueInfoProto.type`.
    MissingTypeInfo {
        /// The input value name lacking a type.
        input_name: String,
    },

    /// Rule 6 - a role-domain NodeProto lacks the canonical metadata
    /// keys (`concrete_type` + `instance` OR `required_trait` +
    /// `slot_id`).
    MalformedSlotMetadata {
        /// The offending node's name.
        node_name: String,
        /// Human-readable detail.
        detail: String,
    },

    /// Rule 7 - the graph contains at least one cycle.
    CyclicGraph {
        /// Node names involved in the cycle.
        involves: Vec<String>,
    },

    /// Rule 8 - an op uses an opset that was not declared in
    /// `ModelProto.opset_import`.
    OpsetNotImported {
        /// The missing opset's domain.
        domain: String,
        /// The version the graph used.
        version_used: i64,
    },
}
68
69impl std::fmt::Display for ValidationError {
70    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
71        match self {
72            Self::UnknownOp {
73                node_name,
74                op_type,
75                domain,
76            } => write!(f, "unknown op {domain}::{op_type} at node {node_name}",),
77            Self::DanglingInput {
78                node_name,
79                input_name,
80            } => write!(f, "dangling input {input_name} at node {node_name}",),
81            Self::DuplicateOutput {
82                value_name,
83                node_a,
84                node_b,
85            } => write!(
86                f,
87                "duplicate output {value_name} produced by both {node_a} and {node_b}",
88            ),
89            Self::MissingTypeInfo { input_name } => {
90                write!(f, "missing type info for input {input_name}")
91            }
92            Self::MalformedSlotMetadata { node_name, detail } => {
93                write!(f, "malformed slot metadata at {node_name}: {detail}")
94            }
95            Self::CyclicGraph { involves } => {
96                write!(f, "graph contains a cycle involving {involves:?}")
97            }
98            Self::OpsetNotImported {
99                domain,
100                version_used,
101            } => {
102                write!(f, "opset {domain} v{version_used} used but not imported")
103            }
104        }
105    }
106}
107
108impl std::error::Error for ValidationError {}
109
110/// Errors surfaced by any compiler pass beyond `validate`.
111#[derive(Debug)]
112pub enum CompileError {
113    /// Wrapped [`ValidationError`] from the structural `validate` pass.
114    Validation(ValidationError),
115
116    /// `expand_ops` failed for a specific op.
117    ExpansionFailed {
118        /// `NodeProto.op_type`.
119        op_type: String,
120        /// `NodeProto.domain`.
121        domain: String,
122        /// Human-readable detail.
123        reason: String,
124    },
125
126    /// `runtime.<op>()` returned an error during role-method
127    /// inlining.
128    RoleMethodFailed {
129        /// Slot name (e.g. `"backend"`, `"model"`).
130        slot: String,
131        /// `NodeProto.op_type` that triggered the inlining.
132        op_type: String,
133        /// Source error reported by the runtime impl.
134        source: String,
135    },
136
137    /// A concrete impl satisfying role R coexists with a generic
138    /// placeholder of role R in the same Module. The runner
139    /// surfaces this as `BuildError::AmbiguousRole`.
140    AmbiguousRole {
141        /// Role domain (e.g. `"ai.bytesandbrains.role.index"`).
142        role: String,
143        /// Concrete `TYPE_NAME` providing the role.
144        concrete_type: String,
145        /// Slot id of the conflicting generic placeholder.
146        generic_slot_id: u32,
147    },
148
149    /// `infer_peer_classes` - a `wire.Send`'s peer input has no
150    /// declared `peer_class` (neither on the input's
151    /// `ValueInfoProto` nor on the producing NodeProto's metadata).
152    /// The compiler can't decide which class of Node owns the send's
153    /// data output.
154    UnresolvedPeerClass {
155        /// `NodeProto.name` of the offending send.
156        node_name: String,
157        /// Value name of the peer input lacking a class.
158        peer_input: String,
159    },
160
161    /// `infer_peer_classes` - a non-wire NodeProto consumes two values
162    /// from different home classes. Either the user threaded a value
163    /// from one peer's partition into another's compute without a
164    /// `wire.send` between them, or a frontend forgot to mark a peer
165    /// input as ambient.
166    CrossClassDataflow {
167        /// `NodeProto.name` of the offending consumer.
168        node_name: String,
169        /// One of the conflicting home classes.
170        home_a: String,
171        /// The other conflicting home class.
172        home_b: String,
173    },
174
175    /// the input `ModelProto`'s stamped
176    /// `FRAMEWORK_IR_VERSION` doesn't match what this compiler was
177    /// built to consume. Surfaced by
178    /// [`crate::driver::Compiler::with_target_version`] / the
179    /// driver-entry check before any pass runs.
180    IrVersionMismatch {
181        /// Version the compiler expects.
182        expected: u32,
183        /// Version the input model carries.
184        got: u32,
185    },
186
187    /// a binding the compiler required (a generic-slot
188    /// concrete impl, a peer attribute, etc.) was not present at
189    /// the offending site. Replaces the catch-all `Internal` for
190    /// the missing-binding failure mode so consumers can match on
191    /// shape and surface actionable diagnostics.
192    MissingBinding {
193        /// Stable slot identifier (e.g. `"ATTR_PEER"`, `"backend"`,
194        /// `"required_trait:IndexRuntime"`).
195        slot: String,
196        /// Where the requirement was raised (typically
197        /// `NodeProto.name` or `(function_name, node_index)` as a
198        /// composite string).
199        site: String,
200    },
201
202    /// `ModelProto.functions` was empty when the
203    /// compiler needed at least the root function the recorder
204    /// produces from `Module::body`. Distinct from
205    /// `Validation(ValidationError)` because it surfaces from the
206    /// driver entry, not a pass body.
207    EmptyFunctionTable,
208
209    /// `validate_runtime_complete` found the compiled
210    /// model is missing a runtime requirement (e.g. a NodeProto
211    /// whose op is not registered, or a gate that should have been
212    /// inserted but wasn't).
213    RuntimeIncomplete {
214        /// Human-readable description of what's missing.
215        missing: String,
216    },
217
218    /// Catch-all for orchestrator-level failures (e.g. ill-formed
219    /// recorded module). Carries enough detail to debug.
220    Internal {
221        /// Human-readable failure detail.
222        detail: String,
223    },
224
225    /// `type_solver` - a [`TypeRelation`] reported `Failed` while
226    /// running against the constraint network. The op's relations
227    /// can't be satisfied together with the seeded inputs.
228    TypeConstraintFailed {
229        /// Op the failing relation was attached to (`domain::op_type`).
230        op: String,
231        /// Diagnostic detail from the relation.
232        detail: String,
233    },
234
235    /// `type_solver` (strict mode) - a value slot reached fixpoint
236    /// still bound to an abstract TypeNode. The graph is under-
237    /// constrained; either a seed is missing or an op's
238    /// `type_relations` declarations are insufficient.
239    UnresolvedType {
240        /// Value name that didn't narrow to a concrete leaf.
241        value: String,
242    },
243
244    /// `resolve_component_dependencies` - a concrete component
245    /// declared `#[depends(<role> = "<slot>")]` for a slot that
246    /// has no binding in the compiled artifact's spec. The user
247    /// supplied an `index` binding but forgot the `backend`
248    /// binding the index needs.
249    UnboundDependency {
250        /// `TYPE_NAME` of the concrete with the unsatisfied dep.
251        component: String,
252        /// Slot the component was bound at.
253        bound_at_slot: String,
254        /// The role the dependency requires
255        /// (e.g. `"Backend"`).
256        required_role: String,
257        /// The slot name the dep points at.
258        required_slot: String,
259    },
260
261    /// A NodeProto references a port name the module didn't
262    /// record in its body via `g.input` / `g.output` / `g.net_out`
263    /// / `g.net_in`.
264    UnknownPort {
265        /// Module that references the bad port.
266        module: String,
267        /// The bad port name.
268        port: String,
269    },
270
271    /// A declared port was neither read (input) nor written
272    /// (output) inside its module's body.
273    PortUnwired {
274        /// Module declaring the unwired port.
275        module: String,
276        /// The unwired port name.
277        port: String,
278        /// `"Input"` or `"Output"`.
279        direction: String,
280    },
281
282    /// A `wire.Send` / `wire.Recv` op was found that doesn't
283    /// land on a module's declared network boundary. Internal
284    /// wire ops are forbidden — every wire boundary must coincide
285    /// with a `g.net_out` / `g.net_in` recording.
286    NetworkOpNotAtBoundary {
287        /// Module hosting the off-boundary wire op.
288        module: String,
289        /// Op identifier (NodeProto.name).
290        op_id: String,
291    },
292
293    /// `refine_polymorphic_value_info` — a Contract-method NodeProto
294    /// carries `ai.bytesandbrains.slot_id` metadata that is missing or
295    /// not a valid `u32`.
296    InvalidSlotId {
297        /// `NodeProto.name` of the offending node.
298        node: String,
299        /// The raw metadata value that failed to parse.
300        value: String,
301    },
302
303    /// `refine_polymorphic_value_info` — the slot_id on a
304    /// Contract-method NodeProto does not correspond to any slot in
305    /// the compiled artifact's `BindingSpec`. Indicates the binding
306    /// chain is missing an entry for the role this node requires.
307    UnknownSlotId {
308        /// `NodeProto.name` of the offending node.
309        node: String,
310        /// The slot_id that was not found.
311        slot_id: u32,
312    },
313
314    /// `refine_polymorphic_value_info` — a Contract-method NodeProto
315    /// declares a `ai.bytesandbrains.required_trait` value that the
316    /// pass does not recognise as a known role-runtime identifier.
317    UnknownRoleRuntime {
318        /// `NodeProto.name` of the offending node.
319        node: String,
320        /// The unrecognised role string.
321        role: String,
322    },
323
324    /// `refine_polymorphic_value_info` — a `CodecRuntime` NodeProto
325    /// is missing the `ai.bytesandbrains.codec.port` metadata entry
326    /// that indicates whether this node is an encode (`"out"`) or
327    /// decode (`"in"`) operation.
328    MissingCodecPortMetadata {
329        /// `NodeProto.name` of the offending node.
330        node: String,
331    },
332
333    /// `refine_polymorphic_value_info` — a `CodecRuntime` NodeProto
334    /// carries a `ai.bytesandbrains.codec.port` value that is neither
335    /// `"in"` nor `"out"`.
336    InvalidCodecPort {
337        /// `NodeProto.name` of the offending node.
338        node: String,
339        /// The invalid port value.
340        value: String,
341    },
342
343    /// `refine_polymorphic_value_info` — two or more slots in the
344    /// `BindingSpec` share the same `role_runtime` identifier (e.g.
345    /// two `.bind_index::<A>("local").bind_index::<B>("remote")`
346    /// calls produce two slots both with `role = "IndexRuntime"`).
347    /// The pass uses `lookup_by_role` which returns only the first
348    /// match, so it would silently apply the wrong concrete's storage
349    /// type to nodes belonging to the other slot. Until slot_id-keyed
350    /// lookup is implemented this ambiguity is a hard error.
351    AmbiguousRoleBinding {
352        /// The role string shared by multiple slots.
353        role: String,
354        /// Author-chosen slot names that share the role.
355        slot_names: Vec<String>,
356    },
357
358    /// `refine_polymorphic_value_info` — a NodeProto carries
359    /// `ai.bytesandbrains.slot_id` metadata (marking it as a
360    /// Contract-method node) but lacks the companion
361    /// `ai.bytesandbrains.required_trait` metadata. The DSL recorder
362    /// always stamps both; a missing `required_trait` indicates a
363    /// malformed IR that would cause the pass to silently mis-route
364    /// the refinement.
365    MissingRequiredTraitMetadata {
366        /// `NodeProto.name` of the offending node.
367        node: String,
368    },
369
370    /// `resolve_component_dependencies` - the slot a component's
371    /// dep points at is bound to a concrete whose declared role
372    /// set does NOT include the required role. The user bound
373    /// the right slot to the wrong KIND of concrete.
374    DependencyRoleMismatch {
375        /// `TYPE_NAME` of the concrete with the dep.
376        component: String,
377        /// Slot the component was bound at.
378        bound_at_slot: String,
379        /// The role the dep requires.
380        required_role: String,
381        /// The slot name the dep points at.
382        required_slot: String,
383        /// The role(s) the bound concrete at `required_slot`
384        /// actually provides.
385        provided_role: String,
386    },
387
388    /// `validate_all_slots_bound` - the compiled artifact has at
389    /// least one slot the install path would need a concrete for
390    /// that the bind chain didn't supply. Source identifies why
391    /// the slot is required so the diagnostic can point the user
392    /// at exactly which `.bind_<role>::<T>("…")` is missing.
393    UnboundSlot {
394        /// Author-chosen role identifier (PascalCase Contract
395        /// role name, e.g. `"Backend"`, `"Index"`).
396        role: String,
397        /// Where the requirement comes from.
398        source: SlotSource,
399    },
400
401    /// `validate_bootstrap_composition` — a CALL inside a bootstrap
402    /// function points at a target name that has no matching
403    /// FunctionProto. The most common cause is a parent Module's
404    /// `bootstrap` recording calling `self.child.call().bootstrap(g)`
405    /// without the child's bootstrap recording reaching the
406    /// `Module::build` output (e.g. an empty `bootstrap` override that
407    /// `build` drops on the floor).
408    BootstrapCompositionGap {
409        /// Bootstrap function whose body emits the orphan CALL.
410        caller: String,
411        /// Missing FunctionProto name the CALL points at.
412        target: String,
413    },
414
415    /// `validate_bootstrap_composition` — the bootstrap function-call
416    /// graph contains a cycle. Bootstrap is a one-shot drain; a cycle
417    /// would wedge the engine in `bootstrap_pending` forever.
418    BootstrapCompositionCycle {
419        /// Function names traversed in the cycle, with the repeated
420        /// node appearing at both ends so the path reads naturally.
421        involves: Vec<String>,
422    },
423
424    /// `type_solver` — a wire edge carries a concrete storage type on
425    /// the send side that does not match the concrete storage type
426    /// declared on the receive side, and no `Codec` bridge is
427    /// wired between them. Reading the hint: add a
428    /// `Codec<In=<actual_id>, Out=<expected_id>>` node on the edge so
429    /// the encoder/decoder pair converts between the two storage
430    /// representations. Quantization methods are not substitutable
431    /// casts — the author must choose the right Codec impl.
432    IncompatibleStorageOnEdge {
433        /// Value name produced by the upstream send-side node.
434        src: String,
435        /// Value name expected by the downstream receive-side node.
436        dst: String,
437        /// `TypeNode.id` string the receive side declares
438        /// (e.g. `"tensor.u8"`).
439        expected_id: &'static str,
440        /// `TypeNode.id` string the send side resolved to
441        /// (e.g. `"tensor.f32"`).
442        actual_id: &'static str,
443    },
444}
445
/// Why a slot needs to be bound. Threaded into
/// [`CompileError::UnboundSlot`] so the diagnostic can point at
/// the originating location.
///
/// All payload fields are `String`s, so besides `Debug` and `Clone`
/// the type also derives `PartialEq` and `Eq`; callers can compare
/// sources directly (e.g. with `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SlotSource {
    /// At least one NodeProto in the IR references a role via
    /// `(required_trait, slot_id)` metadata but no `BindingSpec`
    /// entry of that role exists. The user forgot the
    /// `.bind_<role>::<T>("…")` for a placeholder field their
    /// Module body actually uses.
    DirectPlaceholder,

    /// The slot is required because the named concrete declares
    /// `#[depends(<role> = "<slot>")]`, but no matching binding
    /// exists. Diagnostic: *"backend slot 'compute' is required
    /// by CountingIndex bound at slot 'primary_index' but isn't
    /// bound."*
    DependencyOf {
        /// `TYPE_NAME` of the concrete with the dep.
        component: String,
        /// Slot the dep-declaring concrete was bound at.
        bound_at_slot: String,
        /// Slot name the dependency references.
        required_slot: String,
    },
}
472
473impl std::fmt::Display for CompileError {
474    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
475        match self {
476            Self::Validation(e) => write!(f, "{e}"),
477            Self::ExpansionFailed { op_type, domain, reason } => write!(
478                f,
479                "op expansion failed for {domain}::{op_type}: {reason}",
480            ),
481            Self::RoleMethodFailed { slot, op_type, source } => write!(
482                f,
483                "role method {op_type} (slot {slot}) failed: {source}",
484            ),
485            Self::AmbiguousRole { role, concrete_type, generic_slot_id } => write!(
486                f,
487                "ambiguous role {role}: both concrete {concrete_type} and generic slot {generic_slot_id} provide it",
488            ),
489            Self::UnresolvedPeerClass { node_name, peer_input } => write!(
490                f,
491                "wire.Send {node_name} has no declared peer_class for peer input {peer_input}",
492            ),
493            Self::CrossClassDataflow { node_name, home_a, home_b } => write!(
494                f,
495                "node {node_name} consumes values from {home_a} and {home_b} without a wire.send between them",
496            ),
497            Self::IrVersionMismatch { expected, got } => write!(
498                f,
499                "IR version mismatch: compiler expects v{expected}, model carries v{got}",
500            ),
501            Self::MissingBinding { slot, site } => write!(
502                f,
503                "missing binding for slot `{slot}` at {site}",
504            ),
505            Self::EmptyFunctionTable => f.write_str(
506                "ModelProto.functions is empty — the recorder produced no FunctionProto",
507            ),
508            Self::RuntimeIncomplete { missing } => write!(
509                f,
510                "compiled model is not runtime-complete: missing {missing}",
511            ),
512            Self::Internal { detail } => write!(f, "compiler internal error: {detail}"),
513            Self::TypeConstraintFailed { op, detail } => write!(
514                f,
515                "type constraint failed at {op}: {detail}",
516            ),
517            Self::UnresolvedType { value } => write!(
518                f,
519                "value `{value}` did not resolve to a concrete type",
520            ),
521            Self::UnboundDependency {
522                component,
523                bound_at_slot,
524                required_role,
525                required_slot,
526            } => write!(
527                f,
528                "{component} (bound at slot `{bound_at_slot}`) requires a {required_role} \
529                 at slot `{required_slot}`, but no such slot is bound",
530            ),
531            Self::UnboundSlot { role, source } => match source {
532                SlotSource::DirectPlaceholder => write!(
533                    f,
534                    "no `.bind_<role>::<T>(\"…\")` supplied a {role} concrete; the Module body \
535                     uses a {role} placeholder that the compiler must fill at install time",
536                ),
537                SlotSource::DependencyOf {
538                    component,
539                    bound_at_slot,
540                    required_slot,
541                } => write!(
542                    f,
543                    "{component} (bound at slot `{bound_at_slot}`) requires a {role} at slot \
544                     `{required_slot}`, but the bind chain doesn't include it; add \
545                     `.bind_{}::<...>(\"{required_slot}\")`",
546                    role.to_ascii_lowercase(),
547                ),
548            },
549            Self::DependencyRoleMismatch {
550                component,
551                bound_at_slot,
552                required_role,
553                required_slot,
554                provided_role,
555            } => write!(
556                f,
557                "{component} (bound at slot `{bound_at_slot}`) requires a {required_role} \
558                 at slot `{required_slot}`, but the slot is bound to a {provided_role}",
559            ),
560            Self::UnknownPort { module, port } => write!(
561                f,
562                "module `{module}` references port `{port}` that it did not declare",
563            ),
564            Self::PortUnwired { module, port, direction } => write!(
565                f,
566                "module `{module}` port `{port}` ({direction}) is declared but not wired in the body",
567            ),
568            Self::NetworkOpNotAtBoundary { module, op_id } => write!(
569                f,
570                "module `{module}` op `{op_id}` is a wire.Send/Recv but is not at a declared network port",
571            ),
572            Self::InvalidSlotId { node, value } => write!(
573                f,
574                "node `{node}` has invalid or missing `ai.bytesandbrains.slot_id` metadata: `{value}`",
575            ),
576            Self::UnknownSlotId { node, slot_id } => write!(
577                f,
578                "node `{node}` references slot_id {slot_id} which has no corresponding binding in BindingSpec",
579            ),
580            Self::UnknownRoleRuntime { node, role } => write!(
581                f,
582                "node `{node}` declares unknown role runtime `{role}` in `ai.bytesandbrains.required_trait`",
583            ),
584            Self::MissingCodecPortMetadata { node } => write!(
585                f,
586                "codec node `{node}` is missing `ai.bytesandbrains.codec.port` metadata (expected `in` or `out`)",
587            ),
588            Self::InvalidCodecPort { node, value } => write!(
589                f,
590                "codec node `{node}` has invalid `ai.bytesandbrains.codec.port` value `{value}` (expected `in` or `out`)",
591            ),
592            Self::AmbiguousRoleBinding { role, slot_names } => write!(
593                f,
594                "multiple slots share role `{role}` (slots: {slot_names:?}); \
595                 polymorphic refinement requires slot_id discriminator support (TODO follow-up)",
596            ),
597            Self::MissingRequiredTraitMetadata { node } => write!(
598                f,
599                "node `{node}` declares slot_id without required_trait metadata",
600            ),
601            Self::IncompatibleStorageOnEdge {
602                src,
603                dst,
604                expected_id,
605                actual_id,
606            } => write!(
607                f,
608                "port `{dst}` expects {expected_id}; upstream `{src}` outputs {actual_id}. \
609                 Insert a `Codec<In={actual_id}, Out={expected_id}>` on the edge.",
610            ),
611            Self::BootstrapCompositionGap { caller, target } => write!(
612                f,
613                "bootstrap function `{caller}` calls `{target}`, which has no FunctionProto in the model",
614            ),
615            Self::BootstrapCompositionCycle { involves } => write!(
616                f,
617                "bootstrap composition cycle: {}",
618                involves.join(" → "),
619            ),
620        }
621    }
622}
623
624impl From<crate::type_solver::TypeError> for CompileError {
625    fn from(e: crate::type_solver::TypeError) -> Self {
626        match e {
627            crate::type_solver::TypeError::ConstraintFailed { op, detail } => {
628                Self::TypeConstraintFailed { op, detail }
629            }
630            crate::type_solver::TypeError::UnresolvedType { value } => {
631                Self::UnresolvedType { value }
632            }
633            crate::type_solver::TypeError::PortOutOfRange { op, port } => Self::Internal {
634                detail: format!("port {port:?} out of range on op {op}"),
635            },
636        }
637    }
638}
639
640impl std::error::Error for CompileError {}
641
642impl From<ValidationError> for CompileError {
643    fn from(e: ValidationError) -> Self {
644        Self::Validation(e)
645    }
646}