vernier_core/error.rs
1//! Typed errors for the evaluator.
2//!
3//! Per the workspace clippy lints, we forbid `panic!`, `unwrap`, and
4//! `expect` in non-test code. Every fallible operation in `vernier-core`
5//! returns `Result<_, EvalError>`, including `Similarity::compute`
6//! (per ADR-0005).
7
8use thiserror::Error;
9use vernier_mask::MaskError;
10use vernier_partial::PartialError;
11
12// Re-export the shared sub-discriminator under its existing path so
13// callers (FFI, tests) keep using `EvalError::PartialFormatMismatch
14// { kind: PartialFormatErrorKind }` unchanged after ADR-0032's move
15// of the framing logic into the leaf crate.
16pub use vernier_partial::PartialFormatErrorKind;
17
18/// Unified error type for evaluation paths.
19///
20/// Variants are kept coarse on purpose: each one corresponds to a class
21/// of failure a caller can plausibly recover from or report distinctly.
22/// We add new variants as they're needed, rather than enumerating every
23/// possible cause up front.
24#[derive(Debug, Error)]
25pub enum EvalError {
26 /// Two annotations or two RLEs disagree on dimensions in a way that
27 /// makes the operation undefined. Replaces the `-1` sentinel
28 /// pycocotools' `rleIou` returns on dimension mismatch (quirk
29 /// **I2**, dispositioned `corrected` per ADR-0002).
30 #[error("dimension mismatch: {detail}")]
31 DimensionMismatch {
32 /// Free-form detail string for the operator that detected the
33 /// mismatch; carries the offending dimensions.
34 detail: String,
35 },
36
37 /// Annotation could not be parsed from JSON, or referenced an
38 /// `image_id` / `category_id` that the dataset does not contain.
39 /// Quirk **J5** in pycocotools is the matching enforcement on
40 /// `loadRes`.
41 #[error("invalid annotation: {detail}")]
42 InvalidAnnotation {
43 /// Free-form detail string identifying the offending field.
44 detail: String,
45 },
46
47 /// JSON deserialization failed before any vernier-side validation.
48 #[error("json: {0}")]
49 Json(#[from] serde_json::Error),
50
51 /// Mask-side operation failed (codec decode, polygon rasterization,
52 /// merge dimension mismatch). Propagated from `vernier-mask` per
53 /// ADR-0009's one-way dependency.
54 #[error("mask: {0}")]
55 Mask(#[from] MaskError),
56
57 /// Numeric input was not finite (NaN or infinity reached an
58 /// arithmetic that cannot tolerate it). Used at boundaries where
59 /// we receive scores or coordinates from external code.
60 #[error("non-finite value in {context}")]
61 NonFinite {
62 /// Where the non-finite value was encountered.
63 context: &'static str,
64 },
65
66 /// Caller-supplied evaluation parameters are inconsistent with the
67 /// data they're being applied to (e.g., a maxDet value that the
68 /// accumulator never saw, an IoU threshold absent from the
69 /// ladder). Distinct from `InvalidAnnotation`, which is for
70 /// dataset-side data errors.
71 #[error("invalid config: {detail}")]
72 InvalidConfig {
73 /// Free-form detail string identifying the offending parameter.
74 detail: String,
75 },
76
77 /// Streaming evaluator memory budget exceeded. Carries a breakdown of
78 /// where bytes are spent so the user can pick a remediation (shard,
79 /// shrink iou_thresholds, raise budget).
80 #[error("memory budget exceeded: used {used_bytes} / budget {budget_bytes} bytes")]
81 OutOfBudget {
82 /// Total bytes the evaluator was holding when it tripped the budget.
83 used_bytes: usize,
84 /// Configured budget cap.
85 budget_bytes: usize,
86 /// Stable keys: `"cells_store"`, `"scores"`, `"match_flags"`. The
87 /// schema is future-additive — consumers must tolerate extra keys.
88 breakdown: std::collections::HashMap<&'static str, usize>,
89 },
90
91 /// Feature wired but not yet implemented in v0. Used by the streaming
92 /// evaluator's `checkpoint`/`restore` pair, deferred per the user's
93 /// scope decision; future ADR re-introduces the implementation.
94 #[error("not implemented: {feature}")]
95 NotImplemented {
96 /// Stable identifier of the unimplemented feature, e.g.
97 /// `"StreamingEvaluator::checkpoint"`.
98 feature: &'static str,
99 },
100
101 /// `per_pair` row count exceeded the configured cap (ADR-0019
102 /// `TablesConfig::per_pair_max_rows`). Carries the observed count
103 /// at the moment the cap was tripped and the cap value, so callers
104 /// can decide whether to raise the cap or constrain the workload.
105 #[error("per_pair table exceeded cap: would emit at least {observed} rows, cap {cap}")]
106 PerPairOverflow {
107 /// Best-effort lower bound on the row count at the moment the
108 /// cap was tripped. The check is per-cell so the actual final
109 /// count may be larger; this is the value that triggered the
110 /// abort.
111 observed: usize,
112 /// `TablesConfig::per_pair_max_rows` value the caller (or
113 /// default) configured.
114 cap: usize,
115 },
116
117 /// LVIS federated metadata violates the disjointness invariant
118 /// for one `(image, category)` cell: the category appears in both
119 /// `not_exhaustive_category_ids` and `neg_category_ids` (or is
120 /// listed in `neg_category_ids` while a GT of that category exists,
121 /// which would put it implicitly in `pos`). Quirk **AA7** of
122 /// ADR-0026, dispositioned `corrected`: lvis-api silently picks
123 /// `not_exhaustive` on overlap; vernier rejects at load.
124 #[error("lvis federated conflict on image_id={image_id}, category_id={category_id}: {detail}")]
125 LvisFederatedConflict {
126 /// Offending image id.
127 image_id: i64,
128 /// Offending category id.
129 category_id: i64,
130 /// Free-form detail string identifying which constraint failed
131 /// (e.g., `"category in both not_exhaustive and neg"`).
132 detail: &'static str,
133 },
134
135 /// LVIS dataset is missing the `frequency` field on one or more
136 /// categories. Quirk **AB6** of ADR-0026, dispositioned `corrected`:
137 /// lvis-api raises `KeyError` mid-eval on the first miss; vernier
138 /// raises at load with the full list of offending categories so
139 /// the failure is debuggable in one shot.
140 ///
141 /// The `category_ids` list is sorted ascending for stable error
142 /// messages.
143 #[error(
144 "lvis dataset is missing `frequency` on {} categories: {category_ids:?}",
145 category_ids.len()
146 )]
147 MissingFrequency {
148 /// Sorted list of category ids that lacked a `frequency` value.
149 category_ids: Vec<i64>,
150 },
151
152 /// Partial wire-format header / framing rejected by
153 /// [`vernier_partial::with_validated_envelope`] (ADR-0031). The `kind`
154 /// names which structural check tripped — magic, version, CRC,
155 /// kernel discriminator, grid dims, or rkyv archive validation.
156 #[error("partial wire format rejected: {kind}")]
157 PartialFormatMismatch {
158 /// Which framing or structural check failed. See
159 /// [`PartialFormatErrorKind`].
160 kind: PartialFormatErrorKind,
161 },
162
163 /// One or more partials carry a `dataset_hash` that doesn't match
164 /// the live dataset's. Means the partial was computed against a
165 /// different GT than the receiving rank loaded — almost always a
166 /// sampler / config bug; refusing protects the merge result from
167 /// the head-rank's perspective. ADR-0031 §"Validation order" #6.
168 #[error("partial dataset_hash mismatch: expected {expected:02x?}, got {actual:02x?}")]
169 PartialDatasetMismatch {
170 /// Receiving rank's `dataset_hash` (what the partial was
171 /// expected to be computed against).
172 expected: [u8; 32],
173 /// Partial's declared `dataset_hash` (what was actually used).
174 actual: [u8; 32],
175 },
176
177 /// One or more partials carry a `params_hash` that doesn't match
178 /// the receiving rank's. Means the partial was produced with
179 /// different `iou_thresholds` / `max_dets` / `use_cats` / etc. and
180 /// the merged result would not equal a batch run. ADR-0031
181 /// §"Validation order" #7.
182 #[error("partial params_hash mismatch: expected {expected:02x?}, got {actual:02x?}")]
183 PartialParamsMismatch {
184 /// Receiving rank's `params_hash`.
185 expected: [u8; 32],
186 /// Partial's declared `params_hash`.
187 actual: [u8; 32],
188 },
189
190 /// Two partials cover the same `image_id` — the disjoint-partition
191 /// rule (ADR-0031 §"Axis D" D1) is violated. Almost always a
192 /// `DistributedSampler` misconfiguration where two ranks evaluated
193 /// the same image. The error names both rank ids and the colliding
194 /// image so the user can fix their sampler.
195 #[error("partials cover image_id={image_id} on both rank {rank_a} and rank {rank_b}")]
196 PartialPartitionOverlap {
197 /// Lower rank id involved in the collision (sorted for
198 /// determinism — `min(a, b)`).
199 rank_a: u32,
200 /// Higher rank id involved in the collision.
201 rank_b: u32,
202 /// Image id that appeared in both partials' `seen_images`.
203 image_id: i64,
204 },
205
206 /// Two strict-mode partials declare the same `rank_id`. ADR-0031
207 /// §"Axis C" C2: strict-mode merge requires distinct rank ids so
208 /// the future `(score, rank_id, local_position)` tiebreak gives a
209 /// total order. Corrected mode tolerates collisions.
210 #[error("partials share rank_id={rank_id} in strict mode")]
211 PartialRankCollision {
212 /// The duplicated rank id.
213 rank_id: u32,
214 },
215}
216
217/// Translate a leaf-crate [`PartialError`] into the equivalent
218/// [`EvalError`] variant. Centralizes the variant-name mapping
219/// (`Format` ↔ `PartialFormatMismatch` etc.) so call sites use `?` to
220/// propagate.
221impl From<PartialError> for EvalError {
222 fn from(err: PartialError) -> Self {
223 match err {
224 PartialError::Format { kind } => EvalError::PartialFormatMismatch { kind },
225 PartialError::DatasetMismatch { expected, actual } => {
226 EvalError::PartialDatasetMismatch { expected, actual }
227 }
228 PartialError::ParamsMismatch { expected, actual } => {
229 EvalError::PartialParamsMismatch { expected, actual }
230 }
231 PartialError::PartitionOverlap {
232 rank_a,
233 rank_b,
234 image_id,
235 } => EvalError::PartialPartitionOverlap {
236 rank_a,
237 rank_b,
238 image_id,
239 },
240 PartialError::RankCollision { rank_id } => EvalError::PartialRankCollision { rank_id },
241 }
242 }
243}