edgefirst_decoder/decoder/
mod.rs

1// SPDX-FileCopyrightText: Copyright 2025 Au-Zone Technologies
2// SPDX-License-Identifier: Apache-2.0
3
4use ndarray::{ArrayView, ArrayViewD, Dimension};
5use num_traits::{AsPrimitive, Float};
6
7use crate::{DecoderError, DetectBox, ProtoData, Segmentation};
8
9pub mod config;
10pub mod configs;
11
12use configs::ModelType;
13
14#[derive(Debug)]
15pub struct Decoder {
16    model_type: ModelType,
17    pub iou_threshold: f32,
18    pub score_threshold: f32,
19    /// NMS mode (always a concrete variant after build — `Nms::Auto` is
20    /// resolved during `DecoderBuilder::build()` and never stored here):
21    /// - `Some(ClassAgnostic)` — class-agnostic NMS
22    /// - `Some(ClassAware)` — class-aware NMS
23    /// - `None` — NMS bypassed (end-to-end models)
24    pub nms: Option<configs::Nms>,
25    /// Maximum number of candidate boxes fed into NMS after score filtering.
26    /// Reduces O(N²) NMS cost when many low-confidence proposals pass the
27    /// threshold (common during COCO mAP evaluation with threshold ≈ 0.001).
28    /// Candidates are ranked by score; only the top `pre_nms_top_k` proceed
29    /// to NMS.  Default: 300.  Ignored when `nms` is `None`.
30    ///
31    /// # ⚠️ Validation vs Deployment
32    ///
33    /// The default of 300 is tuned for **deployment** (score threshold ≥ 0.25)
34    /// where few anchors pass the score filter, making top-K a no-op in
35    /// practice while bounding worst-case NMS cost.
36    ///
37    /// For **mAP evaluation** (score threshold ≈ 0.001), most of the 8 400
38    /// YOLO anchors pass the score filter. At `pre_nms_top_k = 300`, roughly
39    /// 74 % of candidates that would survive NMS are discarded *before* NMS
40    /// runs, causing **~9 pp box mAP loss** — a measurement artifact, not a
41    /// model quality issue.
42    ///
43    /// | Use case | `pre_nms_top_k` | `score_threshold` |
44    /// |----------|----------------:|------------------:|
45    /// | Deployment | 300 (default) | ≥ 0.25 |
46    /// | COCO mAP evaluation | 8 400 (all anchors) | 0.001 |
47    /// | Unbounded | 0 (no limit) | any |
48    ///
49    /// Post-processing latency scales with the number of candidates entering
50    /// NMS. At deployment thresholds the candidate count is already small, so
51    /// raising `pre_nms_top_k` has negligible cost. At validation thresholds
52    /// the increase is measurable but necessary for correct recall.
53    pub pre_nms_top_k: usize,
54    /// Maximum number of detections returned after NMS. Matches the
55    /// Ultralytics `max_det` parameter.  Default: 300.
56    ///
57    /// This bound applies uniformly across all segmentation and detection
58    /// decode paths reached via [`Decoder::decode`] / [`Decoder::decode_proto`].
59    /// The output `Vec`'s capacity is only an allocation hint; the post-NMS
60    /// detection count is bounded solely by `max_det` (EDGEAI-1302).
61    pub max_det: usize,
62    /// Whether decoded boxes are in normalized [0,1] coordinates.
63    /// - `Some(true)`: Coordinates in [0,1] range
64    /// - `Some(false)`: Pixel coordinates
65    /// - `None`: Unknown, caller must infer (e.g., check if any coordinate >
66    ///   1.0)
67    normalized: Option<bool>,
68    /// Model input spatial dimensions `(width, height)`, captured from
69    /// the schema's `input.shape` / `input.dshape` at builder time.
70    /// Required to honour `normalized: false`: pixel-space box coords
71    /// emitted by the model are divided by these dimensions before NMS
72    /// so the post-NMS bbox is in `[0, 1]`. `None` when no schema input
73    /// spec is available — the legacy >2.0 reject in `protobox` then
74    /// preserves the previous safety net (EDGEAI-1303).
75    input_dims: Option<(usize, usize)>,
76    /// Schema v2 merge program. Present when the decoder was built from
77    /// a [`crate::schema::SchemaV2`] whose logical outputs carry
78    /// physical children. Absent for flat configurations (v1 and
79    /// flat-v2).
80    pub(crate) decode_program: Option<merge::DecodeProgram>,
81    /// When `true`, emit one detection per (anchor, class) pair for every
82    /// class whose score meets the threshold — matching Ultralytics `val`
83    /// multi-label decode.  **OFF by default.**
84    ///
85    /// # Deployment safety
86    /// Multi-label must never be enabled on the tracked/deployment path:
87    /// the ByteTrack IoU-only tracker spawns one track per detection, so
88    /// duplicate boxes for the same anchor at different labels produce
89    /// phantom tracks.  `decode_tracked_*` entry points assert this is off.
90    ///
91    /// This field is intentionally builder-only (not schema/config-driven)
92    /// so a deployed `edgefirst.json` can never accidentally enable it.
93    pub(crate) multi_label: bool,
94    /// Per-scale fast path. Constructed at build time from a schema-v2
95    /// document with per-scale children. Wrapped in `Mutex` because
96    /// `Decoder::decode_proto` and `Decoder::decode` are `&self` but
97    /// the per-scale buffers are mutated per-frame.
98    pub(crate) per_scale: Option<std::sync::Mutex<crate::per_scale::PerScaleDecoder>>,
99}
100
101impl PartialEq for Decoder {
102    fn eq(&self, other: &Self) -> bool {
103        // DecodeProgram and PerScaleDecoder have non-comparable embedded
104        // data; compare by the config-derived fields only.
105        self.model_type == other.model_type
106            && self.iou_threshold == other.iou_threshold
107            && self.score_threshold == other.score_threshold
108            && self.nms == other.nms
109            && self.pre_nms_top_k == other.pre_nms_top_k
110            && self.max_det == other.max_det
111            && self.normalized == other.normalized
112            && self.input_dims == other.input_dims
113            && self.multi_label == other.multi_label
114            && self.decode_program.is_some() == other.decode_program.is_some()
115            && self.per_scale.is_some() == other.per_scale.is_some()
116    }
117}
118
119impl Clone for Decoder {
120    /// Cloning a `Decoder` preserves the legacy decode path
121    /// (`decode_program`) but drops the per-scale fast path:
122    /// `PerScaleDecoder` owns mutable per-frame scratch buffers and is
123    /// not `Clone`. Decoders built from a per-scale schema should be
124    /// rebuilt via [`DecoderBuilder`] rather than cloned to preserve the
125    /// fast path; cloning is intended for tests and rare configs.
126    fn clone(&self) -> Self {
127        Self {
128            model_type: self.model_type.clone(),
129            iou_threshold: self.iou_threshold,
130            score_threshold: self.score_threshold,
131            nms: self.nms,
132            pre_nms_top_k: self.pre_nms_top_k,
133            max_det: self.max_det,
134            normalized: self.normalized,
135            input_dims: self.input_dims,
136            multi_label: self.multi_label,
137            decode_program: self.decode_program.clone(),
138            per_scale: None,
139        }
140    }
141}
142
143#[derive(Debug)]
144pub(crate) enum ArrayViewDQuantized<'a> {
145    UInt8(ArrayViewD<'a, u8>),
146    Int8(ArrayViewD<'a, i8>),
147    UInt16(ArrayViewD<'a, u16>),
148    Int16(ArrayViewD<'a, i16>),
149    UInt32(ArrayViewD<'a, u32>),
150    Int32(ArrayViewD<'a, i32>),
151}
152
153impl<'a, D> From<ArrayView<'a, u8, D>> for ArrayViewDQuantized<'a>
154where
155    D: Dimension,
156{
157    fn from(arr: ArrayView<'a, u8, D>) -> Self {
158        Self::UInt8(arr.into_dyn())
159    }
160}
161
162impl<'a, D> From<ArrayView<'a, i8, D>> for ArrayViewDQuantized<'a>
163where
164    D: Dimension,
165{
166    fn from(arr: ArrayView<'a, i8, D>) -> Self {
167        Self::Int8(arr.into_dyn())
168    }
169}
170
171impl<'a, D> From<ArrayView<'a, u16, D>> for ArrayViewDQuantized<'a>
172where
173    D: Dimension,
174{
175    fn from(arr: ArrayView<'a, u16, D>) -> Self {
176        Self::UInt16(arr.into_dyn())
177    }
178}
179
180impl<'a, D> From<ArrayView<'a, i16, D>> for ArrayViewDQuantized<'a>
181where
182    D: Dimension,
183{
184    fn from(arr: ArrayView<'a, i16, D>) -> Self {
185        Self::Int16(arr.into_dyn())
186    }
187}
188
189impl<'a, D> From<ArrayView<'a, u32, D>> for ArrayViewDQuantized<'a>
190where
191    D: Dimension,
192{
193    fn from(arr: ArrayView<'a, u32, D>) -> Self {
194        Self::UInt32(arr.into_dyn())
195    }
196}
197
198impl<'a, D> From<ArrayView<'a, i32, D>> for ArrayViewDQuantized<'a>
199where
200    D: Dimension,
201{
202    fn from(arr: ArrayView<'a, i32, D>) -> Self {
203        Self::Int32(arr.into_dyn())
204    }
205}
206
207impl<'a> ArrayViewDQuantized<'a> {
208    /// Returns the shape of the underlying array.
209    pub(crate) fn shape(&self) -> &[usize] {
210        match self {
211            ArrayViewDQuantized::UInt8(a) => a.shape(),
212            ArrayViewDQuantized::Int8(a) => a.shape(),
213            ArrayViewDQuantized::UInt16(a) => a.shape(),
214            ArrayViewDQuantized::Int16(a) => a.shape(),
215            ArrayViewDQuantized::UInt32(a) => a.shape(),
216            ArrayViewDQuantized::Int32(a) => a.shape(),
217        }
218    }
219}
220
/// Runs `$body` once for whichever integer variant `$x` holds, with the
/// wrapped view bound to `$var`.
///
/// - `$x`: an expression evaluating to an `ArrayViewDQuantized`
/// - `$var`: identifier that names the inner view inside `$body`
/// - `$body`: expression expanded once per variant
///
/// WARNING: Do NOT nest `with_quantized!` calls. Every nesting level
/// multiplies the monomorphized code paths by 6 (one per integer variant),
/// so N levels of nesting yield 6^N instantiations.
///
/// Prefer dequantizing each tensor sequentially with
/// `dequant_3d!`/`dequant_4d!` (6*N paths), or split the work into
/// independent phases that each nest at most 2 levels.
macro_rules! with_quantized {
    ($x:expr, $var:ident, $body:expr) => {
        match $x {
            ArrayViewDQuantized::UInt8($var) => $body,
            ArrayViewDQuantized::Int8($var) => $body,
            ArrayViewDQuantized::UInt16($var) => $body,
            ArrayViewDQuantized::Int16($var) => $body,
            ArrayViewDQuantized::UInt32($var) => $body,
            ArrayViewDQuantized::Int32($var) => $body,
        }
    };
}
257
258mod builder;
259mod helpers;
260mod merge;
261mod per_scale_bridge;
262mod postprocess;
263mod tensor_bridge;
264mod tests;
265
266pub use builder::DecoderBuilder;
267pub use config::{ConfigOutput, ConfigOutputRef, ConfigOutputs};
268
269impl Decoder {
270    /// Static label identifying which dispatch path `decode` / `decode_proto`
271    /// will take, used as a tracing-span attribute. Lets profiling tools
272    /// distinguish `per_scale` (the fast path), `decode_program` (schema-v2
273    /// merge), and `legacy` (config-driven) without requiring callers to
274    /// inspect the model.
275    fn decode_path_label(&self) -> &'static str {
276        if self.per_scale.is_some() {
277            "per_scale"
278        } else if self.decode_program.is_some() {
279            "decode_program"
280        } else {
281            "legacy"
282        }
283    }
284
285    /// This function returns the parsed model type of the decoder.
286    ///
287    /// # Examples
288    ///
289    /// ```rust
290    /// # use edgefirst_decoder::{DecoderBuilder, DecoderResult, configs::ModelType};
291    /// # fn main() -> DecoderResult<()> {
292    /// #    let config_yaml = edgefirst_bench::testdata::read_to_string("modelpack_split.yaml").to_string();
293    ///     let decoder = DecoderBuilder::default()
294    ///         .with_config_yaml_str(config_yaml)
295    ///         .build()?;
296    ///     assert!(matches!(
297    ///         decoder.model_type(),
298    ///         ModelType::ModelPackDetSplit { .. }
299    ///     ));
300    /// #    Ok(())
301    /// # }
302    /// ```
303    pub fn model_type(&self) -> &ModelType {
304        &self.model_type
305    }
306
307    /// Returns the coordinate format of the boxes the decoder emits to
308    /// the caller.
309    ///
310    /// - `Some(true)`: Boxes are in normalized `[0, 1]` coordinates
311    /// - `Some(false)`: Boxes are in pixel coordinates relative to the
312    ///   model input
313    /// - `None`: Unknown, caller must infer (e.g., check if any coordinate
314    ///   > 1.0)
315    ///
316    /// This describes the **post-decode** coordinate space, not the raw
317    /// schema annotation. The decoder applies EDGEAI-1303 normalization
318    /// (dividing bbox channels by `(input_w, input_h)`) on a per-path
319    /// basis, not unconditionally. Four paths are known to invoke the
320    /// helper uniformly across all of their entry points (`decode`,
321    /// `decode_proto`, and — where applicable — `decode_tracked` and
322    /// `decode_tracked_proto`):
323    ///
324    /// 1. The **per-scale fast path** (DFL/LTRB → dist2bbox → sigmoid),
325    ///    which emits pixel-space boxes by design and always normalizes
326    ///    before returning.
327    /// 2. [`ModelType::YoloSegDet`](crate::ModelType::YoloSegDet), whose
328    ///    quantized and float, tracked and untracked, masks and proto
329    ///    variants each call the helper after NMS.
330    /// 3. [`ModelType::YoloSplitSegDet`](crate::ModelType::YoloSplitSegDet),
331    ///    aligned across `decode`, `decode_proto`, `decode_tracked`,
332    ///    and `decode_tracked_proto` for both quantized and float
333    ///    variants.
334    /// 4. [`ModelType::YoloSegDet2Way`](crate::ModelType::YoloSegDet2Way),
335    ///    aligned across the same four entry points and both element
336    ///    type variants.
337    ///
338    /// When any of those paths is active and the schema declares
339    /// `normalized: false` with valid [`input_dims`](Self::input_dims),
340    /// this accessor reports `Some(true)` to match what the caller
341    /// actually receives.
342    ///
343    /// The remaining model types still surface the raw schema flag
344    /// because their post-decode contract differs:
345    /// [`ModelType::YoloDet`](crate::ModelType::YoloDet) and
346    /// [`ModelType::YoloSplitDet`](crate::ModelType::YoloSplitDet)
347    /// (detection-only, no protobox crop coupling), the
348    /// `YoloEndToEnd*` family (model embeds its own NMS and emits its
349    /// own coordinate space), and the `ModelPack*` family (separate
350    /// model conventions). For those, this accessor returns
351    /// `self.normalized` verbatim and leaves it to the caller to
352    /// handle pixel-space output explicitly (e.g. divide by
353    /// `input_dims()` themselves).
354    ///
355    /// # Examples
356    ///
357    /// ```rust
358    /// # use edgefirst_decoder::{DecoderBuilder, DecoderResult};
359    /// # fn main() -> DecoderResult<()> {
360    /// #    let config_yaml = edgefirst_bench::testdata::read_to_string("modelpack_split.yaml").to_string();
361    ///     let decoder = DecoderBuilder::default()
362    ///         .with_config_yaml_str(config_yaml)
363    ///         .build()?;
364    ///     // Config doesn't specify normalized, so it's None
365    ///     assert!(decoder.normalized_boxes().is_none());
366    /// #    Ok(())
367    /// # }
368    /// ```
369    pub fn normalized_boxes(&self) -> Option<bool> {
370        // Four paths invoke `yolo::maybe_normalize_boxes_in_place`
371        // uniformly across every entry point that can reach them:
372        //   - the per-scale fast path (always normalizes by design),
373        //   - `ModelType::YoloSegDet` (helper fires in
374        //     `decode`/`decode_proto` via `yolo::impl_yolo_segdet_*` and
375        //     in `decode_tracked`/`decode_tracked_proto` via the
376        //     `process_tracked_yolo_segmentation!` macro and
377        //     `process_tracked_yolo_segdet_float`),
378        //   - `ModelType::YoloSplitSegDet` (helper fires in
379        //     `decode_yolo_split_segdet_*`, `impl_yolo_split_segdet_*`,
380        //     `process_tracked_yolo_segmentation_split!`, and
381        //     `process_tracked_yolo_segdet_split_float`), and
382        //   - `ModelType::YoloSegDet2Way` (helper fires in
383        //     `decode_yolo_segdet_2way_*`, the float decode routes
384        //     through `impl_yolo_split_segdet_float*`,
385        //     `process_tracked_yolo_segmentation_2way!`, and the
386        //     inline tracked-2way float helpers).
387        // For those, `normalized == Some(false)` with valid `input_dims`
388        // upgrades to a post-decode `Some(true)`. Other paths invoke
389        // the helper inconsistently across `ModelType` variants and
390        // tracked/proto entry points — surface the raw schema flag
391        // there and let callers handle pixel-space output explicitly.
392        if self.per_scale.is_some() || self.legacy_path_normalizes_uniformly() {
393            match (self.normalized, self.input_dims) {
394                (Some(true), _) => Some(true),
395                (Some(false), Some((w, h))) if w != 0 && h != 0 => Some(true),
396                (Some(false), _) => Some(false),
397                (None, _) => None,
398            }
399        } else {
400            self.normalized
401        }
402    }
403
404    /// Returns true for legacy `ModelType` dispatch paths that are known
405    /// to call `yolo::maybe_normalize_boxes_in_place` on every entry
406    /// point (`decode`, `decode_proto`, `decode_tracked`,
407    /// `decode_tracked_proto`, both quantized and float variants).
408    ///
409    /// Used by [`normalized_boxes`](Self::normalized_boxes) to gate the
410    /// pixel→normalized upgrade for non-per-scale model types whose
411    /// post-decode contract matches the per-scale path. Extend this
412    /// list as additional `ModelType` variants are brought into
413    /// uniform-normalization alignment.
414    fn legacy_path_normalizes_uniformly(&self) -> bool {
415        matches!(
416            self.model_type,
417            ModelType::YoloSegDet { .. }
418                | ModelType::YoloSplitSegDet { .. }
419                | ModelType::YoloSegDet2Way { .. }
420        )
421    }
422
423    /// Model input dimensions `(width, height)` captured from the
424    /// schema's `input.shape` / `input.dshape`, or `None` when the
425    /// schema did not declare an input spec (e.g. flat YAML configs
426    /// or `DecoderBuilder::add_output(...)` programmatic builds).
427    ///
428    /// Drives EDGEAI-1303 normalization on the paths that invoke the
429    /// helper uniformly: when the schema declares pixel-space outputs
430    /// and `input_dims()` is `Some((w, h))`, the per-scale bridge and
431    /// the `ModelType::YoloSegDet`, `ModelType::YoloSplitSegDet`, and
432    /// `ModelType::YoloSegDet2Way` dispatch paths divide post-NMS
433    /// bbox coordinates by `(w, h)` so they enter the canonical
434    /// `[0, 1]` range before mask cropping / tracker dispatch, and
435    /// [`normalized_boxes`](Self::normalized_boxes) reports
436    /// `Some(true)` to match. The remaining legacy `ModelType`
437    /// dispatch paths (detection-only `YoloDet`/`YoloSplitDet`, the
438    /// `YoloEndToEnd*` family, and the `ModelPack*` family) do not
439    /// apply this division — see
440    /// [`normalized_boxes`](Self::normalized_boxes) for the per-path
441    /// contract. The legacy `protobox` `> 2.0` reject acts as a safety
442    /// net for paths that emit pixel-space coordinates.
443    ///
444    /// # Examples
445    ///
446    /// ```rust
447    /// # use edgefirst_decoder::{schema::SchemaV2, DecoderBuilder, DecoderResult};
448    /// # fn main() -> DecoderResult<()> {
449    ///     let json = r#"{
450    ///         "schema_version": 2,
451    ///         "nms": "class_agnostic",
452    ///         "input": {
453    ///             "shape": [1, 640, 640, 3],
454    ///             "dshape": [{"batch": 1}, {"height": 640}, {"width": 640}, {"num_features": 3}]
455    ///         },
456    ///         "outputs": [{
457    ///             "name": "out", "type": "detection",
458    ///             "shape": [1, 38, 256],
459    ///             "dshape": [{"batch": 1}, {"num_features": 38}, {"num_boxes": 256}],
460    ///             "decoder": "ultralytics", "encoding": "direct", "normalized": false
461    ///         }]
462    ///     }"#;
463    ///     let schema: SchemaV2 = serde_json::from_str(json).unwrap();
464    ///     let decoder = DecoderBuilder::default().with_schema(schema).build()?;
465    ///     assert_eq!(decoder.input_dims(), Some((640, 640)));
466    /// #   Ok(())
467    /// # }
468    /// ```
469    pub fn input_dims(&self) -> Option<(usize, usize)> {
470        self.input_dims
471    }
472
473    /// Decode quantized model outputs into detection boxes and segmentation
474    /// masks. The quantized outputs can be of u8, i8, u16, i16, u32, or i32
475    /// types. Clears the provided output vectors before populating them.
476    pub(crate) fn decode_quantized(
477        &self,
478        outputs: &[ArrayViewDQuantized],
479        output_boxes: &mut Vec<DetectBox>,
480        output_masks: &mut Vec<Segmentation>,
481    ) -> Result<(), DecoderError> {
482        output_boxes.clear();
483        output_masks.clear();
484        match &self.model_type {
485            ModelType::ModelPackSegDet {
486                boxes,
487                scores,
488                segmentation,
489            } => {
490                self.decode_modelpack_det_quantized(outputs, boxes, scores, output_boxes)?;
491                self.decode_modelpack_seg_quantized(outputs, segmentation, output_masks)
492            }
493            ModelType::ModelPackSegDetSplit {
494                detection,
495                segmentation,
496            } => {
497                self.decode_modelpack_det_split_quantized(outputs, detection, output_boxes)?;
498                self.decode_modelpack_seg_quantized(outputs, segmentation, output_masks)
499            }
500            ModelType::ModelPackDet { boxes, scores } => {
501                self.decode_modelpack_det_quantized(outputs, boxes, scores, output_boxes)
502            }
503            ModelType::ModelPackDetSplit { detection } => {
504                self.decode_modelpack_det_split_quantized(outputs, detection, output_boxes)
505            }
506            ModelType::ModelPackSeg { segmentation } => {
507                self.decode_modelpack_seg_quantized(outputs, segmentation, output_masks)
508            }
509            ModelType::YoloDet { boxes } => {
510                self.decode_yolo_det_quantized(outputs, boxes, output_boxes)
511            }
512            ModelType::YoloSegDet { boxes, protos } => self.decode_yolo_segdet_quantized(
513                outputs,
514                boxes,
515                protos,
516                output_boxes,
517                output_masks,
518            ),
519            ModelType::YoloSplitDet { boxes, scores } => {
520                self.decode_yolo_split_det_quantized(outputs, boxes, scores, output_boxes)
521            }
522            ModelType::YoloSplitSegDet {
523                boxes,
524                scores,
525                mask_coeff,
526                protos,
527            } => self.decode_yolo_split_segdet_quantized(
528                outputs,
529                boxes,
530                scores,
531                mask_coeff,
532                protos,
533                output_boxes,
534                output_masks,
535            ),
536            ModelType::YoloSegDet2Way {
537                boxes,
538                mask_coeff,
539                protos,
540            } => self.decode_yolo_segdet_2way_quantized(
541                outputs,
542                boxes,
543                mask_coeff,
544                protos,
545                output_boxes,
546                output_masks,
547            ),
548            ModelType::YoloEndToEndDet { boxes } => {
549                self.decode_yolo_end_to_end_det_quantized(outputs, boxes, output_boxes)
550            }
551            ModelType::YoloEndToEndSegDet { boxes, protos } => self
552                .decode_yolo_end_to_end_segdet_quantized(
553                    outputs,
554                    boxes,
555                    protos,
556                    output_boxes,
557                    output_masks,
558                ),
559            ModelType::YoloSplitEndToEndDet {
560                boxes,
561                scores,
562                classes,
563            } => self.decode_yolo_split_end_to_end_det_quantized(
564                outputs,
565                boxes,
566                scores,
567                classes,
568                output_boxes,
569            ),
570            ModelType::YoloSplitEndToEndSegDet {
571                boxes,
572                scores,
573                classes,
574                mask_coeff,
575                protos,
576            } => self.decode_yolo_split_end_to_end_segdet_quantized(
577                outputs,
578                boxes,
579                scores,
580                classes,
581                mask_coeff,
582                protos,
583                output_boxes,
584                output_masks,
585            ),
586            ModelType::PerScale => Err(DecoderError::Internal(
587                "per-scale path must be intercepted before ModelType dispatch".into(),
588            )),
589        }
590    }
591
592    /// Decode floating point model outputs into detection boxes and
593    /// segmentation masks. Clears the provided output vectors before
594    /// populating them.
595    pub(crate) fn decode_float<T>(
596        &self,
597        outputs: &[ArrayViewD<T>],
598        output_boxes: &mut Vec<DetectBox>,
599        output_masks: &mut Vec<Segmentation>,
600    ) -> Result<(), DecoderError>
601    where
602        T: Float + AsPrimitive<f32> + AsPrimitive<u8> + Send + Sync + 'static,
603        f32: AsPrimitive<T>,
604    {
605        output_boxes.clear();
606        output_masks.clear();
607        match &self.model_type {
608            ModelType::ModelPackSegDet {
609                boxes,
610                scores,
611                segmentation,
612            } => {
613                self.decode_modelpack_det_float(outputs, boxes, scores, output_boxes)?;
614                self.decode_modelpack_seg_float(outputs, segmentation, output_masks)?;
615            }
616            ModelType::ModelPackSegDetSplit {
617                detection,
618                segmentation,
619            } => {
620                self.decode_modelpack_det_split_float(outputs, detection, output_boxes)?;
621                self.decode_modelpack_seg_float(outputs, segmentation, output_masks)?;
622            }
623            ModelType::ModelPackDet { boxes, scores } => {
624                self.decode_modelpack_det_float(outputs, boxes, scores, output_boxes)?;
625            }
626            ModelType::ModelPackDetSplit { detection } => {
627                self.decode_modelpack_det_split_float(outputs, detection, output_boxes)?;
628            }
629            ModelType::ModelPackSeg { segmentation } => {
630                self.decode_modelpack_seg_float(outputs, segmentation, output_masks)?;
631            }
632            ModelType::YoloDet { boxes } => {
633                self.decode_yolo_det_float(outputs, boxes, output_boxes)?;
634            }
635            ModelType::YoloSegDet { boxes, protos } => {
636                self.decode_yolo_segdet_float(outputs, boxes, protos, output_boxes, output_masks)?;
637            }
638            ModelType::YoloSplitDet { boxes, scores } => {
639                self.decode_yolo_split_det_float(outputs, boxes, scores, output_boxes)?;
640            }
641            ModelType::YoloSplitSegDet {
642                boxes,
643                scores,
644                mask_coeff,
645                protos,
646            } => {
647                self.decode_yolo_split_segdet_float(
648                    outputs,
649                    boxes,
650                    scores,
651                    mask_coeff,
652                    protos,
653                    output_boxes,
654                    output_masks,
655                )?;
656            }
657            ModelType::YoloSegDet2Way {
658                boxes,
659                mask_coeff,
660                protos,
661            } => {
662                self.decode_yolo_segdet_2way_float(
663                    outputs,
664                    boxes,
665                    mask_coeff,
666                    protos,
667                    output_boxes,
668                    output_masks,
669                )?;
670            }
671            ModelType::YoloEndToEndDet { boxes } => {
672                self.decode_yolo_end_to_end_det_float(outputs, boxes, output_boxes)?;
673            }
674            ModelType::YoloEndToEndSegDet { boxes, protos } => {
675                self.decode_yolo_end_to_end_segdet_float(
676                    outputs,
677                    boxes,
678                    protos,
679                    output_boxes,
680                    output_masks,
681                )?;
682            }
683            ModelType::YoloSplitEndToEndDet {
684                boxes,
685                scores,
686                classes,
687            } => {
688                self.decode_yolo_split_end_to_end_det_float(
689                    outputs,
690                    boxes,
691                    scores,
692                    classes,
693                    output_boxes,
694                )?;
695            }
696            ModelType::YoloSplitEndToEndSegDet {
697                boxes,
698                scores,
699                classes,
700                mask_coeff,
701                protos,
702            } => {
703                self.decode_yolo_split_end_to_end_segdet_float(
704                    outputs,
705                    boxes,
706                    scores,
707                    classes,
708                    mask_coeff,
709                    protos,
710                    output_boxes,
711                    output_masks,
712                )?;
713            }
714            ModelType::PerScale => {
715                return Err(DecoderError::Internal(
716                    "per-scale path must be intercepted before ModelType dispatch".into(),
717                ));
718            }
719        }
720        Ok(())
721    }
722
723    /// Decodes quantized model outputs into detection boxes, returning raw
724    /// `ProtoData` for segmentation models instead of materialized masks.
725    ///
726    /// Returns `Ok(None)` for detection-only and ModelPack models (detections
727    /// are still decoded into `output_boxes`). Returns `Ok(Some(ProtoData))`
728    /// for YOLO segmentation models.
729    pub(crate) fn decode_quantized_proto(
730        &self,
731        outputs: &[ArrayViewDQuantized],
732        output_boxes: &mut Vec<DetectBox>,
733    ) -> Result<Option<ProtoData>, DecoderError> {
734        output_boxes.clear();
735        match &self.model_type {
736            // Detection-only variants: decode boxes, return None for proto data.
737            ModelType::ModelPackDet { boxes, scores } => {
738                self.decode_modelpack_det_quantized(outputs, boxes, scores, output_boxes)?;
739                Ok(None)
740            }
741            ModelType::ModelPackDetSplit { detection } => {
742                self.decode_modelpack_det_split_quantized(outputs, detection, output_boxes)?;
743                Ok(None)
744            }
745            ModelType::YoloDet { boxes } => {
746                self.decode_yolo_det_quantized(outputs, boxes, output_boxes)?;
747                Ok(None)
748            }
749            ModelType::YoloSplitDet { boxes, scores } => {
750                self.decode_yolo_split_det_quantized(outputs, boxes, scores, output_boxes)?;
751                Ok(None)
752            }
753            ModelType::YoloEndToEndDet { boxes } => {
754                self.decode_yolo_end_to_end_det_quantized(outputs, boxes, output_boxes)?;
755                Ok(None)
756            }
757            ModelType::YoloSplitEndToEndDet {
758                boxes,
759                scores,
760                classes,
761            } => {
762                self.decode_yolo_split_end_to_end_det_quantized(
763                    outputs,
764                    boxes,
765                    scores,
766                    classes,
767                    output_boxes,
768                )?;
769                Ok(None)
770            }
771            // ModelPack seg/segdet variants have no YOLO proto data.
772            ModelType::ModelPackSegDet { boxes, scores, .. } => {
773                self.decode_modelpack_det_quantized(outputs, boxes, scores, output_boxes)?;
774                Ok(None)
775            }
776            ModelType::ModelPackSegDetSplit { detection, .. } => {
777                self.decode_modelpack_det_split_quantized(outputs, detection, output_boxes)?;
778                Ok(None)
779            }
780            ModelType::ModelPackSeg { .. } => Ok(None),
781
782            ModelType::YoloSegDet { boxes, protos } => {
783                let proto =
784                    self.decode_yolo_segdet_quantized_proto(outputs, boxes, protos, output_boxes)?;
785                Ok(Some(proto))
786            }
787            ModelType::YoloSplitSegDet {
788                boxes,
789                scores,
790                mask_coeff,
791                protos,
792            } => {
793                let proto = self.decode_yolo_split_segdet_quantized_proto(
794                    outputs,
795                    boxes,
796                    scores,
797                    mask_coeff,
798                    protos,
799                    output_boxes,
800                )?;
801                Ok(Some(proto))
802            }
803            ModelType::YoloSegDet2Way {
804                boxes,
805                mask_coeff,
806                protos,
807            } => {
808                let proto = self.decode_yolo_segdet_2way_quantized_proto(
809                    outputs,
810                    boxes,
811                    mask_coeff,
812                    protos,
813                    output_boxes,
814                )?;
815                Ok(Some(proto))
816            }
817            ModelType::YoloEndToEndSegDet { boxes, protos } => {
818                let proto = self.decode_yolo_end_to_end_segdet_quantized_proto(
819                    outputs,
820                    boxes,
821                    protos,
822                    output_boxes,
823                )?;
824                Ok(Some(proto))
825            }
826            ModelType::YoloSplitEndToEndSegDet {
827                boxes,
828                scores,
829                classes,
830                mask_coeff,
831                protos,
832            } => {
833                let proto = self.decode_yolo_split_end_to_end_segdet_quantized_proto(
834                    outputs,
835                    boxes,
836                    scores,
837                    classes,
838                    mask_coeff,
839                    protos,
840                    output_boxes,
841                )?;
842                Ok(Some(proto))
843            }
844            ModelType::PerScale => Err(DecoderError::Internal(
845                "per-scale path must be intercepted before ModelType dispatch".into(),
846            )),
847        }
848    }
849
850    /// Decodes floating-point model outputs into detection boxes, returning
851    /// raw `ProtoData` for segmentation models instead of materialized masks.
852    ///
853    /// Returns `Ok(None)` for detection-only and ModelPack models (detections
854    /// are still decoded into `output_boxes`). Returns `Ok(Some(ProtoData))`
855    /// for YOLO segmentation models.
856    pub(crate) fn decode_float_proto<T>(
857        &self,
858        outputs: &[ArrayViewD<T>],
859        output_boxes: &mut Vec<DetectBox>,
860    ) -> Result<Option<ProtoData>, DecoderError>
861    where
862        T: Float + AsPrimitive<f32> + AsPrimitive<u8> + Send + Sync + crate::yolo::FloatProtoElem,
863        f32: AsPrimitive<T>,
864    {
865        output_boxes.clear();
866        match &self.model_type {
867            // Detection-only variants: decode boxes, return None for proto data.
868            ModelType::ModelPackDet { boxes, scores } => {
869                self.decode_modelpack_det_float(outputs, boxes, scores, output_boxes)?;
870                Ok(None)
871            }
872            ModelType::ModelPackDetSplit { detection } => {
873                self.decode_modelpack_det_split_float(outputs, detection, output_boxes)?;
874                Ok(None)
875            }
876            ModelType::YoloDet { boxes } => {
877                self.decode_yolo_det_float(outputs, boxes, output_boxes)?;
878                Ok(None)
879            }
880            ModelType::YoloSplitDet { boxes, scores } => {
881                self.decode_yolo_split_det_float(outputs, boxes, scores, output_boxes)?;
882                Ok(None)
883            }
884            ModelType::YoloEndToEndDet { boxes } => {
885                self.decode_yolo_end_to_end_det_float(outputs, boxes, output_boxes)?;
886                Ok(None)
887            }
888            ModelType::YoloSplitEndToEndDet {
889                boxes,
890                scores,
891                classes,
892            } => {
893                self.decode_yolo_split_end_to_end_det_float(
894                    outputs,
895                    boxes,
896                    scores,
897                    classes,
898                    output_boxes,
899                )?;
900                Ok(None)
901            }
902            // ModelPack seg/segdet variants have no YOLO proto data.
903            ModelType::ModelPackSegDet { boxes, scores, .. } => {
904                self.decode_modelpack_det_float(outputs, boxes, scores, output_boxes)?;
905                Ok(None)
906            }
907            ModelType::ModelPackSegDetSplit { detection, .. } => {
908                self.decode_modelpack_det_split_float(outputs, detection, output_boxes)?;
909                Ok(None)
910            }
911            ModelType::ModelPackSeg { .. } => Ok(None),
912
913            ModelType::YoloSegDet { boxes, protos } => {
914                let proto =
915                    self.decode_yolo_segdet_float_proto(outputs, boxes, protos, output_boxes)?;
916                Ok(Some(proto))
917            }
918            ModelType::YoloSplitSegDet {
919                boxes,
920                scores,
921                mask_coeff,
922                protos,
923            } => {
924                let proto = self.decode_yolo_split_segdet_float_proto(
925                    outputs,
926                    boxes,
927                    scores,
928                    mask_coeff,
929                    protos,
930                    output_boxes,
931                )?;
932                Ok(Some(proto))
933            }
934            ModelType::YoloSegDet2Way {
935                boxes,
936                mask_coeff,
937                protos,
938            } => {
939                let proto = self.decode_yolo_segdet_2way_float_proto(
940                    outputs,
941                    boxes,
942                    mask_coeff,
943                    protos,
944                    output_boxes,
945                )?;
946                Ok(Some(proto))
947            }
948            ModelType::YoloEndToEndSegDet { boxes, protos } => {
949                let proto = self.decode_yolo_end_to_end_segdet_float_proto(
950                    outputs,
951                    boxes,
952                    protos,
953                    output_boxes,
954                )?;
955                Ok(Some(proto))
956            }
957            ModelType::YoloSplitEndToEndSegDet {
958                boxes,
959                scores,
960                classes,
961                mask_coeff,
962                protos,
963            } => {
964                let proto = self.decode_yolo_split_end_to_end_segdet_float_proto(
965                    outputs,
966                    boxes,
967                    scores,
968                    classes,
969                    mask_coeff,
970                    protos,
971                    output_boxes,
972                )?;
973                Ok(Some(proto))
974            }
975            ModelType::PerScale => Err(DecoderError::Internal(
976                "per-scale path must be intercepted before ModelType dispatch".into(),
977            )),
978        }
979    }
980
981    // ========================================================================
982    // TensorDyn-based public API
983    // ========================================================================
984
985    /// Decode model outputs into detection boxes and segmentation masks.
986    ///
987    /// This is the primary decode API. Accepts `TensorDyn` outputs directly
988    /// from model inference. Automatically dispatches to quantized or float
989    /// paths based on the tensor dtype.
990    ///
991    /// # Arguments
992    ///
993    /// * `outputs` - Tensor outputs from model inference
994    /// * `output_boxes` - Destination for decoded detection boxes (cleared first)
995    /// * `output_masks` - Destination for decoded segmentation masks (cleared first)
996    ///
997    /// # `output_boxes` / `output_masks` capacity
998    ///
999    /// The capacity of the supplied `Vec`s is **only** an allocation hint —
1000    /// it is **not** a cap on the number of detections returned. The
1001    /// post-NMS detection count is bounded by [`Decoder::max_det`] (set
1002    /// via [`DecoderBuilder::with_max_det`], default `300`). Passing
1003    /// `Vec::new()` (capacity 0) returns up to `max_det` detections;
1004    /// pre-allocating with [`Vec::with_capacity`] only avoids the
1005    /// reallocation when the decoder grows the buffer.
1006    ///
1007    /// # Errors
1008    ///
1009    /// Returns `DecoderError` if tensor mapping fails, dtypes are unsupported,
1010    /// or the outputs don't match the decoder's model configuration.
1011    pub fn decode(
1012        &self,
1013        outputs: &[&edgefirst_tensor::TensorDyn],
1014        output_boxes: &mut Vec<DetectBox>,
1015        output_masks: &mut Vec<Segmentation>,
1016    ) -> Result<(), DecoderError> {
1017        let path = self.decode_path_label();
1018        let _span = tracing::trace_span!("decoder.decode", path = path, n_outputs = outputs.len())
1019            .entered();
1020        // Per-scale fast path — selected at builder time when the schema
1021        // declares per-scale children with DFL or LTRB encoding.
1022        if let Some(per_scale_mutex) = &self.per_scale {
1023            let mut ps = per_scale_mutex
1024                .lock()
1025                .map_err(|e| DecoderError::Internal(format!("per_scale mutex poisoned: {e}")))?;
1026            let decoded = ps.run(outputs)?;
1027            return per_scale_bridge::per_scale_to_masks(
1028                &decoded,
1029                output_boxes,
1030                output_masks,
1031                self.iou_threshold,
1032                self.score_threshold,
1033                self.nms,
1034                self.pre_nms_top_k,
1035                self.max_det,
1036                self.normalized,
1037                self.input_dims,
1038                self.multi_label,
1039            );
1040        }
1041
1042        // Schema v2 merge path: dequantize physical children into
1043        // logical float32 tensors, then feed through the float dispatch.
1044        if let Some(program) = &self.decode_program {
1045            let merged = program.execute(outputs)?;
1046            let views: Vec<_> = merged.iter().map(|a| a.view()).collect();
1047            return self.decode_float(&views, output_boxes, output_masks);
1048        }
1049
1050        let mapped = tensor_bridge::map_tensors(outputs)?;
1051        match &mapped {
1052            tensor_bridge::MappedOutputs::Quantized(maps) => {
1053                let views = tensor_bridge::quantized_views(maps)?;
1054                self.decode_quantized(&views, output_boxes, output_masks)
1055            }
1056            tensor_bridge::MappedOutputs::Float16(maps) => {
1057                let views = tensor_bridge::f16_views(maps)?;
1058                self.decode_float(&views, output_boxes, output_masks)
1059            }
1060            tensor_bridge::MappedOutputs::Float32(maps) => {
1061                let views = tensor_bridge::f32_views(maps)?;
1062                self.decode_float(&views, output_boxes, output_masks)
1063            }
1064            tensor_bridge::MappedOutputs::Float64(maps) => {
1065                let views = tensor_bridge::f64_views(maps)?;
1066                self.decode_float(&views, output_boxes, output_masks)
1067            }
1068        }
1069    }
1070
1071    /// Decode model outputs into detection boxes, returning raw proto data
1072    /// for segmentation models instead of materialized masks.
1073    ///
1074    /// Accepts `TensorDyn` outputs directly from model inference.
1075    /// Detections are always decoded into `output_boxes` regardless of model type.
1076    /// Returns `Ok(None)` for detection-only and ModelPack models.
1077    /// Returns `Ok(Some(ProtoData))` for YOLO segmentation models.
1078    ///
1079    /// # Arguments
1080    ///
1081    /// * `outputs` - Tensor outputs from model inference
1082    /// * `output_boxes` - Destination for decoded detection boxes (cleared first)
1083    ///
1084    /// # `output_boxes` capacity
1085    ///
1086    /// The capacity of `output_boxes` is **only** an allocation hint — it
1087    /// is **not** a cap on the number of detections returned. The
1088    /// post-NMS detection count is bounded by [`Decoder::max_det`] (set
1089    /// via [`DecoderBuilder::with_max_det`], default `300`). Passing
1090    /// `Vec::new()` (capacity 0) returns up to `max_det` detections.
1091    ///
1092    /// # Errors
1093    ///
1094    /// Returns `DecoderError` if tensor mapping fails, dtypes are unsupported,
1095    /// or the outputs don't match the decoder's model configuration.
1096    pub fn decode_proto(
1097        &self,
1098        outputs: &[&edgefirst_tensor::TensorDyn],
1099        output_boxes: &mut Vec<DetectBox>,
1100    ) -> Result<Option<ProtoData>, DecoderError> {
1101        let path = self.decode_path_label();
1102        let _span = tracing::trace_span!(
1103            "decoder.decode_proto",
1104            path = path,
1105            n_outputs = outputs.len()
1106        )
1107        .entered();
1108        // Per-scale fast path — selected at builder time when the schema
1109        // declares per-scale children with DFL or LTRB encoding.
1110        if let Some(per_scale_mutex) = &self.per_scale {
1111            let mut ps = per_scale_mutex
1112                .lock()
1113                .map_err(|e| DecoderError::Internal(format!("per_scale mutex poisoned: {e}")))?;
1114            let decoded = ps.run(outputs)?;
1115            return per_scale_bridge::per_scale_to_proto_data(
1116                &decoded,
1117                output_boxes,
1118                self.iou_threshold,
1119                self.score_threshold,
1120                self.nms,
1121                self.pre_nms_top_k,
1122                self.max_det,
1123                self.normalized,
1124                self.input_dims,
1125                self.multi_label,
1126            );
1127        }
1128
1129        // Schema v2 merge path: dequantize physical children into
1130        // logical float32 tensors, then feed through the float dispatch.
1131        if let Some(program) = &self.decode_program {
1132            let merged = program.execute(outputs)?;
1133            let views: Vec<_> = merged.iter().map(|a| a.view()).collect();
1134            return self.decode_float_proto(&views, output_boxes);
1135        }
1136
1137        let mapped = tensor_bridge::map_tensors(outputs)?;
1138        let result = match &mapped {
1139            tensor_bridge::MappedOutputs::Quantized(maps) => {
1140                let views = tensor_bridge::quantized_views(maps)?;
1141                self.decode_quantized_proto(&views, output_boxes)
1142            }
1143            tensor_bridge::MappedOutputs::Float16(maps) => {
1144                let views = tensor_bridge::f16_views(maps)?;
1145                self.decode_float_proto(&views, output_boxes)
1146            }
1147            tensor_bridge::MappedOutputs::Float32(maps) => {
1148                let views = tensor_bridge::f32_views(maps)?;
1149                self.decode_float_proto(&views, output_boxes)
1150            }
1151            tensor_bridge::MappedOutputs::Float64(maps) => {
1152                let views = tensor_bridge::f64_views(maps)?;
1153                self.decode_float_proto(&views, output_boxes)
1154            }
1155        };
1156        result
1157    }
1158
1159    /// Run the per-scale pipeline and return pre-NMS buffers as owned f32.
1160    ///
1161    /// Test-only entry point used by the parity-fixture tests to compare
1162    /// HAL stage output against the NumPy reference's stage output
1163    /// without NMS ordering noise. Returns an error if the decoder
1164    /// isn't configured for per-scale decoding.
1165    #[doc(hidden)]
1166    pub fn _testing_run_per_scale_pre_nms(
1167        &self,
1168        outputs: &[&edgefirst_tensor::TensorDyn],
1169    ) -> Result<crate::per_scale::PreNmsCapture, crate::error::DecoderError> {
1170        let mutex = self.per_scale.as_ref().ok_or_else(|| {
1171            crate::error::DecoderError::Internal("decoder not configured for per-scale".into())
1172        })?;
1173        let mut ps = mutex.lock().map_err(|e| {
1174            crate::error::DecoderError::Internal(format!("per_scale mutex poisoned: {e}"))
1175        })?;
1176        // Drop the borrowed view immediately so we can reborrow buffers below.
1177        {
1178            ps.run(outputs)?;
1179        }
1180        let total_anchors = ps.plan.total_anchors;
1181        let num_classes = ps.plan.num_classes;
1182        let num_mc = ps.plan.num_mask_coefs;
1183        Ok(ps
1184            .buffers
1185            .snapshot_owned_f32(total_anchors, num_classes, num_mc))
1186    }
1187}
1188
1189#[cfg(feature = "tracker")]
1190pub use edgefirst_tracker::TrackInfo;
1191
1192#[cfg(feature = "tracker")]
1193pub use edgefirst_tracker::Tracker;
1194
1195#[cfg(feature = "tracker")]
1196impl Decoder {
1197    /// Decode quantized model outputs into detection boxes and segmentation
1198    /// masks with tracking. Clears the provided output vectors before
1199    /// populating them.
1200    pub(crate) fn decode_tracked_quantized<TR: edgefirst_tracker::Tracker<DetectBox>>(
1201        &self,
1202        tracker: &mut TR,
1203        timestamp: u64,
1204        outputs: &[ArrayViewDQuantized],
1205        output_boxes: &mut Vec<DetectBox>,
1206        output_masks: &mut Vec<Segmentation>,
1207        output_tracks: &mut Vec<edgefirst_tracker::TrackInfo>,
1208    ) -> Result<(), DecoderError> {
1209        // multi-label duplicates would spawn phantom tracks in ByteTrack
1210        // (tracker matches IoU-only; two boxes for the same anchor at different
1211        // labels become two tracks). Deployment must always stay on argmax.
1212        debug_assert!(
1213            !self.multi_label,
1214            "multi_label must be off on the tracked/deployment path"
1215        );
1216        output_boxes.clear();
1217        output_masks.clear();
1218        output_tracks.clear();
1219
1220        // yolo segdet variants require special handling to separate boxes that come from decoding vs active tracks.
1221        // Only boxes that come from decoding can be used for proto/mask generation.
1222        match &self.model_type {
1223            ModelType::YoloSegDet { boxes, protos } => self.decode_tracked_yolo_segdet_quantized(
1224                tracker,
1225                timestamp,
1226                outputs,
1227                boxes,
1228                protos,
1229                output_boxes,
1230                output_masks,
1231                output_tracks,
1232            ),
1233            ModelType::YoloSplitSegDet {
1234                boxes,
1235                scores,
1236                mask_coeff,
1237                protos,
1238            } => self.decode_tracked_yolo_split_segdet_quantized(
1239                tracker,
1240                timestamp,
1241                outputs,
1242                boxes,
1243                scores,
1244                mask_coeff,
1245                protos,
1246                output_boxes,
1247                output_masks,
1248                output_tracks,
1249            ),
1250            ModelType::YoloEndToEndSegDet { boxes, protos } => self
1251                .decode_tracked_yolo_end_to_end_segdet_quantized(
1252                    tracker,
1253                    timestamp,
1254                    outputs,
1255                    boxes,
1256                    protos,
1257                    output_boxes,
1258                    output_masks,
1259                    output_tracks,
1260                ),
1261            ModelType::YoloSplitEndToEndSegDet {
1262                boxes,
1263                scores,
1264                classes,
1265                mask_coeff,
1266                protos,
1267            } => self.decode_tracked_yolo_split_end_to_end_segdet_quantized(
1268                tracker,
1269                timestamp,
1270                outputs,
1271                boxes,
1272                scores,
1273                classes,
1274                mask_coeff,
1275                protos,
1276                output_boxes,
1277                output_masks,
1278                output_tracks,
1279            ),
1280            ModelType::YoloSegDet2Way {
1281                boxes,
1282                mask_coeff,
1283                protos,
1284            } => self.decode_tracked_yolo_segdet_2way_quantized(
1285                tracker,
1286                timestamp,
1287                outputs,
1288                boxes,
1289                mask_coeff,
1290                protos,
1291                output_boxes,
1292                output_masks,
1293                output_tracks,
1294            ),
1295            _ => {
1296                self.decode_quantized(outputs, output_boxes, output_masks)?;
1297                Self::update_tracker(tracker, timestamp, output_boxes, output_tracks);
1298                Ok(())
1299            }
1300        }
1301    }
1302
1303    /// This function decodes floating point model outputs into detection boxes
1304    /// and segmentation masks. Up to `output_boxes.capacity()` boxes and
1305    /// masks will be decoded. The function clears the provided output
1306    /// vectors before populating them with the decoded results.
1307    ///
1308    /// This function returns an `Error` if the provided outputs don't
1309    /// match the configuration provided by the user when building the decoder.
1310    ///
1311    /// Any quantization information in the configuration will be ignored.
1312    pub(crate) fn decode_tracked_float<TR: edgefirst_tracker::Tracker<DetectBox>, T>(
1313        &self,
1314        tracker: &mut TR,
1315        timestamp: u64,
1316        outputs: &[ArrayViewD<T>],
1317        output_boxes: &mut Vec<DetectBox>,
1318        output_masks: &mut Vec<Segmentation>,
1319        output_tracks: &mut Vec<edgefirst_tracker::TrackInfo>,
1320    ) -> Result<(), DecoderError>
1321    where
1322        T: Float + AsPrimitive<f32> + AsPrimitive<u8> + Send + Sync + 'static,
1323        f32: AsPrimitive<T>,
1324    {
1325        debug_assert!(
1326            !self.multi_label,
1327            "multi_label must be off on the tracked/deployment path"
1328        );
1329        output_boxes.clear();
1330        output_masks.clear();
1331        output_tracks.clear();
1332        match &self.model_type {
1333            ModelType::YoloSegDet { boxes, protos } => {
1334                self.decode_tracked_yolo_segdet_float(
1335                    tracker,
1336                    timestamp,
1337                    outputs,
1338                    boxes,
1339                    protos,
1340                    output_boxes,
1341                    output_masks,
1342                    output_tracks,
1343                )?;
1344            }
1345            ModelType::YoloSplitSegDet {
1346                boxes,
1347                scores,
1348                mask_coeff,
1349                protos,
1350            } => {
1351                self.decode_tracked_yolo_split_segdet_float(
1352                    tracker,
1353                    timestamp,
1354                    outputs,
1355                    boxes,
1356                    scores,
1357                    mask_coeff,
1358                    protos,
1359                    output_boxes,
1360                    output_masks,
1361                    output_tracks,
1362                )?;
1363            }
1364            ModelType::YoloEndToEndSegDet { boxes, protos } => {
1365                self.decode_tracked_yolo_end_to_end_segdet_float(
1366                    tracker,
1367                    timestamp,
1368                    outputs,
1369                    boxes,
1370                    protos,
1371                    output_boxes,
1372                    output_masks,
1373                    output_tracks,
1374                )?;
1375            }
1376            ModelType::YoloSplitEndToEndSegDet {
1377                boxes,
1378                scores,
1379                classes,
1380                mask_coeff,
1381                protos,
1382            } => {
1383                self.decode_tracked_yolo_split_end_to_end_segdet_float(
1384                    tracker,
1385                    timestamp,
1386                    outputs,
1387                    boxes,
1388                    scores,
1389                    classes,
1390                    mask_coeff,
1391                    protos,
1392                    output_boxes,
1393                    output_masks,
1394                    output_tracks,
1395                )?;
1396            }
1397            ModelType::YoloSegDet2Way {
1398                boxes,
1399                mask_coeff,
1400                protos,
1401            } => {
1402                self.decode_tracked_yolo_segdet_2way_float(
1403                    tracker,
1404                    timestamp,
1405                    outputs,
1406                    boxes,
1407                    mask_coeff,
1408                    protos,
1409                    output_boxes,
1410                    output_masks,
1411                    output_tracks,
1412                )?;
1413            }
1414            _ => {
1415                self.decode_float(outputs, output_boxes, output_masks)?;
1416                Self::update_tracker(tracker, timestamp, output_boxes, output_tracks);
1417            }
1418        }
1419        Ok(())
1420    }
1421
1422    /// Decodes quantized model outputs into detection boxes, returning raw
1423    /// `ProtoData` for segmentation models instead of materialized masks.
1424    ///
1425    /// Returns `Ok(None)` for detection-only and ModelPack models (use
1426    /// `decode_quantized` for those). Returns `Ok(Some(ProtoData))` for
1427    /// YOLO segmentation models.
1428    pub(crate) fn decode_tracked_quantized_proto<TR: edgefirst_tracker::Tracker<DetectBox>>(
1429        &self,
1430        tracker: &mut TR,
1431        timestamp: u64,
1432        outputs: &[ArrayViewDQuantized],
1433        output_boxes: &mut Vec<DetectBox>,
1434        output_tracks: &mut Vec<edgefirst_tracker::TrackInfo>,
1435    ) -> Result<Option<ProtoData>, DecoderError> {
1436        debug_assert!(
1437            !self.multi_label,
1438            "multi_label must be off on the tracked/deployment path"
1439        );
1440        output_boxes.clear();
1441        output_tracks.clear();
1442        match &self.model_type {
1443            ModelType::YoloSegDet { boxes, protos } => {
1444                let proto = self.decode_tracked_yolo_segdet_quantized_proto(
1445                    tracker,
1446                    timestamp,
1447                    outputs,
1448                    boxes,
1449                    protos,
1450                    output_boxes,
1451                    output_tracks,
1452                )?;
1453                Ok(Some(proto))
1454            }
1455            ModelType::YoloSplitSegDet {
1456                boxes,
1457                scores,
1458                mask_coeff,
1459                protos,
1460            } => {
1461                let proto = self.decode_tracked_yolo_split_segdet_quantized_proto(
1462                    tracker,
1463                    timestamp,
1464                    outputs,
1465                    boxes,
1466                    scores,
1467                    mask_coeff,
1468                    protos,
1469                    output_boxes,
1470                    output_tracks,
1471                )?;
1472                Ok(Some(proto))
1473            }
1474            ModelType::YoloSegDet2Way {
1475                boxes,
1476                mask_coeff,
1477                protos,
1478            } => {
1479                let proto = self.decode_tracked_yolo_segdet_2way_quantized_proto(
1480                    tracker,
1481                    timestamp,
1482                    outputs,
1483                    boxes,
1484                    mask_coeff,
1485                    protos,
1486                    output_boxes,
1487                    output_tracks,
1488                )?;
1489                Ok(Some(proto))
1490            }
1491            ModelType::YoloEndToEndSegDet { boxes, protos } => {
1492                let proto = self.decode_tracked_yolo_end_to_end_segdet_quantized_proto(
1493                    tracker,
1494                    timestamp,
1495                    outputs,
1496                    boxes,
1497                    protos,
1498                    output_boxes,
1499                    output_tracks,
1500                )?;
1501                Ok(Some(proto))
1502            }
1503            ModelType::YoloSplitEndToEndSegDet {
1504                boxes,
1505                scores,
1506                classes,
1507                mask_coeff,
1508                protos,
1509            } => {
1510                let proto = self.decode_tracked_yolo_split_end_to_end_segdet_quantized_proto(
1511                    tracker,
1512                    timestamp,
1513                    outputs,
1514                    boxes,
1515                    scores,
1516                    classes,
1517                    mask_coeff,
1518                    protos,
1519                    output_boxes,
1520                    output_tracks,
1521                )?;
1522                Ok(Some(proto))
1523            }
1524            // Non-seg variants: decode boxes via the non-proto path, then track.
1525            _ => {
1526                let mut masks = Vec::new();
1527                self.decode_quantized(outputs, output_boxes, &mut masks)?;
1528                Self::update_tracker(tracker, timestamp, output_boxes, output_tracks);
1529                Ok(None)
1530            }
1531        }
1532    }
1533
1534    /// Decodes floating-point model outputs into detection boxes, returning
1535    /// raw `ProtoData` for segmentation models instead of materialized masks.
1536    ///
1537    /// Detections are always decoded into `output_boxes` regardless of model type.
1538    /// Returns `Ok(None)` for detection-only and ModelPack models. Returns
1539    /// `Ok(Some(ProtoData))` for YOLO segmentation models.
1540    pub(crate) fn decode_tracked_float_proto<TR: edgefirst_tracker::Tracker<DetectBox>, T>(
1541        &self,
1542        tracker: &mut TR,
1543        timestamp: u64,
1544        outputs: &[ArrayViewD<T>],
1545        output_boxes: &mut Vec<DetectBox>,
1546        output_tracks: &mut Vec<edgefirst_tracker::TrackInfo>,
1547    ) -> Result<Option<ProtoData>, DecoderError>
1548    where
1549        T: Float + AsPrimitive<f32> + AsPrimitive<u8> + Send + Sync + crate::yolo::FloatProtoElem,
1550        f32: AsPrimitive<T>,
1551    {
1552        output_boxes.clear();
1553        output_tracks.clear();
1554        match &self.model_type {
1555            ModelType::YoloSegDet { boxes, protos } => {
1556                let proto = self.decode_tracked_yolo_segdet_float_proto(
1557                    tracker,
1558                    timestamp,
1559                    outputs,
1560                    boxes,
1561                    protos,
1562                    output_boxes,
1563                    output_tracks,
1564                )?;
1565                Ok(Some(proto))
1566            }
1567            ModelType::YoloSplitSegDet {
1568                boxes,
1569                scores,
1570                mask_coeff,
1571                protos,
1572            } => {
1573                let proto = self.decode_tracked_yolo_split_segdet_float_proto(
1574                    tracker,
1575                    timestamp,
1576                    outputs,
1577                    boxes,
1578                    scores,
1579                    mask_coeff,
1580                    protos,
1581                    output_boxes,
1582                    output_tracks,
1583                )?;
1584                Ok(Some(proto))
1585            }
1586            ModelType::YoloSegDet2Way {
1587                boxes,
1588                mask_coeff,
1589                protos,
1590            } => {
1591                let proto = self.decode_tracked_yolo_segdet_2way_float_proto(
1592                    tracker,
1593                    timestamp,
1594                    outputs,
1595                    boxes,
1596                    mask_coeff,
1597                    protos,
1598                    output_boxes,
1599                    output_tracks,
1600                )?;
1601                Ok(Some(proto))
1602            }
1603            ModelType::YoloEndToEndSegDet { boxes, protos } => {
1604                let proto = self.decode_tracked_yolo_end_to_end_segdet_float_proto(
1605                    tracker,
1606                    timestamp,
1607                    outputs,
1608                    boxes,
1609                    protos,
1610                    output_boxes,
1611                    output_tracks,
1612                )?;
1613                Ok(Some(proto))
1614            }
1615            ModelType::YoloSplitEndToEndSegDet {
1616                boxes,
1617                scores,
1618                classes,
1619                mask_coeff,
1620                protos,
1621            } => {
1622                let proto = self.decode_tracked_yolo_split_end_to_end_segdet_float_proto(
1623                    tracker,
1624                    timestamp,
1625                    outputs,
1626                    boxes,
1627                    scores,
1628                    classes,
1629                    mask_coeff,
1630                    protos,
1631                    output_boxes,
1632                    output_tracks,
1633                )?;
1634                Ok(Some(proto))
1635            }
1636            // Non-seg variants: decode boxes via the non-proto path, then track.
1637            _ => {
1638                let mut masks = Vec::new();
1639                self.decode_float(outputs, output_boxes, &mut masks)?;
1640                Self::update_tracker(tracker, timestamp, output_boxes, output_tracks);
1641                Ok(None)
1642            }
1643        }
1644    }
1645
1646    // ========================================================================
1647    // TensorDyn-based tracked public API
1648    // ========================================================================
1649
1650    /// Decode model outputs with tracking.
1651    ///
1652    /// Accepts `TensorDyn` outputs directly from model inference. Automatically
1653    /// dispatches to quantized or float paths based on the tensor dtype, then
1654    /// updates the tracker with the decoded boxes.
1655    ///
1656    /// # Arguments
1657    ///
1658    /// * `tracker` - The tracker instance to update
1659    /// * `timestamp` - Current frame timestamp
1660    /// * `outputs` - Tensor outputs from model inference
1661    /// * `output_boxes` - Destination for decoded detection boxes (cleared first)
1662    /// * `output_masks` - Destination for decoded segmentation masks (cleared first)
1663    /// * `output_tracks` - Destination for track info (cleared first)
1664    ///
1665    /// # Errors
1666    ///
1667    /// Returns `DecoderError` if tensor mapping fails, dtypes are unsupported,
1668    /// or the outputs don't match the decoder's model configuration.
1669    pub fn decode_tracked<TR: edgefirst_tracker::Tracker<DetectBox>>(
1670        &self,
1671        tracker: &mut TR,
1672        timestamp: u64,
1673        outputs: &[&edgefirst_tensor::TensorDyn],
1674        output_boxes: &mut Vec<DetectBox>,
1675        output_masks: &mut Vec<Segmentation>,
1676        output_tracks: &mut Vec<edgefirst_tracker::TrackInfo>,
1677    ) -> Result<(), DecoderError> {
1678        // Per-scale fast path: route via the basic decode then update the
1679        // tracker. The current implementation keeps the tracker integration simple; per-frame
1680        // decoupling between detection and tracking is preserved.
1681        if self.per_scale.is_some() {
1682            output_tracks.clear();
1683            self.decode(outputs, output_boxes, output_masks)?;
1684            Self::update_tracker(tracker, timestamp, output_boxes, output_tracks);
1685            return Ok(());
1686        }
1687
1688        let mapped = tensor_bridge::map_tensors(outputs)?;
1689        match &mapped {
1690            tensor_bridge::MappedOutputs::Quantized(maps) => {
1691                let views = tensor_bridge::quantized_views(maps)?;
1692                self.decode_tracked_quantized(
1693                    tracker,
1694                    timestamp,
1695                    &views,
1696                    output_boxes,
1697                    output_masks,
1698                    output_tracks,
1699                )
1700            }
1701            tensor_bridge::MappedOutputs::Float16(maps) => {
1702                let views = tensor_bridge::f16_views(maps)?;
1703                self.decode_tracked_float(
1704                    tracker,
1705                    timestamp,
1706                    &views,
1707                    output_boxes,
1708                    output_masks,
1709                    output_tracks,
1710                )
1711            }
1712            tensor_bridge::MappedOutputs::Float32(maps) => {
1713                let views = tensor_bridge::f32_views(maps)?;
1714                self.decode_tracked_float(
1715                    tracker,
1716                    timestamp,
1717                    &views,
1718                    output_boxes,
1719                    output_masks,
1720                    output_tracks,
1721                )
1722            }
1723            tensor_bridge::MappedOutputs::Float64(maps) => {
1724                let views = tensor_bridge::f64_views(maps)?;
1725                self.decode_tracked_float(
1726                    tracker,
1727                    timestamp,
1728                    &views,
1729                    output_boxes,
1730                    output_masks,
1731                    output_tracks,
1732                )
1733            }
1734        }
1735    }
1736
1737    /// Decode model outputs with tracking, returning raw proto data for
1738    /// segmentation models.
1739    ///
1740    /// Accepts `TensorDyn` outputs directly from model inference.
1741    /// Returns `Ok(None)` for detection-only and ModelPack models.
1742    /// Returns `Ok(Some(ProtoData))` for YOLO segmentation models.
1743    ///
1744    /// # Arguments
1745    ///
1746    /// * `tracker` - The tracker instance to update
1747    /// * `timestamp` - Current frame timestamp
1748    /// * `outputs` - Tensor outputs from model inference
1749    /// * `output_boxes` - Destination for decoded detection boxes (cleared first)
1750    /// * `output_tracks` - Destination for track info (cleared first)
1751    ///
1752    /// # Errors
1753    ///
1754    /// Returns `DecoderError` if tensor mapping fails, dtypes are unsupported,
1755    /// or the outputs don't match the decoder's model configuration.
1756    pub fn decode_proto_tracked<TR: edgefirst_tracker::Tracker<DetectBox>>(
1757        &self,
1758        tracker: &mut TR,
1759        timestamp: u64,
1760        outputs: &[&edgefirst_tensor::TensorDyn],
1761        output_boxes: &mut Vec<DetectBox>,
1762        output_tracks: &mut Vec<edgefirst_tracker::TrackInfo>,
1763    ) -> Result<Option<ProtoData>, DecoderError> {
1764        // Per-scale fast path: route via the basic decode_proto then
1765        // update the tracker on the resulting boxes.
1766        if self.per_scale.is_some() {
1767            output_tracks.clear();
1768            let proto = self.decode_proto(outputs, output_boxes)?;
1769            Self::update_tracker(tracker, timestamp, output_boxes, output_tracks);
1770            return Ok(proto);
1771        }
1772
1773        let mapped = tensor_bridge::map_tensors(outputs)?;
1774        match &mapped {
1775            tensor_bridge::MappedOutputs::Quantized(maps) => {
1776                let views = tensor_bridge::quantized_views(maps)?;
1777                self.decode_tracked_quantized_proto(
1778                    tracker,
1779                    timestamp,
1780                    &views,
1781                    output_boxes,
1782                    output_tracks,
1783                )
1784            }
1785            tensor_bridge::MappedOutputs::Float16(maps) => {
1786                let views = tensor_bridge::f16_views(maps)?;
1787                self.decode_tracked_float_proto(
1788                    tracker,
1789                    timestamp,
1790                    &views,
1791                    output_boxes,
1792                    output_tracks,
1793                )
1794            }
1795            tensor_bridge::MappedOutputs::Float32(maps) => {
1796                let views = tensor_bridge::f32_views(maps)?;
1797                self.decode_tracked_float_proto(
1798                    tracker,
1799                    timestamp,
1800                    &views,
1801                    output_boxes,
1802                    output_tracks,
1803                )
1804            }
1805            tensor_bridge::MappedOutputs::Float64(maps) => {
1806                let views = tensor_bridge::f64_views(maps)?;
1807                self.decode_tracked_float_proto(
1808                    tracker,
1809                    timestamp,
1810                    &views,
1811                    output_boxes,
1812                    output_tracks,
1813                )
1814            }
1815        }
1816    }
1817}
edgefirst_decoder/decoder/mod.rs

edgefirst_decoder/decoder/
mod.rs