Skip to main content

edgefirst_decoder/per_scale/
mod.rs

// SPDX-FileCopyrightText: Copyright 2026 Au-Zone Technologies
// SPDX-License-Identifier: Apache-2.0

//! Per-scale quantized decoder — see
//! `.claude/plans/2026-04-28-per-scale-decoder-optimized-design.md`.
6
7pub mod helper;
8pub(crate) mod kernels;
9pub(crate) mod outputs;
10pub(crate) mod pipeline;
11pub(crate) mod plan;
12
13pub use helper::apply_schema_quant;
14
/// Element type of the decoded outputs, selected by the user through
/// `DecoderBuilder::with_decode_dtype()`.
///
/// Everything the post-merge pipeline emits (boxes, scores, mask coefs,
/// protos) uses this dtype. Choosing `F16` roughly halves memory bandwidth
/// in exchange for a 10-bit mantissa — empirically safe for YOLO-family
/// models.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum DecodeDtype {
    /// Single-precision output; this is the default.
    #[default]
    F32,
    /// Half-precision output.
    F16,
}
26
/// Activation to apply to a logical output once it has been dequantized.
///
/// The value originates from the schema's `activation_required` field.
/// Only `Sigmoid` is wired through the per-scale pipeline today; further
/// activations (e.g. `Softmax` on objectness) can be added as new variants
/// without ripple.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
#[allow(dead_code)] // consumed by later per-scale phase 1 tasks
pub(crate) enum Activation {
    /// No activation is applied; this is the default.
    #[default]
    None,
    /// Sigmoid activation.
    Sigmoid,
}
39
40impl Activation {
41    /// Translate a schema activation to a per_scale Activation.
42    /// Returns Activation::None when the schema declares no activation.
43    #[allow(dead_code)] // consumed by later per-scale phase 1 tasks
44    pub(crate) fn from_schema(s: Option<crate::schema::Activation>) -> Self {
45        match s {
46            Some(crate::schema::Activation::Sigmoid) => Self::Sigmoid,
47            _ => Self::None,
48        }
49    }
50}
51
52pub(crate) use outputs::{DecodedOutputBuffers, DecodedOutputsRef};
53pub(crate) use plan::PerScalePlan;
54
55/// Per-scale decoder for schema-v2 per-scale models. Built once at
56/// `DecoderBuilder::build()` time; consumed per-frame via `run()`.
57#[derive(Debug)]
58#[allow(dead_code)] // Wired by Task 24's Decoder integration.
59pub(crate) struct PerScaleDecoder {
60    pub(crate) plan: PerScalePlan,
61    pub(crate) buffers: DecodedOutputBuffers,
62}
63
64impl PerScaleDecoder {
65    /// Build a decoder from a plan, allocating output buffers.
66    #[allow(dead_code)] // Wired by Task 23's builder.
67    pub(crate) fn new(plan: PerScalePlan) -> Self {
68        let buffers = DecodedOutputBuffers::new(
69            plan.out_dtype,
70            plan.total_anchors,
71            plan.num_classes,
72            plan.num_mask_coefs,
73            plan.proto_nhwc_shape.as_deref(),
74        );
75        Self { plan, buffers }
76    }
77
78    /// Decode one frame's worth of inputs.
79    #[allow(dead_code)] // Wired by Task 24.
80    pub(crate) fn run<'a>(
81        &'a mut self,
82        inputs: &[&edgefirst_tensor::TensorDyn],
83    ) -> crate::DecoderResult<DecodedOutputsRef<'a>> {
84        pipeline::run(&self.plan, &mut self.buffers, inputs)
85    }
86}
87
88/// Owned f32 snapshot of pre-NMS per-scale outputs.
89///
90/// Returned by [`crate::Decoder::_testing_run_per_scale_pre_nms`] and
91/// used by integration tests to compare against fixture intermediates
92/// without the noise of NMS ordering.
93#[doc(hidden)]
94pub struct PreNmsCapture {
95    pub boxes_xywh: ndarray::Array2<f32>,
96    pub scores: ndarray::Array2<f32>,
97    pub mask_coefs: Option<ndarray::Array2<f32>>,
98    pub protos: Option<ndarray::Array4<f32>>,
99}