edgefirst_decoder/per_scale/mod.rs
// SPDX-FileCopyrightText: Copyright 2026 Au-Zone Technologies
// SPDX-License-Identifier: Apache-2.0

//! Per-scale quantized decoder — see
//! `.claude/plans/2026-04-28-per-scale-decoder-optimized-design.md`.
6
7pub mod helper;
8pub(crate) mod kernels;
9pub(crate) mod outputs;
10pub(crate) mod pipeline;
11pub(crate) mod plan;
12
13pub use helper::apply_schema_quant;
14
/// Element type of the decoded outputs, selected by the user through
/// `DecoderBuilder::with_decode_dtype()`.
///
/// Every stage after the merge — boxes, scores, mask coefficients and
/// protos — is emitted in this dtype. Choosing `F16` cuts memory bandwidth
/// roughly in half in exchange for a 10-bit mantissa, which has been
/// empirically safe for YOLO-family models.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum DecodeDtype {
    /// 32-bit floating point; this is the default.
    #[default]
    F32,
    /// 16-bit (half-precision) floating point.
    F16,
}
26
/// Activation applied to a logical output once it has been dequantized.
///
/// The value comes from the schema's `activation_required` field. Only
/// `Sigmoid` is currently wired through the per-scale pipeline; further
/// activations (e.g. `Softmax` on objectness) are meant to be added here as
/// new variants.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
#[allow(dead_code)] // consumed by later per-scale phase 1 tasks
pub(crate) enum Activation {
    /// No activation; this is the default.
    #[default]
    None,
    /// Sigmoid activation.
    Sigmoid,
}
39
40impl Activation {
41 /// Translate a schema activation to a per_scale Activation.
42 /// Returns Activation::None when the schema declares no activation.
43 #[allow(dead_code)] // consumed by later per-scale phase 1 tasks
44 pub(crate) fn from_schema(s: Option<crate::schema::Activation>) -> Self {
45 match s {
46 Some(crate::schema::Activation::Sigmoid) => Self::Sigmoid,
47 _ => Self::None,
48 }
49 }
50}
51
52pub(crate) use outputs::{DecodedOutputBuffers, DecodedOutputsRef};
53pub(crate) use plan::PerScalePlan;
54
55/// Per-scale decoder for schema-v2 per-scale models. Built once at
56/// `DecoderBuilder::build()` time; consumed per-frame via `run()`.
57#[derive(Debug)]
58#[allow(dead_code)] // Wired by Task 24's Decoder integration.
59pub(crate) struct PerScaleDecoder {
60 pub(crate) plan: PerScalePlan,
61 pub(crate) buffers: DecodedOutputBuffers,
62}
63
64impl PerScaleDecoder {
65 /// Build a decoder from a plan, allocating output buffers.
66 #[allow(dead_code)] // Wired by Task 23's builder.
67 pub(crate) fn new(plan: PerScalePlan) -> Self {
68 let buffers = DecodedOutputBuffers::new(
69 plan.out_dtype,
70 plan.total_anchors,
71 plan.num_classes,
72 plan.num_mask_coefs,
73 plan.proto_nhwc_shape.as_deref(),
74 );
75 Self { plan, buffers }
76 }
77
78 /// Decode one frame's worth of inputs.
79 #[allow(dead_code)] // Wired by Task 24.
80 pub(crate) fn run<'a>(
81 &'a mut self,
82 inputs: &[&edgefirst_tensor::TensorDyn],
83 ) -> crate::DecoderResult<DecodedOutputsRef<'a>> {
84 pipeline::run(&self.plan, &mut self.buffers, inputs)
85 }
86}
87
88/// Owned f32 snapshot of pre-NMS per-scale outputs.
89///
90/// Returned by [`crate::Decoder::_testing_run_per_scale_pre_nms`] and
91/// used by integration tests to compare against fixture intermediates
92/// without the noise of NMS ordering.
93#[doc(hidden)]
94pub struct PreNmsCapture {
95 pub boxes_xywh: ndarray::Array2<f32>,
96 pub scores: ndarray::Array2<f32>,
97 pub mask_coefs: Option<ndarray::Array2<f32>>,
98 pub protos: Option<ndarray::Array4<f32>>,
99}