// hanzo_quant/dummy/mod.rs

use hanzo_ml::Result;

use crate::{QuantMethod, QuantizeOntoGuard, QuantizedSerde};

/// Diagnostic details attached to a dummy layer.
///
/// The fields are interpolated into the error text produced by
/// [`DummyLayerInfo::message`].
#[derive(Debug, Clone)]
pub struct DummyLayerInfo {
    /// Description inserted after "for" in the generated message.
    pub context: String,
    /// Prefix shown (inside backticks) in the generated message.
    pub prefix: String,
    /// Tensor paths reported as missing; an empty list is rendered as `<unknown>`.
    pub missing_tensors: Vec<String>,
}

impl DummyLayerInfo {
    /// Creates an info record for when nothing is known about the layer.
    ///
    /// `context` is `"unknown"`, `prefix` is `"<unknown>"`, and no missing
    /// tensors are listed.
    pub fn unknown() -> Self {
        Self {
            context: "unknown".to_string(),
            prefix: "<unknown>".to_string(),
            missing_tensors: Vec::new(),
        }
    }

    /// Builds the error message for a dummy layer that reached `action`.
    ///
    /// `action` names the operation that was attempted (for example
    /// `"forward pass"`). The missing tensor paths are joined with `", "`;
    /// when none are recorded, `<unknown>` is printed in their place.
    pub fn message(&self, action: &str) -> String {
        let missing = if self.missing_tensors.is_empty() {
            "<unknown>".to_string()
        } else {
            self.missing_tensors.join(", ")
        };
        format!(
            "DummyLayer reached {action} for {} at prefix `{}`. Missing tensor path(s): {missing}. Dummy layers are only valid as temporary UQFF placeholders and must be replaced before inference.",
            self.context, self.prefix
        )
    }
}

34#[derive(Debug, Clone)]
35pub struct DummyLayer {
36    info: DummyLayerInfo,
37}
38
39impl DummyLayer {
40    pub fn placeholder(info: DummyLayerInfo) -> Self {
41        Self { info }
42    }
43
44    pub fn info(&self) -> &DummyLayerInfo {
45        &self.info
46    }
47}
48
49impl QuantMethod for DummyLayer {
50    fn new(_method: crate::QuantMethodConfig) -> hanzo_ml::Result<Self>
51    where
52        Self: Sized,
53    {
54        Ok(Self {
55            info: DummyLayerInfo::unknown(),
56        })
57    }
58    fn dequantize_w(&self) -> Result<hanzo_ml::Tensor> {
59        hanzo_ml::bail!("{}", self.info.message("dequantization"))
60    }
61    fn add_delta_w(
62        &self,
63        _delta: &hanzo_ml::Tensor,
64    ) -> hanzo_ml::Result<std::sync::Arc<dyn QuantMethod>> {
65        hanzo_ml::bail!("{}", self.info.message("LoRA delta application"))
66    }
67    fn apply_isq(
68        self: std::sync::Arc<Self>,
69        _dtype: Option<crate::IsqType>,
70        _device: hanzo_ml::Device,
71        _n_quantized: &std::sync::atomic::AtomicUsize,
72        _imatrix_weight: Option<Vec<f32>>,
73        _guard: QuantizeOntoGuard,
74    ) -> hanzo_ml::Result<std::sync::Arc<dyn QuantMethod>> {
75        // This is necessary for the immediate ISQ
76        Ok(self)
77    }
78    fn dtype_and_device(&self) -> (hanzo_ml::DType, hanzo_ml::Device) {
79        (hanzo_ml::DType::F32, hanzo_ml::Device::Cpu)
80    }
81    fn forward_raw(&self, _a: &hanzo_ml::Tensor) -> hanzo_ml::Result<hanzo_ml::Tensor> {
82        hanzo_ml::bail!("{}", self.info.message("forward pass"))
83    }
84    fn quantized_act_type(&self) -> Option<hanzo_ml::DType> {
85        None
86    }
87
88    fn dummy_info(&self) -> Option<&crate::DummyLayerInfo> {
89        Some(&self.info)
90    }
91}
92
93impl QuantizedSerde for DummyLayer {
94    fn name(&self) -> &'static str {
95        "dummy"
96    }
97}