1#![allow(non_camel_case_types)]
22
23mod format;
24mod gguf;
25mod weights;
26
27pub use format::{DequantStrategy, QuantFormat};
28pub use gguf::{GgufError, GgufHeader, GgufLoader, GgufResult, GgufTensorInfo, GgufValue};
29pub use weights::{LayerQuantStats, QuantStats, QuantizedWeights};
30
31use std::fmt;
32
33#[derive(Debug, Clone)]
37pub struct QuantizedBrick {
38 pub name: String,
40 pub weights: Option<QuantizedWeights>,
42 pub dequant_strategy: DequantStrategy,
44 pub budget_tok_per_sec: Option<u64>,
46}
47
48impl QuantizedBrick {
49 pub fn new(name: &str) -> Self {
51 Self {
52 name: name.to_string(),
53 weights: None,
54 dequant_strategy: DequantStrategy::default(),
55 budget_tok_per_sec: None,
56 }
57 }
58
59 pub fn with_weights(mut self, weights: QuantizedWeights) -> Self {
61 self.weights = Some(weights);
62 self
63 }
64
65 pub fn with_dequant_strategy(mut self, strategy: DequantStrategy) -> Self {
67 self.dequant_strategy = strategy;
68 self
69 }
70
71 pub fn with_budget(mut self, tok_per_sec: u64) -> Self {
73 self.budget_tok_per_sec = Some(tok_per_sec);
74 self
75 }
76
77 pub fn memory_bytes(&self) -> usize {
79 self.weights.as_ref().map_or(0, |w| w.memory_bytes())
80 }
81
82 pub fn bits_per_weight(&self) -> f64 {
84 self.weights
85 .as_ref()
86 .map_or(0.0, |w| w.actual_bits_per_weight())
87 }
88
89 pub fn format(&self) -> Option<QuantFormat> {
91 self.weights.as_ref().map(|w| w.format)
92 }
93
94 pub fn has_weights(&self) -> bool {
96 self.weights.is_some()
97 }
98}
99
100impl fmt::Display for QuantizedBrick {
101 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
102 write!(f, "QuantizedBrick[{}]", self.name)?;
103 if let Some(weights) = &self.weights {
104 write!(
105 f,
106 " format={} weights={} memory={:.2}MB",
107 weights.format,
108 weights.num_weights(),
109 weights.memory_bytes() as f64 / 1_000_000.0
110 )?;
111 }
112 Ok(())
113 }
114}
115
116pub fn ggml_type_to_format(ggml_type: u32) -> Option<QuantFormat> {
118 match ggml_type {
119 0 => Some(QuantFormat::F32),
120 1 => Some(QuantFormat::F16),
121 2 => Some(QuantFormat::Q4_0),
122 3 => Some(QuantFormat::Q4_K), 8 => Some(QuantFormat::Q8_0),
124 12 => Some(QuantFormat::Q4_K),
125 13 => Some(QuantFormat::Q5_K),
126 14 => Some(QuantFormat::Q6_K),
127 _ => None,
128 }
129}
130
131#[cfg(test)]
132mod tests;