1use std::io::Read;
10use std::path::Path;
11use std::time::Instant;
12
13#[derive(Debug, thiserror::Error)]
19pub enum LoadError {
20 #[error("I/O error: {0}")]
22 Io(#[from] std::io::Error),
23
24 #[error("GGUF parse error: {0}")]
26 Parse(String),
27
28 #[error("memory budget exceeded: need {need} bytes, budget {budget} bytes")]
30 MemoryBudgetExceeded { need: u64, budget: u64 },
31
32 #[error("unsupported GGUF version: {0}")]
34 UnsupportedVersion(u32),
35
36 #[error("validation failed: {0}")]
38 ValidationFailed(String),
39}
40
/// Options controlling how a GGUF file is loaded.
///
/// NOTE(review): the code that consumes these options is not visible in this
/// section; the field descriptions below are inferred from the field names
/// and should be confirmed against the loader.
#[derive(Debug, Clone)]
pub struct LoadConfig {
    /// Optional memory ceiling in bytes; `None` (the default) sets no ceiling.
    pub max_memory_bytes: Option<usize>,
    /// Whether checksums should be validated; off by default.
    pub validate_checksums: bool,
    /// Whether tensors with unrecognised quantisation ids are tolerated;
    /// on by default.
    pub allow_unknown_quant_types: bool,
    /// Chunk size in bytes for streamed reads; 4 MiB by default.
    pub streaming_chunk_size: usize,
    /// Whether the file version is checked strictly; off by default.
    pub strict_version: bool,
}

impl Default for LoadConfig {
    /// Builds the permissive default configuration: no memory ceiling, no
    /// checksum validation, unknown quantisation types allowed, 4 MiB
    /// streaming chunks and non-strict version handling.
    fn default() -> Self {
        const MIB: usize = 1024 * 1024;

        LoadConfig {
            strict_version: false,
            streaming_chunk_size: 4 * MIB,
            allow_unknown_quant_types: true,
            validate_checksums: false,
            max_memory_bytes: None,
        }
    }
}
72
/// Counters and diagnostics describing a load operation.
///
/// Every field starts at its zero/empty value via the derived `Default`.
/// NOTE(review): the code that fills these fields is not visible in this
/// section; the descriptions follow the field names.
#[derive(Clone, Debug, Default)]
pub struct LoadStats {
    /// Number of tensors loaded.
    pub tensors_loaded: usize,
    /// Number of bytes loaded.
    pub bytes_loaded: u64,
    /// Number of tensors that were skipped.
    pub skipped_tensors: usize,
    /// Load duration in milliseconds.
    pub load_time_ms: u64,
    /// Peak memory use in bytes.
    pub peak_memory_bytes: usize,
    /// Human-readable warnings collected during validation.
    pub validation_warnings: Vec<String>,
}
93
/// Quantisation type ids recognised by this module, paired with the display
/// name returned by [`TensorEntry::quant_name`].
const KNOWN_QUANT_TYPES: &[(u32, &str)] = &[
    (0, "F32"),
    (1, "F16"),
    (2, "Q4_0"),
    (3, "Q4_1"),
    (6, "Q5_0"),
    (7, "Q5_1"),
    (8, "Q8_0"),
    (9, "Q8_1"),
    (10, "Q2_K"),
    (11, "Q3_K"),
    (12, "Q4_K"),
    (13, "Q5_K"),
    (14, "Q6_K"),
    (15, "Q8_K"),
    (30, "BF16"),
    (35, "TQ2_0"),
    (41, "Q1_0_g128"),
    (42, "TQ2_0_g128"),
];

/// Description of one tensor as recorded in a GGUF file's tensor-info table.
#[derive(Debug, Clone)]
pub struct TensorEntry {
    /// Tensor name as stored in the file.
    pub name: String,
    /// Dimension sizes, in the order they appear in the file.
    pub shape: Vec<u64>,
    /// Raw quantisation type id; see [`TensorEntry::quant_name`].
    pub quant_type_id: u32,
    /// Offset value read from the tensor-info record.
    /// NOTE(review): what the offset is relative to is not established in
    /// this section — confirm before using it to seek.
    pub offset: u64,
    /// Size of the tensor data in bytes.
    pub size_bytes: u64,
}

impl TensorEntry {
    /// Returns the number of elements, i.e. the product of all dimensions.
    ///
    /// An empty shape yields `1` (the empty product). Dimensions come from
    /// the file and are untrusted, so the multiplication saturates at
    /// `u64::MAX` instead of panicking (debug builds) or silently wrapping
    /// to a small value (release builds).
    pub fn element_count(&self) -> u64 {
        self.shape
            .iter()
            .fold(1u64, |acc, &dim| acc.saturating_mul(dim))
    }

    /// Returns the display name of this tensor's quantisation type, or
    /// `"UNKNOWN"` when the id is not in the known-types table.
    pub fn quant_name(&self) -> &'static str {
        Self::lookup_quant_name(self.quant_type_id).unwrap_or("UNKNOWN")
    }

    /// Returns `true` when this tensor's quantisation type id is in the
    /// known-types table.
    pub fn is_known_quant(&self) -> bool {
        Self::lookup_quant_name(self.quant_type_id).is_some()
    }

    /// Single table lookup shared by `quant_name` and `is_known_quant` so the
    /// two can never disagree.
    fn lookup_quant_name(id: u32) -> Option<&'static str> {
        KNOWN_QUANT_TYPES
            .iter()
            .find(|(known_id, _)| *known_id == id)
            .map(|(_, name)| *name)
    }
}
159
/// Magic number expected at the start of a file: the ASCII bytes `GGUF`
/// interpreted as a little-endian `u32` (`0x4655_4747`).
const GGUF_MAGIC: u32 = u32::from_le_bytes(*b"GGUF");

/// GGUF format versions this module treats as supported.
const SUPPORTED_VERSIONS: &[u32] = &[2, 3];
169
170fn read_u32_le(buf: &[u8], pos: &mut usize) -> Result<u32, LoadError> {
172 if *pos + 4 > buf.len() {
173 return Err(LoadError::Parse(format!(
174 "unexpected EOF at offset {} reading u32",
175 pos
176 )));
177 }
178 let v = u32::from_le_bytes(
179 buf[*pos..*pos + 4]
180 .try_into()
181 .map_err(|_| LoadError::Parse("slice conversion failed for u32".to_string()))?,
182 );
183 *pos += 4;
184 Ok(v)
185}
186
187fn read_u64_le(buf: &[u8], pos: &mut usize) -> Result<u64, LoadError> {
189 if *pos + 8 > buf.len() {
190 return Err(LoadError::Parse(format!(
191 "unexpected EOF at offset {} reading u64",
192 pos
193 )));
194 }
195 let v = u64::from_le_bytes(
196 buf[*pos..*pos + 8]
197 .try_into()
198 .map_err(|_| LoadError::Parse("slice conversion failed for u64".to_string()))?,
199 );
200 *pos += 8;
201 Ok(v)
202}
203
204fn read_gguf_string(buf: &[u8], pos: &mut usize) -> Result<String, LoadError> {
206 let len = read_u64_le(buf, pos)? as usize;
207 if *pos + len > buf.len() {
208 return Err(LoadError::Parse(format!(
209 "string of length {len} extends beyond buffer"
210 )));
211 }
212 let s = std::str::from_utf8(&buf[*pos..*pos + len])
213 .map_err(|e| LoadError::Parse(format!("invalid UTF-8 in string: {e}")))?
214 .to_string();
215 *pos += len;
216 Ok(s)
217}
218
219fn skip_metadata_value(buf: &[u8], pos: &mut usize, value_type: u32) -> Result<(), LoadError> {
222 match value_type {
223 0 | 1 => {
224 if *pos + 1 > buf.len() {
226 return Err(LoadError::Parse("EOF in u8/i8 value".to_string()));
227 }
228 *pos += 1;
229 }
230 2 | 3 => {
231 if *pos + 2 > buf.len() {
233 return Err(LoadError::Parse("EOF in u16/i16 value".to_string()));
234 }
235 *pos += 2;
236 }
237 4..=7 => {
238 if *pos + 4 > buf.len() {
240 return Err(LoadError::Parse(
241 "EOF in u32/i32/f32/bool value".to_string(),
242 ));
243 }
244 *pos += 4;
245 }
246 8 => {
247 read_gguf_string(buf, pos)?;
249 }
250 9 => {
251 let elem_type = read_u32_le(buf, pos)?;
253 let count = read_u64_le(buf, pos)?;
254 for _ in 0..count {
255 skip_metadata_value(buf, pos, elem_type)?;
256 }
257 }
258 10..=12 => {
259 if *pos + 8 > buf.len() {
261 return Err(LoadError::Parse("EOF in u64/i64/f64 value".to_string()));
262 }
263 *pos += 8;
264 }
265 other => {
266 return Err(LoadError::Parse(format!(
267 "unknown metadata value type id: {other}"
268 )));
269 }
270 }
271 Ok(())
272}
273
274struct ParsedGgufMeta {
276 version: u32,
277 tensor_entries: Vec<TensorEntry>,
278}
279
280fn parse_gguf_meta(buf: &[u8]) -> Result<ParsedGgufMeta, LoadError> {
283 let mut pos = 0usize;
284
285 let magic = read_u32_le(buf, &mut pos)?;
287 if magic != GGUF_MAGIC {
288 return Err(LoadError::Parse(format!(
289 "invalid GGUF magic: 0x{:08X} (expected 0x{:08X})",
290 magic, GGUF_MAGIC
291 )));
292 }
293
294 let version = read_u32_le(buf, &mut pos)?;
295
296 let tensor_count = read_u64_le(buf, &mut pos)?;
297 let metadata_kv_count = read_u64_le(buf, &mut pos)?;
298
299 for _ in 0..metadata_kv_count {
301 read_gguf_string(buf, &mut pos)?;
303 let value_type = read_u32_le(buf, &mut pos)?;
305 skip_metadata_value(buf, &mut pos, value_type)?;
307 }
308
309 let mut tensor_entries = Vec::with_capacity(tensor_count as usize);
311 for _ in 0..tensor_count {
312 let name = read_gguf_string(buf, &mut pos)?;
313 let n_dims = read_u32_le(buf, &mut pos)?;
314 let mut shape = Vec::with_capacity(n_dims as usize);
315 for _ in 0..n_dims {
316 shape.push(read_u64_le(buf, &mut pos)?);
317 }
318 let quant_type_id = read_u32_le(buf, &mut pos)?;
319 let offset = read_u64_le(buf, &mut pos)?;
320
321 let size_bytes = compute_tensor_size_bytes(&shape, quant_type_id);
323
324 tensor_entries.push(TensorEntry {
325 name,
326 shape,
327 quant_type_id,
328 offset,
329 size_bytes,
330 });
331 }
332
333 Ok(ParsedGgufMeta {
334 version,
335 tensor_entries,
336 })
337}
338
/// Computes the storage size in bytes of a tensor with the given `shape` and
/// quantisation type.
///
/// Each quantisation type packs `block_size` elements into `block_bytes`
/// bytes; the element count is rounded up to whole blocks. Unrecognised type
/// ids fall back to one byte per element. An empty shape counts as a single
/// element.
///
/// Shapes come from untrusted files, so every multiplication saturates at
/// `u64::MAX` instead of panicking (debug builds) or wrapping to a
/// misleadingly small size (release builds).
fn compute_tensor_size_bytes(shape: &[u64], quant_type_id: u32) -> u64 {
    let element_count = shape
        .iter()
        .fold(1u64, |acc, &dim| acc.saturating_mul(dim));

    // (elements per block, bytes per block)
    let (block_size, block_bytes): (u64, u64) = match quant_type_id {
        0 => (1, 4),       // F32
        1 => (1, 2),       // F16
        2 => (32, 18),     // Q4_0
        3 => (32, 20),     // Q4_1
        6 => (32, 22),     // Q5_0
        7 => (32, 24),     // Q5_1
        8 => (32, 34),     // Q8_0
        9 => (32, 40),     // Q8_1
        10 => (256, 84),   // Q2_K
        11 => (256, 110),  // Q3_K
        12 => (256, 144),  // Q4_K
        13 => (256, 176),  // Q5_K
        14 => (256, 210),  // Q6_K
        15 => (256, 292),  // Q8_K
        30 => (1, 2),      // BF16
        35 => (256, 66),   // TQ2_0
        41 => (128, 18),   // Q1_0_g128
        42 => (128, 34),   // TQ2_0_g128
        _ => (1, 1),       // unknown type: assume one byte per element
    };

    element_count
        .div_ceil(block_size)
        .saturating_mul(block_bytes)
}
367
368pub fn validate_gguf_file(path: &Path) -> Result<Vec<String>, LoadError> {
381 let mut file = std::fs::File::open(path)?;
382 let mut buf = Vec::new();
383 file.read_to_end(&mut buf)?;
384
385 let mut warnings = Vec::new();
386 let start = Instant::now();
387
388 let meta = parse_gguf_meta(&buf)?;
389
390 if !SUPPORTED_VERSIONS.contains(&meta.version) {
391 warnings.push(format!(
392 "GGUF version {} is not in the officially supported set {:?}",
393 meta.version, SUPPORTED_VERSIONS
394 ));
395 }
396
397 if meta.tensor_entries.is_empty() {
398 warnings.push("file contains zero tensors".to_string());
399 }
400
401 for entry in &meta.tensor_entries {
402 if !entry.is_known_quant() {
403 warnings.push(format!(
404 "tensor '{}' has unknown quantisation type id {}",
405 entry.name, entry.quant_type_id
406 ));
407 }
408 if entry.shape.is_empty() {
409 warnings.push(format!(
410 "tensor '{}' has zero-dimensional shape",
411 entry.name
412 ));
413 }
414 }
415
416 let _elapsed = start.elapsed();
417 Ok(warnings)
418}
419
420pub fn load_tensor_metadata(path: &Path) -> Result<Vec<TensorEntry>, LoadError> {
425 let _t0 = Instant::now();
426
427 let mut file = std::fs::File::open(path)?;
428 let mut buf = Vec::new();
429 file.read_to_end(&mut buf)?;
430
431 let meta = parse_gguf_meta(&buf)?;
432 Ok(meta.tensor_entries)
433}
434
435pub fn estimate_memory_bytes(path: &Path) -> Result<u64, LoadError> {
440 let entries = load_tensor_metadata(path)?;
441 let total: u64 = entries.iter().map(|e| e.size_bytes).sum();
442 Ok(total)
443}
444
445pub fn fits_in_budget(path: &Path, budget_bytes: u64) -> Result<bool, LoadError> {
449 let need = estimate_memory_bytes(path)?;
450 Ok(need <= budget_bytes)
451}
452
/// Iterator that hands out an owned byte buffer in consecutive chunks of at
/// most `chunk_size` bytes; only the final chunk may be shorter.
pub struct TensorChunkIter {
    /// The full buffer being chunked.
    data: Vec<u8>,
    /// Maximum length of each yielded chunk; always greater than zero.
    chunk_size: usize,
    /// Index of the first byte that has not been yielded yet.
    pos: usize,
}

impl TensorChunkIter {
    /// Creates an iterator over `data` yielding chunks of up to `chunk_size`
    /// bytes.
    ///
    /// # Panics
    /// Panics if `chunk_size` is zero.
    pub fn new(data: Vec<u8>, chunk_size: usize) -> Self {
        assert!(chunk_size > 0, "chunk_size must be > 0");
        Self {
            data,
            chunk_size,
            pos: 0,
        }
    }

    /// Returns the total number of chunks the whole buffer splits into,
    /// regardless of how many have already been yielded. Zero for an empty
    /// buffer.
    pub fn total_chunks(&self) -> usize {
        self.data.len().div_ceil(self.chunk_size)
    }

    /// Returns the number of bytes that have not been yielded yet.
    pub fn bytes_remaining(&self) -> usize {
        self.data.len().saturating_sub(self.pos)
    }
}

impl Iterator for TensorChunkIter {
    type Item = Vec<u8>;

    /// Yields a copy of the next chunk, or `None` once the buffer is
    /// exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        let rest = self.data.get(self.pos..)?;
        if rest.is_empty() {
            return None;
        }
        // Clamping against the remaining length means no addition is needed,
        // so even a `chunk_size` of `usize::MAX` cannot overflow.
        let n = rest.len().min(self.chunk_size);
        let chunk = rest[..n].to_vec();
        self.pos += n;
        Some(chunk)
    }

    /// Reports the exact number of chunks still to come, so that `collect`
    /// and similar consumers can preallocate.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.bytes_remaining().div_ceil(self.chunk_size);
        (remaining, Some(remaining))
    }
}

// `size_hint` is exact, which is the contract `ExactSizeIterator` requires.
impl ExactSizeIterator for TensorChunkIter {}