1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
//! GGUF (GPT-Generated Unified Format) parser
//!
//! Pure Rust implementation of GGUF binary format reader.
//! Used by llama.cpp, Ollama, and compatible tools.
//!
//! Format specification: <https://github.com/ggerganov/ggml/blob/master/docs/gguf.md>
//!
//! ## Module Structure
//!
//! This module is being incrementally refactored from a 54K-line monolith
//! into focused submodules for better testability and coverage.
// GGUF Module Structure
//
// Incremental split ("shatter") of src/gguf.rs (54K lines) into domain modules.
// Each module should be ≤800 lines for testability.
//
// Shatter Plan (19 modules from 54K lines; 15 are listed below):
// - types.rs: 🚧 additional tests for constants (~50 lines)
// - header.rs: GGUFHeader, TensorInfo
// - model.rs: GGUFModel, MappedGGUFModel
// - config.rs: GGUFConfig
// - transformer.rs: GGUFTransformer, GGUFTransformerLayer
// - quantized.rs: Quantized tensor types
// - owned.rs: OwnedQuantized* types
// - cached.rs: Cached model variants
// - batching.rs: Batch processing
// - scheduling.rs: Request scheduling
// - gpu_buffer.rs: GPU buffer management
// - prefix_cache.rs: Prefix caching
// - kv_cache.rs: KV cache types
// - inference.rs: OwnedQuantizedModel inference impl
// - cuda.rs: CUDA-specific code
//
// Migration strategy: include the monolith, gradually extract code into
// submodules, and re-export all items.
// Modular structure
pub
pub
// Pure math operations (shared between CPU and GPU paths)
// UCBD §4: declared `pub` so that `rms_norm` can be re-exported at the crate root
// Test helpers module - shared utilities for GGUF tests
pub
// Test factory module - synthesize valid GGUF files in memory
pub
// Rosetta format factory - synthesize all model formats (GGUF, SafeTensors, APR)
pub
// Re-export types from organized modules
pub use *;
pub use *;
pub use ;
pub use *;
pub use *;
pub use *;
pub use *;
pub use *;
pub use *;
pub use *;
// Re-export inference types
pub use *;
// Re-export cached model types from inference module
pub use ;
// Tests module - shattered from monolith into focused part files