1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#![cfg_attr(coverage_nightly, coverage(off))]
//! CB-1000 Series: MLOps Model Quality Detection
//!
//! Header-only analysis of ML model binary files (GGUF, APR, SafeTensors).
//! Never loads tensor data — parses only metadata for quality checks.
//!
//! Based on: BUG-GGUF-001/002 (aprender), BUG-212 (safetensors sharding),
//! LAYOUT-002 (APR row-major mandate), Sculley et al. (2015) ML tech debt.
use super::types::*;
use std::collections::HashMap;
use std::fs::{self, File};
use std::io::{Read, Seek, SeekFrom};
use std::path::{Path, PathBuf};
/// Directory names to skip when walking for model files.
///
/// The entries are VCS metadata (`.git`), tool state (`.claude`, `.pmat`),
/// dependency trees (`node_modules`, `vendor`, `.venv`) and build output or
/// caches (`target`, `build`, `dist`, `__pycache__`).
///
/// NOTE(review): the walker that reads this list is not visible in this file
/// (presumably it lives in `model_quality_parsing.rs`) — confirm whether
/// entries are compared against a single path component or the full path.
const SKIP_DIRS: &[&str] = &[
".git",
".claude",
"node_modules",
"target",
".pmat",
"vendor",
"build",
"dist",
"__pycache__",
".venv",
];
/// Model file extensions we recognize, written lowercase and without the
/// leading dot.
///
/// These are the same three strings that `ModelFormat::from_extension`
/// accepts; keep the two lists in sync when adding a format.
const MODEL_EXTENSIONS: &[&str] = &["gguf", "apr", "safetensors"];
/// Maximum tensor count before flagging as likely corrupt (BUG-GGUF-001).
///
/// Uses `u64`, the same integer type as `ModelMetadata::tensor_count`.
/// NOTE(review): the comparison itself is not in this file — confirm whether
/// the limit is inclusive or exclusive at the check site.
const MAX_TENSOR_COUNT: u64 = 100_000;
/// File size threshold for the "consider quantization" advisory.
///
/// The value is 10 GiB: 10 × 1024³ = 10_737_418_240 (binary units, not the
/// decimal 10 GB = 10_000_000_000).
const LARGE_MODEL_THRESHOLD: u64 = 10 * 1024 * 1024 * 1024;
// =============================================================================
// Model format detection
// =============================================================================
/// Model file formats recognized by this module.
///
/// Each variant corresponds to one of the file extensions accepted by
/// `ModelFormat::from_extension`. The type is a plain fieldless enum and is
/// `Copy`, so it can be passed around by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ModelFormat {
/// GGUF file (`.gguf`).
Gguf,
/// APR file (`.apr`).
Apr,
/// SafeTensors file (`.safetensors`).
SafeTensors,
}
impl ModelFormat {
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "check_compliance")]
/// From extension.
pub fn from_extension(ext: &str) -> Option<Self> {
match ext {
"gguf" => Some(Self::Gguf),
"apr" => Some(Self::Apr),
"safetensors" => Some(Self::SafeTensors),
_ => None,
}
}
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "check_compliance")]
/// Name.
pub fn name(&self) -> &'static str {
match self {
Self::Gguf => "GGUF",
Self::Apr => "APR",
Self::SafeTensors => "SafeTensors",
}
}
}
/// Minimal model metadata extracted from header only.
///
/// Per the module documentation, tensor data is never loaded; this struct
/// carries only what the quality checks need from the file header.
#[derive(Debug)]
pub struct ModelMetadata {
/// Which model file format the metadata was read from.
pub format: ModelFormat,
/// Size of the model file in bytes.
pub file_size_bytes: u64,
/// Number of tensors, when known.
/// NOTE(review): presumably `None` when the header does not expose a count
/// or could not be parsed — confirm against the parsers.
pub tensor_count: Option<u64>,
/// Model architecture string, when known.
/// NOTE(review): presumably taken from header metadata — confirm the source
/// key for each format.
pub architecture: Option<String>,
/// Whether a CRC is present.
/// NOTE(review): inferred from the field name only — confirm which formats
/// set this and what the CRC covers.
pub has_crc: bool,
}
// =============================================================================
// Include sub-files
// =============================================================================
// `include!` pastes each file's contents into this module, so the included
// code shares this module's scope: the `use` declarations, constants and types
// declared above are visible to it. Several of those imports and constants are
// not referenced anywhere else in this file; they are presumably consumed by
// the included files, so do not remove them as "unused" without checking there.
//
// File walking and model header parsing (GGUF, APR, SafeTensors)
include!("model_quality_parsing.rs");
// CB-1000 through CB-1008 detection functions
include!("model_quality_checks.rs");