Skip to main content

apr_qa_report/
certification_data.rs

1//! Certification Data for Oracle Integration (PMAT-260)
2//!
3//! This module provides the data structures and CSV parsing for the certification
4//! lookup table consumed by aprender's `apr oracle` CLI command.
5//!
6//! # Theoretical Foundation
7//!
8//! This implementation follows:
9//! - **Toyota Production System (Ohno, 1988)**: Jidoka - automatic stop on malformed data
10//! - **Poka-Yoke (Shingo, 1986)**: Schema validation prevents invalid certification states
11//! - **Popperian Falsification (Popper, 1959)**: Round-trip integrity tests verify correctness
12//!
13//! # CSV Schema
14//!
15//! The `models.csv` file uses this schema:
16//! ```csv
17//! model_id,family,parameters,size_category,status,mqs_score,grade,certified_tier,last_certified,g1,g2,g3,g4,tps_gguf_cpu,tps_gguf_gpu,tps_apr_cpu,tps_apr_gpu,tps_st_cpu,tps_st_gpu,provenance_verified
18//! ```
19
20use chrono::{DateTime, Utc};
21use serde::{Deserialize, Serialize};
22use std::path::Path;
23
24use crate::error::{Error, Result};
25
26/// Certification status for a model.
27///
28/// Status definitions follow the specification:
29/// - **CERTIFIED**: MQS >= 800, all gateway gates passed, tier requirements met
30/// - **BLOCKED**: MQS < 800 or gateway gate failure, cannot be used in production
31/// - **PENDING**: No certification run completed, awaiting testing
32/// - **UNTESTED**: Legacy status for models never tested
33#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
34#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
35pub enum ModelStatus {
36    /// MQS >= 800, all gateways passed
37    Certified,
38    /// MQS < 800 or gateway failure
39    Blocked,
40    /// Awaiting certification run
41    #[default]
42    Pending,
43    /// Never tested (legacy)
44    Untested,
45}
46
47impl std::fmt::Display for ModelStatus {
48    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
49        match self {
50            Self::Certified => write!(f, "CERTIFIED"),
51            Self::Blocked => write!(f, "BLOCKED"),
52            Self::Pending => write!(f, "PENDING"),
53            Self::Untested => write!(f, "UNTESTED"),
54        }
55    }
56}
57
58impl std::str::FromStr for ModelStatus {
59    type Err = Error;
60
61    fn from_str(s: &str) -> Result<Self> {
62        match s.to_uppercase().as_str() {
63            "CERTIFIED" => Ok(Self::Certified),
64            "BLOCKED" => Ok(Self::Blocked),
65            "PENDING" => Ok(Self::Pending),
66            "UNTESTED" => Ok(Self::Untested),
67            other => Err(Error::Validation(format!("Invalid status: {other}"))),
68        }
69    }
70}
71
72/// Size category for resource-aware scheduling.
73///
74/// Matches the `SizeCategory` enum in `apr-qa-runner::playbook`.
75#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
76#[serde(rename_all = "lowercase")]
77pub enum SizeCategory {
78    /// < 1B params, 4 workers
79    #[default]
80    Tiny,
81    /// 1-2B params, 4 workers
82    Small,
83    /// 2-7B params, 2 workers
84    Medium,
85    /// 7-14B params, 1 worker
86    Large,
87    /// 14-32B params, 1 worker
88    Xlarge,
89    /// > 32B params, 1 worker
90    Huge,
91}
92
93impl std::fmt::Display for SizeCategory {
94    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
95        match self {
96            Self::Tiny => write!(f, "tiny"),
97            Self::Small => write!(f, "small"),
98            Self::Medium => write!(f, "medium"),
99            Self::Large => write!(f, "large"),
100            Self::Xlarge => write!(f, "xlarge"),
101            Self::Huge => write!(f, "huge"),
102        }
103    }
104}
105
106impl std::str::FromStr for SizeCategory {
107    type Err = Error;
108
109    fn from_str(s: &str) -> Result<Self> {
110        match s.to_lowercase().as_str() {
111            "tiny" => Ok(Self::Tiny),
112            "small" => Ok(Self::Small),
113            "medium" => Ok(Self::Medium),
114            "large" => Ok(Self::Large),
115            "xlarge" => Ok(Self::Xlarge),
116            "huge" => Ok(Self::Huge),
117            other => Err(Error::Validation(format!("Invalid size category: {other}"))),
118        }
119    }
120}
121
122/// A single row from the certification lookup table (models.csv).
123///
124/// This struct represents the complete certification state for a model variant,
125/// including MQS score, gateway results, and performance metrics.
126///
127/// The boolean fields (g1-g4, provenance_verified) match the CSV schema
128/// and represent gateway pass/fail state directly from test results.
129#[derive(Debug, Clone, Serialize, Deserialize)]
130#[allow(clippy::struct_excessive_bools)]
131pub struct CertificationRow {
132    /// HuggingFace model ID (e.g., "Qwen/Qwen2.5-Coder-0.5B-Instruct")
133    pub model_id: String,
134
135    /// Model family (e.g., "qwen-coder", "llama", "mistral")
136    pub family: String,
137
138    /// Parameter count string (e.g., "0.5B", "1.5B", "7B")
139    pub parameters: String,
140
141    /// Size category for resource scheduling
142    pub size_category: SizeCategory,
143
144    /// Certification status
145    pub status: ModelStatus,
146
147    /// Model Qualification Score (0-1000)
148    pub mqs_score: u32,
149
150    /// Letter grade (A, B, C, D, F, or "-" for ungraded)
151    pub grade: String,
152
153    /// Highest certified tier (quick, smoke, mvp, full, or "none")
154    pub certified_tier: String,
155
156    /// Last certification timestamp (ISO8601)
157    pub last_certified: DateTime<Utc>,
158
159    // Gateway results (G1-G4)
160    /// G1: Model loads successfully
161    pub g1: bool,
162    /// G2: Basic inference works
163    pub g2: bool,
164    /// G3: No crashes or panics
165    pub g3: bool,
166    /// G4: Output is not garbage
167    pub g4: bool,
168
169    // Performance metrics (tokens per second)
170    /// GGUF format, CPU backend
171    pub tps_gguf_cpu: Option<f64>,
172    /// GGUF format, GPU backend
173    pub tps_gguf_gpu: Option<f64>,
174    /// APR format, CPU backend
175    pub tps_apr_cpu: Option<f64>,
176    /// APR format, GPU backend
177    pub tps_apr_gpu: Option<f64>,
178    /// SafeTensors format, CPU backend
179    pub tps_st_cpu: Option<f64>,
180    /// SafeTensors format, GPU backend
181    pub tps_st_gpu: Option<f64>,
182
183    /// Whether model provenance has been verified
184    pub provenance_verified: bool,
185}
186
187impl Default for CertificationRow {
188    fn default() -> Self {
189        Self {
190            model_id: String::new(),
191            family: String::new(),
192            parameters: String::new(),
193            size_category: SizeCategory::default(),
194            status: ModelStatus::default(),
195            mqs_score: 0,
196            grade: "-".to_string(),
197            certified_tier: "none".to_string(),
198            last_certified: Utc::now(),
199            g1: false,
200            g2: false,
201            g3: false,
202            g4: false,
203            tps_gguf_cpu: None,
204            tps_gguf_gpu: None,
205            tps_apr_cpu: None,
206            tps_apr_gpu: None,
207            tps_st_cpu: None,
208            tps_st_gpu: None,
209            provenance_verified: false,
210        }
211    }
212}
213
214impl CertificationRow {
215    /// Create a new certification row for a model.
216    #[must_use]
217    pub fn new(model_id: impl Into<String>, family: impl Into<String>) -> Self {
218        Self {
219            model_id: model_id.into(),
220            family: family.into(),
221            ..Default::default()
222        }
223    }
224
225    /// Check if all gateway checks passed.
226    #[must_use]
227    pub const fn all_gateways_passed(&self) -> bool {
228        self.g1 && self.g2 && self.g3 && self.g4
229    }
230
231    /// Derive status from MQS score and gateway results.
232    ///
233    /// Follows the specification:
234    /// - CERTIFIED: MQS >= 800 AND all gateways passed
235    /// - BLOCKED: otherwise
236    #[must_use]
237    pub fn derive_status(&self) -> ModelStatus {
238        if self.mqs_score >= 800 && self.all_gateways_passed() {
239            ModelStatus::Certified
240        } else if self.mqs_score == 0 && !self.g1 {
241            ModelStatus::Pending
242        } else {
243            ModelStatus::Blocked
244        }
245    }
246
247    /// Derive grade from MQS score.
248    ///
249    /// Grade thresholds:
250    /// - A: 900-1000
251    /// - B: 800-899
252    /// - C: 600-799
253    /// - D: 400-599
254    /// - F: 0-399
255    #[must_use]
256    pub fn derive_grade(&self) -> String {
257        match self.mqs_score {
258            900..=1000 => "A".to_string(),
259            800..=899 => "B".to_string(),
260            600..=799 => "C".to_string(),
261            400..=599 => "D".to_string(),
262            0..=399 => "F".to_string(),
263            _ => "-".to_string(),
264        }
265    }
266}
267
268/// Read certification rows from a CSV file.
269///
270/// # Errors
271///
272/// Returns an error if:
273/// - The file cannot be read
274/// - The CSV is malformed
275/// - A row contains invalid data
276pub fn read_models_csv<P: AsRef<Path>>(path: P) -> Result<Vec<CertificationRow>> {
277    let file = std::fs::File::open(path.as_ref()).map_err(|e| {
278        Error::Io(format!(
279            "Failed to open models.csv at {}: {e}",
280            path.as_ref().display()
281        ))
282    })?;
283
284    let mut reader = csv::ReaderBuilder::new()
285        .has_headers(true)
286        .flexible(true)
287        .from_reader(file);
288
289    let mut rows = Vec::new();
290
291    for (idx, result) in reader.records().enumerate() {
292        let record =
293            result.map_err(|e| Error::Validation(format!("CSV parse error at row {idx}: {e}")))?;
294
295        let row = parse_csv_record(&record, idx)?;
296        rows.push(row);
297    }
298
299    Ok(rows)
300}
301
302/// Parse a single CSV record into a CertificationRow.
303fn parse_csv_record(record: &csv::StringRecord, idx: usize) -> Result<CertificationRow> {
304    // Helper for getting field with context
305    let get_field = |i: usize, name: &str| -> Result<&str> {
306        record
307            .get(i)
308            .ok_or_else(|| Error::Validation(format!("Missing field '{name}' at row {idx}")))
309    };
310
311    let model_id = get_field(0, "model_id")?.to_string();
312    let family = get_field(1, "family")?.to_string();
313    let parameters = get_field(2, "parameters")?.to_string();
314    let size_category: SizeCategory = get_field(3, "size_category")?.parse()?;
315    let status: ModelStatus = get_field(4, "status")?.parse()?;
316    let mqs_score: u32 = get_field(5, "mqs_score")?
317        .parse()
318        .map_err(|e| Error::Validation(format!("Invalid mqs_score at row {idx}: {e}")))?;
319    let grade = get_field(6, "grade")?.to_string();
320    let certified_tier = get_field(7, "certified_tier")?.to_string();
321
322    let last_certified = get_field(8, "last_certified")?;
323    let last_certified: DateTime<Utc> = DateTime::parse_from_rfc3339(last_certified)
324        .map_err(|e| Error::Validation(format!("Invalid timestamp at row {idx}: {e}")))?
325        .with_timezone(&Utc);
326
327    let parse_bool = |i: usize, name: &str| -> Result<bool> {
328        match get_field(i, name)?.to_lowercase().as_str() {
329            "true" | "1" | "yes" => Ok(true),
330            "false" | "0" | "no" | "" => Ok(false),
331            other => Err(Error::Validation(format!(
332                "Invalid boolean '{other}' for {name} at row {idx}"
333            ))),
334        }
335    };
336
337    let parse_optional_f64 = |i: usize| -> Option<f64> {
338        record.get(i).and_then(|s| {
339            let s = s.trim();
340            if s.is_empty() { None } else { s.parse().ok() }
341        })
342    };
343
344    Ok(CertificationRow {
345        model_id,
346        family,
347        parameters,
348        size_category,
349        status,
350        mqs_score,
351        grade,
352        certified_tier,
353        last_certified,
354        g1: parse_bool(9, "g1")?,
355        g2: parse_bool(10, "g2")?,
356        g3: parse_bool(11, "g3")?,
357        g4: parse_bool(12, "g4")?,
358        tps_gguf_cpu: parse_optional_f64(13),
359        tps_gguf_gpu: parse_optional_f64(14),
360        tps_apr_cpu: parse_optional_f64(15),
361        tps_apr_gpu: parse_optional_f64(16),
362        tps_st_cpu: parse_optional_f64(17),
363        tps_st_gpu: parse_optional_f64(18),
364        provenance_verified: parse_bool(19, "provenance_verified")?,
365    })
366}
367
368/// Write certification rows to a CSV file.
369///
370/// # Errors
371///
372/// Returns an error if the file cannot be written.
373pub fn write_models_csv<P: AsRef<Path>>(rows: &[CertificationRow], path: P) -> Result<()> {
374    let file = std::fs::File::create(path.as_ref()).map_err(|e| {
375        Error::Io(format!(
376            "Failed to create models.csv at {}: {e}",
377            path.as_ref().display()
378        ))
379    })?;
380
381    let mut writer = csv::Writer::from_writer(file);
382
383    // Write header
384    writer
385        .write_record([
386            "model_id",
387            "family",
388            "parameters",
389            "size_category",
390            "status",
391            "mqs_score",
392            "grade",
393            "certified_tier",
394            "last_certified",
395            "g1",
396            "g2",
397            "g3",
398            "g4",
399            "tps_gguf_cpu",
400            "tps_gguf_gpu",
401            "tps_apr_cpu",
402            "tps_apr_gpu",
403            "tps_st_cpu",
404            "tps_st_gpu",
405            "provenance_verified",
406        ])
407        .map_err(|e| Error::Io(format!("Failed to write CSV header: {e}")))?;
408
409    // Write rows
410    for row in rows {
411        let format_optional_f64 =
412            |opt: Option<f64>| -> String { opt.map_or_else(String::new, |v| format!("{v:.1}")) };
413
414        writer
415            .write_record([
416                &row.model_id,
417                &row.family,
418                &row.parameters,
419                &row.size_category.to_string(),
420                &row.status.to_string(),
421                &row.mqs_score.to_string(),
422                &row.grade,
423                &row.certified_tier,
424                &row.last_certified.to_rfc3339(),
425                &row.g1.to_string(),
426                &row.g2.to_string(),
427                &row.g3.to_string(),
428                &row.g4.to_string(),
429                &format_optional_f64(row.tps_gguf_cpu),
430                &format_optional_f64(row.tps_gguf_gpu),
431                &format_optional_f64(row.tps_apr_cpu),
432                &format_optional_f64(row.tps_apr_gpu),
433                &format_optional_f64(row.tps_st_cpu),
434                &format_optional_f64(row.tps_st_gpu),
435                &row.provenance_verified.to_string(),
436            ])
437            .map_err(|e| Error::Io(format!("Failed to write CSV row: {e}")))?;
438    }
439
440    writer
441        .flush()
442        .map_err(|e| Error::Io(format!("Failed to flush CSV writer: {e}")))?;
443
444    Ok(())
445}
446
447/// Lookup a certification row by model ID.
448///
449/// Returns `None` if the model is not found.
450#[must_use]
451pub fn lookup_model<'a>(
452    rows: &'a [CertificationRow],
453    model_id: &str,
454) -> Option<&'a CertificationRow> {
455    rows.iter().find(|r| r.model_id == model_id)
456}
457
458/// Lookup certification rows by family.
459#[must_use]
460pub fn lookup_family<'a>(rows: &'a [CertificationRow], family: &str) -> Vec<&'a CertificationRow> {
461    rows.iter().filter(|r| r.family == family).collect()
462}
463
464#[cfg(test)]
465mod tests {
466    use super::*;
467    use tempfile::NamedTempFile;
468
469    const TEST_CSV: &str = r#"model_id,family,parameters,size_category,status,mqs_score,grade,certified_tier,last_certified,g1,g2,g3,g4,tps_gguf_cpu,tps_gguf_gpu,tps_apr_cpu,tps_apr_gpu,tps_st_cpu,tps_st_gpu,provenance_verified
470Qwen/Qwen2.5-Coder-0.5B-Instruct,qwen-coder,0.5B,tiny,BLOCKED,246,F,quick,2026-02-04T13:28:18.663298968+00:00,true,true,true,true,,,,,,,false
471Qwen/Qwen2.5-Coder-1.5B-Instruct,qwen-coder,1.5B,small,BLOCKED,415,-,none,2026-02-03T15:50:04.803811188+00:00,true,true,true,true,17.9,129.8,16.2,0.6,2.9,23.8,false
472meta-llama/Llama-3.2-1B-Instruct,llama,1B,small,PENDING,0,-,none,2026-01-31T00:00:00+00:00,false,false,false,false,,,,,,,false
473"#;
474
475    // FALSIFY-CERT-001: Round-trip integrity
476    //
477    // Falsification hypothesis: "CSV round-trip corrupts data"
478    // If read(write(rows)) != rows, implementation is broken.
479    #[test]
480    fn test_falsify_cert_001_roundtrip_integrity() {
481        let temp_file = NamedTempFile::new().expect("temp file");
482
483        // Write test CSV to temp file
484        std::fs::write(temp_file.path(), TEST_CSV).expect("write");
485
486        // Read original
487        let original = read_models_csv(temp_file.path()).expect("read original");
488        assert_eq!(original.len(), 3, "Expected 3 rows");
489
490        // Write to new temp file
491        let temp_file2 = NamedTempFile::new().expect("temp file 2");
492        write_models_csv(&original, temp_file2.path()).expect("write");
493
494        // Read back
495        let roundtrip = read_models_csv(temp_file2.path()).expect("read roundtrip");
496        assert_eq!(roundtrip.len(), original.len(), "Row count mismatch");
497
498        // Verify each row
499        for (orig, rt) in original.iter().zip(roundtrip.iter()) {
500            assert_eq!(orig.model_id, rt.model_id, "model_id mismatch");
501            assert_eq!(orig.family, rt.family, "family mismatch");
502            assert_eq!(orig.parameters, rt.parameters, "parameters mismatch");
503            assert_eq!(
504                orig.size_category, rt.size_category,
505                "size_category mismatch"
506            );
507            assert_eq!(orig.status, rt.status, "status mismatch");
508            assert_eq!(orig.mqs_score, rt.mqs_score, "mqs_score mismatch");
509            assert_eq!(orig.grade, rt.grade, "grade mismatch");
510            assert_eq!(
511                orig.certified_tier, rt.certified_tier,
512                "certified_tier mismatch"
513            );
514            assert_eq!(orig.g1, rt.g1, "g1 mismatch");
515            assert_eq!(orig.g2, rt.g2, "g2 mismatch");
516            assert_eq!(orig.g3, rt.g3, "g3 mismatch");
517            assert_eq!(orig.g4, rt.g4, "g4 mismatch");
518            assert_eq!(
519                orig.provenance_verified, rt.provenance_verified,
520                "provenance_verified mismatch"
521            );
522        }
523    }
524
525    #[test]
526    fn test_model_status_from_str() {
527        assert_eq!(
528            "CERTIFIED".parse::<ModelStatus>().unwrap(),
529            ModelStatus::Certified
530        );
531        assert_eq!(
532            "BLOCKED".parse::<ModelStatus>().unwrap(),
533            ModelStatus::Blocked
534        );
535        assert_eq!(
536            "PENDING".parse::<ModelStatus>().unwrap(),
537            ModelStatus::Pending
538        );
539        assert_eq!(
540            "UNTESTED".parse::<ModelStatus>().unwrap(),
541            ModelStatus::Untested
542        );
543        assert_eq!(
544            "certified".parse::<ModelStatus>().unwrap(),
545            ModelStatus::Certified
546        );
547        assert!("INVALID".parse::<ModelStatus>().is_err());
548    }
549
550    #[test]
551    fn test_model_status_display() {
552        assert_eq!(format!("{}", ModelStatus::Certified), "CERTIFIED");
553        assert_eq!(format!("{}", ModelStatus::Blocked), "BLOCKED");
554        assert_eq!(format!("{}", ModelStatus::Pending), "PENDING");
555        assert_eq!(format!("{}", ModelStatus::Untested), "UNTESTED");
556    }
557
558    #[test]
559    fn test_size_category_from_str() {
560        assert_eq!("tiny".parse::<SizeCategory>().unwrap(), SizeCategory::Tiny);
561        assert_eq!(
562            "SMALL".parse::<SizeCategory>().unwrap(),
563            SizeCategory::Small
564        );
565        assert_eq!(
566            "Medium".parse::<SizeCategory>().unwrap(),
567            SizeCategory::Medium
568        );
569        assert_eq!(
570            "large".parse::<SizeCategory>().unwrap(),
571            SizeCategory::Large
572        );
573        assert_eq!(
574            "xlarge".parse::<SizeCategory>().unwrap(),
575            SizeCategory::Xlarge
576        );
577        assert_eq!("huge".parse::<SizeCategory>().unwrap(), SizeCategory::Huge);
578        assert!("invalid".parse::<SizeCategory>().is_err());
579    }
580
581    #[test]
582    fn test_size_category_display() {
583        assert_eq!(format!("{}", SizeCategory::Tiny), "tiny");
584        assert_eq!(format!("{}", SizeCategory::Small), "small");
585        assert_eq!(format!("{}", SizeCategory::Medium), "medium");
586        assert_eq!(format!("{}", SizeCategory::Large), "large");
587        assert_eq!(format!("{}", SizeCategory::Xlarge), "xlarge");
588        assert_eq!(format!("{}", SizeCategory::Huge), "huge");
589    }
590
591    #[test]
592    fn test_certification_row_default() {
593        let row = CertificationRow::default();
594        assert!(row.model_id.is_empty());
595        assert_eq!(row.status, ModelStatus::Pending);
596        assert_eq!(row.mqs_score, 0);
597        assert!(!row.g1);
598    }
599
600    #[test]
601    fn test_certification_row_new() {
602        let row = CertificationRow::new("test/model", "test-family");
603        assert_eq!(row.model_id, "test/model");
604        assert_eq!(row.family, "test-family");
605    }
606
607    #[test]
608    fn test_all_gateways_passed() {
609        // Default has no gateways passed
610        let row = CertificationRow::default();
611        assert!(!row.all_gateways_passed());
612
613        // All gateways passed
614        let row = CertificationRow {
615            g1: true,
616            g2: true,
617            g3: true,
618            g4: true,
619            ..Default::default()
620        };
621        assert!(row.all_gateways_passed());
622
623        // One gateway failed
624        let row = CertificationRow {
625            g1: true,
626            g2: true,
627            g3: false,
628            g4: true,
629            ..Default::default()
630        };
631        assert!(!row.all_gateways_passed());
632    }
633
634    #[test]
635    fn test_derive_status() {
636        // Test CERTIFIED: MQS >= 800 and all gateways passed
637        let row = CertificationRow {
638            g1: true,
639            g2: true,
640            g3: true,
641            g4: true,
642            mqs_score: 850,
643            ..Default::default()
644        };
645        assert_eq!(row.derive_status(), ModelStatus::Certified);
646
647        // Test BLOCKED: MQS < 800
648        let row = CertificationRow {
649            g1: true,
650            g2: true,
651            g3: true,
652            g4: true,
653            mqs_score: 799,
654            ..Default::default()
655        };
656        assert_eq!(row.derive_status(), ModelStatus::Blocked);
657
658        // Test BLOCKED: gateway failure
659        let row = CertificationRow {
660            g1: true,
661            g2: true,
662            g3: false,
663            g4: true,
664            mqs_score: 900,
665            ..Default::default()
666        };
667        assert_eq!(row.derive_status(), ModelStatus::Blocked);
668
669        // Test PENDING: never tested
670        let row = CertificationRow {
671            g1: false,
672            mqs_score: 0,
673            ..Default::default()
674        };
675        assert_eq!(row.derive_status(), ModelStatus::Pending);
676    }
677
678    #[test]
679    fn test_derive_grade() {
680        let row_a = CertificationRow {
681            mqs_score: 950,
682            ..Default::default()
683        };
684        assert_eq!(row_a.derive_grade(), "A");
685
686        let row_b = CertificationRow {
687            mqs_score: 850,
688            ..Default::default()
689        };
690        assert_eq!(row_b.derive_grade(), "B");
691
692        let row_c = CertificationRow {
693            mqs_score: 700,
694            ..Default::default()
695        };
696        assert_eq!(row_c.derive_grade(), "C");
697
698        let row_d = CertificationRow {
699            mqs_score: 500,
700            ..Default::default()
701        };
702        assert_eq!(row_d.derive_grade(), "D");
703
704        let row_f = CertificationRow {
705            mqs_score: 200,
706            ..Default::default()
707        };
708        assert_eq!(row_f.derive_grade(), "F");
709    }
710
711    #[test]
712    fn test_lookup_model() {
713        let rows = vec![
714            CertificationRow::new("test/model-1", "family-a"),
715            CertificationRow::new("test/model-2", "family-b"),
716            CertificationRow::new("test/model-3", "family-a"),
717        ];
718
719        let found = lookup_model(&rows, "test/model-2");
720        assert!(found.is_some());
721        assert_eq!(found.unwrap().family, "family-b");
722
723        let not_found = lookup_model(&rows, "nonexistent");
724        assert!(not_found.is_none());
725    }
726
727    #[test]
728    fn test_lookup_family() {
729        let rows = vec![
730            CertificationRow::new("test/model-1", "family-a"),
731            CertificationRow::new("test/model-2", "family-b"),
732            CertificationRow::new("test/model-3", "family-a"),
733        ];
734
735        let family_a = lookup_family(&rows, "family-a");
736        assert_eq!(family_a.len(), 2);
737
738        let family_b = lookup_family(&rows, "family-b");
739        assert_eq!(family_b.len(), 1);
740
741        let family_c = lookup_family(&rows, "family-c");
742        assert!(family_c.is_empty());
743    }
744
745    #[test]
746    fn test_read_missing_file() {
747        let result = read_models_csv("/nonexistent/path/models.csv");
748        assert!(result.is_err());
749    }
750
751    #[test]
752    fn test_read_malformed_csv() {
753        let temp_file = NamedTempFile::new().expect("temp file");
754        std::fs::write(
755            temp_file.path(),
756            "model_id,family\ntest,test,extra,fields,here",
757        )
758        .expect("write");
759
760        // Should handle flexible field count gracefully
761        let result = read_models_csv(temp_file.path());
762        // This may error due to missing required fields
763        assert!(result.is_err());
764    }
765
766    #[test]
767    fn test_optional_tps_fields() {
768        let temp_file = NamedTempFile::new().expect("temp file");
769        std::fs::write(temp_file.path(), TEST_CSV).expect("write");
770
771        let cert_rows = read_models_csv(temp_file.path()).expect("read");
772
773        // First row has no TPS values
774        let first_row = &cert_rows[0];
775        assert!(first_row.tps_gguf_cpu.is_none());
776        assert!(first_row.tps_gguf_gpu.is_none());
777
778        // Second row has TPS values
779        let second_row = &cert_rows[1];
780        assert!(second_row.tps_gguf_cpu.is_some());
781        assert!((second_row.tps_gguf_cpu.unwrap() - 17.9).abs() < 0.1);
782    }
783
784    #[test]
785    fn test_write_models_csv_creates_file() {
786        let temp_file = NamedTempFile::new().expect("temp file");
787
788        let rows = vec![CertificationRow {
789            model_id: "test/model".to_string(),
790            family: "test-family".to_string(),
791            parameters: "1B".to_string(),
792            size_category: SizeCategory::Small,
793            status: ModelStatus::Blocked,
794            mqs_score: 500,
795            grade: "D".to_string(),
796            certified_tier: "mvp".to_string(),
797            g1: true,
798            g2: true,
799            g3: false,
800            g4: true,
801            tps_gguf_cpu: Some(10.5),
802            tps_gguf_gpu: Some(100.0),
803            tps_apr_cpu: None,
804            tps_apr_gpu: None,
805            tps_st_cpu: None,
806            tps_st_gpu: None,
807            provenance_verified: true,
808            ..Default::default()
809        }];
810
811        write_models_csv(&rows, temp_file.path()).expect("write");
812
813        // Verify file exists and can be read back
814        let read_back = read_models_csv(temp_file.path()).expect("read");
815        assert_eq!(read_back.len(), 1);
816        assert_eq!(read_back[0].model_id, "test/model");
817        assert_eq!(read_back[0].tps_gguf_cpu.unwrap(), 10.5);
818        assert!(read_back[0].provenance_verified);
819    }
820
821    #[test]
822    fn test_write_to_nonexistent_dir() {
823        let result = write_models_csv(&[], "/nonexistent/dir/models.csv");
824        assert!(result.is_err());
825        let err = result.unwrap_err();
826        assert!(err.to_string().contains("Failed to create"));
827    }
828
829    #[test]
830    fn test_model_status_serde() {
831        let status = ModelStatus::Certified;
832        let json = serde_json::to_string(&status).expect("serialize");
833        assert_eq!(json, "\"CERTIFIED\"");
834
835        let deserialized: ModelStatus = serde_json::from_str(&json).expect("deserialize");
836        assert_eq!(deserialized, ModelStatus::Certified);
837    }
838
839    #[test]
840    fn test_size_category_serde() {
841        let size = SizeCategory::Medium;
842        let json = serde_json::to_string(&size).expect("serialize");
843        assert_eq!(json, "\"medium\"");
844
845        let deserialized: SizeCategory = serde_json::from_str(&json).expect("deserialize");
846        assert_eq!(deserialized, SizeCategory::Medium);
847    }
848
849    #[test]
850    fn test_certification_row_serde() {
851        let row = CertificationRow {
852            model_id: "test/model".to_string(),
853            family: "test".to_string(),
854            status: ModelStatus::Certified,
855            mqs_score: 850,
856            ..Default::default()
857        };
858
859        let json = serde_json::to_string(&row).expect("serialize");
860        assert!(json.contains("\"model_id\":\"test/model\""));
861        assert!(json.contains("\"status\":\"CERTIFIED\""));
862
863        let deserialized: CertificationRow = serde_json::from_str(&json).expect("deserialize");
864        assert_eq!(deserialized.model_id, "test/model");
865        assert_eq!(deserialized.status, ModelStatus::Certified);
866    }
867
868    #[test]
869    fn test_invalid_status_parse() {
870        let result = "GARBAGE".parse::<ModelStatus>();
871        assert!(result.is_err());
872        assert!(result.unwrap_err().to_string().contains("Invalid status"));
873    }
874
875    #[test]
876    fn test_invalid_size_category_parse() {
877        let result = "massive".parse::<SizeCategory>();
878        assert!(result.is_err());
879        assert!(
880            result
881                .unwrap_err()
882                .to_string()
883                .contains("Invalid size category")
884        );
885    }
886
887    #[test]
888    fn test_model_status_default() {
889        let status = ModelStatus::default();
890        assert_eq!(status, ModelStatus::Pending);
891    }
892
893    #[test]
894    fn test_size_category_default() {
895        let size = SizeCategory::default();
896        assert_eq!(size, SizeCategory::Tiny);
897    }
898
899    // ── FALSIFY-CERT-002: Status derivation from MQS score ────────────────────
900    //
901    // Prediction: status is deterministically derived from mqs_score and g1-g4 gateways.
902    // Per Popper (1959), this test attempts to falsify the status derivation algorithm.
903
/// Attempts to falsify `derive_status` by probing each documented outcome,
/// including both sides of the MQS 800 certification threshold.
#[test]
fn test_falsify_cert_002_status_derivation() {
    const ALL_PASS: [bool; 4] = [true; 4];
    const NONE_PASS: [bool; 4] = [false; 4];

    // Builds a row with the given score and g1..g4 gateway results; every
    // other field keeps its default value.
    let row_with = |mqs_score: u32, [g1, g2, g3, g4]: [bool; 4]| CertificationRow {
        mqs_score,
        g1,
        g2,
        g3,
        g4,
        ..CertificationRow::default()
    };

    // (score, gateways g1..g4, expected status, failure message)
    let cases = [
        // All gateways passed, high score -> CERTIFIED
        (
            850,
            ALL_PASS,
            ModelStatus::Certified,
            "All gateways passed + score >= 800 should be CERTIFIED",
        ),
        // Boundary: the documented threshold is inclusive (MQS >= 800).
        (
            800,
            ALL_PASS,
            ModelStatus::Certified,
            "All gateways passed + score == 800 (lower bound) should be CERTIFIED",
        ),
        // Boundary: one point below the threshold must not certify.
        (
            799,
            ALL_PASS,
            ModelStatus::Blocked,
            "All gateways passed + score == 799 (just below threshold) should be BLOCKED",
        ),
        // All gateways passed, low score -> BLOCKED
        (
            500,
            ALL_PASS,
            ModelStatus::Blocked,
            "All gateways passed + score < 800 should be BLOCKED",
        ),
        // Gateway G3 failed, high score -> BLOCKED
        (
            950,
            [true, true, false, true],
            ModelStatus::Blocked,
            "Gateway failed should always be BLOCKED",
        ),
        // Score 0 with g1=false -> PENDING (never tested)
        (
            0,
            NONE_PASS,
            ModelStatus::Pending,
            "Score 0 with g1=false should be PENDING (not yet tested)",
        ),
    ];

    for (score, gateways, expected, why) in cases {
        assert_eq!(
            row_with(score, gateways).derive_status(),
            expected,
            "{}",
            why
        );
    }
}
966
967    // ── FALSIFY-CERT-003: Grade derivation from MQS score ─────────────────────
968    //
969    // Prediction: grade is deterministically derived from mqs_score using fixed thresholds.
970    // Per Popper (1959), this test attempts to falsify the grade derivation algorithm.
971    //
972    // Grade thresholds (from derive_grade):
973    // A: 900-1000
974    // B: 800-899
975    // C: 600-799
976    // D: 400-599
977    // F: 0-399
978
/// Attempts to falsify `derive_grade` by checking the upper bound, an interior
/// value, and the lower bound of every grade band.
#[test]
fn test_falsify_cert_003_grade_derivation() {
    // Derives the grade for a row whose only non-default field is the score.
    fn grade_of(mqs_score: u32) -> String {
        let row = CertificationRow {
            mqs_score,
            ..CertificationRow::default()
        };
        row.derive_grade()
    }

    // (score, expected grade, failure message), checked in this order.
    let expectations: [(u32, &str, &str); 15] = [
        // A grade: 900-1000
        (1000, "A", "1000 should be A"),
        (950, "A", "950 should be A"),
        (900, "A", "900 (lower bound) should be A"),
        // B grade: 800-899
        (899, "B", "899 (upper bound of B) should be B"),
        (850, "B", "850 should be B"),
        (800, "B", "800 (lower bound) should be B"),
        // C grade: 600-799
        (799, "C", "799 (upper bound of C) should be C"),
        (700, "C", "700 should be C"),
        (600, "C", "600 (lower bound) should be C"),
        // D grade: 400-599
        (599, "D", "599 (upper bound of D) should be D"),
        (500, "D", "500 should be D"),
        (400, "D", "400 (lower bound) should be D"),
        // F grade: 0-399
        (399, "F", "399 (upper bound of F) should be F"),
        (200, "F", "200 should be F"),
        (0, "F", "0 should be F"),
    ];

    for (score, grade, why) in expectations {
        assert_eq!(grade_of(score), grade, "{}", why);
    }
}
1015}