Skip to main content

vyre_self_substrate/hardware/
gpu_preprocessing_coverage.rs

1//! Vyrec GPU preprocessing coverage validation.
2
3use std::collections::BTreeSet;
4
/// A preprocessing capability that must be handled on the GPU before the
/// parser receives its input.
///
/// The derived `Ord`/`PartialOrd` follow the declaration order of the
/// variants, which is what lets values be stored in ordered sets.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum GpuPreprocessingCapability {
    /// Expansion of macros.
    MacroExpansion,
    /// Conditional inclusion of source regions.
    ConditionalInclusion,
    /// Tracking of the include graph.
    IncludeGraphTracking,
    /// Provenance of tokens.
    TokenProvenance,
    /// Tracking of line markers.
    LineMarkers,
    /// Stringification inside macros.
    Stringification,
    /// Pasting of tokens.
    TokenPasting,
    /// Macros with variadic arguments.
    VariadicMacros,
    /// Macros that are built in.
    BuiltinMacros,
}
27
/// A class of token that GPU preprocessing/lexing is required to handle.
///
/// The derived `Ord`/`PartialOrd` follow the declaration order of the
/// variants, which is what lets values be stored in ordered sets.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum GpuTokenClass {
    /// Comment tokens.
    Comments,
    /// Identifier tokens.
    Identifiers,
    /// Literal tokens.
    Literals,
    /// Punctuation tokens.
    Punctuation,
    /// Whitespace runs.
    Whitespace,
    /// Preprocessor directives.
    Directives,
    /// Lexer states for string and character literals.
    StringCharStates,
}
46
47/// One GPU preprocessing capability evidence record.
48#[derive(Clone, Copy, Debug, Eq, PartialEq)]
49pub struct GpuPreprocessingCapabilityRecord<'a> {
50    /// Covered capability.
51    pub capability: GpuPreprocessingCapability,
52    /// Exact cargo_full command.
53    pub command: &'a str,
54    /// Evidence path or test.
55    pub evidence: &'a str,
56}
57
58/// One GPU token-class evidence record.
59#[derive(Clone, Copy, Debug, Eq, PartialEq)]
60pub struct GpuTokenClassRecord<'a> {
61    /// Covered token class.
62    pub class: GpuTokenClass,
63    /// Exact cargo_full command.
64    pub command: &'a str,
65    /// Evidence path or test.
66    pub evidence: &'a str,
67}
68
/// Summary returned once GPU preprocessing coverage has been validated.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct GpuPreprocessingCoverageProof {
    /// Number of distinct capabilities that had evidence.
    pub capability_count: usize,
    /// Number of distinct token classes that had evidence.
    pub token_class_count: usize,
}
77
/// Summary returned once the committed Linux GPU preprocessing artifact has
/// been validated.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct GpuPreprocessingLinuxArtifactProof {
    /// How many Linux C files the artifact covers.
    pub total_files: u64,
    /// How many bytes of Linux C source the artifact covers.
    pub total_source_bytes: u64,
    /// Number of hits in the preprocessor pipeline cache.
    pub preprocessor_pipeline_cache_hits: u64,
    /// Number of bytes stored in the include cache.
    pub include_cache_bytes_stored: u64,
}
90
91/// GPU preprocessing coverage errors.
92#[derive(Clone, Debug, Eq, PartialEq)]
93pub enum GpuPreprocessingCoverageError {
94    /// No capability evidence supplied.
95    EmptyCapabilities,
96    /// No token-class evidence supplied.
97    EmptyTokenClasses,
98    /// Capability metadata is empty.
99    EmptyCapabilityMetadata {
100        /// Capability.
101        capability: GpuPreprocessingCapability,
102        /// Field.
103        field: &'static str,
104    },
105    /// Token-class metadata is empty.
106    EmptyTokenClassMetadata {
107        /// Token class.
108        class: GpuTokenClass,
109        /// Field.
110        field: &'static str,
111    },
112    /// Command does not use cargo_full.
113    CommandDoesNotUseCargoFull {
114        /// Command.
115        command: String,
116    },
117    /// Required capability is missing.
118    MissingCapability {
119        /// Missing capability.
120        capability: GpuPreprocessingCapability,
121    },
122    /// Required token class is missing.
123    MissingTokenClass {
124        /// Missing token class.
125        class: GpuTokenClass,
126    },
127}
128
129impl std::fmt::Display for GpuPreprocessingCoverageError {
130    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
131        match self {
132            Self::EmptyCapabilities => write!(
133                f,
134                "GPU preprocessing capability coverage is empty. Fix: add evidence for macro expansion, includes, provenance, line markers, stringification, token pasting, variadics, and builtins."
135            ),
136            Self::EmptyTokenClasses => write!(
137                f,
138                "GPU token-class coverage is empty. Fix: add evidence for comments, identifiers, literals, punctuation, whitespace, directives, and string/char states."
139            ),
140            Self::EmptyCapabilityMetadata { capability, field } => write!(
141                f,
142                "GPU preprocessing capability {capability:?} has empty {field}. Fix: every record needs command and evidence."
143            ),
144            Self::EmptyTokenClassMetadata { class, field } => write!(
145                f,
146                "GPU token class {class:?} has empty {field}. Fix: every record needs command and evidence."
147            ),
148            Self::CommandDoesNotUseCargoFull { command } => write!(
149                f,
150                "GPU preprocessing coverage command `{command}` does not use ./cargo_full. Fix: run preprocessing evidence through cargo_full."
151            ),
152            Self::MissingCapability { capability } => write!(
153                f,
154                "GPU preprocessing coverage is missing {capability:?}. Fix: add explicit parity evidence for that preprocessing capability."
155            ),
156            Self::MissingTokenClass { class } => write!(
157                f,
158                "GPU token-class coverage is missing {class:?}. Fix: add explicit token classification evidence for that class."
159            ),
160        }
161    }
162}
163
164impl std::error::Error for GpuPreprocessingCoverageError {}
165
/// Reasons validation of the committed Linux GPU preprocessing artifact can fail.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum GpuPreprocessingLinuxArtifactError {
    /// A required literal fragment is absent from the artifact.
    MissingField {
        /// Label of the absent fragment.
        field: &'static str,
    },
    /// A required numeric field is absent or could not be read as a number.
    MissingNumber {
        /// Name of the numeric field.
        field: &'static str,
    },
    /// A numeric field failed its release threshold.
    ThresholdMiss {
        /// Name of the numeric field.
        field: &'static str,
        /// Value found in the artifact.
        observed: u64,
        /// Value the field had to reach (minimum) or equal (exact).
        required: u64,
    },
}
189
190impl std::fmt::Display for GpuPreprocessingLinuxArtifactError {
191    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
192        match self {
193            Self::MissingField { field } => write!(
194                f,
195                "GPU preprocessing Linux artifact is missing {field}. Fix: commit CUDA preprocessing evidence over the Linux C corpus."
196            ),
197            Self::MissingNumber { field } => write!(
198                f,
199                "GPU preprocessing Linux artifact has no numeric {field}. Fix: record the exact release preprocessing counter."
200            ),
201            Self::ThresholdMiss {
202                field,
203                observed,
204                required,
205            } => write!(
206                f,
207                "GPU preprocessing Linux artifact {field}={observed} missed required {required}. Fix: keep preprocessing on the CUDA path and remove host-token staging."
208            ),
209        }
210    }
211}
212
213impl std::error::Error for GpuPreprocessingLinuxArtifactError {}
214
215const REQUIRED_CAPABILITIES: &[GpuPreprocessingCapability] = &[
216    GpuPreprocessingCapability::MacroExpansion,
217    GpuPreprocessingCapability::ConditionalInclusion,
218    GpuPreprocessingCapability::IncludeGraphTracking,
219    GpuPreprocessingCapability::TokenProvenance,
220    GpuPreprocessingCapability::LineMarkers,
221    GpuPreprocessingCapability::Stringification,
222    GpuPreprocessingCapability::TokenPasting,
223    GpuPreprocessingCapability::VariadicMacros,
224    GpuPreprocessingCapability::BuiltinMacros,
225];
226
227const REQUIRED_TOKEN_CLASSES: &[GpuTokenClass] = &[
228    GpuTokenClass::Comments,
229    GpuTokenClass::Identifiers,
230    GpuTokenClass::Literals,
231    GpuTokenClass::Punctuation,
232    GpuTokenClass::Whitespace,
233    GpuTokenClass::Directives,
234    GpuTokenClass::StringCharStates,
235];
236
237/// Validate GPU preprocessing and token-class coverage.
238pub fn validate_gpu_preprocessing_coverage(
239    capabilities: &[GpuPreprocessingCapabilityRecord<'_>],
240    token_classes: &[GpuTokenClassRecord<'_>],
241) -> Result<GpuPreprocessingCoverageProof, GpuPreprocessingCoverageError> {
242    if capabilities.is_empty() {
243        return Err(GpuPreprocessingCoverageError::EmptyCapabilities);
244    }
245    if token_classes.is_empty() {
246        return Err(GpuPreprocessingCoverageError::EmptyTokenClasses);
247    }
248
249    let mut covered_capabilities = BTreeSet::new();
250    for record in capabilities {
251        for (field, value) in [("command", record.command), ("evidence", record.evidence)] {
252            if value.trim().is_empty() {
253                return Err(GpuPreprocessingCoverageError::EmptyCapabilityMetadata {
254                    capability: record.capability,
255                    field,
256                });
257            }
258        }
259        require_cargo_full(record.command)?;
260        covered_capabilities.insert(record.capability);
261    }
262
263    let mut covered_token_classes = BTreeSet::new();
264    for record in token_classes {
265        for (field, value) in [("command", record.command), ("evidence", record.evidence)] {
266            if value.trim().is_empty() {
267                return Err(GpuPreprocessingCoverageError::EmptyTokenClassMetadata {
268                    class: record.class,
269                    field,
270                });
271            }
272        }
273        require_cargo_full(record.command)?;
274        covered_token_classes.insert(record.class);
275    }
276
277    for capability in REQUIRED_CAPABILITIES {
278        if !covered_capabilities.contains(capability) {
279            return Err(GpuPreprocessingCoverageError::MissingCapability {
280                capability: *capability,
281            });
282        }
283    }
284    for class in REQUIRED_TOKEN_CLASSES {
285        if !covered_token_classes.contains(class) {
286            return Err(GpuPreprocessingCoverageError::MissingTokenClass { class: *class });
287        }
288    }
289
290    Ok(GpuPreprocessingCoverageProof {
291        capability_count: covered_capabilities.len(),
292        token_class_count: covered_token_classes.len(),
293    })
294}
295
296fn require_cargo_full(command: &str) -> Result<(), GpuPreprocessingCoverageError> {
297    if command.trim_start().starts_with("./cargo_full ") {
298        Ok(())
299    } else {
300        Err(GpuPreprocessingCoverageError::CommandDoesNotUseCargoFull {
301            command: command.to_owned(),
302        })
303    }
304}
305
306/// Validate the committed Linux CUDA preprocessing artifact.
307pub fn validate_gpu_preprocessing_linux_artifact(
308    artifact: &str,
309) -> Result<GpuPreprocessingLinuxArtifactProof, GpuPreprocessingLinuxArtifactError> {
310    preproc_contains(
311        artifact,
312        "raw GPU lexer input",
313        "\"compile_tu_lexer_input_mode\": \"raw_bytes_gpu_lex\"",
314    )?;
315    preproc_contains(
316        artifact,
317        "raw GPU preprocessor input",
318        "\"compile_tu_preprocessor_input_mode\": \"raw_bytes_gpu_preprocess\"",
319    )?;
320    preproc_contains(
321        artifact,
322        "CUDA parser backend",
323        "\"resident_vyre_parse_backend_id\": \"cuda\"",
324    )?;
325    preproc_contains(
326        artifact,
327        "raw GPU syntax input",
328        "\"resident_vyre_parse_input_mode\": \"raw_bytes_gpu_syntax\"",
329    )?;
330    preproc_contains(artifact, "Linux macro state", "\"__KERNEL__=1\"")?;
331    preproc_contains(artifact, "Linux x86 macro state", "\"CONFIG_X86_64=1\"")?;
332    preproc_contains(artifact, "Linux include dirs", "include/uapi")?;
333
334    let total_files = preproc_number_field(artifact, "total_files")?;
335    let total_source_bytes = preproc_number_field(artifact, "total_source_bytes")?;
336    let preprocessor_pipeline_cache_hits =
337        preproc_number_field(artifact, "preprocessor_pipeline_cache_hits")?;
338    let preprocessor_pipeline_cache_misses =
339        preproc_number_field(artifact, "preprocessor_pipeline_cache_misses")?;
340    let preprocessor_pipeline_cache_evictions =
341        preproc_number_field(artifact, "preprocessor_pipeline_cache_evictions")?;
342    let macro_state_cache_hits = preproc_number_field(artifact, "macro_state_cache_hits")?;
343    let macro_state_cache_misses = preproc_number_field(artifact, "macro_state_cache_misses")?;
344    let include_cache_hits = preproc_number_field(artifact, "include_cache_hits")?;
345    let include_cache_misses = preproc_number_field(artifact, "include_cache_misses")?;
346    let include_cache_bytes_stored = preproc_number_field(artifact, "include_cache_bytes_stored")?;
347    let host_token_upload = preproc_number_field(
348        artifact,
349        "resident_vyre_parse_host_token_stream_upload_bytes",
350    )?;
351
352    preproc_at_least("total_files", total_files, 250)?;
353    preproc_at_least("total_source_bytes", total_source_bytes, 4 * 1024 * 1024)?;
354    preproc_at_least(
355        "preprocessor_pipeline_cache_hits",
356        preprocessor_pipeline_cache_hits,
357        1,
358    )?;
359    preproc_at_least(
360        "preprocessor_pipeline_cache_misses",
361        preprocessor_pipeline_cache_misses,
362        1,
363    )?;
364    preproc_exact(
365        "preprocessor_pipeline_cache_evictions",
366        preprocessor_pipeline_cache_evictions,
367        0,
368    )?;
369    preproc_at_least("macro_state_cache_hits", macro_state_cache_hits, 1)?;
370    preproc_at_least("macro_state_cache_misses", macro_state_cache_misses, 1)?;
371    preproc_at_least("include_cache_hits", include_cache_hits, 1)?;
372    preproc_at_least("include_cache_misses", include_cache_misses, 1)?;
373    preproc_at_least(
374        "include_cache_bytes_stored",
375        include_cache_bytes_stored,
376        total_source_bytes,
377    )?;
378    preproc_exact(
379        "resident_vyre_parse_host_token_stream_upload_bytes",
380        host_token_upload,
381        0,
382    )?;
383
384    Ok(GpuPreprocessingLinuxArtifactProof {
385        total_files,
386        total_source_bytes,
387        preprocessor_pipeline_cache_hits,
388        include_cache_bytes_stored,
389    })
390}
391
392fn preproc_contains(
393    artifact: &str,
394    field: &'static str,
395    needle: &str,
396) -> Result<(), GpuPreprocessingLinuxArtifactError> {
397    if artifact.contains(needle) {
398        Ok(())
399    } else {
400        Err(GpuPreprocessingLinuxArtifactError::MissingField { field })
401    }
402}
403
404fn preproc_exact(
405    field: &'static str,
406    observed: u64,
407    required: u64,
408) -> Result<(), GpuPreprocessingLinuxArtifactError> {
409    if observed == required {
410        Ok(())
411    } else {
412        Err(GpuPreprocessingLinuxArtifactError::ThresholdMiss {
413            field,
414            observed,
415            required,
416        })
417    }
418}
419
420fn preproc_at_least(
421    field: &'static str,
422    observed: u64,
423    required: u64,
424) -> Result<(), GpuPreprocessingLinuxArtifactError> {
425    if observed >= required {
426        Ok(())
427    } else {
428        Err(GpuPreprocessingLinuxArtifactError::ThresholdMiss {
429            field,
430            observed,
431            required,
432        })
433    }
434}
435
436fn preproc_number_field(
437    artifact: &str,
438    field: &'static str,
439) -> Result<u64, GpuPreprocessingLinuxArtifactError> {
440    let key = format!("\"{field}\"");
441    let start = artifact
442        .find(&key)
443        .ok_or(GpuPreprocessingLinuxArtifactError::MissingNumber { field })?;
444    let after_key = &artifact[start + key.len()..];
445    let colon = after_key
446        .find(':')
447        .ok_or(GpuPreprocessingLinuxArtifactError::MissingNumber { field })?;
448    let after_colon = after_key[colon + 1..].trim_start();
449    let digits = after_colon
450        .chars()
451        .take_while(|ch| ch.is_ascii_digit())
452        .collect::<String>();
453    if digits.is_empty() {
454        return Err(GpuPreprocessingLinuxArtifactError::MissingNumber { field });
455    }
456    digits
457        .parse::<u64>()
458        .map_err(|_| GpuPreprocessingLinuxArtifactError::MissingNumber { field })
459}
460
#[cfg(test)]
mod tests {
    //! Unit tests for the coverage validator and the Linux artifact validator.

    use super::*;

    #[test]
    fn gpu_preprocessing_coverage_accepts_all_required_records() {
        let proof = validate_gpu_preprocessing_coverage(&capabilities(), &token_classes())
            .expect("Fix: complete GPU preprocessing coverage should pass");

        assert_eq!(proof.capability_count, 9);
        assert_eq!(proof.token_class_count, 7);
    }

    #[test]
    fn gpu_preprocessing_coverage_rejects_empty_inputs() {
        assert_eq!(
            validate_gpu_preprocessing_coverage(&[], &token_classes())
                .expect_err("empty capability evidence should fail"),
            GpuPreprocessingCoverageError::EmptyCapabilities
        );
        assert_eq!(
            validate_gpu_preprocessing_coverage(&capabilities(), &[])
                .expect_err("empty token-class evidence should fail"),
            GpuPreprocessingCoverageError::EmptyTokenClasses
        );
    }

    #[test]
    fn gpu_preprocessing_coverage_rejects_blank_metadata() {
        let mut blank_evidence = capabilities();
        blank_evidence[0].evidence = "  ";
        assert_eq!(
            validate_gpu_preprocessing_coverage(&blank_evidence, &token_classes())
                .expect_err("blank capability evidence should fail"),
            GpuPreprocessingCoverageError::EmptyCapabilityMetadata {
                capability: GpuPreprocessingCapability::MacroExpansion,
                field: "evidence",
            }
        );

        let mut blank_command = token_classes();
        blank_command[0].command = "";
        assert_eq!(
            validate_gpu_preprocessing_coverage(&capabilities(), &blank_command)
                .expect_err("blank token-class command should fail"),
            GpuPreprocessingCoverageError::EmptyTokenClassMetadata {
                class: GpuTokenClass::Comments,
                field: "command",
            }
        );
    }

    #[test]
    fn gpu_preprocessing_coverage_rejects_missing_builtin_macros() {
        // `BuiltinMacros` is the last required capability, so `pop` removes it.
        let mut capabilities = capabilities();
        capabilities.pop();

        assert_eq!(
            validate_gpu_preprocessing_coverage(&capabilities, &token_classes())
                .expect_err("missing builtin macros should fail"),
            GpuPreprocessingCoverageError::MissingCapability {
                capability: GpuPreprocessingCapability::BuiltinMacros,
            }
        );
    }

    #[test]
    fn gpu_preprocessing_coverage_rejects_missing_string_char_states_and_raw_cargo() {
        // `StringCharStates` is the last required class, so `pop` removes it.
        let mut missing_token_classes = token_classes();
        missing_token_classes.pop();
        assert_eq!(
            validate_gpu_preprocessing_coverage(&capabilities(), &missing_token_classes)
                .expect_err("missing string/char states should fail"),
            GpuPreprocessingCoverageError::MissingTokenClass {
                class: GpuTokenClass::StringCharStates,
            }
        );

        let mut capabilities = capabilities();
        capabilities[0].command = "cargo test";
        assert_eq!(
            validate_gpu_preprocessing_coverage(&capabilities, &token_classes())
                .expect_err("raw cargo should fail"),
            GpuPreprocessingCoverageError::CommandDoesNotUseCargoFull {
                command: "cargo test".to_owned(),
            }
        );
    }

    #[test]
    fn gpu_preprocessing_linux_artifact_accepts_committed_cuda_linux_evidence() {
        let proof = validate_gpu_preprocessing_linux_artifact(include_str!(
            "../../../release/evidence/parser/c-parser-linux-subsystem.json"
        ))
        .expect("Fix: committed Linux CUDA preprocessing artifact should pass");

        assert!(proof.total_files >= 250);
        assert!(proof.total_source_bytes >= 4 * 1024 * 1024);
        assert!(proof.preprocessor_pipeline_cache_hits >= 1);
        assert!(proof.include_cache_bytes_stored >= proof.total_source_bytes);
    }

    #[test]
    fn gpu_preprocessing_linux_artifact_accepts_synthetic_cuda_evidence() {
        // Guards the fixture itself: the rejection tests below are only
        // meaningful if the unmodified fixture passes.
        let artifact = linux_artifact("raw_bytes_gpu_lex", 0);

        assert_eq!(
            validate_gpu_preprocessing_linux_artifact(&artifact)
                .expect("Fix: synthetic CUDA preprocessing artifact should pass"),
            GpuPreprocessingLinuxArtifactProof {
                total_files: 490,
                total_source_bytes: 7394810,
                preprocessor_pipeline_cache_hits: 489,
                include_cache_bytes_stored: 7394810,
            }
        );
    }

    #[test]
    fn gpu_preprocessing_linux_artifact_rejects_cpu_preprocessing() {
        let artifact = linux_artifact("raw_bytes_cpu_lex", 0);

        assert_eq!(
            validate_gpu_preprocessing_linux_artifact(&artifact)
                .expect_err("CPU lexing should fail CUDA preprocessing release evidence"),
            GpuPreprocessingLinuxArtifactError::MissingField {
                field: "raw GPU lexer input",
            }
        );
    }

    #[test]
    fn gpu_preprocessing_linux_artifact_rejects_host_token_uploads() {
        let artifact = linux_artifact("raw_bytes_gpu_lex", 64);

        assert_eq!(
            validate_gpu_preprocessing_linux_artifact(&artifact)
                .expect_err("host token upload should fail CUDA preprocessing release evidence"),
            GpuPreprocessingLinuxArtifactError::ThresholdMiss {
                field: "resident_vyre_parse_host_token_stream_upload_bytes",
                observed: 64,
                required: 0,
            }
        );
    }

    /// Build an artifact that passes validation when `lexer_mode` is
    /// `raw_bytes_gpu_lex` and `host_token_upload_bytes` is `0`; callers vary
    /// exactly one of the two to provoke a specific failure.
    fn linux_artifact(lexer_mode: &str, host_token_upload_bytes: u64) -> String {
        format!(
            r#"{{
          "compile_tu_lexer_input_mode": "{lexer_mode}",
          "compile_tu_preprocessor_input_mode": "raw_bytes_gpu_preprocess",
          "resident_vyre_parse_backend_id": "cuda",
          "resident_vyre_parse_input_mode": "raw_bytes_gpu_syntax",
          "macros": ["__KERNEL__=1", "CONFIG_X86_64=1"],
          "include_dirs": ["/linux/include/uapi"],
          "total_files": 490,
          "total_source_bytes": 7394810,
          "preprocessor_pipeline_cache_hits": 489,
          "preprocessor_pipeline_cache_misses": 1,
          "preprocessor_pipeline_cache_evictions": 0,
          "macro_state_cache_hits": 489,
          "macro_state_cache_misses": 1,
          "include_cache_hits": 489,
          "include_cache_misses": 1,
          "include_cache_bytes_stored": 7394810,
          "resident_vyre_parse_host_token_stream_upload_bytes": {host_token_upload_bytes}
        }}"#
        )
    }

    /// One valid record per required capability, in required order.
    fn capabilities() -> Vec<GpuPreprocessingCapabilityRecord<'static>> {
        REQUIRED_CAPABILITIES
            .iter()
            .copied()
            .map(capability)
            .collect()
    }

    /// One valid record per required token class, in required order.
    fn token_classes() -> Vec<GpuTokenClassRecord<'static>> {
        REQUIRED_TOKEN_CLASSES
            .iter()
            .copied()
            .map(token_class)
            .collect()
    }

    /// A valid evidence record for `capability`.
    fn capability(
        capability: GpuPreprocessingCapability,
    ) -> GpuPreprocessingCapabilityRecord<'static> {
        GpuPreprocessingCapabilityRecord {
            capability,
            command: "./cargo_full test -j1 -p vyrec",
            evidence: "release/parity/vyrec-gpu-preprocessing.md",
        }
    }

    /// A valid evidence record for `class`.
    fn token_class(class: GpuTokenClass) -> GpuTokenClassRecord<'static> {
        GpuTokenClassRecord {
            class,
            command: "./cargo_full test -j1 -p vyrec",
            evidence: "release/parity/vyrec-gpu-token-classification.md",
        }
    }
}