1use std::collections::BTreeSet;
4
/// Preprocessing capabilities for which GPU coverage evidence is recorded.
///
/// Every variant is listed in `REQUIRED_CAPABILITIES`, so
/// `validate_gpu_preprocessing_coverage` rejects a record set that omits any of
/// them. `Ord` is derived so values can be collected into a `BTreeSet`.
#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
pub enum GpuPreprocessingCapability {
    /// Macro expansion.
    MacroExpansion,
    /// Conditional inclusion.
    ConditionalInclusion,
    /// Include graph tracking.
    IncludeGraphTracking,
    /// Token provenance.
    TokenProvenance,
    /// Line markers.
    LineMarkers,
    /// Stringification.
    Stringification,
    /// Token pasting.
    TokenPasting,
    /// Variadic macros.
    VariadicMacros,
    /// Builtin macros.
    BuiltinMacros,
}
27
/// Token classes for which GPU token-classification evidence is recorded.
///
/// Every variant is listed in `REQUIRED_TOKEN_CLASSES`, so
/// `validate_gpu_preprocessing_coverage` rejects a record set that omits any of
/// them. `Ord` is derived so values can be collected into a `BTreeSet`.
#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
pub enum GpuTokenClass {
    /// Comments.
    Comments,
    /// Identifiers.
    Identifiers,
    /// Literals.
    Literals,
    /// Punctuation.
    Punctuation,
    /// Whitespace.
    Whitespace,
    /// Directives.
    Directives,
    /// String/char states.
    StringCharStates,
}
46
/// One evidence record claiming coverage of a single preprocessing capability.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct GpuPreprocessingCapabilityRecord<'a> {
    /// Capability this record provides evidence for.
    pub capability: GpuPreprocessingCapability,
    /// Command that produced the evidence. Validation requires it to be
    /// non-blank and, after leading whitespace, to start with `./cargo_full `.
    pub command: &'a str,
    /// Evidence reference; validation only requires it to be non-blank.
    /// (The unit tests in this file use a document path here.)
    pub evidence: &'a str,
}
57
/// One evidence record claiming coverage of a single token class.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct GpuTokenClassRecord<'a> {
    /// Token class this record provides evidence for.
    pub class: GpuTokenClass,
    /// Command that produced the evidence. Validation requires it to be
    /// non-blank and, after leading whitespace, to start with `./cargo_full `.
    pub command: &'a str,
    /// Evidence reference; validation only requires it to be non-blank.
    /// (The unit tests in this file use a document path here.)
    pub evidence: &'a str,
}
68
/// Success value of `validate_gpu_preprocessing_coverage`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct GpuPreprocessingCoverageProof {
    /// Number of distinct capabilities found in the validated records.
    pub capability_count: usize,
    /// Number of distinct token classes found in the validated records.
    pub token_class_count: usize,
}
77
/// Success value of `validate_gpu_preprocessing_linux_artifact`.
///
/// Each field is the number parsed from the artifact key of the same name.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct GpuPreprocessingLinuxArtifactProof {
    /// Parsed `total_files` value (validated to be at least 250).
    pub total_files: u64,
    /// Parsed `total_source_bytes` value (validated to be at least 4 MiB).
    pub total_source_bytes: u64,
    /// Parsed `preprocessor_pipeline_cache_hits` value (validated to be at least 1).
    pub preprocessor_pipeline_cache_hits: u64,
    /// Parsed `include_cache_bytes_stored` value (validated to be at least
    /// `total_source_bytes`).
    pub include_cache_bytes_stored: u64,
}
90
/// Reasons `validate_gpu_preprocessing_coverage` rejects a record set.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum GpuPreprocessingCoverageError {
    /// The capability record slice was empty.
    EmptyCapabilities,
    /// The token-class record slice was empty.
    EmptyTokenClasses,
    /// A capability record had a blank (empty or whitespace-only) metadata field.
    EmptyCapabilityMetadata {
        /// Capability of the offending record.
        capability: GpuPreprocessingCapability,
        /// Name of the blank field: `"command"` or `"evidence"`.
        field: &'static str,
    },
    /// A token-class record had a blank (empty or whitespace-only) metadata field.
    EmptyTokenClassMetadata {
        /// Token class of the offending record.
        class: GpuTokenClass,
        /// Name of the blank field: `"command"` or `"evidence"`.
        field: &'static str,
    },
    /// A record's command did not start with `./cargo_full `.
    CommandDoesNotUseCargoFull {
        /// The rejected command, copied verbatim.
        command: String,
    },
    /// No record covered a capability listed in `REQUIRED_CAPABILITIES`.
    MissingCapability {
        /// First required capability (in list order) without a record.
        capability: GpuPreprocessingCapability,
    },
    /// No record covered a token class listed in `REQUIRED_TOKEN_CLASSES`.
    MissingTokenClass {
        /// First required token class (in list order) without a record.
        class: GpuTokenClass,
    },
}
128
129impl std::fmt::Display for GpuPreprocessingCoverageError {
130 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
131 match self {
132 Self::EmptyCapabilities => write!(
133 f,
134 "GPU preprocessing capability coverage is empty. Fix: add evidence for macro expansion, includes, provenance, line markers, stringification, token pasting, variadics, and builtins."
135 ),
136 Self::EmptyTokenClasses => write!(
137 f,
138 "GPU token-class coverage is empty. Fix: add evidence for comments, identifiers, literals, punctuation, whitespace, directives, and string/char states."
139 ),
140 Self::EmptyCapabilityMetadata { capability, field } => write!(
141 f,
142 "GPU preprocessing capability {capability:?} has empty {field}. Fix: every record needs command and evidence."
143 ),
144 Self::EmptyTokenClassMetadata { class, field } => write!(
145 f,
146 "GPU token class {class:?} has empty {field}. Fix: every record needs command and evidence."
147 ),
148 Self::CommandDoesNotUseCargoFull { command } => write!(
149 f,
150 "GPU preprocessing coverage command `{command}` does not use ./cargo_full. Fix: run preprocessing evidence through cargo_full."
151 ),
152 Self::MissingCapability { capability } => write!(
153 f,
154 "GPU preprocessing coverage is missing {capability:?}. Fix: add explicit parity evidence for that preprocessing capability."
155 ),
156 Self::MissingTokenClass { class } => write!(
157 f,
158 "GPU token-class coverage is missing {class:?}. Fix: add explicit token classification evidence for that class."
159 ),
160 }
161 }
162}
163
164impl std::error::Error for GpuPreprocessingCoverageError {}
165
/// Reasons `validate_gpu_preprocessing_linux_artifact` rejects an artifact.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum GpuPreprocessingLinuxArtifactError {
    /// A required marker substring was not found in the artifact text.
    MissingField {
        /// Human-readable label of the missing marker (not the searched text itself).
        field: &'static str,
    },
    /// A numeric key was absent, or its value could not be read as a `u64`.
    MissingNumber {
        /// Name of the artifact key that was looked up.
        field: &'static str,
    },
    /// A parsed number failed an at-least or exact-match requirement.
    ThresholdMiss {
        /// Name of the artifact key that was checked.
        field: &'static str,
        /// Value parsed from the artifact.
        observed: u64,
        /// Minimum (at-least checks) or exact value (exact checks) that was required.
        required: u64,
    },
}
189
190impl std::fmt::Display for GpuPreprocessingLinuxArtifactError {
191 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
192 match self {
193 Self::MissingField { field } => write!(
194 f,
195 "GPU preprocessing Linux artifact is missing {field}. Fix: commit CUDA preprocessing evidence over the Linux C corpus."
196 ),
197 Self::MissingNumber { field } => write!(
198 f,
199 "GPU preprocessing Linux artifact has no numeric {field}. Fix: record the exact release preprocessing counter."
200 ),
201 Self::ThresholdMiss {
202 field,
203 observed,
204 required,
205 } => write!(
206 f,
207 "GPU preprocessing Linux artifact {field}={observed} missed required {required}. Fix: keep preprocessing on the CUDA path and remove host-token staging."
208 ),
209 }
210 }
211}
212
213impl std::error::Error for GpuPreprocessingLinuxArtifactError {}
214
/// Capabilities that `validate_gpu_preprocessing_coverage` requires a record for.
///
/// The list is checked front to back, so its order decides which
/// `MissingCapability` error is reported when several entries are absent.
const REQUIRED_CAPABILITIES: &[GpuPreprocessingCapability] = &[
    GpuPreprocessingCapability::MacroExpansion,
    GpuPreprocessingCapability::ConditionalInclusion,
    GpuPreprocessingCapability::IncludeGraphTracking,
    GpuPreprocessingCapability::TokenProvenance,
    GpuPreprocessingCapability::LineMarkers,
    GpuPreprocessingCapability::Stringification,
    GpuPreprocessingCapability::TokenPasting,
    GpuPreprocessingCapability::VariadicMacros,
    GpuPreprocessingCapability::BuiltinMacros,
];
226
/// Token classes that `validate_gpu_preprocessing_coverage` requires a record for.
///
/// The list is checked front to back, so its order decides which
/// `MissingTokenClass` error is reported when several entries are absent.
const REQUIRED_TOKEN_CLASSES: &[GpuTokenClass] = &[
    GpuTokenClass::Comments,
    GpuTokenClass::Identifiers,
    GpuTokenClass::Literals,
    GpuTokenClass::Punctuation,
    GpuTokenClass::Whitespace,
    GpuTokenClass::Directives,
    GpuTokenClass::StringCharStates,
];
236
237pub fn validate_gpu_preprocessing_coverage(
239 capabilities: &[GpuPreprocessingCapabilityRecord<'_>],
240 token_classes: &[GpuTokenClassRecord<'_>],
241) -> Result<GpuPreprocessingCoverageProof, GpuPreprocessingCoverageError> {
242 if capabilities.is_empty() {
243 return Err(GpuPreprocessingCoverageError::EmptyCapabilities);
244 }
245 if token_classes.is_empty() {
246 return Err(GpuPreprocessingCoverageError::EmptyTokenClasses);
247 }
248
249 let mut covered_capabilities = BTreeSet::new();
250 for record in capabilities {
251 for (field, value) in [("command", record.command), ("evidence", record.evidence)] {
252 if value.trim().is_empty() {
253 return Err(GpuPreprocessingCoverageError::EmptyCapabilityMetadata {
254 capability: record.capability,
255 field,
256 });
257 }
258 }
259 require_cargo_full(record.command)?;
260 covered_capabilities.insert(record.capability);
261 }
262
263 let mut covered_token_classes = BTreeSet::new();
264 for record in token_classes {
265 for (field, value) in [("command", record.command), ("evidence", record.evidence)] {
266 if value.trim().is_empty() {
267 return Err(GpuPreprocessingCoverageError::EmptyTokenClassMetadata {
268 class: record.class,
269 field,
270 });
271 }
272 }
273 require_cargo_full(record.command)?;
274 covered_token_classes.insert(record.class);
275 }
276
277 for capability in REQUIRED_CAPABILITIES {
278 if !covered_capabilities.contains(capability) {
279 return Err(GpuPreprocessingCoverageError::MissingCapability {
280 capability: *capability,
281 });
282 }
283 }
284 for class in REQUIRED_TOKEN_CLASSES {
285 if !covered_token_classes.contains(class) {
286 return Err(GpuPreprocessingCoverageError::MissingTokenClass { class: *class });
287 }
288 }
289
290 Ok(GpuPreprocessingCoverageProof {
291 capability_count: covered_capabilities.len(),
292 token_class_count: covered_token_classes.len(),
293 })
294}
295
296fn require_cargo_full(command: &str) -> Result<(), GpuPreprocessingCoverageError> {
297 if command.trim_start().starts_with("./cargo_full ") {
298 Ok(())
299 } else {
300 Err(GpuPreprocessingCoverageError::CommandDoesNotUseCargoFull {
301 command: command.to_owned(),
302 })
303 }
304}
305
306pub fn validate_gpu_preprocessing_linux_artifact(
308 artifact: &str,
309) -> Result<GpuPreprocessingLinuxArtifactProof, GpuPreprocessingLinuxArtifactError> {
310 preproc_contains(
311 artifact,
312 "raw GPU lexer input",
313 "\"compile_tu_lexer_input_mode\": \"raw_bytes_gpu_lex\"",
314 )?;
315 preproc_contains(
316 artifact,
317 "raw GPU preprocessor input",
318 "\"compile_tu_preprocessor_input_mode\": \"raw_bytes_gpu_preprocess\"",
319 )?;
320 preproc_contains(
321 artifact,
322 "CUDA parser backend",
323 "\"resident_vyre_parse_backend_id\": \"cuda\"",
324 )?;
325 preproc_contains(
326 artifact,
327 "raw GPU syntax input",
328 "\"resident_vyre_parse_input_mode\": \"raw_bytes_gpu_syntax\"",
329 )?;
330 preproc_contains(artifact, "Linux macro state", "\"__KERNEL__=1\"")?;
331 preproc_contains(artifact, "Linux x86 macro state", "\"CONFIG_X86_64=1\"")?;
332 preproc_contains(artifact, "Linux include dirs", "include/uapi")?;
333
334 let total_files = preproc_number_field(artifact, "total_files")?;
335 let total_source_bytes = preproc_number_field(artifact, "total_source_bytes")?;
336 let preprocessor_pipeline_cache_hits =
337 preproc_number_field(artifact, "preprocessor_pipeline_cache_hits")?;
338 let preprocessor_pipeline_cache_misses =
339 preproc_number_field(artifact, "preprocessor_pipeline_cache_misses")?;
340 let preprocessor_pipeline_cache_evictions =
341 preproc_number_field(artifact, "preprocessor_pipeline_cache_evictions")?;
342 let macro_state_cache_hits = preproc_number_field(artifact, "macro_state_cache_hits")?;
343 let macro_state_cache_misses = preproc_number_field(artifact, "macro_state_cache_misses")?;
344 let include_cache_hits = preproc_number_field(artifact, "include_cache_hits")?;
345 let include_cache_misses = preproc_number_field(artifact, "include_cache_misses")?;
346 let include_cache_bytes_stored = preproc_number_field(artifact, "include_cache_bytes_stored")?;
347 let host_token_upload = preproc_number_field(
348 artifact,
349 "resident_vyre_parse_host_token_stream_upload_bytes",
350 )?;
351
352 preproc_at_least("total_files", total_files, 250)?;
353 preproc_at_least("total_source_bytes", total_source_bytes, 4 * 1024 * 1024)?;
354 preproc_at_least(
355 "preprocessor_pipeline_cache_hits",
356 preprocessor_pipeline_cache_hits,
357 1,
358 )?;
359 preproc_at_least(
360 "preprocessor_pipeline_cache_misses",
361 preprocessor_pipeline_cache_misses,
362 1,
363 )?;
364 preproc_exact(
365 "preprocessor_pipeline_cache_evictions",
366 preprocessor_pipeline_cache_evictions,
367 0,
368 )?;
369 preproc_at_least("macro_state_cache_hits", macro_state_cache_hits, 1)?;
370 preproc_at_least("macro_state_cache_misses", macro_state_cache_misses, 1)?;
371 preproc_at_least("include_cache_hits", include_cache_hits, 1)?;
372 preproc_at_least("include_cache_misses", include_cache_misses, 1)?;
373 preproc_at_least(
374 "include_cache_bytes_stored",
375 include_cache_bytes_stored,
376 total_source_bytes,
377 )?;
378 preproc_exact(
379 "resident_vyre_parse_host_token_stream_upload_bytes",
380 host_token_upload,
381 0,
382 )?;
383
384 Ok(GpuPreprocessingLinuxArtifactProof {
385 total_files,
386 total_source_bytes,
387 preprocessor_pipeline_cache_hits,
388 include_cache_bytes_stored,
389 })
390}
391
392fn preproc_contains(
393 artifact: &str,
394 field: &'static str,
395 needle: &str,
396) -> Result<(), GpuPreprocessingLinuxArtifactError> {
397 if artifact.contains(needle) {
398 Ok(())
399 } else {
400 Err(GpuPreprocessingLinuxArtifactError::MissingField { field })
401 }
402}
403
404fn preproc_exact(
405 field: &'static str,
406 observed: u64,
407 required: u64,
408) -> Result<(), GpuPreprocessingLinuxArtifactError> {
409 if observed == required {
410 Ok(())
411 } else {
412 Err(GpuPreprocessingLinuxArtifactError::ThresholdMiss {
413 field,
414 observed,
415 required,
416 })
417 }
418}
419
420fn preproc_at_least(
421 field: &'static str,
422 observed: u64,
423 required: u64,
424) -> Result<(), GpuPreprocessingLinuxArtifactError> {
425 if observed >= required {
426 Ok(())
427 } else {
428 Err(GpuPreprocessingLinuxArtifactError::ThresholdMiss {
429 field,
430 observed,
431 required,
432 })
433 }
434}
435
436fn preproc_number_field(
437 artifact: &str,
438 field: &'static str,
439) -> Result<u64, GpuPreprocessingLinuxArtifactError> {
440 let key = format!("\"{field}\"");
441 let start = artifact
442 .find(&key)
443 .ok_or(GpuPreprocessingLinuxArtifactError::MissingNumber { field })?;
444 let after_key = &artifact[start + key.len()..];
445 let colon = after_key
446 .find(':')
447 .ok_or(GpuPreprocessingLinuxArtifactError::MissingNumber { field })?;
448 let after_colon = after_key[colon + 1..].trim_start();
449 let digits = after_colon
450 .chars()
451 .take_while(|ch| ch.is_ascii_digit())
452 .collect::<String>();
453 if digits.is_empty() {
454 return Err(GpuPreprocessingLinuxArtifactError::MissingNumber { field });
455 }
456 digits
457 .parse::<u64>()
458 .map_err(|_| GpuPreprocessingLinuxArtifactError::MissingNumber { field })
459}
460
// Unit tests for the coverage validator and the Linux artifact validator.
#[cfg(test)]
mod tests {
    use super::*;

    // One record per required entry validates and reports the full counts.
    #[test]
    fn gpu_preprocessing_coverage_accepts_all_required_records() {
        let proof = validate_gpu_preprocessing_coverage(&capabilities(), &token_classes())
            .expect("Fix: complete GPU preprocessing coverage should pass");

        assert_eq!(proof.capability_count, 9);
        assert_eq!(proof.token_class_count, 7);
    }

    // `capabilities()` follows REQUIRED_CAPABILITIES order, so `pop` removes
    // the BuiltinMacros record.
    #[test]
    fn gpu_preprocessing_coverage_rejects_missing_builtin_macros() {
        let mut capabilities = capabilities();
        capabilities.pop();

        assert_eq!(
            validate_gpu_preprocessing_coverage(&capabilities, &token_classes())
                .expect_err("missing builtin macros should fail"),
            GpuPreprocessingCoverageError::MissingCapability {
                capability: GpuPreprocessingCapability::BuiltinMacros,
            }
        );
    }

    // Covers two rejections: a missing token class, and a command that does
    // not go through ./cargo_full.
    #[test]
    fn gpu_preprocessing_coverage_rejects_missing_string_char_states_and_raw_cargo() {
        // `token_classes()` follows REQUIRED_TOKEN_CLASSES order, so `pop`
        // removes the StringCharStates record.
        let mut missing_token_classes = token_classes();
        missing_token_classes.pop();
        assert_eq!(
            validate_gpu_preprocessing_coverage(&capabilities(), &missing_token_classes)
                .expect_err("missing string/char states should fail"),
            GpuPreprocessingCoverageError::MissingTokenClass {
                class: GpuTokenClass::StringCharStates,
            }
        );

        let mut capabilities = capabilities();
        capabilities[0].command = "cargo test";
        assert_eq!(
            validate_gpu_preprocessing_coverage(&capabilities, &token_classes())
                .expect_err("raw cargo should fail"),
            GpuPreprocessingCoverageError::CommandDoesNotUseCargoFull {
                command: "cargo test".to_owned(),
            }
        );
    }

    // Validates the artifact file embedded at compile time from the release
    // evidence directory.
    #[test]
    fn gpu_preprocessing_linux_artifact_accepts_committed_cuda_linux_evidence() {
        let proof = validate_gpu_preprocessing_linux_artifact(include_str!(
            "../../../release/evidence/parser/c-parser-linux-subsystem.json"
        ))
        .expect("Fix: committed Linux CUDA preprocessing artifact should pass");

        assert!(proof.total_files >= 250);
        assert!(proof.total_source_bytes >= 4 * 1024 * 1024);
        assert!(proof.preprocessor_pipeline_cache_hits >= 1);
        assert!(proof.include_cache_bytes_stored >= proof.total_source_bytes);
    }

    // Same fixture as the upload test below, except the lexer input mode is
    // `raw_bytes_cpu_lex`, so the first marker check fails.
    #[test]
    fn gpu_preprocessing_linux_artifact_rejects_cpu_preprocessing() {
        let artifact = r#"{
            "compile_tu_lexer_input_mode": "raw_bytes_cpu_lex",
            "compile_tu_preprocessor_input_mode": "raw_bytes_gpu_preprocess",
            "resident_vyre_parse_backend_id": "cuda",
            "resident_vyre_parse_input_mode": "raw_bytes_gpu_syntax",
            "macros": ["__KERNEL__=1", "CONFIG_X86_64=1"],
            "include_dirs": ["/linux/include/uapi"],
            "total_files": 490,
            "total_source_bytes": 7394810,
            "preprocessor_pipeline_cache_hits": 489,
            "preprocessor_pipeline_cache_misses": 1,
            "preprocessor_pipeline_cache_evictions": 0,
            "macro_state_cache_hits": 489,
            "macro_state_cache_misses": 1,
            "include_cache_hits": 489,
            "include_cache_misses": 1,
            "include_cache_bytes_stored": 7394810,
            "resident_vyre_parse_host_token_stream_upload_bytes": 0
        }"#;

        assert_eq!(
            validate_gpu_preprocessing_linux_artifact(artifact)
                .expect_err("CPU lexing should fail CUDA preprocessing release evidence"),
            GpuPreprocessingLinuxArtifactError::MissingField {
                field: "raw GPU lexer input",
            }
        );
    }

    // All markers and thresholds pass except the host token upload counter,
    // which must be exactly 0 but is 64 here.
    #[test]
    fn gpu_preprocessing_linux_artifact_rejects_host_token_uploads() {
        let artifact = r#"{
            "compile_tu_lexer_input_mode": "raw_bytes_gpu_lex",
            "compile_tu_preprocessor_input_mode": "raw_bytes_gpu_preprocess",
            "resident_vyre_parse_backend_id": "cuda",
            "resident_vyre_parse_input_mode": "raw_bytes_gpu_syntax",
            "macros": ["__KERNEL__=1", "CONFIG_X86_64=1"],
            "include_dirs": ["/linux/include/uapi"],
            "total_files": 490,
            "total_source_bytes": 7394810,
            "preprocessor_pipeline_cache_hits": 489,
            "preprocessor_pipeline_cache_misses": 1,
            "preprocessor_pipeline_cache_evictions": 0,
            "macro_state_cache_hits": 489,
            "macro_state_cache_misses": 1,
            "include_cache_hits": 489,
            "include_cache_misses": 1,
            "include_cache_bytes_stored": 7394810,
            "resident_vyre_parse_host_token_stream_upload_bytes": 64
        }"#;

        assert_eq!(
            validate_gpu_preprocessing_linux_artifact(artifact)
                .expect_err("host token upload should fail CUDA preprocessing release evidence"),
            GpuPreprocessingLinuxArtifactError::ThresholdMiss {
                field: "resident_vyre_parse_host_token_stream_upload_bytes",
                observed: 64,
                required: 0,
            }
        );
    }

    // Builds one valid record per entry of REQUIRED_CAPABILITIES, in list order.
    fn capabilities() -> Vec<GpuPreprocessingCapabilityRecord<'static>> {
        REQUIRED_CAPABILITIES
            .iter()
            .copied()
            .map(capability)
            .collect()
    }

    // Builds one valid record per entry of REQUIRED_TOKEN_CLASSES, in list order.
    fn token_classes() -> Vec<GpuTokenClassRecord<'static>> {
        REQUIRED_TOKEN_CLASSES
            .iter()
            .copied()
            .map(token_class)
            .collect()
    }

    // A capability record with a ./cargo_full command and non-blank evidence.
    fn capability(
        capability: GpuPreprocessingCapability,
    ) -> GpuPreprocessingCapabilityRecord<'static> {
        GpuPreprocessingCapabilityRecord {
            capability,
            command: "./cargo_full test -j1 -p vyrec",
            evidence: "release/parity/vyrec-gpu-preprocessing.md",
        }
    }

    // A token-class record with a ./cargo_full command and non-blank evidence.
    fn token_class(class: GpuTokenClass) -> GpuTokenClassRecord<'static> {
        GpuTokenClassRecord {
            class,
            command: "./cargo_full test -j1 -p vyrec",
            evidence: "release/parity/vyrec-gpu-token-classification.md",
        }
    }
}