use std::collections::BTreeSet;
/// C semantic category for which parity evidence is tracked.
///
/// The derived ordering follows declaration order, so ordered collections of
/// categories iterate from `Declarations` through `LvalueRules`. Keep new
/// variants in the intended order.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum SemanticParityCategory {
    Declarations,
    Typedefs,
    TagNamespaces,
    Scopes,
    Linkage,
    StorageClass,
    Qualifiers,
    IntegerPromotions,
    UsualArithmeticConversions,
    LvalueRules,
}
/// One parity evidence entry for a single semantic category.
///
/// Both text fields borrow from the caller for `'a`; the record itself is a
/// cheap `Copy` value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SemanticParityRecord<'a> {
    /// Category this entry provides evidence for.
    pub category: SemanticParityCategory,
    /// Command line that produced the evidence.
    pub command: &'a str,
    /// Free-form evidence reference (the tests in this file use a repository path).
    pub evidence: &'a str,
}
/// Result of a successful coverage validation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SemanticParityCoverageProof {
    /// Number of distinct categories seen across the validated records.
    pub category_count: usize,
}
/// Counters extracted from a Linux artifact that passed validation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SemanticParityLinuxArtifactProof {
    /// Value of the artifact's `total_files` counter.
    pub total_files: u64,
    /// Value of the artifact's `total_source_bytes` counter.
    pub total_source_bytes: u64,
    /// Value of the artifact's `total_semantic_graph_bytes` counter.
    pub total_semantic_graph_bytes: u64,
    /// Value of `resident_vyre_vs_tree_sitter_median_speedup_x1000`
    /// (presumably a speedup ratio scaled by 1000; confirm with the artifact producer).
    pub speedup_x1000: u64,
}
/// Reasons a set of parity records fails coverage validation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SemanticParityCoverageError {
    /// No records were supplied at all.
    EmptyRecords,
    /// A record's `command` or `evidence` text is empty or whitespace-only.
    EmptyMetadata {
        /// Category of the offending record.
        category: SemanticParityCategory,
        /// Name of the blank field (`"command"` or `"evidence"`).
        field: &'static str,
    },
    /// A record's command does not start with the `./cargo_full ` wrapper.
    CommandDoesNotUseCargoFull {
        /// Category of the offending record.
        category: SemanticParityCategory,
        /// The rejected command, copied verbatim.
        command: String,
    },
    /// A required category has no record.
    MissingCategory {
        /// The first required category that was not covered.
        category: SemanticParityCategory,
    },
}
/// Human-readable messages; each one names the problem and ends with a
/// `Fix:` hint describing the remedy.
impl std::fmt::Display for SemanticParityCoverageError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // No placeholders here, so the literal is written directly.
            Self::EmptyRecords => f.write_str(
                "semantic parity coverage is empty. Fix: add clang parity evidence for every required C semantic category.",
            ),
            Self::EmptyMetadata { category, field } => write!(
                f,
                "semantic parity record {:?} has empty {}. Fix: every category needs command and evidence.",
                category, field
            ),
            Self::CommandDoesNotUseCargoFull { category, command } => write!(
                f,
                "semantic parity record {:?} uses `{}` instead of ./cargo_full. Fix: run semantic parity through cargo_full.",
                category, command
            ),
            Self::MissingCategory { category } => write!(
                f,
                "semantic parity coverage is missing {:?}. Fix: add explicit clang parity evidence for that semantic category.",
                category
            ),
        }
    }
}

// Marker impl: the error carries no underlying source.
impl std::error::Error for SemanticParityCoverageError {}
/// Reasons a Linux artifact fails validation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SemanticParityLinuxArtifactError {
    /// A required literal marker is absent from the artifact text.
    MissingField {
        /// Human-readable label of the missing marker.
        field: &'static str,
    },
    /// A counter could not be read as an unsigned integer.
    MissingNumber {
        /// Name of the counter key.
        field: &'static str,
    },
    /// A counter was read but does not satisfy its requirement.
    ThresholdMiss {
        /// Name of the counter key.
        field: &'static str,
        /// Value found in the artifact.
        observed: u64,
        /// Value the counter was compared against.
        required: u64,
    },
}
/// Human-readable messages; each one names the problem and ends with a
/// `Fix:` hint describing the remedy.
impl std::fmt::Display for SemanticParityLinuxArtifactError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Every payload field is `Copy`, so the variants can be matched by value.
        match *self {
            Self::MissingField { field } => write!(
                f,
                "semantic parity Linux artifact is missing {}. Fix: commit real CUDA Linux parser semantic evidence, not synthetic coverage metadata.",
                field
            ),
            Self::MissingNumber { field } => write!(
                f,
                "semantic parity Linux artifact has no numeric {}. Fix: record the exact release semantic counter.",
                field
            ),
            Self::ThresholdMiss {
                field,
                observed,
                required,
            } => write!(
                f,
                "semantic parity Linux artifact {}={} missed required {}. Fix: improve the CUDA C frontend semantic path or lower the approved release target explicitly.",
                field, observed, required
            ),
        }
    }
}

// Marker impl: the error carries no underlying source.
impl std::error::Error for SemanticParityLinuxArtifactError {}
/// Categories that coverage validation requires.
///
/// The order matters: `validate_semantic_parity_coverage` reports the first
/// entry of this list that has no record.
const REQUIRED_CATEGORIES: &[SemanticParityCategory] = &[
    SemanticParityCategory::Declarations,
    SemanticParityCategory::Typedefs,
    SemanticParityCategory::TagNamespaces,
    SemanticParityCategory::Scopes,
    SemanticParityCategory::Linkage,
    SemanticParityCategory::StorageClass,
    SemanticParityCategory::Qualifiers,
    SemanticParityCategory::IntegerPromotions,
    SemanticParityCategory::UsualArithmeticConversions,
    SemanticParityCategory::LvalueRules,
];
pub fn validate_semantic_parity_coverage(
records: &[SemanticParityRecord<'_>],
) -> Result<SemanticParityCoverageProof, SemanticParityCoverageError> {
if records.is_empty() {
return Err(SemanticParityCoverageError::EmptyRecords);
}
let mut categories = BTreeSet::new();
for record in records {
for (field, value) in [("command", record.command), ("evidence", record.evidence)] {
if value.trim().is_empty() {
return Err(SemanticParityCoverageError::EmptyMetadata {
category: record.category,
field,
});
}
}
if !record.command.trim_start().starts_with("./cargo_full ") {
return Err(SemanticParityCoverageError::CommandDoesNotUseCargoFull {
category: record.category,
command: record.command.to_owned(),
});
}
categories.insert(record.category);
}
for category in REQUIRED_CATEGORIES {
if !categories.contains(category) {
return Err(SemanticParityCoverageError::MissingCategory {
category: *category,
});
}
}
Ok(SemanticParityCoverageProof {
category_count: categories.len(),
})
}
pub fn validate_semantic_parity_linux_artifact(
artifact: &str,
) -> Result<SemanticParityLinuxArtifactProof, SemanticParityLinuxArtifactError> {
artifact_contains(
artifact,
"CUDA parser backend",
"\"resident_vyre_parse_backend_id\": \"cuda\"",
)?;
artifact_contains(
artifact,
"raw GPU syntax input",
"\"resident_vyre_parse_input_mode\": \"raw_bytes_gpu_syntax\"",
)?;
artifact_contains(
artifact,
"prepared GPU syntax input",
"\"resident_vyre_prepared_syntax_input\": true",
)?;
artifact_contains(
artifact,
"pipeline cache enabled",
"\"resident_vyre_pipeline_cache_enabled\": true",
)?;
artifact_contains(artifact, "zero parser failures", "\"failures\": []")?;
artifact_contains(artifact, "zero release blockers", "\"blockers\": []")?;
let total_files = artifact_number_field(artifact, "total_files")?;
let total_source_bytes = artifact_number_field(artifact, "total_source_bytes")?;
let total_semantic_graph_bytes = artifact_number_field(artifact, "total_semantic_graph_bytes")?;
let failed_files = artifact_number_field(artifact, "failed_files")?;
let host_token_upload = artifact_number_field(
artifact,
"resident_vyre_parse_host_token_stream_upload_bytes",
)?;
let covered_tokens = artifact_number_field(artifact, "resident_vyre_parse_ast_covered_tokens")?;
let token_count = artifact_number_field(artifact, "resident_vyre_parse_token_count")?;
let gpu_dispatch_count =
artifact_number_field(artifact, "resident_vyre_parse_gpu_dispatch_count")?;
let host_submit_count =
artifact_number_field(artifact, "resident_vyre_parse_host_submit_count")?;
let resident_batch_dispatches =
artifact_number_field(artifact, "resident_vyre_cuda_resident_batch_dispatches")?;
let persistent_handle_dispatches =
artifact_number_field(artifact, "resident_vyre_cuda_persistent_handle_dispatches")?;
let static_param_dispatches =
artifact_number_field(artifact, "resident_vyre_cuda_static_param_dispatches")?;
let suppressed_ast_readback = artifact_number_field(
artifact,
"resident_vyre_parse_resident_ast_readback_suppressed_bytes",
)?;
let speedup_x1000 = artifact_number_field(
artifact,
"resident_vyre_vs_tree_sitter_median_speedup_x1000",
)?;
artifact_at_least("total_files", total_files, 250)?;
artifact_at_least("total_source_bytes", total_source_bytes, 4 * 1024 * 1024)?;
artifact_at_least(
"total_semantic_graph_bytes",
total_semantic_graph_bytes,
1024 * 1024,
)?;
artifact_exact("failed_files", failed_files, 0)?;
artifact_exact(
"resident_vyre_parse_host_token_stream_upload_bytes",
host_token_upload,
0,
)?;
artifact_at_least(
"resident_vyre_parse_gpu_dispatch_count",
gpu_dispatch_count,
1,
)?;
artifact_at_least(
"resident_vyre_parse_host_submit_count",
host_submit_count,
1,
)?;
artifact_at_least(
"resident_vyre_cuda_resident_batch_dispatches",
resident_batch_dispatches,
1,
)?;
artifact_at_least(
"resident_vyre_cuda_persistent_handle_dispatches",
persistent_handle_dispatches,
1,
)?;
artifact_at_least(
"resident_vyre_cuda_static_param_dispatches",
static_param_dispatches,
1,
)?;
artifact_at_least(
"resident_vyre_parse_resident_ast_readback_suppressed_bytes",
suppressed_ast_readback,
total_source_bytes,
)?;
artifact_at_least(
"resident_vyre_vs_tree_sitter_median_speedup_x1000",
speedup_x1000,
100_000,
)?;
if covered_tokens != token_count {
return Err(SemanticParityLinuxArtifactError::ThresholdMiss {
field: "resident_vyre_parse_ast_covered_tokens",
observed: covered_tokens,
required: token_count,
});
}
Ok(SemanticParityLinuxArtifactProof {
total_files,
total_source_bytes,
total_semantic_graph_bytes,
speedup_x1000,
})
}
/// Requires the exact text `needle` to appear somewhere in `artifact`.
///
/// This is a plain substring match, so spacing inside `needle` must match
/// the artifact exactly. On failure, `field` is reported as the missing
/// item's label in `MissingField`.
fn artifact_contains(
    artifact: &str,
    field: &'static str,
    needle: &str,
) -> Result<(), SemanticParityLinuxArtifactError> {
    match artifact.find(needle) {
        Some(_) => Ok(()),
        None => Err(SemanticParityLinuxArtifactError::MissingField { field }),
    }
}
/// Requires `observed` to equal `required`.
///
/// On mismatch, returns `ThresholdMiss` carrying `field` and both values.
fn artifact_exact(
    field: &'static str,
    observed: u64,
    required: u64,
) -> Result<(), SemanticParityLinuxArtifactError> {
    if observed != required {
        return Err(SemanticParityLinuxArtifactError::ThresholdMiss {
            field,
            observed,
            required,
        });
    }
    Ok(())
}
/// Requires `observed` to be greater than or equal to `required`.
///
/// On a shortfall, returns `ThresholdMiss` carrying `field` and both values.
fn artifact_at_least(
    field: &'static str,
    observed: u64,
    required: u64,
) -> Result<(), SemanticParityLinuxArtifactError> {
    if observed < required {
        return Err(SemanticParityLinuxArtifactError::ThresholdMiss {
            field,
            observed,
            required,
        });
    }
    Ok(())
}
/// Reads the unsigned integer stored under the JSON key `field` in `artifact`.
///
/// This is a textual scan, not a JSON parser. It looks for the quoted key
/// followed (after optional whitespace) by `:` and parses the run of ASCII
/// digits that follows the colon. Occurrences of the quoted name that are not
/// followed by a colon, such as the name appearing as a string value or an
/// array element, are skipped. This prevents the value of an unrelated later
/// key from being returned for `field`.
///
/// Only the leading digit run is read; whatever follows it (including a
/// fraction or exponent) is not interpreted. If the key occurs more than once
/// as a key, the first occurrence is used.
///
/// # Errors
///
/// Returns `MissingNumber` when the key is absent, when its value does not
/// start with an ASCII digit (this includes negative numbers and strings), or
/// when the digits do not fit in a `u64`.
fn artifact_number_field(
    artifact: &str,
    field: &'static str,
) -> Result<u64, SemanticParityLinuxArtifactError> {
    let key = format!("\"{field}\"");

    // Use the first occurrence that is really a key: only whitespace may sit
    // between the closing quote and the colon.
    let value = artifact
        .match_indices(key.as_str())
        .find_map(|(start, matched)| {
            artifact[start + matched.len()..]
                .trim_start()
                .strip_prefix(':')
        })
        .ok_or(SemanticParityLinuxArtifactError::MissingNumber { field })?
        .trim_start();

    // ASCII digits are single bytes, so this count is a valid slice boundary.
    let digit_len = value.bytes().take_while(u8::is_ascii_digit).count();

    // An empty digit run and an overflowing one both fail to parse.
    value[..digit_len]
        .parse::<u64>()
        .map_err(|_| SemanticParityLinuxArtifactError::MissingNumber { field })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A well-formed record for `category` that uses the wrapper command.
    fn record(category: SemanticParityCategory) -> SemanticParityRecord<'static> {
        SemanticParityRecord {
            category,
            command: "./cargo_full test -j1 -p vyrec",
            evidence: "release/parity/vyrec-semantic-parity.md",
        }
    }

    /// One well-formed record per required category, in `REQUIRED_CATEGORIES` order.
    fn records() -> Vec<SemanticParityRecord<'static>> {
        let mut all = Vec::with_capacity(REQUIRED_CATEGORIES.len());
        for &category in REQUIRED_CATEGORIES {
            all.push(record(category));
        }
        all
    }

    #[test]
    fn semantic_parity_coverage_accepts_all_required_categories() {
        let complete = records();
        let proof = validate_semantic_parity_coverage(&complete)
            .expect("Fix: complete semantic parity coverage should pass");
        assert_eq!(proof.category_count, 10);
    }

    #[test]
    fn semantic_parity_coverage_rejects_missing_lvalue_rules() {
        // `LvalueRules` is the last required category, so dropping the final
        // record removes exactly that one.
        let mut incomplete = records();
        incomplete.truncate(incomplete.len() - 1);

        let error = validate_semantic_parity_coverage(&incomplete)
            .expect_err("missing lvalue rules should fail");
        let expected = SemanticParityCoverageError::MissingCategory {
            category: SemanticParityCategory::LvalueRules,
        };
        assert_eq!(error, expected);
    }

    #[test]
    fn semantic_parity_coverage_rejects_raw_cargo() {
        let mut tampered = records();
        let first = &mut tampered[0];
        first.command = "cargo test";

        let error =
            validate_semantic_parity_coverage(&tampered).expect_err("raw cargo should fail");
        let expected = SemanticParityCoverageError::CommandDoesNotUseCargoFull {
            category: SemanticParityCategory::Declarations,
            command: "cargo test".to_owned(),
        };
        assert_eq!(error, expected);
    }

    #[test]
    fn semantic_parity_linux_artifact_accepts_committed_cuda_linux_evidence() {
        let committed =
            include_str!("../../../../release/evidence/parser/c-parser-linux-subsystem.json");
        let proof = validate_semantic_parity_linux_artifact(committed)
            .expect("Fix: committed Linux CUDA semantic artifact should pass");
        assert!(proof.total_files >= 250);
        assert!(proof.total_source_bytes >= 4 * 1024 * 1024);
        assert!(proof.total_semantic_graph_bytes >= 1024 * 1024);
        assert!(proof.speedup_x1000 >= 100_000);
    }

    #[test]
    fn semantic_parity_linux_artifact_rejects_cpu_or_host_token_path() {
        // The fixture reports a `cpu` backend and a non-zero host token
        // upload; the backend marker is checked first, so that is the error.
        let artifact = r#"{
"resident_vyre_parse_backend_id": "cpu",
"resident_vyre_parse_input_mode": "raw_bytes_gpu_syntax",
"resident_vyre_prepared_syntax_input": true,
"resident_vyre_pipeline_cache_enabled": true,
"failures": [],
"blockers": [],
"total_files": 490,
"total_source_bytes": 7394810,
"total_semantic_graph_bytes": 3697526,
"failed_files": 0,
"resident_vyre_parse_host_token_stream_upload_bytes": 1,
"resident_vyre_parse_ast_covered_tokens": 10,
"resident_vyre_parse_token_count": 10,
"resident_vyre_parse_gpu_dispatch_count": 8,
"resident_vyre_parse_host_submit_count": 1,
"resident_vyre_cuda_resident_batch_dispatches": 10,
"resident_vyre_cuda_persistent_handle_dispatches": 20,
"resident_vyre_cuda_static_param_dispatches": 30,
"resident_vyre_parse_resident_ast_readback_suppressed_bytes": 73953320,
"resident_vyre_vs_tree_sitter_median_speedup_x1000": 3699018
}"#;

        let error = validate_semantic_parity_linux_artifact(artifact)
            .expect_err("CPU semantic artifact should fail before counters are trusted");
        let expected = SemanticParityLinuxArtifactError::MissingField {
            field: "CUDA parser backend",
        };
        assert_eq!(error, expected);
    }

    #[test]
    fn semantic_parity_linux_artifact_rejects_incomplete_token_coverage() {
        // Everything passes except that 9 of 10 tokens are covered.
        let artifact = r#"{
"resident_vyre_parse_backend_id": "cuda",
"resident_vyre_parse_input_mode": "raw_bytes_gpu_syntax",
"resident_vyre_prepared_syntax_input": true,
"resident_vyre_pipeline_cache_enabled": true,
"failures": [],
"blockers": [],
"total_files": 490,
"total_source_bytes": 7394810,
"total_semantic_graph_bytes": 3697526,
"failed_files": 0,
"resident_vyre_parse_host_token_stream_upload_bytes": 0,
"resident_vyre_parse_ast_covered_tokens": 9,
"resident_vyre_parse_token_count": 10,
"resident_vyre_parse_gpu_dispatch_count": 8,
"resident_vyre_parse_host_submit_count": 1,
"resident_vyre_cuda_resident_batch_dispatches": 10,
"resident_vyre_cuda_persistent_handle_dispatches": 20,
"resident_vyre_cuda_static_param_dispatches": 30,
"resident_vyre_parse_resident_ast_readback_suppressed_bytes": 73953320,
"resident_vyre_vs_tree_sitter_median_speedup_x1000": 3699018
}"#;

        let error = validate_semantic_parity_linux_artifact(artifact)
            .expect_err("partial token coverage should fail");
        let expected = SemanticParityLinuxArtifactError::ThresholdMiss {
            field: "resident_vyre_parse_ast_covered_tokens",
            observed: 9,
            required: 10,
        };
        assert_eq!(error, expected);
    }
}