use std::collections::BTreeMap;
use panproto_schema::Schema;
use crate::error::ParseError;
use crate::registry::ParserRegistry;
/// A parse/emit pair for one protocol, backed by a borrowed [`ParserRegistry`].
///
/// The lens holds the registry by reference for `'r` and owns the protocol
/// name; its `parse` and `emit` methods forward to the registry under that
/// name.
pub struct ParseEmitLens<'r> {
/// Registry that performs the actual parsing and emitting.
registry: &'r ParserRegistry,
/// Protocol name passed to the registry on every call.
protocol: String,
}
impl<'r> ParseEmitLens<'r> {
    /// Builds a lens that routes every call through `registry` using the
    /// protocol name `protocol`.
    ///
    /// The name is converted into an owned `String` once, at construction.
    #[must_use]
    pub fn new(registry: &'r ParserRegistry, protocol: impl Into<String>) -> Self {
        let protocol = protocol.into();
        Self { registry, protocol }
    }

    /// Parses `source` into a [`Schema`] via the registry's
    /// `parse_with_protocol`, under this lens's protocol.
    ///
    /// The third argument sent to the registry is the fixed label
    /// `"parse_emit_lens"` (presumably a source name used in diagnostics;
    /// confirm against `ParserRegistry`).
    ///
    /// # Errors
    ///
    /// Propagates whatever [`ParseError`] the registry returns.
    pub fn parse(&self, source: &[u8]) -> Result<Schema, ParseError> {
        let registry = self.registry;
        let protocol = &self.protocol;
        registry.parse_with_protocol(protocol, source, "parse_emit_lens")
    }

    /// Emits `schema` as bytes via the registry's
    /// `emit_pretty_with_protocol`, under this lens's protocol.
    ///
    /// # Errors
    ///
    /// Propagates whatever [`ParseError`] the registry returns.
    pub fn emit(&self, schema: &Schema) -> Result<Vec<u8>, ParseError> {
        let registry = self.registry;
        let protocol = &self.protocol;
        registry.emit_pretty_with_protocol(protocol, schema)
    }
}
/// Ways a lens law check in this file can fail.
///
/// The enum is `#[non_exhaustive]`, so code outside this crate must include a
/// wildcard arm when matching on it.
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum LawViolation {
/// Emitting a schema and re-parsing the bytes changed its kind or edge
/// multiset. Built by `check_emit_parse`.
#[error("EmitParse law violated for protocol {protocol}: {detail}")]
EmitParse {
/// Protocol name of the lens that was checked.
protocol: String,
/// Human-readable description of what differed.
detail: String,
},
/// Counterpart of `EmitParse` for the parse-then-emit direction.
///
/// NOTE(review): nothing in the visible part of this file constructs this
/// variant; `check_parse_emit` delegates to `check_emit_parse`, whose
/// mismatches are reported as `EmitParse`. Confirm whether it is built
/// elsewhere.
#[error("ParseEmit law violated for protocol {protocol}: {detail}")]
ParseEmit {
/// Protocol name (presumably of the lens that was checked, as in `EmitParse`).
protocol: String,
/// Human-readable description of the violation.
detail: String,
},
/// The parse or emit call itself returned an error. `#[from]` lets `?`
/// convert a `ParseError` into this variant.
#[error("underlying parse/emit error: {0}")]
Underlying(#[from] ParseError),
}
/// Drops byte-position bookkeeping from every constraint list in `schema`.
///
/// A constraint is removed when its sort is exactly `start-byte` or
/// `end-byte`, or when its sort begins with `interstitial-`. All other
/// constraints are kept, in their original order. The schema is edited in
/// place.
pub fn strip_complement(schema: &mut Schema) {
    schema.constraints.values_mut().for_each(|entries| {
        entries.retain(|constraint| {
            let sort: &str = constraint.sort.as_ref();
            let is_byte_bound = matches!(sort, "start-byte" | "end-byte");
            !is_byte_bound && !sort.starts_with("interstitial-")
        });
    });
}
/// Returns the map that `check_emit_parse` compares as the schema's
/// "vertex-kind multiset".
///
/// This is a thin forwarder: the counting itself lives in
/// `panproto_schema::kind_multiset`. Presumably the keys are vertex kinds and
/// the values are occurrence counts; confirm against that function's docs.
#[must_use]
pub fn kind_multiset(schema: &Schema) -> BTreeMap<String, usize> {
panproto_schema::kind_multiset(schema)
}
/// Returns the map that `check_emit_parse` compares as the schema's
/// "edge-shape multiset".
///
/// This is a thin forwarder: the counting itself lives in
/// `panproto_schema::edge_multiset`. Each key is a triple of strings and each
/// value a count; what the three components denote is defined by
/// `panproto_schema` (presumably source kind, edge label and target kind;
/// TODO confirm).
#[must_use]
pub fn edge_multiset(schema: &Schema) -> BTreeMap<(String, String, String), usize> {
panproto_schema::edge_multiset(schema)
}
/// Checks that emitting `schema` and parsing the bytes back preserves its
/// shape.
///
/// The check works on a clone of `schema`, so the caller's value is never
/// modified. It proceeds as follows:
///
/// 1. strip the clone with [`strip_complement`] and record its kind and edge
///    multisets;
/// 2. emit the stripped clone, parse the bytes, and strip the result;
/// 3. compare the kind multisets, then the edge multisets.
///
/// # Errors
///
/// * [`LawViolation::Underlying`] if the emit or the parse call fails.
/// * [`LawViolation::EmitParse`] if the kind multisets differ, or (checked
///   second) if the edge multisets differ. In both cases `detail` carries the
///   number of distinct keys on each side and the first diverging key with
///   its expected and actual counts.
pub fn check_emit_parse(lens: &ParseEmitLens<'_>, schema: &Schema) -> Result<(), LawViolation> {
    // Work on a copy so the caller's schema keeps its constraints.
    let mut stripped = schema.clone();
    strip_complement(&mut stripped);
    let expected_kinds = kind_multiset(&stripped);
    let expected_edges = edge_multiset(&stripped);

    // Round trip: emit, parse back, and strip again so both sides are
    // compared after the same normalisation.
    let bytes = lens.emit(&stripped)?;
    let mut round = lens.parse(&bytes)?;
    strip_complement(&mut round);
    let actual_kinds = kind_multiset(&round);
    let actual_edges = edge_multiset(&round);

    if expected_kinds != actual_kinds {
        return Err(LawViolation::EmitParse {
            protocol: lens.protocol.clone(),
            detail: format!(
                "vertex-kind multiset mismatch: expected {} distinct kinds, got {}; \
                 first divergence: {:?}",
                expected_kinds.len(),
                actual_kinds.len(),
                first_divergence(&expected_kinds, &actual_kinds),
            ),
        });
    }

    if expected_edges != actual_edges {
        // The two maps can hold the same number of distinct shapes and still
        // differ, so the counts alone may read "expected N, got N". Name the
        // first offending shape as well: expected-side keys are scanned
        // first, then shapes that exist only on the actual side.
        let first_edge_divergence = expected_edges
            .iter()
            .map(|(shape, &want)| (shape, Some(want), actual_edges.get(shape).copied()))
            .find(|(_, want, got)| want != got)
            .or_else(|| {
                actual_edges
                    .iter()
                    .find(|(shape, _)| !expected_edges.contains_key(*shape))
                    .map(|(shape, &got)| (shape, None, Some(got)))
            });
        return Err(LawViolation::EmitParse {
            protocol: lens.protocol.clone(),
            detail: format!(
                "edge-shape multiset mismatch: expected {} distinct edge shapes, got {}; \
                 first divergence: {:?}",
                expected_edges.len(),
                actual_edges.len(),
                first_edge_divergence,
            ),
        });
    }

    Ok(())
}
pub fn check_parse_emit(lens: &ParseEmitLens<'_>, bytes: &[u8]) -> Result<(), LawViolation> {
let parsed = lens.parse(bytes)?;
check_emit_parse(lens, &parsed)
}
/// Finds the first key on which two count maps disagree.
///
/// Keys of `expected` are examined first, in ascending key order: the first
/// one whose count in `actual` is different or absent is returned as
/// `(key, Some(expected_count), actual_count)`. Only if every expected key
/// matches are the keys of `actual` scanned, and the first one missing from
/// `expected` is returned as `(key, None, Some(actual_count))`.
///
/// Returns `None` when the maps are equal.
fn first_divergence(
    expected: &BTreeMap<String, usize>,
    actual: &BTreeMap<String, usize>,
) -> Option<(String, Option<usize>, Option<usize>)> {
    let changed_or_missing = expected.iter().find_map(|(kind, &want)| {
        let got = actual.get(kind).copied();
        (got != Some(want)).then(|| (kind.clone(), Some(want), got))
    });

    changed_or_missing.or_else(|| {
        actual
            .iter()
            .find(|(kind, _)| !expected.contains_key(*kind))
            .map(|(kind, &got)| (kind.clone(), None, Some(got)))
    })
}
/// Unit tests for the law checkers and their helpers.
///
/// The grammar-backed tests are compiled only with the `grammars` feature and
/// the matching `lang-*` feature; the `first_divergence` tests need only
/// `grammars`.
#[cfg(test)]
#[cfg(feature = "grammars")]
// `expect`/`unwrap`/`panic!` are how these tests report failure. `dead_code`
// covers the helpers below when neither `lang-json` nor `lang-toml` is
// enabled and nothing calls them.
#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic, dead_code)]
mod tests {
    use super::*;

    /// Stack size (32 MiB) given to each grammar-backed test's worker thread.
    const WORKER_STACK_BYTES: usize = 32 * 1024 * 1024;

    /// Runs `body` on a fresh thread with a `WORKER_STACK_BYTES` stack and
    /// waits for it, panicking if the thread cannot be spawned or if `body`
    /// panics.
    ///
    /// Presumably the parse/emit path recurses deeply enough to need more
    /// than the default test-thread stack; TODO confirm.
    fn on_large_stack(body: impl FnOnce() + Send + 'static) {
        std::thread::Builder::new()
            .stack_size(WORKER_STACK_BYTES)
            .spawn(body)
            .expect("spawn")
            .join()
            .expect("worker panicked");
    }

    /// Runs `check_parse_emit` for `protocol` on `source` against a fresh
    /// registry, panicking with the violation text on failure.
    fn run_check(protocol: &str, source: &[u8]) {
        let registry = ParserRegistry::new();
        let lens = ParseEmitLens::new(&registry, protocol);
        check_parse_emit(&lens, source)
            .unwrap_or_else(|e| panic!("law check failed for {protocol}: {e}"));
    }

    #[cfg(feature = "lang-json")]
    #[test]
    fn json_lens_satisfies_laws() {
        on_large_stack(|| run_check("json", br#"{"a": 1, "b": [2, 3]}"#));
    }

    #[cfg(feature = "lang-toml")]
    #[test]
    fn toml_lens_satisfies_laws() {
        on_large_stack(|| run_check("toml", b"name = \"foo\"\nversion = \"1.0\"\n"));
    }

    #[cfg(feature = "lang-json")]
    #[test]
    fn json_check_emit_parse_directly() {
        on_large_stack(|| {
            let registry = ParserRegistry::new();
            let lens = ParseEmitLens::new(&registry, "json");
            let parsed = lens.parse(b"[1, 2, 3]").expect("parse");
            check_emit_parse(&lens, &parsed).expect("retraction holds for parsed schema");
        });
    }

    #[cfg(feature = "lang-json")]
    #[test]
    fn strip_complement_removes_byte_constraints_only() {
        on_large_stack(|| {
            let registry = ParserRegistry::new();
            let lens = ParseEmitLens::new(&registry, "json");
            let mut parsed = lens.parse(br#"{"a": 1}"#).expect("parse");

            let total_constraint_count: usize = parsed.constraints.values().map(Vec::len).sum();
            strip_complement(&mut parsed);
            let stripped_total: usize = parsed.constraints.values().map(Vec::len).sum();
            assert!(
                stripped_total < total_constraint_count,
                "strip_complement must remove byte-position constraints"
            );

            // Stripping must leave non-positional constraints in place.
            let preserved = parsed.constraints.values().any(|cs| {
                cs.iter()
                    .any(|c| c.sort.as_ref() == "chose-alt-fingerprint")
            });
            assert!(
                preserved,
                "strip_complement must preserve chose-alt-fingerprint witnesses"
            );
        });
    }

    #[cfg(feature = "lang-json")]
    #[test]
    fn edge_multiset_distinguishes_structurally_different_schemas() {
        on_large_stack(|| {
            let registry = ParserRegistry::new();
            let lens = ParseEmitLens::new(&registry, "json");
            let s1 = lens.parse(br#"{"a": 1}"#).expect("parse");
            let s2 = lens.parse(b"[1]").expect("parse");
            let m1 = edge_multiset(&s1);
            let m2 = edge_multiset(&s2);
            assert_ne!(
                m1, m2,
                "object and array schemas have distinct edge-shape multisets"
            );
        });
    }

    #[test]
    fn first_divergence_finds_count_mismatch() {
        let mut a = BTreeMap::new();
        a.insert("x".to_owned(), 1);
        let mut b = BTreeMap::new();
        b.insert("x".to_owned(), 2);
        assert_eq!(
            first_divergence(&a, &b),
            Some(("x".to_owned(), Some(1), Some(2)))
        );
    }

    #[test]
    fn first_divergence_finds_extra_key_in_actual() {
        let a = BTreeMap::new();
        let mut b = BTreeMap::new();
        b.insert("y".to_owned(), 3);
        assert_eq!(
            first_divergence(&a, &b),
            Some(("y".to_owned(), None, Some(3)))
        );
    }

    #[test]
    fn first_divergence_returns_none_on_match() {
        let mut a = BTreeMap::new();
        a.insert("x".to_owned(), 1);
        let b = a.clone();
        assert_eq!(first_divergence(&a, &b), None);
    }
}