1use std::collections::BTreeMap;
52
53use panproto_schema::Schema;
54
55use crate::error::ParseError;
56use crate::registry::ParserRegistry;
57
58pub struct ParseEmitLens<'r> {
63 registry: &'r ParserRegistry,
64 protocol: String,
65}
66
67impl<'r> ParseEmitLens<'r> {
68 #[must_use]
70 pub fn new(registry: &'r ParserRegistry, protocol: impl Into<String>) -> Self {
71 Self {
72 registry,
73 protocol: protocol.into(),
74 }
75 }
76
77 pub fn parse(&self, source: &[u8]) -> Result<Schema, ParseError> {
85 self.registry
86 .parse_with_protocol(&self.protocol, source, "parse_emit_lens")
87 }
88
89 pub fn emit(&self, schema: &Schema) -> Result<Vec<u8>, ParseError> {
98 self.registry
99 .emit_pretty_with_protocol(&self.protocol, schema)
100 }
101}
102
103#[derive(Debug, thiserror::Error)]
105#[non_exhaustive]
106pub enum LawViolation {
107 #[error("EmitParse law violated for protocol {protocol}: {detail}")]
109 EmitParse {
110 protocol: String,
112 detail: String,
114 },
115 #[error("ParseEmit law violated for protocol {protocol}: {detail}")]
117 ParseEmit {
118 protocol: String,
120 detail: String,
122 },
123 #[error("underlying parse/emit error: {0}")]
125 Underlying(#[from] ParseError),
126}
127
128pub fn strip_complement(schema: &mut Schema) {
142 for constraints in schema.constraints.values_mut() {
143 constraints.retain(|c| {
144 let s = c.sort.as_ref();
145 !(s == "start-byte" || s == "end-byte" || s.starts_with("interstitial-"))
146 });
147 }
148}
149
150#[must_use]
156pub fn kind_multiset(schema: &Schema) -> BTreeMap<String, usize> {
157 panproto_schema::kind_multiset(schema)
158}
159
160#[must_use]
164pub fn edge_multiset(schema: &Schema) -> BTreeMap<(String, String, String), usize> {
165 panproto_schema::edge_multiset(schema)
166}
167
168pub fn check_emit_parse(lens: &ParseEmitLens<'_>, schema: &Schema) -> Result<(), LawViolation> {
182 let mut stripped = schema.clone();
183 strip_complement(&mut stripped);
184 let expected_kinds = kind_multiset(&stripped);
185 let expected_edges = edge_multiset(&stripped);
186
187 let bytes = lens.emit(&stripped)?;
188 let mut round = lens.parse(&bytes)?;
189 strip_complement(&mut round);
190 let actual_kinds = kind_multiset(&round);
191 let actual_edges = edge_multiset(&round);
192
193 if expected_kinds != actual_kinds {
194 return Err(LawViolation::EmitParse {
195 protocol: lens.protocol.clone(),
196 detail: format!(
197 "vertex-kind multiset mismatch: expected {} distinct kinds, got {}; \
198 first divergence: {:?}",
199 expected_kinds.len(),
200 actual_kinds.len(),
201 first_divergence(&expected_kinds, &actual_kinds),
202 ),
203 });
204 }
205 if expected_edges != actual_edges {
206 return Err(LawViolation::EmitParse {
207 protocol: lens.protocol.clone(),
208 detail: format!(
209 "edge-shape multiset mismatch: expected {} distinct edge shapes, got {}",
210 expected_edges.len(),
211 actual_edges.len(),
212 ),
213 });
214 }
215 Ok(())
216}
217
218pub fn check_parse_emit(lens: &ParseEmitLens<'_>, bytes: &[u8]) -> Result<(), LawViolation> {
234 let parsed = lens.parse(bytes)?;
235 check_emit_parse(lens, &parsed)
236}
237
/// Finds one key on which two count maps disagree, for use in diagnostics.
///
/// Keys of `expected` are examined first, in sorted order: the first one whose
/// count in `actual` is different or absent is returned as
/// `(key, Some(expected_count), actual_count)`. Only if every expected key
/// matches are the keys of `actual` examined; the first one absent from
/// `expected` is returned as `(key, None, Some(actual_count))`.
///
/// Returns `None` when the maps are equal.
fn first_divergence(
    expected: &BTreeMap<String, usize>,
    actual: &BTreeMap<String, usize>,
) -> Option<(String, Option<usize>, Option<usize>)> {
    let changed_or_missing = expected.iter().find_map(|(key, &want)| {
        let got = actual.get(key).copied();
        (got != Some(want)).then(|| (key.clone(), Some(want), got))
    });

    changed_or_missing.or_else(|| {
        actual.iter().find_map(|(key, &count)| {
            (!expected.contains_key(key)).then(|| (key.clone(), None, Some(count)))
        })
    })
}
254
#[cfg(test)]
#[cfg(feature = "grammars")]
// Tests may panic freely. `dead_code` is allowed because the shared helpers
// are only called from tests gated on `lang-*` features and so can be unused
// under some feature combinations.
#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic, dead_code)]
mod tests {
    use super::*;

    /// Stack size given to every worker thread that drives a parser.
    const WORKER_STACK_BYTES: usize = 32 * 1024 * 1024;

    /// Runs `body` to completion on a worker thread with an enlarged stack and
    /// propagates any panic from it to the calling test.
    ///
    /// NOTE(review): presumably needed because parsing/emitting recurses
    /// deeply — confirm before lowering the stack size.
    fn on_large_stack<F>(body: F)
    where
        F: FnOnce() + Send + 'static,
    {
        std::thread::Builder::new()
            .stack_size(WORKER_STACK_BYTES)
            .spawn(body)
            .expect("spawn")
            .join()
            .expect("worker panicked");
    }

    /// Asserts that `check_parse_emit` succeeds for `source` under `protocol`.
    fn run_check(protocol: &str, source: &[u8]) {
        let registry = ParserRegistry::new();
        let lens = ParseEmitLens::new(&registry, protocol);
        check_parse_emit(&lens, source)
            .unwrap_or_else(|e| panic!("law check failed for {protocol}: {e}"));
    }

    #[cfg(feature = "lang-json")]
    #[test]
    fn json_lens_satisfies_laws() {
        on_large_stack(|| run_check("json", br#"{"a": 1, "b": [2, 3]}"#));
    }

    #[cfg(feature = "lang-toml")]
    #[test]
    fn toml_lens_satisfies_laws() {
        on_large_stack(|| run_check("toml", b"name = \"foo\"\nversion = \"1.0\"\n"));
    }

    #[cfg(feature = "lang-json")]
    #[test]
    fn json_check_emit_parse_directly() {
        on_large_stack(|| {
            let registry = ParserRegistry::new();
            let lens = ParseEmitLens::new(&registry, "json");
            let parsed = lens.parse(b"[1, 2, 3]").expect("parse");
            check_emit_parse(&lens, &parsed).expect("retraction holds for parsed schema");
        });
    }

    #[cfg(feature = "lang-json")]
    #[test]
    fn strip_complement_removes_byte_constraints_only() {
        on_large_stack(|| {
            let registry = ParserRegistry::new();
            let lens = ParseEmitLens::new(&registry, "json");
            let mut parsed = lens.parse(br#"{"a": 1}"#).expect("parse");

            let total_constraint_count: usize = parsed.constraints.values().map(Vec::len).sum();
            strip_complement(&mut parsed);
            let stripped_total: usize = parsed.constraints.values().map(Vec::len).sum();

            assert!(
                stripped_total < total_constraint_count,
                "strip_complement must remove byte-position constraints"
            );
            // Constraints outside the stripped sorts must survive.
            let preserved = parsed.constraints.values().any(|cs| {
                cs.iter()
                    .any(|c| c.sort.as_ref() == "chose-alt-fingerprint")
            });
            assert!(
                preserved,
                "strip_complement must preserve chose-alt-fingerprint witnesses"
            );
        });
    }

    #[cfg(feature = "lang-json")]
    #[test]
    fn edge_multiset_distinguishes_structurally_different_schemas() {
        on_large_stack(|| {
            let registry = ParserRegistry::new();
            let lens = ParseEmitLens::new(&registry, "json");
            let s1 = lens.parse(br#"{"a": 1}"#).expect("parse");
            let s2 = lens.parse(b"[1]").expect("parse");
            let m1 = edge_multiset(&s1);
            let m2 = edge_multiset(&s2);
            assert_ne!(
                m1, m2,
                "object and array schemas have distinct edge-shape multisets"
            );
        });
    }

    #[test]
    fn first_divergence_finds_count_mismatch() {
        let mut a = BTreeMap::new();
        a.insert("x".to_owned(), 1);
        let mut b = BTreeMap::new();
        b.insert("x".to_owned(), 2);
        assert_eq!(
            first_divergence(&a, &b),
            Some(("x".to_owned(), Some(1), Some(2)))
        );
    }

    #[test]
    fn first_divergence_finds_extra_key_in_actual() {
        let a = BTreeMap::new();
        let mut b = BTreeMap::new();
        b.insert("y".to_owned(), 3);
        assert_eq!(
            first_divergence(&a, &b),
            Some(("y".to_owned(), None, Some(3)))
        );
    }

    #[test]
    fn first_divergence_returns_none_on_match() {
        let mut a = BTreeMap::new();
        a.insert("x".to_owned(), 1);
        let b = a.clone();
        assert_eq!(first_divergence(&a, &b), None);
    }
}