1use std::collections::BTreeMap;
52
53use panproto_schema::Schema;
54
55use crate::error::ParseError;
56use crate::registry::ParserRegistry;
57
58pub struct ParseEmitLens<'r> {
63 registry: &'r ParserRegistry,
64 protocol: String,
65}
66
67impl<'r> ParseEmitLens<'r> {
68 #[must_use]
70 pub fn new(registry: &'r ParserRegistry, protocol: impl Into<String>) -> Self {
71 Self {
72 registry,
73 protocol: protocol.into(),
74 }
75 }
76
77 pub fn parse(&self, source: &[u8]) -> Result<Schema, ParseError> {
85 self.registry
86 .parse_with_protocol(&self.protocol, source, "parse_emit_lens")
87 }
88
89 pub fn emit(&self, schema: &Schema) -> Result<Vec<u8>, ParseError> {
98 self.registry
99 .emit_pretty_with_protocol(&self.protocol, schema)
100 }
101}
102
103#[derive(Debug, thiserror::Error)]
105#[non_exhaustive]
106pub enum LawViolation {
107 #[error("EmitParse law violated for protocol {protocol}: {detail}")]
109 EmitParse {
110 protocol: String,
112 detail: String,
114 },
115 #[error("ParseEmit law violated for protocol {protocol}: {detail}")]
117 ParseEmit {
118 protocol: String,
120 detail: String,
122 },
123 #[error("underlying parse/emit error: {0}")]
125 Underlying(#[from] ParseError),
126}
127
128pub fn strip_complement(schema: &mut Schema) {
134 for constraints in schema.constraints.values_mut() {
135 constraints.retain(|c| {
136 let s = c.sort.as_ref();
137 !(s == "start-byte" || s == "end-byte" || s.starts_with("interstitial-"))
138 });
139 }
140}
141
142#[must_use]
148pub fn kind_multiset(schema: &Schema) -> BTreeMap<String, usize> {
149 let mut map = BTreeMap::new();
150 for v in schema.vertices.values() {
151 *map.entry(v.kind.to_string()).or_insert(0) += 1;
152 }
153 map
154}
155
156#[must_use]
170pub fn edge_multiset(schema: &Schema) -> BTreeMap<(String, String, String), usize> {
171 let mut map: BTreeMap<(String, String, String), usize> = BTreeMap::new();
172 for edge in schema.edges.keys() {
173 let src_kind = schema
174 .vertices
175 .get(&edge.src)
176 .map(|v| v.kind.to_string())
177 .unwrap_or_default();
178 let tgt_kind = schema
179 .vertices
180 .get(&edge.tgt)
181 .map(|v| v.kind.to_string())
182 .unwrap_or_default();
183 *map.entry((src_kind, edge.kind.to_string(), tgt_kind))
184 .or_insert(0) += 1;
185 }
186 map
187}
188
189pub fn check_emit_parse(lens: &ParseEmitLens<'_>, schema: &Schema) -> Result<(), LawViolation> {
203 let mut stripped = schema.clone();
204 strip_complement(&mut stripped);
205 let expected_kinds = kind_multiset(&stripped);
206 let expected_edges = edge_multiset(&stripped);
207
208 let bytes = lens.emit(&stripped)?;
209 let mut round = lens.parse(&bytes)?;
210 strip_complement(&mut round);
211 let actual_kinds = kind_multiset(&round);
212 let actual_edges = edge_multiset(&round);
213
214 if expected_kinds != actual_kinds {
215 return Err(LawViolation::EmitParse {
216 protocol: lens.protocol.clone(),
217 detail: format!(
218 "vertex-kind multiset mismatch: expected {} distinct kinds, got {}; \
219 first divergence: {:?}",
220 expected_kinds.len(),
221 actual_kinds.len(),
222 first_divergence(&expected_kinds, &actual_kinds),
223 ),
224 });
225 }
226 if expected_edges != actual_edges {
227 return Err(LawViolation::EmitParse {
228 protocol: lens.protocol.clone(),
229 detail: format!(
230 "edge-shape multiset mismatch: expected {} distinct edge shapes, got {}",
231 expected_edges.len(),
232 actual_edges.len(),
233 ),
234 });
235 }
236 Ok(())
237}
238
239pub fn check_parse_emit(lens: &ParseEmitLens<'_>, bytes: &[u8]) -> Result<(), LawViolation> {
255 let parsed = lens.parse(bytes)?;
256 check_emit_parse(lens, &parsed)
257}
258
259fn first_divergence(
260 expected: &BTreeMap<String, usize>,
261 actual: &BTreeMap<String, usize>,
262) -> Option<(String, Option<usize>, Option<usize>)> {
263 for (k, &v) in expected {
264 if actual.get(k) != Some(&v) {
265 return Some((k.clone(), Some(v), actual.get(k).copied()));
266 }
267 }
268 for (k, &v) in actual {
269 if !expected.contains_key(k) {
270 return Some((k.clone(), None, Some(v)));
271 }
272 }
273 None
274}
275
276#[cfg(test)]
277#[cfg(feature = "grammars")]
278#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic, dead_code)]
279mod tests {
280 use super::*;
281
282 fn run_check(protocol: &str, source: &[u8]) {
283 let registry = ParserRegistry::new();
284 let lens = ParseEmitLens::new(®istry, protocol);
285 check_parse_emit(&lens, source)
286 .unwrap_or_else(|e| panic!("law check failed for {protocol}: {e}"));
287 }
288
289 #[cfg(feature = "lang-json")]
290 #[test]
291 fn json_lens_satisfies_laws() {
292 std::thread::Builder::new()
293 .stack_size(32 * 1024 * 1024)
294 .spawn(|| run_check("json", br#"{"a": 1, "b": [2, 3]}"#))
295 .expect("spawn")
296 .join()
297 .expect("worker panicked");
298 }
299
300 #[cfg(feature = "lang-toml")]
301 #[test]
302 fn toml_lens_satisfies_laws() {
303 std::thread::Builder::new()
304 .stack_size(32 * 1024 * 1024)
305 .spawn(|| run_check("toml", b"name = \"foo\"\nversion = \"1.0\"\n"))
306 .expect("spawn")
307 .join()
308 .expect("worker panicked");
309 }
310
311 #[cfg(feature = "lang-json")]
312 #[test]
313 fn json_check_emit_parse_directly() {
314 std::thread::Builder::new()
315 .stack_size(32 * 1024 * 1024)
316 .spawn(|| {
317 let registry = ParserRegistry::new();
318 let lens = ParseEmitLens::new(®istry, "json");
319 let parsed = lens.parse(br#"[1, 2, 3]"#).expect("parse");
320 check_emit_parse(&lens, &parsed).expect("retraction holds for parsed schema");
321 })
322 .expect("spawn")
323 .join()
324 .expect("worker panicked");
325 }
326
327 #[cfg(feature = "lang-json")]
328 #[test]
329 fn strip_complement_removes_byte_constraints_only() {
330 std::thread::Builder::new()
331 .stack_size(32 * 1024 * 1024)
332 .spawn(|| {
333 let registry = ParserRegistry::new();
334 let lens = ParseEmitLens::new(®istry, "json");
335 let mut parsed = lens.parse(br#"{"a": 1}"#).expect("parse");
336
337 let total_constraint_count: usize = parsed.constraints.values().map(Vec::len).sum();
338 strip_complement(&mut parsed);
339 let stripped_total: usize = parsed.constraints.values().map(Vec::len).sum();
340
341 assert!(
342 stripped_total < total_constraint_count,
343 "strip_complement must remove byte-position constraints"
344 );
345 let preserved = parsed.constraints.values().any(|cs| {
348 cs.iter()
349 .any(|c| c.sort.as_ref() == "chose-alt-fingerprint")
350 });
351 assert!(
352 preserved,
353 "strip_complement must preserve chose-alt-fingerprint witnesses"
354 );
355 })
356 .expect("spawn")
357 .join()
358 .expect("worker panicked");
359 }
360
361 #[cfg(feature = "lang-json")]
362 #[test]
363 fn edge_multiset_distinguishes_structurally_different_schemas() {
364 std::thread::Builder::new()
365 .stack_size(32 * 1024 * 1024)
366 .spawn(|| {
367 let registry = ParserRegistry::new();
368 let lens = ParseEmitLens::new(®istry, "json");
369 let s1 = lens.parse(br#"{"a": 1}"#).expect("parse");
370 let s2 = lens.parse(br#"[1]"#).expect("parse");
371 let m1 = edge_multiset(&s1);
372 let m2 = edge_multiset(&s2);
373 assert_ne!(
374 m1, m2,
375 "object and array schemas have distinct edge-shape multisets"
376 );
377 })
378 .expect("spawn")
379 .join()
380 .expect("worker panicked");
381 }
382
383 #[test]
384 fn first_divergence_finds_count_mismatch() {
385 let mut a = BTreeMap::new();
386 a.insert("x".to_owned(), 1);
387 let mut b = BTreeMap::new();
388 b.insert("x".to_owned(), 2);
389 assert_eq!(
390 first_divergence(&a, &b),
391 Some(("x".to_owned(), Some(1), Some(2)))
392 );
393 }
394
395 #[test]
396 fn first_divergence_finds_extra_key_in_actual() {
397 let a = BTreeMap::new();
398 let mut b = BTreeMap::new();
399 b.insert("y".to_owned(), 3);
400 assert_eq!(
401 first_divergence(&a, &b),
402 Some(("y".to_owned(), None, Some(3)))
403 );
404 }
405
406 #[test]
407 fn first_divergence_returns_none_on_match() {
408 let mut a = BTreeMap::new();
409 a.insert("x".to_owned(), 1);
410 let b = a.clone();
411 assert_eq!(first_divergence(&a, &b), None);
412 }
413}