panproto_parse/registry.rs
1//! Parser registry mapping protocol names to full-AST parser implementations.
2
3use std::path::Path;
4use std::sync::Arc;
5
6use panproto_schema::{AbstractSchema, DecoratedSchema, Schema};
7use rustc_hash::FxHashMap;
8
9use crate::error::ParseError;
10use crate::layout_policy::LayoutPolicy;
11use crate::theory_extract::ExtractedTheoryMeta;
12
13/// A full-AST parser and emitter for a specific programming language.
14///
15/// Each implementation wraps a tree-sitter grammar and its auto-derived theory,
16/// providing parse (source → Schema) and emit (Schema → source) operations.
17pub trait AstParser: Send + Sync {
18 /// The panproto protocol name (e.g. `"typescript"`, `"python"`).
19 fn protocol_name(&self) -> &str;
20
21 /// Parse source code into a full-AST [`Schema`].
22 ///
23 /// # Errors
24 ///
25 /// Returns [`ParseError`] if tree-sitter parsing fails or schema construction fails.
26 fn parse(&self, source: &[u8], file_path: &str) -> Result<Schema, ParseError>;
27
28 /// Emit a [`Schema`] back to source code bytes.
29 ///
30 /// The emitter walks the schema graph top-down, using formatting constraints
31 /// (comment, indent, blank-lines-before) to reproduce the original formatting.
32 ///
33 /// # Errors
34 ///
35 /// Returns [`ParseError::EmitFailed`] if emission fails.
36 fn emit(&self, schema: &Schema) -> Result<Vec<u8>, ParseError>;
37
38 /// File extensions this parser handles (e.g. `["ts", "tsx"]`).
39 fn supported_extensions(&self) -> &[&str];
40
41 /// The auto-derived theory metadata for this language.
42 fn theory_meta(&self) -> &ExtractedTheoryMeta;
43
44 /// Render a by-construction [`Schema`] (one with no parse-recovered
45 /// byte positions or interstitials) to source bytes.
46 ///
47 /// Unlike [`emit`](Self::emit), which reconstructs source from
48 /// byte-position fragments stored on the schema during `parse`,
49 /// `emit_pretty` walks tree-sitter `grammar.json` production rules
50 /// to render schemas built from scratch via `SchemaBuilder`.
51 ///
52 /// # Errors
53 ///
54 /// Returns [`ParseError::EmitFailed`] when the language has no
55 /// vendored `grammar.json`, when a vertex's kind is not a grammar
56 /// rule, or when a required field has no corresponding schema edge.
57 fn emit_pretty(&self, schema: &Schema) -> Result<Vec<u8>, ParseError> {
58 self.emit_pretty_with_policy(schema, &crate::emit_pretty::FormatPolicy::default())
59 }
60
61 /// Render a by-construction [`Schema`] under a caller-supplied
62 /// [`FormatPolicy`](crate::emit_pretty::FormatPolicy).
63 ///
64 /// The policy governs every configurable aspect of the rendered
65 /// output: separator between glued tokens, newline byte sequence,
66 /// indent width, line-break and indent-open/close token sets. The
67 /// default policy (used by [`emit_pretty`](Self::emit_pretty))
68 /// targets syntactic validity with ASCII conventions; callers
69 /// supplying their own policy can pin idiomatic formatting.
70 ///
71 /// # Errors
72 ///
73 /// Returns [`ParseError::EmitFailed`] when the language has no
74 /// vendored `grammar.json`, when a vertex's kind is not a grammar
75 /// rule, or when a required field has no corresponding schema edge.
76 fn emit_pretty_with_policy(
77 &self,
78 schema: &Schema,
79 policy: &crate::emit_pretty::FormatPolicy,
80 ) -> Result<Vec<u8>, ParseError> {
81 let _ = (schema, policy);
82 Err(ParseError::EmitFailed {
83 protocol: self.protocol_name().to_owned(),
84 reason: format!(
85 "emit_pretty_with_policy not implemented for protocol '{}'",
86 self.protocol_name()
87 ),
88 })
89 }
90}
91
/// Registry of all full-AST parsers, keyed by protocol name.
///
/// Provides language detection by file extension and dispatches parse/emit
/// operations to the appropriate language parser. The two maps are kept in
/// step by [`register`](Self::register) and [`unregister`](Self::unregister).
pub struct ParserRegistry {
    /// Parsers keyed by protocol name.
    ///
    /// Held by `Arc` (not `Box`) so the same handle can be shared with
    /// the layout-enrichment registry without re-wrapping at every
    /// lookup. Registration installs both: the parser into `parsers`
    /// and a thin adapter into the lens crate's enrichment registry.
    parsers: FxHashMap<String, Arc<dyn AstParser>>,
    /// Extension → protocol name mapping.
    ///
    /// Keys are stored exactly as the parser reports them from
    /// `supported_extensions` and are matched verbatim against
    /// `Path::extension` (no dot, no case folding). When two parsers
    /// claim the same extension, the most recent registration wins.
    extension_map: FxHashMap<String, String>,
}
107
108impl ParserRegistry {
109 /// Create a new registry populated with all enabled language parsers.
110 ///
111 /// With the `grammars` feature (default), this populates the registry from
112 /// `panproto-grammars`, which provides up to 261 tree-sitter languages.
113 /// Without the `grammars` feature, this returns an empty registry; call
114 /// [`register`](Self::register) to add parsers manually using individual
115 /// grammar crates.
116 #[must_use]
117 pub fn new() -> Self {
118 let mut registry = Self {
119 parsers: FxHashMap::default(),
120 extension_map: FxHashMap::default(),
121 };
122
123 #[cfg(feature = "grammars")]
124 for grammar in panproto_grammars::grammars() {
125 let config = crate::languages::walker_configs::walker_config_for(grammar.name);
126 match crate::languages::common::LanguageParser::from_language_with_grammar_json(
127 grammar.name,
128 grammar.extensions.to_vec(),
129 grammar.language,
130 grammar.node_types,
131 grammar.tags_query,
132 config,
133 grammar.grammar_json,
134 ) {
135 Ok(p) => registry.register(Box::new(p)),
136 Err(err) => {
137 let _ = err;
138 #[cfg(debug_assertions)]
139 eprintln!(
140 "warning: grammar '{}' theory extraction failed: {err}",
141 grammar.name
142 );
143 }
144 }
145 }
146
147 registry
148 }
149
150 /// Register a parser implementation.
151 ///
152 /// In addition to keying the parser by its protocol name, this
153 /// installs a [`LayoutEnricher`](panproto_lens::enrichment_registry::LayoutEnricher)
154 /// adapter into the global enrichment registry so that a
155 /// `parse_emit_protolens(protocol, …)` instantiation finds a
156 /// synthesis driver without any further wiring.
157 pub fn register(&mut self, parser: Box<dyn AstParser>) {
158 let name = parser.protocol_name().to_owned();
159 for ext in parser.supported_extensions() {
160 self.extension_map.insert((*ext).to_owned(), name.clone());
161 }
162 let arc: Arc<dyn AstParser> = Arc::from(parser);
163 crate::decorate::register_layout_enricher(Arc::clone(&arc));
164 self.parsers.insert(name, arc);
165 }
166
167 /// Register a tree-sitter language as a full-AST parser.
168 ///
169 /// Used by `panproto-grammars-*` companion crates that ship grammars
170 /// outside the default `panproto-grammars` build. The byte-slice
171 /// arguments must outlive this registry; the canonical pattern is
172 /// for the companion to bake the data into `&'static` rodata at
173 /// compile time and pass references that are valid for the process
174 /// lifetime.
175 ///
176 /// `walker_config` is looked up by `name` from the bundled per-language
177 /// configuration table. Languages without a tailored configuration
178 /// fall back to the default walker config.
179 ///
180 /// # Errors
181 ///
182 /// Returns [`ParseError`] if theory extraction from `node_types_json`
183 /// fails or if the tags query rejects compilation.
184 pub fn register_external_grammar(
185 &mut self,
186 name: &'static str,
187 extensions: Vec<&'static str>,
188 language: tree_sitter::Language,
189 node_types_json: &'static [u8],
190 tags_query: Option<&'static str>,
191 grammar_json: Option<&'static [u8]>,
192 ) -> Result<(), crate::error::ParseError> {
193 let config = crate::languages::walker_configs::walker_config_for(name);
194 let parser = crate::languages::common::LanguageParser::from_language_with_grammar_json(
195 name,
196 extensions,
197 language,
198 node_types_json,
199 tags_query,
200 config,
201 grammar_json,
202 )?;
203 self.register(Box::new(parser));
204 Ok(())
205 }
206
207 /// Owned-data variant of [`register_external_grammar`](Self::register_external_grammar).
208 ///
209 /// Accepts `String` / `Vec<u8>` rather than `&'static` references. The
210 /// caller is presumed not to have process-lifetime rodata available
211 /// (typical dev-time use: bytes read from disk via the Python binding's
212 /// override hook). To match the trait's `'static` lifetime requirement
213 /// the inputs are leaked into the heap; the leak is one-time per
214 /// override.
215 ///
216 /// This is the registration primitive for grammar-author workflows
217 /// where a grammar's `parser.c` / `grammar.json` / `node-types.json`
218 /// are evolving outside the panproto release cadence. Production
219 /// builds should continue to use [`register_external_grammar`](Self::register_external_grammar) with
220 /// `'static` data baked into the binary at compile time.
221 ///
222 /// # Errors
223 ///
224 /// Returns [`ParseError`] if theory extraction or tags-query
225 /// compilation fails.
226 pub fn register_external_grammar_owned(
227 &mut self,
228 name: String,
229 extensions: Vec<String>,
230 language: tree_sitter::Language,
231 node_types_json: Vec<u8>,
232 tags_query: Option<String>,
233 grammar_json: Option<Vec<u8>>,
234 ) -> Result<(), crate::error::ParseError> {
235 let name_static: &'static str = Box::leak(name.into_boxed_str());
236 let extensions_static: Vec<&'static str> = extensions
237 .into_iter()
238 .map(|s| Box::leak(s.into_boxed_str()) as &'static str)
239 .collect();
240 let node_types_static: &'static [u8] = Box::leak(node_types_json.into_boxed_slice());
241 let tags_query_static: Option<&'static str> =
242 tags_query.map(|s| Box::leak(s.into_boxed_str()) as &'static str);
243 let grammar_json_static: Option<&'static [u8]> =
244 grammar_json.map(|v| Box::leak(v.into_boxed_slice()) as &'static [u8]);
245
246 self.register_external_grammar(
247 name_static,
248 extensions_static,
249 language,
250 node_types_static,
251 tags_query_static,
252 grammar_json_static,
253 )
254 }
255
256 /// Remove a registration by protocol name.
257 ///
258 /// Drops the parser and any extension mappings that pointed at it.
259 /// Returns `true` if a parser was removed, `false` if no such
260 /// registration existed. Primarily intended for grammar-author
261 /// workflows where a registered grammar is being replaced by a
262 /// freshly-compiled version mid-process.
263 pub fn unregister(&mut self, name: &str) -> bool {
264 let removed = self.parsers.remove(name).is_some();
265 if removed {
266 self.extension_map.retain(|_, v| v != name);
267 }
268 removed
269 }
270
271 /// Override a registered grammar with new owned data.
272 ///
273 /// Equivalent to [`unregister`](Self::unregister) followed by
274 /// [`register_external_grammar_owned`](Self::register_external_grammar_owned),
275 /// and intended for the same grammar-author dev workflow. Any
276 /// extension mappings previously bound to `name` are replaced by
277 /// the new `extensions`.
278 ///
279 /// # Errors
280 ///
281 /// Returns [`ParseError`] if theory extraction or tags-query
282 /// compilation fails on the new grammar; in that case the prior
283 /// registration is already gone.
284 pub fn override_grammar(
285 &mut self,
286 name: String,
287 extensions: Vec<String>,
288 language: tree_sitter::Language,
289 node_types_json: Vec<u8>,
290 tags_query: Option<String>,
291 grammar_json: Option<Vec<u8>>,
292 ) -> Result<(), crate::error::ParseError> {
293 self.unregister(&name);
294 self.register_external_grammar_owned(
295 name,
296 extensions,
297 language,
298 node_types_json,
299 tags_query,
300 grammar_json,
301 )
302 }
303
304 /// Detect the language protocol for a file path by its extension.
305 ///
306 /// Returns `None` if the extension is not recognized (caller should
307 /// fall back to the `raw_file` protocol).
308 #[must_use]
309 pub fn detect_language(&self, path: &Path) -> Option<&str> {
310 path.extension()
311 .and_then(|ext| ext.to_str())
312 .and_then(|ext| self.extension_map.get(ext))
313 .map(String::as_str)
314 }
315
316 /// Parse a file by detecting its language from the file path.
317 ///
318 /// # Errors
319 ///
320 /// Returns [`ParseError::UnknownLanguage`] if the file extension is not recognized.
321 /// Returns other [`ParseError`] variants if parsing fails.
322 pub fn parse_file(&self, path: &Path, content: &[u8]) -> Result<Schema, ParseError> {
323 let protocol = self
324 .detect_language(path)
325 .ok_or_else(|| ParseError::UnknownLanguage {
326 extension: path
327 .extension()
328 .and_then(|e| e.to_str())
329 .unwrap_or("")
330 .to_owned(),
331 })?;
332
333 self.parse_with_protocol(protocol, content, &path.display().to_string())
334 }
335
336 /// Parse source code with a specific protocol name.
337 ///
338 /// # Errors
339 ///
340 /// Returns [`ParseError::UnknownLanguage`] if the protocol is not registered.
341 pub fn parse_with_protocol(
342 &self,
343 protocol: &str,
344 content: &[u8],
345 file_path: &str,
346 ) -> Result<Schema, ParseError> {
347 let parser = self
348 .parsers
349 .get(protocol)
350 .ok_or_else(|| ParseError::UnknownLanguage {
351 extension: protocol.to_owned(),
352 })?;
353
354 parser.parse(content, file_path)
355 }
356
357 /// Emit a schema back to source code bytes using the specified protocol.
358 ///
359 /// # Errors
360 ///
361 /// Returns [`ParseError::UnknownLanguage`] if the protocol is not registered.
362 pub fn emit_with_protocol(
363 &self,
364 protocol: &str,
365 schema: &Schema,
366 ) -> Result<Vec<u8>, ParseError> {
367 let parser = self
368 .parsers
369 .get(protocol)
370 .ok_or_else(|| ParseError::UnknownLanguage {
371 extension: protocol.to_owned(),
372 })?;
373
374 parser.emit(schema)
375 }
376
377 /// Render a by-construction schema using the named protocol.
378 ///
379 /// # Errors
380 ///
381 /// Returns [`ParseError::UnknownLanguage`] if the protocol is not
382 /// registered, or [`ParseError::EmitFailed`] from the underlying
383 /// parser's `emit_pretty`.
384 pub fn emit_pretty_with_protocol(
385 &self,
386 protocol: &str,
387 schema: &Schema,
388 ) -> Result<Vec<u8>, ParseError> {
389 let parser = self
390 .parsers
391 .get(protocol)
392 .ok_or_else(|| ParseError::UnknownLanguage {
393 extension: protocol.to_owned(),
394 })?;
395
396 parser.emit_pretty(schema)
397 }
398
399 /// Report the test-verification status of `emit_pretty` for a
400 /// given protocol.
401 ///
402 /// The status is a programmatic check that downstream tooling
403 /// (e.g. quivers, schema-migration pipelines) can use to refuse
404 /// emit on protocols whose fixed-point law has never been
405 /// exercised by panproto's test suite. The three tiers are:
406 ///
407 /// * [`EmitVerificationStatus::Verified`] — the protocol has an
408 /// explicit fixed-point or roundtrip test in panproto's suite.
409 /// `emit_pretty(parse(emit_pretty(s))) == emit_pretty(s)` is
410 /// known to hold on representative source.
411 /// * [`EmitVerificationStatus::Generic`] — the protocol is
412 /// registered (a tree-sitter grammar is vendored) and the
413 /// generic dispatch path applies, but no per-language test
414 /// asserts emit correctness. Output is structurally derived
415 /// from `grammar.json` + the universal cassette layer and is
416 /// likely correct, but unverified.
417 /// * [`EmitVerificationStatus::Unsupported`] — the protocol is
418 /// not registered, OR is registered but no `grammar.json` was
419 /// vendored at build time. `emit_pretty` will return
420 /// [`ParseError::EmitFailed`].
421 #[must_use]
422 pub fn emit_verification_status(&self, protocol: &str) -> EmitVerificationStatus {
423 if !self.parsers.contains_key(protocol) {
424 return EmitVerificationStatus::Unsupported;
425 }
426 if VERIFIED_EMIT_PROTOCOLS.binary_search(&protocol).is_ok() {
427 EmitVerificationStatus::Verified
428 } else {
429 EmitVerificationStatus::Generic
430 }
431 }
432}
433
/// Programmatic verification tier for [`ParserRegistry::emit_verification_status`].
///
/// The variants are ordered from most to least trustworthy.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EmitVerificationStatus {
    /// `emit_pretty` for this protocol has a test in panproto's suite
    /// asserting the fixed-point law on representative source.
    Verified,
    /// The protocol is registered and the generic dispatch path
    /// applies, but no per-language test asserts emit correctness.
    Generic,
    /// The protocol is not registered, or its grammar lacks the
    /// vendored `grammar.json` that `emit_pretty` requires.
    ///
    /// NOTE(review): [`ParserRegistry::emit_verification_status`] only
    /// returns this variant when the protocol is not registered; it does
    /// not inspect whether a `grammar.json` is present.
    Unsupported,
}
447
/// Protocols whose `emit_pretty` is verified to a bar that justifies
/// downstream trust. A protocol qualifies on one of two bases:
///
/// 1. **Corpus-verified** — every entry in the grammar author's own
///    `test/corpus/` round-trips under the full oracle (byte fixed point +
///    vertex-kind multiset + edge-shape multiset), checked by the strict
///    `emit_corpus_audit` test. This is the strong bar: the corpus exercises
///    the whole grammar, not one hand-written sample.
/// 2. **Backend-verified** — a quivers transpile backend (`python`, `stan`,
///    `bugs`, `jags`, `julia`, `scheme`, `javascript`) covered by dedicated
///    emit regression tests for the construct surface quivers actually emits.
///    These are pinned by `emit_verification_status` tests as a downstream
///    contract; bringing each to full corpus-pass (basis 1) is tracked work.
///
/// A single hand-written round-trip sample is NOT sufficient: an earlier
/// expansion to 149 protocols on minimal samples was reverted after a corpus
/// audit showed most failed their own grammar's test corpus.
///
/// Names MUST be kept in strictly ascending byte order (no duplicates) so the
/// binary-search lookup in [`ParserRegistry::emit_verification_status`]
/// works. The invariant is enforced at compile time by the assertion that
/// follows the table: a misplaced entry fails the build instead of silently
/// downgrading a protocol to `Generic`.
const VERIFIED_EMIT_PROTOCOLS: &[&str] = &[
    "abc",
    "actionscript",
    "ada",
    "agda",
    "al",
    "angular",
    "apex",
    "arduino",
    "asciidoc",
    "asm",
    "astro",
    "awk",
    "bash",
    "bass",
    "batch",
    "beancount",
    "bibtex",
    "bicep",
    "bitbake",
    "blade",
    "brightscript",
    "bsl",
    "bugs",
    "c",
    "caddy",
    "cairo",
    "capnp",
    "cedar",
    "cedarschema",
    "chatito",
    "chuck",
    "circom",
    "clarity",
    "clojure",
    "cmake",
    "cobol",
    "commonlisp",
    "cooklang",
    "corn",
    "cpon",
    "cpp",
    "crystal",
    "csharp",
    "csound",
    "css",
    "csv",
    "cuda",
    "cue",
    "cylc",
    "d",
    "dart",
    "desktop",
    "devicetree",
    "diff",
    "djot",
    "dockerfile",
    "dot",
    "doxygen",
    "dtd",
    "earthfile",
    "ebnf",
    "editorconfig",
    "eds",
    "eex",
    "elisp",
    "elixir",
    "elm",
    "elsa",
    "embedded_template",
    "enforce",
    "erlang",
    "facility",
    "faust",
    "fennel",
    "fidl",
    "firrtl",
    "fish",
    "foam",
    "forth",
    "fortran",
    "fsharp",
    "fsharp_signature",
    "func",
    "gdscript",
    "git_config",
    "git_rebase",
    "gitattributes",
    "gitcommit",
    "gitignore",
    "gleam",
    "glicol",
    "glsl",
    "gn",
    "go",
    "godot_resource",
    "gomod",
    "gosum",
    "graphql",
    "groovy",
    "gstlaunch",
    "hack",
    "hare",
    "haskell",
    "haxe",
    "hcl",
    "heex",
    "hlsl",
    "html",
    "http",
    "hurl",
    "hyprlang",
    "idris",
    "ini",
    "ispc",
    "jags",
    "janet",
    "java",
    "javascript",
    "jinja2",
    "jq",
    "jsdoc",
    "json",
    "jsonnet",
    "julia",
    "just",
    "kconfig",
    "kdl",
    "kotlin",
    "latex",
    "lean",
    "ledger",
    "lilypond",
    "linkerscript",
    "liquid",
    "llvm",
    "lua",
    "luadoc",
    "luap",
    "luau",
    "magik",
    "make",
    "markdown",
    "markdown_inline",
    "matlab",
    "mermaid",
    "meson",
    "mojo",
    "netlinx",
    "nginx",
    "nickel",
    "nim",
    "ninja",
    "nix",
    "norg",
    "nqc",
    "nushell",
    "objc",
    "ocaml",
    "ocaml_interface",
    "odin",
    "org",
    "pascal",
    "pem",
    "perl",
    "pgn",
    "php",
    "pkl",
    "po",
    "pony",
    "postscript",
    "powershell",
    "printf",
    "prisma",
    "prolog",
    "promql",
    "properties",
    "protobuf",
    "psv",
    "pug",
    "puppet",
    "purescript",
    "pymanifest",
    "python",
    "ql",
    "qml",
    "qmldir",
    "query",
    "qvr",
    "r",
    "racket",
    "re2c",
    "readline",
    "regex",
    "rego",
    "requirements",
    "rescript",
    "robot",
    "ron",
    "rst",
    "ruby",
    "rust",
    "scala",
    "scheme",
    "scss",
    "smali",
    "smithy",
    "solidity",
    "sparql",
    "sql",
    "squirrel",
    "ssh_config",
    "stan",
    "stanfunctions",
    "starlark",
    "strudel_mini",
    "supercollider",
    "svelte",
    "swift",
    "tablegen",
    "tcl",
    "teal",
    "templ",
    "terraform",
    "textproto",
    "thrift",
    "tidal_mini",
    "tlaplus",
    "tmux",
    "toml",
    "tsv",
    "tsx",
    "turtle",
    "twig",
    "typescript",
    "typst",
    "udev",
    "ungrammar",
    "uxntal",
    "v",
    "vb",
    "verilog",
    "vhdl",
    "vim",
    "vimdoc",
    "vue",
    "wast",
    "wat",
    "wgsl",
    "wit",
    "xcompose",
    "xml",
    "yaml",
    "yuck",
    "zig",
    "zsh",
];

// Compile-time guard for the ordering invariant documented on the table.
const _: () = assert!(
    verified_protocols_strictly_sorted(VERIFIED_EMIT_PROTOCOLS),
    "VERIFIED_EMIT_PROTOCOLS must be strictly sorted for binary search"
);

/// Returns `true` when every name is strictly greater than its predecessor
/// under byte-wise lexicographic order, which is the order `str`'s `Ord`
/// (and therefore `binary_search`) uses. Empty and single-element slices
/// are trivially sorted.
const fn verified_protocols_strictly_sorted(names: &[&str]) -> bool {
    // `while` rather than iterators: only loops are usable in `const fn`.
    let mut idx = 1;
    while idx < names.len() {
        if !bytes_strictly_less(names[idx - 1].as_bytes(), names[idx].as_bytes()) {
            return false;
        }
        idx += 1;
    }
    true
}

/// Byte-wise lexicographic `lhs < rhs`, usable in constant evaluation.
const fn bytes_strictly_less(lhs: &[u8], rhs: &[u8]) -> bool {
    let mut idx = 0;
    while idx < lhs.len() && idx < rhs.len() {
        if lhs[idx] != rhs[idx] {
            return lhs[idx] < rhs[idx];
        }
        idx += 1;
    }
    // One is a prefix of the other (or they are equal): the shorter sorts first.
    lhs.len() < rhs.len()
}
725
726impl ParserRegistry {
727 /// Decorate an [`AbstractSchema`] with the layout enrichment
728 /// fibre required by `emit_pretty_with_protocol` and friends.
729 ///
730 /// This is the put-direction of the parse / decorate / emit lens
731 /// at `protocol`. The implementation routes through the same
732 /// grammar walker as `emit_pretty` followed by `parse`, so the
733 /// resulting [`DecoratedSchema`] carries a complete layout fibre
734 /// recovered by the parse-side walker — `start-byte`, `end-byte`,
735 /// every `interstitial-N`, `chose-alt-fingerprint`, and
736 /// `chose-alt-child-kinds`.
737 ///
738 /// The section law holds up to kind- and edge-multiset
739 /// equivalence: `forget_layout(decorate(a)) ≅ a` modulo vertex-id
740 /// renaming. Grammars where parsing consolidates tokens that the
741 /// emitter rendered as separate sequences (e.g. lilypond's `c'4`
742 /// re-parses to a single note) do not preserve a one-to-one
743 /// vertex correspondence, so the result's vertex IDs are always
744 /// freshly minted by the parser.
745 ///
746 /// # Errors
747 ///
748 /// Returns [`ParseError::UnknownLanguage`] when `protocol` is not
749 /// registered, [`ParseError::SchemaConstruction`] when the
750 /// abstract schema was built for a different protocol than
751 /// `protocol`, [`ParseError::EmitFailed`] when the grammar walker
752 /// cannot render the abstract schema (missing `grammar.json`,
753 /// vertex kind not a rule), or any other parser error if the
754 /// re-parse step rejects the canonical bytes (a regression in the
755 /// parse/emit pipeline, not a user bug).
756 pub fn decorate(
757 &self,
758 protocol: &str,
759 abstract_schema: &AbstractSchema,
760 policy: &LayoutPolicy,
761 ) -> Result<DecoratedSchema, ParseError> {
762 let parser = self
763 .parsers
764 .get(protocol)
765 .ok_or_else(|| ParseError::UnknownLanguage {
766 extension: protocol.to_owned(),
767 })?;
768 // `decorate_with_parser` enforces the protocol-match invariant
769 // between the parser and the abstract schema, so no extra guard
770 // is needed here.
771 crate::decorate::decorate_with_parser(parser.as_ref(), abstract_schema, policy)
772 }
773
774 /// Render an [`AbstractSchema`] to canonical source bytes under
775 /// `policy`.
776 ///
777 /// Implementation note: this is exactly the first emit step of
778 /// [`decorate`](Self::decorate) — `decorate` then re-parses to
779 /// recover the layout fibre, but if all the caller wants is the
780 /// bytes, the re-parse is wasted work. Going through
781 /// `emit_pretty_with_policy` directly preserves every field of
782 /// `policy` in the output (`separator`, `newline`, `indent_width`,
783 /// `line_break_after`, `indent_open` / `indent_close`).
784 ///
785 /// # Errors
786 ///
787 /// See [`decorate`](Self::decorate).
788 pub fn pretty_with_protocol(
789 &self,
790 protocol: &str,
791 abstract_schema: &AbstractSchema,
792 policy: &LayoutPolicy,
793 ) -> Result<Vec<u8>, ParseError> {
794 let parser = self
795 .parsers
796 .get(protocol)
797 .ok_or_else(|| ParseError::UnknownLanguage {
798 extension: protocol.to_owned(),
799 })?;
800 check_protocol_match(
801 protocol,
802 abstract_schema.as_schema(),
803 "pretty_with_protocol",
804 )?;
805 parser.emit_pretty_with_policy(abstract_schema.as_schema(), policy)
806 }
807
808 /// Return the canonical [`Protolens`](panproto_lens::Protolens)
809 /// describing the parse / decorate / emit relationship at
810 /// `protocol`.
811 ///
812 /// The protolens encodes the schema-level structure of the
813 /// relationship: source-side strips the layout enrichment fibre,
814 /// target-side adds it via the registered
815 /// [`LayoutEnricher`](panproto_lens::enrichment_registry::LayoutEnricher).
816 /// It composes with the rest of the `panproto-lens` protolens
817 /// algebra for chain-law reasoning. The operational entry points
818 /// for running the relationship on real schemas are
819 /// [`decorate`](Self::decorate),
820 /// [`pretty_with_protocol`](Self::pretty_with_protocol), and
821 /// [`emit_pretty_with_protocol`](Self::emit_pretty_with_protocol).
822 ///
823 /// # Errors
824 ///
825 /// Returns [`ParseError::UnknownLanguage`] when `protocol` is not
826 /// registered.
827 pub fn parse_emit_protolens(
828 &self,
829 protocol: &str,
830 policy: &LayoutPolicy,
831 ) -> Result<panproto_lens::Protolens, ParseError> {
832 if !self.parsers.contains_key(protocol) {
833 return Err(ParseError::UnknownLanguage {
834 extension: protocol.to_owned(),
835 });
836 }
837 Ok(crate::parse_emit_protolens::parse_emit_protolens(
838 protocol, policy,
839 ))
840 }
841
842 /// Get the theory metadata for a specific protocol.
843 #[must_use]
844 pub fn theory_meta(&self, protocol: &str) -> Option<&ExtractedTheoryMeta> {
845 self.parsers.get(protocol).map(|p| p.theory_meta())
846 }
847
848 /// List all registered protocol names.
849 pub fn protocol_names(&self) -> impl Iterator<Item = &str> {
850 self.parsers.keys().map(String::as_str)
851 }
852
853 /// O(1) lookup: is a parser already registered for `protocol`?
854 ///
855 /// Useful for dedup at the registration boundary. The umbrella
856 /// `panproto-grammars-all` companion pack overlaps with both the
857 /// built-in core grammars and every per-group pack; callers can
858 /// short-circuit before re-registering rather than scanning
859 /// `protocol_names()` linearly.
860 #[must_use]
861 pub fn has_parser(&self, protocol: &str) -> bool {
862 self.parsers.contains_key(protocol)
863 }
864
865 /// Get the number of registered parsers.
866 #[must_use]
867 pub fn len(&self) -> usize {
868 self.parsers.len()
869 }
870
871 /// Check if the registry is empty.
872 #[must_use]
873 pub fn is_empty(&self) -> bool {
874 self.parsers.is_empty()
875 }
876}
877
/// `Default` is the same as [`ParserRegistry::new`]: the registry comes
/// pre-populated when the `grammars` feature is enabled and empty otherwise.
impl Default for ParserRegistry {
    /// Delegates to [`ParserRegistry::new`].
    fn default() -> Self {
        Self::new()
    }
}
883
884/// Guard against running parser-tied operations on a schema built
885/// for a different protocol. Catches the user-visible error of
886/// passing (say) a JSON schema to a Python parser before the
887/// underlying grammar walker would surface it as an opaque rule
888/// mismatch.
889fn check_protocol_match(
890 expected: &str,
891 schema: &Schema,
892 operation: &'static str,
893) -> Result<(), ParseError> {
894 if schema.protocol == expected {
895 Ok(())
896 } else {
897 Err(ParseError::SchemaConstruction {
898 reason: format!(
899 "{operation}: protocol mismatch — registry called with '{expected}' but \
900 schema carries protocol '{}'",
901 schema.protocol,
902 ),
903 })
904 }
905}