// panproto_parse/registry.rs

//! Parser registry mapping protocol names to full-AST parser implementations.
2
3use std::path::Path;
4use std::sync::Arc;
5
6use panproto_schema::{AbstractSchema, DecoratedSchema, Schema};
7use rustc_hash::FxHashMap;
8
9use crate::error::ParseError;
10use crate::layout_policy::LayoutPolicy;
11use crate::theory_extract::ExtractedTheoryMeta;
12
13/// A full-AST parser and emitter for a specific programming language.
14///
15/// Each implementation wraps a tree-sitter grammar and its auto-derived theory,
16/// providing parse (source → Schema) and emit (Schema → source) operations.
17pub trait AstParser: Send + Sync {
18    /// The panproto protocol name (e.g. `"typescript"`, `"python"`).
19    fn protocol_name(&self) -> &str;
20
21    /// Parse source code into a full-AST [`Schema`].
22    ///
23    /// # Errors
24    ///
25    /// Returns [`ParseError`] if tree-sitter parsing fails or schema construction fails.
26    fn parse(&self, source: &[u8], file_path: &str) -> Result<Schema, ParseError>;
27
28    /// Emit a [`Schema`] back to source code bytes.
29    ///
30    /// The emitter walks the schema graph top-down, using formatting constraints
31    /// (comment, indent, blank-lines-before) to reproduce the original formatting.
32    ///
33    /// # Errors
34    ///
35    /// Returns [`ParseError::EmitFailed`] if emission fails.
36    fn emit(&self, schema: &Schema) -> Result<Vec<u8>, ParseError>;
37
38    /// File extensions this parser handles (e.g. `["ts", "tsx"]`).
39    fn supported_extensions(&self) -> &[&str];
40
41    /// The auto-derived theory metadata for this language.
42    fn theory_meta(&self) -> &ExtractedTheoryMeta;
43
44    /// Render a by-construction [`Schema`] (one with no parse-recovered
45    /// byte positions or interstitials) to source bytes.
46    ///
47    /// Unlike [`emit`](Self::emit), which reconstructs source from
48    /// byte-position fragments stored on the schema during `parse`,
49    /// `emit_pretty` walks tree-sitter `grammar.json` production rules
50    /// to render schemas built from scratch via `SchemaBuilder`.
51    ///
52    /// # Errors
53    ///
54    /// Returns [`ParseError::EmitFailed`] when the language has no
55    /// vendored `grammar.json`, when a vertex's kind is not a grammar
56    /// rule, or when a required field has no corresponding schema edge.
57    fn emit_pretty(&self, schema: &Schema) -> Result<Vec<u8>, ParseError> {
58        self.emit_pretty_with_policy(schema, &crate::emit_pretty::FormatPolicy::default())
59    }
60
61    /// Render a by-construction [`Schema`] under a caller-supplied
62    /// [`FormatPolicy`](crate::emit_pretty::FormatPolicy).
63    ///
64    /// The policy governs every configurable aspect of the rendered
65    /// output: separator between glued tokens, newline byte sequence,
66    /// indent width, line-break and indent-open/close token sets. The
67    /// default policy (used by [`emit_pretty`](Self::emit_pretty))
68    /// targets syntactic validity with ASCII conventions; callers
69    /// supplying their own policy can pin idiomatic formatting.
70    ///
71    /// # Errors
72    ///
73    /// Returns [`ParseError::EmitFailed`] when the language has no
74    /// vendored `grammar.json`, when a vertex's kind is not a grammar
75    /// rule, or when a required field has no corresponding schema edge.
76    fn emit_pretty_with_policy(
77        &self,
78        schema: &Schema,
79        policy: &crate::emit_pretty::FormatPolicy,
80    ) -> Result<Vec<u8>, ParseError> {
81        let _ = (schema, policy);
82        Err(ParseError::EmitFailed {
83            protocol: self.protocol_name().to_owned(),
84            reason: format!(
85                "emit_pretty_with_policy not implemented for protocol '{}'",
86                self.protocol_name()
87            ),
88        })
89    }
90}
91
92/// Registry of all full-AST parsers, keyed by protocol name.
93///
94/// Provides language detection by file extension and dispatches parse/emit
95/// operations to the appropriate language parser.
96pub struct ParserRegistry {
97    /// Parsers keyed by protocol name.
98    ///
99    /// Held by `Arc` (not `Box`) so the same handle can be shared with
100    /// the layout-enrichment registry without re-wrapping at every
101    /// lookup. Registration installs both: the parser into `parsers`
102    /// and a thin adapter into the lens crate's enrichment registry.
103    parsers: FxHashMap<String, Arc<dyn AstParser>>,
104    /// Extension → protocol name mapping.
105    extension_map: FxHashMap<String, String>,
106}
107
108impl ParserRegistry {
109    /// Create a new registry populated with all enabled language parsers.
110    ///
111    /// With the `grammars` feature (default), this populates the registry from
112    /// `panproto-grammars`, which provides up to 261 tree-sitter languages.
113    /// Without the `grammars` feature, this returns an empty registry; call
114    /// [`register`](Self::register) to add parsers manually using individual
115    /// grammar crates.
116    #[must_use]
117    pub fn new() -> Self {
118        let mut registry = Self {
119            parsers: FxHashMap::default(),
120            extension_map: FxHashMap::default(),
121        };
122
123        #[cfg(feature = "grammars")]
124        for grammar in panproto_grammars::grammars() {
125            let config = crate::languages::walker_configs::walker_config_for(grammar.name);
126            match crate::languages::common::LanguageParser::from_language_with_grammar_json(
127                grammar.name,
128                grammar.extensions.to_vec(),
129                grammar.language,
130                grammar.node_types,
131                grammar.tags_query,
132                config,
133                grammar.grammar_json,
134            ) {
135                Ok(p) => registry.register(Box::new(p)),
136                Err(err) => {
137                    let _ = err;
138                    #[cfg(debug_assertions)]
139                    eprintln!(
140                        "warning: grammar '{}' theory extraction failed: {err}",
141                        grammar.name
142                    );
143                }
144            }
145        }
146
147        registry
148    }
149
150    /// Register a parser implementation.
151    ///
152    /// In addition to keying the parser by its protocol name, this
153    /// installs a [`LayoutEnricher`](panproto_lens::enrichment_registry::LayoutEnricher)
154    /// adapter into the global enrichment registry so that a
155    /// `parse_emit_protolens(protocol, …)` instantiation finds a
156    /// synthesis driver without any further wiring.
157    pub fn register(&mut self, parser: Box<dyn AstParser>) {
158        let name = parser.protocol_name().to_owned();
159        for ext in parser.supported_extensions() {
160            self.extension_map.insert((*ext).to_owned(), name.clone());
161        }
162        let arc: Arc<dyn AstParser> = Arc::from(parser);
163        crate::decorate::register_layout_enricher(Arc::clone(&arc));
164        self.parsers.insert(name, arc);
165    }
166
167    /// Register a tree-sitter language as a full-AST parser.
168    ///
169    /// Used by `panproto-grammars-*` companion crates that ship grammars
170    /// outside the default `panproto-grammars` build. The byte-slice
171    /// arguments must outlive this registry; the canonical pattern is
172    /// for the companion to bake the data into `&'static` rodata at
173    /// compile time and pass references that are valid for the process
174    /// lifetime.
175    ///
176    /// `walker_config` is looked up by `name` from the bundled per-language
177    /// configuration table. Languages without a tailored configuration
178    /// fall back to the default walker config.
179    ///
180    /// # Errors
181    ///
182    /// Returns [`ParseError`] if theory extraction from `node_types_json`
183    /// fails or if the tags query rejects compilation.
184    pub fn register_external_grammar(
185        &mut self,
186        name: &'static str,
187        extensions: Vec<&'static str>,
188        language: tree_sitter::Language,
189        node_types_json: &'static [u8],
190        tags_query: Option<&'static str>,
191        grammar_json: Option<&'static [u8]>,
192    ) -> Result<(), crate::error::ParseError> {
193        let config = crate::languages::walker_configs::walker_config_for(name);
194        let parser = crate::languages::common::LanguageParser::from_language_with_grammar_json(
195            name,
196            extensions,
197            language,
198            node_types_json,
199            tags_query,
200            config,
201            grammar_json,
202        )?;
203        self.register(Box::new(parser));
204        Ok(())
205    }
206
207    /// Owned-data variant of [`register_external_grammar`](Self::register_external_grammar).
208    ///
209    /// Accepts `String` / `Vec<u8>` rather than `&'static` references. The
210    /// caller is presumed not to have process-lifetime rodata available
211    /// (typical dev-time use: bytes read from disk via the Python binding's
212    /// override hook). To match the trait's `'static` lifetime requirement
213    /// the inputs are leaked into the heap; the leak is one-time per
214    /// override.
215    ///
216    /// This is the registration primitive for grammar-author workflows
217    /// where a grammar's `parser.c` / `grammar.json` / `node-types.json`
218    /// are evolving outside the panproto release cadence. Production
219    /// builds should continue to use [`register_external_grammar`](Self::register_external_grammar) with
220    /// `'static` data baked into the binary at compile time.
221    ///
222    /// # Errors
223    ///
224    /// Returns [`ParseError`] if theory extraction or tags-query
225    /// compilation fails.
226    pub fn register_external_grammar_owned(
227        &mut self,
228        name: String,
229        extensions: Vec<String>,
230        language: tree_sitter::Language,
231        node_types_json: Vec<u8>,
232        tags_query: Option<String>,
233        grammar_json: Option<Vec<u8>>,
234    ) -> Result<(), crate::error::ParseError> {
235        let name_static: &'static str = Box::leak(name.into_boxed_str());
236        let extensions_static: Vec<&'static str> = extensions
237            .into_iter()
238            .map(|s| Box::leak(s.into_boxed_str()) as &'static str)
239            .collect();
240        let node_types_static: &'static [u8] = Box::leak(node_types_json.into_boxed_slice());
241        let tags_query_static: Option<&'static str> =
242            tags_query.map(|s| Box::leak(s.into_boxed_str()) as &'static str);
243        let grammar_json_static: Option<&'static [u8]> =
244            grammar_json.map(|v| Box::leak(v.into_boxed_slice()) as &'static [u8]);
245
246        self.register_external_grammar(
247            name_static,
248            extensions_static,
249            language,
250            node_types_static,
251            tags_query_static,
252            grammar_json_static,
253        )
254    }
255
256    /// Remove a registration by protocol name.
257    ///
258    /// Drops the parser and any extension mappings that pointed at it.
259    /// Returns `true` if a parser was removed, `false` if no such
260    /// registration existed. Primarily intended for grammar-author
261    /// workflows where a registered grammar is being replaced by a
262    /// freshly-compiled version mid-process.
263    pub fn unregister(&mut self, name: &str) -> bool {
264        let removed = self.parsers.remove(name).is_some();
265        if removed {
266            self.extension_map.retain(|_, v| v != name);
267        }
268        removed
269    }
270
271    /// Override a registered grammar with new owned data.
272    ///
273    /// Equivalent to [`unregister`](Self::unregister) followed by
274    /// [`register_external_grammar_owned`](Self::register_external_grammar_owned),
275    /// and intended for the same grammar-author dev workflow. Any
276    /// extension mappings previously bound to `name` are replaced by
277    /// the new `extensions`.
278    ///
279    /// # Errors
280    ///
281    /// Returns [`ParseError`] if theory extraction or tags-query
282    /// compilation fails on the new grammar; in that case the prior
283    /// registration is already gone.
284    pub fn override_grammar(
285        &mut self,
286        name: String,
287        extensions: Vec<String>,
288        language: tree_sitter::Language,
289        node_types_json: Vec<u8>,
290        tags_query: Option<String>,
291        grammar_json: Option<Vec<u8>>,
292    ) -> Result<(), crate::error::ParseError> {
293        self.unregister(&name);
294        self.register_external_grammar_owned(
295            name,
296            extensions,
297            language,
298            node_types_json,
299            tags_query,
300            grammar_json,
301        )
302    }
303
304    /// Detect the language protocol for a file path by its extension.
305    ///
306    /// Returns `None` if the extension is not recognized (caller should
307    /// fall back to the `raw_file` protocol).
308    #[must_use]
309    pub fn detect_language(&self, path: &Path) -> Option<&str> {
310        path.extension()
311            .and_then(|ext| ext.to_str())
312            .and_then(|ext| self.extension_map.get(ext))
313            .map(String::as_str)
314    }
315
316    /// Parse a file by detecting its language from the file path.
317    ///
318    /// # Errors
319    ///
320    /// Returns [`ParseError::UnknownLanguage`] if the file extension is not recognized.
321    /// Returns other [`ParseError`] variants if parsing fails.
322    pub fn parse_file(&self, path: &Path, content: &[u8]) -> Result<Schema, ParseError> {
323        let protocol = self
324            .detect_language(path)
325            .ok_or_else(|| ParseError::UnknownLanguage {
326                extension: path
327                    .extension()
328                    .and_then(|e| e.to_str())
329                    .unwrap_or("")
330                    .to_owned(),
331            })?;
332
333        self.parse_with_protocol(protocol, content, &path.display().to_string())
334    }
335
336    /// Parse source code with a specific protocol name.
337    ///
338    /// # Errors
339    ///
340    /// Returns [`ParseError::UnknownLanguage`] if the protocol is not registered.
341    pub fn parse_with_protocol(
342        &self,
343        protocol: &str,
344        content: &[u8],
345        file_path: &str,
346    ) -> Result<Schema, ParseError> {
347        let parser = self
348            .parsers
349            .get(protocol)
350            .ok_or_else(|| ParseError::UnknownLanguage {
351                extension: protocol.to_owned(),
352            })?;
353
354        parser.parse(content, file_path)
355    }
356
357    /// Emit a schema back to source code bytes using the specified protocol.
358    ///
359    /// # Errors
360    ///
361    /// Returns [`ParseError::UnknownLanguage`] if the protocol is not registered.
362    pub fn emit_with_protocol(
363        &self,
364        protocol: &str,
365        schema: &Schema,
366    ) -> Result<Vec<u8>, ParseError> {
367        let parser = self
368            .parsers
369            .get(protocol)
370            .ok_or_else(|| ParseError::UnknownLanguage {
371                extension: protocol.to_owned(),
372            })?;
373
374        parser.emit(schema)
375    }
376
377    /// Render a by-construction schema using the named protocol.
378    ///
379    /// # Errors
380    ///
381    /// Returns [`ParseError::UnknownLanguage`] if the protocol is not
382    /// registered, or [`ParseError::EmitFailed`] from the underlying
383    /// parser's `emit_pretty`.
384    pub fn emit_pretty_with_protocol(
385        &self,
386        protocol: &str,
387        schema: &Schema,
388    ) -> Result<Vec<u8>, ParseError> {
389        let parser = self
390            .parsers
391            .get(protocol)
392            .ok_or_else(|| ParseError::UnknownLanguage {
393                extension: protocol.to_owned(),
394            })?;
395
396        parser.emit_pretty(schema)
397    }
398
399    /// Report the test-verification status of `emit_pretty` for a
400    /// given protocol.
401    ///
402    /// The status is a programmatic check that downstream tooling
403    /// (e.g. quivers, schema-migration pipelines) can use to refuse
404    /// emit on protocols whose fixed-point law has never been
405    /// exercised by panproto's test suite. The three tiers are:
406    ///
407    /// * [`EmitVerificationStatus::Verified`] — the protocol has an
408    ///   explicit fixed-point or roundtrip test in panproto's suite.
409    ///   `emit_pretty(parse(emit_pretty(s))) == emit_pretty(s)` is
410    ///   known to hold on representative source.
411    /// * [`EmitVerificationStatus::Generic`] — the protocol is
412    ///   registered (a tree-sitter grammar is vendored) and the
413    ///   generic dispatch path applies, but no per-language test
414    ///   asserts emit correctness. Output is structurally derived
415    ///   from `grammar.json` + the universal cassette layer and is
416    ///   likely correct, but unverified.
417    /// * [`EmitVerificationStatus::Unsupported`] — the protocol is
418    ///   not registered, OR is registered but no `grammar.json` was
419    ///   vendored at build time. `emit_pretty` will return
420    ///   [`ParseError::EmitFailed`].
421    #[must_use]
422    pub fn emit_verification_status(&self, protocol: &str) -> EmitVerificationStatus {
423        if !self.parsers.contains_key(protocol) {
424            return EmitVerificationStatus::Unsupported;
425        }
426        if VERIFIED_EMIT_PROTOCOLS.binary_search(&protocol).is_ok() {
427            EmitVerificationStatus::Verified
428        } else {
429            EmitVerificationStatus::Generic
430        }
431    }
432}
433
/// Verification tier reported by [`ParserRegistry::emit_verification_status`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EmitVerificationStatus {
    /// panproto's test suite contains a test asserting the `emit_pretty`
    /// fixed-point law for this protocol on representative source.
    Verified,
    /// The protocol is registered and served by the generic dispatch
    /// path, but no per-language test asserts that its emit is correct.
    Generic,
    /// The protocol is not registered, or its grammar lacks the vendored
    /// `grammar.json` that `emit_pretty` requires.
    Unsupported,
}
447
/// Protocols whose `emit_pretty` is verified to a bar that justifies
/// downstream trust. A protocol qualifies on one of two bases:
///
/// 1. **Corpus-verified** — every entry in the grammar author's own
///    `test/corpus/` round-trips under the full oracle (byte fixed point +
///    vertex-kind multiset + edge-shape multiset), checked by the strict
///    `emit_corpus_audit` test. This is the strong bar: the corpus exercises
///    the whole grammar, not one hand-written sample.
/// 2. **Backend-verified** — a quivers transpile backend (`python`, `stan`,
///    `bugs`, `jags`, `julia`, `scheme`, `javascript`) covered by dedicated
///    emit regression tests for the construct surface quivers actually emits.
///    These are pinned by `emit_verification_status` tests as a downstream
///    contract; bringing each to full corpus-pass (basis 1) is tracked work.
///
/// A single hand-written round-trip sample is NOT sufficient: an earlier
/// expansion to 149 protocols on minimal samples was reverted after a corpus
/// audit showed most failed their own grammar's test corpus.
///
/// Names MUST be in strictly ascending byte order (the order `str`'s `Ord`
/// uses; note `_` sorts before lowercase letters) so the binary-search
/// lookup in [`ParserRegistry::emit_verification_status`] works. The
/// ordering is enforced at compile time by the assertion below the list:
/// a misplaced or duplicated entry fails the build.
const VERIFIED_EMIT_PROTOCOLS: &[&str] = &[
    "abc",
    "actionscript",
    "ada",
    "agda",
    "al",
    "angular",
    "apex",
    "arduino",
    "asciidoc",
    "asm",
    "astro",
    "awk",
    "bash",
    "bass",
    "batch",
    "beancount",
    "bibtex",
    "bicep",
    "bitbake",
    "blade",
    "brightscript",
    "bsl",
    "bugs",
    "c",
    "caddy",
    "cairo",
    "capnp",
    "cedar",
    "cedarschema",
    "chatito",
    "chuck",
    "circom",
    "clarity",
    "clojure",
    "cmake",
    "cobol",
    "commonlisp",
    "cooklang",
    "corn",
    "cpon",
    "cpp",
    "crystal",
    "csharp",
    "csound",
    "css",
    "csv",
    "cuda",
    "cue",
    "cylc",
    "d",
    "dart",
    "desktop",
    "devicetree",
    "diff",
    "djot",
    "dockerfile",
    "dot",
    "doxygen",
    "dtd",
    "earthfile",
    "ebnf",
    "editorconfig",
    "eds",
    "eex",
    "elisp",
    "elixir",
    "elm",
    "elsa",
    "embedded_template",
    "enforce",
    "erlang",
    "facility",
    "faust",
    "fennel",
    "fidl",
    "firrtl",
    "fish",
    "foam",
    "forth",
    "fortran",
    "fsharp",
    "fsharp_signature",
    "func",
    "gdscript",
    "git_config",
    "git_rebase",
    "gitattributes",
    "gitcommit",
    "gitignore",
    "gleam",
    "glicol",
    "glsl",
    "gn",
    "go",
    "godot_resource",
    "gomod",
    "gosum",
    "graphql",
    "groovy",
    "gstlaunch",
    "hack",
    "hare",
    "haskell",
    "haxe",
    "hcl",
    "heex",
    "hlsl",
    "html",
    "http",
    "hurl",
    "hyprlang",
    "idris",
    "ini",
    "ispc",
    "jags",
    "janet",
    "java",
    "javascript",
    "jinja2",
    "jq",
    "jsdoc",
    "json",
    "jsonnet",
    "julia",
    "just",
    "kconfig",
    "kdl",
    "kotlin",
    "latex",
    "lean",
    "ledger",
    "lilypond",
    "linkerscript",
    "liquid",
    "llvm",
    "lua",
    "luadoc",
    "luap",
    "luau",
    "magik",
    "make",
    "markdown",
    "markdown_inline",
    "matlab",
    "mermaid",
    "meson",
    "mojo",
    "netlinx",
    "nginx",
    "nickel",
    "nim",
    "ninja",
    "nix",
    "norg",
    "nqc",
    "nushell",
    "objc",
    "ocaml",
    "ocaml_interface",
    "odin",
    "org",
    "pascal",
    "pem",
    "perl",
    "pgn",
    "php",
    "pkl",
    "po",
    "pony",
    "postscript",
    "powershell",
    "printf",
    "prisma",
    "prolog",
    "promql",
    "properties",
    "protobuf",
    "psv",
    "pug",
    "puppet",
    "purescript",
    "pymanifest",
    "python",
    "ql",
    "qml",
    "qmldir",
    "query",
    "qvr",
    "r",
    "racket",
    "re2c",
    "readline",
    "regex",
    "rego",
    "requirements",
    "rescript",
    "robot",
    "ron",
    "rst",
    "ruby",
    "rust",
    "scala",
    "scheme",
    "scss",
    "smali",
    "smithy",
    "solidity",
    "sparql",
    "sql",
    "squirrel",
    "ssh_config",
    "stan",
    "stanfunctions",
    "starlark",
    "strudel_mini",
    "supercollider",
    "svelte",
    "swift",
    "tablegen",
    "tcl",
    "teal",
    "templ",
    "terraform",
    "textproto",
    "thrift",
    "tidal_mini",
    "tlaplus",
    "tmux",
    "toml",
    "tsv",
    "tsx",
    "turtle",
    "twig",
    "typescript",
    "typst",
    "udev",
    "ungrammar",
    "uxntal",
    "v",
    "vb",
    "verilog",
    "vhdl",
    "vim",
    "vimdoc",
    "vue",
    "wast",
    "wat",
    "wgsl",
    "wit",
    "xcompose",
    "xml",
    "yaml",
    "yuck",
    "zig",
    "zsh",
];

/// `true` when `a` sorts strictly before `b` in lexicographic byte order,
/// the same order `str`'s `Ord` implementation (and therefore
/// `binary_search` on `&[&str]`) uses. Written by hand because trait
/// comparison operators are not callable in `const fn`.
const fn bytes_strictly_less(a: &[u8], b: &[u8]) -> bool {
    let mut i = 0;
    while i < a.len() && i < b.len() {
        if a[i] != b[i] {
            return a[i] < b[i];
        }
        i += 1;
    }
    // Equal up to the shorter length: the shorter one sorts first, and
    // identical inputs are not *strictly* less.
    a.len() < b.len()
}

/// `true` when every entry of `names` sorts strictly before its successor,
/// i.e. the slice is sorted and free of duplicates. Empty and one-element
/// slices are trivially ascending.
const fn is_strictly_ascending(names: &[&str]) -> bool {
    let mut i = 1;
    while i < names.len() {
        if !bytes_strictly_less(names[i - 1].as_bytes(), names[i].as_bytes()) {
            return false;
        }
        i += 1;
    }
    true
}

// Compile-time guard for the invariant the binary search depends on.
const _: () = assert!(
    is_strictly_ascending(VERIFIED_EMIT_PROTOCOLS),
    "VERIFIED_EMIT_PROTOCOLS must be sorted in strictly ascending byte order"
);
725
726impl ParserRegistry {
727    /// Decorate an [`AbstractSchema`] with the layout enrichment
728    /// fibre required by `emit_pretty_with_protocol` and friends.
729    ///
730    /// This is the put-direction of the parse / decorate / emit lens
731    /// at `protocol`. The implementation routes through the same
732    /// grammar walker as `emit_pretty` followed by `parse`, so the
733    /// resulting [`DecoratedSchema`] carries a complete layout fibre
734    /// recovered by the parse-side walker — `start-byte`, `end-byte`,
735    /// every `interstitial-N`, `chose-alt-fingerprint`, and
736    /// `chose-alt-child-kinds`.
737    ///
738    /// The section law holds up to kind- and edge-multiset
739    /// equivalence: `forget_layout(decorate(a)) ≅ a` modulo vertex-id
740    /// renaming. Grammars where parsing consolidates tokens that the
741    /// emitter rendered as separate sequences (e.g. lilypond's `c'4`
742    /// re-parses to a single note) do not preserve a one-to-one
743    /// vertex correspondence, so the result's vertex IDs are always
744    /// freshly minted by the parser.
745    ///
746    /// # Errors
747    ///
748    /// Returns [`ParseError::UnknownLanguage`] when `protocol` is not
749    /// registered, [`ParseError::SchemaConstruction`] when the
750    /// abstract schema was built for a different protocol than
751    /// `protocol`, [`ParseError::EmitFailed`] when the grammar walker
752    /// cannot render the abstract schema (missing `grammar.json`,
753    /// vertex kind not a rule), or any other parser error if the
754    /// re-parse step rejects the canonical bytes (a regression in the
755    /// parse/emit pipeline, not a user bug).
756    pub fn decorate(
757        &self,
758        protocol: &str,
759        abstract_schema: &AbstractSchema,
760        policy: &LayoutPolicy,
761    ) -> Result<DecoratedSchema, ParseError> {
762        let parser = self
763            .parsers
764            .get(protocol)
765            .ok_or_else(|| ParseError::UnknownLanguage {
766                extension: protocol.to_owned(),
767            })?;
768        // `decorate_with_parser` enforces the protocol-match invariant
769        // between the parser and the abstract schema, so no extra guard
770        // is needed here.
771        crate::decorate::decorate_with_parser(parser.as_ref(), abstract_schema, policy)
772    }
773
774    /// Render an [`AbstractSchema`] to canonical source bytes under
775    /// `policy`.
776    ///
777    /// Implementation note: this is exactly the first emit step of
778    /// [`decorate`](Self::decorate) — `decorate` then re-parses to
779    /// recover the layout fibre, but if all the caller wants is the
780    /// bytes, the re-parse is wasted work. Going through
781    /// `emit_pretty_with_policy` directly preserves every field of
782    /// `policy` in the output (`separator`, `newline`, `indent_width`,
783    /// `line_break_after`, `indent_open` / `indent_close`).
784    ///
785    /// # Errors
786    ///
787    /// See [`decorate`](Self::decorate).
788    pub fn pretty_with_protocol(
789        &self,
790        protocol: &str,
791        abstract_schema: &AbstractSchema,
792        policy: &LayoutPolicy,
793    ) -> Result<Vec<u8>, ParseError> {
794        let parser = self
795            .parsers
796            .get(protocol)
797            .ok_or_else(|| ParseError::UnknownLanguage {
798                extension: protocol.to_owned(),
799            })?;
800        check_protocol_match(
801            protocol,
802            abstract_schema.as_schema(),
803            "pretty_with_protocol",
804        )?;
805        parser.emit_pretty_with_policy(abstract_schema.as_schema(), policy)
806    }
807
808    /// Return the canonical [`Protolens`](panproto_lens::Protolens)
809    /// describing the parse / decorate / emit relationship at
810    /// `protocol`.
811    ///
812    /// The protolens encodes the schema-level structure of the
813    /// relationship: source-side strips the layout enrichment fibre,
814    /// target-side adds it via the registered
815    /// [`LayoutEnricher`](panproto_lens::enrichment_registry::LayoutEnricher).
816    /// It composes with the rest of the `panproto-lens` protolens
817    /// algebra for chain-law reasoning. The operational entry points
818    /// for running the relationship on real schemas are
819    /// [`decorate`](Self::decorate),
820    /// [`pretty_with_protocol`](Self::pretty_with_protocol), and
821    /// [`emit_pretty_with_protocol`](Self::emit_pretty_with_protocol).
822    ///
823    /// # Errors
824    ///
825    /// Returns [`ParseError::UnknownLanguage`] when `protocol` is not
826    /// registered.
827    pub fn parse_emit_protolens(
828        &self,
829        protocol: &str,
830        policy: &LayoutPolicy,
831    ) -> Result<panproto_lens::Protolens, ParseError> {
832        if !self.parsers.contains_key(protocol) {
833            return Err(ParseError::UnknownLanguage {
834                extension: protocol.to_owned(),
835            });
836        }
837        Ok(crate::parse_emit_protolens::parse_emit_protolens(
838            protocol, policy,
839        ))
840    }
841
842    /// Get the theory metadata for a specific protocol.
843    #[must_use]
844    pub fn theory_meta(&self, protocol: &str) -> Option<&ExtractedTheoryMeta> {
845        self.parsers.get(protocol).map(|p| p.theory_meta())
846    }
847
848    /// List all registered protocol names.
849    pub fn protocol_names(&self) -> impl Iterator<Item = &str> {
850        self.parsers.keys().map(String::as_str)
851    }
852
853    /// O(1) lookup: is a parser already registered for `protocol`?
854    ///
855    /// Useful for dedup at the registration boundary. The umbrella
856    /// `panproto-grammars-all` companion pack overlaps with both the
857    /// built-in core grammars and every per-group pack; callers can
858    /// short-circuit before re-registering rather than scanning
859    /// `protocol_names()` linearly.
860    #[must_use]
861    pub fn has_parser(&self, protocol: &str) -> bool {
862        self.parsers.contains_key(protocol)
863    }
864
865    /// Get the number of registered parsers.
866    #[must_use]
867    pub fn len(&self) -> usize {
868        self.parsers.len()
869    }
870
871    /// Check if the registry is empty.
872    #[must_use]
873    pub fn is_empty(&self) -> bool {
874        self.parsers.is_empty()
875    }
876}
877
878impl Default for ParserRegistry {
879    fn default() -> Self {
880        Self::new()
881    }
882}
883
884/// Guard against running parser-tied operations on a schema built
885/// for a different protocol. Catches the user-visible error of
886/// passing (say) a JSON schema to a Python parser before the
887/// underlying grammar walker would surface it as an opaque rule
888/// mismatch.
889fn check_protocol_match(
890    expected: &str,
891    schema: &Schema,
892    operation: &'static str,
893) -> Result<(), ParseError> {
894    if schema.protocol == expected {
895        Ok(())
896    } else {
897        Err(ParseError::SchemaConstruction {
898            reason: format!(
899                "{operation}: protocol mismatch — registry called with '{expected}' but \
900                 schema carries protocol '{}'",
901                schema.protocol,
902            ),
903        })
904    }
905}