panproto_parse/decorate.rs
1//! The put-direction of the parse / decorate / emit lens.
2//!
3//! `decorate` attaches a complete layout enrichment fibre to an
4//! [`AbstractSchema`](panproto_schema::AbstractSchema), producing a
5//! [`DecoratedSchema`](panproto_schema::DecoratedSchema) that
6//! `emit_pretty_with_protocol` can render byte-for-byte. It is a
//! section of the schema-level forgetful map `U`, namely
//! [`DecoratedSchema::forget_layout`](panproto_schema::DecoratedSchema::forget_layout),
//! up to kind- *and* edge-multiset equivalence. Edge-multiset equality
//! is the ordering invariant; exact vertex-ID equality cannot hold
//! because vertex IDs are reborn by the re-parse.
11//!
12//! ## Implementation strategy
13//!
14//! 1. Render the abstract schema to canonical bytes via
15//! `emit_pretty_with_policy`. The de-novo emitter walks
16//! `grammar.json` production rules driven by the caller-supplied
17//! [`LayoutPolicy`].
18//!
19//! 2. Re-parse those bytes. The parse walker attaches the full layout
20//! fibre (`start-byte`, `end-byte`, every `interstitial-N`,
21//! `chose-alt-fingerprint`, `chose-alt-child-kinds`) and invents
22//! fresh vertex IDs.
23//!
24//! Step 2's ID renaming is intrinsic to the parse walker: tree-sitter
25//! reparses can consolidate or fragment tokens at boundaries that the
26//! emit pipeline doesn't know how to mirror (e.g. lilypond's
27//! `c'4` parses as a single note even when the emitter rendered three
28//! tokens `c`, `'`, `4`). Recovering vertex IDs from the abstract
29//! input by parallel walk therefore *cannot* succeed for all
30//! grammars; the documented section law holds at the standard
31//! granularity of [`kind_multiset`](panproto_schema::kind_multiset)
32//! and [`edge_multiset`](panproto_schema::edge_multiset).
33//!
34//! ## Laws
35//!
36//! For every `a : AbstractSchema` and `p : LayoutPolicy`:
37//!
38//! - **Section law (mod kind- and edge-multiset):**
39//! `kind_multiset(forget_layout(decorate(a, p)).as_schema()) ==
40//! kind_multiset(a.as_schema())` AND
41//! `edge_multiset(forget_layout(decorate(a, p)).as_schema()) ==
42//! edge_multiset(a.as_schema())`,
43//! for every protocol with a vendored grammar. The edge-multiset
44//! half is the load-bearing one for sequenced data: order of notes
45//! in a `Pattern<MidiEvent>`, of tokens in a parsed AST, of items
46//! in a homogeneous list, would all collapse without it. Verified
47//! by the `decorate_section_law` integration test.
48//! - **Policy fidelity:** the bytes produced by `pretty_with_protocol`
49//! honour every field of `p` (separator, newline, indent_width,
50//! line_break_after, indent_open / close).
51
52use panproto_gat::{EnrichmentKind, LayoutPolicySpec};
53use panproto_lens::enrichment_registry::{LayoutEnricher, register_enricher};
54use panproto_lens::error::LensError;
55use panproto_schema::{AbstractSchema, DecoratedSchema, Schema};
56
57use crate::error::ParseError;
58use crate::layout_policy::{LayoutPolicy, policy_from_spec};
59use crate::registry::AstParser;
60
61/// Decorate an abstract schema by routing through `emit_pretty_with_policy +
62/// parse` against `parser`.
63///
64/// # Errors
65///
66/// Returns [`ParseError::EmitFailed`] when the abstract schema cannot
67/// be rendered (missing grammar; vertex kind not a grammar rule), or
68/// any other [`ParseError`] variant if the parser cannot re-ingest
69/// its canonical output — the latter indicates a regression in the
70/// parse/emit pipeline rather than a user bug.
71pub fn decorate_with_parser(
72 parser: &dyn AstParser,
73 abstract_schema: &AbstractSchema,
74 policy: &LayoutPolicy,
75) -> Result<DecoratedSchema, ParseError> {
76 let decorated = decorate_schema(parser, abstract_schema.as_schema(), policy)?;
77 Ok(DecoratedSchema::wrap_unchecked(decorated))
78}
79
80/// Schema-level decorate driver shared by [`decorate_with_parser`] and
81/// the [`ParserLayoutEnricher`] adapter installed in the lens crate's
82/// enrichment registry.
83fn decorate_schema(
84 parser: &dyn AstParser,
85 abstract_schema: &Schema,
86 policy: &LayoutPolicy,
87) -> Result<Schema, ParseError> {
88 if abstract_schema.protocol != parser.protocol_name() {
89 return Err(ParseError::SchemaConstruction {
90 reason: format!(
91 "decorate: protocol mismatch — parser is '{}' but schema is '{}'",
92 parser.protocol_name(),
93 abstract_schema.protocol,
94 ),
95 });
96 }
97 let bytes = parser.emit_pretty_with_policy(abstract_schema, policy)?;
98 parser.parse(&bytes, "decorate")
99}
100
101/// Adapter exposing one registered parser as a
102/// [`LayoutEnricher`](panproto_lens::enrichment_registry::LayoutEnricher).
103///
104/// Held by the global enrichment registry; one driver is installed
105/// per protocol at [`ParserRegistry::register`](crate::ParserRegistry::register)
106/// time so that
107/// [`TheoryTransform::AddEnrichment`](panproto_gat::TheoryTransform::AddEnrichment)
108/// dispatches to the right grammar walker without `panproto-lens`
109/// depending on `panproto-parse`.
110struct ParserLayoutEnricher {
111 protocol: String,
112 parser: std::sync::Arc<dyn AstParser>,
113}
114
115impl LayoutEnricher for ParserLayoutEnricher {
116 fn enrich(&self, schema: &Schema, policy: &LayoutPolicySpec) -> Result<Schema, LensError> {
117 let runtime_policy = policy_from_spec(policy);
118 decorate_schema(self.parser.as_ref(), schema, &runtime_policy).map_err(|e| {
119 LensError::EnrichmentSynthesisFailed {
120 kind: EnrichmentKind::Layout,
121 enricher: self.protocol.clone(),
122 detail: e.to_string(),
123 }
124 })
125 }
126}
127
128/// Install a layout-enrichment driver for `parser` into the global
129/// enrichment registry. Called by
130/// [`ParserRegistry::register`](crate::ParserRegistry::register).
131pub(crate) fn register_layout_enricher(parser: std::sync::Arc<dyn AstParser>) {
132 let protocol = parser.protocol_name().to_owned();
133 register_enricher(
134 EnrichmentKind::Layout,
135 protocol.clone(),
136 std::sync::Arc::new(ParserLayoutEnricher { protocol, parser }),
137 );
138}