// panproto-schema 0.50.0
//
// Schema representation for panproto (documentation source listing).
//! Typed newtypes for the abstract / decorated schema distinction.
//!
//! A bare [`Schema`] can be in either of two states: it is *abstract*
//! when no constraint sort belongs to the layout enrichment fibre, and
//! it is *decorated* when the parser walker has attached layout
//! witnesses (byte spans, interstitials, CHOICE discriminators).
//!
//! These newtypes lift that distinction to a Rust type so that the
//! parse/decorate/emit lens can be wired through the type system:
//! `decorate` consumes an [`AbstractSchema`] and returns a
//! [`DecoratedSchema`]; the operational `emit_pretty` and `decorate`
//! entry points keep abstract and decorated inputs distinguishable
//! at every call site without `Deref` erasure.
//!
//! ## Construction
//!
//! - [`AbstractSchema::from_layout_free`] validates that no
//!   layout-fibre constraint is present (returns
//!   [`LayoutConstraintsPresent`] when the invariant fails); this is
//!   the checked entry that callers should prefer.
//! - [`AbstractSchema::from_layout_free_unchecked`] skips the scan
//!   for callers that just ran `forget_layout` themselves.
//! - [`DecoratedSchema::wrap_unchecked`] wraps a [`Schema`] without
//!   checking the layout fibre. The legitimate sources are the
//!   parse walker's output and the `decorate` synthesis driver;
//!   misuse degrades emit correctness silently.
//!
//! Construction is *not* sealed at the type system level
//! (panproto's `Schema` does not yet carry a phantom theory parameter
//! that would let us refuse arbitrary cross-crate constructions).
//! The checked / unchecked split is the load-bearing safety net.

use crate::Schema;
use crate::schema::Constraint;

/// Error returned by [`AbstractSchema::from_layout_free`] when the
/// input schema carries constraints in the layout enrichment fibre
/// and therefore cannot be treated as abstract.
///
/// The `Display` text reports [`count`](Self::count) and names
/// `Schema::forget_layout` as the remedy. The type is `Copy`, so it
/// can be passed around and compared cheaply.
#[derive(Debug, Clone, Copy, PartialEq, Eq, thiserror::Error)]
#[error(
    "cannot construct AbstractSchema: {count} layout-fibre constraint(s) present; \
     call Schema::forget_layout first"
)]
pub struct LayoutConstraintsPresent {
    /// Number of offending constraint entries detected, totalled
    /// across every entry of the rejected schema's constraint map.
    pub count: usize,
}

/// A schema with no layout enrichment.
///
/// Carrying only vertex kinds, edges, and content-level constraints
/// (`literal-value`, `field:*`, and any protocol-defined constraint
/// sorts that are *not* in the layout fibre). Typical sources:
///
/// - [`SchemaBuilder::build_abstract`](crate::SchemaBuilder::build_abstract),
///   which checks the invariant before wrapping.
/// - [`DecoratedSchema::forget_layout`], which projects a decorated
///   schema to its abstract base.
/// - [`AbstractSchema::from_layout_free`] for callers wrapping a
///   `Schema` produced by other means (validates on entry).
///
/// The wrapper deliberately does not implement `Deref`; read access
/// to the underlying [`Schema`] goes through
/// [`AbstractSchema::as_schema`].
#[derive(Clone, Debug)]
pub struct AbstractSchema {
    // The wrapped schema. Kept private so that, within this file, the
    // only ways in are the checked / unchecked constructors and the
    // only way out is a shared borrow.
    inner: Schema,
}

/// A schema carrying a complete layout enrichment over its abstract
/// content.
///
/// Typical sources:
///
/// - The result of `ParserRegistry::parse_with_protocol` wrapped via
///   [`DecoratedSchema::wrap_unchecked`].
/// - The return value of `ParserRegistry::decorate` (the put-direction
///   of the parse / decorate / emit lens).
///
/// Direct serialization round-trips a `Schema`; the newtype is
/// enforced only at the Rust type level. Nothing in this type
/// verifies that a layout fibre is actually present: the only
/// constructor in this file, [`DecoratedSchema::wrap_unchecked`],
/// trusts the caller.
#[derive(Clone, Debug)]
pub struct DecoratedSchema {
    // The wrapped schema. Private so callers must go through
    // `as_schema` (shared borrow) or `forget_layout` (projection).
    inner: Schema,
}

/// Per-vertex view of the layout witness data carried by a
/// [`DecoratedSchema`].
///
/// This is a read-only projection: it borrows the underlying
/// constraint list so callers can inspect a vertex's byte span,
/// interstitial text, or chosen CHOICE alternative without round-
/// tripping through the schema-level constraint maps.
///
/// The view is `Copy` (it is a single borrowed slice), so it can be
/// passed by value freely for as long as the borrow `'a` is live.
#[derive(Clone, Copy, Debug)]
pub struct LayoutWitness<'a> {
    // The vertex's *entire* constraint list as stored in the schema,
    // content-level constraints included; the accessor methods filter
    // or search it on demand rather than copying a layout-only subset.
    constraints: &'a [Constraint],
}

impl AbstractSchema {
    /// Checked constructor: wrap `schema` only if it carries no
    /// layout-fibre constraint.
    ///
    /// Every constraint list in `schema.constraints` is scanned and
    /// each constraint whose sort is recognised by
    /// `panproto_gat::is_layout_sort` is counted. The scan runs
    /// unconditionally (there is no debug-only gating), so the
    /// invariant holds in release builds as well. A total of zero
    /// yields the wrapped schema; any other total is reported back to
    /// the caller so the failure can be diagnosed.
    ///
    /// # Errors
    ///
    /// Returns [`LayoutConstraintsPresent`] carrying the number of
    /// offending constraints when at least one layout-fibre
    /// constraint is found. Call [`Schema::forget_layout`] first if a
    /// decorated schema needs to be downcast.
    pub fn from_layout_free(schema: Schema) -> Result<Self, LayoutConstraintsPresent> {
        // Tally layout-fibre constraints per constraint list, then
        // total the tallies; the borrow of `schema` ends before it is
        // moved into the wrapper below.
        let count: usize = schema
            .constraints
            .values()
            .map(|per_vertex| {
                per_vertex
                    .iter()
                    .filter(|constraint| panproto_gat::is_layout_sort(constraint.sort.as_ref()))
                    .count()
            })
            .sum();

        match count {
            0 => Ok(Self { inner: schema }),
            _ => Err(LayoutConstraintsPresent { count }),
        }
    }

    /// Unchecked constructor: wrap `schema` without scanning for
    /// layout-fibre constraints.
    ///
    /// Intended for callers that have *just* run `forget_layout` on
    /// the input and want to avoid a redundant scan. Nothing here
    /// verifies the invariant, so misuse silently yields an
    /// `AbstractSchema` that is not actually layout-free; prefer
    /// [`from_layout_free`](Self::from_layout_free) everywhere else.
    #[must_use]
    pub const fn from_layout_free_unchecked(schema: Schema) -> Self {
        Self { inner: schema }
    }

    /// Shared borrow of the wrapped [`Schema`].
    ///
    /// This is the explicit bridge back to the raw type. The wrapper
    /// has no `Deref<Target = Schema>` impl on purpose: an implicit
    /// deref would erase the abstract / decorated distinction at
    /// call sites, so each consumer has to opt in by calling this.
    #[must_use]
    pub const fn as_schema(&self) -> &Schema {
        &self.inner
    }

    /// Name of the protocol the wrapped schema belongs to.
    #[must_use]
    pub fn protocol(&self) -> &str {
        &self.inner.protocol
    }

    /// Number of vertices in the wrapped schema, as reported by
    /// `Schema::vertex_count`.
    #[must_use]
    pub fn vertex_count(&self) -> usize {
        self.inner.vertex_count()
    }
}

impl DecoratedSchema {
    /// Wrap `schema` as a [`DecoratedSchema`] with no validation of
    /// the layout fibre.
    ///
    /// The type system does not enforce construction (panproto's
    /// `Schema` does not yet carry a phantom theory parameter), so
    /// the caller is trusted. The intended inputs are:
    ///
    /// - Output of [`ParserRegistry::parse_with_protocol`](https://docs.rs/panproto-parse) —
    ///   the parse walker attaches a complete layout fibre.
    /// - Output of [`ParserRegistry::decorate`](https://docs.rs/panproto-parse) —
    ///   the put-direction of the parse/emit lens.
    ///
    /// Wrapping a hand-built or otherwise abstract schema is
    /// well-defined: later `emit_pretty` calls fall back to
    /// grammar-walking because the layout fibre is empty. What is
    /// lost is the byte-position-based round-trip that a truly
    /// decorated schema provides.
    #[must_use]
    pub const fn wrap_unchecked(schema: Schema) -> Self {
        Self { inner: schema }
    }

    /// Shared borrow of the wrapped [`Schema`].
    ///
    /// Like [`AbstractSchema::as_schema`], this is the explicit way
    /// to reach the raw type; there is intentionally no `Deref` impl.
    #[must_use]
    pub const fn as_schema(&self) -> &Schema {
        &self.inner
    }

    /// Name of the protocol the wrapped schema belongs to.
    #[must_use]
    pub fn protocol(&self) -> &str {
        &self.inner.protocol
    }

    /// Project to the abstract schema by dropping every layout-fibre
    /// constraint (the get-direction of the lens, expressed in types).
    ///
    /// Infallible: the result of `Schema::forget_layout` is wrapped
    /// through the unchecked constructor, relying on that method's
    /// guarantee that its output is layout-free. `self` is only
    /// borrowed, so the decorated schema remains usable afterwards.
    #[must_use]
    pub fn forget_layout(&self) -> AbstractSchema {
        let stripped = self.inner.forget_layout();
        AbstractSchema::from_layout_free_unchecked(stripped)
    }

    /// Read-only view over the constraints recorded for `vertex_id`.
    ///
    /// Returns `None` when the schema's constraint map has no entry
    /// for `vertex_id`. Otherwise returns `Some` view over that
    /// vertex's whole constraint list, content-level constraints
    /// included. If none of them belong to the layout fibre, the view
    /// still exists, but [`LayoutWitness::iter`] yields nothing and
    /// the named accessors (`start_byte`, `end_byte`, …) return
    /// `None`.
    #[must_use]
    pub fn layout_witness(&self, vertex_id: &str) -> Option<LayoutWitness<'_>> {
        self.inner
            .constraints
            .get(vertex_id)
            .map(|per_vertex| LayoutWitness {
                constraints: per_vertex,
            })
    }
}

impl<'a> LayoutWitness<'a> {
    /// Raw value of the first constraint whose sort is exactly `sort`,
    /// or `None` when the vertex has no such constraint.
    fn value_of(&self, sort: &str) -> Option<&'a str> {
        self.constraints
            .iter()
            .find(|constraint| constraint.sort.as_ref() == sort)
            .map(|constraint| constraint.value.as_str())
    }

    /// Iterate over the constraints at this vertex that belong to the
    /// layout fibre (as decided by `panproto_gat::is_layout_sort`),
    /// in their stored order. Content-level constraints are skipped.
    pub fn iter(&self) -> impl Iterator<Item = &'a Constraint> + '_ {
        self.constraints
            .iter()
            .filter(|constraint| panproto_gat::is_layout_sort(constraint.sort.as_ref()))
    }

    /// Value of the `start-byte` constraint parsed as `usize`.
    ///
    /// `None` when the constraint is absent, and also when the first
    /// `start-byte` value does not parse as `usize`.
    #[must_use]
    pub fn start_byte(&self) -> Option<usize> {
        self.value_of("start-byte")
            .and_then(|raw| raw.parse().ok())
    }

    /// Value of the `end-byte` constraint parsed as `usize`.
    ///
    /// `None` when the constraint is absent, and also when the first
    /// `end-byte` value does not parse as `usize`.
    #[must_use]
    pub fn end_byte(&self) -> Option<usize> {
        self.value_of("end-byte").and_then(|raw| raw.parse().ok())
    }

    /// The recorded `chose-alt-fingerprint` value, borrowed from the
    /// schema, or `None` when no such constraint exists.
    #[must_use]
    pub fn chose_alt_fingerprint(&self) -> Option<&'a str> {
        self.value_of("chose-alt-fingerprint")
    }

    /// The recorded `chose-alt-child-kinds` value, borrowed from the
    /// schema, or `None` when no such constraint exists.
    #[must_use]
    pub fn chose_alt_child_kinds(&self) -> Option<&'a str> {
        self.value_of("chose-alt-child-kinds")
    }
}

#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;
    use crate::{EdgeRule, Protocol, SchemaBuilder, SchemaError};
    use panproto_gat::Name;

    /// Minimal protocol with a single object kind (`node`) and one
    /// edge rule (`child_of` between nodes).
    fn empty_protocol() -> Protocol {
        let child_of = EdgeRule {
            edge_kind: "child_of".to_owned(),
            src_kinds: vec!["node".to_owned()],
            tgt_kinds: vec!["node".to_owned()],
        };
        Protocol {
            name: "test".to_owned(),
            schema_theory: "ThTest".to_owned(),
            instance_theory: "ThWType".to_owned(),
            edge_rules: vec![child_of],
            obj_kinds: vec!["node".to_owned()],
            ..Default::default()
        }
    }

    /// Build a schema over [`empty_protocol`] with one `node` vertex
    /// named `v0`, attaching the given `(sort, value)` constraints to
    /// it in order.
    fn schema_with(constraints: &[(&'static str, &'static str)]) -> Schema {
        let protocol = empty_protocol();
        let seeded = SchemaBuilder::new(&protocol)
            .vertex("v0", "node", None)
            .unwrap();
        constraints
            .iter()
            .fold(seeded, |builder, &(sort, value)| {
                builder.constraint("v0", sort, value)
            })
            .build()
            .unwrap()
    }

    /// `forget_layout` removes the byte-span constraints and keeps the
    /// content-level one.
    #[test]
    fn forget_layout_strips_layout_sorts_only() {
        let original = schema_with(&[
            ("start-byte", "10"),
            ("end-byte", "20"),
            ("literal-value", "hi"),
        ]);

        let stripped = original.forget_layout();
        let remaining = stripped.constraints.get(&Name::from("v0")).unwrap();
        assert_eq!(remaining.len(), 1);
        assert_eq!(remaining[0].sort.as_ref(), "literal-value");
        assert!(stripped.is_layout_free());
    }

    /// Applying `forget_layout` a second time changes nothing.
    #[test]
    fn forget_layout_is_idempotent() {
        let original = schema_with(&[
            ("interstitial-0", " "),
            ("chose-alt-fingerprint", "{ }"),
        ]);

        let once = original.forget_layout();
        let twice = once.forget_layout();
        assert_eq!(once.constraints, twice.constraints);
        assert!(twice.is_layout_free());
    }

    /// The witness parses `start-byte` / `end-byte` back to integers.
    #[test]
    fn decorated_layout_witness_round_trips_byte_span() {
        let decorated =
            DecoratedSchema::wrap_unchecked(schema_with(&[("start-byte", "3"), ("end-byte", "7")]));

        let witness = decorated.layout_witness("v0").unwrap();
        assert_eq!(witness.start_byte(), Some(3));
        assert_eq!(witness.end_byte(), Some(7));
    }

    /// `build_abstract` succeeds when only content-level constraints
    /// are present.
    #[test]
    fn build_abstract_accepts_layout_free_input() {
        let protocol = empty_protocol();
        let outcome = SchemaBuilder::new(&protocol)
            .vertex("v0", "node", None)
            .unwrap()
            .constraint("v0", "literal-value", "hi")
            .build_abstract();

        assert!(
            outcome.is_ok(),
            "build_abstract should accept content-only constraints"
        );
        assert!(outcome.unwrap().as_schema().is_layout_free());
    }

    /// `build_abstract` fails with the dedicated error variant when a
    /// layout-fibre constraint is present.
    #[test]
    fn build_abstract_rejects_layout_constraints() {
        let protocol = empty_protocol();
        let outcome = SchemaBuilder::new(&protocol)
            .vertex("v0", "node", None)
            .unwrap()
            .constraint("v0", "start-byte", "0")
            .build_abstract();

        assert!(matches!(
            outcome,
            Err(SchemaError::LayoutConstraintsOnAbstractBuild)
        ));
    }

    /// `build_decorated` accepts layout-fibre constraints.
    #[test]
    fn build_decorated_accepts_any_constraint_set() {
        let protocol = empty_protocol();
        let outcome = SchemaBuilder::new(&protocol)
            .vertex("v0", "node", None)
            .unwrap()
            .constraint("v0", "start-byte", "0")
            .constraint("v0", "end-byte", "4")
            .build_decorated();

        assert!(
            outcome.is_ok(),
            "build_decorated does not validate the fibre"
        );
    }

    /// `LayoutWitness::iter` yields layout sorts and skips
    /// content-level ones.
    #[test]
    fn layout_witness_iter_filters_to_layout_only() {
        let decorated = DecoratedSchema::wrap_unchecked(schema_with(&[
            ("start-byte", "0"),
            ("literal-value", "hi"),
            ("interstitial-0", " "),
        ]));

        let witness = decorated.layout_witness("v0").unwrap();
        let sorts: Vec<&str> = witness.iter().map(|c| c.sort.as_ref()).collect();
        assert!(sorts.contains(&"start-byte"));
        assert!(sorts.contains(&"interstitial-0"));
        assert!(!sorts.contains(&"literal-value"));
    }

    /// The CHOICE accessors return the recorded strings verbatim.
    #[test]
    fn layout_witness_returns_chose_alt_constraints() {
        let decorated = DecoratedSchema::wrap_unchecked(schema_with(&[
            ("chose-alt-fingerprint", "{ }"),
            ("chose-alt-child-kinds", "symbol punctuation"),
        ]));

        let witness = decorated.layout_witness("v0").unwrap();
        assert_eq!(witness.chose_alt_fingerprint(), Some("{ }"));
        assert_eq!(witness.chose_alt_child_kinds(), Some("symbol punctuation"));
    }

    /// Asking for a vertex id that was never added yields `None`.
    #[test]
    fn layout_witness_returns_none_for_missing_vertex() {
        let decorated = DecoratedSchema::wrap_unchecked(schema_with(&[]));
        assert!(decorated.layout_witness("nonexistent").is_none());
    }

    /// The checked constructor reports how many layout-fibre
    /// constraints it found.
    #[test]
    fn from_layout_free_reports_offending_count() {
        let layout_heavy = schema_with(&[
            ("start-byte", "0"),
            ("end-byte", "4"),
            ("chose-alt-fingerprint", "{"),
        ]);

        let err = AbstractSchema::from_layout_free(layout_heavy).unwrap_err();
        assert_eq!(err.count, 3);
    }
}