Skip to main content

panproto_inst/
metadata.rs

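//! Metadata types for W-type instance nodes.
//!
//! Nodes carry optional metadata: discriminators (for union types),
//! extra fields (for round-trip preservation), opaque values, a
//! position within an ordered collection, a structural shape
//! ([`NodeShape`]), and out-of-band annotations.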
5
6use std::collections::HashMap;
7
8use panproto_gat::Name;
9use serde::{Deserialize, Serialize};
10
11use crate::value::{FieldPresence, Value};
12
/// Sum type discriminating the structural shape of a node beyond its
/// schema anchor.
///
/// The schema anchor identifies which vertex of the protocol schema
/// the node sits over; this sum captures categorical information
/// orthogonal to that, namely whether the node is the source of a
/// free-monoid (list) structure, a renamed element from XML aliasing,
/// or an inline text segment in mixed-content XML. Encoding these as
/// typed variants rather than reserved-string entries on
/// [`Node::annotations`] gives the type system jurisdiction over
/// "marker only on the right node shape" and removes the collision
/// risk with user-supplied annotation keys.
///
/// `Default` is `Plain`: a regular schema-anchored node with no
/// extra structural shape.
///
/// # Serialized form
///
/// The serde attributes below select an internally tagged
/// representation: the variant name is written in `snake_case` under
/// a `kind` key (for example, `NodeShape::List` becomes
/// `{"kind":"list"}`), and `XmlElement` carries its `tag` field next
/// to `kind`.
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum NodeShape {
    /// Regular schema-anchored node. The default.
    #[default]
    Plain,
    /// Node is the source of an ordered collection (free-monoid /
    /// list functor). Used by `to_json` and friends to emit a JSON
    /// array even when zero or one child arcs are present.
    List,
    /// Node was produced by an XML ALIAS that renamed the parser's
    /// internal kind to the carried tag. Carries the original XML
    /// element name so emitters can write back `<NAF>...</NAF>`
    /// rather than the schema anchor.
    XmlElement {
        /// The XML tag name as it appeared in the source document.
        tag: Name,
    },
    /// Inline text run inside a mixed-content XML element. Emitters
    /// write `node.value` as bare text without surrounding start /
    /// end tags so `<p>x<em>y</em>z</p>` round-trips with text and
    /// element children interleaved in source order.
    XmlTextSegment,
}
52
/// A node in a W-type instance tree.
///
/// Each node is anchored to a schema vertex and carries optional
/// value data, a discriminator (for union vertices), and extra
/// fields for round-trip fidelity. It additionally records an
/// optional position within an ordered collection, a structural
/// [`NodeShape`], and out-of-band annotations.
///
/// When serialized, `position`, `shape` and `annotations` are left
/// out whenever they hold their empty / default value, and all three
/// fall back to that value when absent on input.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Node {
    /// Unique numeric identifier within the instance.
    pub id: u32,
    /// The schema vertex this node is anchored to.
    pub anchor: Name,
    /// The node's value, if it is a leaf.
    ///
    /// [`Node::is_leaf`] reports `true` exactly when this is `Some`.
    pub value: Option<FieldPresence>,
    /// Discriminator for union-typed vertices (e.g., `"$type"` value).
    pub discriminator: Option<Name>,
    /// Extra fields preserved for round-trip fidelity.
    ///
    /// NOTE(review): unlike `annotations`, this map carries no
    /// `#[serde(default)]` / `skip_serializing_if` attribute, so it
    /// is written out even when empty; confirm that is intentional.
    pub extra_fields: HashMap<String, Value>,
    /// Position in an ordered collection (if any).
    ///
    /// Omitted from the serialized form when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub position: Option<u32>,
    /// Structural shape of the node, orthogonal to the schema anchor.
    ///
    /// Defaults to [`NodeShape::Plain`]. Set to [`NodeShape::List`],
    /// [`NodeShape::XmlElement`], or [`NodeShape::XmlTextSegment`] by
    /// the CST extractors when they recover a list / aliased
    /// element / inline text run from a parsed document; consumed by
    /// the corresponding emitters to drive serialisation choices.
    ///
    /// Omitted from the serialized form when it is `Plain`.
    #[serde(default, skip_serializing_if = "node_shape_is_default")]
    pub shape: NodeShape,
    /// Out-of-band annotations (metadata distinct from data).
    ///
    /// Omitted from the serialized form when the map is empty.
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub annotations: HashMap<String, Value>,
}
86
87const fn node_shape_is_default(shape: &NodeShape) -> bool {
88    matches!(shape, NodeShape::Plain)
89}
90
91impl Node {
92    /// Create a new node with the given id and anchor vertex.
93    #[must_use]
94    pub fn new(id: u32, anchor: impl Into<Name>) -> Self {
95        Self {
96            id,
97            anchor: anchor.into(),
98            value: None,
99            discriminator: None,
100            extra_fields: HashMap::new(),
101            position: None,
102            shape: NodeShape::Plain,
103            annotations: HashMap::new(),
104        }
105    }
106
107    /// Set the node's structural shape.
108    #[must_use]
109    pub fn with_shape(mut self, shape: NodeShape) -> Self {
110        self.shape = shape;
111        self
112    }
113
114    /// Returns `true` iff the node represents an ordered collection
115    /// (the source of a free-monoid structure). Equivalent to
116    /// `matches!(self.shape, NodeShape::List)`.
117    #[must_use]
118    pub const fn is_list(&self) -> bool {
119        matches!(self.shape, NodeShape::List)
120    }
121
122    /// Returns `true` iff the node is an inline XML text segment.
123    #[must_use]
124    pub const fn is_xml_text_segment(&self) -> bool {
125        matches!(self.shape, NodeShape::XmlTextSegment)
126    }
127
128    /// Returns the original XML tag name when the node was produced
129    /// by an aliased XML element; `None` otherwise.
130    #[must_use]
131    pub const fn xml_tag(&self) -> Option<&Name> {
132        match &self.shape {
133            NodeShape::XmlElement { tag } => Some(tag),
134            _ => None,
135        }
136    }
137
138    /// Set the node's value.
139    #[must_use]
140    pub fn with_value(mut self, value: FieldPresence) -> Self {
141        self.value = Some(value);
142        self
143    }
144
145    /// Set the node's discriminator.
146    #[must_use]
147    pub fn with_discriminator(mut self, disc: impl Into<Name>) -> Self {
148        self.discriminator = Some(disc.into());
149        self
150    }
151
152    /// Add an extra field for round-trip preservation.
153    #[must_use]
154    pub fn with_extra_field(mut self, key: impl Into<String>, value: Value) -> Self {
155        self.extra_fields.insert(key.into(), value);
156        self
157    }
158
159    /// Returns `true` if this node has a present value.
160    #[must_use]
161    pub fn has_value(&self) -> bool {
162        self.value.as_ref().is_some_and(FieldPresence::is_present)
163    }
164
165    /// Returns `true` if this node is a leaf (has a value or is null).
166    #[must_use]
167    pub const fn is_leaf(&self) -> bool {
168        self.value.is_some()
169    }
170}
171
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Fixture: a bare node with id `0` anchored at the vertex `"v"`.
    fn bare() -> Node {
        Node::new(0, "v")
    }

    /// The builder chain fills in value, discriminator and extra
    /// fields while leaving id and anchor as constructed.
    #[test]
    fn node_builder() {
        let greeting = FieldPresence::Present(Value::Str("hello".into()));
        let built = Node::new(0, "post:body.text")
            .with_value(greeting)
            .with_discriminator("string")
            .with_extra_field("$lang", Value::Str("en".into()));

        assert_eq!(built.id, 0);
        assert_eq!(built.anchor, "post:body.text");
        assert!(built.has_value());
        assert!(built.is_leaf());
        assert_eq!(built.discriminator.as_deref(), Some("string"));

        let expected_lang = Value::Str("en".into());
        assert_eq!(built.extra_fields.get("$lang"), Some(&expected_lang));
    }

    /// A freshly constructed node has no value and is not a leaf.
    #[test]
    fn node_without_value() {
        let fresh = Node::new(1, "post:body");
        assert!(!fresh.has_value());
        assert!(!fresh.is_leaf());
    }

    /// `Node::new` starts out with the `Plain` shape.
    #[test]
    fn default_shape_is_plain() {
        let plain = bare();
        assert_eq!(plain.shape, NodeShape::Plain);
        assert!(!plain.is_list());
        assert!(!plain.is_xml_text_segment());
        assert!(plain.xml_tag().is_none());
    }

    /// The `List` shape is reported by `is_list` only.
    #[test]
    fn with_shape_list() {
        let list = bare().with_shape(NodeShape::List);
        assert!(list.is_list());
        assert!(!list.is_xml_text_segment());
        assert!(list.xml_tag().is_none());
    }

    /// The `XmlElement` shape exposes its tag through `xml_tag`.
    #[test]
    fn with_shape_xml_element_carries_tag() {
        let shape = NodeShape::XmlElement {
            tag: Name::from("para"),
        };
        let element = bare().with_shape(shape);
        assert!(!element.is_list());
        assert!(!element.is_xml_text_segment());
        assert_eq!(element.xml_tag().map(Name::as_ref), Some("para"));
    }

    /// The `XmlTextSegment` shape is reported by
    /// `is_xml_text_segment` only.
    #[test]
    fn with_shape_xml_text_segment() {
        let segment = bare().with_shape(NodeShape::XmlTextSegment);
        assert!(!segment.is_list());
        assert!(segment.is_xml_text_segment());
        assert!(segment.xml_tag().is_none());
    }

    /// A `Plain` shape must not appear in the serialized node.
    #[test]
    fn shape_serialization_skips_default() {
        let json = serde_json::to_string(&bare()).expect("serialize plain node");
        let mentions_shape = json.contains("shape");
        assert!(!mentions_shape, "Plain shape must skip-serialize: {json}");
    }

    /// A non-`Plain` shape must appear in the serialized node.
    #[test]
    fn shape_serialization_emits_non_default() {
        let list = bare().with_shape(NodeShape::List);
        let json = serde_json::to_string(&list).expect("serialize list node");
        assert!(json.contains("\"shape\""), "non-Plain shape must serialize");
        // With `tag = "kind"` and `rename_all = "snake_case"` on the
        // enum, `NodeShape::List` is written as `{"kind":"list"}`.
        assert!(json.contains("\"list\""), "expected list tag in: {json}");
    }
}