Skip to main content

panproto_protocols/web_document/
docx.rs

1//! DOCX/Office Open XML protocol definition.
2//!
3//! Uses Group E theory: constrained multigraph + W-type + metadata.
4
5use std::collections::HashMap;
6use std::hash::BuildHasher;
7
8use panproto_gat::Theory;
9use panproto_schema::{EdgeRule, Protocol, Schema, SchemaBuilder};
10
11use crate::emit::{children_by_edge, find_roots, vertex_constraints};
12use crate::error::ProtocolError;
13use crate::theories;
14
15/// Returns the DOCX protocol definition.
16#[must_use]
17pub fn protocol() -> Protocol {
18    Protocol {
19        name: "docx".into(),
20        schema_theory: "ThDocxSchema".into(),
21        instance_theory: "ThDocxInstance".into(),
22        edge_rules: edge_rules(),
23        obj_kinds: vec![
24            "document".into(),
25            "body".into(),
26            "paragraph".into(),
27            "run".into(),
28            "text".into(),
29            "table".into(),
30            "row".into(),
31            "cell".into(),
32            "section".into(),
33            "header".into(),
34            "footer".into(),
35            "style".into(),
36            "numbering".into(),
37            "footnote".into(),
38            "image".into(),
39            "hyperlink".into(),
40        ],
41        constraint_sorts: vec![
42            "required".into(),
43            "style-type".into(),
44            "numbering-format".into(),
45        ],
46        has_order: true,
47        nominal_identity: true,
48        ..Protocol::default()
49    }
50}
51
52/// Register the component GATs for DOCX.
53pub fn register_theories<S: BuildHasher>(registry: &mut HashMap<String, Theory, S>) {
54    theories::register_multigraph_wtype_meta(registry, "ThDocxSchema", "ThDocxInstance");
55}
56
57/// Parse a JSON-based DOCX content model into a [`Schema`].
58///
59/// # Errors
60///
61/// Returns [`ProtocolError`] if parsing fails.
62pub fn parse_docx_schema(json: &serde_json::Value) -> Result<Schema, ProtocolError> {
63    let proto = protocol();
64    let mut builder = SchemaBuilder::new(&proto);
65
66    let elements = json
67        .get("elements")
68        .and_then(serde_json::Value::as_object)
69        .ok_or_else(|| ProtocolError::MissingField("elements".into()))?;
70
71    for (name, def) in elements {
72        let kind = def
73            .get("kind")
74            .and_then(serde_json::Value::as_str)
75            .unwrap_or("document");
76        builder = builder.vertex(name, kind, None)?;
77
78        for field in &["required", "style-type", "numbering-format"] {
79            if let Some(val) = def.get(field).and_then(serde_json::Value::as_str) {
80                builder = builder.constraint(name, field, val);
81            }
82        }
83
84        if let Some(children) = def.get("children").and_then(serde_json::Value::as_object) {
85            for (child_name, child_def) in children {
86                let child_id = format!("{name}.{child_name}");
87                let child_kind = child_def
88                    .get("kind")
89                    .and_then(serde_json::Value::as_str)
90                    .unwrap_or("text");
91                builder = builder.vertex(&child_id, child_kind, None)?;
92                builder = builder.edge(name, &child_id, "prop", Some(child_name))?;
93
94                for field in &["required", "style-type"] {
95                    if let Some(val) = child_def.get(field).and_then(serde_json::Value::as_str) {
96                        builder = builder.constraint(&child_id, field, val);
97                    }
98                }
99            }
100        }
101
102        if let Some(items) = def.get("items").and_then(serde_json::Value::as_array) {
103            for (i, item) in items.iter().enumerate() {
104                if let Some(item_kind) = item.as_str() {
105                    let item_id = format!("{name}:item{i}");
106                    builder = builder.vertex(&item_id, item_kind, None)?;
107                    builder = builder.edge(name, &item_id, "items", Some(item_kind))?;
108                }
109            }
110        }
111    }
112
113    let schema = builder.build()?;
114    Ok(schema)
115}
116
117/// Emit a [`Schema`] as a JSON DOCX schema.
118///
119/// # Errors
120///
121/// Returns [`ProtocolError`] if emission fails.
122pub fn emit_docx_schema(schema: &Schema) -> Result<serde_json::Value, ProtocolError> {
123    let structural = &["prop", "items"];
124    let roots = find_roots(schema, structural);
125
126    let mut elements = serde_json::Map::new();
127    for root in &roots {
128        let mut obj = serde_json::Map::new();
129        obj.insert("kind".into(), serde_json::json!(root.kind));
130
131        for c in vertex_constraints(schema, &root.id) {
132            obj.insert(c.sort.to_string(), serde_json::json!(c.value));
133        }
134
135        let props = children_by_edge(schema, &root.id, "prop");
136        if !props.is_empty() {
137            let mut children = serde_json::Map::new();
138            for (edge, child) in &props {
139                let child_name = edge.name.as_deref().unwrap_or(&child.id);
140                let mut child_obj = serde_json::Map::new();
141                child_obj.insert("kind".into(), serde_json::json!(child.kind));
142                for c in vertex_constraints(schema, &child.id) {
143                    child_obj.insert(c.sort.to_string(), serde_json::json!(c.value));
144                }
145                children.insert(child_name.to_string(), serde_json::Value::Object(child_obj));
146            }
147            obj.insert("children".into(), serde_json::Value::Object(children));
148        }
149
150        let items = children_by_edge(schema, &root.id, "items");
151        if !items.is_empty() {
152            let arr: Vec<serde_json::Value> = items
153                .iter()
154                .filter_map(|(e, _)| e.name.as_deref().map(|n| serde_json::json!(n)))
155                .collect();
156            obj.insert("items".into(), serde_json::Value::Array(arr));
157        }
158
159        elements.insert(root.id.to_string(), serde_json::Value::Object(obj));
160    }
161
162    Ok(serde_json::json!({ "elements": elements }))
163}
164
165fn edge_rules() -> Vec<EdgeRule> {
166    vec![
167        EdgeRule {
168            edge_kind: "prop".into(),
169            src_kinds: vec![
170                "document".into(),
171                "body".into(),
172                "paragraph".into(),
173                "run".into(),
174                "table".into(),
175                "row".into(),
176                "cell".into(),
177                "section".into(),
178            ],
179            tgt_kinds: vec![],
180        },
181        EdgeRule {
182            edge_kind: "items".into(),
183            src_kinds: vec![
184                "document".into(),
185                "body".into(),
186                "paragraph".into(),
187                "table".into(),
188            ],
189            tgt_kinds: vec![],
190        },
191    ]
192}
193
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used)]
mod tests {
    use super::*;

    /// The protocol descriptor is named "docx".
    #[test]
    fn protocol_def() {
        assert_eq!(protocol().name, "docx");
    }

    /// Both DOCX theories are present in the registry after registration.
    #[test]
    fn register_theories_works() {
        let mut registry = HashMap::new();
        register_theories(&mut registry);
        for theory in ["ThDocxSchema", "ThDocxInstance"] {
            assert!(registry.contains_key(theory));
        }
    }

    /// Parsing, emitting and re-parsing keeps the vertex count unchanged.
    #[test]
    fn parse_and_emit() {
        let input = serde_json::json!({
            "elements": {
                "document": {
                    "kind": "document",
                    "children": {
                        "body": {"kind": "body"}
                    },
                    "items": ["paragraph", "table"]
                }
            }
        });

        let parsed = parse_docx_schema(&input).expect("should parse");
        assert!(parsed.has_vertex("document"));
        assert!(parsed.has_vertex("document.body"));

        let round_tripped = emit_docx_schema(&parsed).expect("should emit");
        let reparsed = parse_docx_schema(&round_tripped).expect("re-parse");
        assert_eq!(parsed.vertex_count(), reparsed.vertex_count());
    }
}