panproto_protocols/web_document/
docx.rs1use std::collections::HashMap;
6use std::hash::BuildHasher;
7
8use panproto_gat::Theory;
9use panproto_schema::{EdgeRule, Protocol, Schema, SchemaBuilder};
10
11use crate::emit::{children_by_edge, find_roots, vertex_constraints};
12use crate::error::ProtocolError;
13use crate::theories;
14
15#[must_use]
17pub fn protocol() -> Protocol {
18 Protocol {
19 name: "docx".into(),
20 schema_theory: "ThDocxSchema".into(),
21 instance_theory: "ThDocxInstance".into(),
22 edge_rules: edge_rules(),
23 obj_kinds: vec![
24 "document".into(),
25 "body".into(),
26 "paragraph".into(),
27 "run".into(),
28 "text".into(),
29 "table".into(),
30 "row".into(),
31 "cell".into(),
32 "section".into(),
33 "header".into(),
34 "footer".into(),
35 "style".into(),
36 "numbering".into(),
37 "footnote".into(),
38 "image".into(),
39 "hyperlink".into(),
40 ],
41 constraint_sorts: vec![
42 "required".into(),
43 "style-type".into(),
44 "numbering-format".into(),
45 ],
46 has_order: true,
47 nominal_identity: true,
48 ..Protocol::default()
49 }
50}
51
52pub fn register_theories<S: BuildHasher>(registry: &mut HashMap<String, Theory, S>) {
54 theories::register_multigraph_wtype_meta(registry, "ThDocxSchema", "ThDocxInstance");
55}
56
57pub fn parse_docx_schema(json: &serde_json::Value) -> Result<Schema, ProtocolError> {
63 let proto = protocol();
64 let mut builder = SchemaBuilder::new(&proto);
65
66 let elements = json
67 .get("elements")
68 .and_then(serde_json::Value::as_object)
69 .ok_or_else(|| ProtocolError::MissingField("elements".into()))?;
70
71 for (name, def) in elements {
72 let kind = def
73 .get("kind")
74 .and_then(serde_json::Value::as_str)
75 .unwrap_or("document");
76 builder = builder.vertex(name, kind, None)?;
77
78 for field in &["required", "style-type", "numbering-format"] {
79 if let Some(val) = def.get(field).and_then(serde_json::Value::as_str) {
80 builder = builder.constraint(name, field, val);
81 }
82 }
83
84 if let Some(children) = def.get("children").and_then(serde_json::Value::as_object) {
85 for (child_name, child_def) in children {
86 let child_id = format!("{name}.{child_name}");
87 let child_kind = child_def
88 .get("kind")
89 .and_then(serde_json::Value::as_str)
90 .unwrap_or("text");
91 builder = builder.vertex(&child_id, child_kind, None)?;
92 builder = builder.edge(name, &child_id, "prop", Some(child_name))?;
93
94 for field in &["required", "style-type"] {
95 if let Some(val) = child_def.get(field).and_then(serde_json::Value::as_str) {
96 builder = builder.constraint(&child_id, field, val);
97 }
98 }
99 }
100 }
101
102 if let Some(items) = def.get("items").and_then(serde_json::Value::as_array) {
103 for (i, item) in items.iter().enumerate() {
104 if let Some(item_kind) = item.as_str() {
105 let item_id = format!("{name}:item{i}");
106 builder = builder.vertex(&item_id, item_kind, None)?;
107 builder = builder.edge(name, &item_id, "items", Some(item_kind))?;
108 }
109 }
110 }
111 }
112
113 let schema = builder.build()?;
114 Ok(schema)
115}
116
117pub fn emit_docx_schema(schema: &Schema) -> Result<serde_json::Value, ProtocolError> {
123 let structural = &["prop", "items"];
124 let roots = find_roots(schema, structural);
125
126 let mut elements = serde_json::Map::new();
127 for root in &roots {
128 let mut obj = serde_json::Map::new();
129 obj.insert("kind".into(), serde_json::json!(root.kind));
130
131 for c in vertex_constraints(schema, &root.id) {
132 obj.insert(c.sort.to_string(), serde_json::json!(c.value));
133 }
134
135 let props = children_by_edge(schema, &root.id, "prop");
136 if !props.is_empty() {
137 let mut children = serde_json::Map::new();
138 for (edge, child) in &props {
139 let child_name = edge.name.as_deref().unwrap_or(&child.id);
140 let mut child_obj = serde_json::Map::new();
141 child_obj.insert("kind".into(), serde_json::json!(child.kind));
142 for c in vertex_constraints(schema, &child.id) {
143 child_obj.insert(c.sort.to_string(), serde_json::json!(c.value));
144 }
145 children.insert(child_name.to_string(), serde_json::Value::Object(child_obj));
146 }
147 obj.insert("children".into(), serde_json::Value::Object(children));
148 }
149
150 let items = children_by_edge(schema, &root.id, "items");
151 if !items.is_empty() {
152 let arr: Vec<serde_json::Value> = items
153 .iter()
154 .filter_map(|(e, _)| e.name.as_deref().map(|n| serde_json::json!(n)))
155 .collect();
156 obj.insert("items".into(), serde_json::Value::Array(arr));
157 }
158
159 elements.insert(root.id.to_string(), serde_json::Value::Object(obj));
160 }
161
162 Ok(serde_json::json!({ "elements": elements }))
163}
164
165fn edge_rules() -> Vec<EdgeRule> {
166 vec![
167 EdgeRule {
168 edge_kind: "prop".into(),
169 src_kinds: vec![
170 "document".into(),
171 "body".into(),
172 "paragraph".into(),
173 "run".into(),
174 "table".into(),
175 "row".into(),
176 "cell".into(),
177 "section".into(),
178 ],
179 tgt_kinds: vec![],
180 },
181 EdgeRule {
182 edge_kind: "items".into(),
183 src_kinds: vec![
184 "document".into(),
185 "body".into(),
186 "paragraph".into(),
187 "table".into(),
188 ],
189 tgt_kinds: vec![],
190 },
191 ]
192}
193
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used)]
mod tests {
    use super::*;

    /// The protocol definition reports the name `docx`.
    #[test]
    fn protocol_def() {
        let definition = protocol();
        assert_eq!(definition.name, "docx");
    }

    /// Both theory names are present in the registry after registration.
    #[test]
    fn register_theories_works() {
        let mut theories_by_name = HashMap::new();
        register_theories(&mut theories_by_name);
        assert!(theories_by_name.contains_key("ThDocxSchema"));
        assert!(theories_by_name.contains_key("ThDocxInstance"));
    }

    /// Parsing, emitting and re-parsing keeps the number of vertices stable.
    #[test]
    fn parse_and_emit() {
        let input = serde_json::json!({
            "elements": {
                "document": {
                    "kind": "document",
                    "children": {
                        "body": {"kind": "body"}
                    },
                    "items": ["paragraph", "table"]
                }
            }
        });

        let parsed = parse_docx_schema(&input).expect("should parse");
        assert!(parsed.has_vertex("document"));
        assert!(parsed.has_vertex("document.body"));

        let emitted = emit_docx_schema(&parsed).expect("should emit");
        let reparsed = parse_docx_schema(&emitted).expect("re-parse");
        assert_eq!(parsed.vertex_count(), reparsed.vertex_count());
    }
}