panproto_protocols/web_document/
docx.rs1use std::collections::HashMap;
6use std::hash::BuildHasher;
7
8use panproto_gat::Theory;
9use panproto_schema::{EdgeRule, Protocol, Schema, SchemaBuilder};
10
11use crate::emit::{children_by_edge, find_roots, vertex_constraints};
12use crate::error::ProtocolError;
13use crate::theories;
14
15#[must_use]
17pub fn protocol() -> Protocol {
18 Protocol {
19 name: "docx".into(),
20 schema_theory: "ThDocxSchema".into(),
21 instance_theory: "ThDocxInstance".into(),
22 edge_rules: edge_rules(),
23 obj_kinds: vec![
24 "document".into(),
25 "body".into(),
26 "paragraph".into(),
27 "run".into(),
28 "text".into(),
29 "table".into(),
30 "row".into(),
31 "cell".into(),
32 "section".into(),
33 "header".into(),
34 "footer".into(),
35 "style".into(),
36 "numbering".into(),
37 "footnote".into(),
38 "image".into(),
39 "hyperlink".into(),
40 ],
41 constraint_sorts: vec![
42 "required".into(),
43 "style-type".into(),
44 "numbering-format".into(),
45 ],
46 has_order: true,
47 nominal_identity: true,
48 ..Protocol::default()
49 }
50}
51
52pub fn register_theories<S: BuildHasher>(registry: &mut HashMap<String, Theory, S>) {
54 theories::register_multigraph_wtype_meta(registry, "ThDocxSchema", "ThDocxInstance");
55}
56
57pub fn parse_docx_schema(json: &serde_json::Value) -> Result<Schema, ProtocolError> {
63 let proto = protocol();
64 let mut builder = SchemaBuilder::new(&proto);
65
66 let elements = json
67 .get("elements")
68 .and_then(serde_json::Value::as_object)
69 .ok_or_else(|| ProtocolError::MissingField("elements".into()))?;
70
71 for (name, def) in elements {
72 let kind = def
73 .get("kind")
74 .and_then(serde_json::Value::as_str)
75 .unwrap_or("document");
76 builder = builder.vertex(name, kind, None)?;
77 if kind == "document" {
79 builder = builder.entry(name);
80 }
81
82 for field in &["required", "style-type", "numbering-format"] {
83 if let Some(val) = def.get(field).and_then(serde_json::Value::as_str) {
84 builder = builder.constraint(name, field, val);
85 }
86 }
87
88 if let Some(children) = def.get("children").and_then(serde_json::Value::as_object) {
89 for (child_name, child_def) in children {
90 let child_id = format!("{name}.{child_name}");
91 let child_kind = child_def
92 .get("kind")
93 .and_then(serde_json::Value::as_str)
94 .unwrap_or("text");
95 builder = builder.vertex(&child_id, child_kind, None)?;
96 builder = builder.edge(name, &child_id, "prop", Some(child_name))?;
97
98 for field in &["required", "style-type"] {
99 if let Some(val) = child_def.get(field).and_then(serde_json::Value::as_str) {
100 builder = builder.constraint(&child_id, field, val);
101 }
102 }
103 }
104 }
105
106 if let Some(items) = def.get("items").and_then(serde_json::Value::as_array) {
107 for (i, item) in items.iter().enumerate() {
108 if let Some(item_kind) = item.as_str() {
109 let item_id = format!("{name}:item{i}");
110 builder = builder.vertex(&item_id, item_kind, None)?;
111 builder = builder.edge(name, &item_id, "items", Some(item_kind))?;
112 }
113 }
114 }
115 }
116
117 let schema = builder.build()?;
118 Ok(schema)
119}
120
121pub fn emit_docx_schema(schema: &Schema) -> Result<serde_json::Value, ProtocolError> {
127 let structural = &["prop", "items"];
128 let roots = find_roots(schema, structural);
129
130 let mut elements = serde_json::Map::new();
131 for root in &roots {
132 let mut obj = serde_json::Map::new();
133 obj.insert("kind".into(), serde_json::json!(root.kind));
134
135 for c in vertex_constraints(schema, &root.id) {
136 obj.insert(c.sort.to_string(), serde_json::json!(c.value));
137 }
138
139 let props = children_by_edge(schema, &root.id, "prop");
140 if !props.is_empty() {
141 let mut children = serde_json::Map::new();
142 for (edge, child) in &props {
143 let child_name = edge.name.as_deref().unwrap_or(&child.id);
144 let mut child_obj = serde_json::Map::new();
145 child_obj.insert("kind".into(), serde_json::json!(child.kind));
146 for c in vertex_constraints(schema, &child.id) {
147 child_obj.insert(c.sort.to_string(), serde_json::json!(c.value));
148 }
149 children.insert(child_name.to_string(), serde_json::Value::Object(child_obj));
150 }
151 obj.insert("children".into(), serde_json::Value::Object(children));
152 }
153
154 let items = children_by_edge(schema, &root.id, "items");
155 if !items.is_empty() {
156 let arr: Vec<serde_json::Value> = items
157 .iter()
158 .filter_map(|(e, _)| e.name.as_deref().map(|n| serde_json::json!(n)))
159 .collect();
160 obj.insert("items".into(), serde_json::Value::Array(arr));
161 }
162
163 elements.insert(root.id.to_string(), serde_json::Value::Object(obj));
164 }
165
166 Ok(serde_json::json!({ "elements": elements }))
167}
168
169fn edge_rules() -> Vec<EdgeRule> {
170 vec![
171 EdgeRule {
172 edge_kind: "prop".into(),
173 src_kinds: vec![
174 "document".into(),
175 "body".into(),
176 "paragraph".into(),
177 "run".into(),
178 "table".into(),
179 "row".into(),
180 "cell".into(),
181 "section".into(),
182 ],
183 tgt_kinds: vec![],
184 },
185 EdgeRule {
186 edge_kind: "items".into(),
187 src_kinds: vec![
188 "document".into(),
189 "body".into(),
190 "paragraph".into(),
191 "table".into(),
192 ],
193 tgt_kinds: vec![],
194 },
195 ]
196}
197
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used)]
mod tests {
    use super::*;

    /// The descriptor identifies itself as the `docx` protocol.
    #[test]
    fn protocol_def() {
        let descriptor = protocol();
        assert_eq!(descriptor.name, "docx");
    }

    /// Registration makes both theory names available in the registry.
    #[test]
    fn register_theories_works() {
        let mut registry = HashMap::new();
        register_theories(&mut registry);

        for theory_name in ["ThDocxSchema", "ThDocxInstance"] {
            assert!(
                registry.contains_key(theory_name),
                "missing theory {theory_name}"
            );
        }
    }

    /// Parsing, emitting and re-parsing keeps the number of vertices stable.
    #[test]
    fn parse_and_emit() {
        let input = serde_json::json!({
            "elements": {
                "document": {
                    "kind": "document",
                    "children": {
                        "body": {"kind": "body"}
                    },
                    "items": ["paragraph", "table"]
                }
            }
        });

        let parsed = parse_docx_schema(&input).expect("should parse");
        // The top-level element and its qualified child must both exist.
        assert!(parsed.has_vertex("document"));
        assert!(parsed.has_vertex("document.body"));

        let emitted = emit_docx_schema(&parsed).expect("should emit");
        let reparsed = parse_docx_schema(&emitted).expect("re-parse");
        assert_eq!(parsed.vertex_count(), reparsed.vertex_count());
    }
}