use std::collections::HashMap;
use std::hash::BuildHasher;
use panproto_gat::Theory;
use panproto_schema::{EdgeRule, Protocol, Schema, SchemaBuilder};
use crate::emit::{children_by_edge, find_roots, vertex_constraints};
use crate::error::ProtocolError;
use crate::theories;
/// Builds the [`Protocol`] definition for the `ucca` protocol.
///
/// The protocol is named `"ucca"`, refers to the `ThUccaSchema` /
/// `ThUccaInstance` theory pair, takes its edge rules from [`edge_rules`],
/// and enables both `has_order` and `has_coproducts`. Every field not set
/// here keeps the value supplied by `Protocol::default()`.
#[must_use]
pub fn protocol() -> Protocol {
    // Object kinds declared by the protocol.
    let obj_kinds = ["passage", "layer", "node", "terminal", "string", "integer"];
    // Constraint sorts declared by the protocol.
    let constraint_sorts = [
        "id",
        "tag",
        "type",
        "paragraph",
        "is-remote",
        "is-implicit",
        "position",
        "text",
    ];

    Protocol {
        name: "ucca".into(),
        schema_theory: "ThUccaSchema".into(),
        instance_theory: "ThUccaInstance".into(),
        edge_rules: edge_rules(),
        obj_kinds: obj_kinds.iter().map(|&kind| kind.into()).collect(),
        constraint_sorts: constraint_sorts.iter().map(|&sort| sort.into()).collect(),
        has_order: true,
        has_coproducts: true,
        ..Protocol::default()
    }
}
/// Registers the theories used by the `ucca` protocol in `registry`.
///
/// Delegates to [`theories::register_typed_graph_wtype`], passing the
/// registry together with the schema theory name `"ThUccaSchema"` and the
/// instance theory name `"ThUccaInstance"`.
pub fn register_theories<S: BuildHasher>(registry: &mut HashMap<String, Theory, S>) {
    let schema_theory = "ThUccaSchema";
    let instance_theory = "ThUccaInstance";
    theories::register_typed_graph_wtype(registry, schema_theory, instance_theory);
}
#[allow(clippy::too_many_lines)]
pub fn parse_ucca(json: &serde_json::Value) -> Result<Schema, ProtocolError> {
let proto = protocol();
let mut builder = SchemaBuilder::new(&proto);
if let Some(passage) = json.get("passage").and_then(serde_json::Value::as_object) {
let passage_id = passage
.get("id")
.and_then(serde_json::Value::as_str)
.unwrap_or("passage0");
builder = builder.vertex(passage_id, "passage", None)?;
if let Some(id_val) = passage.get("id").and_then(serde_json::Value::as_str) {
builder = builder.constraint(passage_id, "id", id_val);
}
}
if let Some(layers) = json.get("layers").and_then(serde_json::Value::as_object) {
for (layer_id, layer_def) in layers {
builder = builder.vertex(layer_id, "layer", None)?;
if let Some(attrs) = layer_def
.get("attrs")
.and_then(serde_json::Value::as_object)
{
for (sort, value) in attrs {
if let Some(v) = value.as_str() {
builder = builder.constraint(layer_id, sort, v);
}
}
}
}
}
if let Some(nodes) = json.get("nodes").and_then(serde_json::Value::as_object) {
for (node_id, node_def) in nodes {
let raw_kind = node_def
.get("kind")
.and_then(serde_json::Value::as_str)
.unwrap_or("node");
let kind = match raw_kind {
"terminal" => "terminal",
_ => "node",
};
builder = builder.vertex(node_id, kind, None)?;
if let Some(attrs) = node_def.get("attrs").and_then(serde_json::Value::as_object) {
for (sort, value) in attrs {
if sort == "category" {
continue;
}
if let Some(v) = value.as_str() {
builder = builder.constraint(node_id, sort, v);
}
}
}
}
}
if let Some(layers) = json.get("layers").and_then(serde_json::Value::as_object) {
for (layer_id, _layer_def) in layers {
if let Some(passage) = json.get("passage").and_then(serde_json::Value::as_object) {
let passage_id = passage
.get("id")
.and_then(serde_json::Value::as_str)
.unwrap_or("passage0");
builder = builder.edge(passage_id, layer_id, "contains", None)?;
}
}
}
if let Some(nodes) = json.get("nodes").and_then(serde_json::Value::as_object) {
for (node_id, node_def) in nodes {
if let Some(layer) = node_def.get("layer").and_then(serde_json::Value::as_str) {
builder = builder.edge(layer, node_id, "contains", None)?;
}
if let Some(edges) = node_def.get("edges").and_then(serde_json::Value::as_array) {
for edge_def in edges {
if let Some(edge_obj) = edge_def.as_object() {
let target = edge_obj
.get("target")
.and_then(serde_json::Value::as_str)
.unwrap_or("");
let category = edge_obj.get("category").and_then(serde_json::Value::as_str);
if !target.is_empty() {
let is_remote = edge_obj
.get("remote")
.and_then(serde_json::Value::as_bool)
.unwrap_or(false);
if is_remote {
builder = builder.edge(node_id, target, "remote", category)?;
} else {
builder = builder.edge(node_id, target, "edge", category)?;
}
}
}
}
}
if let Some(implicit_targets) = node_def
.get("implicit")
.and_then(serde_json::Value::as_array)
{
for imp in implicit_targets {
if let Some(tgt) = imp.as_str() {
builder = builder.edge(node_id, tgt, "implicit", None)?;
}
}
}
}
}
let schema = builder.build()?;
Ok(schema)
}
/// Serialises a [`Schema`] into the JSON layout read by [`parse_ucca`].
///
/// Only roots (as reported by `find_roots` for the `contains` edge kind)
/// whose kind is `passage` are emitted; roots of any other kind are ignored.
/// For each passage root:
///
/// * the value of its `id` constraint, if any, is written to `passage.id`;
/// * every vertex reached through a `contains` edge is written to `layers`,
///   with its constraints under `"attrs"` when it has any;
/// * every vertex reached from such a layer through a `contains` edge is
///   written to `nodes` via [`emit_node`].
///
/// The `"passage"` key is written whenever a passage root exists, even when
/// that root has no `id` constraint (the object is then empty). Without it,
/// re-parsing the output would drop the passage vertex and its `contains`
/// edges to the layers; with it, [`parse_ucca`] recreates the passage under
/// its default id. The `"layers"` and `"nodes"` keys are omitted when empty.
///
/// # Errors
///
/// This implementation has no failing path and always returns `Ok`.
pub fn emit_ucca(schema: &Schema) -> Result<serde_json::Value, ProtocolError> {
    let structural = &["contains"];
    let roots = find_roots(schema, structural);

    let mut passage_obj = serde_json::Map::new();
    let mut has_passage = false;
    let mut layers = serde_json::Map::new();
    let mut nodes = serde_json::Map::new();

    for root in &roots {
        if root.kind.as_str() != "passage" {
            continue;
        }
        has_passage = true;

        // Constraints are only needed for passage roots, so look them up here.
        let constraints = vertex_constraints(schema, &root.id);
        for c in &constraints {
            if c.sort == "id" {
                passage_obj.insert("id".into(), serde_json::json!(c.value));
            }
        }

        let layer_children = children_by_edge(schema, &root.id, "contains");
        for (_edge, layer) in &layer_children {
            let mut layer_obj = serde_json::Map::new();
            let layer_constraints = vertex_constraints(schema, &layer.id);
            if !layer_constraints.is_empty() {
                let mut attrs = serde_json::Map::new();
                for c in &layer_constraints {
                    attrs.insert(c.sort.to_string(), serde_json::json!(c.value));
                }
                layer_obj.insert("attrs".into(), serde_json::Value::Object(attrs));
            }
            layers.insert(layer.id.to_string(), serde_json::Value::Object(layer_obj));

            let node_children = children_by_edge(schema, &layer.id, "contains");
            for (_edge, node) in &node_children {
                emit_node(schema, node, &layer.id, &mut nodes);
            }
        }
    }

    let mut result = serde_json::Map::new();
    // Keyed on the presence of a passage root, not on the object being
    // non-empty, so that a passage without an `id` constraint survives.
    if has_passage {
        result.insert("passage".into(), serde_json::Value::Object(passage_obj));
    }
    if !layers.is_empty() {
        result.insert("layers".into(), serde_json::Value::Object(layers));
    }
    if !nodes.is_empty() {
        result.insert("nodes".into(), serde_json::Value::Object(nodes));
    }
    Ok(serde_json::Value::Object(result))
}
fn emit_node(
schema: &Schema,
node: &panproto_schema::Vertex,
layer_id: &str,
nodes: &mut serde_json::Map<String, serde_json::Value>,
) {
let mut node_obj = serde_json::Map::new();
node_obj.insert("kind".into(), serde_json::json!(node.kind));
node_obj.insert("layer".into(), serde_json::json!(layer_id));
let node_constraints = vertex_constraints(schema, &node.id);
if !node_constraints.is_empty() {
let mut attrs = serde_json::Map::new();
for c in &node_constraints {
attrs.insert(c.sort.to_string(), serde_json::json!(c.value));
}
node_obj.insert("attrs".into(), serde_json::Value::Object(attrs));
}
let edge_children = children_by_edge(schema, &node.id, "edge");
let remote_children = children_by_edge(schema, &node.id, "remote");
if !edge_children.is_empty() || !remote_children.is_empty() {
let mut edges = Vec::new();
for (edge, child) in &edge_children {
let mut edge_obj = serde_json::Map::new();
edge_obj.insert("target".into(), serde_json::json!(child.id));
if let Some(cat) = &edge.name {
edge_obj.insert("category".into(), serde_json::json!(cat));
}
edges.push(serde_json::Value::Object(edge_obj));
}
for (edge, child) in &remote_children {
let mut edge_obj = serde_json::Map::new();
edge_obj.insert("target".into(), serde_json::json!(child.id));
edge_obj.insert("remote".into(), serde_json::json!(true));
if let Some(cat) = &edge.name {
edge_obj.insert("category".into(), serde_json::json!(cat));
}
edges.push(serde_json::Value::Object(edge_obj));
}
node_obj.insert("edges".into(), serde_json::Value::Array(edges));
}
let implicit_children = children_by_edge(schema, &node.id, "implicit");
if !implicit_children.is_empty() {
let arr: Vec<serde_json::Value> = implicit_children
.iter()
.map(|(_, child)| serde_json::json!(child.id))
.collect();
node_obj.insert("implicit".into(), serde_json::Value::Array(arr));
}
nodes.insert(node.id.to_string(), serde_json::Value::Object(node_obj));
}
/// Returns the edge rules of the `ucca` protocol, in the order `contains`,
/// `edge`, `remote`, `implicit`.
///
/// `contains` lists `passage` and `layer` as source kinds; every other
/// source list, and every target list, is empty.
// NOTE(review): an empty kind list presumably leaves that endpoint
// unrestricted — confirm against the `EdgeRule` documentation.
fn edge_rules() -> Vec<EdgeRule> {
    /// Builds a rule for `kind` with the given source kinds and an empty
    /// target-kind list.
    fn rule(kind: &'static str, sources: &[&'static str]) -> EdgeRule {
        EdgeRule {
            edge_kind: kind.into(),
            src_kinds: sources.iter().map(|&source| source.into()).collect(),
            tgt_kinds: Vec::new(),
        }
    }

    vec![
        rule("contains", &["passage", "layer"]),
        rule("edge", &[]),
        rule("remote", &[]),
        rule("implicit", &[]),
    ]
}
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Checks the static protocol definition: names, object kinds, edge
    /// rules and constraint sorts, including the ones that must be absent.
    #[test]
    fn protocol_def() {
        let proto = protocol();
        assert_eq!(proto.name, "ucca");
        assert_eq!(proto.schema_theory, "ThUccaSchema");
        assert_eq!(proto.instance_theory, "ThUccaInstance");

        for kind in &["passage", "layer", "node", "terminal"] {
            assert!(proto.obj_kinds.contains(&(*kind).to_string()));
        }
        // These names must not be declared as object kinds.
        for absent in &[
            "participant",
            "process",
            "state",
            "parallel-scene",
            "scene",
            "adverbial",
            "elaborator",
            "center",
            "linker",
        ] {
            assert!(!proto.obj_kinds.contains(&(*absent).to_string()));
        }

        for edge_kind in &["contains", "edge", "remote", "implicit"] {
            assert!(proto.find_edge_rule(*edge_kind).is_some());
        }
        assert!(proto.find_edge_rule("terminal-of").is_none());

        for sort in &["position", "text"] {
            assert!(proto.constraint_sorts.contains(&(*sort).to_string()));
        }
        assert!(!proto.constraint_sorts.contains(&"category".to_string()));
    }

    /// Both theory names must be present in the registry after registration.
    #[test]
    fn register_theories_works() {
        let mut registry = HashMap::new();
        register_theories(&mut registry);
        for theory in &["ThUccaSchema", "ThUccaInstance"] {
            assert!(registry.contains_key(*theory));
        }
    }

    /// Parses a small passage, checks vertices and edges, then verifies that
    /// emitting and re-parsing preserves the vertex and edge counts.
    #[test]
    fn parse_and_emit() {
        // A non-terminal node in layer L0 whose only attribute is its id.
        let plain_node = |id: &str| {
            serde_json::json!({
                "kind": "node",
                "layer": "L0",
                "attrs": { "id": id }
            })
        };
        let input = serde_json::json!({
            "passage": { "id": "p1" },
            "layers": {
                "L0": { "attrs": { "type": "foundational" } }
            },
            "nodes": {
                "1.1": {
                    "kind": "node",
                    "layer": "L0",
                    "attrs": { "id": "1.1" },
                    "edges": [
                        { "target": "1.2", "category": "P" },
                        { "target": "1.3", "category": "A" },
                        { "target": "1.4", "category": "A", "remote": true }
                    ]
                },
                "1.2": plain_node("1.2"),
                "1.3": plain_node("1.3"),
                "1.4": plain_node("1.4"),
                "t1": {
                    "kind": "terminal",
                    "layer": "L0",
                    "attrs": {
                        "text": "runs",
                        "position": "1"
                    }
                }
            }
        });

        let schema = parse_ucca(&input).expect("should parse");

        for id in &["p1", "L0", "1.1", "t1"] {
            assert!(schema.has_vertex(*id));
        }
        for (id, kind) in &[("1.1", "node"), ("1.2", "node"), ("t1", "terminal")] {
            assert_eq!(schema.vertices.get(*id).unwrap().kind, *kind);
        }

        let from_root = schema.outgoing_edges("1.1");

        let primary: Vec<_> = from_root.iter().filter(|e| e.kind == "edge").collect();
        assert_eq!(primary.len(), 2);
        assert!(primary.iter().any(|e| e.name.as_deref() == Some("P")));
        assert!(primary.iter().any(|e| e.name.as_deref() == Some("A")));

        let remote: Vec<_> = from_root.iter().filter(|e| e.kind == "remote").collect();
        assert_eq!(remote.len(), 1);
        assert_eq!(remote[0].name.as_deref(), Some("A"));

        // Round trip: emitting and re-parsing must keep the graph size.
        let emitted = emit_ucca(&schema).expect("emit");
        let reparsed = parse_ucca(&emitted).expect("re-parse");
        assert_eq!(schema.vertex_count(), reparsed.vertex_count());
        assert_eq!(schema.edge_count(), reparsed.edge_count());
    }

    /// Node kinds other than `terminal` are normalised to `node`.
    #[test]
    fn legacy_kind_normalised_to_node() {
        let input = serde_json::json!({
            "passage": { "id": "p2" },
            "layers": {
                "L0": { "attrs": { "type": "foundational" } }
            },
            "nodes": {
                "n1": {
                    "kind": "participant",
                    "layer": "L0"
                },
                "n2": {
                    "kind": "process",
                    "layer": "L0"
                }
            }
        });

        let schema = parse_ucca(&input).expect("should parse legacy kinds");
        for id in &["n1", "n2"] {
            assert_eq!(schema.vertices.get(*id).unwrap().kind, "node");
        }
    }

    /// An `implicit` entry yields exactly one implicit edge to its target.
    #[test]
    fn implicit_edge_any_node() {
        let input = serde_json::json!({
            "passage": { "id": "p3" },
            "layers": {
                "L0": { "attrs": {} }
            },
            "nodes": {
                "parent": {
                    "kind": "node",
                    "layer": "L0",
                    "implicit": ["child"]
                },
                "child": {
                    "kind": "node",
                    "layer": "L0"
                }
            }
        });

        let schema = parse_ucca(&input).expect("implicit edge from any node");
        let outgoing = schema.outgoing_edges("parent");
        let implicit_edges: Vec<_> = outgoing.iter().filter(|e| e.kind == "implicit").collect();
        assert_eq!(implicit_edges.len(), 1);
        assert_eq!(implicit_edges[0].tgt, "child");
    }
}