use panproto_gat::{Operation, Sort, Theory};
use rustc_hash::FxHashSet;
use crate::error::ParseError;
/// One entry of a tree-sitter `node-types.json` file.
///
/// Exactly one of `fields`/`children` (for concrete node types) or
/// `subtypes` (for abstract "supertype" entries) is typically populated,
/// but deserialization does not enforce that.
#[derive(Debug, Clone, serde::Deserialize)]
pub struct NodeType {
/// The grammar symbol name (JSON key `"type"`).
#[serde(rename = "type")]
pub node_type: String,
/// `true` for named grammar rules, `false` for anonymous tokens
/// such as punctuation and keywords.
pub named: bool,
/// Named-field specs keyed by field name; each value is expected to be
/// a JSON object parsed later by `parse_field_spec`.
#[serde(default)]
pub fields: serde_json::Map<String, serde_json::Value>,
/// Spec for positional (non-field) children, if any.
#[serde(default)]
pub children: Option<ChildSpec>,
/// Present only on abstract supertype entries; lists the concrete
/// node types the supertype expands to.
#[serde(default)]
pub subtypes: Option<Vec<SubtypeRef>>,
}
/// Spec for a node type's positional children in `node-types.json`.
#[derive(Debug, Clone, serde::Deserialize)]
pub struct ChildSpec {
/// `true` if the node may have more than one positional child.
pub multiple: bool,
/// `true` if at least one positional child must be present.
pub required: bool,
/// The node types allowed as positional children.
pub types: Vec<SubtypeRef>,
}
/// A reference to another node type, as it appears in `subtypes` and
/// `types` arrays of `node-types.json`.
#[derive(Debug, Clone, serde::Deserialize)]
pub struct SubtypeRef {
/// The referenced grammar symbol name (JSON key `"type"`).
#[serde(rename = "type")]
pub node_type: String,
/// Whether the referenced symbol is a named rule (vs. anonymous token).
pub named: bool,
}
/// A parsed named-field spec, produced by `parse_field_spec` from the raw
/// JSON value stored in `NodeType::fields`.
#[derive(Debug, Clone)]
pub struct FieldSpec {
/// The field name (the key under `"fields"`).
pub name: String,
/// `true` if the field must be present on every node of this type.
pub required: bool,
/// `true` if the field may hold more than one child.
pub multiple: bool,
/// The node types allowed in this field.
pub types: Vec<SubtypeRef>,
}
/// The result of extracting a GAT theory from a tree-sitter grammar,
/// together with grammar metadata that does not fit in the theory itself.
#[derive(Debug, Clone)]
pub struct ExtractedTheoryMeta {
/// The extracted theory: one sort per named node type plus the
/// built-in `Vertex`/`Edge` sorts, and one unary op per edge kind.
pub theory: Theory,
/// Names of abstract supertype node types (those with `subtypes`).
pub supertypes: FxHashSet<String>,
/// For each supertype, its concrete named subtypes.
pub subtype_map: Vec<(String, Vec<String>)>,
/// Field names that are not required on every node.
pub optional_fields: FxHashSet<String>,
/// Field names (plus the synthetic `"children"`) that may hold multiple
/// ordered children.
pub ordered_fields: FxHashSet<String>,
/// Named node-type names, in the order first encountered.
pub vertex_kinds: Vec<String>,
/// Edge-kind names (field names plus the synthetic `"child_of"`).
pub edge_kinds: Vec<String>,
}
/// Deserializes raw `node-types.json` bytes into a list of entries.
///
/// # Errors
///
/// Returns [`ParseError::NodeTypesJson`] when the bytes are not valid JSON
/// of the expected shape.
pub fn parse_node_types(json: &[u8]) -> Result<Vec<NodeType>, ParseError> {
    match serde_json::from_slice(json) {
        Ok(entries) => Ok(entries),
        Err(source) => Err(ParseError::NodeTypesJson { source }),
    }
}
/// Parses `node-types.json` bytes and extracts a theory from the entries.
///
/// Convenience wrapper around [`parse_node_types`] followed by
/// [`extract_theory_from_entries`].
///
/// # Errors
///
/// Propagates JSON-parse failures and theory-extraction failures.
pub fn extract_theory_from_node_types(
    theory_name: &str,
    json: &[u8],
) -> Result<ExtractedTheoryMeta, ParseError> {
    extract_theory_from_entries(theory_name, &parse_node_types(json)?)
}
/// Builds a GAT theory from parsed `node-types.json` entries.
///
/// Anonymous (unnamed) entries are skipped. Supertype entries (those with
/// `subtypes`) are recorded in `supertypes`/`subtype_map` and contribute a
/// sort, but their fields/children are not inspected. Every distinct field
/// name becomes one unary `parent` operation (an edge kind), and positional
/// children are modeled with the synthetic `child_of` edge kind.
///
/// # Errors
///
/// Returns [`ParseError::TheoryExtraction`] when a value under a node's
/// `fields` is not a JSON object.
pub fn extract_theory_from_entries(
    theory_name: &str,
    node_types: &[NodeType],
) -> Result<ExtractedTheoryMeta, ParseError> {
    let mut sorts: Vec<Sort> = Vec::new();
    let mut ops: Vec<Operation> = Vec::new();
    let mut supertypes = FxHashSet::default();
    let mut subtype_map: Vec<(String, Vec<String>)> = Vec::new();
    let mut optional_fields = FxHashSet::default();
    let mut ordered_fields = FxHashSet::default();
    let mut vertex_kinds: Vec<String> = Vec::new();
    // Keep an insertion-ordered Vec alongside the dedup set so `edge_kinds`
    // comes out in deterministic first-seen order, matching the order the
    // corresponding ops are pushed. Collecting straight from the hash set
    // (the previous behavior) yielded a hash-dependent order.
    let mut edge_kinds: Vec<String> = Vec::new();
    let mut edge_kind_set = FxHashSet::default();
    let mut seen_sorts = FxHashSet::default();
    // Every extracted theory starts with the two built-in sorts.
    sorts.push(Sort::simple("Vertex"));
    sorts.push(Sort::simple("Edge"));
    seen_sorts.insert("Vertex".to_owned());
    seen_sorts.insert("Edge".to_owned());
    for entry in node_types {
        if !entry.named {
            // Anonymous tokens (punctuation, keywords) carry no structure.
            continue;
        }
        let sort_name = &entry.node_type;
        if let Some(ref subtypes) = entry.subtypes {
            // Supertype entry: record the abstract-to-concrete mapping and
            // register the sort, then skip field/children handling.
            supertypes.insert(sort_name.clone());
            let concrete: Vec<String> = subtypes
                .iter()
                .filter(|s| s.named)
                .map(|s| s.node_type.clone())
                .collect();
            subtype_map.push((sort_name.clone(), concrete));
            if seen_sorts.insert(sort_name.clone()) {
                sorts.push(Sort::simple(sort_name.as_str()));
                vertex_kinds.push(sort_name.clone());
            }
            continue;
        }
        if seen_sorts.insert(sort_name.clone()) {
            sorts.push(Sort::simple(sort_name.as_str()));
            vertex_kinds.push(sort_name.clone());
        }
        for (field_name, field_value) in &entry.fields {
            let spec = parse_field_spec(field_name, field_value)?;
            if !spec.required {
                optional_fields.insert(field_name.clone());
            }
            if spec.multiple {
                ordered_fields.insert(field_name.clone());
            }
            // One op per distinct field name across the whole grammar.
            if edge_kind_set.insert(field_name.clone()) {
                edge_kinds.push(field_name.clone());
                ops.push(Operation::unary(
                    field_name.as_str(),
                    "parent",
                    "Vertex",
                    "Vertex",
                ));
            }
        }
        if let Some(ref children) = entry.children {
            if children.multiple {
                ordered_fields.insert("children".to_owned());
            }
            // Positional children share a single synthetic edge kind.
            if edge_kind_set.insert("child_of".to_owned()) {
                edge_kinds.push("child_of".to_owned());
                ops.push(Operation::unary("child_of", "parent", "Vertex", "Vertex"));
            }
        }
    }
    let theory = Theory::new(theory_name, sorts, ops, vec![]);
    Ok(ExtractedTheoryMeta {
        theory,
        supertypes,
        subtype_map,
        optional_fields,
        ordered_fields,
        vertex_kinds,
        edge_kinds,
    })
}
/// Builds a GAT theory directly from a compiled [`tree_sitter::Language`].
///
/// Unlike the `node-types.json` path, the language API exposes no
/// required/multiple/subtype metadata, so `supertypes`, `subtype_map`,
/// `optional_fields`, and `ordered_fields` come back empty. Named node
/// kinds whose names start with `_` (hidden rules) are skipped.
///
/// # Errors
///
/// Currently infallible; the `Result` keeps the signature parallel with
/// [`extract_theory_from_entries`].
pub fn extract_theory_from_language(
    theory_name: &str,
    language: &tree_sitter::Language,
) -> Result<ExtractedTheoryMeta, ParseError> {
    let mut sorts: Vec<Sort> = Vec::new();
    let mut ops: Vec<Operation> = Vec::new();
    let mut vertex_kinds: Vec<String> = Vec::new();
    // Insertion-ordered Vec next to the dedup set so `edge_kinds` is
    // deterministic and aligned with the order ops are pushed (collecting
    // from the hash set, as before, gave a hash-dependent order).
    let mut edge_kinds: Vec<String> = Vec::new();
    let mut edge_kind_set = FxHashSet::default();
    let mut seen_sorts = FxHashSet::default();
    // Every extracted theory starts with the two built-in sorts.
    sorts.push(Sort::simple("Vertex"));
    sorts.push(Sort::simple("Edge"));
    seen_sorts.insert("Vertex".to_owned());
    seen_sorts.insert("Edge".to_owned());
    let node_count = language.node_kind_count();
    for id in 0..node_count {
        // Kind ids are u16 in the tree-sitter API; out-of-range ids
        // cannot name a kind, so skip them.
        let Ok(id_u16) = u16::try_from(id) else {
            continue;
        };
        if language.node_kind_is_named(id_u16) {
            if let Some(name) = language.node_kind_for_id(id_u16) {
                if name.starts_with('_') {
                    // Hidden rule: not a user-visible node kind.
                    continue;
                }
                // Distinct kind ids can share a name; dedup via seen_sorts.
                if seen_sorts.insert(name.to_owned()) {
                    sorts.push(Sort::simple(name));
                    vertex_kinds.push(name.to_owned());
                }
            }
        }
    }
    // Field ids are 1-based in the tree-sitter API, hence 1..=count.
    let field_count = language.field_count();
    for id in 1..=field_count {
        let Ok(id_u16) = u16::try_from(id) else {
            continue;
        };
        if let Some(name) = language.field_name_for_id(id_u16) {
            if edge_kind_set.insert(name.to_owned()) {
                edge_kinds.push(name.to_owned());
                ops.push(Operation::unary(name, "parent", "Vertex", "Vertex"));
            }
        }
    }
    let theory = Theory::new(theory_name, sorts, ops, vec![]);
    Ok(ExtractedTheoryMeta {
        theory,
        supertypes: FxHashSet::default(),
        subtype_map: Vec::new(),
        optional_fields: FxHashSet::default(),
        ordered_fields: FxHashSet::default(),
        vertex_kinds,
        edge_kinds,
    })
}
/// Interprets one raw field entry from a node type's `"fields"` map.
///
/// Missing or non-boolean `required`/`multiple` values default to `false`;
/// a missing or malformed `types` array defaults to empty (best-effort,
/// matching the leniency of tree-sitter's metadata format).
///
/// # Errors
///
/// Returns [`ParseError::TheoryExtraction`] when `value` is not a JSON
/// object.
fn parse_field_spec(name: &str, value: &serde_json::Value) -> Result<FieldSpec, ParseError> {
    let serde_json::Value::Object(obj) = value else {
        return Err(ParseError::TheoryExtraction {
            reason: format!("field '{name}' is not an object"),
        });
    };
    // `true` only for an explicit JSON `true`; anything else is `false`.
    let flag = |key: &str| matches!(obj.get(key), Some(serde_json::Value::Bool(true)));
    let types: Vec<SubtypeRef> = match obj.get("types") {
        Some(v) => serde_json::from_value(v.clone()).unwrap_or_default(),
        None => Vec::new(),
    };
    Ok(FieldSpec {
        name: name.to_owned(),
        required: flag("required"),
        multiple: flag("multiple"),
        types,
    })
}
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
use super::*;
// A tiny grammar with one field edge ("body") and positional children;
// checks sort/op/vertex/edge counts and that `children` is ordered.
#[test]
fn extract_minimal_grammar() {
let json = br#"[
{
"type": "program",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": false,
"types": [{"type": "statement", "named": true}]
}
},
{
"type": "statement",
"named": true,
"fields": {
"body": {
"multiple": false,
"required": true,
"types": [{"type": "expression", "named": true}]
}
}
},
{
"type": "expression",
"named": true,
"fields": {}
},
{
"type": ";",
"named": false
}
]"#;
let meta = extract_theory_from_node_types("ThTest", json).unwrap();
// Vertex + Edge + the three named node types.
assert_eq!(meta.theory.sorts.len(), 5);
// "body" field op + synthetic "child_of" op.
assert_eq!(meta.theory.ops.len(), 2);
assert_eq!(meta.vertex_kinds.len(), 3);
assert!(meta.vertex_kinds.contains(&"program".to_owned()));
assert!(meta.vertex_kinds.contains(&"statement".to_owned()));
assert!(meta.vertex_kinds.contains(&"expression".to_owned()));
assert_eq!(meta.edge_kinds.len(), 2);
assert!(meta.ordered_fields.contains("children"));
}
// A supertype ("_expression") with two concrete subtypes; checks the
// supertype bookkeeping and that a `multiple` field is marked ordered.
#[test]
fn extract_supertype() {
let json = br#"[
{
"type": "_expression",
"named": true,
"subtypes": [
{"type": "binary_expression", "named": true},
{"type": "call_expression", "named": true}
]
},
{
"type": "binary_expression",
"named": true,
"fields": {
"left": {
"multiple": false,
"required": true,
"types": [{"type": "_expression", "named": true}]
},
"right": {
"multiple": false,
"required": true,
"types": [{"type": "_expression", "named": true}]
}
}
},
{
"type": "call_expression",
"named": true,
"fields": {
"function": {
"multiple": false,
"required": true,
"types": [{"type": "_expression", "named": true}]
},
"arguments": {
"multiple": true,
"required": true,
"types": [{"type": "_expression", "named": true}]
}
}
}
]"#;
let meta = extract_theory_from_node_types("ThExprTest", json).unwrap();
assert!(meta.supertypes.contains("_expression"));
assert_eq!(meta.subtype_map.len(), 1);
let (st, subs) = &meta.subtype_map[0];
assert_eq!(st, "_expression");
assert_eq!(subs.len(), 2);
assert!(meta.ordered_fields.contains("arguments"));
// left, right, function, arguments — one edge kind per distinct field.
assert_eq!(meta.edge_kinds.len(), 4);
}
// Unnamed tokens must not become sorts or vertex kinds.
#[test]
fn anonymous_tokens_skipped() {
let json = br#"[
{"type": "identifier", "named": true, "fields": {}},
{"type": "(", "named": false},
{"type": ")", "named": false}
]"#;
let meta = extract_theory_from_node_types("ThAnon", json).unwrap();
// Vertex + Edge + "identifier" only.
assert_eq!(meta.theory.sorts.len(), 3);
assert_eq!(meta.vertex_kinds.len(), 1);
}
}