1use panproto_gat::{Operation, Sort, Theory};
19use rustc_hash::FxHashSet;
20
21use crate::error::ParseError;
22
23#[derive(Debug, Clone, serde::Deserialize)]
27pub struct NodeType {
28 #[serde(rename = "type")]
30 pub node_type: String,
31 pub named: bool,
33 #[serde(default)]
35 pub fields: serde_json::Map<String, serde_json::Value>,
36 #[serde(default)]
38 pub children: Option<ChildSpec>,
39 #[serde(default)]
41 pub subtypes: Option<Vec<SubtypeRef>>,
42}
43
44#[derive(Debug, Clone, serde::Deserialize)]
46pub struct ChildSpec {
47 pub multiple: bool,
49 pub required: bool,
51 pub types: Vec<SubtypeRef>,
53}
54
55#[derive(Debug, Clone, serde::Deserialize)]
57pub struct SubtypeRef {
58 #[serde(rename = "type")]
60 pub node_type: String,
61 pub named: bool,
63}
64
65#[derive(Debug, Clone)]
67pub struct FieldSpec {
68 pub name: String,
70 pub required: bool,
72 pub multiple: bool,
74 pub types: Vec<SubtypeRef>,
76}
77
78#[derive(Debug, Clone)]
80pub struct ExtractedTheoryMeta {
81 pub theory: Theory,
83 pub supertypes: FxHashSet<String>,
85 pub subtype_map: Vec<(String, Vec<String>)>,
87 pub optional_fields: FxHashSet<String>,
89 pub ordered_fields: FxHashSet<String>,
91 pub vertex_kinds: Vec<String>,
93 pub edge_kinds: Vec<String>,
95}
96
97pub fn parse_node_types(json: &[u8]) -> Result<Vec<NodeType>, ParseError> {
105 let raw: Vec<serde_json::Value> =
111 serde_json::from_slice(json).map_err(|e| ParseError::NodeTypesJson { source: e })?;
112 raw.into_iter()
113 .filter(|entry| {
114 entry
115 .get("type")
116 .and_then(serde_json::Value::as_str)
117 .is_some()
118 })
119 .map(|entry| {
120 serde_json::from_value(entry).map_err(|e| ParseError::NodeTypesJson { source: e })
121 })
122 .collect()
123}
124
125pub fn extract_theory_from_node_types(
136 theory_name: &str,
137 json: &[u8],
138) -> Result<ExtractedTheoryMeta, ParseError> {
139 let node_types = parse_node_types(json)?;
140 extract_theory_from_entries(theory_name, &node_types)
141}
142
143pub fn extract_theory_from_entries(
149 theory_name: &str,
150 node_types: &[NodeType],
151) -> Result<ExtractedTheoryMeta, ParseError> {
152 let mut sorts: Vec<Sort> = Vec::new();
153 let mut ops: Vec<Operation> = Vec::new();
154 let mut supertypes = FxHashSet::default();
155 let mut subtype_map: Vec<(String, Vec<String>)> = Vec::new();
156 let mut optional_fields = FxHashSet::default();
157 let mut ordered_fields = FxHashSet::default();
158 let mut vertex_kinds: Vec<String> = Vec::new();
159 let mut edge_kind_set = FxHashSet::default();
160 let mut seen_sorts = FxHashSet::default();
161
162 sorts.push(Sort::simple("Vertex"));
164 sorts.push(Sort::simple("Edge"));
165 seen_sorts.insert("Vertex".to_owned());
166 seen_sorts.insert("Edge".to_owned());
167
168 for entry in node_types {
169 if !entry.named {
171 continue;
172 }
173
174 let sort_name = &entry.node_type;
175
176 if let Some(ref subtypes) = entry.subtypes {
178 supertypes.insert(sort_name.clone());
179 let concrete: Vec<String> = subtypes
180 .iter()
181 .filter(|s| s.named)
182 .map(|s| s.node_type.clone())
183 .collect();
184 subtype_map.push((sort_name.clone(), concrete));
185
186 if seen_sorts.insert(sort_name.clone()) {
188 sorts.push(Sort::simple(sort_name.as_str()));
189 vertex_kinds.push(sort_name.clone());
190 }
191 continue;
192 }
193
194 if seen_sorts.insert(sort_name.clone()) {
196 sorts.push(Sort::simple(sort_name.as_str()));
197 vertex_kinds.push(sort_name.clone());
198 }
199
200 for (field_name, field_value) in &entry.fields {
202 let spec = parse_field_spec(field_name, field_value)?;
203
204 if !spec.required {
206 optional_fields.insert(field_name.clone());
207 }
208 if spec.multiple {
209 ordered_fields.insert(field_name.clone());
210 }
211
212 if edge_kind_set.insert(field_name.clone()) {
217 ops.push(Operation::unary(
218 field_name.as_str(),
219 "parent",
220 "Vertex",
221 "Vertex",
222 ));
223 }
224 }
225
226 if let Some(ref children) = entry.children {
228 if children.multiple {
229 ordered_fields.insert("children".to_owned());
230 }
231 if edge_kind_set.insert("child_of".to_owned()) {
233 ops.push(Operation::unary("child_of", "parent", "Vertex", "Vertex"));
234 }
235 }
236 }
237
238 let edge_kinds: Vec<String> = edge_kind_set.into_iter().collect();
239
240 let theory = Theory::new(theory_name, sorts, ops, vec![]);
241
242 Ok(ExtractedTheoryMeta {
243 theory,
244 supertypes,
245 subtype_map,
246 optional_fields,
247 ordered_fields,
248 vertex_kinds,
249 edge_kinds,
250 })
251}
252
253pub fn extract_theory_from_language(
268 theory_name: &str,
269 language: &tree_sitter::Language,
270) -> Result<ExtractedTheoryMeta, ParseError> {
271 let mut sorts: Vec<Sort> = Vec::new();
272 let mut ops: Vec<Operation> = Vec::new();
273 let mut vertex_kinds: Vec<String> = Vec::new();
274 let mut edge_kind_set = FxHashSet::default();
275 let mut seen_sorts = FxHashSet::default();
276 sorts.push(Sort::simple("Vertex"));
278 sorts.push(Sort::simple("Edge"));
279 seen_sorts.insert("Vertex".to_owned());
280 seen_sorts.insert("Edge".to_owned());
281
282 let node_count = language.node_kind_count();
284 for id in 0..node_count {
285 let Ok(id_u16) = u16::try_from(id) else {
286 continue;
287 };
288 if language.node_kind_is_named(id_u16) {
289 if let Some(name) = language.node_kind_for_id(id_u16) {
290 if name.starts_with('_') {
292 continue;
293 }
294
295 if seen_sorts.insert(name.to_owned()) {
296 sorts.push(Sort::simple(name));
297 vertex_kinds.push(name.to_owned());
298 }
299 }
300 }
301 }
302
303 let field_count = language.field_count();
305 for id in 1..=field_count {
306 let Ok(id_u16) = u16::try_from(id) else {
307 continue;
308 };
309 if let Some(name) = language.field_name_for_id(id_u16) {
310 if edge_kind_set.insert(name.to_owned()) {
311 ops.push(Operation::unary(name, "parent", "Vertex", "Vertex"));
312 }
313 }
314 }
315
316 let edge_kinds: Vec<String> = edge_kind_set.into_iter().collect();
317
318 let theory = Theory::new(theory_name, sorts, ops, vec![]);
319
320 Ok(ExtractedTheoryMeta {
325 theory,
326 supertypes: FxHashSet::default(),
327 subtype_map: Vec::new(),
328 optional_fields: FxHashSet::default(),
329 ordered_fields: FxHashSet::default(),
330 vertex_kinds,
331 edge_kinds,
332 })
333}
334
335fn parse_field_spec(name: &str, value: &serde_json::Value) -> Result<FieldSpec, ParseError> {
339 let obj = value
340 .as_object()
341 .ok_or_else(|| ParseError::TheoryExtraction {
342 reason: format!("field '{name}' is not an object"),
343 })?;
344
345 let required = obj
346 .get("required")
347 .and_then(serde_json::Value::as_bool)
348 .unwrap_or(false);
349
350 let multiple = obj
351 .get("multiple")
352 .and_then(serde_json::Value::as_bool)
353 .unwrap_or(false);
354
355 let types: Vec<SubtypeRef> = obj
356 .get("types")
357 .and_then(|v| serde_json::from_value(v.clone()).ok())
358 .unwrap_or_default();
359
360 Ok(FieldSpec {
361 name: name.to_owned(),
362 required,
363 multiple,
364 types,
365 })
366}
367
#[cfg(test)]
// Unwrapping is the intended failure mode inside tests.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Returns `true` when `kinds` contains an entry equal to `wanted`.
    fn has_kind(kinds: &[String], wanted: &str) -> bool {
        kinds.iter().any(|kind| kind == wanted)
    }

    /// Three named kinds, one field and one `children` spec.
    #[test]
    fn extract_minimal_grammar() {
        let json = br#"[
            {
                "type": "program",
                "named": true,
                "fields": {},
                "children": {
                    "multiple": true,
                    "required": false,
                    "types": [{"type": "statement", "named": true}]
                }
            },
            {
                "type": "statement",
                "named": true,
                "fields": {
                    "body": {
                        "multiple": false,
                        "required": true,
                        "types": [{"type": "expression", "named": true}]
                    }
                }
            },
            {
                "type": "expression",
                "named": true,
                "fields": {}
            },
            {
                "type": ";",
                "named": false
            }
        ]"#;

        let meta = extract_theory_from_node_types("ThTest", json).unwrap();

        // Vertex + Edge + program + statement + expression.
        assert_eq!(meta.theory.sorts.len(), 5);

        // `body` and `child_of`.
        assert_eq!(meta.theory.ops.len(), 2);

        assert_eq!(meta.vertex_kinds.len(), 3);
        for expected in ["program", "statement", "expression"] {
            assert!(has_kind(&meta.vertex_kinds, expected));
        }

        assert_eq!(meta.edge_kinds.len(), 2);

        // `program` allows multiple children.
        assert!(meta.ordered_fields.contains("children"));
    }

    /// A supertype entry with two named subtypes that each declare two fields.
    #[test]
    fn extract_supertype() {
        let json = br#"[
            {
                "type": "_expression",
                "named": true,
                "subtypes": [
                    {"type": "binary_expression", "named": true},
                    {"type": "call_expression", "named": true}
                ]
            },
            {
                "type": "binary_expression",
                "named": true,
                "fields": {
                    "left": {
                        "multiple": false,
                        "required": true,
                        "types": [{"type": "_expression", "named": true}]
                    },
                    "right": {
                        "multiple": false,
                        "required": true,
                        "types": [{"type": "_expression", "named": true}]
                    }
                }
            },
            {
                "type": "call_expression",
                "named": true,
                "fields": {
                    "function": {
                        "multiple": false,
                        "required": true,
                        "types": [{"type": "_expression", "named": true}]
                    },
                    "arguments": {
                        "multiple": true,
                        "required": true,
                        "types": [{"type": "_expression", "named": true}]
                    }
                }
            }
        ]"#;

        let meta = extract_theory_from_node_types("ThExprTest", json).unwrap();

        assert!(meta.supertypes.contains("_expression"));

        assert_eq!(meta.subtype_map.len(), 1);
        let (supertype, members) = meta.subtype_map.first().unwrap();
        assert_eq!(supertype, "_expression");
        assert_eq!(members.len(), 2);

        // `arguments` is the only field declared with `multiple: true`.
        assert!(meta.ordered_fields.contains("arguments"));

        // left, right, function, arguments.
        assert_eq!(meta.edge_kinds.len(), 4);
    }

    /// Unnamed entries contribute neither sorts nor vertex kinds.
    #[test]
    fn anonymous_tokens_skipped() {
        let json = br#"[
            {"type": "identifier", "named": true, "fields": {}},
            {"type": "(", "named": false},
            {"type": ")", "named": false}
        ]"#;

        let meta = extract_theory_from_node_types("ThAnon", json).unwrap();

        // Vertex + Edge + identifier.
        assert_eq!(meta.theory.sorts.len(), 3);
        assert_eq!(meta.vertex_kinds.len(), 1);
    }
}