1use std::collections::HashMap;
17
18use panproto_gat::Name;
19use rustc_hash::FxHashMap;
20use serde::{Deserialize, Serialize};
21
22use crate::element_ops::ElementOps;
23use crate::functor::FInstance;
24use crate::ginstance::GInstance;
25use crate::instance::Instance;
26use crate::metadata::Node;
27use crate::value::{FieldPresence, Value};
28use crate::wtype::{
29 WInstance, build_env_with_children, collect_scalar_child_values, value_to_expr_literal,
30};
31
/// Declarative description of a query over an instance.
///
/// The executors in this module select starting elements by `anchor`,
/// optionally navigate along `path`, filter with `predicate`, truncate to
/// `limit`, build one [`QueryMatch`] per surviving element and, when
/// `group_by` is set, sort the matches by that field.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct InstanceQuery {
    /// Anchor that the starting elements of the query must have.
    pub anchor: Name,

    /// Optional filter expression. An element is kept only when the
    /// expression evaluates to `Bool(true)` for it. Omitted from the
    /// serialized form when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub predicate: Option<panproto_expr::Expr>,

    /// Optional field name. When set, the matches are sorted by a string
    /// rendering of that field's value. Omitted from the serialized form
    /// when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub group_by: Option<String>,

    /// Optional list of field names to keep in [`QueryMatch::fields`].
    /// `None` keeps every field; listed names that an element does not have
    /// are skipped. Omitted from the serialized form when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub project: Option<Vec<String>>,

    /// Optional maximum number of elements kept. It is applied after the
    /// predicate filter and before the `group_by` sort. Omitted from the
    /// serialized form when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub limit: Option<usize>,

    /// Edge kinds to follow, in order, starting from the anchor-matched
    /// elements and before the predicate is applied. Omitted from the
    /// serialized form when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub path: Vec<Name>,
}
61
/// One element returned by a query.
#[derive(Debug, Clone)]
pub struct QueryMatch {
    /// Identifier of the matched element within its instance.
    pub node_id: u32,
    /// Anchor of the matched element.
    pub anchor: Name,
    /// The element's own value, if it has one.
    pub value: Option<FieldPresence>,
    /// The element's fields, restricted to the query's projection when one
    /// was given.
    pub fields: FxHashMap<String, Value>,
}
74
75#[must_use]
83pub fn execute(
84 query: &InstanceQuery,
85 instance: &WInstance,
86 _schema: &panproto_schema::Schema,
87) -> Vec<QueryMatch> {
88 let eval_config = panproto_expr::EvalConfig::default();
89
90 let candidates: Vec<u32> = instance
92 .nodes
93 .iter()
94 .filter(|(_, n)| n.anchor == query.anchor)
95 .map(|(id, _)| *id)
96 .collect();
97
98 let navigated = if query.path.is_empty() {
100 candidates
101 } else {
102 navigate_path(instance, &candidates, &query.path)
103 };
104
105 let filtered = if let Some(ref pred) = query.predicate {
107 navigated
108 .into_iter()
109 .filter(|&id| {
110 let Some(node) = instance.nodes.get(&id) else {
111 return false;
112 };
113 let env = build_node_env(node, instance);
114 matches!(
115 crate::instance_env::eval_with_instance(
116 pred,
117 &env,
118 &eval_config,
119 instance,
120 Some(id),
121 ),
122 Ok(panproto_expr::Literal::Bool(true))
123 )
124 })
125 .collect()
126 } else {
127 navigated
128 };
129
130 let limited: Vec<u32> = if let Some(limit) = query.limit {
132 filtered.into_iter().take(limit).collect()
133 } else {
134 filtered
135 };
136
137 let mut results: Vec<QueryMatch> = limited
141 .into_iter()
142 .filter_map(|id| {
143 let node = instance.nodes.get(&id)?;
144 let scalars = collect_scalar_child_values(instance, id);
145 let mut combined = scalars;
146 for (key, val) in &node.extra_fields {
147 combined.insert(key.clone(), val.clone());
148 }
149 Some(QueryMatch {
150 node_id: id,
151 anchor: node.anchor.clone(),
152 value: node.value.clone(),
153 fields: project_fields(&combined, query.project.as_ref()),
154 })
155 })
156 .collect();
157
158 if let Some(ref group_key) = query.group_by {
160 results.sort_by(|a, b| {
161 let va = a.fields.get(group_key).map(value_sort_key);
162 let vb = b.fields.get(group_key).map(value_sort_key);
163 va.cmp(&vb)
164 });
165 }
166
167 results
168}
169
170#[must_use]
182pub fn execute_elements<T: ElementOps>(
183 query: &InstanceQuery,
184 instance: &T,
185 _schema: &panproto_schema::Schema,
186) -> Vec<QueryMatch> {
187 let eval_config = panproto_expr::EvalConfig::default();
188
189 let candidates = instance.fiber(&query.anchor);
191
192 let navigated = if query.path.is_empty() {
194 candidates
195 } else {
196 query.path.iter().fold(candidates, |current, edge_kind| {
197 instance.pushforward(¤t, edge_kind)
198 })
199 };
200
201 let filtered = if let Some(ref pred) = query.predicate {
203 navigated
204 .into_iter()
205 .filter(|&id| {
206 let env = instance.stalk(id);
207 matches!(
208 crate::instance_env::eval_with_element_ops(
209 pred,
210 &env,
211 &eval_config,
212 instance,
213 Some(id),
214 ),
215 Ok(panproto_expr::Literal::Bool(true))
216 )
217 })
218 .collect()
219 } else {
220 navigated
221 };
222
223 let limited: Vec<u32> = if let Some(limit) = query.limit {
225 filtered.into_iter().take(limit).collect()
226 } else {
227 filtered
228 };
229
230 let mut results: Vec<QueryMatch> = limited
232 .into_iter()
233 .filter_map(|id| {
234 let anchor = instance.sort(id)?;
235 let value = instance.element_value(id);
236 let all_fields = instance.attributes(id);
237 Some(QueryMatch {
238 node_id: id,
239 anchor,
240 value,
241 fields: project_fields(&all_fields, query.project.as_ref()),
242 })
243 })
244 .collect();
245
246 if let Some(ref group_key) = query.group_by {
248 results.sort_by(|a, b| {
249 let va = a.fields.get(group_key).map(value_sort_key);
250 let vb = b.fields.get(group_key).map(value_sort_key);
251 va.cmp(&vb)
252 });
253 }
254
255 results
256}
257
258#[must_use]
262pub fn execute_graph(
263 query: &InstanceQuery,
264 instance: &GInstance,
265 schema: &panproto_schema::Schema,
266) -> Vec<QueryMatch> {
267 execute_elements(query, instance, schema)
268}
269
270#[must_use]
274pub fn execute_functor(
275 query: &InstanceQuery,
276 instance: &FInstance,
277 schema: &panproto_schema::Schema,
278) -> Vec<QueryMatch> {
279 execute_elements(query, instance, schema)
280}
281
282#[must_use]
285pub fn execute_any(
286 query: &InstanceQuery,
287 instance: &Instance,
288 schema: &panproto_schema::Schema,
289) -> Vec<QueryMatch> {
290 match instance {
291 Instance::WType(w) => execute_elements(query, w, schema),
292 Instance::Functor(f) => execute_elements(query, f, schema),
293 Instance::Graph(g) => execute_elements(query, g, schema),
294 }
295}
296
297fn navigate_path(instance: &WInstance, start_nodes: &[u32], path: &[Name]) -> Vec<u32> {
302 let mut current = start_nodes.to_vec();
303 for edge_kind in path {
304 let mut next = Vec::new();
305 for &node_id in ¤t {
306 for &(src, tgt, ref edge) in &instance.arcs {
307 if src == node_id && edge.kind == *edge_kind {
308 next.push(tgt);
309 }
310 }
311 }
312 current = next;
313 }
314 current
315}
316
317#[must_use]
329pub fn build_node_env(node: &Node, instance: &WInstance) -> panproto_expr::Env {
330 let scalars = collect_scalar_child_values(instance, node.id);
331 let mut env = build_env_with_children(&node.extra_fields, &scalars);
332 env = env.extend(
333 std::sync::Arc::from("_anchor"),
334 panproto_expr::Literal::Str(node.anchor.as_ref().into()),
335 );
336 env = env.extend(
337 std::sync::Arc::from("_id"),
338 panproto_expr::Literal::Int(i64::from(node.id)),
339 );
340 if let Some(FieldPresence::Present(ref v)) = node.value {
341 env = env.extend(std::sync::Arc::from("_value"), value_to_expr_literal(v));
342 }
343 let children_count = instance
345 .arcs
346 .iter()
347 .filter(|(src, _, _)| *src == node.id)
348 .count();
349 #[allow(clippy::cast_possible_wrap)]
350 {
351 env = env.extend(
352 std::sync::Arc::from("_children_count"),
353 panproto_expr::Literal::Int(children_count as i64),
354 );
355 }
356 env
357}
358
359fn value_sort_key(v: &Value) -> String {
364 match v {
365 Value::Str(s) => s.clone(),
366 Value::Int(i) => i.to_string(),
367 Value::Float(f) => f.to_string(),
368 Value::Bool(b) => b.to_string(),
369 Value::Token(t) => t.clone(),
370 Value::Null => String::new(),
371 _ => format!("{v:?}"),
372 }
373}
374
375fn project_fields(
377 fields: &HashMap<String, Value>,
378 project: Option<&Vec<String>>,
379) -> FxHashMap<String, Value> {
380 project.map_or_else(
381 || fields.iter().map(|(k, v)| (k.clone(), v.clone())).collect(),
382 |keys| {
383 let mut result = FxHashMap::default();
384 for key in keys {
385 if let Some(val) = fields.get(key) {
386 result.insert(key.clone(), val.clone());
387 }
388 }
389 result
390 },
391 )
392}
393
/// Unit tests for the query executors.
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::cast_possible_truncation)]
mod tests {
    use super::*;
    use panproto_schema::{Edge, Protocol, SchemaBuilder};

    /// Schema with `document -layers-> layer -annotations-> annotation`.
    fn make_test_schema() -> panproto_schema::Schema {
        let protocol = Protocol::default();
        SchemaBuilder::new(&protocol)
            .vertex("document", "record", None)
            .unwrap()
            .vertex("layer", "record", None)
            .unwrap()
            .vertex("annotation", "record", None)
            .unwrap()
            .edge("document", "layer", "layers", None)
            .unwrap()
            .edge("layer", "annotation", "annotations", None)
            .unwrap()
            .build()
            .unwrap()
    }

    /// Instance with one document (0), one layer (1) and two annotations
    /// (2 and 3) hanging off the layer.
    fn make_test_instance() -> WInstance {
        let mut nodes = HashMap::new();
        nodes.insert(0, Node::new(0, "document"));

        // Node 1 is the layer, despite the `ann1` name.
        let mut ann1 = Node::new(1, "layer");
        ann1.extra_fields
            .insert("kind".into(), Value::Str("span".into()));
        nodes.insert(1, ann1);

        let mut ann2 = Node::new(2, "annotation");
        ann2.extra_fields
            .insert("label".into(), Value::Str("ingredient".into()));
        ann2.extra_fields
            .insert("confidence".into(), Value::Float(0.9));
        nodes.insert(2, ann2);

        let mut ann3 = Node::new(3, "annotation");
        ann3.extra_fields
            .insert("label".into(), Value::Str("step".into()));
        ann3.extra_fields
            .insert("confidence".into(), Value::Float(0.4));
        nodes.insert(3, ann3);

        let edge_layer = Edge {
            src: Name::from("document"),
            tgt: Name::from("layer"),
            kind: Name::from("layers"),
            name: None,
        };
        let edge_ann = Edge {
            src: Name::from("layer"),
            tgt: Name::from("annotation"),
            kind: Name::from("annotations"),
            name: None,
        };

        let arcs = vec![
            (0, 1, edge_layer),
            (1, 2, edge_ann.clone()),
            (1, 3, edge_ann),
        ];

        WInstance::new(nodes, arcs, vec![], 0, Name::from("document"))
    }

    /// Selecting by anchor alone returns every node with that anchor.
    #[test]
    fn query_by_anchor() {
        let inst = make_test_instance();
        let query = InstanceQuery {
            anchor: Name::from("annotation"),
            ..Default::default()
        };
        let results = execute(&query, &inst, &make_test_schema());
        assert_eq!(results.len(), 2);
    }

    /// A predicate on an extra field keeps only the nodes that satisfy it.
    #[test]
    fn query_with_predicate() {
        let inst = make_test_instance();
        let query = InstanceQuery {
            anchor: Name::from("annotation"),
            predicate: Some(panproto_expr::Expr::Builtin(
                panproto_expr::BuiltinOp::Eq,
                vec![
                    panproto_expr::Expr::Var("label".into()),
                    panproto_expr::Expr::Lit(panproto_expr::Literal::Str("ingredient".into())),
                ],
            )),
            ..Default::default()
        };
        let results = execute(&query, &inst, &make_test_schema());
        assert_eq!(results.len(), 1);
        assert_eq!(
            results[0].fields.get("label"),
            Some(&Value::Str("ingredient".into()))
        );
    }

    /// Following `layers` then `annotations` from the document reaches both
    /// annotation nodes.
    #[test]
    fn query_with_path_navigation() {
        let inst = make_test_instance();
        let query = InstanceQuery {
            anchor: Name::from("document"),
            path: vec![Name::from("layers"), Name::from("annotations")],
            ..Default::default()
        };
        let results = execute(&query, &inst, &make_test_schema());
        assert_eq!(results.len(), 2);
    }

    /// `limit` caps the number of returned matches.
    #[test]
    fn query_with_limit() {
        let inst = make_test_instance();
        let query = InstanceQuery {
            anchor: Name::from("annotation"),
            limit: Some(1),
            ..Default::default()
        };
        let results = execute(&query, &inst, &make_test_schema());
        assert_eq!(results.len(), 1);
    }

    /// `project` keeps only the listed fields on each match.
    #[test]
    fn query_with_projection() {
        let inst = make_test_instance();
        let query = InstanceQuery {
            anchor: Name::from("annotation"),
            project: Some(vec!["label".into()]),
            ..Default::default()
        };
        let results = execute(&query, &inst, &make_test_schema());
        assert_eq!(results.len(), 2);
        for r in &results {
            assert!(r.fields.contains_key("label"));
            assert!(!r.fields.contains_key("confidence"));
        }
    }

    /// An anchor that no node carries yields an empty result.
    #[test]
    fn query_no_match() {
        let inst = make_test_instance();
        let query = InstanceQuery {
            anchor: Name::from("nonexistent"),
            ..Default::default()
        };
        let results = execute(&query, &inst, &make_test_schema());
        assert!(results.is_empty());
    }

    /// `group_by` orders the matches so that equal field values are adjacent.
    #[test]
    fn query_with_group_by() {
        let mut nodes = HashMap::new();
        nodes.insert(0, Node::new(0, "document"));

        let mut layer = Node::new(1, "layer");
        layer
            .extra_fields
            .insert("kind".into(), Value::Str("span".into()));
        nodes.insert(1, layer);

        // Five annotations (ids 2..=6) with interleaved categories.
        let categories = ["vegetable", "fruit", "fruit", "vegetable", "grain"];
        for (i, cat) in categories.iter().enumerate() {
            let id = (i as u32) + 2;
            let mut ann = Node::new(id, "annotation");
            ann.extra_fields
                .insert("category".into(), Value::Str((*cat).into()));
            ann.extra_fields
                .insert("label".into(), Value::Str(format!("item_{i}")));
            nodes.insert(id, ann);
        }

        let edge_layer = Edge {
            src: Name::from("document"),
            tgt: Name::from("layer"),
            kind: Name::from("layers"),
            name: None,
        };
        let mut arcs = vec![(0, 1, edge_layer)];
        for i in 0..categories.len() {
            let id = (i as u32) + 2;
            arcs.push((
                1,
                id,
                Edge {
                    src: Name::from("layer"),
                    tgt: Name::from("annotation"),
                    kind: Name::from("annotations"),
                    name: None,
                },
            ));
        }

        let inst = WInstance::new(nodes, arcs, vec![], 0, Name::from("document"));

        let query = InstanceQuery {
            anchor: Name::from("annotation"),
            group_by: Some("category".into()),
            ..Default::default()
        };
        let results = execute(&query, &inst, &make_test_schema());
        assert_eq!(results.len(), 5);

        // Only the category sequence is checked; order within a category is
        // not asserted.
        let categories_out: Vec<&str> = results
            .iter()
            .filter_map(|r| match r.fields.get("category") {
                Some(Value::Str(s)) => Some(s.as_str()),
                _ => None,
            })
            .collect();
        assert_eq!(
            categories_out,
            vec!["fruit", "fruit", "grain", "vegetable", "vegetable"]
        );
    }

    /// Two `binding` nodes (0 and 3), each with a `var` and a `type` child
    /// reached through a named `prop` edge and carrying a string value.
    fn make_tree_instance_with_child_values() -> WInstance {
        let mut nodes = HashMap::new();

        nodes.insert(0, Node::new(0, "binding"));

        nodes.insert(
            1,
            Node::new(1, "binding.var").with_value(FieldPresence::Present(Value::Str("x0".into()))),
        );
        nodes.insert(
            2,
            Node::new(2, "binding.type")
                .with_value(FieldPresence::Present(Value::Str("noun".into()))),
        );

        nodes.insert(3, Node::new(3, "binding"));
        nodes.insert(
            4,
            Node::new(4, "binding.var").with_value(FieldPresence::Present(Value::Str("x1".into()))),
        );
        nodes.insert(
            5,
            Node::new(5, "binding.type")
                .with_value(FieldPresence::Present(Value::Str("verb".into()))),
        );

        let arcs = vec![
            (
                0,
                1,
                Edge {
                    src: "binding".into(),
                    tgt: "binding.var".into(),
                    kind: "prop".into(),
                    name: Some("var".into()),
                },
            ),
            (
                0,
                2,
                Edge {
                    src: "binding".into(),
                    tgt: "binding.type".into(),
                    kind: "prop".into(),
                    name: Some("type".into()),
                },
            ),
            (
                3,
                4,
                Edge {
                    src: "binding".into(),
                    tgt: "binding.var".into(),
                    kind: "prop".into(),
                    name: Some("var".into()),
                },
            ),
            (
                3,
                5,
                Edge {
                    src: "binding".into(),
                    tgt: "binding.type".into(),
                    kind: "prop".into(),
                    name: Some("type".into()),
                },
            ),
        ];

        WInstance::new(nodes, arcs, vec![], 0, Name::from("binding"))
    }

    /// Schema matching `make_tree_instance_with_child_values`.
    fn make_binding_schema() -> panproto_schema::Schema {
        let protocol = Protocol::default();
        SchemaBuilder::new(&protocol)
            .vertex("binding", "record", None)
            .unwrap()
            .vertex("binding.var", "string", None)
            .unwrap()
            .vertex("binding.type", "string", None)
            .unwrap()
            .edge("binding", "binding.var", "prop", Some("var"))
            .unwrap()
            .edge("binding", "binding.type", "prop", Some("type"))
            .unwrap()
            .build()
            .unwrap()
    }

    /// A predicate can refer to a scalar child value by its edge name.
    #[test]
    fn child_value_predicate_matches() {
        let inst = make_tree_instance_with_child_values();
        let schema = make_binding_schema();

        let query = InstanceQuery {
            anchor: Name::from("binding"),
            predicate: Some(panproto_expr::Expr::Builtin(
                panproto_expr::BuiltinOp::Eq,
                vec![
                    panproto_expr::Expr::Var("var".into()),
                    panproto_expr::Expr::Lit(panproto_expr::Literal::Str("x0".into())),
                ],
            )),
            ..Default::default()
        };

        let results = execute(&query, &inst, &schema);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].node_id, 0);
    }

    /// Scalar child values are exposed as fields on the parent's match.
    #[test]
    fn child_values_appear_in_query_match_fields() {
        let inst = make_tree_instance_with_child_values();
        let schema = make_binding_schema();

        let query = InstanceQuery {
            anchor: Name::from("binding"),
            ..Default::default()
        };

        let results = execute(&query, &inst, &schema);
        assert_eq!(results.len(), 2);

        for result in &results {
            assert!(result.fields.contains_key("var"));
            assert!(result.fields.contains_key("type"));
        }
    }

    /// When an extra field and a child value share a name, the extra field
    /// wins both in predicate evaluation and in the returned fields.
    #[test]
    fn extra_fields_override_child_values_in_query() {
        let mut nodes = HashMap::new();
        let mut parent = Node::new(0, "thing");
        parent
            .extra_fields
            .insert("name".into(), Value::Str("override".into()));
        nodes.insert(0, parent);

        nodes.insert(
            1,
            Node::new(1, "thing.name")
                .with_value(FieldPresence::Present(Value::Str("original".into()))),
        );

        let arcs = vec![(
            0,
            1,
            Edge {
                src: "thing".into(),
                tgt: "thing.name".into(),
                kind: "prop".into(),
                name: Some("name".into()),
            },
        )];

        let inst = WInstance::new(nodes, arcs, vec![], 0, Name::from("thing"));

        let protocol = Protocol::default();
        let schema = SchemaBuilder::new(&protocol)
            .vertex("thing", "record", None)
            .unwrap()
            .vertex("thing.name", "string", None)
            .unwrap()
            .edge("thing", "thing.name", "prop", Some("name"))
            .unwrap()
            .build()
            .unwrap();

        let query = InstanceQuery {
            anchor: Name::from("thing"),
            predicate: Some(panproto_expr::Expr::Builtin(
                panproto_expr::BuiltinOp::Eq,
                vec![
                    panproto_expr::Expr::Var("name".into()),
                    panproto_expr::Expr::Lit(panproto_expr::Literal::Str("override".into())),
                ],
            )),
            ..Default::default()
        };

        let results = execute(&query, &inst, &schema);
        assert_eq!(results.len(), 1);
        assert_eq!(
            results[0].fields.get("name"),
            Some(&Value::Str("override".into()))
        );
    }

    /// `execute_any` on a W-type instance returns as many matches as the
    /// direct `execute` call.
    #[test]
    fn execute_any_wtype_dispatch() {
        let inst = make_test_instance();
        let schema = make_test_schema();
        let query = InstanceQuery {
            anchor: Name::from("annotation"),
            ..Default::default()
        };

        let via_direct = execute(&query, &inst, &schema);
        let via_any = execute_any(&query, &Instance::WType(inst), &schema);
        assert_eq!(via_direct.len(), via_any.len());
    }

    /// `execute_any` handles graph instances. The executors do not consult
    /// the schema, so the unrelated test schema is sufficient here.
    #[test]
    fn execute_any_ginstance_dispatch() {
        let g = GInstance::new()
            .with_node(Node::new(0, "person"))
            .with_node(Node::new(1, "person"))
            .with_value(0, Value::Str("Alice".into()))
            .with_value(1, Value::Str("Bob".into()));

        let schema = make_test_schema();
        let query = InstanceQuery {
            anchor: Name::from("person"),
            ..Default::default()
        };

        let results = execute_any(&query, &Instance::Graph(g), &schema);
        assert_eq!(results.len(), 2);
    }

    /// `execute_any` handles functor instances and exposes row columns as
    /// match fields.
    #[test]
    fn execute_any_finstance_dispatch() {
        let mut row = HashMap::new();
        row.insert("name".to_string(), Value::Str("Alice".into()));
        let f = FInstance::new().with_table("users", vec![row]);

        let schema = make_test_schema();
        let query = InstanceQuery {
            anchor: Name::from("users"),
            ..Default::default()
        };

        let results = execute_any(&query, &Instance::Functor(f), &schema);
        assert_eq!(results.len(), 1);
        assert_eq!(
            results[0].fields.get("name"),
            Some(&Value::Str("Alice".into()))
        );
    }
}