Skip to main content

uni_query/query/executor/
result_normalizer.rs

// SPDX-License-Identifier: Apache-2.0
// Copyright 2024-2026 Dragonscale Team

//! Result normalization - converts internal representations to user-facing types.
//!
//! Enforces type system invariants:
//! - All nodes must be Value::Node (not Value::Map with _vid/_labels)
//! - All edges must be Value::Edge (not Value::Map with _eid/_type)
//! - All paths must be Value::Path
//! - No internal fields exposed in user-facing results
11
12use crate::types::{Edge, Node, Path, Value};
13use anyhow::{Result, anyhow};
14use std::collections::HashMap;
15use uni_common::core::id::{Eid, Vid};
16
/// Turns raw executor output into clean, user-facing [`Value`]s.
///
/// Raw rows may carry graph entities as plain `Value::Map`s tagged with
/// internal fields (`_vid`, `_eid`, `_labels`, ...). The normalizer rewrites
/// those maps into the proper `Value::Node`, `Value::Edge`, or `Value::Path`
/// variants before results are handed back to callers.
///
/// The type is a stateless unit struct: every operation is an associated
/// function, so no instance is ever required.
#[derive(Debug)]
pub struct ResultNormalizer;
24
25impl ResultNormalizer {
26    /// Normalize a complete row of results.
27    pub fn normalize_row(row: HashMap<String, Value>) -> Result<HashMap<String, Value>> {
28        row.into_iter()
29            .map(|(k, v)| Ok((k, Self::normalize_value(v)?)))
30            .collect()
31    }
32
33    /// Recursively normalize a single value.
34    pub fn normalize_value(value: Value) -> Result<Value> {
35        match value {
36            Value::List(items) => {
37                let normalized: Result<Vec<_>> =
38                    items.into_iter().map(Self::normalize_value).collect();
39                Ok(Value::List(normalized?))
40            }
41
42            Value::Map(map) => {
43                // Check if this map represents a path, node, or edge (order matters: path first)
44                if Self::is_path_map(&map) {
45                    Self::map_to_path(map)
46                } else if Self::is_node_map(&map) {
47                    Self::map_to_node(map)
48                } else if Self::is_edge_map(&map) {
49                    Self::map_to_edge(map)
50                } else {
51                    let normalized: Result<HashMap<_, _>> = map
52                        .into_iter()
53                        .map(|(k, v)| Ok((k, Self::normalize_value(v)?)))
54                        .collect();
55                    Ok(Value::Map(normalized?))
56                }
57            }
58
59            // Already proper graph types or primitives - pass through unchanged
60            _ => Ok(value),
61        }
62    }
63
64    /// Normalize a property value without structural conversion.
65    ///
66    /// Recursively processes nested lists and maps but does NOT convert maps to
67    /// Node/Edge/Path structures. This prevents user data containing keys like
68    /// `_vid` or `_eid` from being incorrectly converted.
69    fn normalize_property_value(value: Value) -> Value {
70        match value {
71            Value::List(items) => Value::List(
72                items
73                    .into_iter()
74                    .map(Self::normalize_property_value)
75                    .collect(),
76            ),
77            Value::Map(map) => Value::Map(
78                map.into_iter()
79                    .map(|(k, v)| (k, Self::normalize_property_value(v)))
80                    .collect(),
81            ),
82            other => other,
83        }
84    }
85
86    /// Check if map represents a node.
87    ///
88    /// Detection is intentionally lenient for top-level result values. Property values
89    /// inside nodes/edges use `normalize_property_value` instead, which skips this check.
90    fn is_node_map(map: &HashMap<String, Value>) -> bool {
91        map.contains_key("_vid") || (map.contains_key("_id") && map.contains_key("label"))
92    }
93
94    /// Check if map represents an edge.
95    ///
96    /// Detection is intentionally lenient for top-level result values. Property values
97    /// inside nodes/edges use `normalize_property_value` instead, which skips this check.
98    fn is_edge_map(map: &HashMap<String, Value>) -> bool {
99        map.contains_key("_eid")
100            || (map.contains_key("_id") && map.contains_key("_src") && map.contains_key("_dst"))
101    }
102
103    /// Check if map represents a path (has "nodes" and "relationships" or "edges").
104    fn is_path_map(map: &HashMap<String, Value>) -> bool {
105        map.contains_key("nodes")
106            && (map.contains_key("relationships") || map.contains_key("edges"))
107    }
108
109    /// Extract a u64 ID from a Value (Int or parseable String).
110    fn value_to_u64(value: &Value) -> Option<u64> {
111        match value {
112            Value::Int(i) => u64::try_from(*i).ok(),
113            Value::String(s) => s.parse().ok(),
114            _ => None,
115        }
116    }
117
118    /// Extract a string from a Value.
119    fn value_to_string(value: &Value) -> Option<String> {
120        if let Value::String(s) = value {
121            Some(s.clone())
122        } else {
123            None
124        }
125    }
126
127    /// Returns `true` if the key is a user-facing property (not an internal or reserved field).
128    fn is_user_property(key: &str) -> bool {
129        !key.starts_with('_')
130            && key != "properties"
131            && key != "label"
132            && key != "type"
133            && key != "overflow_json"
134    }
135
136    /// Extract properties from a dedicated "properties" field (if present) or from inline fields.
137    ///
138    /// This handles two property storage formats:
139    /// 1. A "properties" field containing LargeBinary (JSON) or a Map
140    /// 2. Inline fields in the map (non-underscore fields)
141    fn extract_properties_from_field_or_inline(
142        map: &HashMap<String, Value>,
143    ) -> HashMap<String, Value> {
144        // First try to extract from a dedicated "properties" field
145        if let Some(props_value) = map.get("properties") {
146            match props_value {
147                // Properties stored as a Map
148                Value::Map(m) => {
149                    return Self::prune_null_properties(
150                        m.iter()
151                            .map(|(k, v)| (k.clone(), Self::normalize_property_value(v.clone())))
152                            .collect(),
153                    );
154                }
155                // Properties stored as Bytes (JSON serialized)
156                Value::Bytes(bytes) => {
157                    if let Ok(props) =
158                        serde_json::from_slice::<HashMap<String, serde_json::Value>>(bytes)
159                    {
160                        return Self::prune_null_properties(
161                            props
162                                .into_iter()
163                                .map(|(k, v)| (k, Self::json_value_to_value(v)))
164                                .collect(),
165                        );
166                    }
167                }
168                _ => {}
169            }
170        }
171
172        // Expand _all_props JSONB blob (used by traverse and schemaless scan paths).
173        // _all_props is decoded from JSONB to Value::Map by arrow_to_value.
174        if let Some(Value::Map(all_props)) = map.get("_all_props") {
175            let mut properties: HashMap<String, Value> = all_props
176                .iter()
177                .map(|(k, v)| (k.clone(), Self::normalize_property_value(v.clone())))
178                .collect();
179            // Merge any inline non-internal properties (schema-defined props loaded as columns)
180            for (k, v) in map.iter() {
181                if Self::is_user_property(k) {
182                    properties
183                        .entry(k.clone())
184                        .or_insert_with(|| Self::normalize_property_value(v.clone()));
185                }
186            }
187            return Self::prune_null_properties(properties);
188        }
189
190        // Fall back to extracting inline properties (excluding internal and reserved fields)
191        Self::prune_null_properties(
192            map.iter()
193                .filter(|(k, _)| Self::is_user_property(k))
194                .map(|(k, v)| (k.clone(), Self::normalize_property_value(v.clone())))
195                .collect(),
196        )
197    }
198
199    /// Remove properties with null values from user-facing entity property maps.
200    fn prune_null_properties(mut properties: HashMap<String, Value>) -> HashMap<String, Value> {
201        properties.retain(|_, v| !v.is_null());
202        properties
203    }
204
205    /// Convert a serde_json::Value to our Value type.
206    fn json_value_to_value(json: serde_json::Value) -> Value {
207        match json {
208            serde_json::Value::Null => Value::Null,
209            serde_json::Value::Bool(b) => Value::Bool(b),
210            serde_json::Value::Number(n) => n
211                .as_i64()
212                .map(Value::Int)
213                .or_else(|| n.as_f64().map(Value::Float))
214                .unwrap_or_else(|| Value::String(n.to_string())),
215            serde_json::Value::String(s) => Value::String(s),
216            serde_json::Value::Array(arr) => {
217                Value::List(arr.into_iter().map(Self::json_value_to_value).collect())
218            }
219            serde_json::Value::Object(obj) => Value::Map(
220                obj.into_iter()
221                    .map(|(k, v)| (k, Self::json_value_to_value(v)))
222                    .collect(),
223            ),
224        }
225    }
226
227    /// Convert map to Node, extracting properties and stripping internal fields.
228    fn map_to_node(map: HashMap<String, Value>) -> Result<Value> {
229        let vid = map
230            .get("_vid")
231            .or_else(|| map.get("_id"))
232            .and_then(Self::value_to_u64)
233            .map(Vid::new)
234            .ok_or_else(|| anyhow!("Missing or invalid _vid in node map"))?;
235
236        let labels = if let Some(Value::List(label_list)) = map.get("_labels") {
237            label_list
238                .iter()
239                .filter_map(|v| match v {
240                    Value::String(s) => Some(s.clone()),
241                    _ => None,
242                })
243                .collect()
244        } else if let Some(Value::String(s)) = map.get("_labels") {
245            // Single string fallback for backwards compat within same session
246            if s.is_empty() {
247                vec![]
248            } else {
249                vec![s.clone()]
250            }
251        } else {
252            Vec::new()
253        };
254
255        // Try to extract properties from a dedicated "properties" field (LargeBinary/JSON)
256        // If not present or not parseable, fall back to extracting from inline fields
257        let properties = Self::extract_properties_from_field_or_inline(&map);
258
259        Ok(Value::Node(Node {
260            vid,
261            labels,
262            properties,
263        }))
264    }
265
266    /// Convert map to Edge, extracting properties and stripping internal fields.
267    fn map_to_edge(map: HashMap<String, Value>) -> Result<Value> {
268        let eid = map
269            .get("_eid")
270            .or_else(|| map.get("_id"))
271            .and_then(Self::value_to_u64)
272            .map(Eid::new)
273            .ok_or_else(|| anyhow!("Missing or invalid _eid in edge map"))?;
274
275        // Prefer _type_name (string) over _type (numeric ID) for user-facing output
276        let edge_type = ["_type_name", "_type", "type"]
277            .iter()
278            .find_map(|key| map.get(*key).and_then(Self::value_to_string))
279            .filter(|s| !s.is_empty())
280            .unwrap_or_default();
281
282        let src = map
283            .get("_src")
284            .and_then(Self::value_to_u64)
285            .map(Vid::new)
286            .ok_or_else(|| anyhow!("Missing _src in edge map"))?;
287
288        let dst = map
289            .get("_dst")
290            .and_then(Self::value_to_u64)
291            .map(Vid::new)
292            .ok_or_else(|| anyhow!("Missing _dst in edge map"))?;
293
294        // Try to extract properties from a dedicated "properties" field (LargeBinary/JSON)
295        // If not present or not parseable, fall back to extracting from inline fields
296        let properties = Self::extract_properties_from_field_or_inline(&map);
297
298        Ok(Value::Edge(Edge {
299            eid,
300            edge_type,
301            src,
302            dst,
303            properties,
304        }))
305    }
306
307    /// Convert map to Path, handling both "relationships" and "edges" keys.
308    fn map_to_path(mut map: HashMap<String, Value>) -> Result<Value> {
309        let nodes = Self::extract_path_nodes(
310            map.remove("nodes")
311                .ok_or_else(|| anyhow!("Missing nodes in path map"))?,
312        )?;
313
314        let edges = Self::extract_path_edges(
315            map.remove("relationships")
316                .or_else(|| map.remove("edges"))
317                .ok_or_else(|| anyhow!("Missing relationships/edges in path map"))?,
318        )?;
319
320        Ok(Value::Path(Path { nodes, edges }))
321    }
322
323    /// Extract a list of graph entities from a path component.
324    ///
325    /// `extract_native` pulls the entity from its native Value variant (e.g., `Value::Node`).
326    /// `convert_map` converts a Map representation to the entity type.
327    /// `type_name` is used in error messages (e.g., "node", "edge").
328    fn extract_path_elements<T>(
329        value: Value,
330        extract_native: fn(Value) -> Option<T>,
331        convert_map: fn(HashMap<String, Value>) -> Result<Value>,
332        type_name: &str,
333    ) -> Result<Vec<T>> {
334        let Value::List(items) = value else {
335            return Err(anyhow!("Path {} must be a list", type_name));
336        };
337
338        items
339            .into_iter()
340            .map(|item| match item {
341                Value::Map(m) => extract_native(convert_map(m)?)
342                    .ok_or_else(|| anyhow!("Failed to convert map to {} in path", type_name)),
343                other => extract_native(other)
344                    .ok_or_else(|| anyhow!("Invalid {} type in path list", type_name)),
345            })
346            .collect()
347    }
348
349    /// Extract nodes from a path's nodes list.
350    fn extract_path_nodes(value: Value) -> Result<Vec<Node>> {
351        Self::extract_path_elements(
352            value,
353            |v| match v {
354                Value::Node(n) => Some(n),
355                _ => None,
356            },
357            Self::map_to_node,
358            "nodes",
359        )
360    }
361
362    /// Extract edges from a path's relationships/edges list.
363    fn extract_path_edges(value: Value) -> Result<Vec<Edge>> {
364        Self::extract_path_elements(
365            value,
366            |v| match v {
367                Value::Edge(e) => Some(e),
368                _ => None,
369            },
370            Self::map_to_edge,
371            "edges",
372        )
373    }
374}
375
376#[cfg(test)]
377mod tests {
378    use super::*;
379
380    #[test]
381    fn test_normalize_node_map() {
382        let mut map = HashMap::new();
383        map.insert("_vid".to_string(), Value::Int(123));
384        map.insert(
385            "_labels".to_string(),
386            Value::List(vec![Value::String("Person".to_string())]),
387        );
388        map.insert("name".to_string(), Value::String("Alice".to_string()));
389        map.insert("age".to_string(), Value::Int(30));
390
391        let result = ResultNormalizer::normalize_value(Value::Map(map)).unwrap();
392
393        match result {
394            Value::Node(node) => {
395                assert_eq!(node.vid.as_u64(), 123);
396                assert_eq!(node.labels, vec!["Person".to_string()]);
397                assert_eq!(
398                    node.properties.get("name"),
399                    Some(&Value::String("Alice".to_string()))
400                );
401                assert_eq!(node.properties.get("age"), Some(&Value::Int(30)));
402                // Internal fields should be stripped
403                assert!(!node.properties.contains_key("_vid"));
404                assert!(!node.properties.contains_key("_labels"));
405            }
406            _ => panic!("Expected Node variant"),
407        }
408    }
409
410    #[test]
411    fn test_normalize_edge_map() {
412        let mut map = HashMap::new();
413        map.insert("_eid".to_string(), Value::Int(456));
414        map.insert("_type".to_string(), Value::String("KNOWS".to_string()));
415        map.insert("_src".to_string(), Value::Int(123));
416        map.insert("_dst".to_string(), Value::Int(789));
417        map.insert("since".to_string(), Value::Int(2020));
418
419        let result = ResultNormalizer::normalize_value(Value::Map(map)).unwrap();
420
421        match result {
422            Value::Edge(edge) => {
423                assert_eq!(edge.eid.as_u64(), 456);
424                assert_eq!(edge.edge_type, "KNOWS");
425                assert_eq!(edge.src.as_u64(), 123);
426                assert_eq!(edge.dst.as_u64(), 789);
427                assert_eq!(edge.properties.get("since"), Some(&Value::Int(2020)));
428                // Internal fields should be stripped
429                assert!(!edge.properties.contains_key("_eid"));
430                assert!(!edge.properties.contains_key("_type"));
431            }
432            _ => panic!("Expected Edge variant"),
433        }
434    }
435
436    #[test]
437    fn test_normalize_nested_structures() {
438        let mut inner_map = HashMap::new();
439        inner_map.insert("_vid".to_string(), Value::Int(100));
440        inner_map.insert(
441            "_labels".to_string(),
442            Value::List(vec![Value::String("Node".to_string())]),
443        );
444
445        let list = vec![Value::Map(inner_map.clone()), Value::Int(42)];
446
447        let result = ResultNormalizer::normalize_value(Value::List(list)).unwrap();
448
449        match result {
450            Value::List(items) => {
451                assert_eq!(items.len(), 2);
452                assert!(matches!(items[0], Value::Node(_)));
453                assert_eq!(items[1], Value::Int(42));
454            }
455            _ => panic!("Expected List variant"),
456        }
457    }
458
459    #[test]
460    fn test_normalize_regular_map() {
461        let mut map = HashMap::new();
462        map.insert("key1".to_string(), Value::String("value1".to_string()));
463        map.insert("key2".to_string(), Value::Int(42));
464
465        let result = ResultNormalizer::normalize_value(Value::Map(map)).unwrap();
466
467        match result {
468            Value::Map(m) => {
469                assert_eq!(m.get("key1"), Some(&Value::String("value1".to_string())));
470                assert_eq!(m.get("key2"), Some(&Value::Int(42)));
471            }
472            _ => panic!("Expected Map variant for regular map"),
473        }
474    }
475
476    #[test]
477    fn test_normalize_row() {
478        let mut node_map = HashMap::new();
479        node_map.insert("_vid".to_string(), Value::Int(123));
480        node_map.insert(
481            "_labels".to_string(),
482            Value::List(vec![Value::String("Person".to_string())]),
483        );
484        node_map.insert("name".to_string(), Value::String("Alice".to_string()));
485
486        let mut row = HashMap::new();
487        row.insert("n".to_string(), Value::Map(node_map));
488        row.insert("count".to_string(), Value::Int(5));
489
490        let result = ResultNormalizer::normalize_row(row).unwrap();
491
492        assert!(matches!(result.get("n"), Some(Value::Node(_))));
493        assert_eq!(result.get("count"), Some(&Value::Int(5)));
494    }
495
496    #[test]
497    fn test_map_with_vid_at_top_level_becomes_node() {
498        // At top level, a map with _vid is detected as a node
499        // (even without _labels - labels defaults to empty vec)
500        let mut map = HashMap::new();
501        map.insert("_vid".to_string(), Value::Int(123));
502        map.insert("name".to_string(), Value::String("test".to_string()));
503
504        let result = ResultNormalizer::normalize_value(Value::Map(map)).unwrap();
505
506        match result {
507            Value::Node(node) => {
508                assert_eq!(node.vid.as_u64(), 123);
509                assert!(node.labels.is_empty()); // Default empty labels
510                assert_eq!(
511                    node.properties.get("name"),
512                    Some(&Value::String("test".to_string()))
513                );
514            }
515            _ => panic!("Expected Node variant, got {:?}", result),
516        }
517    }
518
519    #[test]
520    fn test_normalize_node_with_nested_map_containing_vid_key() {
521        // Regression test: user property containing _vid key should NOT be
522        // converted to a Node
523        let mut nested = HashMap::new();
524        nested.insert("_vid".to_string(), Value::String("user-data".to_string()));
525        nested.insert("other".to_string(), Value::Int(42));
526
527        let mut node_map = HashMap::new();
528        node_map.insert("_vid".to_string(), Value::Int(123));
529        node_map.insert(
530            "_labels".to_string(),
531            Value::List(vec![Value::String("Person".to_string())]),
532        );
533        node_map.insert("metadata".to_string(), Value::Map(nested));
534
535        let result = ResultNormalizer::normalize_value(Value::Map(node_map)).unwrap();
536
537        match result {
538            Value::Node(node) => {
539                assert_eq!(node.vid.as_u64(), 123);
540                assert_eq!(node.labels, vec!["Person".to_string()]);
541                // The nested map should remain a Map, NOT become a Node
542                match node.properties.get("metadata") {
543                    Some(Value::Map(m)) => {
544                        assert_eq!(m.get("_vid"), Some(&Value::String("user-data".to_string())));
545                        assert_eq!(m.get("other"), Some(&Value::Int(42)));
546                    }
547                    other => panic!("Expected metadata to be Map, got {:?}", other),
548                }
549            }
550            _ => panic!("Expected Node variant"),
551        }
552    }
553
554    #[test]
555    fn test_normalize_edge_with_nested_map_containing_eid_key() {
556        // Regression test: user property containing _eid key should NOT be
557        // converted to an Edge
558        let mut nested = HashMap::new();
559        nested.insert("_eid".to_string(), Value::String("ref-123".to_string()));
560
561        let mut edge_map = HashMap::new();
562        edge_map.insert("_eid".to_string(), Value::Int(456));
563        edge_map.insert("_type".to_string(), Value::String("KNOWS".to_string()));
564        edge_map.insert("_src".to_string(), Value::Int(123));
565        edge_map.insert("_dst".to_string(), Value::Int(789));
566        edge_map.insert("reference".to_string(), Value::Map(nested));
567
568        let result = ResultNormalizer::normalize_value(Value::Map(edge_map)).unwrap();
569
570        match result {
571            Value::Edge(edge) => {
572                assert_eq!(edge.eid.as_u64(), 456);
573                // The nested map should remain a Map, NOT become an Edge
574                match edge.properties.get("reference") {
575                    Some(Value::Map(m)) => {
576                        assert_eq!(m.get("_eid"), Some(&Value::String("ref-123".to_string())));
577                    }
578                    other => panic!("Expected reference to be Map, got {:?}", other),
579                }
580            }
581            _ => panic!("Expected Edge variant"),
582        }
583    }
584
585    #[test]
586    fn test_normalize_node_prunes_null_properties() {
587        let mut map = HashMap::new();
588        map.insert("_vid".to_string(), Value::Int(1));
589        map.insert(
590            "_labels".to_string(),
591            Value::List(vec![Value::String("Person".to_string())]),
592        );
593        map.insert("name".to_string(), Value::String("Alice".to_string()));
594        map.insert("age".to_string(), Value::Null);
595
596        let result = ResultNormalizer::normalize_value(Value::Map(map)).unwrap();
597        let Value::Node(node) = result else {
598            panic!("Expected Node variant");
599        };
600
601        assert_eq!(
602            node.properties.get("name"),
603            Some(&Value::String("Alice".to_string()))
604        );
605        assert!(!node.properties.contains_key("age"));
606    }
607
608    #[test]
609    fn test_normalize_edge_prunes_null_properties_from_all_props_and_inline() {
610        let mut all_props = HashMap::new();
611        all_props.insert("since".to_string(), Value::Null);
612        all_props.insert("weight".to_string(), Value::Int(7));
613
614        let mut edge_map = HashMap::new();
615        edge_map.insert("_eid".to_string(), Value::Int(10));
616        edge_map.insert("_type".to_string(), Value::String("REL".to_string()));
617        edge_map.insert("_src".to_string(), Value::Int(1));
618        edge_map.insert("_dst".to_string(), Value::Int(2));
619        edge_map.insert("_all_props".to_string(), Value::Map(all_props));
620        edge_map.insert("name".to_string(), Value::Null);
621
622        let result = ResultNormalizer::normalize_value(Value::Map(edge_map)).unwrap();
623        let Value::Edge(edge) = result else {
624            panic!("Expected Edge variant");
625        };
626
627        assert_eq!(edge.properties.get("weight"), Some(&Value::Int(7)));
628        assert!(!edge.properties.contains_key("since"));
629        assert!(!edge.properties.contains_key("name"));
630    }
631}