Skip to main content

uni_common/
cypher_value_codec.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2024-2026 Dragonscale Team
3
4//! MessagePack-based binary encoding for CypherValue (uni_common::Value).
5//!
6//! # Design
7//!
8//! All property values are stored as self-describing binary blobs in Arrow
9//! `LargeBinary` columns. Each blob has the format:
10//!
11//! ```text
12//! [tag_byte: u8][msgpack_payload: bytes]
13//! ```
14//!
15//! The tag byte provides O(1) type identification without deserialization.
16//! MessagePack preserves int/float distinction natively (unlike JSON).
17//!
18//! # Tag Constants
19//!
//! | Tag | Type | Payload |
//! |-----|------|---------|
//! | 0 | Null | empty |
//! | 1 | Bool | msgpack bool |
//! | 2 | Int | msgpack i64 |
//! | 3 | Float | msgpack f64 |
//! | 4 | String | msgpack string |
//! | 5 | List | msgpack array of recursively-encoded blobs |
//! | 6 | Map | msgpack map of string → recursively-encoded blobs |
//! | 7 | Bytes | msgpack binary |
//! | 8 | Node | msgpack {vid, labels, props} |
//! | 9 | Edge | msgpack {eid, type, src, dst, props} |
//! | 10 | Path | msgpack {nodes, edges} |
//! | 11 | Date | msgpack i32 (days since epoch) |
//! | 12 | Time | msgpack {nanos, offset} (nanoseconds since midnight, offset in seconds) |
//! | 13 | DateTime | msgpack {nanos, offset, tz_name} (nanoseconds since epoch, offset in seconds, optional timezone name) |
//! | 14 | Duration | msgpack {months, days, nanos} |
//! | 15 | Point | msgpack {srid, coords} (reserved: no encode/decode arm exists yet) |
//! | 16 | Vector | msgpack array of f32 |
//! | 17 | LocalTime | msgpack i64 (nanoseconds since midnight) |
//! | 18 | LocalDateTime | msgpack i64 (nanoseconds since epoch) |
//! | 19 | Btic | 24-byte packed BTIC (lo, hi, meta) |
//! | 20 | SparseVector | packed sparse vector produced by `uni_sparse_vector::encode` |
42//!
43//! Nested values (List elements, Map values, Node/Edge properties) are
44//! recursively encoded as `[tag][payload]` blobs.
45
46use crate::api::error::UniError;
47use crate::core::id::{Eid, Vid};
48use crate::value::{Edge, Node, Path, Value};
49use serde::{Deserialize, Serialize};
50use std::collections::{BTreeMap, HashMap};
51
// Tag constants
//
// Each constant is the value of the first byte of an encoded blob. `decode`
// matches on that byte to pick how the remaining payload bytes are parsed, and
// `encode_to_buf` writes it before the payload. The numeric values are stored
// in encoded blobs, so they must not be renumbered.

/// Tag for `Value::Null`; the encoder writes no payload after it.
pub const TAG_NULL: u8 = 0;
/// Tag for `Value::Bool`.
pub const TAG_BOOL: u8 = 1;
/// Tag for `Value::Int`.
pub const TAG_INT: u8 = 2;
/// Tag for `Value::Float`.
pub const TAG_FLOAT: u8 = 3;
/// Tag for `Value::String`.
pub const TAG_STRING: u8 = 4;
/// Tag for `Value::List`; the payload is a sequence of nested tagged blobs.
pub const TAG_LIST: u8 = 5;
/// Tag for `Value::Map`; the payload maps string keys to nested tagged blobs.
pub const TAG_MAP: u8 = 6;
/// Tag for `Value::Bytes`.
pub const TAG_BYTES: u8 = 7;
/// Tag for `Value::Node`.
pub const TAG_NODE: u8 = 8;
/// Tag for `Value::Edge`.
pub const TAG_EDGE: u8 = 9;
/// Tag for `Value::Path`.
pub const TAG_PATH: u8 = 10;
/// Tag for `TemporalValue::Date`.
pub const TAG_DATE: u8 = 11;
/// Tag for `TemporalValue::Time` (nanoseconds since midnight plus an offset).
pub const TAG_TIME: u8 = 12;
/// Tag for `TemporalValue::DateTime` (nanoseconds since epoch, offset, optional timezone name).
pub const TAG_DATETIME: u8 = 13;
/// Tag for `TemporalValue::Duration`.
pub const TAG_DURATION: u8 = 14;
// Tag 15 (Point) is intentionally not defined: no encode or decode arm in this
// module handles it, so a blob starting with 15 decodes as an unknown tag.
// pub const TAG_POINT: u8 = 15;
/// Tag for `Value::Vector`.
pub const TAG_VECTOR: u8 = 16;
/// Tag for `TemporalValue::LocalTime`.
pub const TAG_LOCALTIME: u8 = 17;
/// Tag for `TemporalValue::LocalDateTime`.
pub const TAG_LOCALDATETIME: u8 = 18;
/// Tag for `TemporalValue::Btic`; the payload is the packed form from `uni_btic::encode`.
pub const TAG_BTIC: u8 = 19;
/// Tag for `Value::SparseVector`; the payload is the packed form from `uni_sparse_vector::encode`.
pub const TAG_SPARSE_VECTOR: u8 = 20;
74
75// ---------------------------------------------------------------------------
76// rmp_serde + UniError::Storage wrappers
77// ---------------------------------------------------------------------------
78
79/// Deserialize a MessagePack payload, wrapping any error in
80/// `UniError::Storage` with a uniform `"failed to decode <type>: <e>"`
81/// message. Used by every decode arm in this module.
82fn decode_msgpack<'de, T: Deserialize<'de>>(
83    payload: &'de [u8],
84    type_name: &'static str,
85) -> Result<T, UniError> {
86    rmp_serde::from_slice(payload).map_err(|e| UniError::Storage {
87        message: format!("failed to decode {type_name}: {e}"),
88        source: None,
89    })
90}
91
92/// Push `tag` onto `buf`, then append the MessagePack encoding of `value`.
93/// Encoding into a `Vec<u8>` is infallible in practice; we keep the panic
94/// path to match the historical contract.
95fn encode_msgpack<T: Serialize>(buf: &mut Vec<u8>, tag: u8, value: &T, type_name: &'static str) {
96    buf.push(tag);
97    rmp_serde::encode::write(buf, value).unwrap_or_else(|_| panic!("{type_name} encode failed"));
98}
99
100/// Canonicalize a `(indices, values)` pair into a valid [`uni_sparse_vector::SparseVector`].
101///
102/// Defensive, infallible counterpart to ingest validation: sorts term ids, sums the
103/// weights of duplicates, and drops non-finite weights (mirroring the auto-embed
104/// canonicalizer) so the durable [`encode`] path can never panic on a value that
105/// bypassed the executor's `coerce_and_validate_property_value` (issue #95). Mismatched
106/// array lengths collapse to the shorter side rather than aborting the write.
107fn canonical_sparse_vector(indices: &[u32], values: &[f32]) -> uni_sparse_vector::SparseVector {
108    let pairs: Vec<(u32, f32)> = indices
109        .iter()
110        .copied()
111        .zip(values.iter().copied())
112        .filter(|&(_, w)| w.is_finite())
113        .collect();
114    // `from_pairs` over finite weights only re-errors if a duplicate-term summation
115    // overflows to ±inf; fall back to the empty vector so encoding never panics.
116    uni_sparse_vector::SparseVector::from_pairs(pairs).unwrap_or_else(|_| {
117        uni_sparse_vector::SparseVector::new(Vec::new(), Vec::new())
118            .expect("empty sparse vector is always valid")
119    })
120}
121
122// ---------------------------------------------------------------------------
123// Public encode/decode API
124// ---------------------------------------------------------------------------
125
126/// Encode a Value to tagged MessagePack bytes.
127pub fn encode(value: &Value) -> Vec<u8> {
128    let mut buf = Vec::new();
129    encode_to_buf(value, &mut buf);
130    buf
131}
132
133/// Decode tagged MessagePack bytes to a Value.
134pub fn decode(bytes: &[u8]) -> Result<Value, UniError> {
135    if bytes.is_empty() {
136        return Err(UniError::Storage {
137            message: "empty CypherValue bytes".to_string(),
138            source: None,
139        });
140    }
141    let tag = bytes[0];
142    let payload = &bytes[1..];
143
144    match tag {
145        TAG_NULL => Ok(Value::Null),
146        TAG_BOOL => Ok(Value::Bool(decode_msgpack(payload, "bool")?)),
147        TAG_INT => Ok(Value::Int(decode_msgpack(payload, "int")?)),
148        TAG_FLOAT => Ok(Value::Float(decode_msgpack(payload, "float")?)),
149        TAG_STRING => Ok(Value::String(decode_msgpack(payload, "string")?)),
150        TAG_BYTES => Ok(Value::Bytes(decode_msgpack(payload, "bytes")?)),
151        TAG_LIST => {
152            let blobs: Vec<Vec<u8>> = decode_msgpack(payload, "list")?;
153            let items: Result<Vec<Value>, UniError> = blobs.iter().map(|b| decode(b)).collect();
154            Ok(Value::List(items?))
155        }
156        TAG_MAP => {
157            let blob_map: HashMap<String, Vec<u8>> = decode_msgpack(payload, "map")?;
158            let mut map = HashMap::new();
159            for (k, v_blob) in blob_map {
160                map.insert(k, decode(&v_blob)?);
161            }
162            Ok(Value::Map(map))
163        }
164        TAG_NODE => {
165            let np: NodePayload = decode_msgpack(payload, "node")?;
166            let mut props = HashMap::new();
167            for (k, v_blob) in np.properties {
168                props.insert(k, decode(&v_blob)?);
169            }
170            Ok(Value::Node(Node {
171                vid: np.vid,
172                labels: np.labels,
173                properties: props,
174            }))
175        }
176        TAG_EDGE => {
177            let ep: EdgePayload = decode_msgpack(payload, "edge")?;
178            let mut props = HashMap::new();
179            for (k, v_blob) in ep.properties {
180                props.insert(k, decode(&v_blob)?);
181            }
182            Ok(Value::Edge(Edge {
183                eid: ep.eid,
184                edge_type: ep.edge_type,
185                src: ep.src,
186                dst: ep.dst,
187                properties: props,
188            }))
189        }
190        TAG_PATH => {
191            let pp: PathPayload = decode_msgpack(payload, "path")?;
192            let nodes: Result<Vec<Node>, UniError> = pp
193                .nodes
194                .iter()
195                .map(|b| match decode(b)? {
196                    Value::Node(n) => Ok(n),
197                    _ => Err(UniError::Storage {
198                        message: "path node blob is not a Node".to_string(),
199                        source: None,
200                    }),
201                })
202                .collect();
203            let edges: Result<Vec<Edge>, UniError> = pp
204                .edges
205                .iter()
206                .map(|b| match decode(b)? {
207                    Value::Edge(e) => Ok(e),
208                    _ => Err(UniError::Storage {
209                        message: "path edge blob is not an Edge".to_string(),
210                        source: None,
211                    }),
212                })
213                .collect();
214            Ok(Value::Path(Path {
215                nodes: nodes?,
216                edges: edges?,
217            }))
218        }
219        TAG_VECTOR => Ok(Value::Vector(decode_msgpack(payload, "vector")?)),
220        TAG_DATE => Ok(Value::Temporal(crate::value::TemporalValue::Date {
221            days_since_epoch: decode_msgpack(payload, "date")?,
222        })),
223        TAG_LOCALTIME => Ok(Value::Temporal(crate::value::TemporalValue::LocalTime {
224            nanos_since_midnight: decode_msgpack(payload, "localtime")?,
225        })),
226        TAG_TIME => {
227            let tp: TimePayload = decode_msgpack(payload, "time")?;
228            Ok(Value::Temporal(crate::value::TemporalValue::Time {
229                nanos_since_midnight: tp.nanos,
230                offset_seconds: tp.offset,
231            }))
232        }
233        TAG_LOCALDATETIME => Ok(Value::Temporal(
234            crate::value::TemporalValue::LocalDateTime {
235                nanos_since_epoch: decode_msgpack(payload, "localdatetime")?,
236            },
237        )),
238        TAG_DATETIME => {
239            let dp: DateTimePayload = decode_msgpack(payload, "datetime")?;
240            Ok(Value::Temporal(crate::value::TemporalValue::DateTime {
241                nanos_since_epoch: dp.nanos,
242                offset_seconds: dp.offset,
243                timezone_name: dp.tz_name,
244            }))
245        }
246        TAG_DURATION => {
247            let dp: DurationPayload = decode_msgpack(payload, "duration")?;
248            Ok(Value::Temporal(crate::value::TemporalValue::Duration {
249                months: dp.months,
250                days: dp.days,
251                nanos: dp.nanos,
252            }))
253        }
254        TAG_BTIC => {
255            let btic = uni_btic::encode::decode_slice(payload).map_err(|e| UniError::Storage {
256                message: format!("failed to decode BTIC: {e}"),
257                source: None,
258            })?;
259            Ok(Value::Temporal(crate::value::TemporalValue::Btic {
260                lo: btic.lo(),
261                hi: btic.hi(),
262                meta: btic.meta(),
263            }))
264        }
265        TAG_SPARSE_VECTOR => {
266            let sv = uni_sparse_vector::encode::decode_slice(payload).map_err(|e| {
267                UniError::Storage {
268                    message: format!("failed to decode SparseVector: {e}"),
269                    source: None,
270                }
271            })?;
272            let (indices, values) = sv.into_parts();
273            Ok(Value::SparseVector { indices, values })
274        }
275        _ => Err(UniError::Storage {
276            message: format!("unknown CypherValue tag: {tag}"),
277            source: None,
278        }),
279    }
280}
281
282// ---------------------------------------------------------------------------
283// O(1) introspection API (no deserialization)
284// ---------------------------------------------------------------------------
285
/// Return the tag byte (the first byte) of an encoded blob without parsing
/// the payload, or `None` when `bytes` is empty.
pub fn peek_tag(bytes: &[u8]) -> Option<u8> {
    match bytes {
        [tag, ..] => Some(*tag),
        [] => None,
    }
}
290
291/// Fast null check.
292pub fn is_null(bytes: &[u8]) -> bool {
293    peek_tag(bytes) == Some(TAG_NULL)
294}
295
296// ---------------------------------------------------------------------------
297// Fast typed decode (skip Value construction)
298// ---------------------------------------------------------------------------
299
300/// Decode an int directly without constructing a Value.
301pub fn decode_int(bytes: &[u8]) -> Option<i64> {
302    if bytes.first().copied() != Some(TAG_INT) {
303        return None;
304    }
305    rmp_serde::from_slice(&bytes[1..]).ok()
306}
307
308/// Decode a float directly without constructing a Value.
309pub fn decode_float(bytes: &[u8]) -> Option<f64> {
310    if bytes.first().copied() != Some(TAG_FLOAT) {
311        return None;
312    }
313    rmp_serde::from_slice(&bytes[1..]).ok()
314}
315
316/// Decode a bool directly without constructing a Value.
317pub fn decode_bool(bytes: &[u8]) -> Option<bool> {
318    if bytes.first().copied() != Some(TAG_BOOL) {
319        return None;
320    }
321    rmp_serde::from_slice(&bytes[1..]).ok()
322}
323
324/// Decode a string directly without constructing a Value.
325pub fn decode_string(bytes: &[u8]) -> Option<String> {
326    if bytes.first().copied() != Some(TAG_STRING) {
327        return None;
328    }
329    rmp_serde::from_slice(&bytes[1..]).ok()
330}
331
332// ---------------------------------------------------------------------------
333// Fast typed encode (skip Value construction)
334// ---------------------------------------------------------------------------
335
336/// Encode an int directly without constructing a Value.
337pub fn encode_int(value: i64) -> Vec<u8> {
338    let mut buf = Vec::new();
339    buf.push(TAG_INT);
340    rmp_serde::encode::write(&mut buf, &value).expect("int encode failed");
341    buf
342}
343
344/// Encode a float directly without constructing a Value.
345pub fn encode_float(value: f64) -> Vec<u8> {
346    let mut buf = Vec::new();
347    buf.push(TAG_FLOAT);
348    rmp_serde::encode::write(&mut buf, &value).expect("float encode failed");
349    buf
350}
351
352/// Encode a bool directly without constructing a Value.
353pub fn encode_bool(value: bool) -> Vec<u8> {
354    let mut buf = Vec::new();
355    buf.push(TAG_BOOL);
356    rmp_serde::encode::write(&mut buf, &value).expect("bool encode failed");
357    buf
358}
359
360/// Encode a string directly without constructing a Value.
361pub fn encode_string(value: &str) -> Vec<u8> {
362    let mut buf = Vec::new();
363    buf.push(TAG_STRING);
364    rmp_serde::encode::write(&mut buf, value).expect("string encode failed");
365    buf
366}
367
368/// Encode null directly.
369pub fn encode_null() -> Vec<u8> {
370    vec![TAG_NULL]
371}
372
373/// Extract a map entry as raw bytes without decoding the entire map.
374///
375/// This is useful for extracting a single property from overflow JSON
376/// without paying the cost of decoding all other properties.
377///
378/// Returns `None` if:
379/// - The blob is not a TAG_MAP
380/// - The key doesn't exist in the map
381/// - Deserialization fails
382pub fn extract_map_entry_raw(blob: &[u8], key: &str) -> Option<Vec<u8>> {
383    if blob.first().copied() != Some(TAG_MAP) {
384        return None;
385    }
386    let payload = &blob[1..];
387    let blob_map: HashMap<String, Vec<u8>> = rmp_serde::from_slice(payload).ok()?;
388    blob_map.get(key).cloned()
389}
390
391// ---------------------------------------------------------------------------
392// Internal helpers
393// ---------------------------------------------------------------------------
394
395fn encode_to_buf(value: &Value, buf: &mut Vec<u8>) {
396    match value {
397        Value::Null => buf.push(TAG_NULL),
398        Value::Bool(b) => encode_msgpack(buf, TAG_BOOL, b, "bool"),
399        Value::Int(i) => encode_msgpack(buf, TAG_INT, i, "int"),
400        Value::Float(f) => encode_msgpack(buf, TAG_FLOAT, f, "float"),
401        Value::String(s) => encode_msgpack(buf, TAG_STRING, s, "string"),
402        Value::Bytes(b) => encode_msgpack(buf, TAG_BYTES, b, "bytes"),
403        Value::List(items) => {
404            let blobs: Vec<Vec<u8>> = items.iter().map(encode).collect();
405            encode_msgpack(buf, TAG_LIST, &blobs, "list");
406        }
407        Value::Map(map) => {
408            let blob_map: BTreeMap<String, Vec<u8>> =
409                map.iter().map(|(k, v)| (k.clone(), encode(v))).collect();
410            encode_msgpack(buf, TAG_MAP, &blob_map, "map");
411        }
412        Value::Node(node) => {
413            let mut props_blobs: Vec<(String, Vec<u8>)> = node
414                .properties
415                .iter()
416                .map(|(k, v)| (k.clone(), encode(v)))
417                .collect();
418            props_blobs.sort_by(|a, b| a.0.cmp(&b.0));
419            let payload = NodePayload {
420                vid: node.vid,
421                labels: node.labels.clone(),
422                properties: props_blobs,
423            };
424            encode_msgpack(buf, TAG_NODE, &payload, "node");
425        }
426        Value::Edge(edge) => {
427            let mut props_blobs: Vec<(String, Vec<u8>)> = edge
428                .properties
429                .iter()
430                .map(|(k, v)| (k.clone(), encode(v)))
431                .collect();
432            props_blobs.sort_by(|a, b| a.0.cmp(&b.0));
433            let payload = EdgePayload {
434                eid: edge.eid,
435                edge_type: edge.edge_type.clone(),
436                src: edge.src,
437                dst: edge.dst,
438                properties: props_blobs,
439            };
440            encode_msgpack(buf, TAG_EDGE, &payload, "edge");
441        }
442        Value::Path(path) => {
443            let payload = PathPayload {
444                nodes: path
445                    .nodes
446                    .iter()
447                    .map(|n| encode(&Value::Node(n.clone())))
448                    .collect(),
449                edges: path
450                    .edges
451                    .iter()
452                    .map(|e| encode(&Value::Edge(e.clone())))
453                    .collect(),
454            };
455            encode_msgpack(buf, TAG_PATH, &payload, "path");
456        }
457        Value::Vector(v) => encode_msgpack(buf, TAG_VECTOR, v, "vector"),
458        Value::SparseVector { indices, values } => {
459            buf.push(TAG_SPARSE_VECTOR);
460            // `encode` is infallible and runs on the durable WAL path, so it must never
461            // panic (M-PANIC-IS-STOP). User writes are canonicalized + validated at ingest
462            // (`coerce_and_validate_property_value`), so on every normal path this is a
463            // no-op re-canonicalization. A value that somehow arrives non-canonical here
464            // (e.g. a direct Rust-API construction bypassing the executor) is sorted, its
465            // duplicate term ids summed, and any non-finite weight dropped — matching the
466            // auto-embed canonicalizer — instead of aborting the write.
467            let sv = canonical_sparse_vector(indices, values);
468            buf.extend_from_slice(&uni_sparse_vector::encode::encode(&sv));
469        }
470        Value::Temporal(t) => match t {
471            crate::value::TemporalValue::Date { days_since_epoch } => {
472                encode_msgpack(buf, TAG_DATE, days_since_epoch, "date");
473            }
474            crate::value::TemporalValue::LocalTime {
475                nanos_since_midnight,
476            } => encode_msgpack(buf, TAG_LOCALTIME, nanos_since_midnight, "localtime"),
477            crate::value::TemporalValue::Time {
478                nanos_since_midnight,
479                offset_seconds,
480            } => {
481                let payload = TimePayload {
482                    nanos: *nanos_since_midnight,
483                    offset: *offset_seconds,
484                };
485                encode_msgpack(buf, TAG_TIME, &payload, "time");
486            }
487            crate::value::TemporalValue::LocalDateTime { nanos_since_epoch } => {
488                encode_msgpack(buf, TAG_LOCALDATETIME, nanos_since_epoch, "localdatetime");
489            }
490            crate::value::TemporalValue::DateTime {
491                nanos_since_epoch,
492                offset_seconds,
493                timezone_name,
494            } => {
495                let payload = DateTimePayload {
496                    nanos: *nanos_since_epoch,
497                    offset: *offset_seconds,
498                    tz_name: timezone_name.clone(),
499                };
500                encode_msgpack(buf, TAG_DATETIME, &payload, "datetime");
501            }
502            crate::value::TemporalValue::Duration {
503                months,
504                days,
505                nanos,
506            } => {
507                let payload = DurationPayload {
508                    months: *months,
509                    days: *days,
510                    nanos: *nanos,
511                };
512                encode_msgpack(buf, TAG_DURATION, &payload, "duration");
513            }
514            crate::value::TemporalValue::Btic { lo, hi, meta } => {
515                buf.push(TAG_BTIC);
516                let btic = uni_btic::Btic::new(*lo, *hi, *meta).expect("invalid BTIC value");
517                buf.extend_from_slice(&uni_btic::encode::encode(&btic));
518            }
519        },
520    }
521}
522
523// ---------------------------------------------------------------------------
524// Serde-compatible payload structs for complex types
525// ---------------------------------------------------------------------------
526
/// Serialized form of a `Node` inside a `TAG_NODE` blob.
///
/// The field layout is what serde writes to and reads from stored blobs, so
/// fields should not be renamed or reordered without a format migration.
/// NOTE(review): whether rmp_serde encodes this positionally or by field name
/// depends on its configuration — confirm before changing anything here.
#[derive(Serialize, Deserialize)]
struct NodePayload {
    // Identifier of the node.
    vid: Vid,
    // Labels attached to the node.
    labels: Vec<String>,
    // Property name paired with that property's tagged blob. The encoder
    // emits these sorted by name.
    properties: Vec<(String, Vec<u8>)>,
}
533
/// Serialized form of an `Edge` inside a `TAG_EDGE` blob.
///
/// The field layout is what serde writes to and reads from stored blobs, so
/// fields should not be renamed or reordered without a format migration.
#[derive(Serialize, Deserialize)]
struct EdgePayload {
    // Identifier of the edge.
    eid: Eid,
    // Relationship type name.
    edge_type: String,
    // Source and destination node identifiers.
    src: Vid,
    dst: Vid,
    // Property name paired with that property's tagged blob. The encoder
    // emits these sorted by name.
    properties: Vec<(String, Vec<u8>)>,
}
542
/// Serialized form of a `Path` inside a `TAG_PATH` blob.
///
/// Each element is a complete tagged blob; on decode every entry of `nodes`
/// must decode to a node and every entry of `edges` to an edge.
#[derive(Serialize, Deserialize)]
struct PathPayload {
    // One `TAG_NODE` blob per node, in path order.
    nodes: Vec<Vec<u8>>,
    // One `TAG_EDGE` blob per edge, in path order.
    edges: Vec<Vec<u8>>,
}
548
/// Serialized form of `TemporalValue::Time` inside a `TAG_TIME` blob.
#[derive(Serialize, Deserialize)]
struct TimePayload {
    // Maps to `nanos_since_midnight`.
    nanos: i64,
    // Maps to `offset_seconds`.
    offset: i32,
}
554
/// Serialized form of `TemporalValue::DateTime` inside a `TAG_DATETIME` blob.
#[derive(Serialize, Deserialize)]
struct DateTimePayload {
    // Maps to `nanos_since_epoch`.
    nanos: i64,
    // Maps to `offset_seconds`.
    offset: i32,
    // Maps to `timezone_name`; `None` when the value carries no named zone.
    tz_name: Option<String>,
}
561
/// Serialized form of `TemporalValue::Duration` inside a `TAG_DURATION` blob.
///
/// The three components are stored independently and copied through as-is;
/// no normalization between them happens in this module.
#[derive(Serialize, Deserialize)]
struct DurationPayload {
    months: i64,
    days: i64,
    nanos: i64,
}
568
569// ---------------------------------------------------------------------------
570// Unit tests
571// ---------------------------------------------------------------------------
572
// Round-trip, introspection and error-path tests for the tagged codec.
//
// Every `Value` kind handled by `encode`/`decode` has at least one round-trip
// test; the fast typed helpers are checked against the generic path so the two
// cannot drift apart.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::value::TemporalValue;

    #[test]
    fn test_round_trip_null() {
        let v = Value::Null;
        let bytes = encode(&v);
        assert_eq!(bytes[0], TAG_NULL);
        assert_eq!(bytes.len(), 1);
        let decoded = decode(&bytes).unwrap();
        assert_eq!(decoded, v);
    }

    #[test]
    fn test_round_trip_bool() {
        for b in [true, false] {
            let v = Value::Bool(b);
            let bytes = encode(&v);
            assert_eq!(bytes[0], TAG_BOOL);
            let decoded = decode(&bytes).unwrap();
            assert_eq!(decoded, v);
        }
    }

    #[test]
    fn test_round_trip_int() {
        for i in [-100, 0, 42, i64::MAX, i64::MIN] {
            let v = Value::Int(i);
            let bytes = encode(&v);
            assert_eq!(bytes[0], TAG_INT);
            let decoded = decode(&bytes).unwrap();
            assert_eq!(decoded, v);
        }
    }

    #[test]
    fn test_round_trip_float() {
        for f in [-3.15, 0.0, 42.5, f64::MAX, f64::MIN] {
            let v = Value::Float(f);
            let bytes = encode(&v);
            assert_eq!(bytes[0], TAG_FLOAT);
            let decoded = decode(&bytes).unwrap();
            assert_eq!(decoded, v);
        }
    }

    #[test]
    fn test_round_trip_string() {
        for s in ["", "hello", "unicode: 🦀"] {
            let v = Value::String(s.to_string());
            let bytes = encode(&v);
            assert_eq!(bytes[0], TAG_STRING);
            let decoded = decode(&bytes).unwrap();
            assert_eq!(decoded, v);
        }
    }

    #[test]
    fn test_round_trip_bytes() {
        let v = Value::Bytes(vec![1, 2, 3, 255]);
        let bytes = encode(&v);
        assert_eq!(bytes[0], TAG_BYTES);
        let decoded = decode(&bytes).unwrap();
        assert_eq!(decoded, v);
    }

    #[test]
    fn test_round_trip_list() {
        let v = Value::List(vec![
            Value::Int(1),
            Value::String("two".to_string()),
            Value::Float(3.0),
            Value::Null,
        ]);
        let bytes = encode(&v);
        assert_eq!(bytes[0], TAG_LIST);
        let decoded = decode(&bytes).unwrap();
        assert_eq!(decoded, v);
    }

    #[test]
    fn test_round_trip_nested_list() {
        let v = Value::List(vec![
            Value::Int(1),
            Value::List(vec![
                Value::String("nested".to_string()),
                Value::List(vec![Value::Bool(true)]),
            ]),
        ]);
        let bytes = encode(&v);
        let decoded = decode(&bytes).unwrap();
        assert_eq!(decoded, v);
    }

    #[test]
    fn test_round_trip_map() {
        let mut map = HashMap::new();
        map.insert("a".to_string(), Value::Int(1));
        map.insert("b".to_string(), Value::String("two".to_string()));
        map.insert("c".to_string(), Value::Null);
        let v = Value::Map(map);
        let bytes = encode(&v);
        assert_eq!(bytes[0], TAG_MAP);
        let decoded = decode(&bytes).unwrap();
        assert_eq!(decoded, v);
    }

    #[test]
    fn test_round_trip_node() {
        let mut props = HashMap::new();
        props.insert("name".to_string(), Value::String("Alice".to_string()));
        props.insert("age".to_string(), Value::Int(30));
        let v = Value::Node(Node {
            vid: Vid::from(123),
            labels: vec!["Person".to_string()],
            properties: props,
        });
        let bytes = encode(&v);
        assert_eq!(bytes[0], TAG_NODE);
        let decoded = decode(&bytes).unwrap();
        assert_eq!(decoded, v);
    }

    #[test]
    fn test_round_trip_edge() {
        let mut props = HashMap::new();
        props.insert("since".to_string(), Value::Int(2020));
        let v = Value::Edge(Edge {
            eid: Eid::from(456),
            edge_type: "KNOWS".to_string(),
            src: Vid::from(1),
            dst: Vid::from(2),
            properties: props,
        });
        let bytes = encode(&v);
        assert_eq!(bytes[0], TAG_EDGE);
        let decoded = decode(&bytes).unwrap();
        assert_eq!(decoded, v);
    }

    #[test]
    fn test_round_trip_path() {
        let v = Value::Path(Path {
            nodes: vec![Node {
                vid: Vid::from(1),
                labels: vec!["A".to_string()],
                properties: HashMap::new(),
            }],
            edges: vec![Edge {
                eid: Eid::from(1),
                edge_type: "REL".to_string(),
                src: Vid::from(1),
                dst: Vid::from(2),
                properties: HashMap::new(),
            }],
        });
        let bytes = encode(&v);
        assert_eq!(bytes[0], TAG_PATH);
        let decoded = decode(&bytes).unwrap();
        assert_eq!(decoded, v);
    }

    #[test]
    fn test_path_members_encode_like_standalone_values() {
        // A path with properties on its members must round-trip, i.e. each
        // embedded member blob is a valid standalone node/edge blob.
        let mut node_props = HashMap::new();
        node_props.insert("name".to_string(), Value::String("Alice".to_string()));
        let mut edge_props = HashMap::new();
        edge_props.insert("since".to_string(), Value::Int(2020));
        let v = Value::Path(Path {
            nodes: vec![
                Node {
                    vid: Vid::from(1),
                    labels: vec!["Person".to_string()],
                    properties: node_props,
                },
                Node {
                    vid: Vid::from(2),
                    labels: vec![],
                    properties: HashMap::new(),
                },
            ],
            edges: vec![Edge {
                eid: Eid::from(7),
                edge_type: "KNOWS".to_string(),
                src: Vid::from(1),
                dst: Vid::from(2),
                properties: edge_props,
            }],
        });
        let decoded = decode(&encode(&v)).unwrap();
        assert_eq!(decoded, v);
    }

    #[test]
    fn test_round_trip_vector() {
        let v = Value::Vector(vec![0.1, 0.2, 0.3]);
        let bytes = encode(&v);
        assert_eq!(bytes[0], TAG_VECTOR);
        let decoded = decode(&bytes).unwrap();
        assert_eq!(decoded, v);
    }

    #[test]
    fn test_round_trip_sparse_vector() {
        let v = Value::SparseVector {
            indices: vec![1, 7, 42],
            values: vec![0.25, -1.5, 3.0],
        };
        let bytes = encode(&v);
        assert_eq!(bytes[0], TAG_SPARSE_VECTOR);
        let decoded = decode(&bytes).unwrap();
        assert_eq!(decoded, v);
    }

    #[test]
    fn encode_canonicalizes_non_canonical_sparse_without_panicking() {
        // Regression for issue #95: a `Value::SparseVector` with unsorted/duplicate
        // term ids or a non-finite weight previously `.expect()`-panicked here on the
        // durable WAL path. Encoding must now canonicalize defensively and never panic.
        // Unsorted + duplicate term ids are sorted and summed.
        let v = Value::SparseVector {
            indices: vec![9, 1, 9],
            values: vec![1.0, 2.0, 0.5],
        };
        let bytes = encode(&v);
        assert_eq!(bytes[0], TAG_SPARSE_VECTOR);
        let decoded = decode(&bytes).unwrap();
        assert_eq!(
            decoded,
            Value::SparseVector {
                indices: vec![1, 9],
                values: vec![2.0, 1.5],
            }
        );

        // A NaN / ±inf weight is dropped rather than panicking.
        for bad in [f32::NAN, f32::INFINITY, f32::NEG_INFINITY] {
            let v = Value::SparseVector {
                indices: vec![1, 5],
                values: vec![bad, 2.0],
            };
            let bytes = encode(&v);
            let decoded = decode(&bytes).unwrap();
            assert_eq!(
                decoded,
                Value::SparseVector {
                    indices: vec![5],
                    values: vec![2.0],
                }
            );
        }

        // A length mismatch collapses to the shorter side instead of aborting:
        // only the first (index, weight) pair survives.
        let v = Value::SparseVector {
            indices: vec![1, 2, 3],
            values: vec![1.0],
        };
        let bytes = encode(&v); // must not panic
        assert_eq!(bytes[0], TAG_SPARSE_VECTOR);
        assert_eq!(
            decode(&bytes).unwrap(),
            Value::SparseVector {
                indices: vec![1],
                values: vec![1.0],
            }
        );
    }

    #[test]
    fn test_round_trip_sparse_vector_empty() {
        let v = Value::SparseVector {
            indices: vec![],
            values: vec![],
        };
        let bytes = encode(&v);
        assert_eq!(bytes[0], TAG_SPARSE_VECTOR);
        assert_eq!(decode(&bytes).unwrap(), v);
    }

    #[test]
    fn test_round_trip_sparse_vector_nested_in_map() {
        // Nested-in-Map exercises the CV path used for non-declared/nested
        // sparse values (the tag framing must survive map recursion).
        let mut m = std::collections::HashMap::new();
        m.insert(
            "emb".to_string(),
            Value::SparseVector {
                indices: vec![3, 9],
                values: vec![1.0, 2.0],
            },
        );
        let v = Value::Map(m);
        let bytes = encode(&v);
        let decoded = decode(&bytes).unwrap();
        assert_eq!(decoded, v);
    }

    #[test]
    fn test_round_trip_temporal_values() {
        // Each non-BTIC temporal variant, paired with the tag it must carry.
        let cases = vec![
            (
                TemporalValue::Date {
                    days_since_epoch: 19_000,
                },
                TAG_DATE,
            ),
            (
                TemporalValue::Date {
                    days_since_epoch: -1,
                },
                TAG_DATE,
            ),
            (
                TemporalValue::LocalTime {
                    nanos_since_midnight: 43_200_000_000_000,
                },
                TAG_LOCALTIME,
            ),
            (
                TemporalValue::Time {
                    nanos_since_midnight: 1_000,
                    offset_seconds: -18_000,
                },
                TAG_TIME,
            ),
            (
                TemporalValue::LocalDateTime {
                    nanos_since_epoch: 1_700_000_000_000_000_000,
                },
                TAG_LOCALDATETIME,
            ),
            (
                TemporalValue::DateTime {
                    nanos_since_epoch: 1_700_000_000_000_000_000,
                    offset_seconds: 3_600,
                    timezone_name: Some("Europe/Paris".to_string()),
                },
                TAG_DATETIME,
            ),
            (
                TemporalValue::DateTime {
                    nanos_since_epoch: -1,
                    offset_seconds: 0,
                    timezone_name: None,
                },
                TAG_DATETIME,
            ),
            (
                TemporalValue::Duration {
                    months: 14,
                    days: -3,
                    nanos: 5_000_000_000,
                },
                TAG_DURATION,
            ),
        ];
        for (temporal, expected_tag) in cases {
            let v = Value::Temporal(temporal);
            let bytes = encode(&v);
            assert_eq!(bytes[0], expected_tag);
            assert_eq!(decode(&bytes).unwrap(), v);
        }
    }

    #[test]
    fn test_decode_rejects_malformed_input() {
        // Empty input has no tag byte.
        assert!(decode(&[]).is_err());
        // Tag 15 (Point) has no decode arm; 255 is outside the tag range.
        assert!(decode(&[15]).is_err());
        assert!(decode(&[255]).is_err());
        // A valid tag with a missing payload is a decode error, not a panic.
        assert!(decode(&[TAG_INT]).is_err());
        assert!(decode(&[TAG_STRING]).is_err());
    }

    #[test]
    fn test_peek_tag() {
        assert_eq!(peek_tag(&encode(&Value::Null)), Some(TAG_NULL));
        assert_eq!(peek_tag(&encode(&Value::Bool(true))), Some(TAG_BOOL));
        assert_eq!(peek_tag(&encode(&Value::Int(42))), Some(TAG_INT));
        assert_eq!(peek_tag(&encode(&Value::Float(3.15))), Some(TAG_FLOAT));
        assert_eq!(
            peek_tag(&encode(&Value::String("x".to_string()))),
            Some(TAG_STRING)
        );
        assert_eq!(peek_tag(&[]), None);
    }

    #[test]
    fn test_is_null() {
        assert!(is_null(&encode(&Value::Null)));
        assert!(is_null(&encode_null()));
        assert!(!is_null(&encode(&Value::Int(0))));
        assert!(!is_null(&[]));
    }

    #[test]
    fn test_fast_decode_int() {
        let bytes = encode(&Value::Int(42));
        assert_eq!(decode_int(&bytes), Some(42));
        assert_eq!(decode_int(&encode(&Value::Float(42.0))), None);
        assert_eq!(decode_int(&encode(&Value::String("42".to_string()))), None);
        assert_eq!(decode_int(&[]), None);
    }

    #[test]
    fn test_fast_decode_float() {
        let bytes = encode(&Value::Float(3.15));
        assert_eq!(decode_float(&bytes), Some(3.15));
        assert_eq!(decode_float(&encode(&Value::Int(3))), None);
        assert_eq!(decode_float(&[]), None);
    }

    #[test]
    fn test_fast_decode_bool() {
        let bytes = encode(&Value::Bool(true));
        assert_eq!(decode_bool(&bytes), Some(true));
        assert_eq!(decode_bool(&encode(&Value::Int(1))), None);
        assert_eq!(decode_bool(&[]), None);
    }

    #[test]
    fn test_fast_decode_string() {
        let bytes = encode(&Value::String("hello".to_string()));
        assert_eq!(decode_string(&bytes), Some("hello".to_string()));
        assert_eq!(decode_string(&encode(&Value::Int(42))), None);
        assert_eq!(decode_string(&[]), None);
    }

    #[test]
    fn test_fast_encode_matches_generic_encode() {
        // The typed shortcuts must produce exactly the bytes `encode` does,
        // otherwise blobs written via one path could differ from the other.
        assert_eq!(encode_null(), encode(&Value::Null));
        assert_eq!(encode_bool(true), encode(&Value::Bool(true)));
        assert_eq!(encode_bool(false), encode(&Value::Bool(false)));
        for i in [-100, 0, 42, i64::MAX, i64::MIN] {
            assert_eq!(encode_int(i), encode(&Value::Int(i)));
        }
        for f in [-3.15, 0.0, 42.5] {
            assert_eq!(encode_float(f), encode(&Value::Float(f)));
        }
        for s in ["", "hello", "unicode: 🦀"] {
            assert_eq!(encode_string(s), encode(&Value::String(s.to_string())));
        }
    }

    #[test]
    fn test_extract_map_entry_raw() {
        let mut map = HashMap::new();
        map.insert("a".to_string(), Value::Int(1));
        map.insert("b".to_string(), Value::String("two".to_string()));
        let blob = encode(&Value::Map(map));

        // The extracted entry is the value's own tagged blob.
        assert_eq!(
            extract_map_entry_raw(&blob, "a"),
            Some(encode(&Value::Int(1)))
        );
        let raw_b = extract_map_entry_raw(&blob, "b").unwrap();
        assert_eq!(decode(&raw_b).unwrap(), Value::String("two".to_string()));

        // Missing key, non-map blob and empty input all yield `None`.
        assert_eq!(extract_map_entry_raw(&blob, "missing"), None);
        assert_eq!(extract_map_entry_raw(&encode(&Value::Int(1)), "a"), None);
        assert_eq!(extract_map_entry_raw(&[], "a"), None);
    }

    #[test]
    fn test_int_float_distinction() {
        // This is the key win: JSON loses the int/float distinction
        let int_val = Value::Int(42);
        let float_val = Value::Float(42.0);

        let int_bytes = encode(&int_val);
        let float_bytes = encode(&float_val);

        // Different tags
        assert_eq!(int_bytes[0], TAG_INT);
        assert_eq!(float_bytes[0], TAG_FLOAT);

        // Different payloads
        assert_ne!(int_bytes, float_bytes);

        // Decode preserves distinction
        assert_eq!(decode(&int_bytes).unwrap(), Value::Int(42));
        assert_eq!(decode(&float_bytes).unwrap(), Value::Float(42.0));
    }

    #[test]
    fn test_round_trip_btic_epoch_instant() {
        let v = Value::Temporal(crate::value::TemporalValue::Btic {
            lo: 0,
            hi: 1,
            meta: 0x0000_0000_0000_0000,
        });
        let bytes = encode(&v);
        assert_eq!(bytes[0], TAG_BTIC);
        assert_eq!(bytes.len(), 25); // 1 tag + 24 packed
        let decoded = decode(&bytes).unwrap();
        assert_eq!(decoded, v);
    }

    #[test]
    fn test_round_trip_btic_year_1985() {
        let meta = 0x7700_0000_0000_0000u64; // year/year, definite/definite
        let v = Value::Temporal(crate::value::TemporalValue::Btic {
            lo: 473_385_600_000,
            hi: 504_921_600_000,
            meta,
        });
        let bytes = encode(&v);
        assert_eq!(bytes[0], TAG_BTIC);
        let decoded = decode(&bytes).unwrap();
        assert_eq!(decoded, v);
    }

    #[test]
    fn test_round_trip_btic_unbounded() {
        let v = Value::Temporal(crate::value::TemporalValue::Btic {
            lo: i64::MIN,
            hi: i64::MAX,
            meta: 0,
        });
        let bytes = encode(&v);
        assert_eq!(bytes[0], TAG_BTIC);
        let decoded = decode(&bytes).unwrap();
        assert_eq!(decoded, v);
    }

    #[test]
    fn test_round_trip_btic_with_certainty() {
        // approximate certainty on both bounds
        let meta = 0x7750_0000_0000_0000u64; // year/year, approximate/approximate
        let v = Value::Temporal(crate::value::TemporalValue::Btic {
            lo: -77_914_137_600_000, // 500 BCE
            hi: -77_882_601_600_000,
            meta,
        });
        let bytes = encode(&v);
        let decoded = decode(&bytes).unwrap();
        assert_eq!(decoded, v);
    }
}