Skip to main content

uni_common/core/
id.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2024-2026 Dragonscale Team
3
4use anyhow::{Result, anyhow};
5use multibase::Base;
6use serde::{Deserialize, Serialize};
7use std::fmt;
8use std::str::FromStr;
9
/// Define a dense, auto-increment 64-bit id newtype (`Vid` / `Eid`).
///
/// Both ids are structurally identical — a `u64` with an `INVALID` sentinel and
/// a top `EPHEMERAL_BIT` for transient, in-query identities minted by
/// `host.allocate_transient_id()`. This macro keeps them as **distinct** types
/// (a `Vid` cannot be passed where an `Eid` is expected) while sharing one
/// source of truth for the impl, so the ephemeral-bit logic can never drift
/// between them. `$label` drives the `Debug` / `FromStr` diagnostics.
///
/// The expansion refers to `Serialize`, `Deserialize`, `fmt`, `FromStr`,
/// `Result` and `anyhow!` by their short names, so those must be in scope in
/// the module where the macro is invoked.
macro_rules! define_id_newtype {
    ($(#[$meta:meta])* $name:ident, $label:literal) => {
        $(#[$meta])*
        #[derive(Clone, Copy, Hash, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize)]
        pub struct $name(u64);

        impl $name {
            /// Sentinel value representing an invalid/null id (all 64 bits set).
            pub const INVALID: $name = $name(u64::MAX);

            /// Top bit reserved for ephemeral (transient, in-query) identities
            /// allocated by `host.allocate_transient_id()` (M5g / proposal §4.13.1).
            /// Storage write paths must reject any id with this bit set.
            pub const EPHEMERAL_BIT: u64 = 1u64 << 63;

            /// Creates a new id from a raw u64 value.
            ///
            /// No validation is performed: the raw value may equal the
            /// `INVALID` sentinel or carry the ephemeral bit.
            pub const fn new(id: u64) -> Self {
                Self(id)
            }

            /// Returns the raw u64 value of this id, including the ephemeral
            /// bit when it is set.
            pub const fn as_u64(&self) -> u64 {
                self.0
            }

            /// Check if this id is the invalid sentinel.
            pub const fn is_invalid(&self) -> bool {
                self.0 == Self::INVALID.0
            }

            /// Construct an ephemeral id from a `transient_id` (bottom 63 bits).
            ///
            /// Returns `INVALID` if `transient_id` overflows the 63-bit range.
            /// Note that the largest 63-bit payload (`EPHEMERAL_BIT - 1`)
            /// encodes to all ones, which *is* the `INVALID` sentinel, so that
            /// value also yields an id for which `is_invalid()` is true.
            pub const fn ephemeral(transient_id: u64) -> Self {
                if transient_id >= Self::EPHEMERAL_BIT {
                    Self::INVALID
                } else {
                    Self(Self::EPHEMERAL_BIT | transient_id)
                }
            }

            /// True if this id's high bit is set and it is not the `INVALID`
            /// sentinel, i.e. it was minted by `host.allocate_transient_id()`
            /// and is *not* backed by storage.
            ///
            /// `INVALID` has every bit set (including the ephemeral bit) but
            /// is explicitly excluded here, so callers do not need to check
            /// `is_invalid()` first.
            pub const fn is_ephemeral(&self) -> bool {
                (self.0 & Self::EPHEMERAL_BIT) != 0 && !self.is_invalid()
            }

            /// Bottom 63 bits when `self` is ephemeral, else `None`.
            ///
            /// `INVALID` is not ephemeral, so it yields `None` as well.
            pub const fn transient_id(&self) -> Option<u64> {
                if self.is_ephemeral() {
                    Some(self.0 & !Self::EPHEMERAL_BIT)
                } else {
                    None
                }
            }
        }

        /// Wraps a raw `u64` without validation (same as `new`).
        impl From<u64> for $name {
            fn from(val: u64) -> Self {
                Self(val)
            }
        }

        /// Unwraps the id back into its raw `u64` (same as `as_u64`).
        impl From<$name> for u64 {
            fn from(id: $name) -> Self {
                id.0
            }
        }

        /// The default id is the `INVALID` sentinel, not zero, so an
        /// uninitialised id can never be mistaken for a real one.
        impl Default for $name {
            fn default() -> Self {
                Self::INVALID
            }
        }

        /// Diagnostic form: `<label>(INVALID)` for the sentinel, otherwise
        /// `<label>(<raw u64>)`.
        impl fmt::Debug for $name {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                if self.is_invalid() {
                    write!(f, concat!($label, "(INVALID)"))
                } else {
                    write!(f, concat!($label, "({})"), self.0)
                }
            }
        }

        /// Plain decimal rendering of the raw value; round-trips via `FromStr`.
        impl fmt::Display for $name {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                write!(f, "{}", self.0)
            }
        }

        /// Parses the decimal form produced by `Display`.
        ///
        /// Fails with a message naming the label and the offending input when
        /// the string is not a valid `u64`.
        impl FromStr for $name {
            type Err = anyhow::Error;

            fn from_str(s: &str) -> Result<Self> {
                let id: u64 = s
                    .parse()
                    .map_err(|e| anyhow!(concat!("Invalid ", $label, " '{}': {}"), s, e))?;
                Ok(Self::new(id))
            }
        }
    };
}
117
118define_id_newtype!(
119    /// Internal Vertex ID (64 bits) โ€” pure auto-increment.
120    ///
121    /// VIDs are dense, sequential identifiers assigned on vertex creation; they
122    /// no longer embed label information (label lookups go via the
123    /// VidLabelsIndex). For O(1) array indexing during query execution, use
124    /// `DenseIdx` via `VidRemapper`.
125    Vid,
126    "Vid"
127);
128
129define_id_newtype!(
130    /// Internal Edge ID (64 bits) โ€” pure auto-increment.
131    ///
132    /// EIDs are dense, sequential identifiers assigned on edge creation; they no
133    /// longer embed type information (edge-type lookups go via the edge tables).
134    Eid,
135    "Eid"
136);
137
/// Compact `u32` position used for O(1) array access during query execution.
///
/// Query execution loads subgraphs into dense in-memory arrays. `DenseIdx`
/// addresses slots in those arrays, while `VidRemapper` owns the two-way
/// mapping between sparse VIDs and these dense positions.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Ord, PartialOrd)]
pub struct DenseIdx(pub u32);

impl DenseIdx {
    /// Sentinel (`u32::MAX`) meaning "no valid index".
    pub const INVALID: DenseIdx = DenseIdx(u32::MAX);

    /// Wraps a raw `u32` position as a dense index.
    pub fn new(idx: u32) -> Self {
        DenseIdx(idx)
    }

    /// Widens the index to `usize` so it can subscript slices and vectors.
    pub fn as_usize(&self) -> usize {
        let DenseIdx(raw) = *self;
        raw as usize
    }

    /// Hands back the underlying `u32` unchanged.
    pub fn as_u32(&self) -> u32 {
        let DenseIdx(raw) = *self;
        raw
    }

    /// Reports whether this index is the `INVALID` sentinel.
    pub fn is_invalid(&self) -> bool {
        *self == Self::INVALID
    }
}
170
171impl From<u32> for DenseIdx {
172    fn from(val: u32) -> Self {
173        Self(val)
174    }
175}
176
177impl From<usize> for DenseIdx {
178    fn from(val: usize) -> Self {
179        Self(val as u32)
180    }
181}
182
183impl fmt::Display for DenseIdx {
184    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
185        write!(f, "{}", self.0)
186    }
187}
188
189/// UniId: 44-character base32 multibase string (SHA3-256)
190#[derive(Clone, Copy, Hash, Eq, PartialEq, Serialize, Deserialize)]
191pub struct UniId([u8; 32]);
192
193impl UniId {
194    pub fn from_bytes(bytes: [u8; 32]) -> Self {
195        Self(bytes)
196    }
197
198    /// Parses a UniId from a multibase-encoded string.
199    ///
200    /// # Errors
201    ///
202    /// Returns an error if:
203    /// - The string is not valid multibase
204    /// - The encoding is not Base32Lower (the canonical format for UniId)
205    /// - The decoded length is not exactly 32 bytes
206    ///
207    /// # Security
208    ///
209    /// **CWE-345 (Insufficient Verification)**: Validates that the input uses
210    /// the expected Base32Lower encoding, rejecting other multibase formats
211    /// that could cause interoperability issues or confusion.
212    pub fn from_multibase(s: &str) -> Result<Self> {
213        let (base, bytes) =
214            multibase::decode(s).map_err(|e| anyhow!("Multibase decode error: {}", e))?;
215
216        // Validate encoding matches our canonical format
217        if base != Base::Base32Lower {
218            return Err(anyhow!(
219                "UniId must use Base32Lower encoding, got {:?}",
220                base
221            ));
222        }
223
224        let inner: [u8; 32] = bytes.try_into().map_err(|v: Vec<u8>| {
225            anyhow!("Invalid UniId length: expected 32 bytes, got {}", v.len())
226        })?;
227
228        Ok(Self(inner))
229    }
230
231    pub fn to_multibase(&self) -> String {
232        multibase::encode(Base::Base32Lower, self.0)
233    }
234
235    pub fn as_bytes(&self) -> &[u8; 32] {
236        &self.0
237    }
238}
239
240impl fmt::Debug for UniId {
241    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
242        write!(f, "UniId({})", self.to_multibase())
243    }
244}
245
246impl fmt::Display for UniId {
247    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
248        write!(f, "{}", self.to_multibase())
249    }
250}
251
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_vid_basic() {
        let vid = Vid::new(12345);
        assert_eq!(vid.as_u64(), 12345);
        assert!(!vid.is_invalid());
    }

    #[test]
    fn test_vid_invalid() {
        let vid = Vid::INVALID;
        assert!(vid.is_invalid());
        assert_eq!(vid.as_u64(), u64::MAX);
    }

    #[test]
    fn test_vid_from_str() {
        let vid: Vid = "42".parse().unwrap();
        assert_eq!(vid.as_u64(), 42);

        // Round-trip through Display and FromStr
        let original = Vid::new(12345678);
        let s = original.to_string();
        let parsed: Vid = s.parse().unwrap();
        assert_eq!(original, parsed);

        // Error cases
        assert!("invalid".parse::<Vid>().is_err());
        assert!("".parse::<Vid>().is_err());
    }

    #[test]
    fn test_eid_basic() {
        let eid = Eid::new(67890);
        assert_eq!(eid.as_u64(), 67890);
        assert!(!eid.is_invalid());
    }

    #[test]
    fn test_eid_invalid() {
        let eid = Eid::INVALID;
        assert!(eid.is_invalid());
        assert_eq!(eid.as_u64(), u64::MAX);
    }

    #[test]
    fn test_eid_from_str() {
        let eid: Eid = "100".parse().unwrap();
        assert_eq!(eid.as_u64(), 100);

        // Round-trip through Display and FromStr
        let original = Eid::new(0xABCDEF);
        let s = original.to_string();
        let parsed: Eid = s.parse().unwrap();
        assert_eq!(original, parsed);

        // Error cases (kept in step with the Vid test)
        assert!("invalid".parse::<Eid>().is_err());
        assert!("".parse::<Eid>().is_err());
    }

    /// An ephemeral id carries the high bit and gives its payload back.
    #[test]
    fn test_ephemeral_round_trip() {
        let vid = Vid::ephemeral(5);
        assert!(vid.is_ephemeral());
        assert!(!vid.is_invalid());
        assert_eq!(vid.transient_id(), Some(5));
        assert_eq!(vid.as_u64(), Vid::EPHEMERAL_BIT | 5);

        let eid = Eid::ephemeral(0);
        assert!(eid.is_ephemeral());
        assert_eq!(eid.transient_id(), Some(0));
        assert_eq!(eid.as_u64(), Eid::EPHEMERAL_BIT);
    }

    /// Payloads that do not fit in 63 bits collapse to the sentinel.
    #[test]
    fn test_ephemeral_overflow_is_invalid() {
        assert_eq!(Vid::ephemeral(Vid::EPHEMERAL_BIT), Vid::INVALID);
        assert_eq!(Vid::ephemeral(u64::MAX), Vid::INVALID);

        // The largest 63-bit payload encodes to all ones, which is the
        // sentinel itself, so it is indistinguishable from INVALID.
        let all_ones = Vid::ephemeral(Vid::EPHEMERAL_BIT - 1);
        assert_eq!(all_ones, Vid::INVALID);
        assert!(!all_ones.is_ephemeral());
        assert_eq!(all_ones.transient_id(), None);
    }

    /// The sentinel has the high bit set but must not count as ephemeral.
    #[test]
    fn test_invalid_is_not_ephemeral() {
        assert!(!Vid::INVALID.is_ephemeral());
        assert_eq!(Vid::INVALID.transient_id(), None);
        assert!(!Eid::INVALID.is_ephemeral());
        assert_eq!(Eid::INVALID.transient_id(), None);
    }

    /// Ids without the high bit are storage-backed, not ephemeral.
    #[test]
    fn test_persistent_ids_are_not_ephemeral() {
        let vid = Vid::new(42);
        assert!(!vid.is_ephemeral());
        assert_eq!(vid.transient_id(), None);
    }

    #[test]
    fn test_id_default_and_debug() {
        assert_eq!(Vid::default(), Vid::INVALID);
        assert_eq!(Eid::default(), Eid::INVALID);

        assert_eq!(format!("{:?}", Vid::new(7)), "Vid(7)");
        assert_eq!(format!("{:?}", Vid::INVALID), "Vid(INVALID)");
        assert_eq!(format!("{:?}", Eid::new(9)), "Eid(9)");
        assert_eq!(format!("{:?}", Eid::INVALID), "Eid(INVALID)");
    }

    #[test]
    fn test_id_u64_conversions() {
        let vid: Vid = 17u64.into();
        assert_eq!(vid, Vid::new(17));
        assert_eq!(u64::from(vid), 17);
    }

    #[test]
    fn test_dense_idx() {
        let idx = DenseIdx::new(100);
        assert_eq!(idx.as_usize(), 100);
        assert_eq!(idx.as_u32(), 100);
        assert!(!idx.is_invalid());

        let invalid = DenseIdx::INVALID;
        assert!(invalid.is_invalid());
    }

    #[test]
    fn test_dense_idx_conversions_and_display() {
        assert_eq!(DenseIdx::from(9u32), DenseIdx::new(9));
        assert_eq!(DenseIdx::from(9usize), DenseIdx::new(9));
        assert_eq!(DenseIdx::new(9).to_string(), "9");
    }

    #[test]
    fn test_uni_id_multibase() {
        let bytes = [0u8; 32];
        let uid = UniId(bytes);
        let s = uid.to_multibase();
        let decoded = UniId::from_multibase(&s).unwrap();
        assert_eq!(uid, decoded);
    }

    #[test]
    fn test_uni_id_bytes_and_formatting() {
        let bytes = [7u8; 32];
        let uid = UniId::from_bytes(bytes);
        assert_eq!(uid.as_bytes(), &bytes);

        let text = uid.to_multibase();
        assert_eq!(uid.to_string(), text);
        assert_eq!(format!("{:?}", uid), format!("UniId({})", text));
    }

    /// Security tests for CWE-345 (Insufficient Verification).
    mod security_tests {
        use super::*;

        /// CWE-345: UniId should reject non-Base32Lower encodings.
        #[test]
        fn test_uni_id_rejects_wrong_encoding() {
            // Create a Base58Btc encoded string (different from our Base32Lower)
            let bytes = [0u8; 32];
            let base58_encoded = multibase::encode(multibase::Base::Base58Btc, bytes);

            let result = UniId::from_multibase(&base58_encoded);
            assert!(result.is_err());
            assert!(
                result
                    .unwrap_err()
                    .to_string()
                    .contains("Base32Lower encoding")
            );
        }

        /// CWE-345: UniId should reject wrong length.
        #[test]
        fn test_uni_id_rejects_wrong_length() {
            // Encode only 16 bytes instead of 32
            let short_bytes = [0u8; 16];
            let encoded = multibase::encode(Base::Base32Lower, short_bytes);

            let result = UniId::from_multibase(&encoded);
            assert!(result.is_err());
            assert!(
                result
                    .unwrap_err()
                    .to_string()
                    .contains("expected 32 bytes")
            );
        }
    }
}