// sochdb_core/record_id.rs

// SPDX-License-Identifier: AGPL-3.0-or-later
// SochDB - LLM-Optimized Embedded Database
// Copyright (C) 2026 Sushanth Reddy Vanagala (https://github.com/sushanthpy)
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.

//! Record ID — the universal document/record identifier for SochDB's multi-model layer.
//!
//! A `RecordId` combines a table name with a unique identifier within that table,
//! modeled after SurrealDB's `thing` concept (`table:id`).
//!
//! # Binary Key Encoding
//!
//! ```text
//! [table_id: u32 BE][tag: u8][id_bytes: variable]
//! ```
//!
//! - `table_id` is a 4-byte big-endian table hash (FNV-1a) for sort-ordered prefix scans.
//! - `tag` is a single byte recording the id kind: `0x01` for integer IDs, `0x02` for
//!   string IDs.
//! - `id_bytes` is the raw identifier — either a big-endian u64 for integer IDs, or
//!   UTF-8 bytes for string IDs.
//!
//! Big-endian encoding ensures lexicographic byte ordering matches numeric ordering,
//! enabling efficient range scans on the underlying KV store.
//!
//! # Display Format
//!
//! `table:id` — e.g. `person:1`, `post:abc`, `user:⟨uuid⟩`
//!
//! # Examples
//!
//! ```
//! use sochdb_core::record_id::RecordId;
//!
//! // Integer ID
//! let rid = RecordId::new("person", 42u64);
//! assert_eq!(rid.table(), "person");
//! assert_eq!(rid.to_string(), "person:42");
//!
//! // String ID
//! let rid = RecordId::from_string("post", "hello-world");
//! assert_eq!(rid.to_string(), "post:hello-world");
//!
//! // Round-trip through binary key
//! let key = rid.to_key();
//! let decoded = RecordId::from_key_with_table(&key, "post").unwrap();
//! assert_eq!(rid, decoded);
//! ```

use std::fmt;

/// The identifier part of a [`RecordId`]: either a numeric or a textual id.
///
/// The two variants are distinct values even when they print the same way:
/// `IdValue::Integer(42)` and `IdValue::String("42".into())` both display as
/// `42`, but they compare unequal and encode to different bytes.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum IdValue {
    /// Integer identifier (stored as big-endian u64).
    Integer(u64),
    /// String identifier (stored as UTF-8 bytes).
    String(String),
}

impl IdValue {
    /// Encode the id value to bytes (for use in storage keys).
    ///
    /// - `Integer` becomes exactly 8 big-endian bytes, so byte-wise ordering
    ///   matches numeric ordering.
    /// - `String` becomes its UTF-8 bytes, unmodified (an empty string yields
    ///   an empty vector).
    ///
    /// The variant itself is not recorded in the returned bytes.
    pub fn to_bytes(&self) -> Vec<u8> {
        match self {
            Self::Integer(n) => n.to_be_bytes().to_vec(),
            Self::String(s) => s.as_bytes().to_vec(),
        }
    }
}

/// Prints the bare id: decimal digits for `Integer`, the text itself for `String`.
impl fmt::Display for IdValue {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Integer(n) => write!(f, "{}", n),
            Self::String(s) => f.write_str(s),
        }
    }
}

/// Tag byte placed before `id_bytes` in the binary key when the id is an integer.
///
/// The value is written into storage keys, so it must not change.
const ID_TAG_INTEGER: u8 = 0x01;
/// Tag byte placed before `id_bytes` in the binary key when the id is a string.
///
/// The value is written into storage keys, so it must not change. It is greater
/// than [`ID_TAG_INTEGER`], so within one table prefix integer-tagged keys sort
/// before string-tagged keys.
const ID_TAG_STRING: u8 = 0x02;

94/// A `RecordId` is a `(table, id)` pair that uniquely identifies a record
95/// across SochDB's multi-model storage.
96///
97/// It replaces the previous string-based `node_id` / `from_id` / `to_id` pattern
98/// used in the graph overlay, providing:
99/// - Type-safe table scoping
100/// - Compact binary keys for storage
101/// - Sort-ordered prefix scans per table
102#[derive(Debug, Clone, PartialEq, Eq, Hash)]
103pub struct RecordId {
104    table: String,
105    id: IdValue,
106}
107
108impl RecordId {
109    /// Create a RecordId with an integer identifier.
110    pub fn new(table: impl Into<String>, id: u64) -> Self {
111        Self {
112            table: table.into(),
113            id: IdValue::Integer(id),
114        }
115    }
116
117    /// Create a RecordId with a string identifier.
118    pub fn from_string(table: impl Into<String>, id: impl Into<String>) -> Self {
119        Self {
120            table: table.into(),
121            id: IdValue::String(id.into()),
122        }
123    }
124
125    /// Create a RecordId from an IdValue.
126    pub fn with_id(table: impl Into<String>, id: IdValue) -> Self {
127        Self {
128            table: table.into(),
129            id,
130        }
131    }
132
133    /// Table name.
134    pub fn table(&self) -> &str {
135        &self.table
136    }
137
138    /// The identifier value.
139    pub fn id(&self) -> &IdValue {
140        &self.id
141    }
142
143    /// Compute the FNV-1a hash of the table name (used as table_id in binary keys).
144    fn table_hash(table: &str) -> u32 {
145        // FNV-1a 32-bit
146        let mut hash: u32 = 0x811c9dc5;
147        for byte in table.as_bytes() {
148            hash ^= *byte as u32;
149            hash = hash.wrapping_mul(0x01000193);
150        }
151        hash
152    }
153
154    /// Encode to a binary storage key.
155    ///
156    /// Format: `[table_id: u32 BE][tag: u8][id_bytes]`
157    ///
158    /// The table_id is a FNV-1a hash, ensuring records of the same table
159    /// cluster together in lexicographic key order.
160    pub fn to_key(&self) -> Vec<u8> {
161        let table_id = Self::table_hash(&self.table);
162        let id_bytes = self.id.to_bytes();
163        let tag = match &self.id {
164            IdValue::Integer(_) => ID_TAG_INTEGER,
165            IdValue::String(_) => ID_TAG_STRING,
166        };
167        let mut key = Vec::with_capacity(4 + 1 + id_bytes.len());
168        key.extend_from_slice(&table_id.to_be_bytes());
169        key.push(tag);
170        key.extend_from_slice(&id_bytes);
171        key
172    }
173
174    /// Decode from a binary storage key.
175    ///
176    /// Note: The table name is NOT recoverable from the key alone (only the hash is stored).
177    /// Use `from_key_with_table` if you know the table name, or `from_key` for a lossy decode.
178    pub fn from_key(key: &[u8]) -> Option<Self> {
179        if key.len() < 6 {
180            // 4 (table_id) + 1 (tag) + 1 (min id)
181            return None;
182        }
183        let _table_id = u32::from_be_bytes([key[0], key[1], key[2], key[3]]);
184        let tag = key[4];
185        let id_bytes = &key[5..];
186
187        let id = match tag {
188            ID_TAG_INTEGER => {
189                if id_bytes.len() != 8 {
190                    return None;
191                }
192                let n = u64::from_be_bytes([
193                    id_bytes[0],
194                    id_bytes[1],
195                    id_bytes[2],
196                    id_bytes[3],
197                    id_bytes[4],
198                    id_bytes[5],
199                    id_bytes[6],
200                    id_bytes[7],
201                ]);
202                IdValue::Integer(n)
203            }
204            ID_TAG_STRING => {
205                let s = std::str::from_utf8(id_bytes).ok()?;
206                IdValue::String(s.to_string())
207            }
208            _ => return None,
209        };
210
211        // Table name is lost in key encoding — use hash placeholder
212        Some(RecordId {
213            table: format!("#{:08x}", _table_id),
214            id,
215        })
216    }
217
218    /// Decode from a binary key when the table name is known.
219    pub fn from_key_with_table(key: &[u8], table: &str) -> Option<Self> {
220        if key.len() < 6 {
221            return None;
222        }
223        let stored_hash = u32::from_be_bytes([key[0], key[1], key[2], key[3]]);
224        if stored_hash != Self::table_hash(table) {
225            return None; // Hash mismatch
226        }
227        let tag = key[4];
228        let id_bytes = &key[5..];
229
230        let id = match tag {
231            ID_TAG_INTEGER => {
232                if id_bytes.len() != 8 {
233                    return None;
234                }
235                let n = u64::from_be_bytes([
236                    id_bytes[0],
237                    id_bytes[1],
238                    id_bytes[2],
239                    id_bytes[3],
240                    id_bytes[4],
241                    id_bytes[5],
242                    id_bytes[6],
243                    id_bytes[7],
244                ]);
245                IdValue::Integer(n)
246            }
247            ID_TAG_STRING => {
248                let s = std::str::from_utf8(id_bytes).ok()?;
249                IdValue::String(s.to_string())
250            }
251            _ => return None,
252        };
253
254        Some(RecordId {
255            table: table.to_string(),
256            id,
257        })
258    }
259
260    /// Generate the key prefix for all records in a given table.
261    ///
262    /// Useful for prefix scans: `storage.scan(RecordId::table_prefix("person"))`.
263    pub fn table_prefix(table: &str) -> Vec<u8> {
264        Self::table_hash(table).to_be_bytes().to_vec()
265    }
266
267    /// Parse from `table:id` string format.
268    ///
269    /// Supports:
270    /// - `person:42` → integer ID
271    /// - `post:hello-world` → string ID
272    pub fn parse(s: &str) -> Option<Self> {
273        let colon_pos = s.find(':')?;
274        if colon_pos == 0 || colon_pos == s.len() - 1 {
275            return None;
276        }
277        let table = &s[..colon_pos];
278        let id_str = &s[colon_pos + 1..];
279
280        let id = if let Ok(n) = id_str.parse::<u64>() {
281            IdValue::Integer(n)
282        } else {
283            IdValue::String(id_str.to_string())
284        };
285
286        Some(RecordId {
287            table: table.to_string(),
288            id,
289        })
290    }
291}
292
293impl fmt::Display for RecordId {
294    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
295        write!(f, "{}:{}", self.table, self.id)
296    }
297}
298
299impl PartialOrd for RecordId {
300    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
301        Some(self.cmp(other))
302    }
303}
304
305impl Ord for RecordId {
306    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
307        // Compare by binary key for consistent ordering with storage
308        self.to_key().cmp(&other.to_key())
309    }
310}
311
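// ============================================================================
// Serde support: a `RecordId` is (de)serialized as its `table:id` string.
// NOTE(review): no `#[cfg(feature = ...)]` gate is visible in this file, so
// these impls compile unconditionally — confirm whether gating is intended.
// ============================================================================
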
316impl serde::Serialize for RecordId {
317    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
318        serializer.serialize_str(&self.to_string())
319    }
320}
321
322impl<'de> serde::Deserialize<'de> for RecordId {
323    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
324        let s = String::deserialize(deserializer)?;
325        RecordId::parse(&s).ok_or_else(|| {
326            serde::de::Error::custom(format!("invalid RecordId: '{}' (expected table:id)", s))
327        })
328    }
329}
330
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_record_id_integer() {
        let rid = RecordId::new("person", 42);
        assert_eq!(rid.table(), "person");
        assert_eq!(rid.to_string(), "person:42");
        assert!(matches!(rid.id(), IdValue::Integer(42)));
    }

    #[test]
    fn test_record_id_string() {
        let rid = RecordId::from_string("post", "hello-world");
        assert_eq!(rid.table(), "post");
        assert_eq!(rid.to_string(), "post:hello-world");
    }

    #[test]
    fn test_record_id_binary_key_roundtrip_integer() {
        let rid = RecordId::new("person", 42);
        let key = rid.to_key();
        assert_eq!(key.len(), 4 + 1 + 8); // table_id + tag + u64
        let decoded = RecordId::from_key_with_table(&key, "person").unwrap();
        assert_eq!(rid, decoded);
    }

    #[test]
    fn test_record_id_binary_key_roundtrip_string() {
        let rid = RecordId::from_string("post", "abc");
        let key = rid.to_key();
        assert_eq!(key.len(), 4 + 1 + 3); // table_id + tag + "abc"
        let decoded = RecordId::from_key_with_table(&key, "post").unwrap();
        assert_eq!(rid, decoded);
    }

    #[test]
    fn test_record_id_table_prefix() {
        let rid1 = RecordId::new("person", 1);
        let rid2 = RecordId::new("person", 999);
        let prefix = RecordId::table_prefix("person");

        let key1 = rid1.to_key();
        let key2 = rid2.to_key();

        assert_eq!(&key1[..4], &prefix);
        assert_eq!(&key2[..4], &prefix);
    }

    #[test]
    fn test_record_id_ordering() {
        let r1 = RecordId::new("person", 1);
        let r2 = RecordId::new("person", 2);
        let r3 = RecordId::new("person", 100);

        // Same table: ordered by ID
        assert!(r1 < r2);
        assert!(r2 < r3);
    }

    #[test]
    fn test_record_id_ordering_integer_before_string() {
        // Within one table the integer tag (0x01) sorts before the string tag (0x02),
        // regardless of the id payload.
        let int_id = RecordId::new("person", u64::MAX);
        let str_id = RecordId::from_string("person", "a");
        assert!(int_id < str_id);
    }

    #[test]
    fn test_record_id_parse() {
        let rid = RecordId::parse("person:42").unwrap();
        assert_eq!(rid.table(), "person");
        assert!(matches!(rid.id(), IdValue::Integer(42)));

        let rid = RecordId::parse("post:hello-world").unwrap();
        assert_eq!(rid.table(), "post");
        assert!(matches!(rid.id(), IdValue::String(s) if s == "hello-world"));

        assert!(RecordId::parse("").is_none());
        assert!(RecordId::parse(":42").is_none());
        assert!(RecordId::parse("person:").is_none());
    }

    #[test]
    fn test_record_id_parse_splits_on_first_colon() {
        let rid = RecordId::parse("post:a:b").unwrap();
        assert_eq!(rid.table(), "post");
        assert!(matches!(rid.id(), IdValue::String(s) if s == "a:b"));
        assert!(RecordId::parse("no-colon").is_none());
    }

    #[test]
    fn test_record_id_serde_roundtrip() {
        let rid = RecordId::new("person", 42);
        let json = serde_json::to_string(&rid).unwrap();
        assert_eq!(json, "\"person:42\"");
        let decoded: RecordId = serde_json::from_str(&json).unwrap();
        assert_eq!(rid, decoded);
    }

    #[test]
    fn test_record_id_hash_mismatch() {
        let rid = RecordId::new("person", 42);
        let key = rid.to_key();
        // Try decoding with wrong table name
        assert!(RecordId::from_key_with_table(&key, "animal").is_none());
    }

    #[test]
    fn test_record_id_different_tables_cluster() {
        let person_prefix = RecordId::table_prefix("person");
        let post_prefix = RecordId::table_prefix("post");
        // Different tables have different prefixes (extremely high probability)
        assert_ne!(person_prefix, post_prefix);
    }

    #[test]
    fn test_record_id_from_key_lossy() {
        let rid = RecordId::new("person", 42);
        let key = rid.to_key();
        let decoded = RecordId::from_key(&key).unwrap();
        // Table name is lost — replaced with hash placeholder
        assert!(decoded.table().starts_with('#'));
        assert!(matches!(decoded.id(), IdValue::Integer(42)));
    }

    #[test]
    fn test_record_id_from_key_rejects_malformed() {
        let key = RecordId::new("person", 42).to_key();

        // Shorter than table_id + tag + one id byte.
        assert!(RecordId::from_key(&[]).is_none());
        assert!(RecordId::from_key(&key[..5]).is_none());

        // Integer payload must be exactly 8 bytes.
        assert!(RecordId::from_key(&key[..key.len() - 1]).is_none());

        // Unknown tag byte.
        let mut bad_tag = key.clone();
        bad_tag[4] = 0xff;
        assert!(RecordId::from_key(&bad_tag).is_none());

        // String payload must be valid UTF-8.
        let mut bad_utf8 = RecordId::from_string("post", "abc").to_key();
        bad_utf8[5] = 0xff;
        assert!(RecordId::from_key(&bad_utf8).is_none());
        assert!(RecordId::from_key_with_table(&bad_utf8, "post").is_none());
    }
}