Skip to main content

sochdb_core/
record_id.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2// SochDB - LLM-Optimized Embedded Database
3// Copyright (C) 2026 Sushanth Reddy Vanagala (https://github.com/sushanthpy)
4//
5// This program is free software: you can redistribute it and/or modify
6// it under the terms of the GNU Affero General Public License as published by
7// the Free Software Foundation, either version 3 of the License, or
8// (at your option) any later version.
9//
10// This program is distributed in the hope that it will be useful,
11// but WITHOUT ANY WARRANTY; without even the implied warranty of
12// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13// GNU Affero General Public License for more details.
14//
15// You should have received a copy of the GNU Affero General Public License
16// along with this program. If not, see <https://www.gnu.org/licenses/>.
17
18//! Record ID — the universal document/record identifier for SochDB's multi-model layer.
19//!
20//! A `RecordId` combines a table name with a unique identifier within that table,
21//! modeled after SurrealDB's `thing` concept (`table:id`).
22//!
23//! # Binary Key Encoding
24//!
//! ```text
//! [table_id: u32 BE][tag: u8][id_bytes: variable]
//! ```
//!
//! - `table_id` is a 4-byte big-endian table hash (FNV-1a) for sort-ordered prefix scans.
//! - `tag` is a single byte that distinguishes integer IDs (`0x01`) from string IDs (`0x02`).
//! - `id_bytes` is the raw identifier — either a big-endian u64 for integer IDs, or
//!   UTF-8 bytes for string IDs.
32//!
33//! Big-endian encoding ensures lexicographic byte ordering matches numeric ordering,
34//! enabling efficient range scans on the underlying KV store.
35//!
36//! # Display Format
37//!
38//! `table:id` — e.g. `person:1`, `post:abc`, `user:⟨uuid⟩`
39//!
40//! # Examples
41//!
42//! ```
43//! use sochdb_core::record_id::RecordId;
44//!
45//! // Integer ID
46//! let rid = RecordId::new("person", 42u64);
47//! assert_eq!(rid.table(), "person");
48//! assert_eq!(rid.to_string(), "person:42");
49//!
50//! // String ID
51//! let rid = RecordId::from_string("post", "hello-world");
52//! assert_eq!(rid.to_string(), "post:hello-world");
53//!
54//! // Round-trip through binary key
55//! let key = rid.to_key();
56//! let decoded = RecordId::from_key_with_table(&key, "post").unwrap();
57//! assert_eq!(rid, decoded);
58//! ```
59
60use std::fmt;
61
/// The identifier part of a RecordId.
///
/// The two variants are encoded differently in storage keys (see
/// [`IdValue::to_bytes`]) and never compare equal to each other under the
/// derived `PartialEq`, even when a string ID spells out the same digits as
/// an integer ID.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum IdValue {
    /// Integer identifier (stored as big-endian u64).
    Integer(u64),
    /// String identifier (stored as UTF-8 bytes).
    String(String),
}
70
71impl IdValue {
72    /// Encode the id value to bytes (for use in storage keys).
73    pub fn to_bytes(&self) -> Vec<u8> {
74        match self {
75            IdValue::Integer(n) => n.to_be_bytes().to_vec(),
76            IdValue::String(s) => s.as_bytes().to_vec(),
77        }
78    }
79}
80
81impl fmt::Display for IdValue {
82    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
83        match self {
84            IdValue::Integer(n) => write!(f, "{}", n),
85            IdValue::String(s) => write!(f, "{}", s),
86        }
87    }
88}
89
/// Tag byte prefixed to id_bytes in the binary key to distinguish integer vs string.
///
/// This value marks an integer ID, whose payload must be exactly 8 big-endian bytes.
/// Because `0x01 < 0x02`, integer-ID keys of a table sort before its string-ID keys
/// in lexicographic key order.
const ID_TAG_INTEGER: u8 = 0x01;
/// Tag byte marking a string ID, whose payload is the ID's UTF-8 bytes.
const ID_TAG_STRING: u8 = 0x02;
93
/// A `RecordId` is a `(table, id)` pair that uniquely identifies a record
/// across SochDB's multi-model storage.
///
/// It replaces the previous string-based `node_id` / `from_id` / `to_id` pattern
/// used in the graph overlay, providing:
/// - Type-safe table scoping
/// - Compact binary keys for storage
/// - Sort-ordered prefix scans per table
///
/// The derived `PartialEq` / `Eq` / `Hash` compare the full table name and the
/// id value (not the 32-bit table hash that appears in binary keys).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct RecordId {
    /// Name of the table the record belongs to. The name itself is kept here;
    /// only its hash is written into binary keys.
    table: String,
    /// Identifier of the record within `table`.
    id: IdValue,
}
107
108impl RecordId {
109    /// Create a RecordId with an integer identifier.
110    pub fn new(table: impl Into<String>, id: u64) -> Self {
111        Self {
112            table: table.into(),
113            id: IdValue::Integer(id),
114        }
115    }
116
117    /// Create a RecordId with a string identifier.
118    pub fn from_string(table: impl Into<String>, id: impl Into<String>) -> Self {
119        Self {
120            table: table.into(),
121            id: IdValue::String(id.into()),
122        }
123    }
124
125    /// Create a RecordId from an IdValue.
126    pub fn with_id(table: impl Into<String>, id: IdValue) -> Self {
127        Self {
128            table: table.into(),
129            id,
130        }
131    }
132
133    /// Table name.
134    pub fn table(&self) -> &str {
135        &self.table
136    }
137
138    /// The identifier value.
139    pub fn id(&self) -> &IdValue {
140        &self.id
141    }
142
143    /// Compute the FNV-1a hash of the table name (used as table_id in binary keys).
144    fn table_hash(table: &str) -> u32 {
145        // FNV-1a 32-bit
146        let mut hash: u32 = 0x811c9dc5;
147        for byte in table.as_bytes() {
148            hash ^= *byte as u32;
149            hash = hash.wrapping_mul(0x01000193);
150        }
151        hash
152    }
153
154    /// Encode to a binary storage key.
155    ///
156    /// Format: `[table_id: u32 BE][tag: u8][id_bytes]`
157    ///
158    /// The table_id is a FNV-1a hash, ensuring records of the same table
159    /// cluster together in lexicographic key order.
160    pub fn to_key(&self) -> Vec<u8> {
161        let table_id = Self::table_hash(&self.table);
162        let id_bytes = self.id.to_bytes();
163        let tag = match &self.id {
164            IdValue::Integer(_) => ID_TAG_INTEGER,
165            IdValue::String(_) => ID_TAG_STRING,
166        };
167        let mut key = Vec::with_capacity(4 + 1 + id_bytes.len());
168        key.extend_from_slice(&table_id.to_be_bytes());
169        key.push(tag);
170        key.extend_from_slice(&id_bytes);
171        key
172    }
173
174    /// Decode from a binary storage key.
175    ///
176    /// Note: The table name is NOT recoverable from the key alone (only the hash is stored).
177    /// Use `from_key_with_table` if you know the table name, or `from_key` for a lossy decode.
178    pub fn from_key(key: &[u8]) -> Option<Self> {
179        if key.len() < 6 {
180            // 4 (table_id) + 1 (tag) + 1 (min id)
181            return None;
182        }
183        let _table_id = u32::from_be_bytes([key[0], key[1], key[2], key[3]]);
184        let tag = key[4];
185        let id_bytes = &key[5..];
186
187        let id = match tag {
188            ID_TAG_INTEGER => {
189                if id_bytes.len() != 8 {
190                    return None;
191                }
192                let n = u64::from_be_bytes([
193                    id_bytes[0], id_bytes[1], id_bytes[2], id_bytes[3],
194                    id_bytes[4], id_bytes[5], id_bytes[6], id_bytes[7],
195                ]);
196                IdValue::Integer(n)
197            }
198            ID_TAG_STRING => {
199                let s = std::str::from_utf8(id_bytes).ok()?;
200                IdValue::String(s.to_string())
201            }
202            _ => return None,
203        };
204
205        // Table name is lost in key encoding — use hash placeholder
206        Some(RecordId {
207            table: format!("#{:08x}", _table_id),
208            id,
209        })
210    }
211
212    /// Decode from a binary key when the table name is known.
213    pub fn from_key_with_table(key: &[u8], table: &str) -> Option<Self> {
214        if key.len() < 6 {
215            return None;
216        }
217        let stored_hash = u32::from_be_bytes([key[0], key[1], key[2], key[3]]);
218        if stored_hash != Self::table_hash(table) {
219            return None; // Hash mismatch
220        }
221        let tag = key[4];
222        let id_bytes = &key[5..];
223
224        let id = match tag {
225            ID_TAG_INTEGER => {
226                if id_bytes.len() != 8 {
227                    return None;
228                }
229                let n = u64::from_be_bytes([
230                    id_bytes[0], id_bytes[1], id_bytes[2], id_bytes[3],
231                    id_bytes[4], id_bytes[5], id_bytes[6], id_bytes[7],
232                ]);
233                IdValue::Integer(n)
234            }
235            ID_TAG_STRING => {
236                let s = std::str::from_utf8(id_bytes).ok()?;
237                IdValue::String(s.to_string())
238            }
239            _ => return None,
240        };
241
242        Some(RecordId {
243            table: table.to_string(),
244            id,
245        })
246    }
247
248    /// Generate the key prefix for all records in a given table.
249    ///
250    /// Useful for prefix scans: `storage.scan(RecordId::table_prefix("person"))`.
251    pub fn table_prefix(table: &str) -> Vec<u8> {
252        Self::table_hash(table).to_be_bytes().to_vec()
253    }
254
255    /// Parse from `table:id` string format.
256    ///
257    /// Supports:
258    /// - `person:42` → integer ID
259    /// - `post:hello-world` → string ID
260    pub fn parse(s: &str) -> Option<Self> {
261        let colon_pos = s.find(':')?;
262        if colon_pos == 0 || colon_pos == s.len() - 1 {
263            return None;
264        }
265        let table = &s[..colon_pos];
266        let id_str = &s[colon_pos + 1..];
267
268        let id = if let Ok(n) = id_str.parse::<u64>() {
269            IdValue::Integer(n)
270        } else {
271            IdValue::String(id_str.to_string())
272        };
273
274        Some(RecordId {
275            table: table.to_string(),
276            id,
277        })
278    }
279}
280
281impl fmt::Display for RecordId {
282    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
283        write!(f, "{}:{}", self.table, self.id)
284    }
285}
286
287impl PartialOrd for RecordId {
288    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
289        Some(self.cmp(other))
290    }
291}
292
293impl Ord for RecordId {
294    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
295        // Compare by binary key for consistent ordering with storage
296        self.to_key().cmp(&other.to_key())
297    }
298}
299
300// ============================================================================
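// Serde support: a RecordId is (de)serialized as its `table:id` string.
// NOTE(review): no `#[cfg(feature = ...)]` gate is present on these impls in
// this file — confirm whether feature-gating is still intended.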
302// ============================================================================
303
304impl serde::Serialize for RecordId {
305    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
306        serializer.serialize_str(&self.to_string())
307    }
308}
309
310impl<'de> serde::Deserialize<'de> for RecordId {
311    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
312        let s = String::deserialize(deserializer)?;
313        RecordId::parse(&s).ok_or_else(|| serde::de::Error::custom(
314            format!("invalid RecordId: '{}' (expected table:id)", s),
315        ))
316    }
317}
318
#[cfg(test)]
mod tests {
    use super::*;

    /// Integer constructor: table, display form and id variant.
    #[test]
    fn test_record_id_integer() {
        let person = RecordId::new("person", 42);
        assert_eq!("person", person.table());
        assert_eq!("person:42", person.to_string());
        assert_eq!(&IdValue::Integer(42), person.id());
    }

    /// String constructor: table and display form.
    #[test]
    fn test_record_id_string() {
        let post = RecordId::from_string("post", "hello-world");
        assert_eq!("post", post.table());
        assert_eq!("post:hello-world", post.to_string());
    }

    /// Integer id survives encode → decode when the table name is supplied.
    #[test]
    fn test_record_id_binary_key_roundtrip_integer() {
        let original = RecordId::new("person", 42);
        let encoded = original.to_key();
        // table_id + tag + u64
        assert_eq!(4 + 1 + 8, encoded.len());
        let restored = RecordId::from_key_with_table(&encoded, "person");
        assert_eq!(Some(original), restored);
    }

    /// String id survives encode → decode when the table name is supplied.
    #[test]
    fn test_record_id_binary_key_roundtrip_string() {
        let original = RecordId::from_string("post", "abc");
        let encoded = original.to_key();
        // table_id + tag + "abc"
        assert_eq!(4 + 1 + 3, encoded.len());
        let restored = RecordId::from_key_with_table(&encoded, "post");
        assert_eq!(Some(original), restored);
    }

    /// Every key of a table starts with that table's 4-byte prefix.
    #[test]
    fn test_record_id_table_prefix() {
        let prefix = RecordId::table_prefix("person");

        for id in [1u64, 999] {
            let key = RecordId::new("person", id).to_key();
            assert_eq!(key[..4], prefix[..]);
        }
    }

    /// Within one table, integer ids order numerically.
    #[test]
    fn test_record_id_ordering() {
        let first = RecordId::new("person", 1);
        let second = RecordId::new("person", 2);
        let third = RecordId::new("person", 100);

        // Same table: ordered by ID
        assert!(first < second);
        assert!(second < third);
    }

    /// `parse` distinguishes integer from string ids and rejects empty parts.
    #[test]
    fn test_record_id_parse() {
        let numeric = RecordId::parse("person:42").unwrap();
        assert_eq!("person", numeric.table());
        assert_eq!(&IdValue::Integer(42), numeric.id());

        let textual = RecordId::parse("post:hello-world").unwrap();
        assert_eq!("post", textual.table());
        assert!(matches!(textual.id(), IdValue::String(s) if s == "hello-world"));

        for rejected in ["", ":42", "person:"] {
            assert!(RecordId::parse(rejected).is_none());
        }
    }

    /// JSON form is the quoted `table:id` string and parses back to the same id.
    #[test]
    fn test_record_id_serde_roundtrip() {
        let original = RecordId::new("person", 42);
        let json = serde_json::to_string(&original).unwrap();
        assert_eq!(json, "\"person:42\"");
        let restored: RecordId = serde_json::from_str(&json).unwrap();
        assert_eq!(original, restored);
    }

    /// Decoding with a table name whose hash differs from the stored one fails.
    #[test]
    fn test_record_id_hash_mismatch() {
        let key = RecordId::new("person", 42).to_key();
        // Try decoding with wrong table name
        let decoded = RecordId::from_key_with_table(&key, "animal");
        assert!(decoded.is_none());
    }

    /// Two different table names yield different prefixes for this pair.
    #[test]
    fn test_record_id_different_tables_cluster() {
        // Different tables have different prefixes (extremely high probability)
        assert_ne!(
            RecordId::table_prefix("person"),
            RecordId::table_prefix("post")
        );
    }

    /// Lossy decode keeps the id but replaces the table with a `#…` placeholder.
    #[test]
    fn test_record_id_from_key_lossy() {
        let key = RecordId::new("person", 42).to_key();
        let lossy = RecordId::from_key(&key).unwrap();
        // Table name is lost — replaced with hash placeholder
        assert!(lossy.table().starts_with('#'));
        assert_eq!(&IdValue::Integer(42), lossy.id());
    }
}