Skip to main content

nodedb_types/id/
vshard.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Virtual shard identifier.
4
5use std::fmt;
6
7use serde::{Deserialize, Serialize};
8
9/// Identifies a virtual shard (0..1023). Data is hashed to vShards by shard key.
10#[derive(
11    Debug,
12    Clone,
13    Copy,
14    PartialEq,
15    Eq,
16    Hash,
17    Serialize,
18    Deserialize,
19    zerompk::ToMessagePack,
20    zerompk::FromMessagePack,
21    rkyv::Archive,
22    rkyv::Serialize,
23    rkyv::Deserialize,
24)]
25pub struct VShardId(pub(super) u32);
26
27impl VShardId {
28    /// Total number of virtual shards in the system.
29    pub const COUNT: u32 = 1024;
30
31    pub const fn new(id: u32) -> Self {
32        assert!(id < Self::COUNT, "vShard ID must be < 1024");
33        Self(id)
34    }
35
36    pub const fn as_u32(self) -> u32 {
37        self.0
38    }
39
40    /// Compute vShard from a database + collection name pair.
41    ///
42    /// The database identity is mixed into the hash so that the same collection
43    /// name in two different databases routes to independent vShards. Uses a
44    /// DJB-like multiply-31 hash, seeded with the database id bytes, followed
45    /// by a zero separator byte, followed by the collection name bytes.
46    pub fn from_collection_in_database(db: crate::id::DatabaseId, collection: &str) -> Self {
47        let db_bytes = db.as_u64().to_le_bytes();
48        let hash = db_bytes
49            .iter()
50            .chain(std::iter::once(&0u8))
51            .chain(collection.as_bytes().iter())
52            .fold(0u32, |h, &b| h.wrapping_mul(31).wrapping_add(b as u32));
53        Self::new(hash % Self::COUNT)
54    }
55
56    /// Compute vShard from a shard key via consistent hashing.
57    pub fn from_key(key: &[u8]) -> Self {
58        // FxHash-style fast hash, modulo 1024.
59        let mut h: u64 = 0;
60        for &b in key {
61            h = h.wrapping_mul(0x100000001B3).wrapping_add(b as u64);
62        }
63        Self::new((h % Self::COUNT as u64) as u32)
64    }
65}
66
67impl fmt::Display for VShardId {
68    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
69        write!(f, "vshard:{}", self.0)
70    }
71}
72
73#[cfg(test)]
74mod tests {
75    use super::*;
76
77    #[test]
78    fn vshard_id_above_u16_max_roundtrip() {
79        let v = VShardId(0);
80        assert_eq!(v.as_u32(), 0u32);
81
82        let v = VShardId(1023);
83        assert_eq!(v.as_u32(), 1023u32);
84    }
85
86    #[test]
87    fn vshard_new_above_old_u16_max_would_panic_but_inner_holds_u32() {
88        let v = VShardId(0x0001_0000);
89        assert_eq!(v.as_u32(), 0x0001_0000u32);
90    }
91
92    #[test]
93    fn vshard_from_key_deterministic() {
94        let a = VShardId::from_key(b"user:alice");
95        let b = VShardId::from_key(b"user:alice");
96        assert_eq!(a, b);
97        assert!(a.as_u32() < VShardId::COUNT);
98    }
99
100    #[test]
101    fn vshard_from_key_distributes() {
102        let mut seen = std::collections::HashSet::new();
103        for i in 0u32..1000 {
104            let key = format!("tenant:{i}");
105            seen.insert(VShardId::from_key(key.as_bytes()).as_u32());
106        }
107        assert!(
108            seen.len() > 100,
109            "poor distribution: only {} vShards hit",
110            seen.len()
111        );
112    }
113
114    #[test]
115    fn from_collection_in_database_deterministic() {
116        use crate::id::DatabaseId;
117        let db = DatabaseId::new(1024);
118        let a = VShardId::from_collection_in_database(db, "users");
119        let b = VShardId::from_collection_in_database(db, "users");
120        assert_eq!(a, b);
121        assert!(a.as_u32() < VShardId::COUNT);
122    }
123
124    #[test]
125    fn from_collection_in_database_different_dbs_differ() {
126        use crate::id::DatabaseId;
127        let db0 = DatabaseId::DEFAULT;
128        let db1 = DatabaseId::new(1024);
129        // Same collection name in different databases should typically route
130        // to different vShards (probabilistic; collection "users" is a
131        // canonical example and the two hashes are known to differ).
132        let a = VShardId::from_collection_in_database(db0, "users");
133        let b = VShardId::from_collection_in_database(db1, "users");
134        assert_ne!(
135            a, b,
136            "same collection name, different databases should route differently"
137        );
138    }
139
140    #[test]
141    fn from_collection_in_database_default_in_range() {
142        use crate::id::DatabaseId;
143        let v = VShardId::from_collection_in_database(DatabaseId::DEFAULT, "orders");
144        assert!(v.as_u32() < VShardId::COUNT);
145    }
146}