nodedb_cluster/array_routing.rs
1// SPDX-License-Identifier: BUSL-1.1
2
3//! Tile-aware array routing — maps `(array_name, coord)` → vShard id.
4//!
5//! A distributed Origin shards each array's op-log by tile: ops for tile T of
6//! array A land on the vShard computed from the bytes `array_name || tile_id`.
7//! This spreads a large array across 1 024 vShards without any coordination
8//! beyond the deterministic hash already used for all other shard routing.
9//!
10//! # Key invariant
11//!
12//! Ops for the same (array, tile) always route to the same vShard. Ops for
13//! different tiles of the same array *typically* route to different vShards
14//! (with high probability for arrays whose tile grid is large enough to avoid
15//! hash collisions on a 10-bit output).
16//!
17//! # Returned type
18//!
19//! Functions here return a raw `u16` vShard ID (0..1023). Callers in the
20//! `nodedb` crate wrap this in `VShardId::new(…)`. `VShardId` lives in
21//! `nodedb::types` which `nodedb-cluster` cannot depend on without creating
22//! a circular dependency.
23//!
24//! # Fallbacks
25//!
26//! - Empty coord or tile_extents: fall back to [`array_vshard_for_name`].
27//! - Zero tile extent in any dimension: same fallback (no division by zero).
28//! - `coord.len() != tile_extents.len()`: fallback.
29
30/// Number of virtual shards — must match `VShardId::COUNT` in `nodedb::types`.
31pub const VSHARD_COUNT: u32 = 1024;
32
33// ─── Collection-level fallback hash ──────────────────────────────────────────
34
35/// Compute a vShard ID from an array name alone.
36///
37/// Array-specific name-only fallback used by `array_sync` paths that route
38/// before a coordinate or tile extent is known. Uses the same DJB
39/// multiply-31 hash as `VShardId::from_collection_in_database`, but without
40/// the database scope — array-sync messages carry their own scoping in the
41/// op-log header and route per-array by name.
42pub fn array_vshard_for_name(array_name: &str) -> u32 {
43 let hash = array_name
44 .as_bytes()
45 .iter()
46 .fold(0u32, |h, &b| h.wrapping_mul(31).wrapping_add(b as u32));
47 hash % VSHARD_COUNT
48}
49
50/// Compute a vShard ID from an arbitrary byte key.
51///
52/// Mirrors `VShardId::from_key` in `nodedb::types::id`.
53fn vshard_from_key(key: &[u8]) -> u32 {
54 let mut h: u64 = 0;
55 for &b in key {
56 h = h.wrapping_mul(0x100000001B3).wrapping_add(b as u64);
57 }
58 (h % VSHARD_COUNT as u64) as u32
59}
60
61// ─── Tile-id computation ──────────────────────────────────────────────────────
62
63/// Compute the tile identifier for a coordinate given tile extents.
64///
65/// Uses row-major ordering over the tile grid: each dimension's tile index
66/// is `coord[i] / tile_extents[i]`, and the final `tile_id` multiplies those
67/// indices by the product of the tile-grid dimensions for all *later* dimensions
68/// (standard C-order / row-major strides).
69///
70/// Returns `None` when:
71/// - `coord` and `tile_extents` have different lengths.
72/// - Either slice is empty.
73/// - Any `tile_extents[i]` is zero.
74///
75/// Callers that need a fallback should call [`vshard_for_array_coord`] directly,
76/// which transparently falls back to collection-level routing in all error cases.
77pub fn tile_id_of_coord(coord: &[u64], tile_extents: &[u64]) -> Option<u64> {
78 if coord.is_empty() || tile_extents.is_empty() {
79 return None;
80 }
81 if coord.len() != tile_extents.len() {
82 return None;
83 }
84 if tile_extents.contains(&0) {
85 return None;
86 }
87
88 // Compute per-dimension tile indices.
89 let tile_indices: Vec<u64> = coord
90 .iter()
91 .zip(tile_extents.iter())
92 .map(|(&c, &e)| c / e)
93 .collect();
94
95 // Compute row-major tile_id via stride accumulation from the last dimension.
96 // stride[i] = product of tile widths for dims i+1..N.
97 // We use the tile index itself as a conservative upper bound per dim, which
98 // preserves uniqueness for monotonically growing coordinate spaces and is
99 // sufficient as a hash pre-image.
100 let n = tile_indices.len();
101 let mut tile_id: u64 = 0;
102 let mut stride: u64 = 1;
103
104 for i in (0..n).rev() {
105 tile_id = tile_id.wrapping_add(tile_indices[i].wrapping_mul(stride));
106 stride = stride.wrapping_mul(tile_indices[i].wrapping_add(1).max(1));
107 }
108
109 Some(tile_id)
110}
111
112// ─── vShard routing ───────────────────────────────────────────────────────────
113
114/// Compute the vShard ID for an array op at the given coordinate.
115///
116/// The shard key is `array_name_bytes || tile_id.to_le_bytes()`, hashed via
117/// `vshard_from_key`.
118///
119/// Falls back to `array_vshard_for_name(array_name)` when:
120/// - `coord` / `tile_extents` are empty or mismatched.
121/// - Any `tile_extents[i]` is zero.
122///
123/// Returns a raw `u16` in `0..1023`. Callers wrap it in `VShardId::new(…)`.
124pub fn vshard_for_array_coord(array_name: &str, coord: &[u64], tile_extents: &[u64]) -> u32 {
125 match tile_id_of_coord(coord, tile_extents) {
126 Some(tile_id) => vshard_for_array_tile(array_name, tile_id),
127 None => array_vshard_for_name(array_name),
128 }
129}
130
131/// Compute the vShard ID for an array tile by tile identifier.
132///
133/// Use this when the tile_id is already known (e.g. from the op-log key or
134/// snapshot metadata) and coord re-derivation is unnecessary.
135///
136/// Returns a raw `u16` in `0..1023`. Callers wrap it in `VShardId::new(…)`.
137pub fn vshard_for_array_tile(array_name: &str, tile_id: u64) -> u32 {
138 let mut buf = Vec::with_capacity(array_name.len() + 8);
139 buf.extend_from_slice(array_name.as_bytes());
140 buf.extend_from_slice(&tile_id.to_le_bytes());
141 vshard_from_key(&buf)
142}
143
144// ─── Tests ────────────────────────────────────────────────────────────────────
145
146#[cfg(test)]
147mod tests {
148 use super::*;
149
150 // ── tile_id_of_coord ─────────────────────────────────────────────────────
151
152 #[test]
153 fn tile_id_empty_coord_is_none() {
154 assert!(tile_id_of_coord(&[], &[]).is_none());
155 }
156
157 #[test]
158 fn tile_id_zero_extent_is_none() {
159 assert!(tile_id_of_coord(&[5], &[0]).is_none());
160 }
161
162 #[test]
163 fn tile_id_mismatched_lengths_is_none() {
164 assert!(tile_id_of_coord(&[1, 2], &[4]).is_none());
165 }
166
167 #[test]
168 fn tile_id_single_dim_same_tile() {
169 // coords 0..9 with extent 10 all land in tile 0
170 let t = tile_id_of_coord(&[9], &[10]).unwrap();
171 assert_eq!(t, tile_id_of_coord(&[0], &[10]).unwrap());
172 assert_eq!(t, 0);
173 }
174
175 #[test]
176 fn tile_id_single_dim_different_tiles() {
177 let t0 = tile_id_of_coord(&[0], &[10]).unwrap();
178 let t1 = tile_id_of_coord(&[10], &[10]).unwrap();
179 assert_ne!(
180 t0, t1,
181 "coords in different tiles must yield different tile_ids"
182 );
183 }
184
185 #[test]
186 fn tile_id_two_dim_row_major() {
187 // extent = [4, 4]: tiles are 4×4 blocks
188 // coord (0,0) and (0,3) share tile (0,0) → same tile_id
189 let t00 = tile_id_of_coord(&[0, 0], &[4, 4]).unwrap();
190 let t03 = tile_id_of_coord(&[0, 3], &[4, 4]).unwrap();
191 assert_eq!(t00, t03, "same tile");
192
193 // coord (0,4) is in tile (0,1) — different from (0,0)
194 let t04 = tile_id_of_coord(&[0, 4], &[4, 4]).unwrap();
195 assert_ne!(t00, t04, "different tile column");
196
197 // coord (4,0) is in tile (1,0) — different from (0,0)
198 let t40 = tile_id_of_coord(&[4, 0], &[4, 4]).unwrap();
199 assert_ne!(t00, t40, "different tile row");
200
201 // coord (4,4) is in tile (1,1) — different from (1,0) and (0,1)
202 let t44 = tile_id_of_coord(&[4, 4], &[4, 4]).unwrap();
203 assert_ne!(t40, t44);
204 assert_ne!(t04, t44);
205 }
206
207 // ── array_vshard_for_name ───────────────────────────────────────────────
208
209 #[test]
210 fn collection_fallback_is_deterministic() {
211 let a = array_vshard_for_name("prices");
212 let b = array_vshard_for_name("prices");
213 assert_eq!(a, b);
214 assert!(a < VSHARD_COUNT);
215 }
216
217 // ── vshard_for_array_coord ───────────────────────────────────────────────
218
219 #[test]
220 fn same_tile_gives_same_vshard() {
221 // All coords within tile [0,10) map to the same shard.
222 let s0 = vshard_for_array_coord("prices", &[0], &[10]);
223 let s9 = vshard_for_array_coord("prices", &[9], &[10]);
224 assert_eq!(s0, s9, "same tile → same shard");
225 }
226
227 #[test]
228 fn different_tiles_likely_different_vshards() {
229 // extent = 1 → every coord is its own tile.
230 // With 256 distinct tiles we expect high shard diversity.
231 let shards: Vec<u32> = (0u64..256)
232 .map(|i| vshard_for_array_coord("matrix", &[i], &[1]))
233 .collect();
234
235 let unique: std::collections::HashSet<_> = shards.iter().copied().collect();
236 assert!(
237 unique.len() >= 200,
238 "expected high shard diversity for 256 distinct tiles, got {}",
239 unique.len()
240 );
241 }
242
243 #[test]
244 fn different_arrays_same_tile_different_shards() {
245 let mut differs = 0usize;
246 for t in 0u64..64 {
247 let sa = vshard_for_array_tile("alpha", t);
248 let sb = vshard_for_array_tile("beta", t);
249 if sa != sb {
250 differs += 1;
251 }
252 }
253 assert!(
254 differs >= 32,
255 "expected most tiles to yield different shards for different arrays, got {differs}/64"
256 );
257 }
258
259 #[test]
260 fn fallback_on_empty_coord() {
261 let s = vshard_for_array_coord("prices", &[], &[]);
262 let expected = array_vshard_for_name("prices");
263 assert_eq!(s, expected, "empty coord must fall back to from_collection");
264 }
265
266 #[test]
267 fn fallback_on_zero_extent() {
268 let s = vshard_for_array_coord("prices", &[5], &[0]);
269 let expected = array_vshard_for_name("prices");
270 assert_eq!(s, expected, "zero extent must fall back to from_collection");
271 }
272
273 #[test]
274 fn fallback_on_mismatched_dims() {
275 let s = vshard_for_array_coord("prices", &[1, 2], &[4]);
276 let expected = array_vshard_for_name("prices");
277 assert_eq!(
278 s, expected,
279 "mismatched dims must fall back to from_collection"
280 );
281 }
282
283 // ── vshard_for_array_tile ────────────────────────────────────────────────
284
285 #[test]
286 fn tile_fn_matches_coord_fn_for_same_tile() {
287 let coord = &[7u64, 3];
288 let extents = &[4u64, 4];
289 let tile_id = tile_id_of_coord(coord, extents).unwrap();
290
291 let from_coord = vshard_for_array_coord("mat", coord, extents);
292 let from_tile = vshard_for_array_tile("mat", tile_id);
293 assert_eq!(
294 from_coord, from_tile,
295 "vshard_for_array_coord and vshard_for_array_tile must agree"
296 );
297 }
298
299 #[test]
300 fn all_vshards_in_range() {
301 for t in 0u64..1024 {
302 let s = vshard_for_array_tile("arr", t);
303 assert!(s < VSHARD_COUNT, "vshard {s} out of range for tile {t}");
304 }
305 }
306}