Skip to main content

zlayer_overlay/
allocator.rs

1//! IP address allocation for overlay networks
2//!
3//! Manages allocation and tracking of overlay IP addresses within a CIDR range.
4//! Supports both IPv4 and IPv6 (dual-stack) networks.
5
6use crate::error::{OverlayError, Result};
7use ipnet::{IpNet, Ipv4Net, Ipv6Net};
8use serde::{Deserialize, Serialize};
9use std::collections::{HashMap, HashSet};
10use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
11use std::path::Path;
12
/// IP allocator for overlay network addresses.
///
/// Pairs one CIDR network (IPv4 or IPv6) with the set of addresses that have
/// been handed out from it. Allocation, release and bookkeeping all operate
/// on that in-memory set; persistence goes through [`IpAllocatorState`].
#[derive(Debug, Clone)]
pub struct IpAllocator {
    /// Network CIDR range (IPv4 or IPv6) that addresses are drawn from.
    network: IpNet,
    /// Addresses currently marked as allocated.
    allocated: HashSet<IpAddr>,
}
24
/// Persistent state for an [`IpAllocator`].
///
/// This is the serde-serializable form produced by `IpAllocator::to_state`
/// and consumed by `IpAllocator::from_state`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IpAllocatorState {
    /// CIDR of the network, in string form (as printed by the allocator's network).
    pub cidr: String,
    /// List of allocated IPs (serializes as strings, backward-compatible)
    pub allocated: Vec<IpAddr>,
}
33
/// Add `offset` to `base`, treating the IPv6 address as a 128-bit integer.
///
/// Returns `None` when the sum does not fit in 128 bits.
fn ipv6_add(base: Ipv6Addr, offset: u128) -> Option<Ipv6Addr> {
    let sum = u128::from(base).checked_add(offset)?;
    Some(Ipv6Addr::from(sum))
}
41
/// Compute the number of host addresses for a given address family and prefix length.
///
/// * IPv4: `2^(32 - prefix) - 2` (excludes network and broadcast); `/31` and
///   `/32` report `0`.
/// * IPv6: `2^(128 - prefix) - 1` (excludes the network address); `/128`
///   reports `0`.
///
/// The result never overflows: an IPv6 `/0` saturates at `u128::MAX`. A
/// prefix longer than the family's maximum (which a validated network never
/// produces) is treated as having no host bits and yields `0` rather than
/// underflowing.
fn host_count(is_ipv6: bool, prefix_len: u8) -> u128 {
    let family_bits: u32 = if is_ipv6 { 128 } else { 32 };
    // Saturating so an out-of-range prefix cannot underflow the subtraction.
    let host_bits = family_bits.saturating_sub(u32::from(prefix_len));

    if is_ipv6 {
        match host_bits {
            // /128 — single address, which IS the network address.
            0 => 0,
            // /0 — `1 << 128` is not representable; 2^128 - 1 is exactly u128::MAX.
            128 => u128::MAX,
            // 2^bits - 1 (skip network address)
            bits => (1u128 << bits) - 1,
        }
    } else if host_bits <= 1 {
        // /31 or /32 — no usable hosts in classical networking
        0
    } else {
        // 2^bits - 2 (skip network and broadcast)
        (1u128 << host_bits) - 2
    }
}
72
73impl IpAllocator {
74    /// Create a new IP allocator for the given CIDR range
75    ///
76    /// Supports both IPv4 (e.g., "10.200.0.0/16") and IPv6 (e.g., `fd00::/48`).
77    ///
78    /// # Arguments
79    /// * `cidr` - Network CIDR notation
80    ///
81    /// # Errors
82    ///
83    /// Returns `OverlayError::InvalidCidr` if the CIDR string cannot be parsed.
84    ///
85    /// # Example
86    /// ```
87    /// use zlayer_overlay::allocator::IpAllocator;
88    ///
89    /// let v4 = IpAllocator::new("10.200.0.0/16").unwrap();
90    /// let v6 = IpAllocator::new("fd00::/48").unwrap();
91    /// ```
92    pub fn new(cidr: &str) -> Result<Self> {
93        let network: IpNet = cidr
94            .parse()
95            .map_err(|e| OverlayError::InvalidCidr(format!("{cidr}: {e}")))?;
96
97        Ok(Self {
98            network,
99            allocated: HashSet::new(),
100        })
101    }
102
103    /// Create an allocator from persisted state
104    ///
105    /// # Errors
106    ///
107    /// Returns an error if the CIDR is invalid or any IP is out of range.
108    pub fn from_state(state: IpAllocatorState) -> Result<Self> {
109        let mut allocator = Self::new(&state.cidr)?;
110        for ip in state.allocated {
111            allocator.mark_allocated(ip)?;
112        }
113        Ok(allocator)
114    }
115
116    /// Get the current state for persistence
117    #[must_use]
118    pub fn to_state(&self) -> IpAllocatorState {
119        IpAllocatorState {
120            cidr: self.network.to_string(),
121            allocated: self.allocated.iter().copied().collect(),
122        }
123    }
124
125    /// Load allocator state from a file
126    ///
127    /// # Errors
128    ///
129    /// Returns an error if the file cannot be read or the state is invalid.
130    pub async fn load(path: &Path) -> Result<Self> {
131        let contents = tokio::fs::read_to_string(path).await?;
132        let state: IpAllocatorState = serde_json::from_str(&contents)?;
133        Self::from_state(state)
134    }
135
136    /// Save allocator state to a file
137    ///
138    /// # Errors
139    ///
140    /// Returns an error if the file cannot be written or serialization fails.
141    pub async fn save(&self, path: &Path) -> Result<()> {
142        let state = self.to_state();
143        let contents = serde_json::to_string_pretty(&state)?;
144        tokio::fs::write(path, contents).await?;
145        Ok(())
146    }
147
148    /// Allocate the next available IP address
149    ///
150    /// For IPv4, skips the network and broadcast addresses.
151    /// For IPv6, skips the network address.
152    ///
153    /// Returns `None` if all addresses in the CIDR range are allocated.
154    ///
155    /// # Example
156    /// ```
157    /// use zlayer_overlay::allocator::IpAllocator;
158    ///
159    /// let mut allocator = IpAllocator::new("10.200.0.0/24").unwrap();
160    /// let ip = allocator.allocate().unwrap();
161    /// assert_eq!(ip.to_string(), "10.200.0.1");
162    /// ```
163    pub fn allocate(&mut self) -> Option<IpAddr> {
164        match self.network {
165            IpNet::V4(v4net) => {
166                // IPv4: iterate hosts() which skips network and broadcast
167                for ip in v4net.hosts() {
168                    let addr = IpAddr::V4(ip);
169                    if !self.allocated.contains(&addr) {
170                        self.allocated.insert(addr);
171                        return Some(addr);
172                    }
173                }
174                None
175            }
176            IpNet::V6(v6net) => {
177                // IPv6: counter-based allocation starting from base+1
178                // We skip the network address itself (offset 0) and allocate from offset 1.
179                let base = v6net.network();
180                let total = host_count(true, v6net.prefix_len());
181
182                for offset in 1..=total {
183                    if let Some(candidate) = ipv6_add(base, offset) {
184                        let addr = IpAddr::V6(candidate);
185                        if !self.allocated.contains(&addr) {
186                            self.allocated.insert(addr);
187                            return Some(addr);
188                        }
189                    } else {
190                        break;
191                    }
192                }
193                None
194            }
195        }
196    }
197
198    /// Allocate a specific IP address
199    ///
200    /// # Errors
201    ///
202    /// Returns an error if the IP is already allocated or not in the CIDR range.
203    pub fn allocate_specific(&mut self, ip: IpAddr) -> Result<()> {
204        if !self.network.contains(&ip) {
205            return Err(OverlayError::IpNotInRange(ip, self.network.to_string()));
206        }
207
208        if self.allocated.contains(&ip) {
209            return Err(OverlayError::IpAlreadyAllocated(ip));
210        }
211
212        self.allocated.insert(ip);
213        Ok(())
214    }
215
216    /// Allocate the first usable IP in the range (typically for the leader)
217    ///
218    /// # Example
219    /// ```
220    /// use zlayer_overlay::allocator::IpAllocator;
221    ///
222    /// let mut allocator = IpAllocator::new("10.200.0.0/24").unwrap();
223    /// let ip = allocator.allocate_first().unwrap();
224    /// assert_eq!(ip.to_string(), "10.200.0.1");
225    /// ```
226    ///
227    /// # Errors
228    ///
229    /// Returns an error if no IPs are available or the first IP is already allocated.
230    pub fn allocate_first(&mut self) -> Result<IpAddr> {
231        let first_ip = self.first_host().ok_or(OverlayError::NoAvailableIps)?;
232
233        if self.allocated.contains(&first_ip) {
234            return Err(OverlayError::IpAlreadyAllocated(first_ip));
235        }
236
237        self.allocated.insert(first_ip);
238        Ok(first_ip)
239    }
240
241    /// Get the first usable host address in the network.
242    ///
243    /// For IPv4: first host from `hosts()` (skips network address).
244    /// For IPv6: network address + 1 (skips the network address).
245    fn first_host(&self) -> Option<IpAddr> {
246        match self.network {
247            IpNet::V4(v4net) => v4net.hosts().next().map(IpAddr::V4),
248            IpNet::V6(v6net) => {
249                let base = v6net.network();
250                ipv6_add(base, 1).map(IpAddr::V6)
251            }
252        }
253    }
254
255    /// Mark an IP address as allocated (for restoring state)
256    ///
257    /// # Errors
258    ///
259    /// Returns an error if the IP is not in the CIDR range.
260    pub fn mark_allocated(&mut self, ip: IpAddr) -> Result<()> {
261        if !self.network.contains(&ip) {
262            return Err(OverlayError::IpNotInRange(ip, self.network.to_string()));
263        }
264        self.allocated.insert(ip);
265        Ok(())
266    }
267
268    /// Release an IP address back to the pool
269    ///
270    /// Returns `true` if the IP was released, `false` if it wasn't allocated.
271    pub fn release(&mut self, ip: IpAddr) -> bool {
272        self.allocated.remove(&ip)
273    }
274
275    /// Check if an IP address is allocated
276    #[must_use]
277    pub fn is_allocated(&self, ip: IpAddr) -> bool {
278        self.allocated.contains(&ip)
279    }
280
281    /// Check if an IP address is within the CIDR range
282    #[must_use]
283    pub fn contains(&self, ip: IpAddr) -> bool {
284        self.network.contains(&ip)
285    }
286
287    /// Get the number of allocated addresses
288    #[must_use]
289    pub fn allocated_count(&self) -> usize {
290        self.allocated.len()
291    }
292
293    /// Get the total number of usable addresses in the range
294    ///
295    /// For IPv6 networks with large host spaces, this saturates at `u32::MAX`.
296    #[must_use]
297    #[allow(clippy::cast_possible_truncation)]
298    pub fn total_hosts(&self) -> u32 {
299        let is_v6 = matches!(self.network, IpNet::V6(_));
300        let count = host_count(is_v6, self.network.prefix_len());
301        // Saturate to u32::MAX for enormous IPv6 subnets
302        if count > u128::from(u32::MAX) {
303            u32::MAX
304        } else {
305            count as u32
306        }
307    }
308
309    /// Get the number of available addresses
310    #[must_use]
311    #[allow(clippy::cast_possible_truncation)]
312    pub fn available_count(&self) -> u32 {
313        self.total_hosts()
314            .saturating_sub(self.allocated.len() as u32)
315    }
316
317    /// Get the CIDR string
318    #[must_use]
319    pub fn cidr(&self) -> String {
320        self.network.to_string()
321    }
322
323    /// Get the network address
324    #[must_use]
325    pub fn network_addr(&self) -> IpAddr {
326        self.network.network()
327    }
328
329    /// Get the broadcast address
330    ///
331    /// For IPv6, returns the last address in the range (all host bits set to 1).
332    #[must_use]
333    pub fn broadcast_addr(&self) -> IpAddr {
334        self.network.broadcast()
335    }
336
337    /// Get the prefix length
338    #[must_use]
339    pub fn prefix_len(&self) -> u8 {
340        self.network.prefix_len()
341    }
342
343    /// Get the host prefix length (32 for IPv4, 128 for IPv6)
344    #[must_use]
345    pub fn host_prefix_len(&self) -> u8 {
346        self.network.max_prefix_len()
347    }
348
349    /// Get all allocated IPs
350    #[must_use]
351    pub fn allocated_ips(&self) -> Vec<IpAddr> {
352        self.allocated.iter().copied().collect()
353    }
354}
355
/// Leader-side allocator that carves per-node slices out of a cluster CIDR.
///
/// Used to fix the latent IP-collision bug where every agent independently
/// allocated container IPs from the full cluster `/16`. With a `NodeSliceAllocator`
/// the leader hands each joining node its own non-overlapping slice, and the
/// agent-local `IpAllocator` is bounded to that slice.
///
/// Slice assignment is deterministic within a leader process: the node ID hashes
/// to a candidate slice index; collisions are resolved by linear probing forward
/// (wrapping around at the end) until a free slot is found. Existing assignments
/// are preserved across leader restart via `snapshot()` / `restore()`.
#[derive(Debug, Clone)]
pub struct NodeSliceAllocator {
    /// Cluster-wide network that slices are carved from.
    cluster_cidr: IpNet,
    /// Prefix length of every slice; validated in `new` to be longer than the
    /// cluster prefix and no longer than the address family's maximum.
    slice_prefix: u8,
    /// Map from node ID to the slice assigned to that node.
    assigned: HashMap<String, IpNet>,
}
373
/// Persistent snapshot of a `NodeSliceAllocator` for raft/disk persistence.
///
/// All networks are carried as CIDR strings and re-parsed by
/// `NodeSliceAllocator::restore`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NodeSliceAllocatorSnapshot {
    /// Cluster CIDR in string form.
    pub cluster_cidr: String,
    /// Prefix length shared by every assigned slice.
    pub slice_prefix: u8,
    /// `(node_id, slice CIDR string)` pairs.
    pub assigned: Vec<(String, String)>,
}
381
/// FNV-1a (64-bit) hash of a node ID's UTF-8 bytes.
///
/// A fixed, hand-rolled hash is used so that the starting slice index for a
/// given node ID is reproducible from a snapshot, independent of any
/// per-process hasher state.
fn hash_node_id(node_id: &str) -> u64 {
    const FNV_OFFSET: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;
    // FNV-1a step: XOR the byte in first, then multiply by the prime.
    node_id.bytes().fold(FNV_OFFSET, |acc, byte| {
        (acc ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    })
}
396
397impl NodeSliceAllocator {
398    /// Create a new slice allocator that carves `/slice_prefix`-sized slices
399    /// out of `cluster_cidr`.
400    ///
401    /// # Errors
402    ///
403    /// Returns `OverlayError::InvalidCidr` if `slice_prefix` is not strictly
404    /// more specific than `cluster_cidr.prefix_len()`, or if it exceeds the
405    /// address family's maximum prefix length.
406    pub fn new(cluster_cidr: IpNet, slice_prefix: u8) -> Result<Self> {
407        if slice_prefix <= cluster_cidr.prefix_len() {
408            return Err(OverlayError::InvalidCidr(format!(
409                "slice prefix /{} must be more specific than cluster prefix /{}",
410                slice_prefix,
411                cluster_cidr.prefix_len()
412            )));
413        }
414        if slice_prefix > cluster_cidr.max_prefix_len() {
415            return Err(OverlayError::InvalidCidr(format!(
416                "slice prefix /{} exceeds address family max /{}",
417                slice_prefix,
418                cluster_cidr.max_prefix_len()
419            )));
420        }
421        Ok(Self {
422            cluster_cidr,
423            slice_prefix,
424            assigned: HashMap::new(),
425        })
426    }
427
428    /// Assign (or return an existing) slice for `node_id`.
429    ///
430    /// Idempotent: calling `assign` with a node ID that already has a slice
431    /// returns the existing slice without re-assigning.
432    ///
433    /// # Errors
434    ///
435    /// Returns `OverlayError::NoAvailableIps` if every slice in the cluster
436    /// CIDR is already assigned.
437    pub fn assign(&mut self, node_id: &str) -> Result<IpNet> {
438        if let Some(existing) = self.assigned.get(node_id) {
439            return Ok(*existing);
440        }
441
442        let num_slices = self.num_slices();
443        if num_slices == 0 {
444            return Err(OverlayError::NoAvailableIps);
445        }
446
447        let taken: HashSet<IpNet> = self.assigned.values().copied().collect();
448        let start = hash_node_id(node_id) % num_slices;
449
450        for i in 0..num_slices {
451            let idx = (start + i) % num_slices;
452            let slice = self.slice_at_index(idx);
453            if !taken.contains(&slice) {
454                self.assigned.insert(node_id.to_string(), slice);
455                return Ok(slice);
456            }
457        }
458
459        Err(OverlayError::NoAvailableIps)
460    }
461
462    /// Release `node_id`'s slice back to the free pool.
463    ///
464    /// Returns `true` if a slice was released, `false` if the node was not assigned.
465    pub fn release(&mut self, node_id: &str) -> bool {
466        self.assigned.remove(node_id).is_some()
467    }
468
469    /// Look up a node's assigned slice without mutating state.
470    #[must_use]
471    pub fn slice_for(&self, node_id: &str) -> Option<IpNet> {
472        self.assigned.get(node_id).copied()
473    }
474
475    /// Number of currently-assigned slices.
476    #[must_use]
477    pub fn assigned_count(&self) -> usize {
478        self.assigned.len()
479    }
480
481    /// Total number of slices the cluster CIDR can hold at the configured slice prefix.
482    #[must_use]
483    pub fn capacity(&self) -> u64 {
484        self.num_slices()
485    }
486
487    /// Cluster CIDR the allocator operates over.
488    #[must_use]
489    pub fn cluster_cidr(&self) -> IpNet {
490        self.cluster_cidr
491    }
492
493    /// Slice prefix length (e.g. `28` for `/28` slices).
494    #[must_use]
495    pub fn slice_prefix(&self) -> u8 {
496        self.slice_prefix
497    }
498
499    /// Build a persistable snapshot for durable leader state.
500    #[must_use]
501    pub fn snapshot(&self) -> NodeSliceAllocatorSnapshot {
502        NodeSliceAllocatorSnapshot {
503            cluster_cidr: self.cluster_cidr.to_string(),
504            slice_prefix: self.slice_prefix,
505            assigned: self
506                .assigned
507                .iter()
508                .map(|(k, v)| (k.clone(), v.to_string()))
509                .collect(),
510        }
511    }
512
513    /// Rebuild an allocator from a snapshot.
514    ///
515    /// # Errors
516    ///
517    /// Returns `OverlayError::InvalidCidr` if the snapshot's CIDR or any
518    /// assigned slice fails to parse, or if the slice prefix is inconsistent.
519    pub fn restore(snapshot: NodeSliceAllocatorSnapshot) -> Result<Self> {
520        let cluster_cidr: IpNet = snapshot
521            .cluster_cidr
522            .parse()
523            .map_err(|e| OverlayError::InvalidCidr(format!("{}: {e}", snapshot.cluster_cidr)))?;
524        let mut allocator = Self::new(cluster_cidr, snapshot.slice_prefix)?;
525        for (node_id, slice_str) in snapshot.assigned {
526            let slice: IpNet = slice_str
527                .parse()
528                .map_err(|e| OverlayError::InvalidCidr(format!("{slice_str}: {e}")))?;
529            if slice.prefix_len() != snapshot.slice_prefix {
530                return Err(OverlayError::InvalidCidr(format!(
531                    "assigned slice {slice} does not match configured prefix /{}",
532                    snapshot.slice_prefix
533                )));
534            }
535            if !cluster_cidr.contains(&slice.network()) {
536                return Err(OverlayError::InvalidCidr(format!(
537                    "assigned slice {slice} is not contained in cluster CIDR {cluster_cidr}"
538                )));
539            }
540            allocator.assigned.insert(node_id, slice);
541        }
542        Ok(allocator)
543    }
544
545    fn num_slices(&self) -> u64 {
546        let bits = self.slice_prefix - self.cluster_cidr.prefix_len();
547        // bits is in 1..=32 for v4 or 1..=128 for v6. For a /16 cluster with /28
548        // slices, bits = 12 → 4096 slices, safely inside u64 range.
549        if bits >= 64 {
550            u64::MAX
551        } else {
552            1u64 << bits
553        }
554    }
555
556    fn slice_at_index(&self, idx: u64) -> IpNet {
557        let shift = u32::from(self.cluster_cidr.max_prefix_len() - self.slice_prefix);
558        match self.cluster_cidr {
559            IpNet::V4(v4) => {
560                let base = u32::from(v4.network());
561                // idx fits in 32 bits whenever slice_prefix − cluster_prefix ≤ 32.
562                #[allow(clippy::cast_possible_truncation)]
563                let offset = (idx as u32).wrapping_shl(shift);
564                let slice_addr = Ipv4Addr::from(base.wrapping_add(offset));
565                IpNet::V4(
566                    Ipv4Net::new(slice_addr, self.slice_prefix)
567                        .expect("slice_prefix validated in constructor"),
568                )
569            }
570            IpNet::V6(v6) => {
571                let base = u128::from(v6.network());
572                let offset = u128::from(idx).wrapping_shl(shift);
573                let slice_addr = Ipv6Addr::from(base.wrapping_add(offset));
574                IpNet::V6(
575                    Ipv6Net::new(slice_addr, self.slice_prefix)
576                        .expect("slice_prefix validated in constructor"),
577                )
578            }
579        }
580    }
581}
582
/// Tracks per-service-per-node subnet assignments carved from the cluster
/// CIDR. Each `(service_name, node_id)` pair gets its own slice of size
/// `slice_prefix` (e.g. `/28`). Assignments are deterministic — the same
/// `(service, node)` pair always maps to the same starting slot via FNV
/// hash, with linear probing on collision. Mirrors `NodeSliceAllocator`'s
/// pattern; see that type for the rationale (in particular the choice of
/// FNV over `DefaultHasher` for cross-process reproducibility).
///
/// Snapshot/restore is wired the same way `NodeSliceAllocator` does it, so
/// the scheduler's Raft state can persist + replay assignments. The
/// snapshot's `Vec<((String, String), IpNet)>` is the wire-stable shape:
/// a `HashMap` is avoided there because its non-deterministic ordering would
/// yield unstable serialized bytes under postcard/serde.
///
/// Node IDs are stored as `String` (matching `NodeSliceAllocator`); the
/// scheduler converts its own `NodeId` to/from `String` at the boundary.
#[derive(Debug, Clone)]
pub struct ServiceSubnetRegistry {
    /// Cluster-wide network that slices are carved from.
    cluster_cidr: IpNet,
    /// Prefix length of every slice; validated in `new`.
    slice_prefix: u8,
    /// Map from `(service_name, node_id)` -> assigned slice.
    assignments: HashMap<(String, String), IpNet>,
}
606
/// Persistent snapshot of a `ServiceSubnetRegistry` for raft/disk persistence.
///
/// Uses a `Vec` of pairs (rather than a `HashMap`) so the serialized byte
/// layout is deterministic when Raft replicates / snapshots this state.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ServiceSubnetRegistrySnapshot {
    /// Cluster CIDR the registry operates over.
    pub cluster_cidr: IpNet,
    /// Prefix length shared by every assigned slice.
    pub slice_prefix: u8,
    /// `((service_name, node_id), slice)` pairs.
    pub assignments: Vec<((String, String), IpNet)>,
}
617
/// FNV-1a (64-bit) hash of a `(service, node)` pair.
///
/// Shares its offset basis and prime with `hash_node_id`. The input stream
/// is the service bytes, one `0x1f` (ASCII unit-separator) byte, then the
/// node bytes; the separator keeps `("ab", "c")` from hashing identically
/// to `("a", "bc")`.
fn hash_service_node(service: &str, node: &str) -> u64 {
    const FNV_OFFSET: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;
    service
        .bytes()
        .chain(std::iter::once(0x1f_u8))
        .chain(node.bytes())
        // FNV-1a step: XOR the byte in first, then multiply by the prime.
        .fold(FNV_OFFSET, |acc, byte| {
            (acc ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
        })
}
641
642impl ServiceSubnetRegistry {
643    /// Create a new service subnet registry that carves `/slice_prefix`-sized
644    /// slices out of `cluster_cidr`.
645    ///
646    /// # Errors
647    ///
648    /// Returns `OverlayError::InvalidCidr` if `slice_prefix` is not strictly
649    /// more specific than `cluster_cidr.prefix_len()`, or if it exceeds the
650    /// address family's maximum prefix length.
651    pub fn new(cluster_cidr: IpNet, slice_prefix: u8) -> Result<Self> {
652        if slice_prefix <= cluster_cidr.prefix_len() {
653            return Err(OverlayError::InvalidCidr(format!(
654                "slice prefix /{} must be more specific than cluster prefix /{}",
655                slice_prefix,
656                cluster_cidr.prefix_len()
657            )));
658        }
659        if slice_prefix > cluster_cidr.max_prefix_len() {
660            return Err(OverlayError::InvalidCidr(format!(
661                "slice prefix /{} exceeds address family max /{}",
662                slice_prefix,
663                cluster_cidr.max_prefix_len()
664            )));
665        }
666        Ok(Self {
667            cluster_cidr,
668            slice_prefix,
669            assignments: HashMap::new(),
670        })
671    }
672
673    /// Assign (or return an existing) subnet for `(service, node)`.
674    ///
675    /// Idempotent: repeated calls with the same key return the same slice
676    /// without re-assigning.
677    ///
678    /// # Errors
679    ///
680    /// Returns `OverlayError::NoAvailableIps` if every slice in the cluster
681    /// CIDR is already assigned to some other `(service, node)` pair.
682    pub fn assign(&mut self, service: &str, node: &str) -> Result<IpNet> {
683        let key = (service.to_string(), node.to_string());
684        if let Some(existing) = self.assignments.get(&key) {
685            return Ok(*existing);
686        }
687
688        let num_slices = self.num_slices();
689        if num_slices == 0 {
690            return Err(OverlayError::NoAvailableIps);
691        }
692
693        let taken: HashSet<IpNet> = self.assignments.values().copied().collect();
694        let start = hash_service_node(service, node) % num_slices;
695
696        for i in 0..num_slices {
697            let idx = (start + i) % num_slices;
698            let slice = self.slice_at_index(idx);
699            if !taken.contains(&slice) {
700                self.assignments.insert(key, slice);
701                return Ok(slice);
702            }
703        }
704
705        Err(OverlayError::NoAvailableIps)
706    }
707
708    /// Release the subnet for `(service, node)`. Returns the freed slice if
709    /// one was assigned, `None` otherwise.
710    pub fn release(&mut self, service: &str, node: &str) -> Option<IpNet> {
711        let key = (service.to_string(), node.to_string());
712        self.assignments.remove(&key)
713    }
714
715    /// Look up the current assignment for `(service, node)`, if any.
716    #[must_use]
717    pub fn get(&self, service: &str, node: &str) -> Option<IpNet> {
718        let key = (service.to_string(), node.to_string());
719        self.assignments.get(&key).copied()
720    }
721
722    /// Number of currently-assigned `(service, node)` pairs.
723    #[must_use]
724    pub fn assigned_count(&self) -> usize {
725        self.assignments.len()
726    }
727
728    /// Total number of slices the cluster CIDR can hold at the configured
729    /// slice prefix.
730    #[must_use]
731    pub fn capacity(&self) -> u64 {
732        self.num_slices()
733    }
734
735    /// Cluster CIDR the registry operates over.
736    #[must_use]
737    pub fn cluster_cidr(&self) -> IpNet {
738        self.cluster_cidr
739    }
740
741    /// Slice prefix length (e.g. `28` for `/28` slices).
742    #[must_use]
743    pub fn slice_prefix(&self) -> u8 {
744        self.slice_prefix
745    }
746
747    /// Build a persistable snapshot for Raft / durable leader state.
748    ///
749    /// The returned snapshot has assignments sorted by `(service, node)` so
750    /// the serialized bytes are deterministic across processes — important
751    /// when Raft compares snapshots by hash.
752    #[must_use]
753    pub fn snapshot(&self) -> ServiceSubnetRegistrySnapshot {
754        let mut assignments: Vec<((String, String), IpNet)> = self
755            .assignments
756            .iter()
757            .map(|(k, v)| (k.clone(), *v))
758            .collect();
759        assignments.sort_by(|a, b| a.0.cmp(&b.0));
760        ServiceSubnetRegistrySnapshot {
761            cluster_cidr: self.cluster_cidr,
762            slice_prefix: self.slice_prefix,
763            assignments,
764        }
765    }
766
767    /// Rebuild a registry from a snapshot.
768    ///
769    /// # Errors
770    ///
771    /// Returns `OverlayError::InvalidCidr` if the snapshot's slice prefix is
772    /// inconsistent with its assignments, or if any assigned slice is not
773    /// contained in the cluster CIDR.
774    pub fn restore(snapshot: ServiceSubnetRegistrySnapshot) -> Result<Self> {
775        let mut registry = Self::new(snapshot.cluster_cidr, snapshot.slice_prefix)?;
776        for (key, slice) in snapshot.assignments {
777            if slice.prefix_len() != snapshot.slice_prefix {
778                return Err(OverlayError::InvalidCidr(format!(
779                    "assigned slice {slice} does not match configured prefix /{}",
780                    snapshot.slice_prefix
781                )));
782            }
783            if !snapshot.cluster_cidr.contains(&slice.network()) {
784                return Err(OverlayError::InvalidCidr(format!(
785                    "assigned slice {slice} is not contained in cluster CIDR {}",
786                    snapshot.cluster_cidr
787                )));
788            }
789            registry.assignments.insert(key, slice);
790        }
791        Ok(registry)
792    }
793
794    fn num_slices(&self) -> u64 {
795        let bits = self.slice_prefix - self.cluster_cidr.prefix_len();
796        if bits >= 64 {
797            u64::MAX
798        } else {
799            1u64 << bits
800        }
801    }
802
803    fn slice_at_index(&self, idx: u64) -> IpNet {
804        let shift = u32::from(self.cluster_cidr.max_prefix_len() - self.slice_prefix);
805        match self.cluster_cidr {
806            IpNet::V4(v4) => {
807                let base = u32::from(v4.network());
808                #[allow(clippy::cast_possible_truncation)]
809                let offset = (idx as u32).wrapping_shl(shift);
810                let slice_addr = Ipv4Addr::from(base.wrapping_add(offset));
811                IpNet::V4(
812                    Ipv4Net::new(slice_addr, self.slice_prefix)
813                        .expect("slice_prefix validated in constructor"),
814                )
815            }
816            IpNet::V6(v6) => {
817                let base = u128::from(v6.network());
818                let offset = u128::from(idx).wrapping_shl(shift);
819                let slice_addr = Ipv6Addr::from(base.wrapping_add(offset));
820                IpNet::V6(
821                    Ipv6Net::new(slice_addr, self.slice_prefix)
822                        .expect("slice_prefix validated in constructor"),
823                )
824            }
825        }
826    }
827}
828
829/// Helper function to get the first usable IP from a CIDR
830///
831/// Supports both IPv4 and IPv6 CIDR notation.
832///
833/// # Errors
834///
835/// Returns an error if the CIDR is invalid or has no usable hosts.
836pub fn first_ip_from_cidr(cidr: &str) -> Result<IpAddr> {
837    let network: IpNet = cidr
838        .parse()
839        .map_err(|e| OverlayError::InvalidCidr(format!("{cidr}: {e}")))?;
840
841    match network {
842        IpNet::V4(v4net) => v4net
843            .hosts()
844            .next()
845            .map(IpAddr::V4)
846            .ok_or(OverlayError::NoAvailableIps),
847        IpNet::V6(v6net) => {
848            let base = v6net.network();
849            ipv6_add(base, 1)
850                .map(IpAddr::V6)
851                .ok_or(OverlayError::NoAvailableIps)
852        }
853    }
854}
855
#[cfg(test)]
mod tests {
    // Unit tests for this module, grouped by section: IPv4 `IpAllocator`,
    // IPv6 `IpAllocator`, cross-protocol behaviour, free helper functions,
    // `NodeSliceAllocator`, and `ServiceSubnetRegistry`.
    use super::*;
    use std::net::{Ipv4Addr, Ipv6Addr};

    /// Increment an IPv4 address by a u32 offset from a base address.
    ///
    /// Test-only counterpart of `ipv6_add`.
    ///
    /// Returns `None` if the result would overflow.
    fn ipv4_add(base: Ipv4Addr, offset: u32) -> Option<Ipv4Addr> {
        let base_u32 = u32::from(base);
        base_u32.checked_add(offset).map(Ipv4Addr::from)
    }

    // ========================
    // IPv4 Tests (existing, updated for IpAddr)
    // ========================

    // A freshly built allocator echoes its CIDR and starts with nothing allocated.
    #[test]
    fn test_allocator_new() {
        let allocator = IpAllocator::new("10.200.0.0/24").unwrap();
        assert_eq!(allocator.cidr(), "10.200.0.0/24");
        assert_eq!(allocator.allocated_count(), 0);
    }

    #[test]
    fn test_allocator_invalid_cidr() {
        let result = IpAllocator::new("invalid");
        assert!(result.is_err());
    }

    #[test]
    fn test_allocate_sequential() {
        let mut allocator = IpAllocator::new("10.200.0.0/30").unwrap();

        // /30 has 2 usable hosts (excluding network and broadcast)
        let ip1 = allocator.allocate().unwrap();
        let ip2 = allocator.allocate().unwrap();

        assert_eq!(ip1.to_string(), "10.200.0.1");
        assert_eq!(ip2.to_string(), "10.200.0.2");

        // Should be exhausted
        assert!(allocator.allocate().is_none());
    }

    #[test]
    fn test_allocate_first() {
        let mut allocator = IpAllocator::new("10.200.0.0/24").unwrap();

        let first = allocator.allocate_first().unwrap();
        assert_eq!(first.to_string(), "10.200.0.1");

        // Can't allocate first again
        assert!(allocator.allocate_first().is_err());
    }

    #[test]
    fn test_allocate_specific() {
        let mut allocator = IpAllocator::new("10.200.0.0/24").unwrap();

        let specific_ip: IpAddr = "10.200.0.50".parse().unwrap();
        allocator.allocate_specific(specific_ip).unwrap();

        assert!(allocator.is_allocated(specific_ip));

        // Can't allocate same IP again
        assert!(allocator.allocate_specific(specific_ip).is_err());
    }

    #[test]
    fn test_allocate_specific_out_of_range() {
        let mut allocator = IpAllocator::new("10.200.0.0/24").unwrap();

        let out_of_range: IpAddr = "192.168.1.1".parse().unwrap();
        assert!(allocator.allocate_specific(out_of_range).is_err());
    }

    #[test]
    fn test_release() {
        let mut allocator = IpAllocator::new("10.200.0.0/24").unwrap();

        let ip = allocator.allocate().unwrap();
        assert!(allocator.is_allocated(ip));

        assert!(allocator.release(ip));
        assert!(!allocator.is_allocated(ip));

        // Can allocate same IP again
        let ip2 = allocator.allocate().unwrap();
        assert_eq!(ip, ip2);
    }

    #[test]
    fn test_mark_allocated() {
        let mut allocator = IpAllocator::new("10.200.0.0/24").unwrap();

        let ip: IpAddr = "10.200.0.100".parse().unwrap();
        allocator.mark_allocated(ip).unwrap();

        assert!(allocator.is_allocated(ip));
    }

    #[test]
    fn test_contains() {
        let allocator = IpAllocator::new("10.200.0.0/24").unwrap();

        assert!(allocator.contains("10.200.0.50".parse().unwrap()));
        assert!(!allocator.contains("10.201.0.50".parse().unwrap()));
    }

    #[test]
    fn test_total_hosts() {
        // /24 has 254 usable hosts
        let allocator = IpAllocator::new("10.200.0.0/24").unwrap();
        assert_eq!(allocator.total_hosts(), 254);

        // /30 has 2 usable hosts
        let allocator = IpAllocator::new("10.200.0.0/30").unwrap();
        assert_eq!(allocator.total_hosts(), 2);
    }

    #[test]
    fn test_available_count() {
        let mut allocator = IpAllocator::new("10.200.0.0/30").unwrap();

        assert_eq!(allocator.available_count(), 2);

        allocator.allocate();
        assert_eq!(allocator.available_count(), 1);

        allocator.allocate();
        assert_eq!(allocator.available_count(), 0);
    }

    // Only the CIDR and the number of allocations are compared after the
    // round-trip; the individual addresses are not checked here.
    #[test]
    fn test_state_roundtrip() {
        let mut allocator = IpAllocator::new("10.200.0.0/24").unwrap();
        allocator.allocate();
        allocator.allocate();

        let state = allocator.to_state();
        let restored = IpAllocator::from_state(state).unwrap();

        assert_eq!(allocator.cidr(), restored.cidr());
        assert_eq!(allocator.allocated_count(), restored.allocated_count());
    }

    #[test]
    fn test_first_ip_from_cidr() {
        let ip = first_ip_from_cidr("10.200.0.0/24").unwrap();
        assert_eq!(ip.to_string(), "10.200.0.1");
    }

    #[test]
    fn test_network_addr_v4() {
        let allocator = IpAllocator::new("10.200.0.0/24").unwrap();
        assert_eq!(
            allocator.network_addr(),
            IpAddr::V4("10.200.0.0".parse().unwrap())
        );
    }

    #[test]
    fn test_broadcast_addr_v4() {
        let allocator = IpAllocator::new("10.200.0.0/24").unwrap();
        assert_eq!(
            allocator.broadcast_addr(),
            IpAddr::V4("10.200.0.255".parse().unwrap())
        );
    }

    #[test]
    fn test_host_prefix_len_v4() {
        let allocator = IpAllocator::new("10.200.0.0/24").unwrap();
        assert_eq!(allocator.host_prefix_len(), 32);
    }

    // ========================
    // IPv6 Tests
    // ========================

    #[test]
    fn test_allocator_new_v6() {
        let allocator = IpAllocator::new("fd00::/48").unwrap();
        assert_eq!(allocator.cidr(), "fd00::/48");
        assert_eq!(allocator.allocated_count(), 0);
    }

    #[test]
    fn test_allocate_sequential_v6() {
        let mut allocator = IpAllocator::new("fd00::/126").unwrap();

        // /126 has 3 usable hosts (4 addresses total, minus the network address)
        let ip1 = allocator.allocate().unwrap();
        let ip2 = allocator.allocate().unwrap();
        let ip3 = allocator.allocate().unwrap();

        assert_eq!(ip1.to_string(), "fd00::1");
        assert_eq!(ip2.to_string(), "fd00::2");
        assert_eq!(ip3.to_string(), "fd00::3");

        // Should be exhausted
        assert!(allocator.allocate().is_none());
    }

    #[test]
    fn test_allocate_first_v6() {
        let mut allocator = IpAllocator::new("fd00::/48").unwrap();

        let first = allocator.allocate_first().unwrap();
        assert_eq!(first.to_string(), "fd00::1");

        // Can't allocate first again
        assert!(allocator.allocate_first().is_err());
    }

    #[test]
    fn test_allocate_specific_v6() {
        let mut allocator = IpAllocator::new("fd00::/48").unwrap();

        let specific_ip: IpAddr = "fd00::beef".parse().unwrap();
        allocator.allocate_specific(specific_ip).unwrap();

        assert!(allocator.is_allocated(specific_ip));

        // Can't allocate same IP again
        assert!(allocator.allocate_specific(specific_ip).is_err());
    }

    #[test]
    fn test_allocate_specific_out_of_range_v6() {
        let mut allocator = IpAllocator::new("fd00::/48").unwrap();

        let out_of_range: IpAddr = "fe80::1".parse().unwrap();
        assert!(allocator.allocate_specific(out_of_range).is_err());
    }

    #[test]
    fn test_release_v6() {
        let mut allocator = IpAllocator::new("fd00::/48").unwrap();

        let ip = allocator.allocate().unwrap();
        assert!(allocator.is_allocated(ip));

        assert!(allocator.release(ip));
        assert!(!allocator.is_allocated(ip));

        // Can allocate same IP again
        let ip2 = allocator.allocate().unwrap();
        assert_eq!(ip, ip2);
    }

    #[test]
    fn test_mark_allocated_v6() {
        let mut allocator = IpAllocator::new("fd00::/48").unwrap();

        let ip: IpAddr = "fd00::ff".parse().unwrap();
        allocator.mark_allocated(ip).unwrap();

        assert!(allocator.is_allocated(ip));
    }

    #[test]
    fn test_contains_v6() {
        let allocator = IpAllocator::new("fd00::/48").unwrap();

        assert!(allocator.contains("fd00::50".parse().unwrap()));
        assert!(!allocator.contains("fe80::1".parse().unwrap()));
    }

    #[test]
    fn test_total_hosts_v6_small() {
        // /126 has 3 usable hosts (skip network addr)
        let allocator = IpAllocator::new("fd00::/126").unwrap();
        assert_eq!(allocator.total_hosts(), 3);

        // /127 has 1 usable host
        let allocator = IpAllocator::new("fd00::/127").unwrap();
        assert_eq!(allocator.total_hosts(), 1);
    }

    #[test]
    fn test_total_hosts_v6_large() {
        // /48 has 2^80 - 1 usable hosts, which saturates to u32::MAX
        let allocator = IpAllocator::new("fd00::/48").unwrap();
        assert_eq!(allocator.total_hosts(), u32::MAX);
    }

    #[test]
    fn test_available_count_v6() {
        let mut allocator = IpAllocator::new("fd00::/126").unwrap();

        assert_eq!(allocator.available_count(), 3);

        allocator.allocate();
        assert_eq!(allocator.available_count(), 2);

        allocator.allocate();
        assert_eq!(allocator.available_count(), 1);

        allocator.allocate();
        assert_eq!(allocator.available_count(), 0);
    }

    #[test]
    fn test_state_roundtrip_v6() {
        let mut allocator = IpAllocator::new("fd00::/48").unwrap();
        allocator.allocate();
        allocator.allocate();

        let state = allocator.to_state();

        // Verify IpAddr serializes as strings (backward-compatible)
        let json = serde_json::to_string_pretty(&state).unwrap();
        assert!(json.contains("fd00::1"));
        assert!(json.contains("fd00::2"));

        let restored = IpAllocator::from_state(state).unwrap();

        assert_eq!(allocator.cidr(), restored.cidr());
        assert_eq!(allocator.allocated_count(), restored.allocated_count());
    }

    #[test]
    fn test_first_ip_from_cidr_v6() {
        let ip = first_ip_from_cidr("fd00::/48").unwrap();
        assert_eq!(ip.to_string(), "fd00::1");
    }

    #[test]
    fn test_network_addr_v6() {
        let allocator = IpAllocator::new("fd00::/48").unwrap();
        assert_eq!(
            allocator.network_addr(),
            IpAddr::V6("fd00::".parse().unwrap())
        );
    }

    // For an IPv6 range, `broadcast_addr` is expected to be the last address
    // in the range (fd00::3 for a /126).
    #[test]
    fn test_broadcast_addr_v6() {
        let allocator = IpAllocator::new("fd00::/126").unwrap();
        assert_eq!(
            allocator.broadcast_addr(),
            IpAddr::V6("fd00::3".parse().unwrap())
        );
    }

    #[test]
    fn test_host_prefix_len_v6() {
        let allocator = IpAllocator::new("fd00::/48").unwrap();
        assert_eq!(allocator.host_prefix_len(), 128);
    }

    // ========================
    // Cross-protocol tests
    // ========================

    #[test]
    fn test_v4_and_v6_allocators_independent() {
        let mut v4 = IpAllocator::new("10.200.0.0/30").unwrap();
        let mut v6 = IpAllocator::new("fd00::/126").unwrap();

        let v4_ip = v4.allocate().unwrap();
        let v6_ip = v6.allocate().unwrap();

        assert!(v4_ip.is_ipv4());
        assert!(v6_ip.is_ipv6());
        assert_eq!(v4_ip.to_string(), "10.200.0.1");
        assert_eq!(v6_ip.to_string(), "fd00::1");
    }

    #[test]
    fn test_ipv6_does_not_contain_ipv4() {
        let allocator = IpAllocator::new("fd00::/48").unwrap();
        assert!(!allocator.contains("10.200.0.1".parse().unwrap()));
    }

    #[test]
    fn test_ipv4_does_not_contain_ipv6() {
        let allocator = IpAllocator::new("10.200.0.0/24").unwrap();
        assert!(!allocator.contains("fd00::1".parse().unwrap()));
    }

    // An address of the other family must be rejected in both directions.
    #[test]
    fn test_allocate_specific_wrong_family() {
        let mut v4_alloc = IpAllocator::new("10.200.0.0/24").unwrap();
        let v6_ip: IpAddr = "fd00::1".parse().unwrap();
        assert!(v4_alloc.allocate_specific(v6_ip).is_err());

        let mut v6_alloc = IpAllocator::new("fd00::/48").unwrap();
        let v4_ip: IpAddr = "10.200.0.1".parse().unwrap();
        assert!(v6_alloc.allocate_specific(v4_ip).is_err());
    }

    // ========================
    // Helper function tests
    // ========================

    #[test]
    fn test_ipv4_add() {
        let base: Ipv4Addr = "10.0.0.0".parse().unwrap();
        assert_eq!(ipv4_add(base, 1), Some("10.0.0.1".parse().unwrap()));
        assert_eq!(ipv4_add(base, 256), Some("10.0.1.0".parse().unwrap()));
    }

    #[test]
    fn test_ipv4_add_overflow() {
        let base: Ipv4Addr = "255.255.255.255".parse().unwrap();
        assert_eq!(ipv4_add(base, 1), None);
    }

    #[test]
    fn test_ipv6_add() {
        let base: Ipv6Addr = "fd00::".parse().unwrap();
        assert_eq!(ipv6_add(base, 1), Some("fd00::1".parse().unwrap()));
        assert_eq!(ipv6_add(base, 0xffff), Some("fd00::ffff".parse().unwrap()));
    }

    #[test]
    fn test_ipv6_add_overflow() {
        let base: Ipv6Addr = "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff".parse().unwrap();
        assert_eq!(ipv6_add(base, 1), None);
    }

    #[test]
    fn test_host_count_v4() {
        assert_eq!(host_count(false, 24), 254); // 2^8 - 2
        assert_eq!(host_count(false, 30), 2); // 2^2 - 2
        assert_eq!(host_count(false, 16), 65534); // 2^16 - 2
        assert_eq!(host_count(false, 31), 0); // /31 — no classical hosts
        assert_eq!(host_count(false, 32), 0); // /32 — single address
    }

    #[test]
    fn test_host_count_v6() {
        assert_eq!(host_count(true, 126), 3); // 2^2 - 1
        assert_eq!(host_count(true, 127), 1); // 2^1 - 1
        assert_eq!(host_count(true, 128), 0); // /128 — single address (is network addr)
        assert_eq!(host_count(true, 64), (1u128 << 64) - 1); // 2^64 - 1
    }

    // ========================
    // NodeSliceAllocator tests
    // ========================

    // Shared IPv4 /16 cluster CIDR used by the slice-allocator and
    // service-subnet tests below.
    fn cluster() -> IpNet {
        "10.200.0.0/16".parse().unwrap()
    }

    #[test]
    fn test_slice_new_rejects_equal_prefix() {
        let err = NodeSliceAllocator::new(cluster(), 16).unwrap_err();
        assert!(matches!(err, OverlayError::InvalidCidr(_)));
    }

    #[test]
    fn test_slice_new_rejects_smaller_prefix() {
        let err = NodeSliceAllocator::new(cluster(), 8).unwrap_err();
        assert!(matches!(err, OverlayError::InvalidCidr(_)));
    }

    #[test]
    fn test_slice_new_rejects_over_max() {
        let err = NodeSliceAllocator::new(cluster(), 33).unwrap_err();
        assert!(matches!(err, OverlayError::InvalidCidr(_)));
    }

    #[test]
    fn test_slice_capacity_28_in_16() {
        let allocator = NodeSliceAllocator::new(cluster(), 28).unwrap();
        // /16 → /28 ⇒ 2^12 = 4096 slices
        assert_eq!(allocator.capacity(), 4096);
    }

    #[test]
    fn test_slice_capacity_24_in_16() {
        let allocator = NodeSliceAllocator::new(cluster(), 24).unwrap();
        // /16 → /24 ⇒ 2^8 = 256 slices
        assert_eq!(allocator.capacity(), 256);
    }

    #[test]
    fn test_slice_assign_is_within_cluster() {
        let mut allocator = NodeSliceAllocator::new(cluster(), 28).unwrap();
        let slice = allocator.assign("node-a").unwrap();
        assert_eq!(slice.prefix_len(), 28);
        assert!(cluster().contains(&slice.network()));
    }

    #[test]
    fn test_slice_assign_is_idempotent() {
        let mut allocator = NodeSliceAllocator::new(cluster(), 28).unwrap();
        let first = allocator.assign("node-a").unwrap();
        let second = allocator.assign("node-a").unwrap();
        assert_eq!(first, second);
        assert_eq!(allocator.assigned_count(), 1);
    }

    #[test]
    fn test_slice_assign_different_nodes_get_different_slices() {
        let mut allocator = NodeSliceAllocator::new(cluster(), 28).unwrap();
        let a = allocator.assign("node-a").unwrap();
        let b = allocator.assign("node-b").unwrap();
        let c = allocator.assign("node-c").unwrap();
        assert_ne!(a, b);
        assert_ne!(b, c);
        assert_ne!(a, c);
    }

    #[test]
    fn test_slice_release() {
        let mut allocator = NodeSliceAllocator::new(cluster(), 28).unwrap();
        let slice = allocator.assign("node-a").unwrap();
        assert_eq!(allocator.slice_for("node-a"), Some(slice));

        assert!(allocator.release("node-a"));
        assert_eq!(allocator.slice_for("node-a"), None);

        // Release of unknown node returns false.
        assert!(!allocator.release("node-a"));
    }

    #[test]
    fn test_slice_collision_probes_forward() {
        // Use a very small cluster → only 4 slices → high probability that two
        // arbitrary IDs hash to the same candidate index. No specific collision
        // is forced here; instead every slot is filled, and each assignment
        // must succeed on a distinct slice whether or not its hash collided.
        let small: IpNet = "10.200.0.0/28".parse().unwrap();
        let mut allocator = NodeSliceAllocator::new(small, 30).unwrap();
        // /28 → /30 ⇒ 2^2 = 4 slices
        assert_eq!(allocator.capacity(), 4);

        // Assign 4 nodes — all must succeed and all must land on distinct slices.
        let ids = ["a", "b", "c", "d"];
        let mut slices: Vec<IpNet> = Vec::new();
        for id in ids {
            let slice = allocator.assign(id).unwrap();
            assert!(
                !slices.contains(&slice),
                "slice {slice} re-assigned; all slices must be distinct"
            );
            slices.push(slice);
        }
        assert_eq!(allocator.assigned_count(), 4);
    }

    #[test]
    fn test_slice_exhaustion_4096() {
        let mut allocator = NodeSliceAllocator::new(cluster(), 28).unwrap();
        // Fill every one of the 4096 slices.
        for i in 0..4096u32 {
            let id = format!("node-{i}");
            allocator.assign(&id).unwrap();
        }
        assert_eq!(allocator.assigned_count(), 4096);

        // The next assignment must fail with NoAvailableIps.
        let err = allocator.assign("node-4096").unwrap_err();
        assert!(matches!(err, OverlayError::NoAvailableIps));
    }

    #[test]
    fn test_slice_snapshot_roundtrip() {
        let mut allocator = NodeSliceAllocator::new(cluster(), 28).unwrap();
        let slice_a = allocator.assign("node-a").unwrap();
        let slice_b = allocator.assign("node-b").unwrap();
        let slice_c = allocator.assign("node-c").unwrap();

        let snapshot = allocator.snapshot();

        // Round-trip through JSON serialization too.
        let json = serde_json::to_string(&snapshot).unwrap();
        let snapshot_restored: NodeSliceAllocatorSnapshot = serde_json::from_str(&json).unwrap();

        let restored = NodeSliceAllocator::restore(snapshot_restored).unwrap();
        assert_eq!(restored.slice_for("node-a"), Some(slice_a));
        assert_eq!(restored.slice_for("node-b"), Some(slice_b));
        assert_eq!(restored.slice_for("node-c"), Some(slice_c));
        assert_eq!(restored.capacity(), 4096);
        assert_eq!(restored.slice_prefix(), 28);
        assert_eq!(restored.cluster_cidr(), cluster());
    }

    // The recorded slice is a /24 while the snapshot declares slice_prefix 28.
    #[test]
    fn test_slice_restore_rejects_mismatched_prefix() {
        let snapshot = NodeSliceAllocatorSnapshot {
            cluster_cidr: "10.200.0.0/16".to_string(),
            slice_prefix: 28,
            assigned: vec![("node-a".to_string(), "10.200.0.0/24".to_string())],
        };
        let err = NodeSliceAllocator::restore(snapshot).unwrap_err();
        assert!(matches!(err, OverlayError::InvalidCidr(_)));
    }

    // The recorded slice 10.201.0.0/28 lies outside the 10.200.0.0/16 cluster.
    #[test]
    fn test_slice_restore_rejects_out_of_cluster() {
        let snapshot = NodeSliceAllocatorSnapshot {
            cluster_cidr: "10.200.0.0/16".to_string(),
            slice_prefix: 28,
            assigned: vec![("node-a".to_string(), "10.201.0.0/28".to_string())],
        };
        let err = NodeSliceAllocator::restore(snapshot).unwrap_err();
        assert!(matches!(err, OverlayError::InvalidCidr(_)));
    }

    #[test]
    fn test_slice_hash_is_deterministic() {
        // Two allocators built fresh should produce the same first-assignment
        // index for the same node ID — critical for consistency across leader
        // restart on a fresh cluster (before any snapshot exists).
        let mut a = NodeSliceAllocator::new(cluster(), 28).unwrap();
        let mut b = NodeSliceAllocator::new(cluster(), 28).unwrap();
        let slice_a = a.assign("my-node-id").unwrap();
        let slice_b = b.assign("my-node-id").unwrap();
        assert_eq!(slice_a, slice_b);
    }

    #[test]
    fn test_slice_allocator_v6() {
        let cluster_v6: IpNet = "fd00:200::/48".parse().unwrap();
        let mut allocator = NodeSliceAllocator::new(cluster_v6, 64).unwrap();
        // /48 → /64 ⇒ 2^16 = 65536 slices
        assert_eq!(allocator.capacity(), 65536);

        let slice = allocator.assign("node-a").unwrap();
        assert_eq!(slice.prefix_len(), 64);
        assert!(cluster_v6.contains(&slice.network()));
    }

    // ========================
    // ServiceSubnetRegistry tests
    // ========================

    #[test]
    fn service_subnet_assign_is_idempotent() {
        let mut reg = ServiceSubnetRegistry::new(cluster(), 28).unwrap();
        let first = reg.assign("svc-a", "node-1").unwrap();
        let second = reg.assign("svc-a", "node-1").unwrap();
        assert_eq!(first, second);
        assert_eq!(reg.assigned_count(), 1);
        assert_eq!(reg.get("svc-a", "node-1"), Some(first));
    }

    #[test]
    fn service_subnet_two_services_disjoint() {
        let mut reg = ServiceSubnetRegistry::new(cluster(), 28).unwrap();
        let a = reg.assign("svc-a", "node-1").unwrap();
        let b = reg.assign("svc-b", "node-1").unwrap();
        assert_ne!(a, b);
        // Slices must be disjoint (neither contains the other's network address).
        assert!(!a.contains(&b.network()));
        assert!(!b.contains(&a.network()));
    }

    #[test]
    fn service_subnet_same_service_two_nodes_disjoint() {
        let mut reg = ServiceSubnetRegistry::new(cluster(), 28).unwrap();
        let a = reg.assign("svc-a", "node-1").unwrap();
        let b = reg.assign("svc-a", "node-2").unwrap();
        assert_ne!(a, b);
        assert!(!a.contains(&b.network()));
        assert!(!b.contains(&a.network()));
    }

    #[test]
    fn service_subnet_release_reclaims_slot() {
        let mut reg = ServiceSubnetRegistry::new(cluster(), 28).unwrap();
        let first = reg.assign("svc-a", "node-1").unwrap();
        let released = reg.release("svc-a", "node-1");
        assert_eq!(released, Some(first));
        assert_eq!(reg.get("svc-a", "node-1"), None);
        assert_eq!(reg.assigned_count(), 0);

        // Re-assign should land on the same slot because the hash is
        // deterministic and no other assignment is occupying it.
        let again = reg.assign("svc-a", "node-1").unwrap();
        assert_eq!(again, first);

        // Releasing an unknown key returns None.
        assert_eq!(reg.release("svc-z", "node-z"), None);
    }

    #[test]
    fn service_subnet_snapshot_restore_roundtrip() {
        let mut reg = ServiceSubnetRegistry::new(cluster(), 28).unwrap();
        let a = reg.assign("svc-a", "node-1").unwrap();
        let b = reg.assign("svc-a", "node-2").unwrap();
        let c = reg.assign("svc-b", "node-1").unwrap();
        let d = reg.assign("svc-b", "node-2").unwrap();

        let snapshot = reg.snapshot();

        // Round-trip through JSON to mimic the Raft serialization boundary.
        let json = serde_json::to_string(&snapshot).unwrap();
        let snapshot_restored: ServiceSubnetRegistrySnapshot = serde_json::from_str(&json).unwrap();

        // Snapshot ordering must be deterministic — re-snapshotting the same
        // state must serialize to the same bytes (critical for Raft hashing).
        let json2 = serde_json::to_string(&reg.snapshot()).unwrap();
        assert_eq!(json, json2);

        let restored = ServiceSubnetRegistry::restore(snapshot_restored).unwrap();
        assert_eq!(restored.get("svc-a", "node-1"), Some(a));
        assert_eq!(restored.get("svc-a", "node-2"), Some(b));
        assert_eq!(restored.get("svc-b", "node-1"), Some(c));
        assert_eq!(restored.get("svc-b", "node-2"), Some(d));
        assert_eq!(restored.assigned_count(), 4);
        assert_eq!(restored.slice_prefix(), 28);
        assert_eq!(restored.cluster_cidr(), cluster());
        assert_eq!(restored.capacity(), 4096);
    }

    #[test]
    fn service_subnet_exhaustion_errors() {
        // /29 with /30 slices → 2 slots total.
        let small: IpNet = "10.200.0.0/29".parse().unwrap();
        let mut reg = ServiceSubnetRegistry::new(small, 30).unwrap();
        assert_eq!(reg.capacity(), 2);

        reg.assign("svc-a", "node-1").unwrap();
        reg.assign("svc-a", "node-2").unwrap();
        assert_eq!(reg.assigned_count(), 2);

        let err = reg.assign("svc-a", "node-3").unwrap_err();
        assert!(matches!(err, OverlayError::NoAvailableIps));

        // But re-assigning an existing pair still succeeds (idempotent).
        let existing = reg.get("svc-a", "node-1").unwrap();
        assert_eq!(reg.assign("svc-a", "node-1").unwrap(), existing);
    }

    #[test]
    fn service_subnet_rejects_bad_prefix() {
        // slice prefix equal to cluster prefix.
        let err = ServiceSubnetRegistry::new(cluster(), 16).unwrap_err();
        assert!(matches!(err, OverlayError::InvalidCidr(_)));
        // slice prefix shorter than cluster prefix.
        let err = ServiceSubnetRegistry::new(cluster(), 8).unwrap_err();
        assert!(matches!(err, OverlayError::InvalidCidr(_)));
        // slice prefix beyond max for family.
        let err = ServiceSubnetRegistry::new(cluster(), 33).unwrap_err();
        assert!(matches!(err, OverlayError::InvalidCidr(_)));
    }

    #[test]
    fn service_subnet_hash_is_deterministic_across_instances() {
        // Two registries built fresh must assign the same (service, node)
        // pair to the same starting slot — same guarantee as
        // `NodeSliceAllocator::test_slice_hash_is_deterministic`.
        let mut a = ServiceSubnetRegistry::new(cluster(), 28).unwrap();
        let mut b = ServiceSubnetRegistry::new(cluster(), 28).unwrap();
        let slice_a = a.assign("svc-x", "node-x").unwrap();
        let slice_b = b.assign("svc-x", "node-x").unwrap();
        assert_eq!(slice_a, slice_b);
    }

    // The recorded assignment is a /24 while the snapshot declares slice_prefix 28.
    #[test]
    fn service_subnet_restore_rejects_mismatched_prefix() {
        let snapshot = ServiceSubnetRegistrySnapshot {
            cluster_cidr: "10.200.0.0/16".parse().unwrap(),
            slice_prefix: 28,
            assignments: vec![(
                ("svc-a".to_string(), "node-1".to_string()),
                "10.200.0.0/24".parse().unwrap(),
            )],
        };
        let err = ServiceSubnetRegistry::restore(snapshot).unwrap_err();
        assert!(matches!(err, OverlayError::InvalidCidr(_)));
    }

    // The recorded assignment 10.201.0.0/28 lies outside the 10.200.0.0/16 cluster.
    #[test]
    fn service_subnet_restore_rejects_out_of_cluster() {
        let snapshot = ServiceSubnetRegistrySnapshot {
            cluster_cidr: "10.200.0.0/16".parse().unwrap(),
            slice_prefix: 28,
            assignments: vec![(
                ("svc-a".to_string(), "node-1".to_string()),
                "10.201.0.0/28".parse().unwrap(),
            )],
        };
        let err = ServiceSubnetRegistry::restore(snapshot).unwrap_err();
        assert!(matches!(err, OverlayError::InvalidCidr(_)));
    }
}