Skip to main content

arcbox_datapath/
lib.rs

//! Lock-free datapath primitives for high-performance packet processing.
//!
//! Zero-copy buffer pools, SPSC/MPMC ring buffers, cache-line-aligned
//! statistics, and the [`CachePadded`] primitive — pure, with no VM, VirtIO,
//! or I/O dependencies. Extracted from `arcbox-net` so any high-throughput
//! consumer (host-side proxy, VM datapath, packet tools) can reuse them.
//!
//! # Performance Targets
//!
//! - Ring buffer throughput: >100M ops/sec
//! - Packet pool allocation: O(1) constant time
//! - Zero memory copies in hot path
13
14pub mod error;
15pub mod frame_buf;
16pub mod pool;
17pub mod ring;
18pub mod stats;
19
20pub use error::{Error, Result};
21pub use frame_buf::FrameBuf;
22pub use pool::{PacketBuffer, PacketPool, PacketRef};
23pub use ring::LockFreeRing;
24pub use stats::DatapathStats;
25
/// Size of one cache line in bytes, used for padding (64 on most architectures).
pub const CACHE_LINE_SIZE: usize = 64;

/// Batch size used for packet processing when no other size is chosen.
pub const DEFAULT_BATCH_SIZE: usize = 64;

/// Ring buffer capacity used by default; must be a power of 2.
pub const DEFAULT_RING_CAPACITY: usize = 4_096;

/// Packet pool capacity used by default.
pub const DEFAULT_POOL_CAPACITY: usize = 8_192;

/// Largest supported packet size (MTU + headers); numerically equal to `u16::MAX`.
pub const MAX_PACKET_SIZE: usize = 65_535;
40
/// Wrapper that aligns its contents to a 64-byte boundary so that adjacent
/// values do not land on the same cache line (prevents false sharing).
///
/// The alignment has to be written as a literal in the `repr` attribute; it
/// is the same value as [`CACHE_LINE_SIZE`]. The wrapped value is the public
/// field `.0`.
#[derive(Clone, Copy, Debug, Default)]
#[repr(C, align(64))]
pub struct CachePadded<T>(pub T);

impl<T> CachePadded<T> {
    /// Wraps `value` in a cache-line-aligned container.
    #[inline]
    pub const fn new(value: T) -> Self {
        CachePadded(value)
    }

    /// Borrows the wrapped value.
    #[inline]
    pub const fn get(&self) -> &T {
        let Self(inner) = self;
        inner
    }

    /// Mutably borrows the wrapped value.
    #[inline]
    pub fn get_mut(&mut self) -> &mut T {
        let Self(inner) = self;
        inner
    }
}
65
/// Issues a software prefetch hint for data that is about to be read.
///
/// The hint asks the CPU to bring the data at `ptr` into cache before it is
/// needed, which reduces memory latency in tight loops. On `x86_64` it calls
/// `_mm_prefetch` with `_MM_HINT_T0`; on `aarch64` it emits `prfm pldl1keep`;
/// on every other architecture the call does nothing.
#[allow(clippy::inline_always)] // forced inline: the body is a single hint instruction, a call would cost more than it saves
#[inline(always)]
pub fn prefetch_read<T>(ptr: *const T) {
    #[cfg(target_arch = "x86_64")]
    {
        use core::arch::x86_64::{_mm_prefetch, _MM_HINT_T0};
        // SAFETY: `_mm_prefetch` only issues a cache hint for the address.
        // NOTE(review): relies on the instruction not faulting for arbitrary
        // addresses — confirm against the Intel reference.
        unsafe {
            _mm_prefetch(ptr.cast::<i8>(), _MM_HINT_T0);
        }
    }
    #[cfg(target_arch = "aarch64")]
    {
        // SAFETY: PRFM PLDL1KEEP is a prefetch-for-load hint that keeps the
        // line in L1; the asm block has no outputs and is declared not to
        // touch the stack or flags.
        // NOTE(review): relies on PRFM not faulting for arbitrary addresses —
        // confirm against the Arm reference.
        unsafe {
            core::arch::asm!(
                "prfm pldl1keep, [{ptr}]",
                ptr = in(reg) ptr,
                options(nostack, preserves_flags)
            );
        }
    }
    #[cfg(not(any(target_arch = "aarch64", target_arch = "x86_64")))]
    {
        // No prefetch instruction is wired up for this architecture.
        let _ = ptr;
    }
}
91
/// Issues a software prefetch hint for data that is about to be written.
///
/// On `aarch64` this emits `prfm pstl1keep` (prefetch for store). On `x86_64`
/// it calls `_mm_prefetch` with `_MM_HINT_T0`, the same hint the read variant
/// uses. On every other architecture the call does nothing.
#[allow(clippy::inline_always)] // forced inline: the body is a single hint instruction, a call would cost more than it saves
#[inline(always)]
pub fn prefetch_write<T>(ptr: *mut T) {
    #[cfg(target_arch = "x86_64")]
    {
        use core::arch::x86_64::{_mm_prefetch, _MM_HINT_T0};
        // SAFETY: `_mm_prefetch` only issues a cache hint for the address.
        // NOTE(review): relies on the instruction not faulting for arbitrary
        // addresses — confirm against the Intel reference.
        unsafe {
            _mm_prefetch(ptr.cast::<i8>(), _MM_HINT_T0);
        }
    }
    #[cfg(target_arch = "aarch64")]
    {
        // SAFETY: PRFM PSTL1KEEP is a prefetch-for-store hint that keeps the
        // line in L1; the asm block has no outputs and is declared not to
        // touch the stack or flags.
        // NOTE(review): relies on PRFM not faulting for arbitrary addresses —
        // confirm against the Arm reference.
        unsafe {
            core::arch::asm!(
                "prfm pstl1keep, [{ptr}]",
                ptr = in(reg) ptr,
                options(nostack, preserves_flags)
            );
        }
    }
    #[cfg(not(any(target_arch = "aarch64", target_arch = "x86_64")))]
    {
        // No prefetch instruction is wired up for this architecture.
        let _ = ptr;
    }
}
114
/// Returns `true` when `n` is an exact power of 2.
///
/// `0` is not a power of 2 and yields `false`.
#[inline]
pub const fn is_power_of_two(n: usize) -> bool {
    // A power of two has exactly one bit set; zero has none.
    n.count_ones() == 1
}
120
/// Rounds `n` up to the nearest power of 2.
///
/// A value that already is a power of 2 is returned unchanged, and `0` maps
/// to `1`.
///
/// Delegates to [`usize::next_power_of_two`], so the result is correct for
/// any pointer width. The earlier hand-rolled bit smear shifted by a fixed
/// 32 bits, which is out of range when `usize` is only 32 bits wide.
///
/// # Panics
///
/// If the next power of 2 does not fit in `usize`, this panics in builds with
/// overflow checks enabled and returns `0` otherwise, as the standard-library
/// method does.
#[inline]
pub const fn next_power_of_two(n: usize) -> usize {
    n.next_power_of_two()
}
136
#[cfg(test)]
mod tests {
    use super::*;

    /// A padded `u64` occupies exactly one cache line.
    #[test]
    fn test_cache_padded_size() {
        let padded_bytes = std::mem::size_of::<CachePadded<u64>>();
        assert_eq!(padded_bytes, CACHE_LINE_SIZE);
    }

    /// Powers of two are accepted; zero and other values are rejected.
    #[test]
    fn test_power_of_two() {
        for n in [1, 2, 4, 4096] {
            assert!(is_power_of_two(n), "{n} should be a power of two");
        }
        for n in [0, 3, 5] {
            assert!(!is_power_of_two(n), "{n} should not be a power of two");
        }
    }

    /// Each input rounds up to the expected power of two.
    #[test]
    fn test_next_power_of_two() {
        let cases = [(0, 1), (1, 1), (2, 2), (3, 4), (5, 8), (1000, 1024)];
        for (input, expected) in cases {
            assert_eq!(next_power_of_two(input), expected, "input = {input}");
        }
    }
}