// kbpf_basic/map/mod.rs

1//! BPF map implementations.
2//!
3mod array;
4mod flags;
5mod hash;
6mod lru;
7mod queue;
8pub(crate) mod stream;
9use alloc::{
10    boxed::Box,
11    string::{String, ToString},
12    sync::Arc,
13    vec,
14    vec::Vec,
15};
16use core::{any::Any, ffi::CStr, fmt::Debug, ops::Range};
17
18use crate::{
19    BpfError, BpfResult as Result, KernelAuxiliaryOps, PollWaker,
20    linux_bpf::{BpfMapType, bpf_attr},
21    map::flags::BpfMapCreateFlags,
22};
23
/// Signature of the per-element callback accepted by [`BpfMapCommonOps::for_each_elem`].
///
/// The callback is given the raw bytes of an entry's `key` and `value` plus the opaque
/// `ctx` pointer, and reports an `i32` status back to the map.
pub type BpfCallBackFn = fn(key: &[u8], value: &[u8], ctx: *const u8) -> i32;
26
27/// Common operations for BPF maps.
28pub trait BpfMapCommonOps: Send + Sync + Debug + Any {
29    /// Lookup an element in the map.
30    ///
31    /// See <https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_map_lookup_elem/>
32    fn lookup_elem(&mut self, _key: &[u8]) -> Result<Option<&[u8]>> {
33        Err(BpfError::EPERM)
34    }
35    /// Update an element in the map.
36    ///
37    /// See <https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_map_update_elem/>
38    fn update_elem(&mut self, _key: &[u8], _value: &[u8], _flags: u64) -> Result<()> {
39        Err(BpfError::EPERM)
40    }
41    /// Delete an element from the map.
42    ///
43    /// See <https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_map_delete_elem/>
44    fn delete_elem(&mut self, _key: &[u8]) -> Result<()> {
45        Err(BpfError::EPERM)
46    }
47    /// For each element in map, call callback_fn function with map,
48    /// callback_ctx and other map-specific parameters.
49    ///
50    /// See <https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_for_each_map_elem/>
51    fn for_each_elem(&mut self, _cb: BpfCallBackFn, _ctx: *const u8, _flags: u64) -> Result<u32> {
52        Err(BpfError::EPERM)
53    }
54    /// Look up an element with the given key in the map referred to by the file descriptor fd,
55    /// and if found, delete the element.
56    fn lookup_and_delete_elem(&mut self, _key: &[u8], _value: &mut [u8]) -> Result<()> {
57        Err(BpfError::EPERM)
58    }
59
60    /// erform a lookup in percpu map for an entry associated to key on cpu.
61    fn lookup_percpu_elem(&mut self, _key: &[u8], _cpu: u32) -> Result<Option<&[u8]>> {
62        Err(BpfError::EPERM)
63    }
64    /// Get the next key in the map. If key is None, get the first key.
65    ///
66    /// Called from syscall
67    fn get_next_key(&self, _key: Option<&[u8]>, _next_key: &mut [u8]) -> Result<()> {
68        Err(BpfError::EPERM)
69    }
70
71    /// Push an element value in map.
72    fn push_elem(&mut self, _value: &[u8], _flags: u64) -> Result<()> {
73        Err(BpfError::EPERM)
74    }
75
76    /// Pop an element value from map.
77    fn pop_elem(&mut self, _value: &mut [u8]) -> Result<()> {
78        Err(BpfError::EPERM)
79    }
80
81    /// Peek an element value from map.
82    fn peek_elem(&self, _value: &mut [u8]) -> Result<()> {
83        Err(BpfError::EPERM)
84    }
85
86    /// Freeze the map.
87    ///
88    /// It's useful for .rodata maps.
89    fn freeze(&self) -> Result<()> {
90        Err(BpfError::EPERM)
91    }
92
93    /// Get the first value pointer.
94    ///
95    /// This is used for BPF_PSEUDO_MAP_VALUE.
96    fn map_values_ptr_range(&self) -> Result<Range<usize>> {
97        Err(BpfError::EPERM)
98    }
99
100    /// Get the memory usage of the map.
101    fn map_mem_usage(&self) -> Result<usize>;
102
103    /// Memory map the map into user space. Return the physical address.
104    fn map_mmap(
105        &self,
106        _offset: usize,
107        _size: usize,
108        _read: bool,
109        _write: bool,
110    ) -> Result<Vec<usize>> {
111        Err(BpfError::EPERM)
112    }
113
114    /// Whether the map is readable.
115    fn readable(&self) -> bool {
116        false
117    }
118
119    /// Whether the map is writable.
120    fn writable(&self) -> bool {
121        false
122    }
123
124    /// Get a reference to the map as `Any`.
125    fn as_any(&self) -> &dyn Any;
126    /// Get a mutable reference to the map as `Any`.
127    fn as_any_mut(&mut self) -> &mut dyn Any;
128}
129
130/// Operations for per-cpu variants.
131pub trait PerCpuVariantsOps: Sync + Send + Debug + 'static {
132    /// Create a new per-cpu variants instance.
133    fn create<T: Clone + Sync + Send + 'static>(value: T) -> Option<Box<dyn PerCpuVariants<T>>>;
134    /// Get the number of CPUs.
135    fn num_cpus() -> u32;
136}
137
/// Accessors for a value that exists once per CPU.
// The `_mut` accessors intentionally hand out `&mut T` from `&self`, hence the allow.
#[allow(clippy::mut_from_ref)]
pub trait PerCpuVariants<T>: Sync + Send + Debug
where
    T: Clone + Sync + Send,
{
    /// Shared access to the current CPU's copy of the data.
    fn get(&self) -> &T;

    /// Mutable access to the current CPU's copy of the data.
    fn get_mut(&self) -> &mut T;

    /// Shared access to the copy of the data belonging to `cpu`.
    ///
    /// # Safety
    /// `cpu` does not have to be the current CPU, so this can reach another CPU's data.
    /// The caller must ensure that the CPU is valid and that the data is not accessed
    /// from a different CPU.
    unsafe fn force_get(&self, cpu: u32) -> &T;

    /// Mutable access to the copy of the data belonging to `cpu`.
    ///
    /// # Safety
    /// `cpu` does not have to be the current CPU, so this can reach another CPU's data.
    /// The caller must ensure that the CPU is valid and that the data is not accessed
    /// from a different CPU.
    unsafe fn force_get_mut(&self, cpu: u32) -> &mut T;
}
160
161bitflags::bitflags! {
162    /// flags for BPF_MAP_UPDATE_ELEM command
163    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
164    pub struct BpfMapUpdateElemFlags: u64 {
165        /// This flag has a value of 0, so setting it together with another flag has no impact. It is meant to be used if no other flags are specified to explicitly state that the command should update the map regardless of if the key already exists or not.
166        const BPF_ANY = 0;
167        /// If this flag is set, the command will make sure that the given key doesn't exist yet. If the same key already exists when this command is executed the -EEXIST error number will be returned.
168        const BPF_NOEXIST = 1;
169        /// If this flag is set, the command will make sure that the given key already exists. If no entry for this key exists, the -ENOENT error number will be returned
170        const BPF_EXISTS = 2;
171        /// If this flag is set, the command will acquire the spin-lock of the map value we are updating. If the map contains no spin-lock in its value, -EINVAL will be returned by the command.
172        const BPF_F_LOCK = 4;
173    }
174}
175
176/// Metadata for a BPF map.
177#[derive(Debug, Clone, Default)]
178pub struct BpfMapMeta {
179    /// The type of the BPF map.
180    pub map_type: BpfMapType,
181    /// The size of the key in bytes.
182    pub key_size: u32,
183    /// The size of the value in bytes.
184    pub value_size: u32,
185    /// The maximum number of entries in the map.
186    pub max_entries: u32,
187    /// The flags for the BPF map.
188    pub map_flags: BpfMapCreateFlags,
189    /// The name of the BPF map.
190    pub _map_name: String,
191}
192
193impl TryFrom<&bpf_attr> for BpfMapMeta {
194    type Error = BpfError;
195    fn try_from(attr: &bpf_attr) -> Result<Self> {
196        let u = unsafe { &attr.__bindgen_anon_1 };
197        let map_name_slice = unsafe {
198            core::slice::from_raw_parts(u.map_name.as_ptr() as *const u8, u.map_name.len())
199        };
200        let map_name = CStr::from_bytes_until_nul(map_name_slice)
201            .map_err(|_| BpfError::EINVAL)?
202            .to_str()
203            .map_err(|_| BpfError::EINVAL)?
204            .to_string();
205        let map_type = BpfMapType::try_from(u.map_type).map_err(|_| BpfError::EINVAL)?;
206
207        let map_flags = BpfMapCreateFlags::from_bits(u.map_flags).ok_or(BpfError::EINVAL)?;
208        Ok(BpfMapMeta {
209            map_type,
210            key_size: u.key_size,
211            value_size: u.value_size,
212            max_entries: u.max_entries,
213            map_flags,
214            _map_name: map_name,
215        })
216    }
217}
218
219/// A unified BPF map that can hold any type of BPF map.
220#[derive(Debug)]
221pub struct UnifiedMap {
222    inner_map: Box<dyn BpfMapCommonOps>,
223    map_meta: BpfMapMeta,
224}
225
226impl UnifiedMap {
227    fn new(map_meta: BpfMapMeta, map: Box<dyn BpfMapCommonOps>) -> Self {
228        Self {
229            inner_map: map,
230            map_meta,
231        }
232    }
233    /// Get a reference to the concrete map.
234    pub fn map(&self) -> &dyn BpfMapCommonOps {
235        self.inner_map.as_ref()
236    }
237
238    /// Get a mutable reference to the concrete map.
239    pub fn map_mut(&mut self) -> &mut dyn BpfMapCommonOps {
240        self.inner_map.as_mut()
241    }
242
243    /// Get the map metadata.
244    pub fn map_meta(&self) -> &BpfMapMeta {
245        &self.map_meta
246    }
247}
248
249/// Create a map and return a file descriptor that refers to
250/// the map.  The close-on-exec file descriptor flag
251/// is automatically enabled for the new file descriptor.
252///
253/// See <https://ebpf-docs.dylanreimerink.nl/linux/syscall/BPF_MAP_CREATE/>
254pub fn bpf_map_create<F: KernelAuxiliaryOps, T: PerCpuVariantsOps + 'static>(
255    map_meta: BpfMapMeta,
256    poll_waker: Option<Arc<dyn PollWaker>>,
257) -> Result<UnifiedMap> {
258    log::trace!("The map attr is {:#?}", map_meta);
259    let map: Box<dyn BpfMapCommonOps> = match map_meta.map_type {
260        BpfMapType::BPF_MAP_TYPE_ARRAY => {
261            let array_map = array::ArrayMap::new(&map_meta)?;
262            Box::new(array_map)
263        }
264        BpfMapType::BPF_MAP_TYPE_PERCPU_ARRAY => {
265            let per_cpu_array_map = array::PerCpuArrayMap::<T>::new(&map_meta)?;
266            Box::new(per_cpu_array_map)
267        }
268        BpfMapType::BPF_MAP_TYPE_PERF_EVENT_ARRAY => {
269            let perf_event_array_map = array::PerfEventArrayMap::new(&map_meta, T::num_cpus())?;
270            Box::new(perf_event_array_map)
271        }
272
273        BpfMapType::BPF_MAP_TYPE_CPUMAP
274        | BpfMapType::BPF_MAP_TYPE_DEVMAP
275        | BpfMapType::BPF_MAP_TYPE_DEVMAP_HASH => {
276            log::error!("bpf map type {:?} not implemented", map_meta.map_type);
277            Err(BpfError::EPERM)?
278        }
279        BpfMapType::BPF_MAP_TYPE_HASH => {
280            let hash_map = hash::BpfHashMap::new(&map_meta)?;
281            Box::new(hash_map)
282        }
283        BpfMapType::BPF_MAP_TYPE_PERCPU_HASH => {
284            let per_cpu_hash_map = hash::PerCpuHashMap::<T>::new(&map_meta)?;
285            Box::new(per_cpu_hash_map)
286        }
287        BpfMapType::BPF_MAP_TYPE_QUEUE => {
288            let queue_map = queue::QueueMap::new(&map_meta)?;
289            Box::new(queue_map)
290        }
291        BpfMapType::BPF_MAP_TYPE_STACK => {
292            let stack_map = queue::StackMap::new(&map_meta)?;
293            Box::new(stack_map)
294        }
295        BpfMapType::BPF_MAP_TYPE_LRU_HASH => {
296            let lru_hash_map = lru::LruMap::new(&map_meta)?;
297            Box::new(lru_hash_map)
298        }
299        BpfMapType::BPF_MAP_TYPE_LRU_PERCPU_HASH => {
300            let lru_per_cpu_hash_map = lru::PerCpuLruMap::<T>::new(&map_meta)?;
301            Box::new(lru_per_cpu_hash_map)
302        }
303        BpfMapType::BPF_MAP_TYPE_RINGBUF => {
304            let poll_waker = poll_waker.ok_or(BpfError::EINVAL)?;
305            let ringbuf_map = stream::RingBufMap::<F>::new(&map_meta, poll_waker)?;
306            Box::new(ringbuf_map)
307        }
308        _ => {
309            log::error!("bpf map type {:?} not implemented", map_meta.map_type);
310            Err(BpfError::EPERM)?
311        }
312    };
313    let unified_map = UnifiedMap::new(map_meta, map);
314    Ok(unified_map)
315}
316
/// Raw arguments of the element-level map commands (update, lookup, delete, ...).
#[derive(Debug, Clone, Copy)]
pub struct BpfMapUpdateArg {
    /// File descriptor identifying the map.
    pub map_fd: u32,
    /// Address of the key buffer.
    pub key: u64,
    /// Address of the value buffer.
    pub value: u64,
    /// Flags of the operation.
    pub flags: u64,
}
329
330impl From<&bpf_attr> for BpfMapUpdateArg {
331    fn from(attr: &bpf_attr) -> Self {
332        let u = unsafe { &attr.__bindgen_anon_2 };
333        let map_fd = u.map_fd;
334        let key = u.key;
335        let value = unsafe { u.__bindgen_anon_1.value };
336        let flags = u.flags;
337        BpfMapUpdateArg {
338            map_fd,
339            key,
340            value,
341            flags,
342        }
343    }
344}
345
/// Raw arguments of the "get next key" map command.
#[derive(Debug, Clone, Copy)]
pub struct BpfMapGetNextKeyArg {
    /// File descriptor identifying the map.
    pub map_fd: u32,
    /// Address of the current key; `None` asks for the first key.
    pub key: Option<u64>,
    /// Address of the buffer that receives the next key.
    pub next_key: u64,
}
356
357impl From<&bpf_attr> for BpfMapGetNextKeyArg {
358    fn from(attr: &bpf_attr) -> Self {
359        unsafe {
360            let u = &attr.__bindgen_anon_2;
361            BpfMapGetNextKeyArg {
362                map_fd: u.map_fd,
363                key: if u.key != 0 { Some(u.key) } else { None },
364                next_key: u.__bindgen_anon_1.next_key,
365            }
366        }
367    }
368}
369
370/// Create or update an element (key/value pair) in a specified map.
371///
372/// See <https://ebpf-docs.dylanreimerink.nl/linux/syscall/BPF_MAP_UPDATE_ELEM/>
373pub fn bpf_map_update_elem<F: KernelAuxiliaryOps>(arg: BpfMapUpdateArg) -> Result<()> {
374    F::get_unified_map_from_fd(arg.map_fd, |unified_map| {
375        let meta = unified_map.map_meta();
376        let key_size = meta.key_size as usize;
377        let value_size = meta.value_size as usize;
378        let mut key = vec![0u8; key_size];
379        let mut value = vec![0u8; value_size];
380        F::copy_from_user(arg.key as *const u8, key_size, &mut key)?;
381        F::copy_from_user(arg.value as *const u8, value_size, &mut value)?;
382        unified_map.map_mut().update_elem(&key, &value, arg.flags)
383    })
384}
385
386/// Freeze a map to prevent further modifications.
387pub fn bpf_map_freeze<F: KernelAuxiliaryOps>(map_fd: u32) -> Result<()> {
388    F::get_unified_map_from_fd(map_fd, |unified_map| unified_map.map().freeze())
389}
390
391///  Look up an element by key in a specified map and return its value.
392///
393/// See <https://ebpf-docs.dylanreimerink.nl/linux/syscall/BPF_MAP_LOOKUP_ELEM/>
394pub fn bpf_lookup_elem<F: KernelAuxiliaryOps>(arg: BpfMapUpdateArg) -> Result<()> {
395    // info!("<bpf_lookup_elem>: {:#x?}", arg);
396    F::get_unified_map_from_fd(arg.map_fd, |unified_map| {
397        let meta = unified_map.map_meta();
398        let key_size = meta.key_size as usize;
399        let value_size = meta.value_size as usize;
400        let mut key = vec![0u8; key_size];
401        F::copy_from_user(arg.key as *const u8, key_size, &mut key)?;
402        let map = unified_map.map_mut();
403        let r_value = map.lookup_elem(&key)?;
404        if let Some(r_value) = r_value {
405            F::copy_to_user(arg.value as *mut u8, value_size, r_value)?;
406            Ok(())
407        } else {
408            Err(BpfError::ENOENT)
409        }
410    })
411}
412/// Look up an element by key in a specified map and return the key of the next element.
413///
414/// - If key is `None`, the operation returns zero and sets the next_key pointer to the key of the first element.
415/// - If key is `Some(T)`, the operation returns zero and sets the next_key pointer to the key of the next element.
416/// - If key is the last element, returns -1 and errno is set to ENOENT.
417///
418/// See <https://ebpf-docs.dylanreimerink.nl/linux/syscall/BPF_MAP_GET_NEXT_KEY/>
419pub fn bpf_map_get_next_key<F: KernelAuxiliaryOps>(arg: BpfMapGetNextKeyArg) -> Result<()> {
420    // info!("<bpf_map_get_next_key>: {:#x?}", arg);
421    F::get_unified_map_from_fd(arg.map_fd, |unified_map| {
422        let meta = unified_map.map_meta();
423        let key_size = meta.key_size as usize;
424        let map = unified_map.map_mut();
425        let mut next_key = vec![0u8; key_size];
426        if let Some(key_ptr) = arg.key {
427            let mut key = vec![0u8; key_size];
428            F::copy_from_user(key_ptr as *const u8, key_size, &mut key)?;
429            map.get_next_key(Some(&key), &mut next_key)?;
430        } else {
431            map.get_next_key(None, &mut next_key)?;
432        };
433        F::copy_to_user(arg.next_key as *mut u8, key_size, &next_key)?;
434        Ok(())
435    })
436}
437
438/// Look up and delete an element by key in a specified map.
439///
440/// # WARN
441///
442/// Not all map types (particularly array maps) support this operation,
443/// instead a zero value can be written to the map value. Check the map types page to check for support.
444///
445/// See <https://ebpf-docs.dylanreimerink.nl/linux/syscall/BPF_MAP_DELETE_ELEM/>
446pub fn bpf_map_delete_elem<F: KernelAuxiliaryOps>(arg: BpfMapUpdateArg) -> Result<()> {
447    // info!("<bpf_map_delete_elem>: {:#x?}", arg);
448    F::get_unified_map_from_fd(arg.map_fd, |unified_map| {
449        let meta = unified_map.map_meta();
450        let key_size = meta.key_size as usize;
451        let mut key = vec![0u8; key_size];
452        F::copy_from_user(arg.key as *const u8, key_size, &mut key)?;
453        unified_map.map_mut().delete_elem(&key)
454    })
455}
456
457/// Iterate and fetch multiple elements in a map.
458///
459/// See <https://ebpf-docs.dylanreimerink.nl/linux/syscall/BPF_MAP_LOOKUP_BATCH/>
460pub fn bpf_map_lookup_batch<F: KernelAuxiliaryOps>(_arg: BpfMapUpdateArg) -> Result<usize> {
461    // TODO: implement bpf_map_lookup_batch
462    Err(BpfError::EPERM)
463}
464
465/// Look up an element with the given key in the map referred to by the file descriptor fd,
466/// and if found, delete the element.
467///
468/// For BPF_MAP_TYPE_QUEUE and BPF_MAP_TYPE_STACK map types, the flags argument needs to be set to 0,
469/// but for other map types, it may be specified as:
470/// - BPF_F_LOCK : If this flag is set, the command will acquire the spin-lock of the map value we are looking up.
471///
472/// If the map contains no spin-lock in its value, -EINVAL will be returned by the command.
473///
474/// The BPF_MAP_TYPE_QUEUE and BPF_MAP_TYPE_STACK map types implement this command as a “pop” operation,
475/// deleting the top element rather than one corresponding to key.
476/// The key and key_len parameters should be zeroed when issuing this operation for these map types.
477///
478/// This command is only valid for the following map types:
479/// - BPF_MAP_TYPE_QUEUE
480/// - BPF_MAP_TYPE_STACK
481/// - BPF_MAP_TYPE_HASH
482/// - BPF_MAP_TYPE_PERCPU_HASH
483/// - BPF_MAP_TYPE_LRU_HASH
484/// - BPF_MAP_TYPE_LRU_PERCPU_HASH
485///
486///
487/// See <https://ebpf-docs.dylanreimerink.nl/linux/syscall/BPF_MAP_LOOKUP_AND_DELETE_ELEM/>
488pub fn bpf_map_lookup_and_delete_elem<F: KernelAuxiliaryOps>(arg: BpfMapUpdateArg) -> Result<()> {
489    // info!("<bpf_map_lookup_and_delete_elem>: {:#x?}", arg);
490    F::get_unified_map_from_fd(arg.map_fd, |unified_map| {
491        let meta = unified_map.map_meta();
492        let key_size = meta.key_size as usize;
493        let value_size = meta.value_size as usize;
494        let mut key = vec![0u8; key_size];
495        let mut value = vec![0u8; value_size];
496        F::copy_from_user(arg.key as *const u8, key_size, &mut key)?;
497        unified_map
498            .map_mut()
499            .lookup_and_delete_elem(&key, &mut value)?;
500        F::copy_to_user(arg.value as *mut u8, value_size, &value)?;
501        Ok(())
502    })
503}
504
#[cfg(test)]
mod tests {
    use alloc::{boxed::Box, vec::Vec};
    use core::fmt::Debug;

    use super::{PerCpuVariants, PerCpuVariantsOps};

    /// Test factory that reports one CPU and always produces a container.
    #[derive(Debug)]
    pub struct DummyPerCpuCreator;

    /// Test factory that reports zero CPUs and never produces a container.
    #[derive(Debug)]
    pub struct DummyPerCpuCreatorFalse;

    /// Vec-backed per-CPU container; slot `i` stands in for CPU `i`.
    pub struct DummyPerCpuVariants<T>(Vec<T>);

    impl<T> Debug for DummyPerCpuVariants<T> {
        /// Print only the type name, so `T` does not have to implement `Debug`.
        fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
            f.debug_tuple("DummyPerCpuVariants").finish()
        }
    }

    impl<T: Clone + Sync + Send> PerCpuVariants<T> for DummyPerCpuVariants<T> {
        /// The "current CPU" is always slot 0 in this dummy.
        fn get(&self) -> &T {
            &self.0[0]
        }

        /// Mutable view of slot 0, obtained by casting away constness.
        fn get_mut(&self) -> &mut T {
            let slot = self.0.as_ptr() as *mut T;
            // NOTE(review): test-only shortcut; relies on the tests never holding
            // another reference to slot 0 at the same time.
            unsafe { &mut *slot }
        }

        /// Bounds-checked shared access to the slot of `cpu`.
        unsafe fn force_get(&self, cpu: u32) -> &T {
            &self.0[cpu as usize]
        }

        /// Mutable access to the slot of `cpu`; no bounds check is performed.
        unsafe fn force_get_mut(&self, cpu: u32) -> &mut T {
            let base = self.0.as_ptr();
            let slot = unsafe { base.add(cpu as usize) } as *mut T;
            unsafe { &mut *slot }
        }
    }

    impl PerCpuVariantsOps for DummyPerCpuCreator {
        /// Clone `value` once per reported CPU.
        fn create<T: Clone + Sync + Send + 'static>(
            value: T,
        ) -> Option<Box<dyn PerCpuVariants<T>>> {
            let slots: Vec<T> = (0..Self::num_cpus()).map(|_| value.clone()).collect();
            Some(Box::new(DummyPerCpuVariants(slots)))
        }

        fn num_cpus() -> u32 {
            1
        }
    }

    impl PerCpuVariantsOps for DummyPerCpuCreatorFalse {
        /// Always fails, to exercise the "no per-CPU storage" path.
        fn create<T: Clone + Sync + Send + 'static>(
            _value: T,
        ) -> Option<Box<dyn PerCpuVariants<T>>> {
            None
        }

        fn num_cpus() -> u32 {
            0
        }
    }
}