// oxidebpf/maps/mod.rs

1pub(crate) mod perf_map_poller;
2
3use std::{
4    fmt::{Debug, Display, Formatter},
5    iter::FusedIterator,
6    os::{
7        raw::{c_long, c_uchar, c_uint, c_ulong, c_ushort},
8        unix::io::RawFd,
9    },
10    ptr::null_mut,
11    slice,
12    sync::atomic::{self, AtomicPtr, Ordering},
13};
14
15use crate::{
16    bpf::{
17        constant::bpf_map_type::{self, BPF_MAP_TYPE_PROG_ARRAY},
18        syscall::{bpf_map_create, bpf_map_lookup_elem, bpf_map_update_elem},
19        MapConfig,
20    },
21    cpu_info,
22    error::OxidebpfError,
23    perf::{
24        constant::perf_event_type,
25        syscall::{perf_event_ioc_disable, perf_event_ioc_enable},
26        PerfEventAttr,
27    },
28    program_version::PerfBufferSize,
29    LOGGER,
30};
31
32use nix::errno::errno;
33use slog::info;
34
/// Mirror of the kernel's `struct perf_event_header` (linux/perf_event.h).
/// Every record in the perf ring buffer begins with this header.
#[repr(C)]
#[derive(Clone, Copy)]
struct PerfEventHeader {
    type_: c_uint,  // record type, e.g. PERF_RECORD_SAMPLE or PERF_RECORD_LOST
    misc: c_ushort, // misc flags from the kernel; not inspected here
    size: c_ushort, // total record size in bytes, including this header
}
/// Mirror of the kernel's `PERF_RECORD_LOST` record: reports samples the
/// kernel dropped (typically because the ring buffer was full).
#[repr(C)]
pub struct PerfEventLostSamples {
    header: PerfEventHeader,
    // presumably the perf event id that lost the samples — TODO confirm
    pub id: u64,
    // number of samples that were lost
    pub count: u64,
}
48
/// Mirror of the kernel's `PERF_RECORD_SAMPLE` record as emitted by
/// `bpf_perf_event_output`: a size-prefixed blob of raw bytes.
#[repr(C)]
pub struct PerfEventSample {
    header: PerfEventHeader,
    size: u32,
    // array to data of len `size` stored as char[1] because Rust's
    // DST and C's DST are not FFI compatible. This needs to be a
    // char[] to avoid padding issues since chars are special in c
    // padding (in that they do not get pre-padded)
    data: [std::os::raw::c_char; 1],
}
59
/// A decoded record read out of a perf ring buffer.
#[derive(Debug)]
pub(crate) enum PerfEvent {
    /// Raw bytes of one sample emitted by the eBPF program.
    Sample(Vec<u8>),
    /// Count of samples the kernel dropped.
    Lost(u64),
}
65
/// Bitfield view of the capability flags in `perf_event_mmap_page`;
/// individual bits are not decoded here.
#[repr(align(8), C)]
#[derive(Clone, Copy)]
struct PerfMemBitfield {
    field: c_ulong,
}
71
/// Mirror of the anonymous capabilities union in `perf_event_mmap_page`:
/// the same 64 bits viewed either as one word or as a bitfield.
#[repr(align(8), C)]
union PerfMemCapabilitiesBitfield {
    capabilities: c_ulong,
    bitfield: PerfMemBitfield,
}
77
78impl Debug for PerfMemCapabilitiesBitfield {
79    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
80        write!(f, "debug not implemented")
81    }
82}
83
/// Mirror of the kernel's `struct perf_event_mmap_page` — the metadata
/// page that heads every perf ring-buffer mapping. The kernel publishes
/// `data_head` and userspace acknowledges reads via `data_tail`.
#[repr(C)]
#[derive(Debug)]
struct PerfMem {
    version: c_uint,
    compat_version: c_uint,
    lock: c_uint,
    index: c_uint,
    offset: c_long,
    time_enabled: c_ulong,
    time_running: c_ulong,
    capabilities: PerfMemCapabilitiesBitfield,
    pmc_width: c_ushort,
    time_shift: c_ushort,
    time_mult: c_uint,
    time_offset: c_ulong,
    time_zero: c_ulong,
    size: c_uint,
    reserved_1: c_uint,
    time_cycles: c_ulong,
    time_mask: c_ulong,
    // padding so data_head lands on its own cache line, per the kernel layout
    __reserved: [c_uchar; 928usize],
    data_head: c_ulong, // written by the kernel: where the next record will go
    data_tail: c_ulong, // written by userspace: last position fully consumed
    data_offset: c_ulong,
    data_size: c_ulong,
    aux_head: c_ulong,
    aux_tail: c_ulong,
    aux_offset: c_ulong,
    aux_size: c_ulong,
}
114
/// One per-CPU perf ring buffer: the mmap'd region (metadata page plus
/// `buffer_size` bytes of data) and the perf event fd backing it.
pub struct PerfMap {
    pub(crate) name: String,
    base_ptr: AtomicPtr<PerfMem>, // start of the mmap; first page is metadata
    buffer_size: usize,           // data-area size in bytes (power-of-two pages)
    page_size: usize,             // system page size used for this mapping
    cpuid: i32,                   // CPU this buffer was opened on
    pub(crate) ev_fd: RawFd,      // fd from perf_event_open
    ev_name: String,
}
124
/// A `BPF_MAP_TYPE_PROG_ARRAY` map (program fd array, used for tail calls).
#[derive(Clone, Debug)]
pub(crate) struct ProgMap {
    pub base: Map,
}
129
/// A `BPF_MAP_TYPE_ARRAY` map with 4-byte keys; see [`ArrayMap::new`].
#[derive(Clone, Debug)]
pub struct ArrayMap {
    pub base: Map,
}
134
/// A `BPF_MAP_TYPE_HASH` map with caller-chosen key/value sizes; see
/// [`BpfHashMap::new`].
#[derive(Clone, Debug)]
pub struct BpfHashMap {
    pub base: Map,
}
139
/// Common state shared by all concrete map wrappers: the kernel-side fd
/// plus the configuration it was created with.
#[derive(Debug)]
pub struct Map {
    pub name: String,       // The name of the map
    fd: RawFd,              // The file descriptor that represents the map
    map_config: MapConfig,  // The first struct in the bpf_attr union
    map_config_size: usize, // The size of the map_config field in bytes
    loaded: bool,           // Whether or not the map has been loaded
}
148
impl Clone for Map {
    /// Clones the handle by duplicating the underlying fd with
    /// `F_DUPFD_CLOEXEC` (searching from fd 3 so the standard streams
    /// are never returned), so each clone owns an independent fd.
    fn clone(&self) -> Self {
        Self {
            name: self.name.clone(),
            // NOTE(review): if fcntl fails this silently stores -1;
            // read()/write() reject negative fds later, but the error is
            // swallowed here — confirm that is intended.
            fd: unsafe { libc::fcntl(self.fd, libc::F_DUPFD_CLOEXEC, 3) },
            map_config: self.map_config,
            map_config_size: self.map_config_size,
            loaded: self.loaded,
        }
    }
}
160
161impl Drop for Map {
162    fn drop(&mut self) {
163        unsafe {
164            libc::close(self.fd);
165        }
166    }
167}
168
/// This trait specifies a map that can be read from or written to (e.g., array types).
///
/// `T` is the value type stored in the map and `U` is the key type.
pub trait RWMap<T, U> {
    /// Reads the value stored at `key`.
    ///
    /// # Safety
    ///
    /// This function should only be called when `std::mem::size_of::<T>()` matches
    /// the value in the map being read from and when `std::mem::size_of::<U>()`
    /// matches the key.
    unsafe fn read(&self, key: U) -> Result<T, OxidebpfError>;

    /// Writes `value` at `key`.
    ///
    /// # Safety
    ///
    /// This function should only be called when `std::mem::size_of::<T>()` matches
    /// the value in the map being written to and when `std::mem::size_of::<U>()`
    /// matches the key.
    unsafe fn write(&self, key: U, value: T) -> Result<(), OxidebpfError>;
}
185
/// Implemented by per-CPU resources; reports which CPU the instance is bound to.
pub trait PerCpu {
    fn cpuid(&self) -> i32;
}
189
190impl ProgMap {
191    pub(crate) fn new(map_name: &str, max_entries: u32) -> Result<Self, OxidebpfError> {
192        let fd = bpf_map_create(BPF_MAP_TYPE_PROG_ARRAY, 4u32, 4u32, max_entries)?;
193        let map = Map {
194            name: map_name.to_string(),
195            fd,
196            map_config: MapConfig::new(bpf_map_type::BPF_MAP_TYPE_PROG_ARRAY, 4, 4, max_entries),
197            map_config_size: std::mem::size_of::<MapConfig>(),
198            loaded: true,
199        };
200        Ok(ProgMap { base: map })
201    }
202
203    // TODO: these functions are a good candidate for a trait
204    pub(crate) fn set_fd(&mut self, fd: RawFd) {
205        self.base.fd = fd;
206    }
207
208    pub(crate) fn get_fd(&self) -> &RawFd {
209        &self.base.fd
210    }
211
212    pub(crate) fn is_loaded(&self) -> bool {
213        self.base.loaded
214    }
215}
216
impl PerfMap {
    /// Opens one perf event and ring buffer per online CPU, returning one
    /// `PerfMap` per buffer.
    ///
    /// The requested size is converted to pages and rounded *down* to a
    /// power of two (the perf mmap API requires 2^n data pages), then one
    /// extra page is mapped for the kernel's metadata header.
    // we want cpuid and give back a channel to read from
    pub fn new_group(
        map_name: &str,
        event_attr: PerfEventAttr,
        event_buffer_size: PerfBufferSize,
    ) -> Result<Vec<PerfMap>, OxidebpfError> {
        let page_size = page_size()?;
        let online_cpus = cpu_info::online()?;
        // total sizing is split evenly across the online CPUs
        let buffer_size = match event_buffer_size {
            PerfBufferSize::PerCpu(size) => size,
            PerfBufferSize::Total(total) => total / online_cpus.len(),
        };
        let page_count = (buffer_size as f64 / page_size as f64).ceil() as usize;
        if page_count == 0 {
            info!(LOGGER.0, "PerfMap::new_group(); bad page count (0)");
            return Err(OxidebpfError::BadPageCount);
        }

        // perf requires a power-of-two number of data pages
        let page_count = lte_power_of_two(page_count);
        let buffer_size = page_count * page_size;
        // allocate an extra page for metadata
        let mmap_size = buffer_size + page_size;

        #[cfg(feature = "metrics")]
        {
            metrics::describe_histogram!("perfmap.buffer_unread_pct", metrics::Unit::Percent, "");
            let labels = [("map_name", map_name.to_owned())];

            metrics::gauge!(
                "perfmap.buffer_size_kb",
                buffer_size as f64 / 1024_f64,
                &labels
            );

            metrics::gauge!("perfmap.num_buffers", online_cpus.len() as f64, &labels);
        }

        online_cpus
            .into_iter()
            .map(|cpuid| {
                // pid = -1, group_fd = -1: all processes, no event group
                let fd: RawFd =
                    crate::perf::syscall::perf_event_open(&event_attr, -1, cpuid, -1, 0)?;
                let base_ptr = unsafe { create_raw_perf(fd, mmap_size) }?;

                perf_event_ioc_enable(fd)?;

                Ok(PerfMap {
                    name: map_name.to_owned(),
                    base_ptr: AtomicPtr::new(base_ptr),
                    buffer_size,
                    page_size,
                    cpuid,
                    ev_fd: fd,
                    ev_name: "".to_owned(),
                })
            })
            .collect()
    }

    /// Reads all available events
    ///
    /// Stops reading if it encounters an unexpected perf event.
    ///
    /// When the returned iterator is dropped it internally marks the
    /// data as "read" so the ebpf program can re-use that
    /// data. Because of this we should process the iterator fast as
    /// to free space for more events.
    ///
    /// # Safety
    ///
    /// This is only safe if a single iterator is running per perfmap.
    /// This function is marked as `&self` for easiness of use and
    /// because it is internal only but it probably should be `&mut
    /// self`. When the iterator is dropped it internally changes data
    /// in the mmap that the kernel manages (data_tail to be precise)
    /// to tell it what is the last bit we read so we shouldn't have
    /// multiple mutations at the same time.
    pub(crate) unsafe fn read_all(
        &self,
    ) -> impl Iterator<Item = Result<PerfEvent, OxidebpfError>> + '_ {
        PerfEventIterator::new(self)
    }
}
301
/// Iterator over the records currently available in one perf ring buffer.
/// Snapshot of head/tail is taken at construction; the kernel-visible
/// tail is only written back on drop.
struct PerfEventIterator<'a> {
    // modified by iterator
    data_tail: u64,
    data_head: u64,
    errored: bool,
    copy_buf: Vec<u8>, // re-usable buffer to make ring joins be contiguous

    // calculated at creation
    buffer_size: usize,
    base: *const u8,       // start of the data area (one page past metadata)
    metadata: *mut PerfMem, // the kernel's mmap metadata page

    // gives us the lifetime we need to prevent the iterator outliving
    // the perfmap
    _marker: std::marker::PhantomData<&'a PerfMap>,
}
318
impl<'a> PerfEventIterator<'a> {
    /// Snapshots `data_head`/`data_tail` from the mmap metadata page and
    /// positions the iterator at the start of the data area.
    fn new(map: &'a PerfMap) -> Self {
        // the first page is just metadata
        let metadata = map.base_ptr.load(Ordering::SeqCst);

        // second page onwards is where the data starts
        let base = unsafe { (metadata as *const u8).add(map.page_size) };

        // per the docs: "On SMP-capable platforms, after reading
        // the data_head value, user space should issue an rmb()"
        let data_head = unsafe { (*metadata).data_head };
        atomic::fence(std::sync::atomic::Ordering::Acquire);

        let data_tail = unsafe { (*metadata).data_tail };

        let buffer_size = map.buffer_size;

        #[cfg(feature = "metrics")]
        {
            let labels = [("map_name", map.name.clone())];

            // how full the buffer was at snapshot time, as a percentage
            let used = (data_head - data_tail) % buffer_size as u64;
            let pct_used = used as f64 / (buffer_size as f64 / 100_f64);
            metrics::histogram!("perfmap.buffer_unread_pct", pct_used, &labels);
        }

        PerfEventIterator {
            data_tail,
            data_head,
            errored: false,
            copy_buf: vec![],
            buffer_size,
            base,
            metadata,
            _marker: std::marker::PhantomData,
        }
    }
}
357
impl<'a> Iterator for PerfEventIterator<'a> {
    type Item = Result<PerfEvent, OxidebpfError>;

    /// Yields the next record between the snapshotted tail and head.
    /// Records that wrap around the end of the ring are first copied into
    /// `copy_buf` so they can be parsed as one contiguous slice.
    fn next(&mut self) -> Option<Self::Item> {
        // exhausted, or a previous record failed to parse
        if self.data_head == self.data_tail || self.errored {
            return None;
        }

        // tail is a monotonically increasing counter; modulo maps it into the ring
        let start_offset = (self.data_tail % self.buffer_size as u64) as usize;

        unsafe {
            let mut header = self.base.add(start_offset) as *const PerfEventHeader;
            let event_size = (*header).size as usize;
            let capacity_remaining = self.buffer_size - start_offset;

            if capacity_remaining < event_size {
                // record wraps past the end of the ring:
                // clear old data and reserve just enough for our event
                self.copy_buf.clear();
                self.copy_buf.reserve_exact(event_size);

                // copy last remaining end bits of ring buffer
                self.copy_buf.extend_from_slice(slice::from_raw_parts(
                    header as *const u8,
                    capacity_remaining,
                ));

                // wrap around start to copy first initial bits
                self.copy_buf.extend_from_slice(slice::from_raw_parts(
                    self.base,
                    event_size - capacity_remaining,
                ));

                header = self.copy_buf.as_ptr() as *const PerfEventHeader;
            }

            let event = read_event(header);

            // only update the internal tail for now. We will update
            // the actual tail when dropping the iterator. It would be
            // safe to update the tail now though since the data is
            // copied. We could consider modifying the tail sooner if
            // we aren't sending events fast enough in the future.
            self.data_tail += event_size as u64;

            if event.is_err() {
                // stop iteration on errors but still propagate that
                // first error
                self.errored = true;
            }

            Some(event)
        }
    }
}
412
/// Reads either a sample or a lost event. Errors for anything else
///
/// # Safety
///
/// `event` has to come from a valid PerfEventHeader and have
/// memory past the end of the header for the actual data of the event
unsafe fn read_event(event: *const PerfEventHeader) -> Result<PerfEvent, OxidebpfError> {
    match (*event).type_ {
        perf_event_type::PERF_RECORD_SAMPLE => {
            let sample = event as *const PerfEventSample;
            let size = (*sample).size;
            // data is saved as a char[1] but it is really a char[]
            // (dynamic) in the stack. Rust doesn't like thin pointers
            // to DSTs so we need to carefully get the pointer to the
            // array so we can then make a Rust slice out of it.
            let data = std::ptr::addr_of!((*sample).data) as *const u8;

            // copies the data over which is not stricly necessary but
            // avoids playing safety chess with std::mem::forget since
            // we do not want to accidentally drop the data owned by
            // the perf buffer
            let data = std::slice::from_raw_parts(data, size as usize).to_vec();

            Ok(PerfEvent::Sample(data))
        }
        perf_event_type::PERF_RECORD_LOST => {
            let sample = event as *const PerfEventLostSamples;
            Ok(PerfEvent::Lost((*sample).count))
        }
        // any other record type is unexpected for a BPF output buffer
        unknown => Err(OxidebpfError::UnknownPerfEvent(unknown)),
    }
}
443
impl Drop for PerfEventIterator<'_> {
    /// Publishes the consumed position back to the kernel so it can
    /// reuse the space we've read.
    fn drop(&mut self) {
        unsafe {
            // fence first so all our reads of the ring data complete
            // before the kernel is allowed to overwrite it
            atomic::fence(std::sync::atomic::Ordering::SeqCst);
            (*self.metadata).data_tail = self.data_tail;
        }
    }
}
452
// once next() returns None (caught up, or errored) it keeps returning None
impl<'a> FusedIterator for PerfEventIterator<'a> {}
454
impl PerCpu for PerfMap {
    /// The CPU this ring buffer was opened on (see `new_group`).
    fn cpuid(&self) -> i32 {
        self.cpuid
    }
}
460
impl BpfHashMap {
    /// Create a new BpfHashMap
    ///
    /// Calling new will create a new BPF_MAP_TYPE_HASH map. It stores some meta data
    /// to track it. The hash map supports read and write operations to access the
    /// members of the map
    ///
    /// # Safety
    ///
    /// The `value_size` and `key_size` you pass in needs to match exactly with the size of the struct/type
    /// used by any other BPF program that might be using this map. Any `T` or `U` you use in subsequent
    /// `read()` and `write()` calls needs to match exactly (e.g., with `#[repr(C)]`) with the struct/type
    /// used by the BPF program as well. Additionally, `std::mem::size_of::<T>()` must match the given
    /// `value_size` here exactly and `std::mem::size_of::<U>() for the key`. If these conditions are not met,
    /// the `BpfHashMap` behavior is undefined.
    ///
    /// # Examples
    /// ```no_run
    /// use oxidebpf::BpfHashMap;
    /// let map: BpfHashMap = unsafe {BpfHashMap::new(
    ///    "mymap",
    ///    std::mem::size_of::<u64>() as u32,
    ///    std::mem::size_of::<u64>() as u32,
    ///    1024,
    /// ).expect("Failed to create map") };
    /// ```
    pub unsafe fn new(
        map_name: &str,
        key_size: u32,
        value_size: u32,
        max_entries: u32,
    ) -> Result<BpfHashMap, OxidebpfError> {
        // Unlike BPF_MAP_TYPE_ARRAY (which requires 4-byte keys), hash
        // maps take caller-provided key and value sizes.
        let fd = bpf_map_create(
            bpf_map_type::BPF_MAP_TYPE_HASH,
            key_size as c_uint,
            value_size as c_uint,
            max_entries,
        )?;
        let map = Map {
            name: map_name.to_string(),
            fd,
            map_config: MapConfig::new(
                bpf_map_type::BPF_MAP_TYPE_HASH,
                key_size,
                value_size,
                max_entries,
            ),
            map_config_size: std::mem::size_of::<MapConfig>(),
            loaded: true,
        };
        Ok(BpfHashMap { base: map })
    }

    /// Replaces the underlying fd (used when the loader re-creates the map).
    pub(crate) fn set_fd(&mut self, fd: RawFd) {
        self.base.fd = fd;
    }

    /// Borrows the underlying map fd.
    pub(crate) fn get_fd(&self) -> &RawFd {
        &self.base.fd
    }

    /// Whether the map has been created in the kernel.
    pub(crate) fn is_loaded(&self) -> bool {
        self.base.loaded
    }
}
527
528impl Display for BpfHashMap {
529    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
530        write!(f, "Name: {}, loaded: {}", self.base.name, self.base.loaded)
531    }
532}
533
impl ArrayMap {
    /// Create a new ArrayMap
    ///
    /// Calling new will create a new BPF_MAP_TYPE_ARRAY map. It stores some meta data
    /// to track it. The array map supports read and write operations to access the
    /// members of the map
    ///
    /// # Safety
    ///
    /// The `value_size` you pass in needs to match exactly with the size of the struct/type
    /// used by any other BPF program that might be using this map. Any `T` you use in subsequent
    /// `read()` and `write()` calls needs to match exactly (e.g., with `#[repr(C)]`) with
    /// the struct/type used by the BPF program as well. Additionally, `std::mem::size_of::<T>()`
    /// must match the given `value_size` here exactly. If these conditions are not met, the
    /// `ArrayMap` behavior is undefined.
    ///
    /// # Examples
    /// ```
    /// use oxidebpf::ArrayMap;
    /// let map: ArrayMap = unsafe {ArrayMap::new(
    ///    "mymap",
    ///    std::mem::size_of::<u64>() as u32,
    ///    1024,
    /// ).expect("Failed to create map") };
    /// ```
    pub unsafe fn new(
        map_name: &str,
        value_size: u32,
        max_entries: u32,
    ) -> Result<ArrayMap, OxidebpfError> {
        // Manpages say that key size must be 4 bytes for BPF_MAP_TYPE_ARRAY
        let fd = bpf_map_create(
            bpf_map_type::BPF_MAP_TYPE_ARRAY,
            4,
            value_size as c_uint,
            max_entries,
        )?;
        let map = Map {
            name: map_name.to_string(),
            fd,
            map_config: MapConfig::new(
                bpf_map_type::BPF_MAP_TYPE_ARRAY,
                4,
                value_size,
                max_entries,
            ),
            map_config_size: std::mem::size_of::<MapConfig>(),
            loaded: true,
        };
        Ok(ArrayMap { base: map })
    }

    /// Replaces the underlying fd (used when the loader re-creates the map).
    pub(crate) fn set_fd(&mut self, fd: RawFd) {
        self.base.fd = fd;
    }

    /// Borrows the underlying map fd.
    pub(crate) fn get_fd(&self) -> &RawFd {
        &self.base.fd
    }

    /// Whether the map has been created in the kernel.
    pub(crate) fn is_loaded(&self) -> bool {
        self.base.loaded
    }
}
598
599impl Display for ArrayMap {
600    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
601        write!(f, "Name: {}, loaded: {}", self.base.name, self.base.loaded)
602    }
603}
604
605impl<T> RWMap<T, c_uint> for ArrayMap {
606    /// Reads an index from a map of type BPF_MAP_TYPE_ARRAY
607    ///
608    /// Initiates a read from `key`. Read verifies that the map has been initialized.
609    /// The value returned will be of the same type that was used when the ArrayMap
610    /// was created
611    ///
612    /// NOTE: This method calls will read a certain amount of memory based on what the
613    /// size of `T` is. Make sure that `T` matches the type of the value (e.g., with `#[repr(C)]`)
614    /// that is being used in the map.
615    ///
616    /// # Example
617    /// ```
618    /// use oxidebpf::{ArrayMap, RWMap};
619    ///
620    /// // this is safe because we are reading and writing a u64, and the value_size we
621    /// // pass into new() is a u64
622    ///
623    /// unsafe {
624    ///     let map: ArrayMap = ArrayMap::new(
625    ///        "mymap",
626    ///        std::mem::size_of::<u64>() as u32,
627    ///        1024,
628    ///     ).expect("Failed to create map");
629    ///     let _ = map.write(0, 12345u64);
630    ///     assert_eq!(
631    ///         12345u64,
632    ///         unsafe { map.read(0).expect("Failed to read value from map") }
633    ///     );
634    /// }
635    /// ```
636    unsafe fn read(&self, key: c_uint) -> Result<T, OxidebpfError> {
637        if !self.base.loaded {
638            info!(
639                LOGGER.0,
640                "ArrayMap::read(); attempted to read unloaded array map {}", self.base.name
641            );
642            return Err(OxidebpfError::MapNotLoaded);
643        }
644        if self.base.fd < 0 {
645            info!(
646                LOGGER.0,
647                "ArrayMap::read(); attempted to read array map with negative fd {}", self.base.name
648            );
649            return Err(OxidebpfError::MapNotLoaded);
650        }
651        if std::mem::size_of::<T>() as u32 != self.base.map_config.value_size {
652            info!(
653                LOGGER.0,
654                "ArrayMap::read(); attempted to read array map with incorrect size; gave {}; should be {}",
655                std::mem::size_of::<T>(),
656                self.base.map_config.value_size
657            );
658            return Err(OxidebpfError::MapValueSizeMismatch);
659        }
660        bpf_map_lookup_elem(self.base.fd, key)
661    }
662
663    /// Writes an index to and index of a map of type BPF_MAP_TYPE_ARRAY
664    ///
665    /// Initiates a write to `key` of `value`. The value needs to match the array
666    /// type that was used when the map was created
667    ///
668    /// NOTE: This method calls will write a certain amount of memory based on what the
669    /// size of `T` is. Make sure that `T` matches the type of the value (e.g., with `#[repr(C)]`)
670    /// that is being used in the map.
671    ///
672    /// # Example
673    /// ```
674    /// use oxidebpf::{ArrayMap, RWMap};
675    ///
676    /// // this is safe because we are reading and writing a u64, and the value_size we
677    /// // pass into new() is a u64
678    ///
679    /// unsafe {
680    ///     let map: ArrayMap = ArrayMap::new(
681    ///        "mymap",
682    ///        std::mem::size_of::<u64>() as u32,
683    ///        1024,
684    ///     ).expect("Failed to create map");
685    ///     let _ = map.write(0, 12345u64);
686    ///     assert_eq!(
687    ///         12345u64,
688    ///         map.read(0).expect("Failed to read value from map")
689    ///     );
690    /// }
691    /// ```
692    unsafe fn write(&self, key: c_uint, value: T) -> Result<(), OxidebpfError> {
693        if !self.base.loaded {
694            info!(
695                LOGGER.0,
696                "ArrayMap::write(); attempted to write unloaded array map {}", self.base.name
697            );
698            return Err(OxidebpfError::MapNotLoaded);
699        }
700        if self.base.fd < 0 {
701            info!(
702                LOGGER.0,
703                "ArrayMap::write(); attempted to write array map with negative fd {}",
704                self.base.name
705            );
706            return Err(OxidebpfError::MapNotLoaded);
707        }
708
709        // Try and verify that size of the value type matches the size of the value field in the map
710        if std::mem::size_of::<T>() as u32 != self.base.map_config.value_size {
711            return Err(OxidebpfError::MapValueSizeMismatch);
712        }
713        bpf_map_update_elem(self.base.fd, key, value)
714    }
715}
716
impl<T, U> RWMap<T, U> for BpfHashMap {
    /// Reads an index from a map of type BPF_MAP_TYPE_HASH
    ///
    /// Initiates a read from `key`. Read verifies that the map has been initialized.
    /// The value returned will be of the same type that was used when the BpfHashMap
    /// was created
    ///
    /// NOTE: This method will read a certain amount of memory based on what the
    /// size of `T` and `U` is. Make sure that `T` and `U` matches the type of the value and key
    /// (e.g., with `#[repr(C)]`) that is being used in the map.
    ///
    /// # Example
    /// ```no_run
    /// use oxidebpf::{BpfHashMap, RWMap};
    ///
    /// // this is safe because we are reading and writing a u64, and the value_size we
    /// // pass into new() is a u64
    ///
    /// unsafe {
    ///     let map: BpfHashMap = BpfHashMap::new(
    ///        "mymap",
    ///        std::mem::size_of::<u64>() as u32,
    ///        std::mem::size_of::<u64>() as u32,
    ///        1024,
    ///     ).expect("Failed to create map");
    ///     let _ = map.write(87654321u64, 12345u64);
    ///     assert_eq!(
    ///         12345u64,
    ///         unsafe { map.read(87654321u64).expect("Failed to read value from map") }
    ///     );
    /// }
    /// ```
    unsafe fn read(&self, key: U) -> Result<T, OxidebpfError> {
        if !self.base.loaded {
            info!(
                LOGGER.0,
                "BpfHashMap::read(); attempted to read unloaded bpf hash map {}", self.base.name
            );
            return Err(OxidebpfError::MapNotLoaded);
        }
        if self.base.fd < 0 {
            info!(
                LOGGER.0,
                "BpfHashMap::read(); attempted to read bpf hash map with negative fd {}",
                self.base.name
            );
            return Err(OxidebpfError::MapNotLoaded);
        }
        if std::mem::size_of::<T>() as u32 != self.base.map_config.value_size {
            info!(
                LOGGER.0,
                "BpfHashMap::read(); attempted to read bpf hash map with incorrect value size; gave {}; should be {}",
                std::mem::size_of::<T>(),
                self.base.map_config.value_size
            );
            return Err(OxidebpfError::MapValueSizeMismatch);
        }
        if std::mem::size_of::<U>() as u32 != self.base.map_config.key_size {
            info!(
                LOGGER.0,
                "BpfHashMap::read(); attempted to read bpf hash map with incorrect key size; gave {}; should be {}",
                std::mem::size_of::<U>(),
                self.base.map_config.key_size
            );
            return Err(OxidebpfError::MapKeySizeMismatch);
        }
        bpf_map_lookup_elem(self.base.fd, key)
    }

    /// Writes a value to a key of a map of type BPF_MAP_TYPE_HASH
    ///
    /// Initiates a write to `key` of `value`. The value needs to match the
    /// value type that was used when the map was created
    ///
    /// NOTE: This method will write a certain amount of memory based on what the
    /// size of `T` is. Make sure that `T` matches the type of the value (e.g., with `#[repr(C)]`)
    /// that is being used in the map.
    ///
    /// # Example
    /// ```no_run
    /// use oxidebpf::{BpfHashMap, RWMap};
    /// use std::process;
    ///
    /// // this is safe because we are reading and writing a u64, and the value_size we
    /// // pass into new() is a u64
    ///
    /// unsafe {
    ///     let map: BpfHashMap = BpfHashMap::new(
    ///        "mymap",
    ///        std::mem::size_of::<u32>() as u32,
    ///        std::mem::size_of::<u64>() as u32,
    ///        1024,
    ///     ).expect("Failed to create map");
    ///     let _ = map.write(process::id(), 12345u64);
    ///     assert_eq!(
    ///         12345u64,
    ///         map.read(process::id()).expect("Failed to read value from map")
    ///     );
    /// }
    /// ```
    unsafe fn write(&self, key: U, value: T) -> Result<(), OxidebpfError> {
        if !self.base.loaded {
            info!(
                LOGGER.0,
                "BpfHashMap::write(); attempted to write unloaded bpf hash map {}", self.base.name
            );
            return Err(OxidebpfError::MapNotLoaded);
        }
        if self.base.fd < 0 {
            info!(
                LOGGER.0,
                "BpfHashMap::write(); attempted to write bpf hash map with negative fd {}",
                self.base.name
            );
            return Err(OxidebpfError::MapNotLoaded);
        }

        // Try and verify that size of the value type matches the size of the value field in the map
        if std::mem::size_of::<T>() as u32 != self.base.map_config.value_size {
            info!(
                LOGGER.0,
                "BpfHashMap::write(); attempted to write bpf hash map with incorrect value size; gave {}; should be {}",
                std::mem::size_of::<T>(),
                self.base.map_config.value_size
            );
            return Err(OxidebpfError::MapValueSizeMismatch);
        }
        if std::mem::size_of::<U>() as u32 != self.base.map_config.key_size {
            info!(
                LOGGER.0,
                "BpfHashMap::write(); attempted to write bpf hash map with incorrect key size; gave {}; should be {}",
                std::mem::size_of::<U>(),
                self.base.map_config.key_size
            );
            return Err(OxidebpfError::MapKeySizeMismatch);
        }
        bpf_map_update_elem(self.base.fd, key, value)
    }
}
856
857impl Drop for PerfMap {
858    fn drop(&mut self) {
859        // if it doesn't work, we're gonna close it anyway so :shrug:
860        let _ = perf_event_ioc_disable(self.ev_fd);
861        unsafe {
862            libc::close(self.ev_fd);
863        }
864    }
865}
866
impl Drop for ArrayMap {
    /// Marks the map unloaded so read()/write() start failing; the fd
    /// itself is closed by the inner `Map`'s own Drop.
    fn drop(&mut self) {
        self.base.loaded = false;
    }
}
872
/// Returns the largest power of two that is less than or equal to `n`.
///
/// Edge cases preserved from the original: `n == 0` yields 0, and an `n`
/// whose next power of two would overflow yields the top-bit power.
fn lte_power_of_two(n: usize) -> usize {
    match n {
        0 => 0,
        n if n.is_power_of_two() => n,
        // highest set bit of n is the answer for any other n
        n => 1 << (usize::BITS - 1 - n.leading_zeros()),
    }
}
884
/// Queries the system page size via `sysconf(_SC_PAGE_SIZE)`.
///
/// Returns an error when sysconf reports failure (negative) or a
/// nonsensical zero page size.
fn page_size() -> Result<usize, OxidebpfError> {
    let raw_size = unsafe { libc::sysconf(libc::_SC_PAGE_SIZE) };

    match raw_size.cmp(&0) {
        std::cmp::Ordering::Less => {
            // sysconf failed; capture errno for the error report
            let e = errno();
            info!(
                LOGGER.0,
                "PerfMap::new_group(); perfmap error, size < 0: {}; errno: {}", raw_size, e
            );
            Err(OxidebpfError::LinuxError(
                "perf map get PAGE_SIZE".to_string(),
                nix::errno::from_i32(e),
            ))
        }
        std::cmp::Ordering::Equal => {
            info!(
                LOGGER.0,
                "PerfMap::new_group(); perfmap error, bad page size (size == 0)"
            );
            Err(OxidebpfError::BadPageSize)
        }
        std::cmp::Ordering::Greater => Ok(raw_size as usize),
    }
}
910
911/// Creates a new PerfMem for the given file descriptor.
912///
913/// On error it will attempt to close the file descriptor and report
914/// if it failed to close it.
915///
916/// # Safety:
917/// The fd must be valid and come from a perf_event_open syscall
918unsafe fn create_raw_perf(fd: RawFd, mmap_size: usize) -> Result<*mut PerfMem, OxidebpfError> {
919    let base_ptr = libc::mmap(
920        null_mut(),
921        mmap_size,
922        libc::PROT_READ | libc::PROT_WRITE,
923        libc::MAP_SHARED,
924        fd,
925        0,
926    );
927
928    if base_ptr == libc::MAP_FAILED {
929        Err(handle_map_failed(fd, mmap_size))
930    } else {
931        Ok(base_ptr as *mut PerfMem)
932    }
933}
934
935unsafe fn handle_map_failed(fd: RawFd, mmap_size: usize) -> OxidebpfError {
936    let mmap_errno = nix::errno::from_i32(errno());
937    if libc::close(fd) < 0 {
938        let e = errno();
939        info!(LOGGER.0, "PerfMap::new_group(); could not close mmap fd, multiple errors; mmap_errno: {}; errno: {}", mmap_errno, e);
940        return OxidebpfError::MultipleErrors(vec![
941            OxidebpfError::LinuxError(
942                format!("perf_map => mmap(fd={},size={})", fd, mmap_size),
943                mmap_errno,
944            ),
945            OxidebpfError::LinuxError(
946                format!("perf_map cleanup => close({})", fd),
947                nix::errno::from_i32(e),
948            ),
949        ]);
950    }
951
952    info!(
953        LOGGER.0,
954        "PerfMap::new_group(); mmap failed while creating perfmap: {:?}", mmap_errno
955    );
956
957    OxidebpfError::LinuxError(
958        format!("per_event_open => mmap(fd={},size={})", fd, mmap_size),
959        mmap_errno,
960    )
961}
962
// NOTE(review): these tests create real BPF maps through bpf() syscalls, so
// they need a kernel with BPF map support and sufficient privileges
// (typically CAP_SYS_ADMIN / CAP_BPF) to pass — TODO confirm CI setup.
#[cfg(test)]
mod map_tests {
    use crate::error::OxidebpfError;
    use crate::maps::RWMap;
    use crate::maps::{ArrayMap, BpfHashMap};
    use nix::errno::Errno;

    // Doing the rough equivalent of C's time(NULL);
    // (despite the name this returns *milliseconds* since the epoch, not
    // seconds — it is only used below as a cheap per-run randomness seed)
    fn time_null() -> u64 {
        let start = std::time::SystemTime::now();
        let seed_time = start
            .duration_since(std::time::UNIX_EPOCH)
            .expect("All time is broken!!");
        seed_time.as_millis() as u64
    }

    // Test the normal behavior of the array map type
    //
    // This test simply writes to all the entries in the map and then tries to read
    // them back. If it successfully reads the values back from the map then it
    // is considered passing
    #[test]
    fn test_map_array() {
        let array_size: u64 = 100;
        let map: ArrayMap = unsafe {
            ArrayMap::new(
                "mymap",
                std::mem::size_of::<u64>() as u32,
                array_size as u32,
            )
            .expect("Failed to create new map")
        };

        // Give it some "randomness"
        let nums: Vec<u64> = (0..array_size)
            .map(|v| (v * time_null() + 71) % 128)
            .collect();

        // Write
        for (idx, num) in nums.iter().enumerate() {
            unsafe { map.write(idx as u32, *num).expect("could not write to map") };
        }
        // Read back and verify every entry round-trips
        for (idx, num) in nums.iter().enumerate() {
            assert_eq!(*num, unsafe {
                map.read(idx as u32).expect("Failed to read value from map")
            });
        }

        // Updates the entries and retrieves them again
        let nums: Vec<u64> = nums.iter().map(|v| (v * time_null() + 71) % 128).collect();
        for (idx, num) in nums.iter().enumerate() {
            unsafe { map.write(idx as u32, *num).expect("could not write to map") };
        }
        for (idx, num) in nums.iter().enumerate() {
            assert_eq!(*num, unsafe {
                map.read(idx as u32).expect("Failed to read value from map")
            });
        }
    }

    // Tests a trying to read an element from outside the bounds of the array
    // (index 100 in a 10-entry map); the kernel is expected to report ENOENT.
    #[test]
    fn test_map_array_bad_index() {
        let array_size: u64 = 10;
        let map: ArrayMap = unsafe {
            ArrayMap::new(
                "mymap",
                std::mem::size_of::<u64>() as u32,
                array_size as u32,
            )
            .expect("Failed to create new map")
        };

        // Give it some "randomness"
        let nums: Vec<u64> = (0..array_size)
            .map(|v| (v * time_null() + 71) % 128)
            .collect();

        for (idx, num) in nums.iter().enumerate() {
            unsafe { map.write(idx as u32, *num).expect("could not write to map") };
        }
        let should_fail: Result<u64, OxidebpfError> = unsafe { map.read(100) };
        assert!(should_fail.is_err());
        match should_fail {
            Err(OxidebpfError::LinuxError(_, errno)) => {
                assert_eq!(errno, Errno::ENOENT)
            }
            _ => {
                panic!("invalid OxidebpfError: {:?}", should_fail);
            }
        }
    }

    // Test writing outside the size of the array; the kernel is expected to
    // reject the update with E2BIG.
    #[test]
    fn test_map_array_bad_write_index() {
        let array_size: u64 = 10;
        let map: ArrayMap = unsafe {
            ArrayMap::new(
                "mymap",
                std::mem::size_of::<u64>() as u32,
                array_size as u32,
            )
            .expect("Failed to create new map")
        };

        // Give it some "randomness"
        let nums: Vec<u64> = (0..array_size)
            .map(|v| (v * time_null() + 71) % 128)
            .collect();

        for (idx, num) in nums.iter().enumerate() {
            unsafe { map.write(idx as u32, *num).expect("could not write to map") };
        }

        // Should return E2BIG
        let should_fail = unsafe { map.write(100, 12345u64).err().unwrap() };
        match should_fail {
            OxidebpfError::LinuxError(_, errno) => {
                assert_eq!(errno, Errno::E2BIG)
            }
            _ => {
                panic!("invalid OxidebpfError: {:?}", should_fail);
            }
        }
    }

    // Test storing a more complex structure
    //
    // NOTE(review): the map's value size is size_of::<u64>() (8 bytes) while
    // the writes pass `tmp: &TestStructure` — i.e. the map appears to store
    // raw *references* into this process's memory, which only round-trip
    // while `data` is alive and within the same process — TODO confirm this
    // is intentional.
    #[test]
    fn test_map_array_complex_structure() {
        // A made up structure for this test
        struct TestStructure {
            durp0: u64,
            durp1: String,
            durp2: f64,
            durp3: bool,
        }

        // Create the map and initialize a vector of TestStructure
        let array_size: u64 = 10;
        let map: ArrayMap = unsafe {
            ArrayMap::new(
                "mymap",
                std::mem::size_of::<u64>() as u32,
                array_size as u32,
            )
            .expect("Failed to create new map")
        };

        let data: Vec<TestStructure> = (0..array_size)
            .map(|v| TestStructure {
                durp0: v,
                durp1: format!("Durp {}", v),
                durp2: 0.1234,
                durp3: v % 2 == 0,
            })
            .collect();

        // Write the test structures to the map
        for (i, tmp) in data.iter().enumerate() {
            unsafe { map.write(i as u32, tmp).expect("could not write to map") };
        }

        // Read the test structures from the map and compare with originals
        for (i, item) in data.iter().enumerate() {
            let val: &TestStructure =
                unsafe { map.read(i as u32).expect("Failed to read value from array") };
            assert_eq!(val.durp0, item.durp0);
            assert_eq!(val.durp1, item.durp1);
            assert_eq!(val.durp2, item.durp2);
            assert_eq!(val.durp3, item.durp3);
        }
    }

    // Write/read cycle on a hash map keyed by the current pid; each written
    // value must be read back identically.
    #[test]
    fn test_hash_map() {
        let array_size: u64 = 100;

        let map: BpfHashMap = unsafe {
            BpfHashMap::new(
                "mymap",
                std::mem::size_of::<u32>() as u32,
                std::mem::size_of::<u64>() as u32,
                1024,
            )
            .expect("Failed to create new map")
        };
        // Give it some "randomness"
        let nums: Vec<u64> = (0..array_size)
            .map(|v| (v * time_null() + 71) % 128)
            .collect();
        for num in nums.iter() {
            unsafe {
                let _ = map.write(std::process::id(), *num);
                let val: u64 = map
                    .read(std::process::id())
                    .expect("Failed to read value from hashmap");
                assert_eq!(val, *num);
            }
        }
    }

    // Reading a key that was never inserted should surface ENOENT.
    #[test]
    fn test_hash_map_bad_index() {
        let map: BpfHashMap = unsafe {
            BpfHashMap::new(
                "mymap",
                std::mem::size_of::<u32>() as u32,
                std::mem::size_of::<u64>() as u32,
                1024,
            )
            .expect("Failed to create new map")
        };
        let _ = unsafe { map.write(1234, 1234) };
        let should_fail: Result<u64, OxidebpfError> = unsafe { map.read(4321) };
        assert!(should_fail.is_err());
        match should_fail {
            Err(OxidebpfError::LinuxError(_, errno)) => {
                assert_eq!(errno, Errno::ENOENT)
            }
            _ => {
                panic!("invalid OxidebpfError: {:?}", should_fail);
            }
        }
    }

    // Round-trips a Copy struct as the map value.
    //
    // NOTE(review): TestStructure contains a `&'a str`, so the stored bytes
    // include a pointer into this process's memory; the comparison only
    // works in-process while the borrowed data is alive — TODO confirm
    // intentional.
    #[test]
    fn test_hash_map_complex_key_value() {
        // A made up structure for this test
        #[derive(Clone, Copy)]
        struct TestStructure<'a> {
            durp0: u64,
            durp1: &'a str,
            durp2: f64,
            durp3: bool,
        }

        // Create the map and initialize a vector of TestStructure
        let array_size: u32 = 10;
        let map: BpfHashMap = unsafe {
            BpfHashMap::new(
                "mymap",
                std::mem::size_of::<u32>() as u32,
                std::mem::size_of::<TestStructure>() as u32,
                array_size as u32,
            )
            .expect("Failed to create new map")
        };

        let data: Vec<TestStructure> = (0..array_size)
            .map(|v| TestStructure {
                durp0: v as u64,
                durp1: "Durp",
                durp2: 0.1234,
                durp3: v % 2 == 0,
            })
            .collect();

        // Write the test structures to the map
        for (i, item) in data.iter().enumerate() {
            unsafe {
                map.write(std::process::id() + i as u32, *item)
                    .expect("could not write to map");
            }
            let val: TestStructure = unsafe {
                map.read(std::process::id() + i as u32)
                    .expect("Failed to read value from array")
            };
            assert_eq!(val.durp0, item.durp0);
            assert_eq!(val.durp1, item.durp1);
            assert_eq!(val.durp2, item.durp2);
            assert_eq!(val.durp3, item.durp3);
        }
    }
}