Skip to main content

kbpf_basic/perf/
bpf.rs

1//! BPF performance event handling module.
2//!
3
4use super::util::{PerfProbeArgs, *};
5use crate::{BpfResult as Result, linux_bpf::*};
6
7const PAGE_SIZE: usize = 4096;
8
/// Ring buffer page for perf events.
///
/// Wraps a caller-supplied mmapped region: the first page holds the
/// `perf_event_mmap_page` control structure, and the remaining pages form
/// the data region that sample/lost records are written into.
#[derive(Debug)]
pub struct RingPage {
    // Total size of the mapping in bytes (control page + data region).
    size: usize,
    // Base address of the mapping; 0 for an `empty()` placeholder.
    ptr: usize,
    // Size of the data region only (`size - PAGE_SIZE`).
    data_region_size: usize,
    // Number of samples dropped since the last PERF_RECORD_LOST was written.
    lost: usize,
}
17
impl RingPage {
    /// Create an empty RingPage.
    ///
    /// Owns no memory (`ptr == 0`); acts as a placeholder until
    /// `new_init` builds a real ring buffer over an mmapped region
    /// (see `BpfPerfEvent::do_mmap`).
    pub fn empty() -> Self {
        RingPage {
            ptr: 0,
            size: 0,
            data_region_size: 0,
            lost: 0,
        }
    }

    /// Get the start address of the RingPage.
    pub fn start(&self) -> usize {
        self.ptr
    }

    /// Initialize a RingPage from start address and length.
    ///
    /// `start` must be the base of a writable mapping of at least `len`
    /// bytes — TODO(review): confirm the caller guarantees this; it is not
    /// checked here beyond the size asserts in `init`.
    pub fn new_init(start: usize, len: usize) -> Self {
        Self::init(start as _, len)
    }

    /// Lay out the ring buffer in place over `size` bytes at `ptr`:
    /// one control page followed by a `size - PAGE_SIZE` byte data region.
    fn init(ptr: *mut u8, size: usize) -> Self {
        // Mapping must be a whole number of pages: at least one control
        // page plus at least one data page.
        assert_eq!(size % PAGE_SIZE, 0);
        assert!(size / PAGE_SIZE >= 2);
        // The first page will be filled with perf_event_mmap_page
        // SAFETY: relies on `ptr` pointing to a valid, writable mapping of
        // at least `size` bytes (caller's responsibility, see `new_init`).
        unsafe {
            let perf_event_mmap_page = &mut *(ptr as *mut perf_event_mmap_page);
            perf_event_mmap_page.data_offset = PAGE_SIZE as u64;
            perf_event_mmap_page.data_size = (size - PAGE_SIZE) as u64;
            // user will read sample or lost record from data_tail
            perf_event_mmap_page.data_tail = 0;
            // kernel will write sample or lost record from data_head
            perf_event_mmap_page.data_head = 0;
            // It is a ring buffer.
        }
        RingPage {
            ptr: ptr as usize,
            size,
            data_region_size: size - PAGE_SIZE,
            lost: 0,
        }
    }

    /// Whether `data_size` more bytes fit into the data region.
    ///
    /// `data_head`/`data_tail` are free-running byte counters (never
    /// reduced mod `data_region_size`), so bytes in flight are
    /// `data_head - data_tail` and free space is
    /// `data_region_size - (data_head - data_tail)`.
    #[inline]
    fn can_write(&self, data_size: usize, data_tail: usize, data_head: usize) -> bool {
        let capacity = self.data_region_size - data_head + data_tail;
        data_size <= capacity
    }

    /// Write a perf event to the ring buffer.
    ///
    /// If earlier events were dropped (`self.lost > 0`), a
    /// `PERF_RECORD_LOST` record is flushed first and the write is
    /// retried; if the sample does not fit, it is counted as lost instead.
    pub fn write_event(&mut self, data: &[u8]) -> Result<()> {
        // NOTE(review): two `&mut` borrows are derived from the same raw
        // pointer; the fields are disjoint, but the control page is shared
        // with userspace — consider volatile/atomic accesses here.
        let data_tail = unsafe { &mut (*(self.ptr as *mut perf_event_mmap_page)).data_tail };
        let data_head = unsafe { &mut (*(self.ptr as *mut perf_event_mmap_page)).data_head };

        // The user library advances data_tail after reading, but like
        // data_head it is free-running: it is never taken mod data_region_size.
        let perf_header_size = size_of::<perf_event_header>();
        let can_write_perf_header =
            self.can_write(perf_header_size, *data_tail as usize, *data_head as usize);

        if can_write_perf_header {
            let can_write_lost_record = self.can_write(
                size_of::<LostSamples>(),
                *data_tail as usize,
                *data_head as usize,
            );
            // if there is lost record, we need to write the lost record first
            if self.lost > 0 && can_write_lost_record {
                let new_data_head = self.write_lost(*data_head as usize)?;
                *data_head = new_data_head as u64;
                self.lost = 0;
                // try to write the event again (recursion re-reads head/tail)
                return self.write_event(data);
            }
            let sample_size = PerfSample::calculate_size(data.len());
            let can_write_sample =
                self.can_write(sample_size, *data_tail as usize, *data_head as usize);
            if can_write_sample {
                let new_data_head = self.write_sample(data, *data_head as usize)?;
                *data_head = new_data_head as u64;
            } else {
                // No room for the sample: remember it so a lost record can
                // be emitted once space frees up.
                self.lost += 1;
            }
        } else {
            self.lost += 1;
        }
        Ok(())
    }

    /// Copy raw bytes into the data region at logical offset `data_head`,
    /// wrapping around the end of the region when necessary.
    ///
    /// Does not advance `data_head`; callers compute the new head
    /// themselves from the record size (the old "Return the new data_head"
    /// doc was wrong — this returns `Ok(())`).
    fn write_any(&mut self, data: &[u8], data_head: usize) -> Result<()> {
        let data_region_len = self.data_region_size;
        // Skip the control page; the rest of the mapping is the data region.
        let data_region = self.as_mut_slice()[PAGE_SIZE..].as_mut();
        let data_len = data.len();
        let start = data_head % data_region_len;
        let end = (data_head + data_len) % data_region_len;
        if start < end {
            // Contiguous case: one copy.
            data_region[start..end].copy_from_slice(data);
        } else {
            // Wrapped case: tail of the region, then the front.
            let first_len = data_region_len - start;
            data_region[start..start + first_len].copy_from_slice(&data[..first_len]);
            data_region[0..end].copy_from_slice(&data[first_len..]);
        }
        Ok(())
    }
    /// Trailing padding needed after a record ending at `data_head_mod`
    /// (already reduced mod `data_region_size`) so the NEXT record's
    /// `perf_event_header` is never split across the wrap point.
    #[inline]
    fn fill_size(&self, data_head_mod: usize) -> usize {
        if self.data_region_size - data_head_mod < size_of::<perf_event_header>() {
            // The remaining space is not enough to write the perf_event_header
            // We need to fill the remaining space with 0
            self.data_region_size - data_head_mod
        } else {
            0
        }
    }

    /// Write a sample to the page.
    ///
    /// Returns the new (free-running) data_head. The header's `size` field
    /// includes the trailing `fill_size` padding so readers skip it.
    fn write_sample(&mut self, data: &[u8], data_head: usize) -> Result<usize> {
        let sample_size = PerfSample::calculate_size(data.len());
        let maybe_end = (data_head + sample_size) % self.data_region_size;
        let fill_size = self.fill_size(maybe_end);
        let perf_sample = PerfSample {
            s_hdr: SampleHeader {
                header: perf_event_header {
                    type_: perf_event_type::PERF_RECORD_SAMPLE as u32,
                    misc: 0,
                    size: size_of::<SampleHeader>() as u16 + data.len() as u16 + fill_size as u16,
                },
                size: data.len() as u32,
            },
            value: data,
        };
        // Header first, then the payload right after it.
        self.write_any(perf_sample.s_hdr.as_bytes(), data_head)?;
        self.write_any(perf_sample.value, data_head + size_of::<SampleHeader>())?;
        Ok(data_head + sample_size + fill_size)
    }

    /// Write a lost record to the page.
    ///
    /// Return the new data_head. `count` carries the number of samples
    /// dropped since the last lost record (`self.lost`).
    fn write_lost(&mut self, data_head: usize) -> Result<usize> {
        let maybe_end = (data_head + size_of::<LostSamples>()) % self.data_region_size;
        let fill_size = self.fill_size(maybe_end);
        let lost = LostSamples {
            header: perf_event_header {
                type_: perf_event_type::PERF_RECORD_LOST as u32,
                misc: 0,
                size: size_of::<LostSamples>() as u16 + fill_size as u16,
            },
            id: 0,
            count: self.lost as u64,
        };
        self.write_any(lost.as_bytes(), data_head)?;
        Ok(data_head + size_of::<LostSamples>() + fill_size)
    }

    /// Whether the ring buffer has unread data (`data_tail != data_head`;
    /// the `&u64` references compare by pointed-to value).
    pub fn readable(&self) -> bool {
        let data_tail = unsafe { &(*(self.ptr as *mut perf_event_mmap_page)).data_tail };
        let data_head = unsafe { &(*(self.ptr as *mut perf_event_mmap_page)).data_head };
        data_tail != data_head
    }

    /// Get the ring buffer as a slice (control page included).
    #[allow(dead_code)]
    pub fn as_slice(&self) -> &[u8] {
        // SAFETY: `ptr`/`size` describe the mapping handed to `init`;
        // invalid for a `RingPage::empty()` placeholder (ptr == 0).
        unsafe { core::slice::from_raw_parts(self.ptr as *const u8, self.size) }
    }

    /// Get the ring buffer as a mutable slice (control page included).
    pub fn as_mut_slice(&mut self) -> &mut [u8] {
        // SAFETY: same mapping invariants as `as_slice`, plus `&mut self`
        // gives unique access on this side.
        unsafe { core::slice::from_raw_parts_mut(self.ptr as *mut u8, self.size) }
    }
}
193
/// BPF performance event structure.
#[derive(Debug)]
pub struct BpfPerfEvent {
    // Probe arguments the event was created with (stored, not read here).
    _args: PerfProbeArgs,
    // Mutable event state: enabled flag, ring buffer, mmap offset.
    data: BpfPerfEventData,
}
200
/// Data for BPF performance event.
#[derive(Debug)]
pub struct BpfPerfEventData {
    // Gates `BpfPerfEvent::write_event`; toggled by enable()/disable().
    enabled: bool,
    // Backing ring buffer; `RingPage::empty()` until `do_mmap` is called.
    mmap_page: RingPage,
    // mmap offset recorded by `do_mmap` — consumer not visible in this file.
    offset: usize,
}
208
209impl BpfPerfEvent {
210    /// Create a new BpfPerfEvent.
211    pub fn new(args: PerfProbeArgs) -> Self {
212        BpfPerfEvent {
213            _args: args,
214            data: BpfPerfEventData {
215                enabled: false,
216                mmap_page: RingPage::empty(),
217                offset: 0,
218            },
219        }
220    }
221
222    /// Bind the perf event to a mmap page.
223    pub fn do_mmap(&mut self, start: usize, len: usize, offset: usize) -> Result<()> {
224        // create mmap page
225        let mmap_page = RingPage::new_init(start, len);
226        self.data.mmap_page = mmap_page;
227        self.data.offset = offset;
228        Ok(())
229    }
230
231    /// Write a perf event to the mmap page.
232    /// Only when the perf event is enabled, the event will be written.
233    pub fn write_event(&mut self, data: &[u8]) -> Result<()> {
234        if self.data.enabled {
235            self.data.mmap_page.write_event(data)?;
236        }
237        Ok(())
238    }
239
240    /// Enable the perf event
241    pub fn enable(&mut self) -> Result<()> {
242        self.data.enabled = true;
243        Ok(())
244    }
245
246    /// Disable the perf event
247    pub fn disable(&mut self) -> Result<()> {
248        self.data.enabled = false;
249        Ok(())
250    }
251
252    /// Whether the perf event is enabled
253    pub fn enabled(&self) -> bool {
254        self.data.enabled
255    }
256
257    /// Whether the perf event is readable
258    pub fn readable(&self) -> bool {
259        self.data.mmap_page.readable()
260    }
261
262    /// Whether the perf event is writable
263    pub fn writeable(&self) -> bool {
264        false
265    }
266}