// kbpf_basic/perf/bpf.rs
1//! BPF performance event handling module.
2//!
3use core::any::Any;
4
5use super::util::{PerfProbeArgs, *};
6use crate::{BpfError, Result, linux_bpf::*};
7
/// Size of one memory page in bytes; the mmap area is laid out and validated in page units.
const PAGE_SIZE: usize = 4096;
9
/// Ring buffer page for perf events.
///
/// The mapping consists of one control page (`perf_event_mmap_page`)
/// followed by a data region used as a ring buffer (see `RingPage::init`).
#[derive(Debug)]
pub struct RingPage {
    // Total size of the mapping in bytes (control page + data region).
    size: usize,
    // Base address of the mapping; the control page lives at this address.
    ptr: usize,
    // Size of the data region in bytes (`size - PAGE_SIZE`).
    data_region_size: usize,
    // Number of samples dropped since the last PERF_RECORD_LOST was written.
    lost: usize,
}
18
impl RingPage {
    /// Create an empty `RingPage` with no backing memory.
    ///
    /// Placeholder state used before `do_mmap` binds a real mapping;
    /// none of the buffer methods may be called on an empty page.
    pub fn empty() -> Self {
        RingPage {
            ptr: 0,
            size: 0,
            data_region_size: 0,
            lost: 0,
        }
    }

    /// Get the start address of the RingPage.
    pub fn start(&self) -> usize {
        self.ptr
    }

    /// Initialize a RingPage from start address and length.
    pub fn new_init(start: usize, len: usize) -> Self {
        Self::init(start as _, len)
    }

    /// Lay out the buffer: the first page holds the `perf_event_mmap_page`
    /// control structure, the remaining pages form the data ring.
    ///
    /// Panics if `size` is not a multiple of `PAGE_SIZE` or is smaller
    /// than two pages (one control page plus at least one data page).
    fn init(ptr: *mut u8, size: usize) -> Self {
        assert_eq!(size % PAGE_SIZE, 0);
        assert!(size / PAGE_SIZE >= 2);
        // The first page will be filled with perf_event_mmap_page
        // SAFETY: caller must guarantee `ptr` points to at least `size`
        // writable bytes, so the control struct fits in page 0.
        unsafe {
            let perf_event_mmap_page = &mut *(ptr as *mut perf_event_mmap_page);
            perf_event_mmap_page.data_offset = PAGE_SIZE as u64;
            perf_event_mmap_page.data_size = (size - PAGE_SIZE) as u64;
            // user will read sample or lost record from data_tail
            perf_event_mmap_page.data_tail = 0;
            // kernel will write sample or lost record from data_head
            perf_event_mmap_page.data_head = 0;
            // It is a ring buffer.
        }
        RingPage {
            ptr: ptr as usize,
            size,
            data_region_size: size - PAGE_SIZE,
            lost: 0,
        }
    }

    /// Whether `data_size` more bytes fit in the ring.
    ///
    /// `data_head`/`data_tail` are monotonically increasing byte counters
    /// (not reduced modulo the region size), so `data_head - data_tail`
    /// is the number of unread bytes and the free capacity is
    /// `data_region_size - (data_head - data_tail)`.
    #[inline]
    fn can_write(&self, data_size: usize, data_tail: usize, data_head: usize) -> bool {
        let capacity = self.data_region_size - data_head + data_tail;
        data_size <= capacity
    }

    /// Write a perf event to the ring buffer.
    ///
    /// If the sample does not fit it is counted in `self.lost`; once space
    /// is available a PERF_RECORD_LOST record is emitted before the next
    /// sample (and the write is retried via recursion).
    pub fn write_event(&mut self, data: &[u8]) -> Result<()> {
        // NOTE(review): two `&mut` references are derived from the same raw
        // pointer; they target distinct fields of the control page, but the
        // aliasing here deserves a careful soundness review.
        let data_tail = unsafe { &mut (*(self.ptr as *mut perf_event_mmap_page)).data_tail };
        let data_head = unsafe { &mut (*(self.ptr as *mut perf_event_mmap_page)).data_head };

        // The user library advances data_tail after reading, but it does
        // not reduce it modulo data_region_size (see can_write).
        let perf_header_size = size_of::<perf_event_header>();
        let can_write_perf_header =
            self.can_write(perf_header_size, *data_tail as usize, *data_head as usize);

        if can_write_perf_header {
            let can_write_lost_record = self.can_write(
                size_of::<LostSamples>(),
                *data_tail as usize,
                *data_head as usize,
            );
            // if there is lost record, we need to write the lost record first
            if self.lost > 0 && can_write_lost_record {
                let new_data_head = self.write_lost(*data_head as usize)?;
                *data_head = new_data_head as u64;
                self.lost = 0;
                // try to write the event again
                return self.write_event(data);
            }
            let sample_size = PerfSample::calculate_size(data.len());
            let can_write_sample =
                self.can_write(sample_size, *data_tail as usize, *data_head as usize);
            if can_write_sample {
                let new_data_head = self.write_sample(data, *data_head as usize)?;
                *data_head = new_data_head as u64;
            } else {
                // No room for the sample: count it as lost.
                self.lost += 1;
            }
        } else {
            // Not even a header fits: count the sample as lost.
            self.lost += 1;
        }
        Ok(())
    }

    /// Copy `data` into the data region starting at offset `data_head`,
    /// wrapping around the end of the ring if necessary.
    ///
    /// `data_head` may exceed the region size; it is reduced modulo
    /// `data_region_size` here. The head counter itself is advanced by
    /// the callers (`write_sample` / `write_lost`), not by this method.
    fn write_any(&mut self, data: &[u8], data_head: usize) -> Result<()> {
        let data_region_len = self.data_region_size;
        // Skip the control page; everything after it is the ring data.
        let data_region = self.as_mut_slice()[PAGE_SIZE..].as_mut();
        let data_len = data.len();
        let start = data_head % data_region_len;
        let end = (data_head + data_len) % data_region_len;
        if start < end {
            // Contiguous case: single copy.
            data_region[start..end].copy_from_slice(data);
        } else {
            // Wrapped case: copy the tail of the region, then the head.
            let first_len = data_region_len - start;
            data_region[start..start + first_len].copy_from_slice(&data[..first_len]);
            data_region[0..end].copy_from_slice(&data[first_len..]);
        }
        Ok(())
    }
    /// Padding needed after a record ending at `data_head_mod` (an offset
    /// already reduced modulo the region size) so that the next record's
    /// header never straddles the end of the region.
    ///
    /// NOTE(review): despite the comment below, the skipped bytes are not
    /// actually zeroed anywhere — the head is merely advanced past them;
    /// verify the reader tolerates stale bytes in the pad gap.
    #[inline]
    fn fill_size(&self, data_head_mod: usize) -> usize {
        if self.data_region_size - data_head_mod < size_of::<perf_event_header>() {
            // The remaining space is not enough to write the perf_event_header
            // We need to fill the remaining space with 0
            self.data_region_size - data_head_mod
        } else {
            0
        }
    }

    /// Write a sample to the page.
    ///
    /// Returns the new (monotonic) data_head after the sample and any
    /// end-of-region padding.
    fn write_sample(&mut self, data: &[u8], data_head: usize) -> Result<usize> {
        let sample_size = PerfSample::calculate_size(data.len());
        let maybe_end = (data_head + sample_size) % self.data_region_size;
        // Pad so the *next* record's header will not be split at the wrap.
        let fill_size = self.fill_size(maybe_end);
        let perf_sample = PerfSample {
            s_hdr: SampleHeader {
                header: perf_event_header {
                    type_: perf_event_type::PERF_RECORD_SAMPLE as u32,
                    misc: 0,
                    // Record size includes the padding so readers skip it.
                    size: size_of::<SampleHeader>() as u16 + data.len() as u16 + fill_size as u16,
                },
                size: data.len() as u32,
            },
            value: data,
        };
        // Header first, then payload immediately after it.
        self.write_any(perf_sample.s_hdr.as_bytes(), data_head)?;
        self.write_any(perf_sample.value, data_head + size_of::<SampleHeader>())?;
        Ok(data_head + sample_size + fill_size)
    }

    /// Write a lost record to the page.
    ///
    /// Return the new data_head
    fn write_lost(&mut self, data_head: usize) -> Result<usize> {
        let maybe_end = (data_head + size_of::<LostSamples>()) % self.data_region_size;
        // Pad so the *next* record's header will not be split at the wrap.
        let fill_size = self.fill_size(maybe_end);
        let lost = LostSamples {
            header: perf_event_header {
                type_: perf_event_type::PERF_RECORD_LOST as u32,
                misc: 0,
                size: size_of::<LostSamples>() as u16 + fill_size as u16,
            },
            id: 0,
            count: self.lost as u64,
        };
        self.write_any(lost.as_bytes(), data_head)?;
        Ok(data_head + size_of::<LostSamples>() + fill_size)
    }

    /// Whether the ring buffer is readable.
    ///
    /// Readable exactly when there is unconsumed data, i.e. the head and
    /// tail counters differ.
    pub fn readable(&self) -> bool {
        let data_tail = unsafe { &(*(self.ptr as *mut perf_event_mmap_page)).data_tail };
        let data_head = unsafe { &(*(self.ptr as *mut perf_event_mmap_page)).data_head };
        data_tail != data_head
    }

    /// Get the ring buffer as a slice.
    ///
    /// SAFETY relies on `ptr`/`size` describing a live mapping (see `init`).
    #[allow(dead_code)]
    pub fn as_slice(&self) -> &[u8] {
        unsafe { core::slice::from_raw_parts(self.ptr as *const u8, self.size) }
    }

    /// Get the ring buffer as a mutable slice.
    ///
    /// SAFETY relies on `ptr`/`size` describing a live mapping (see `init`).
    pub fn as_mut_slice(&mut self) -> &mut [u8] {
        unsafe { core::slice::from_raw_parts_mut(self.ptr as *mut u8, self.size) }
    }
}
194
/// BPF performance event structure.
///
/// Couples the probe arguments the event was created with and the
/// mutable ring-buffer state (`BpfPerfEventData`).
#[derive(Debug)]
pub struct BpfPerfEvent {
    // Probe arguments captured at creation; kept but not read here.
    _args: PerfProbeArgs,
    // Mutable event state: enabled flag, ring buffer, mmap offset.
    data: BpfPerfEventData,
}
201
/// Data for BPF performance event.
#[derive(Debug)]
pub struct BpfPerfEventData {
    // Whether writes are currently accepted (toggled by enable/disable).
    enabled: bool,
    // Ring buffer backing the event; empty until `do_mmap` is called.
    mmap_page: RingPage,
    // File offset the mmap was requested at, recorded by `do_mmap`.
    offset: usize,
}
209
210impl BpfPerfEvent {
211    /// Create a new BpfPerfEvent.
212    pub fn new(args: PerfProbeArgs) -> Self {
213        BpfPerfEvent {
214            _args: args,
215            data: BpfPerfEventData {
216                enabled: false,
217                mmap_page: RingPage::empty(),
218                offset: 0,
219            },
220        }
221    }
222
223    /// Bind the perf event to a mmap page.
224    pub fn do_mmap(&mut self, start: usize, len: usize, offset: usize) -> Result<()> {
225        // create mmap page
226        let mmap_page = RingPage::new_init(start, len);
227        self.data.mmap_page = mmap_page;
228        self.data.offset = offset;
229        Ok(())
230    }
231
232    /// Write a perf event to the mmap page.
233    /// Only when the perf event is enabled, the event will be written.
234    pub fn write_event(&mut self, data: &[u8]) -> Result<()> {
235        if self.data.enabled {
236            self.data.mmap_page.write_event(data);
237        }
238        Ok(())
239    }
240
241    /// Enable the perf event
242    pub fn enable(&mut self) -> Result<()> {
243        self.data.enabled = true;
244        Ok(())
245    }
246
247    /// Disable the perf event
248    pub fn disable(&mut self) -> Result<()> {
249        self.data.enabled = false;
250        Ok(())
251    }
252
253    /// Whether the perf event is enabled
254    pub fn enabled(&self) -> bool {
255        self.data.enabled
256    }
257
258    /// Whether the perf event is readable
259    pub fn readable(&self) -> bool {
260        self.data.mmap_page.readable()
261    }
262
263    /// Whether the perf event is writable
264    pub fn writeable(&self) -> bool {
265        false
266    }
267}