Skip to main content

aya_friday/
util.rs

1//! Utility functions.
2use std::{
3    collections::BTreeMap,
4    error::Error,
5    ffi::{CStr, CString},
6    fmt::Display,
7    fs::{self, File},
8    io::{self, BufRead, BufReader},
9    mem,
10    num::ParseIntError,
11    os::fd::{AsFd as _, BorrowedFd},
12    path::Path,
13    ptr, slice,
14    str::{FromStr, Utf8Error},
15};
16
17use aya_obj::generated::{TC_H_MAJ_MASK, TC_H_MIN_MASK};
18use libc::{
19    _SC_PAGESIZE, MAP_FAILED, MAP_PRIVATE, PROT_READ, c_int, c_void, if_nametoindex, off_t,
20    sysconf, uname, utsname,
21};
22use log::warn;
23
24use crate::{
25    Pod,
26    sys::{SyscallError, mmap, munmap},
27};
28
/// A Linux kernel version made of `major`, `minor` and `patch` components.
///
/// The field order is significant: the derived [`PartialOrd`] compares `major` first,
/// then `minor`, then `patch`.
// Adapted from https://docs.rs/procfs/latest/procfs/sys/kernel/struct.Version.html.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd)]
pub struct KernelVersion {
    /// First component, e.g. the `6` in `6.3.11`.
    pub(crate) major: u8,
    /// Second component, e.g. the `3` in `6.3.11`.
    pub(crate) minor: u8,
    /// Third component, e.g. the `11` in `6.3.11`; wider than the others because it can
    /// exceed 255.
    pub(crate) patch: u16,
}
37
38#[derive(thiserror::Error, Debug)]
39enum CurrentKernelVersionError {
40    #[error("failed to read kernel version")]
41    IO(#[from] io::Error),
42    #[error("failed to parse kernel version")]
43    ParseError(String),
44    #[error("kernel version string is not valid UTF-8")]
45    Utf8(#[from] Utf8Error),
46}
47
48impl KernelVersion {
49    /// Constructor.
50    pub const fn new(major: u8, minor: u8, patch: u16) -> Self {
51        Self {
52            major,
53            minor,
54            patch,
55        }
56    }
57
58    /// Returns the kernel version of the currently running kernel.
59    pub fn current() -> Result<Self, impl Error> {
60        thread_local! {
61            // TODO(https://github.com/rust-lang/rust/issues/109737): Use
62            // `std::cell::OnceCell` when `get_or_try_init` is stabilized.
63            static CACHE: once_cell::unsync::OnceCell<KernelVersion> = const { once_cell::unsync::OnceCell::new() };
64        }
65        CACHE.with(|cell| {
66            // TODO(https://github.com/rust-lang/rust/issues/109737): Replace `once_cell` with
67            // `std::cell::OnceCell`.
68            cell.get_or_try_init(|| {
69                // error: unsupported operation: `open` not available when isolation is enabled
70                if cfg!(miri) {
71                    Ok(Self::new(0xff, 0xff, 0xff))
72                } else {
73                    Self::get_kernel_version()
74                }
75            })
76            .copied()
77        })
78    }
79
80    /// Returns true iff the current kernel version is greater than or equal to the given version.
81    pub(crate) fn at_least(major: u8, minor: u8, patch: u16) -> bool {
82        match Self::current() {
83            Ok(current) => current >= Self::new(major, minor, patch),
84            Err(error) => {
85                warn!("failed to get current kernel version: {error}");
86                false
87            }
88        }
89    }
90
91    /// The equivalent of `LINUX_VERSION_CODE`.
92    pub fn code(self) -> u32 {
93        let Self {
94            major,
95            minor,
96            mut patch,
97        } = self;
98
99        // Certain LTS kernels went above the "max" 255 patch so
100        // backports were done to cap the patch version
101        let max_patch = match (major, minor) {
102            // On 4.4 + 4.9, any patch 257 or above was hardcoded to 255.
103            // See: https://github.com/torvalds/linux/commit/a15813a +
104            // https://github.com/torvalds/linux/commit/42efb098
105            (4, 4 | 9) => 257,
106            // On 4.14, any patch 252 or above was hardcoded to 255.
107            // See: https://github.com/torvalds/linux/commit/e131e0e
108            (4, 14) => 252,
109            // On 4.19, any patch 222 or above was hardcoded to 255.
110            // See: https://github.com/torvalds/linux/commit/a256aac
111            (4, 19) => 222,
112            // For other kernels (i.e., newer LTS kernels as other
113            // ones won't reach 255+ patches) clamp it to 255. See:
114            // https://github.com/torvalds/linux/commit/9b82f13e
115            _ => 255,
116        };
117
118        // anything greater or equal to `max_patch` is hardcoded to
119        // 255.
120        if patch >= max_patch {
121            patch = 255;
122        }
123
124        (u32::from(major) << 16) + (u32::from(minor) << 8) + u32::from(patch)
125    }
126
127    // These (get_ubuntu_kernel_version, parse_ubuntu_kernel_version, read_ubuntu_kernel_version_file)
128    // are ported from https://github.com/torvalds/linux/blob/3f01e9f/tools/lib/bpf/libbpf_probes.c#L21-L101.
129    fn get_ubuntu_kernel_version() -> Result<Option<Self>, CurrentKernelVersionError> {
130        let content = Self::read_ubuntu_kernel_version_file()?;
131        content
132            .and_then(|content| Self::parse_ubuntu_kernel_version(&content).transpose())
133            .transpose()
134    }
135
136    fn read_ubuntu_kernel_version_file() -> Result<Option<String>, CurrentKernelVersionError> {
137        const UBUNTU_KVER_FILE: &str = "/proc/version_signature";
138        match fs::read_to_string(UBUNTU_KVER_FILE) {
139            Ok(s) => Ok(Some(s)),
140            Err(e) => {
141                if e.kind() == io::ErrorKind::NotFound {
142                    Ok(None)
143                } else {
144                    Err(e.into())
145                }
146            }
147        }
148    }
149
150    fn parse_ubuntu_kernel_version(s: &str) -> Result<Option<Self>, CurrentKernelVersionError> {
151        let mut parts = s.split_terminator(char::is_whitespace);
152        let mut next = || {
153            parts
154                .next()
155                .ok_or_else(|| CurrentKernelVersionError::ParseError(s.to_string()))
156        };
157        let _ubuntu: &str = next()?;
158        let _ubuntu_version: &str = next()?;
159        let kernel_version_string = next()?;
160        Self::parse_kernel_version_string(kernel_version_string).map(Some)
161    }
162
163    fn get_debian_kernel_version(
164        info: &utsname,
165    ) -> Result<Option<Self>, CurrentKernelVersionError> {
166        // Safety: man 2 uname:
167        //
168        // The length of the arrays in a struct utsname is unspecified (see NOTES); the fields are
169        // terminated by a null byte ('\0').
170        let s = unsafe { CStr::from_ptr(info.version.as_ptr()) };
171        let s = s.to_str()?;
172        let Some((_prefix, kernel_version_string)) = s.split_once("Debian ") else {
173            return Ok(None);
174        };
175        Self::parse_kernel_version_string(kernel_version_string).map(Some)
176    }
177
178    fn get_kernel_version() -> Result<Self, CurrentKernelVersionError> {
179        if let Ok(Some(v)) = Self::get_ubuntu_kernel_version() {
180            return Ok(v);
181        }
182
183        let mut info = unsafe { mem::zeroed::<utsname>() };
184        if unsafe { uname(&raw mut info) } != 0 {
185            return Err(io::Error::last_os_error().into());
186        }
187
188        if let Some(v) = Self::get_debian_kernel_version(&info)? {
189            return Ok(v);
190        }
191
192        // Safety: man 2 uname:
193        //
194        // The length of the arrays in a struct utsname is unspecified (see NOTES); the fields are
195        // terminated by a null byte ('\0').
196        let s = unsafe { CStr::from_ptr(info.release.as_ptr()) };
197        let s = s.to_str()?;
198        Self::parse_kernel_version_string(s)
199    }
200
201    fn parse_kernel_version_string(s: &str) -> Result<Self, CurrentKernelVersionError> {
202        fn parse<T: FromStr<Err = ParseIntError>>(s: Option<&str>) -> Option<T> {
203            s.map(str::parse).transpose().unwrap_or_default()
204        }
205        let error = || CurrentKernelVersionError::ParseError(s.to_string());
206        let mut parts = s.split(|c: char| c == '.' || !c.is_ascii_digit());
207        let major = parse(parts.next()).ok_or_else(error)?;
208        let minor = parse(parts.next()).ok_or_else(error)?;
209        let patch = parse(parts.next()).ok_or_else(error)?;
210        Ok(Self::new(major, minor, patch))
211    }
212}
213
214impl Display for KernelVersion {
215    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
216        write!(f, "{}.{}.{}", self.major, self.minor, self.patch)
217    }
218}
219
220/// Returns the numeric IDs of the CPUs currently online.
221pub fn online_cpus() -> Result<Vec<u32>, (&'static str, io::Error)> {
222    const ONLINE_CPUS: &str = "/sys/devices/system/cpu/online";
223
224    read_cpu_ranges(ONLINE_CPUS)
225}
226
227/// Get the number of possible cpus.
228///
229/// See `/sys/devices/system/cpu/possible`.
230pub fn nr_cpus() -> Result<usize, (&'static str, io::Error)> {
231    const POSSIBLE_CPUS: &str = "/sys/devices/system/cpu/possible";
232
233    thread_local! {
234        // TODO(https://github.com/rust-lang/rust/issues/109737): Use
235        // `std::cell::OnceCell` when `get_or_try_init` is stabilized.
236        static CACHE: once_cell::unsync::OnceCell<usize> = const { once_cell::unsync::OnceCell::new() };
237    }
238    CACHE.with(|cell| {
239        // TODO(https://github.com/rust-lang/rust/issues/109737): Replace `once_cell` with
240        // `std::cell::OnceCell`.
241        cell.get_or_try_init(|| {
242            // error: unsupported operation: `open` not available when isolation is enabled
243            if cfg!(miri) {
244                parse_cpu_ranges("0-3").map_err(|error| (POSSIBLE_CPUS, error))
245            } else {
246                read_cpu_ranges(POSSIBLE_CPUS)
247            }
248            .map(|cpus| cpus.len())
249        })
250        .copied()
251    })
252}
253
254fn read_cpu_ranges(path: &'static str) -> Result<Vec<u32>, (&'static str, io::Error)> {
255    (|| {
256        let data = fs::read_to_string(path)?;
257        parse_cpu_ranges(data.trim())
258    })()
259    .map_err(|error| (path, error))
260}
261
/// Expands a comma-separated list of CPU ids and inclusive `start-end` ranges (for
/// example `0-3,6,8-9`) into the individual ids, in the order written.
///
/// # Errors
///
/// Returns an [`io::ErrorKind::InvalidData`] error whose message is the first list
/// element that is not a `u32` or a `u32-u32` pair.
fn parse_cpu_ranges(data: &str) -> Result<Vec<u32>, io::Error> {
    let mut cpus = Vec::new();
    for range in data.split(',') {
        // A lone id is the range from itself to itself. Anything after the first `-`
        // must parse as a single number, which also rejects `a-b-c`.
        let (first, last) = range.split_once('-').unwrap_or((range, range));
        let span = match (first.parse::<u32>(), last.parse::<u32>()) {
            (Ok(first), Ok(last)) => first..=last,
            (Err(ParseIntError { .. }), _) | (_, Err(ParseIntError { .. })) => {
                return Err(io::Error::new(io::ErrorKind::InvalidData, range));
            }
        };
        cpus.extend(span);
    }
    Ok(cpus)
}
286
287/// Loads kernel symbols from `/proc/kallsyms`.
288///
289/// See [`crate::maps::StackTraceMap`] for an example on how to use this to resolve kernel addresses to symbols.
290pub fn kernel_symbols() -> Result<BTreeMap<u64, String>, io::Error> {
291    let mut reader = BufReader::new(File::open("/proc/kallsyms")?);
292    parse_kernel_symbols(&mut reader)
293}
294
/// Parses `address kind name` lines (the layout of `/proc/kallsyms`) into a map from
/// address to symbol name.
///
/// Fields are separated by single spaces; anything after the name is ignored. When an
/// address occurs more than once, the last line wins.
///
/// # Errors
///
/// Propagates read errors, and returns an [`io::ErrorKind::InvalidData`] error carrying
/// the offending line when a line has fewer than three fields or a non-hexadecimal
/// address.
fn parse_kernel_symbols(reader: impl BufRead) -> Result<BTreeMap<u64, String>, io::Error> {
    // Splits one line into its address and symbol name; `None` means malformed.
    fn parse_line(line: &str) -> Option<(u64, String)> {
        let mut fields = line.splitn(4, ' ');
        let (addr, _kind, name) = (fields.next()?, fields.next()?, fields.next()?);
        // TODO(https://github.com/rust-lang/rust-clippy/issues/14112): Remove this
        // allowance when the lint behaves more sensibly.
        #[expect(clippy::manual_ok_err, reason = "type ascription")]
        let addr = match u64::from_str_radix(addr, 16) {
            Ok(addr) => Some(addr),
            Err(ParseIntError { .. }) => None,
        }?;
        Some((addr, name.to_owned()))
    }

    let mut symbols = BTreeMap::new();
    for line in reader.lines() {
        let line = line?;
        let Some((addr, name)) = parse_line(&line) else {
            return Err(io::Error::new(io::ErrorKind::InvalidData, line));
        };
        symbols.insert(addr, name);
    }
    Ok(symbols)
}
318
319/// Returns the prefix used by syscalls.
320///
321/// # Example
322///
323/// ```no_run
324/// # #[expect(deprecated)]
325/// use aya::util::syscall_prefix;
326/// # #[expect(deprecated)]
327/// let prefix = syscall_prefix().unwrap();
328/// let syscall_fname = format!("{prefix}exec");
329/// ```
330///
331/// # Errors
332///
333/// Returns [`std::io::ErrorKind::NotFound`] if the prefix can't be guessed. Returns other [`std::io::Error`] kinds if `/proc/kallsyms` can't be opened or is somehow invalid.
334#[deprecated(
335    since = "0.12.0",
336    note = "On some systems - commonly on 64 bit kernels that support running \
337    32 bit applications - the syscall prefix depends on what architecture an \
338    application is compiled for, therefore attaching to only one prefix is \
339    incorrect and can lead to security issues."
340)]
341pub fn syscall_prefix() -> Result<&'static str, io::Error> {
342    const PREFIXES: [&str; 7] = [
343        "sys_",
344        "__x64_sys_",
345        "__x32_compat_sys_",
346        "__ia32_compat_sys_",
347        "__arm64_sys_",
348        "__s390x_sys_",
349        "__s390_sys_",
350    ];
351    let ksym = kernel_symbols()?;
352    for p in PREFIXES {
353        let prefixed_syscall = format!("{p}bpf");
354        if ksym.values().any(|el| *el == prefixed_syscall) {
355            return Ok(p);
356        }
357    }
358    Err(io::ErrorKind::NotFound.into())
359}
360
361pub(crate) fn ifindex_from_ifname(if_name: &str) -> Result<u32, io::Error> {
362    let c_str_if_name = CString::new(if_name)?;
363    let c_if_name = c_str_if_name.as_ptr();
364    // Safety: libc wrapper
365    let if_index = unsafe { if_nametoindex(c_if_name) };
366    if if_index == 0 {
367        return Err(io::Error::last_os_error());
368    }
369    Ok(if_index)
370}
371
372pub(crate) const fn tc_handler_make(major: u32, minor: u32) -> u32 {
373    (major & TC_H_MAJ_MASK) | (minor & TC_H_MIN_MASK)
374}
375
/// Include bytes from a file for use in a subsequent [`crate::Ebpf::load`].
///
/// This macro differs from the standard `include_bytes!` macro in that it also ensures
/// that the bytes are aligned correctly to be parsed as an ELF binary. This avoids some
/// nasty compilation errors when the resulting byte array does not have the correct
/// alignment.
///
/// # Examples
/// ```ignore
/// use aya::{Ebpf, include_bytes_aligned};
///
/// let mut bpf = Ebpf::load(include_bytes_aligned!(
///     "/path/to/bpf.o"
/// ))?;
///
/// # Ok::<(), aya::EbpfError>(())
/// ```
#[macro_export]
macro_rules! include_bytes_aligned {
    ($path:expr) => {{
        // All eBPF programs are ELF64 objects (regardless of host target) with
        // 8-byte aligned headers and all eBPF instructions are 8 bytes each.
        #[repr(C, align(8))]
        struct Align8<Bytes: ?Sized>(Bytes);

        // Must be a reference because `Align8<[u8]>` is not `Sized` and we
        // can't write `Align8<[u8; N]>` since `N` is not nameable in the type
        // in this context (even though the compiler knows it at compile time).
        const INCLUDED: &Align8<[u8]> = &Align8(*include_bytes!($path));

        // Hand out only the byte slice; the wrapper exists purely for alignment.
        &INCLUDED.0
    }};
}
410
411pub(crate) fn page_size() -> usize {
412    // Safety: libc
413    (unsafe { sysconf(_SC_PAGESIZE) }) as usize
414}
415
416// bytes_of converts a <T> to a byte slice
417pub(crate) const fn bytes_of<T: Pod>(val: &T) -> &[u8] {
418    unsafe { slice::from_raw_parts(ptr::from_ref(val).cast(), size_of_val(val)) }
419}
420
421pub(crate) const fn bytes_of_slice<T: Pod>(val: &[T]) -> &[u8] {
422    let size = val.len().wrapping_mul(size_of::<T>());
423    // Safety:
424    // Any alignment is allowed.
425    // The size is determined in this function.
426    // The Pod trait ensures the type is valid to cast to bytes.
427    unsafe { slice::from_raw_parts(val.as_ptr().cast(), size) }
428}
429
/// Returns the bytes of `bpf_name` with all trailing NUL bytes removed.
///
/// NUL bytes that are followed by a non-NUL byte are kept; an all-NUL name yields an
/// empty slice.
pub(crate) fn bytes_of_bpf_name(bpf_name: &[core::ffi::c_char; 16]) -> &[u8] {
    let trailing_nuls = bpf_name.iter().rev().take_while(|ch| **ch == 0).count();
    let length = bpf_name.len() - trailing_nuls;
    // Safety: `length` never exceeds the array length, and `c_char` has the same size
    // and alignment as `u8`.
    unsafe { slice::from_raw_parts(bpf_name.as_ptr().cast(), length) }
}
437
438// MMap corresponds to a memory-mapped region.
439//
440// The data is unmapped in Drop.
441#[cfg_attr(test, derive(Debug))]
442pub(crate) struct MMap {
443    ptr: ptr::NonNull<c_void>,
444    len: usize,
445}
446
447// Needed because NonNull<T> is !Send and !Sync out of caution that the data
448// might be aliased unsafely.
449unsafe impl Send for MMap {}
450unsafe impl Sync for MMap {}
451
452impl MMap {
453    pub(crate) fn new(
454        fd: BorrowedFd<'_>,
455        len: usize,
456        prot: c_int,
457        flags: c_int,
458        offset: off_t,
459    ) -> Result<Self, SyscallError> {
460        match unsafe { mmap(ptr::null_mut(), len, prot, flags, fd, offset) } {
461            MAP_FAILED => Err(SyscallError {
462                call: "mmap",
463                io_error: io::Error::last_os_error(),
464            }),
465            ptr => {
466                let ptr = ptr::NonNull::new(ptr).ok_or_else(|| {
467                    // This should never happen, but to be paranoid, and so we never need to talk
468                    // about a null pointer, we check it anyway.
469                    SyscallError {
470                        call: "mmap",
471                        io_error: io::Error::other("mmap returned null pointer"),
472                    }
473                })?;
474                Ok(Self { ptr, len })
475            }
476        }
477    }
478
479    /// Maps the file at `path` for reading, using `mmap` with `MAP_PRIVATE`.
480    pub(crate) fn map_copy_read_only(path: &Path) -> Result<Self, io::Error> {
481        let file = File::open(path)?;
482        Self::new(
483            file.as_fd(),
484            file.metadata()?.len().try_into().map_err(|e| {
485                io::Error::new(
486                    io::ErrorKind::FileTooLarge,
487                    format!("file too large to mmap: {e}"),
488                )
489            })?,
490            PROT_READ,
491            MAP_PRIVATE,
492            0,
493        )
494        .map_err(|SyscallError { io_error, call: _ }| io_error)
495    }
496
497    pub(crate) const fn ptr(&self) -> ptr::NonNull<c_void> {
498        self.ptr
499    }
500}
501
502impl AsRef<[u8]> for MMap {
503    fn as_ref(&self) -> &[u8] {
504        let Self { ptr, len } = self;
505        unsafe { slice::from_raw_parts(ptr.as_ptr().cast(), *len) }
506    }
507}
508
509impl Drop for MMap {
510    fn drop(&mut self) {
511        let Self { ptr, len } = *self;
512        let _: i32 = unsafe { munmap(ptr.as_ptr(), len) };
513    }
514}
515
#[cfg(test)]
mod tests {
    use assert_matches::assert_matches;

    use super::*;

    #[test]
    fn test_parse_kernel_version_string() {
        // cat /proc/version_signature on Proxmox VE 8.1.4 (prints nothing).
        assert_matches!(
            KernelVersion::parse_ubuntu_kernel_version(""),
            Err(CurrentKernelVersionError::ParseError(s)) if s.is_empty()
        );

        // cat /proc/version_signature on Ubuntu 22.04.
        assert_matches!(
            KernelVersion::parse_ubuntu_kernel_version("Ubuntu 5.15.0-82.91-generic 5.15.111"),
            Ok(Some(kernel_version)) if kernel_version == KernelVersion::new(5, 15, 111)
        );

        // Plain release strings: WSL, then uname -r on Fedora.
        let releases = [
            (
                "5.15.90.1-microsoft-standard-WSL2",
                KernelVersion::new(5, 15, 90),
            ),
            ("6.3.11-200.fc38.x86_64", KernelVersion::new(6, 3, 11)),
        ];
        for (release, expected) in releases {
            assert_matches!(
                KernelVersion::parse_kernel_version_string(release),
                Ok(kernel_version) if kernel_version == expected
            );
        }
    }

    #[test]
    fn test_parse_online_cpus() {
        let zero_to_seven: Vec<u32> = (0..=7).collect();
        let valid = [
            ("0", vec![0]),
            ("0,1", vec![0, 1]),
            ("0,1,2", vec![0, 1, 2]),
            ("0-7", zero_to_seven.clone()),
            ("0-3,4-7", zero_to_seven.clone()),
            ("0-5,6,7", zero_to_seven),
        ];
        for (input, expected) in valid {
            assert_eq!(parse_cpu_ranges(input).unwrap(), expected);
        }

        for input in ["", "0-1,2-", "foo"] {
            assert_matches!(parse_cpu_ranges(input), Err(_));
        }
    }

    #[test]
    fn test_parse_kernel_symbols() {
        let data = "0000000000002000 A irq_stack_backing_store\n\
                    0000000000006000 A cpu_tss_rw [foo bar]\n"
            .as_bytes();
        let syms = parse_kernel_symbols(&mut BufReader::new(data)).unwrap();

        let addresses: Vec<&u64> = syms.keys().collect();
        assert_eq!(addresses, vec![&0x2000, &0x6000]);

        let expected = [
            (0x2000u64, "irq_stack_backing_store"),
            (0x6000u64, "cpu_tss_rw"),
        ];
        for (addr, name) in expected {
            assert_eq!(syms[&addr].as_str(), name);
        }
    }
}
572}