// bpf_linker/linker.rs

1use std::{
2    borrow::Cow,
3    collections::HashSet,
4    ffi::{CStr, CString, OsStr},
5    fs,
6    io::{self, Read as _},
7    ops::Deref,
8    os::unix::ffi::OsStrExt as _,
9    path::{Path, PathBuf},
10    str::{self, FromStr},
11};
12
13use ar::Archive;
14use llvm_sys::{
15    error_handling::{LLVMEnablePrettyStackTrace, LLVMInstallFatalErrorHandler},
16    target_machine::LLVMCodeGenFileType,
17};
18use thiserror::Error;
19use tracing::{debug, error, info, warn};
20
21use crate::llvm::{self, LLVMContext, LLVMModule, LLVMTargetMachine, MemoryBuffer};
22
23/// Linker error
24#[derive(Debug, Error)]
25pub enum LinkerError {
26    /// Invalid Cpu.
27    #[error("invalid CPU {0}")]
28    InvalidCpu(String),
29
30    /// Invalid LLVM target.
31    #[error("invalid LLVM target {0}")]
32    InvalidTarget(String),
33
34    /// An IO Error occurred while linking a module.
35    #[error("`{0}`: {1}")]
36    IoError(PathBuf, io::Error),
37
38    /// The file is not bitcode, an object file containing bitcode or an archive file.
39    #[error("invalid input file `{0}`")]
40    InvalidInputType(PathBuf),
41
42    /// Linking a module failed.
43    #[error("failure linking module {0}")]
44    LinkModuleError(PathBuf),
45
46    /// Parsing an IR module failed.
47    #[error("failure parsing IR module `{0}`: {1}")]
48    IRParseError(PathBuf, String),
49
50    /// Linking a module included in an archive failed.
51    #[error("failure linking module {1} from {0}")]
52    LinkArchiveModuleError(PathBuf, PathBuf),
53
54    /// Optimizing the BPF code failed.
55    #[error("LLVMRunPasses failed: {0}")]
56    OptimizeError(String),
57
58    /// Generating the BPF code failed.
59    #[error("LLVMTargetMachineEmitToFile failed: {0}")]
60    EmitCodeError(String),
61
62    /// Writing the bitcode failed.
63    #[error("LLVMWriteBitcodeToFile failed: {0}")]
64    WriteBitcodeError(io::Error),
65
66    /// Writing the LLVM IR failed.
67    #[error("LLVMPrintModuleToFile failed: {0}")]
68    WriteIRError(String),
69
70    /// There was an error extracting the bitcode embedded in an object file.
71    #[error("error reading embedded bitcode: {0}")]
72    EmbeddedBitcodeError(String),
73
74    /// The input object file does not have embedded bitcode.
75    #[error("no bitcode section found in {0}")]
76    MissingBitcodeSection(PathBuf),
77
78    /// LLVM cannot create a module for linking.
79    #[error("failed to create module")]
80    CreateModuleError,
81}
82
/// BPF CPU type handed to LLVM when the target machine is created.
#[derive(Clone, Copy, Debug)]
pub enum Cpu {
    /// The `generic` CPU.
    Generic,
    /// The `probe` CPU.
    Probe,
    /// The `v1` CPU.
    V1,
    /// The `v2` CPU.
    V2,
    /// The `v3` CPU.
    V3,
}
92
93impl Cpu {
94    fn as_c_str(&self) -> &'static CStr {
95        match self {
96            Self::Generic => c"generic",
97            Self::Probe => c"probe",
98            Self::V1 => c"v1",
99            Self::V2 => c"v2",
100            Self::V3 => c"v3",
101        }
102    }
103}
104
105impl std::fmt::Display for Cpu {
106    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
107        f.pad(match self {
108            Self::Generic => "generic",
109            Self::Probe => "probe",
110            Self::V1 => "v1",
111            Self::V2 => "v2",
112            Self::V3 => "v3",
113        })
114    }
115}
116
117impl FromStr for Cpu {
118    type Err = LinkerError;
119
120    fn from_str(s: &str) -> Result<Self, Self::Err> {
121        Ok(match s {
122            "generic" => Self::Generic,
123            "probe" => Self::Probe,
124            "v1" => Self::V1,
125            "v2" => Self::V2,
126            "v3" => Self::V3,
127            _ => return Err(LinkerError::InvalidCpu(s.to_string())),
128        })
129    }
130}
131
/// Optimization level applied to the linked module.
#[derive(Clone, Copy, Debug)]
pub enum OptLevel {
    /// Disable optimizations (`-O0`).
    No,
    /// Fewer optimizations than the default (`-O1`).
    Less,
    /// The default set of optimizations (`-O2`).
    Default,
    /// Aggressive optimizations (`-O3`).
    Aggressive,
    /// Optimize for code size (`-Os`).
    Size,
    /// Aggressively optimize for code size (`-Oz`).
    SizeMin,
}
148
/// A single input handed to the linker.
///
/// Both variants only borrow their data, so the referenced path or buffer must outlive the
/// value.
#[derive(Debug)]
pub enum LinkerInput<'a> {
    /// An input that is read from the file at `path`.
    File { path: &'a Path },
    /// An in-memory input. `name` identifies the buffer in log and error messages.
    Buffer { name: &'a str, bytes: &'a [u8] },
}
153
154impl<'a> LinkerInput<'a> {
155    pub fn new_from_file(path: &'a Path) -> Self {
156        LinkerInput::File { path }
157    }
158
159    pub fn new_from_buffer(name: &'a str, bytes: &'a [u8]) -> Self {
160        LinkerInput::Buffer { name, bytes }
161    }
162}
163
/// Format of a single (non-archive) linker input, as detected from its leading bytes.
enum LinkerInputKind {
    /// LLVM bitcode.
    Bitcode,
    /// ELF object file.
    Elf,
    /// Mach-O object file.
    MachO,
    /// Textual LLVM IR.
    Ir,
}
170
171impl std::fmt::Display for LinkerInputKind {
172    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
173        write!(
174            f,
175            "{}",
176            match self {
177                Self::Bitcode => "Bitcode",
178                Self::Elf => "ELF",
179                Self::MachO => "Mach-O",
180                Self::Ir => "IR",
181            }
182        )
183    }
184}
185
/// Borrowed input data, tagged with its detected format and ready to be linked.
enum PreparedLinkerInput<'a> {
    /// Raw LLVM bitcode bytes.
    Bitcode(&'a [u8]),
    /// Bytes of an ELF object file.
    Elf(&'a [u8]),
    /// Bytes of a Mach-O object file.
    MachO(&'a [u8]),
    /// NUL-terminated textual LLVM IR.
    Ir(&'a CStr),
}
192
193enum InputKind {
194    Archive,
195    Linker(LinkerInputKind),
196}
197
198impl std::fmt::Display for InputKind {
199    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
200        match self {
201            Self::Archive => write!(f, "archive"),
202            Self::Linker(kind) => write!(f, "{}", kind),
203        }
204    }
205}
206
/// Kind of output the linker produces.
#[derive(Clone, Copy, Debug)]
pub enum OutputType {
    /// LLVM bitcode.
    Bitcode,
    /// Target assembly.
    Assembly,
    /// Textual LLVM IR.
    LlvmAssembly,
    /// ELF object file.
    Object,
}
219
220/// Options to configure the linker
221#[derive(Debug)]
222pub struct LinkerOptions {
223    /// The LLVM target to generate code for. If None, the target will be inferred from the input
224    /// modules.
225    pub target: Option<CString>,
226    /// Cpu type.
227    pub cpu: Cpu,
228    /// Cpu features.
229    pub cpu_features: CString,
230    /// Optimization level.
231    pub optimize: OptLevel,
232    /// Whether to aggressively unroll loops. Useful for older kernels that don't support loops.
233    pub unroll_loops: bool,
234    /// Remove `noinline` attributes from functions. Useful for kernels before 5.8 that don't
235    /// support function calls.
236    pub ignore_inline_never: bool,
237    /// Extra command line args to pass to LLVM.
238    pub llvm_args: Vec<CString>,
239    /// Disable passing --bpf-expand-memcpy-in-order to LLVM.
240    pub disable_expand_memcpy_in_order: bool,
241    /// Disable exporting memcpy, memmove, memset, memcmp and bcmp. Exporting
242    /// those is commonly needed when LLVM does not manage to expand memory
243    /// intrinsics to a sequence of loads and stores.
244    pub disable_memory_builtins: bool,
245    /// Emit BTF information
246    pub btf: bool,
247    /// Permit automatic insertion of __bpf_trap calls.
248    /// See: https://github.com/llvm/llvm-project/commit/ab391beb11f733b526b86f9df23734a34657d876
249    pub allow_bpf_trap: bool,
250}
251
252/// BPF Linker
253pub struct Linker {
254    options: LinkerOptions,
255    context: LLVMContext,
256    diagnostic_handler: llvm::InstalledDiagnosticHandler<DiagnosticHandler>,
257    dump_module: Option<PathBuf>,
258}
259
260impl Linker {
261    /// Create a new linker instance with the given options.
262    pub fn new(options: LinkerOptions) -> Self {
263        let (context, diagnostic_handler) = llvm_init(&options);
264
265        Self {
266            options,
267            context,
268            diagnostic_handler,
269            dump_module: None,
270        }
271    }
272
273    /// Set the directory where the linker will dump the linked LLVM IR before and after
274    /// optimization, for debugging and inspection purposes.
275    ///
276    /// When set:
277    /// - The directory is created if it does not already exist.
278    /// - A "pre-opt.ll" file is written with the IR before optimization.
279    /// - A "post-opt.ll" file is written with the IR after optimization.
280    pub fn set_dump_module_path(&mut self, path: impl AsRef<Path>) {
281        self.dump_module = Some(path.as_ref().to_path_buf())
282    }
283
284    /// Link and generate the output code to file.
285    ///
286    /// # Example
287    ///
288    /// ```rust,no_run
289    /// # use std::{collections::HashSet, path::Path, borrow::Cow, ffi::CString};
290    /// # use bpf_linker::{Cpu, Linker, LinkerInput, LinkerOptions, OptLevel, OutputType};
291    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
292    /// let path = Path::new("/path/to/object-or-bitcode");
293    /// let bytes: &[u8] = &[]; // An in memory object/bitcode
294    /// # let options = LinkerOptions {
295    /// #     target: None,
296    /// #     cpu: Cpu::Generic,
297    /// #     cpu_features: CString::default(),
298    /// #     optimize: OptLevel::Default,
299    /// #     unroll_loops: false,
300    /// #     ignore_inline_never: false,
301    /// #     llvm_args: vec![],
302    /// #     disable_expand_memcpy_in_order: false,
303    /// #     disable_memory_builtins: false,
304    /// #     allow_bpf_trap: false,
305    /// #     btf: false,
306    /// # };
307    /// # let linker = Linker::new(options);
308    ///
309    /// let export_symbols = ["my_sym_1", "my_sym_2"];
310    ///
311    /// linker.link_to_file(
312    ///     [
313    ///         LinkerInput::new_from_file(path),
314    ///         LinkerInput::new_from_buffer("my buffer", bytes), // In memory buffer needs a name
315    ///     ],
316    ///     "/path/to/output",
317    ///     OutputType::Object,
318    ///     export_symbols,
319    /// )?;
320    /// # Ok(())
321    /// # }
322    /// ```
323    pub fn link_to_file<'i, 'a, I, P, E>(
324        &self,
325        inputs: I,
326        output: P,
327        output_type: OutputType,
328        export_symbols: E,
329    ) -> Result<(), LinkerError>
330    where
331        I: IntoIterator<Item = LinkerInput<'i>>,
332        E: IntoIterator<Item = &'a str>,
333        P: AsRef<Path>,
334    {
335        let (linked_module, target_machine) = self.link(inputs, export_symbols)?;
336        codegen_to_file(
337            &linked_module,
338            &target_machine,
339            output.as_ref(),
340            output_type,
341        )?;
342        Ok(())
343    }
344
345    /// Link and generate the output code to an in-memory buffer.
346    ///
347    /// # Example
348    ///
349    /// ```rust,no_run
350    /// # use std::{collections::HashSet, path::Path, borrow::Cow, ffi::CString};
351    /// # use bpf_linker::{Cpu, Linker, LinkerInput, LinkerOptions, OptLevel, OutputType};
352    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
353    /// let path = Path::new("/path/to/object-or-bitcode");
354    /// let bytes: &[u8] = &[]; // An in memory object/bitcode
355    /// # let options = LinkerOptions {
356    /// #     target: None,
357    /// #     cpu: Cpu::Generic,
358    /// #     cpu_features: CString::default(),
359    /// #     optimize: OptLevel::Default,
360    /// #     unroll_loops: false,
361    /// #     ignore_inline_never: false,
362    /// #     llvm_args: vec![],
363    /// #     disable_expand_memcpy_in_order: false,
364    /// #     disable_memory_builtins: false,
365    /// #     allow_bpf_trap: false,
366    /// #     btf: false,
367    /// # };
368    /// # let linker = Linker::new(options);
369    ///
370    /// let export_symbols = ["my_sym_1", "my_sym_2"];
371    ///
372    /// let out_buf = linker.link_to_buffer(
373    ///     [
374    ///         LinkerInput::new_from_file(path),
375    ///         LinkerInput::new_from_buffer("my buffer", bytes), // In memory buffer needs a name
376    ///     ],
377    ///     OutputType::Bitcode,
378    ///     export_symbols,
379    /// )?;
380    ///
381    /// // Use the buffer as slice of u8
382    /// let bytes = out_buf.as_slice();
383    /// println!("Linked {} bytes into memory)", bytes.len());
384    ///
385    /// # Ok(())
386    /// # }
387    /// ```
388    pub fn link_to_buffer<'i, 'a, I, E>(
389        &self,
390        inputs: I,
391        output_type: OutputType,
392        export_symbols: E,
393    ) -> Result<LinkerOutput, LinkerError>
394    where
395        I: IntoIterator<Item = LinkerInput<'i>>,
396        E: IntoIterator<Item = &'a str>,
397    {
398        let (linked_module, target_machine) = self.link(inputs, export_symbols)?;
399        codegen_to_buffer(&linked_module, &target_machine, output_type)
400    }
401
402    /// Link and generate the output code.
403    fn link<'ctx, 'i, 'a, I, E>(
404        &'ctx self,
405        inputs: I,
406        export_symbols: E,
407    ) -> Result<(LLVMModule<'ctx>, LLVMTargetMachine), LinkerError>
408    where
409        I: IntoIterator<Item = LinkerInput<'i>>,
410        E: IntoIterator<Item = &'a str>,
411    {
412        let Self {
413            options,
414            context,
415            dump_module,
416            ..
417        } = self;
418
419        let mut module = link_modules(context, inputs)?;
420
421        let target_machine = create_target_machine(options, &module)?;
422
423        if let Some(path) = dump_module {
424            fs::create_dir_all(path).map_err(|err| LinkerError::IoError(path.to_owned(), err))?;
425        }
426        if let Some(path) = dump_module {
427            // dump IR before optimization
428            let path = path.join("pre-opt.ll");
429            let path = CString::new(path.as_os_str().as_encoded_bytes()).unwrap();
430            module
431                .write_ir_to_path(&path)
432                .map_err(LinkerError::WriteIRError)?;
433        };
434        optimize(
435            options,
436            context,
437            &target_machine,
438            &mut module,
439            export_symbols,
440        )?;
441        if let Some(path) = dump_module {
442            // dump IR before optimization
443            let path = path.join("post-opt.ll");
444            let path = CString::new(path.as_os_str().as_encoded_bytes()).unwrap();
445            module
446                .write_ir_to_path(&path)
447                .map_err(LinkerError::WriteIRError)?;
448        };
449
450        Ok((module, target_machine))
451    }
452
453    pub fn has_errors(&self) -> bool {
454        self.diagnostic_handler.with_view(|h| h.has_errors)
455    }
456}
457
458fn link_modules<'ctx, 'i, I>(
459    context: &'ctx LLVMContext,
460    inputs: I,
461) -> Result<LLVMModule<'ctx>, LinkerError>
462where
463    I: IntoIterator<Item = LinkerInput<'i>>,
464{
465    let mut module = context
466        .create_module(c"linked_module")
467        .ok_or(LinkerError::CreateModuleError)?;
468
469    let mut buf = Vec::new();
470    for input in inputs {
471        let (path, input) = match input {
472            LinkerInput::File { path } => {
473                let data = fs::read(path).map_err(|e| LinkerError::IoError(path.to_owned(), e))?;
474                (path.to_owned(), Cow::Owned(data))
475            }
476            LinkerInput::Buffer { name, bytes } => (
477                PathBuf::from(format!("in_memory::{}", name)),
478                Cow::Borrowed(bytes),
479            ),
480        };
481
482        // determine whether the input is bitcode, ELF with embedded bitcode, an archive file
483        // or an invalid file
484        let in_type = InputKind::detect(input.as_ref())
485            .ok_or_else(|| LinkerError::InvalidInputType(path.clone()))?;
486
487        match in_type {
488            InputKind::Archive => {
489                info!("linking archive {}", path.display());
490
491                // Extract the archive and call link_reader() for each item.
492                let mut archive = Archive::new(input.as_ref());
493                while let Some(item) = archive.next_entry() {
494                    let mut item = item.map_err(|e| LinkerError::IoError(path.clone(), e))?;
495                    let name = PathBuf::from(OsStr::from_bytes(item.header().identifier()));
496                    info!("linking archive item {}", name.display());
497
498                    buf.clear();
499                    let _: usize = item
500                        .read_to_end(&mut buf)
501                        .map_err(|e| LinkerError::IoError(name.to_owned(), e))?;
502                    let in_type = match LinkerInputKind::detect(&buf) {
503                        Some(in_type) => in_type,
504                        None => {
505                            info!("ignoring archive item {}: invalid type", name.display());
506                            continue;
507                        }
508                    };
509
510                    let prepared_input = match in_type {
511                        LinkerInputKind::Bitcode => PreparedLinkerInput::Bitcode(&buf),
512                        LinkerInputKind::Elf => PreparedLinkerInput::Elf(&buf),
513                        LinkerInputKind::MachO => PreparedLinkerInput::MachO(&buf),
514                        LinkerInputKind::Ir => {
515                            buf.push(b'\0');
516                            PreparedLinkerInput::Ir(CStr::from_bytes_with_nul(&buf).map_err(
517                                |err| LinkerError::IRParseError(name.to_owned(), err.to_string()),
518                            )?)
519                        }
520                    };
521
522                    match link_data(context, &mut module, &name, prepared_input) {
523                        Ok(()) => continue,
524                        Err(LinkerError::InvalidInputType(name)) => {
525                            info!("ignoring archive item {}: invalid type", name.display());
526                            continue;
527                        }
528                        Err(LinkerError::MissingBitcodeSection(name)) => {
529                            warn!(
530                                "ignoring archive item {}: no embedded bitcode",
531                                name.display()
532                            );
533                            continue;
534                        }
535                        // TODO: this discards the underlying error.
536                        Err(_) => {
537                            return Err(LinkerError::LinkArchiveModuleError(
538                                path.to_owned(),
539                                name.to_owned(),
540                            ));
541                        }
542                    };
543                }
544            }
545            InputKind::Linker(kind) => {
546                let terminated_input: CString;
547                let prepared_input = match kind {
548                    LinkerInputKind::Bitcode => PreparedLinkerInput::Bitcode(input.as_ref()),
549                    LinkerInputKind::Elf => PreparedLinkerInput::Elf(input.as_ref()),
550                    LinkerInputKind::MachO => PreparedLinkerInput::MachO(input.as_ref()),
551                    LinkerInputKind::Ir => {
552                        let input: Vec<_> = input.into_owned();
553                        terminated_input = CString::new(input).map_err(|err| {
554                            LinkerError::IRParseError(path.to_owned(), err.to_string())
555                        })?;
556                        PreparedLinkerInput::Ir(&terminated_input)
557                    }
558                };
559                info!("linking file {} type {kind}", path.display());
560                match link_data(context, &mut module, &path, prepared_input) {
561                    Ok(()) => {}
562                    Err(LinkerError::InvalidInputType(path)) => {
563                        info!("ignoring file {}: invalid type", path.display());
564                        continue;
565                    }
566                    Err(LinkerError::MissingBitcodeSection(path)) => {
567                        warn!("ignoring file {}: no embedded bitcode", path.display());
568                    }
569                    Err(err) => return Err(err),
570                }
571            }
572        }
573    }
574
575    Ok(module)
576}
577
578fn link_data<'ctx>(
579    context: &'ctx LLVMContext,
580    module: &mut LLVMModule<'ctx>,
581    path: &Path,
582    data: PreparedLinkerInput<'_>,
583) -> Result<(), LinkerError> {
584    let mut link_data = |data: &[u8]| {
585        if !llvm::link_bitcode_buffer(context, module, data) {
586            Err(LinkerError::LinkModuleError(path.to_owned()))
587        } else {
588            Ok(())
589        }
590    };
591    match data {
592        PreparedLinkerInput::Bitcode(data) => link_data(data),
593        PreparedLinkerInput::Elf(data) => llvm::with_embedded_bitcode(context, data, link_data)
594            .map_err(LinkerError::EmbeddedBitcodeError)
595            .and_then(|opt| {
596                opt.unwrap_or_else(|| Err(LinkerError::MissingBitcodeSection(path.to_owned())))
597            }),
598        // we need to handle this here since archive files could contain
599        // mach-o files, eg somecrate.rlib containing lib.rmeta which is
600        // mach-o on macos
601        PreparedLinkerInput::MachO(_data) => Err(LinkerError::InvalidInputType(path.to_owned())),
602        PreparedLinkerInput::Ir(data) => {
603            let linked = llvm::link_ir_buffer(context, module, data)
604                .map_err(|e| LinkerError::IRParseError(path.to_owned(), e))?;
605
606            if linked {
607                Ok(())
608            } else {
609                Err(LinkerError::LinkModuleError(path.to_owned()))
610            }
611        }
612    }
613}
614
615fn create_target_machine(
616    options: &LinkerOptions,
617    module: &LLVMModule<'_>,
618) -> Result<LLVMTargetMachine, LinkerError> {
619    let LinkerOptions {
620        target,
621        cpu,
622        cpu_features,
623        ..
624    } = options;
625    // Here's how the output target is selected:
626    //
627    // 1) rustc with builtin BPF support: cargo build --target=bpf[el|eb]-unknown-none
628    //      the input modules are already configured for the correct output target
629    //
630    // 2) rustc with no BPF support: cargo rustc -- -C linker-flavor=bpf-linker -C linker=bpf-linker -C link-arg=--target=bpf[el|eb]
631    //      the input modules are configured for the *host* target, and the output target
632    //      is configured with the `--target` linker argument
633    //
634    // 3) rustc with no BPF support: cargo rustc -- -C linker-flavor=bpf-linker -C linker=bpf-linker
635    //      the input modules are configured for the *host* target, the output target isn't
636    //      set via `--target`, so default to `bpf` (bpfel or bpfeb depending on the host
637    //      endianness)
638    let (triple, target) = match target {
639        // case 1
640        Some(c_triple) => (c_triple.as_c_str(), llvm::target_from_triple(c_triple)),
641        None => {
642            let c_triple = module.get_target();
643            let c_triple = unsafe { CStr::from_ptr(c_triple) };
644            if c_triple.to_bytes().starts_with(b"bpf") {
645                // case 2
646                (c_triple, llvm::target_from_module(module))
647            } else {
648                // case 3.
649                info!(
650                    "detected non-bpf input target {} and no explicit output --target specified, selecting `bpf'",
651                    OsStr::from_bytes(c_triple.to_bytes()).display()
652                );
653                let c_triple = c"bpf";
654                (c_triple, llvm::target_from_triple(c_triple))
655            }
656        }
657    };
658    let target =
659        target.map_err(|_msg| LinkerError::InvalidTarget(triple.to_string_lossy().to_string()))?;
660
661    debug!(
662        "creating target machine: triple: {} cpu: {} features: {}",
663        triple.to_string_lossy(),
664        cpu,
665        cpu_features.to_string_lossy(),
666    );
667
668    let target_machine = LLVMTargetMachine::new(target, triple, cpu.as_c_str(), cpu_features)
669        .ok_or_else(|| LinkerError::InvalidTarget(triple.to_string_lossy().to_string()))?;
670
671    Ok(target_machine)
672}
673
674fn optimize<'ctx, 'a, E>(
675    options: &LinkerOptions,
676    context: &'ctx LLVMContext,
677    target_machine: &LLVMTargetMachine,
678    module: &mut LLVMModule<'ctx>,
679    export_symbols: E,
680) -> Result<(), LinkerError>
681where
682    E: IntoIterator<Item = &'a str>,
683{
684    let LinkerOptions {
685        disable_memory_builtins,
686        optimize,
687        btf,
688        ignore_inline_never,
689        ..
690    } = options;
691
692    let mut export_symbols: HashSet<Cow<'_, [u8]>> = export_symbols
693        .into_iter()
694        .map(|s| Cow::Borrowed(s.as_bytes()))
695        .collect();
696
697    if !disable_memory_builtins {
698        export_symbols.extend(
699            ["memcpy", "memmove", "memset", "memcmp", "bcmp"]
700                .into_iter()
701                .map(|s| s.as_bytes().into()),
702        );
703    };
704    debug!(
705        "linking exporting symbols {:?}, opt level {:?}",
706        export_symbols, optimize
707    );
708    // run optimizations. Will optionally remove noinline attributes, intern all non exported
709    // programs and maps and remove dead code.
710
711    if *btf {
712        // if we want to emit BTF, we need to sanitize the debug information
713        llvm::DISanitizer::new(context, module).run(&export_symbols);
714    } else {
715        // if we don't need BTF emission, we can strip DI
716        let ok = module.strip_debug_info();
717        debug!("Stripping DI, changed={}", ok);
718    }
719
720    llvm::optimize(
721        target_machine,
722        module,
723        options.optimize,
724        *ignore_inline_never,
725        &export_symbols,
726    )
727    .map_err(LinkerError::OptimizeError)?;
728
729    Ok(())
730}
731
732fn codegen_to_file(
733    module: &LLVMModule<'_>,
734    target_machine: &LLVMTargetMachine,
735    output: &Path,
736    output_type: OutputType,
737) -> Result<(), LinkerError> {
738    info!("writing {:?} to {:?}", output_type, output);
739    let output = CString::new(output.as_os_str().as_encoded_bytes()).unwrap();
740    match output_type {
741        OutputType::Bitcode => module
742            .write_bitcode_to_path(&output)
743            .map_err(LinkerError::WriteBitcodeError),
744        OutputType::LlvmAssembly => module
745            .write_ir_to_path(&output)
746            .map_err(LinkerError::WriteIRError),
747        OutputType::Assembly => target_machine
748            .emit_to_file(module, &output, LLVMCodeGenFileType::LLVMAssemblyFile)
749            .map_err(LinkerError::EmitCodeError),
750        OutputType::Object => target_machine
751            .emit_to_file(module, &output, LLVMCodeGenFileType::LLVMObjectFile)
752            .map_err(LinkerError::EmitCodeError),
753    }
754}
755
756fn codegen_to_buffer(
757    module: &LLVMModule<'_>,
758    target_machine: &LLVMTargetMachine,
759    output_type: OutputType,
760) -> Result<LinkerOutput, LinkerError> {
761    let memory_buffer = match output_type {
762        OutputType::Bitcode => module.write_bitcode_to_memory(),
763        OutputType::LlvmAssembly => module.write_ir_to_memory(),
764        OutputType::Assembly => target_machine
765            .emit_to_memory_buffer(module, LLVMCodeGenFileType::LLVMAssemblyFile)
766            .map_err(LinkerError::EmitCodeError)?,
767        OutputType::Object => target_machine
768            .emit_to_memory_buffer(module, LLVMCodeGenFileType::LLVMObjectFile)
769            .map_err(LinkerError::EmitCodeError)?,
770    };
771
772    Ok(LinkerOutput {
773        inner: memory_buffer,
774    })
775}
776
777fn llvm_init(
778    options: &LinkerOptions,
779) -> (
780    LLVMContext,
781    llvm::InstalledDiagnosticHandler<DiagnosticHandler>,
782) {
783    let mut args = Vec::<Cow<'_, CStr>>::new();
784    args.push(c"bpf-linker".into());
785    // Disable cold call site detection. Many accessors in aya-ebpf return Result<T, E>
786    // where the layout is larger than 64 bits, but the LLVM BPF target only supports
787    // up to 64 bits return values. Since the accessors are tiny in terms of code, we
788    // avoid the issue by annotating them with #[inline(always)]. If they are classified
789    // as cold though - and they often are starting from LLVM17 - #[inline(always)]
790    // is ignored and the BPF target fails codegen.
791    args.push(c"--cold-callsite-rel-freq=0".into());
792    if options.unroll_loops {
793        // setting cmdline arguments is the only way to customize the unroll pass with the
794        // C API.
795        args.extend([
796            c"--unroll-runtime".into(),
797            c"--unroll-runtime-multi-exit".into(),
798            CString::new(format!("--unroll-max-upperbound={}", u32::MAX))
799                .unwrap()
800                .into(),
801            CString::new(format!("--unroll-threshold={}", u32::MAX))
802                .unwrap()
803                .into(),
804        ]);
805    }
806    if !options.disable_expand_memcpy_in_order {
807        args.push(c"--bpf-expand-memcpy-in-order".into());
808    }
809    if !options.allow_bpf_trap {
810        // TODO: Remove this once ksyms support is guaranteed.
811        // LLVM introduces __bpf_trap calls at points where __builtin_trap would normally be
812        // emitted. This is currently not supported by aya because __bpf_trap requires a .ksyms
813        // section, but this is not trivial to support. In the meantime, using this flag
814        // returns LLVM to the old behaviour, which did not introduce these calls and therefore
815        // does not require the .ksyms section.
816        args.push(c"--bpf-disable-trap-unreachable".into());
817    }
818    args.extend(options.llvm_args.iter().map(Into::into));
819    info!("LLVM command line: {:?}", args);
820    llvm::init(args.as_slice(), c"BPF linker");
821
822    let mut context = LLVMContext::new();
823
824    let diagnostic_handler = context.set_diagnostic_handler(DiagnosticHandler::default());
825
826    unsafe {
827        LLVMInstallFatalErrorHandler(Some(llvm::fatal_error));
828        LLVMEnablePrettyStackTrace();
829    }
830
831    (context, diagnostic_handler)
832}
833
/// Receives LLVM diagnostics and remembers whether any error was reported.
#[derive(Default)]
pub(crate) struct DiagnosticHandler {
    /// Set once an LLVM error diagnostic that is not deliberately ignored has been seen.
    pub(crate) has_errors: bool,
    // LLVM holds a raw pointer to the handler, so it must never be moved.
    _marker: std::marker::PhantomPinned,
}
840
841impl llvm::LLVMDiagnosticHandler for DiagnosticHandler {
842    fn handle_diagnostic(
843        &mut self,
844        severity: llvm_sys::LLVMDiagnosticSeverity,
845        message: Cow<'_, str>,
846    ) {
847        // TODO(https://reviews.llvm.org/D155894): Remove this when LLVM no longer emits these
848        // errors.
849        //
850        // See https://github.com/rust-lang/compiler-builtins/blob/a61823f/src/mem/mod.rs#L22-L68.
851        const MATCHERS: &[&str] = &[
852            "A call to built-in function 'memcpy' is not supported.\n",
853            "A call to built-in function 'memmove' is not supported.\n",
854            "A call to built-in function 'memset' is not supported.\n",
855            "A call to built-in function 'memcmp' is not supported.\n",
856            "A call to built-in function 'bcmp' is not supported.\n",
857            "A call to built-in function 'strlen' is not supported.\n",
858        ];
859
860        match severity {
861            llvm_sys::LLVMDiagnosticSeverity::LLVMDSError => {
862                if MATCHERS.iter().any(|matcher| message.ends_with(matcher)) {
863                    return;
864                }
865                self.has_errors = true;
866
867                error!("llvm: {}", message)
868            }
869            llvm_sys::LLVMDiagnosticSeverity::LLVMDSWarning => warn!("llvm: {}", message),
870            llvm_sys::LLVMDiagnosticSeverity::LLVMDSRemark => debug!("remark: {}", message),
871            llvm_sys::LLVMDiagnosticSeverity::LLVMDSNote => debug!("note: {}", message),
872        }
873    }
874}
875
876impl LinkerInputKind {
877    fn detect(data: &[u8]) -> Option<Self> {
878        match data.get(..4) {
879            Some(b"\x42\x43\xC0\xDE" | b"\xDE\xC0\x17\x0b") => Some(Self::Bitcode),
880            Some(b"\x7FELF") => Some(Self::Elf),
881            Some(b"\xcf\xfa\xed\xfe") => Some(Self::MachO),
882            _ => {
883                const PREFIXES: &[&[u8]] = &[
884                    b"; ModuleID",
885                    b"source_filename",
886                    b"target datalayout",
887                    b"target triple",
888                    b"define ",
889                    b"declare ",
890                    b"!llvm",
891                ];
892
893                let trimmed = data.trim_ascii_start();
894
895                PREFIXES
896                    .iter()
897                    .any(|p| trimmed.starts_with(p))
898                    .then_some(Self::Ir)
899            }
900        }
901    }
902}
903
904impl InputKind {
905    fn detect(data: &[u8]) -> Option<Self> {
906        match data.get(..8) {
907            Some(b"!<arch>\x0A") => Some(Self::Archive),
908            _ => LinkerInputKind::detect(data).map(Self::Linker),
909        }
910    }
911}
912
913#[derive(Debug)]
914pub struct LinkerOutput {
915    inner: MemoryBuffer,
916}
917
918impl LinkerOutput {
919    pub fn as_slice(&self) -> &[u8] {
920        self.inner.as_slice()
921    }
922}
923
924impl AsRef<[u8]> for LinkerOutput {
925    fn as_ref(&self) -> &[u8] {
926        self.as_slice()
927    }
928}
929
930impl Deref for LinkerOutput {
931    type Target = [u8];
932
933    fn deref(&self) -> &Self::Target {
934        self.as_slice()
935    }
936}