1use std::{
2 borrow::Cow,
3 collections::HashSet,
4 ffi::{CStr, CString, OsStr},
5 fs,
6 io::{self, Read as _},
7 ops::Deref,
8 os::unix::ffi::OsStrExt as _,
9 path::{Path, PathBuf},
10 str::{self, FromStr},
11};
12
13use ar::Archive;
14use llvm_sys::{
15 error_handling::{LLVMEnablePrettyStackTrace, LLVMInstallFatalErrorHandler},
16 target_machine::LLVMCodeGenFileType,
17};
18use thiserror::Error;
19use tracing::{debug, error, info, warn};
20
21use crate::llvm::{self, LLVMContext, LLVMModule, LLVMTargetMachine, MemoryBuffer};
22
/// Errors produced while reading inputs, linking, optimizing, or emitting output.
#[derive(Debug, Error)]
pub enum LinkerError {
    /// The given string is not one of the CPU names accepted by `Cpu::from_str`.
    #[error("invalid CPU {0}")]
    InvalidCpu(String),

    /// LLVM could not resolve the target triple, or no target machine could be created for it.
    #[error("invalid LLVM target {0}")]
    InvalidTarget(String),

    /// An I/O operation on the given path failed.
    #[error("`{0}`: {1}")]
    IoError(PathBuf, io::Error),

    /// The input was not recognized as an archive, bitcode, ELF, or LLVM IR; Mach-O inputs are
    /// recognized but rejected with this error as well.
    #[error("invalid input file `{0}`")]
    InvalidInputType(PathBuf),

    /// LLVM failed to link the module read from the given path.
    #[error("failure linking module {0}")]
    LinkModuleError(PathBuf),

    /// The textual IR at the given path could not be turned into a module; the string carries
    /// the reported reason.
    #[error("failure parsing IR module `{0}`: {1}")]
    IRParseError(PathBuf, String),

    /// Linking an archive member failed. Fields are the archive path and the member name,
    /// in that order.
    #[error("failure linking module {1} from {0}")]
    LinkArchiveModuleError(PathBuf, PathBuf),

    /// The optimization step failed; the string carries the message returned by it.
    #[error("LLVMRunPasses failed: {0}")]
    OptimizeError(String),

    /// Emitting assembly or object code through the target machine failed.
    #[error("LLVMTargetMachineEmitToFile failed: {0}")]
    EmitCodeError(String),

    /// Writing bitcode to a file failed.
    #[error("LLVMWriteBitcodeToFile failed: {0}")]
    WriteBitcodeError(io::Error),

    /// Writing textual LLVM IR to a file failed.
    #[error("LLVMPrintModuleToFile failed: {0}")]
    WriteIRError(String),

    /// Reading the bitcode embedded in an object file failed.
    #[error("error reading embedded bitcode: {0}")]
    EmbeddedBitcodeError(String),

    /// The object file at the given path contains no embedded bitcode.
    #[error("no bitcode section found in {0}")]
    MissingBitcodeSection(PathBuf),

    /// The LLVM context did not return a module when asked to create one.
    #[error("failed to create module")]
    CreateModuleError,
}
82
/// CPU name handed to LLVM when the target machine is created.
///
/// The textual form of each variant (`generic`, `probe`, `v1`, `v2`, `v3`) is the same for
/// the C-string and the `Display` representations.
#[derive(Clone, Copy, Debug)]
pub enum Cpu {
    Generic,
    Probe,
    V1,
    V2,
    V3,
}

impl Cpu {
    /// Returns the CPU name as a static NUL-terminated C string.
    fn as_c_str(&self) -> &'static CStr {
        match *self {
            Cpu::Generic => c"generic",
            Cpu::Probe => c"probe",
            Cpu::V1 => c"v1",
            Cpu::V2 => c"v2",
            Cpu::V3 => c"v3",
        }
    }
}

impl std::fmt::Display for Cpu {
    /// Writes the CPU name, honoring width/alignment flags of the formatter.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match *self {
            Cpu::Generic => "generic",
            Cpu::Probe => "probe",
            Cpu::V1 => "v1",
            Cpu::V2 => "v2",
            Cpu::V3 => "v3",
        };
        f.pad(name)
    }
}
116
117impl FromStr for Cpu {
118 type Err = LinkerError;
119
120 fn from_str(s: &str) -> Result<Self, Self::Err> {
121 Ok(match s {
122 "generic" => Self::Generic,
123 "probe" => Self::Probe,
124 "v1" => Self::V1,
125 "v2" => Self::V2,
126 "v3" => Self::V3,
127 _ => return Err(LinkerError::InvalidCpu(s.to_string())),
128 })
129 }
130}
131
/// Optimization level forwarded to `llvm::optimize`.
///
/// NOTE(review): the variant names suggest the usual O0/O1/O2/O3/Os/Oz levels, but the
/// mapping is defined outside this file — confirm there.
#[derive(Clone, Copy, Debug)]
pub enum OptLevel {
    No,
    Less,
    Default,
    Aggressive,
    Size,
    SizeMin,
}
148
/// A single input handed to the linker.
pub enum LinkerInput<'a> {
    /// An input that is read from the file system when linking.
    File { path: &'a Path },
    /// An input already held in memory; `name` is only used to label it in logs and errors.
    Buffer { name: &'a str, bytes: &'a [u8] },
}

impl<'a> LinkerInput<'a> {
    /// Builds a [`LinkerInput::File`] referring to `path`.
    pub fn new_from_file(path: &'a Path) -> Self {
        Self::File { path }
    }

    /// Builds a [`LinkerInput::Buffer`] from a display `name` and the raw `bytes`.
    pub fn new_from_buffer(name: &'a str, bytes: &'a [u8]) -> Self {
        Self::Buffer { name, bytes }
    }
}
163
/// Format of a single linkable input, as recognized from its leading bytes.
enum LinkerInputKind {
    /// LLVM bitcode.
    Bitcode,
    /// An ELF object file.
    Elf,
    /// A Mach-O object file.
    MachO,
    /// Textual LLVM IR.
    Ir,
}

impl std::fmt::Display for LinkerInputKind {
    /// Writes the human-readable name of the format.
    ///
    /// Uses `Formatter::pad` so that width, alignment and precision requested by the caller
    /// are honored, matching how `Cpu` implements `Display`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.pad(match self {
            Self::Bitcode => "Bitcode",
            Self::Elf => "ELF",
            Self::MachO => "Mach-O",
            Self::Ir => "IR",
        })
    }
}
185
/// Input bytes paired with their detected format, ready to be passed to `link_data`.
enum PreparedLinkerInput<'a> {
    /// Raw LLVM bitcode.
    Bitcode(&'a [u8]),
    /// An ELF object; the bitcode embedded in it is what gets linked.
    Elf(&'a [u8]),
    /// A Mach-O object; `link_data` rejects it with `LinkerError::InvalidInputType`.
    MachO(&'a [u8]),
    /// Textual LLVM IR as a NUL-terminated string.
    Ir(&'a CStr),
}
192
193enum InputKind {
194 Archive,
195 Linker(LinkerInputKind),
196}
197
198impl std::fmt::Display for InputKind {
199 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
200 match self {
201 Self::Archive => write!(f, "archive"),
202 Self::Linker(kind) => write!(f, "{}", kind),
203 }
204 }
205}
206
/// Format of the artifact produced by the linker.
#[derive(Clone, Copy, Debug)]
pub enum OutputType {
    /// LLVM bitcode.
    Bitcode,
    /// Target assembly, emitted through the target machine.
    Assembly,
    /// Textual LLVM IR.
    LlvmAssembly,
    /// An object file, emitted through the target machine.
    Object,
}
219
/// Options that configure a [`Linker`].
#[derive(Debug)]
pub struct LinkerOptions {
    /// LLVM target triple. When `None`, the triple of the linked module is used if it starts
    /// with `bpf`; otherwise `bpf` is selected.
    pub target: Option<CString>,
    /// CPU passed to the target machine.
    pub cpu: Cpu,
    /// CPU feature string passed to the target machine.
    pub cpu_features: CString,
    /// Optimization level forwarded to `llvm::optimize`.
    pub optimize: OptLevel,
    /// When `true`, additional loop-unrolling flags are added to the LLVM command line.
    pub unroll_loops: bool,
    /// Forwarded to `llvm::optimize`.
    /// NOTE(review): presumably makes the optimizer disregard `noinline` — confirm in `llvm`.
    pub ignore_inline_never: bool,
    /// Extra arguments appended to the LLVM command line.
    pub llvm_args: Vec<CString>,
    /// When `false`, `--bpf-expand-memcpy-in-order` is passed to LLVM.
    pub disable_expand_memcpy_in_order: bool,
    /// When `false`, `memcpy`, `memmove`, `memset`, `memcmp` and `bcmp` are added to the set of
    /// exported symbols.
    pub disable_memory_builtins: bool,
    /// When `true`, the module is run through `llvm::DISanitizer`; when `false`, debug info is
    /// stripped from the module instead.
    pub btf: bool,
    /// When `false`, `--bpf-disable-trap-unreachable` is passed to LLVM.
    pub allow_bpf_trap: bool,
}
251
/// Links inputs into a single LLVM module, optimizes it, and emits the requested output.
pub struct Linker {
    /// Options this linker was created with.
    options: LinkerOptions,
    /// LLVM context that owns every module created while linking.
    context: LLVMContext,
    /// Handle to the diagnostic handler installed on `context`; queried by `has_errors`.
    diagnostic_handler: llvm::InstalledDiagnosticHandler<DiagnosticHandler>,
    /// Directory that receives `pre-opt.ll` / `post-opt.ll` dumps, if set.
    dump_module: Option<PathBuf>,
}
259
260impl Linker {
261 pub fn new(options: LinkerOptions) -> Self {
263 let (context, diagnostic_handler) = llvm_init(&options);
264
265 Self {
266 options,
267 context,
268 diagnostic_handler,
269 dump_module: None,
270 }
271 }
272
273 pub fn set_dump_module_path(&mut self, path: impl AsRef<Path>) {
281 self.dump_module = Some(path.as_ref().to_path_buf())
282 }
283
284 pub fn link_to_file<'i, 'a, I, P, E>(
324 &self,
325 inputs: I,
326 output: P,
327 output_type: OutputType,
328 export_symbols: E,
329 ) -> Result<(), LinkerError>
330 where
331 I: IntoIterator<Item = LinkerInput<'i>>,
332 E: IntoIterator<Item = &'a str>,
333 P: AsRef<Path>,
334 {
335 let (linked_module, target_machine) = self.link(inputs, export_symbols)?;
336 codegen_to_file(
337 &linked_module,
338 &target_machine,
339 output.as_ref(),
340 output_type,
341 )?;
342 Ok(())
343 }
344
345 pub fn link_to_buffer<'i, 'a, I, E>(
389 &self,
390 inputs: I,
391 output_type: OutputType,
392 export_symbols: E,
393 ) -> Result<LinkerOutput, LinkerError>
394 where
395 I: IntoIterator<Item = LinkerInput<'i>>,
396 E: IntoIterator<Item = &'a str>,
397 {
398 let (linked_module, target_machine) = self.link(inputs, export_symbols)?;
399 codegen_to_buffer(&linked_module, &target_machine, output_type)
400 }
401
402 fn link<'ctx, 'i, 'a, I, E>(
404 &'ctx self,
405 inputs: I,
406 export_symbols: E,
407 ) -> Result<(LLVMModule<'ctx>, LLVMTargetMachine), LinkerError>
408 where
409 I: IntoIterator<Item = LinkerInput<'i>>,
410 E: IntoIterator<Item = &'a str>,
411 {
412 let Self {
413 options,
414 context,
415 dump_module,
416 ..
417 } = self;
418
419 let mut module = link_modules(context, inputs)?;
420
421 let target_machine = create_target_machine(options, &module)?;
422
423 if let Some(path) = dump_module {
424 fs::create_dir_all(path).map_err(|err| LinkerError::IoError(path.to_owned(), err))?;
425 }
426 if let Some(path) = dump_module {
427 let path = path.join("pre-opt.ll");
429 let path = CString::new(path.as_os_str().as_encoded_bytes()).unwrap();
430 module
431 .write_ir_to_path(&path)
432 .map_err(LinkerError::WriteIRError)?;
433 };
434 optimize(
435 options,
436 context,
437 &target_machine,
438 &mut module,
439 export_symbols,
440 )?;
441 if let Some(path) = dump_module {
442 let path = path.join("post-opt.ll");
444 let path = CString::new(path.as_os_str().as_encoded_bytes()).unwrap();
445 module
446 .write_ir_to_path(&path)
447 .map_err(LinkerError::WriteIRError)?;
448 };
449
450 Ok((module, target_machine))
451 }
452
453 pub fn has_errors(&self) -> bool {
454 self.diagnostic_handler.with_view(|h| h.has_errors)
455 }
456}
457
458fn link_modules<'ctx, 'i, I>(
459 context: &'ctx LLVMContext,
460 inputs: I,
461) -> Result<LLVMModule<'ctx>, LinkerError>
462where
463 I: IntoIterator<Item = LinkerInput<'i>>,
464{
465 let mut module = context
466 .create_module(c"linked_module")
467 .ok_or(LinkerError::CreateModuleError)?;
468
469 let mut buf = Vec::new();
470 for input in inputs {
471 let (path, input) = match input {
472 LinkerInput::File { path } => {
473 let data = fs::read(path).map_err(|e| LinkerError::IoError(path.to_owned(), e))?;
474 (path.to_owned(), Cow::Owned(data))
475 }
476 LinkerInput::Buffer { name, bytes } => (
477 PathBuf::from(format!("in_memory::{}", name)),
478 Cow::Borrowed(bytes),
479 ),
480 };
481
482 let in_type = InputKind::detect(input.as_ref())
485 .ok_or_else(|| LinkerError::InvalidInputType(path.clone()))?;
486
487 match in_type {
488 InputKind::Archive => {
489 info!("linking archive {}", path.display());
490
491 let mut archive = Archive::new(input.as_ref());
493 while let Some(item) = archive.next_entry() {
494 let mut item = item.map_err(|e| LinkerError::IoError(path.clone(), e))?;
495 let name = PathBuf::from(OsStr::from_bytes(item.header().identifier()));
496 info!("linking archive item {}", name.display());
497
498 buf.clear();
499 let _: usize = item
500 .read_to_end(&mut buf)
501 .map_err(|e| LinkerError::IoError(name.to_owned(), e))?;
502 let in_type = match LinkerInputKind::detect(&buf) {
503 Some(in_type) => in_type,
504 None => {
505 info!("ignoring archive item {}: invalid type", name.display());
506 continue;
507 }
508 };
509
510 let prepared_input = match in_type {
511 LinkerInputKind::Bitcode => PreparedLinkerInput::Bitcode(&buf),
512 LinkerInputKind::Elf => PreparedLinkerInput::Elf(&buf),
513 LinkerInputKind::MachO => PreparedLinkerInput::MachO(&buf),
514 LinkerInputKind::Ir => {
515 buf.push(b'\0');
516 PreparedLinkerInput::Ir(CStr::from_bytes_with_nul(&buf).map_err(
517 |err| LinkerError::IRParseError(name.to_owned(), err.to_string()),
518 )?)
519 }
520 };
521
522 match link_data(context, &mut module, &name, prepared_input) {
523 Ok(()) => continue,
524 Err(LinkerError::InvalidInputType(name)) => {
525 info!("ignoring archive item {}: invalid type", name.display());
526 continue;
527 }
528 Err(LinkerError::MissingBitcodeSection(name)) => {
529 warn!(
530 "ignoring archive item {}: no embedded bitcode",
531 name.display()
532 );
533 continue;
534 }
535 Err(_) => {
537 return Err(LinkerError::LinkArchiveModuleError(
538 path.to_owned(),
539 name.to_owned(),
540 ));
541 }
542 };
543 }
544 }
545 InputKind::Linker(kind) => {
546 let terminated_input: CString;
547 let prepared_input = match kind {
548 LinkerInputKind::Bitcode => PreparedLinkerInput::Bitcode(input.as_ref()),
549 LinkerInputKind::Elf => PreparedLinkerInput::Elf(input.as_ref()),
550 LinkerInputKind::MachO => PreparedLinkerInput::MachO(input.as_ref()),
551 LinkerInputKind::Ir => {
552 let input: Vec<_> = input.into_owned();
553 terminated_input = CString::new(input).map_err(|err| {
554 LinkerError::IRParseError(path.to_owned(), err.to_string())
555 })?;
556 PreparedLinkerInput::Ir(&terminated_input)
557 }
558 };
559 info!("linking file {} type {kind}", path.display());
560 match link_data(context, &mut module, &path, prepared_input) {
561 Ok(()) => {}
562 Err(LinkerError::InvalidInputType(path)) => {
563 info!("ignoring file {}: invalid type", path.display());
564 continue;
565 }
566 Err(LinkerError::MissingBitcodeSection(path)) => {
567 warn!("ignoring file {}: no embedded bitcode", path.display());
568 }
569 Err(err) => return Err(err),
570 }
571 }
572 }
573 }
574
575 Ok(module)
576}
577
578fn link_data<'ctx>(
579 context: &'ctx LLVMContext,
580 module: &mut LLVMModule<'ctx>,
581 path: &Path,
582 data: PreparedLinkerInput<'_>,
583) -> Result<(), LinkerError> {
584 let mut link_data = |data: &[u8]| {
585 if !llvm::link_bitcode_buffer(context, module, data) {
586 Err(LinkerError::LinkModuleError(path.to_owned()))
587 } else {
588 Ok(())
589 }
590 };
591 match data {
592 PreparedLinkerInput::Bitcode(data) => link_data(data),
593 PreparedLinkerInput::Elf(data) => llvm::with_embedded_bitcode(context, data, link_data)
594 .map_err(LinkerError::EmbeddedBitcodeError)
595 .and_then(|opt| {
596 opt.unwrap_or_else(|| Err(LinkerError::MissingBitcodeSection(path.to_owned())))
597 }),
598 PreparedLinkerInput::MachO(_data) => Err(LinkerError::InvalidInputType(path.to_owned())),
602 PreparedLinkerInput::Ir(data) => {
603 let linked = llvm::link_ir_buffer(context, module, data)
604 .map_err(|e| LinkerError::IRParseError(path.to_owned(), e))?;
605
606 if linked {
607 Ok(())
608 } else {
609 Err(LinkerError::LinkModuleError(path.to_owned()))
610 }
611 }
612 }
613}
614
615fn create_target_machine(
616 options: &LinkerOptions,
617 module: &LLVMModule<'_>,
618) -> Result<LLVMTargetMachine, LinkerError> {
619 let LinkerOptions {
620 target,
621 cpu,
622 cpu_features,
623 ..
624 } = options;
625 let (triple, target) = match target {
639 Some(c_triple) => (c_triple.as_c_str(), llvm::target_from_triple(c_triple)),
641 None => {
642 let c_triple = module.get_target();
643 let c_triple = unsafe { CStr::from_ptr(c_triple) };
644 if c_triple.to_bytes().starts_with(b"bpf") {
645 (c_triple, llvm::target_from_module(module))
647 } else {
648 info!(
650 "detected non-bpf input target {} and no explicit output --target specified, selecting `bpf'",
651 OsStr::from_bytes(c_triple.to_bytes()).display()
652 );
653 let c_triple = c"bpf";
654 (c_triple, llvm::target_from_triple(c_triple))
655 }
656 }
657 };
658 let target =
659 target.map_err(|_msg| LinkerError::InvalidTarget(triple.to_string_lossy().to_string()))?;
660
661 debug!(
662 "creating target machine: triple: {} cpu: {} features: {}",
663 triple.to_string_lossy(),
664 cpu,
665 cpu_features.to_string_lossy(),
666 );
667
668 let target_machine = LLVMTargetMachine::new(target, triple, cpu.as_c_str(), cpu_features)
669 .ok_or_else(|| LinkerError::InvalidTarget(triple.to_string_lossy().to_string()))?;
670
671 Ok(target_machine)
672}
673
674fn optimize<'ctx, 'a, E>(
675 options: &LinkerOptions,
676 context: &'ctx LLVMContext,
677 target_machine: &LLVMTargetMachine,
678 module: &mut LLVMModule<'ctx>,
679 export_symbols: E,
680) -> Result<(), LinkerError>
681where
682 E: IntoIterator<Item = &'a str>,
683{
684 let LinkerOptions {
685 disable_memory_builtins,
686 optimize,
687 btf,
688 ignore_inline_never,
689 ..
690 } = options;
691
692 let mut export_symbols: HashSet<Cow<'_, [u8]>> = export_symbols
693 .into_iter()
694 .map(|s| Cow::Borrowed(s.as_bytes()))
695 .collect();
696
697 if !disable_memory_builtins {
698 export_symbols.extend(
699 ["memcpy", "memmove", "memset", "memcmp", "bcmp"]
700 .into_iter()
701 .map(|s| s.as_bytes().into()),
702 );
703 };
704 debug!(
705 "linking exporting symbols {:?}, opt level {:?}",
706 export_symbols, optimize
707 );
708 if *btf {
712 llvm::DISanitizer::new(context, module).run(&export_symbols);
714 } else {
715 let ok = module.strip_debug_info();
717 debug!("Stripping DI, changed={}", ok);
718 }
719
720 llvm::optimize(
721 target_machine,
722 module,
723 options.optimize,
724 *ignore_inline_never,
725 &export_symbols,
726 )
727 .map_err(LinkerError::OptimizeError)?;
728
729 Ok(())
730}
731
732fn codegen_to_file(
733 module: &LLVMModule<'_>,
734 target_machine: &LLVMTargetMachine,
735 output: &Path,
736 output_type: OutputType,
737) -> Result<(), LinkerError> {
738 info!("writing {:?} to {:?}", output_type, output);
739 let output = CString::new(output.as_os_str().as_encoded_bytes()).unwrap();
740 match output_type {
741 OutputType::Bitcode => module
742 .write_bitcode_to_path(&output)
743 .map_err(LinkerError::WriteBitcodeError),
744 OutputType::LlvmAssembly => module
745 .write_ir_to_path(&output)
746 .map_err(LinkerError::WriteIRError),
747 OutputType::Assembly => target_machine
748 .emit_to_file(module, &output, LLVMCodeGenFileType::LLVMAssemblyFile)
749 .map_err(LinkerError::EmitCodeError),
750 OutputType::Object => target_machine
751 .emit_to_file(module, &output, LLVMCodeGenFileType::LLVMObjectFile)
752 .map_err(LinkerError::EmitCodeError),
753 }
754}
755
756fn codegen_to_buffer(
757 module: &LLVMModule<'_>,
758 target_machine: &LLVMTargetMachine,
759 output_type: OutputType,
760) -> Result<LinkerOutput, LinkerError> {
761 let memory_buffer = match output_type {
762 OutputType::Bitcode => module.write_bitcode_to_memory(),
763 OutputType::LlvmAssembly => module.write_ir_to_memory(),
764 OutputType::Assembly => target_machine
765 .emit_to_memory_buffer(module, LLVMCodeGenFileType::LLVMAssemblyFile)
766 .map_err(LinkerError::EmitCodeError)?,
767 OutputType::Object => target_machine
768 .emit_to_memory_buffer(module, LLVMCodeGenFileType::LLVMObjectFile)
769 .map_err(LinkerError::EmitCodeError)?,
770 };
771
772 Ok(LinkerOutput {
773 inner: memory_buffer,
774 })
775}
776
777fn llvm_init(
778 options: &LinkerOptions,
779) -> (
780 LLVMContext,
781 llvm::InstalledDiagnosticHandler<DiagnosticHandler>,
782) {
783 let mut args = Vec::<Cow<'_, CStr>>::new();
784 args.push(c"bpf-linker".into());
785 args.push(c"--cold-callsite-rel-freq=0".into());
792 if options.unroll_loops {
793 args.extend([
796 c"--unroll-runtime".into(),
797 c"--unroll-runtime-multi-exit".into(),
798 CString::new(format!("--unroll-max-upperbound={}", u32::MAX))
799 .unwrap()
800 .into(),
801 CString::new(format!("--unroll-threshold={}", u32::MAX))
802 .unwrap()
803 .into(),
804 ]);
805 }
806 if !options.disable_expand_memcpy_in_order {
807 args.push(c"--bpf-expand-memcpy-in-order".into());
808 }
809 if !options.allow_bpf_trap {
810 args.push(c"--bpf-disable-trap-unreachable".into());
817 }
818 args.extend(options.llvm_args.iter().map(Into::into));
819 info!("LLVM command line: {:?}", args);
820 llvm::init(args.as_slice(), c"BPF linker");
821
822 let mut context = LLVMContext::new();
823
824 let diagnostic_handler = context.set_diagnostic_handler(DiagnosticHandler::default());
825
826 unsafe {
827 LLVMInstallFatalErrorHandler(Some(llvm::fatal_error));
828 LLVMEnablePrettyStackTrace();
829 }
830
831 (context, diagnostic_handler)
832}
833
/// Receives LLVM diagnostics and records whether an error was reported.
#[derive(Default)]
pub(crate) struct DiagnosticHandler {
    /// Set once an error-severity diagnostic that is not on the ignore list has been seen.
    pub(crate) has_errors: bool,
    // Makes the type `!Unpin`.
    // NOTE(review): presumably because LLVM keeps a pointer to the installed handler — confirm.
    _marker: std::marker::PhantomPinned,
}
840
841impl llvm::LLVMDiagnosticHandler for DiagnosticHandler {
842 fn handle_diagnostic(
843 &mut self,
844 severity: llvm_sys::LLVMDiagnosticSeverity,
845 message: Cow<'_, str>,
846 ) {
847 const MATCHERS: &[&str] = &[
852 "A call to built-in function 'memcpy' is not supported.\n",
853 "A call to built-in function 'memmove' is not supported.\n",
854 "A call to built-in function 'memset' is not supported.\n",
855 "A call to built-in function 'memcmp' is not supported.\n",
856 "A call to built-in function 'bcmp' is not supported.\n",
857 "A call to built-in function 'strlen' is not supported.\n",
858 ];
859
860 match severity {
861 llvm_sys::LLVMDiagnosticSeverity::LLVMDSError => {
862 if MATCHERS.iter().any(|matcher| message.ends_with(matcher)) {
863 return;
864 }
865 self.has_errors = true;
866
867 error!("llvm: {}", message)
868 }
869 llvm_sys::LLVMDiagnosticSeverity::LLVMDSWarning => warn!("llvm: {}", message),
870 llvm_sys::LLVMDiagnosticSeverity::LLVMDSRemark => debug!("remark: {}", message),
871 llvm_sys::LLVMDiagnosticSeverity::LLVMDSNote => debug!("note: {}", message),
872 }
873 }
874}
875
876impl LinkerInputKind {
877 fn detect(data: &[u8]) -> Option<Self> {
878 match data.get(..4) {
879 Some(b"\x42\x43\xC0\xDE" | b"\xDE\xC0\x17\x0b") => Some(Self::Bitcode),
880 Some(b"\x7FELF") => Some(Self::Elf),
881 Some(b"\xcf\xfa\xed\xfe") => Some(Self::MachO),
882 _ => {
883 const PREFIXES: &[&[u8]] = &[
884 b"; ModuleID",
885 b"source_filename",
886 b"target datalayout",
887 b"target triple",
888 b"define ",
889 b"declare ",
890 b"!llvm",
891 ];
892
893 let trimmed = data.trim_ascii_start();
894
895 PREFIXES
896 .iter()
897 .any(|p| trimmed.starts_with(p))
898 .then_some(Self::Ir)
899 }
900 }
901 }
902}
903
904impl InputKind {
905 fn detect(data: &[u8]) -> Option<Self> {
906 match data.get(..8) {
907 Some(b"!<arch>\x0A") => Some(Self::Archive),
908 _ => LinkerInputKind::detect(data).map(Self::Linker),
909 }
910 }
911}
912
/// In-memory result of `Linker::link_to_buffer`.
#[derive(Debug)]
pub struct LinkerOutput {
    /// Buffer holding the emitted bytes.
    inner: MemoryBuffer,
}
917
impl LinkerOutput {
    /// Returns the emitted bytes as a slice borrowed from the underlying buffer.
    pub fn as_slice(&self) -> &[u8] {
        self.inner.as_slice()
    }
}
923
924impl AsRef<[u8]> for LinkerOutput {
925 fn as_ref(&self) -> &[u8] {
926 self.as_slice()
927 }
928}
929
930impl Deref for LinkerOutput {
931 type Target = [u8];
932
933 fn deref(&self) -> &Self::Target {
934 self.as_slice()
935 }
936}