use anyhow::{anyhow, Context, Result};
use capstone::prelude::*;
use object::{Object, ObjectSection, ObjectSymbol, SectionKind, SymbolKind};
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::fs;
use std::path::{Path, PathBuf};
use std::process::Command;
27
/// Switches and size limits applied by [`BinaryAnalyzer::analyze_path`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AnalyzeOptions {
    /// When `false`, no disassembler is run and every function is reported
    /// with an empty instruction list.
    pub include_disassembly: bool,
    /// Maximum number of functions kept in the report (lowest addresses first).
    pub max_functions: usize,
    /// Maximum number of symbols kept in the report (lowest addresses first).
    pub max_symbols: usize,
    /// Maximum number of instructions recorded for a single function.
    pub max_instructions_per_function: usize,
}

impl Default for AnalyzeOptions {
    /// Disassembly enabled, 40 functions, 1000 symbols, 200 instructions per function.
    fn default() -> Self {
        Self {
            include_disassembly: true,
            max_functions: 40,
            max_symbols: 1000,
            max_instructions_per_function: 200,
        }
    }
}
51
52#[derive(Debug, Clone, Serialize, Deserialize)]
54pub struct BinaryReport {
55 pub path: PathBuf,
57 pub format: String,
59 pub architecture: String,
61 pub endianness: String,
63 pub entry: u64,
65 pub size_bytes: u64,
67 pub sections: Vec<SectionInfo>,
69 pub symbols: Vec<SymbolInfo>,
71 pub functions: Vec<FunctionReport>,
73 #[serde(default)]
75 pub disassembly_backend: Option<String>,
76 #[serde(default)]
78 pub disassembly_attempts: Vec<String>,
79 #[serde(default)]
81 pub disassembly_coverage: Option<DisassemblyCoverage>,
82 #[serde(default)]
84 pub function_backend_coverage: Vec<FunctionBackendCoverage>,
85 pub warnings: Vec<String>,
87}
88
89#[derive(Debug, Clone, Serialize, Deserialize, Default)]
91pub struct DisassemblyCoverage {
92 pub total_functions: usize,
94 pub functions_with_instructions: usize,
96 pub capstone_functions: usize,
98 pub objdump_functions: usize,
100 pub missing_functions: usize,
102}
103
104#[derive(Debug, Clone, Serialize, Deserialize)]
106pub struct FunctionBackendCoverage {
107 pub name: String,
109 pub backend: String,
111 pub instruction_count: usize,
113}
114
115#[derive(Debug, Clone, Serialize, Deserialize)]
117pub struct SectionInfo {
118 pub name: String,
120 pub address: u64,
122 pub size: u64,
124 pub kind: String,
126}
127
128#[derive(Debug, Clone, Serialize, Deserialize)]
130pub struct SymbolInfo {
131 pub name: String,
133 pub address: u64,
135 pub size: u64,
137 pub kind: String,
139 pub is_global: bool,
141}
142
143#[derive(Debug, Clone, Serialize, Deserialize)]
145pub struct FunctionReport {
146 pub name: String,
148 pub address: u64,
150 pub size: u64,
152 pub instructions: Vec<Instruction>,
154}
155
156#[derive(Debug, Clone, Serialize, Deserialize)]
158pub struct Instruction {
159 pub address: u64,
161 pub text: String,
163}
164
/// Stateless entry point for binary analysis; see [`BinaryAnalyzer::analyze_path`].
#[derive(Debug, Clone, Copy, Default)]
pub struct BinaryAnalyzer;
168
169impl BinaryAnalyzer {
170 pub fn analyze_path(path: &Path, options: &AnalyzeOptions) -> Result<BinaryReport> {
172 let bytes = fs::read(path).with_context(|| format!("failed to read {}", path.display()))?;
173 let object = object::File::parse(bytes.as_slice())
174 .with_context(|| format!("failed to parse object file {}", path.display()))?;
175
176 let sections = object
177 .sections()
178 .map(|section| SectionInfo {
179 name: section.name().unwrap_or("<unknown>").to_string(),
180 address: section.address(),
181 size: section.size(),
182 kind: format!("{:?}", section.kind()),
183 })
184 .collect::<Vec<_>>();
185
186 let mut symbols = object
187 .symbols()
188 .filter(|symbol| symbol.kind() != SymbolKind::Unknown)
189 .map(|symbol| SymbolInfo {
190 name: symbol.name().unwrap_or("<unnamed>").to_string(),
191 address: symbol.address(),
192 size: symbol.size(),
193 kind: format!("{:?}", symbol.kind()),
194 is_global: symbol.is_global(),
195 })
196 .collect::<Vec<_>>();
197
198 symbols.sort_by_key(|s| s.address);
199 symbols.truncate(options.max_symbols);
200
201 let mut functions = collect_functions(&object);
202 functions.sort_by_key(|f| f.address);
203 functions.truncate(options.max_functions);
204
205 let mut warnings = Vec::new();
206 let mut instructions_by_name = HashMap::new();
207 let mut backend_by_name: HashMap<String, String> = HashMap::new();
208 let mut disassembly_backend = None;
209 let mut disassembly_attempts = Vec::new();
210
211 if options.include_disassembly && !functions.is_empty() {
212 let capstone_result =
213 CapstoneDisassembler::new(&object, bytes.as_slice()).and_then(|d| {
214 d.disassemble(path, &functions, options.max_instructions_per_function)
215 });
216
217 match capstone_result {
218 Ok(disassembly) => {
219 disassembly_attempts.push("capstone: ok".to_string());
220 disassembly_backend = Some("capstone".to_string());
221 for (name, instructions) in disassembly {
222 if !instructions.is_empty() {
223 backend_by_name.insert(name.clone(), "capstone".to_string());
224 instructions_by_name.insert(name, instructions);
225 }
226 }
227
228 let missing = functions
229 .iter()
230 .filter(|f| {
231 instructions_by_name
232 .get(&f.name)
233 .is_none_or(std::vec::Vec::is_empty)
234 })
235 .cloned()
236 .collect::<Vec<_>>();
237
238 if !missing.is_empty() {
239 match ObjdumpDisassembler::new().and_then(|d| {
240 d.disassemble(path, &missing, options.max_instructions_per_function)
241 }) {
242 Ok(fallback) => {
243 disassembly_attempts
244 .push("objdump: ok (filled missing)".to_string());
245 if !fallback.is_empty() {
246 disassembly_backend = Some("capstone+objdump".to_string());
247 }
248 for (name, instructions) in fallback {
249 if instructions.is_empty() {
250 continue;
251 }
252 instructions_by_name
253 .entry(name.clone())
254 .or_insert(instructions);
255 backend_by_name.entry(name).or_insert("objdump".to_string());
256 }
257 }
258 Err(obj_err) => {
259 disassembly_attempts.push(format!(
260 "objdump: unavailable while filling missing ({obj_err})"
261 ));
262 }
263 }
264 }
265 }
266 Err(err) => {
267 disassembly_attempts.push(format!("capstone: unavailable ({err})"));
268 match ObjdumpDisassembler::new().and_then(|d| {
269 d.disassemble(path, &functions, options.max_instructions_per_function)
270 }) {
271 Ok(disassembly) => {
272 disassembly_attempts.push("objdump: ok (fallback)".to_string());
273 disassembly_backend = Some("objdump".to_string());
274 for (name, instructions) in disassembly {
275 if instructions.is_empty() {
276 continue;
277 }
278 backend_by_name.insert(name.clone(), "objdump".to_string());
279 instructions_by_name.insert(name, instructions);
280 }
281 }
282 Err(obj_err) => {
283 disassembly_attempts.push(format!("objdump: unavailable ({obj_err})"));
284 warnings.push(format!(
285 "disassembly unavailable: capstone failed ({err}); objdump failed ({obj_err})"
286 ));
287 }
288 }
289 }
290 }
291 }
292
293 let functions = functions
294 .into_iter()
295 .map(|f| FunctionReport {
296 name: f.name.clone(),
297 address: f.address,
298 size: f.size,
299 instructions: instructions_by_name.remove(&f.name).unwrap_or_default(),
300 })
301 .collect::<Vec<_>>();
302
303 let mut coverage = DisassemblyCoverage {
304 total_functions: functions.len(),
305 ..Default::default()
306 };
307 let mut function_backend_coverage = Vec::with_capacity(functions.len());
308 for function in &functions {
309 let instruction_count = function.instructions.len();
310 let backend = if instruction_count == 0 {
311 "none".to_string()
312 } else {
313 backend_by_name
314 .get(&function.name)
315 .cloned()
316 .unwrap_or_else(|| "unknown".to_string())
317 };
318
319 if instruction_count > 0 {
320 coverage.functions_with_instructions += 1;
321 match backend.as_str() {
322 "capstone" => coverage.capstone_functions += 1,
323 "objdump" => coverage.objdump_functions += 1,
324 _ => {}
325 }
326 } else {
327 coverage.missing_functions += 1;
328 }
329
330 function_backend_coverage.push(FunctionBackendCoverage {
331 name: function.name.clone(),
332 backend,
333 instruction_count,
334 });
335 }
336
337 Ok(BinaryReport {
338 path: path.to_path_buf(),
339 format: format!("{:?}", object.format()),
340 architecture: format!("{:?}", object.architecture()),
341 endianness: format!("{:?}", object.endianness()),
342 entry: object.entry(),
343 size_bytes: bytes.len() as u64,
344 sections,
345 symbols,
346 functions,
347 disassembly_backend,
348 disassembly_attempts,
349 disassembly_coverage: Some(coverage),
350 function_backend_coverage,
351 warnings,
352 })
353 }
354}
355
/// A named text symbol selected for disassembly.
#[derive(Debug, Clone)]
struct FunctionSymbol {
    /// Symbol name with surrounding whitespace removed.
    name: String,
    /// Start address of the symbol.
    address: u64,
    /// Symbol size in bytes; `0` is treated as "size unknown" by the disassemblers.
    size: u64,
}
362
363fn collect_functions(object: &object::File<'_>) -> Vec<FunctionSymbol> {
364 let text_sections = object
365 .sections()
366 .filter(|s| s.kind() == SectionKind::Text)
367 .map(|s| s.index())
368 .collect::<HashSet<_>>();
369
370 object
371 .symbols()
372 .filter(|symbol| symbol.kind() == SymbolKind::Text)
373 .filter(|symbol| !symbol.is_undefined())
374 .filter(|symbol| {
375 symbol
376 .section_index()
377 .is_some_and(|section_index| text_sections.contains(§ion_index))
378 })
379 .filter_map(|symbol| {
380 let name = symbol.name().ok()?.trim().to_string();
381 if name.is_empty() {
382 return None;
383 }
384 Some(FunctionSymbol {
385 name,
386 address: symbol.address(),
387 size: symbol.size(),
388 })
389 })
390 .collect()
391}
392
393trait Disassembler {
394 fn disassemble(
395 &self,
396 path: &Path,
397 functions: &[FunctionSymbol],
398 max_instructions_per_function: usize,
399 ) -> Result<HashMap<String, Vec<Instruction>>>;
400}
401
/// Owned copy of one text section's bytes together with its start address.
#[derive(Debug, Clone)]
struct CodeRegion {
    /// Address of the first byte in `data`.
    start: u64,
    /// Raw section contents.
    data: Vec<u8>,
}
407
408#[derive(Debug)]
409struct CapstoneDisassembler {
410 cs: Capstone,
411 regions: Vec<CodeRegion>,
412}
413
414impl CapstoneDisassembler {
415 fn new(object: &object::File<'_>, _bytes: &[u8]) -> Result<Self> {
416 let cs = match object.architecture() {
417 object::Architecture::X86_64 => Capstone::new()
418 .x86()
419 .mode(capstone::arch::x86::ArchMode::Mode64)
420 .build()
421 .context("capstone init failed for x86_64")?,
422 object::Architecture::Aarch64 => Capstone::new()
423 .arm64()
424 .mode(capstone::arch::arm64::ArchMode::Arm)
425 .build()
426 .context("capstone init failed for aarch64")?,
427 other => {
428 return Err(anyhow!("capstone unsupported architecture: {other:?}"));
429 }
430 };
431
432 let regions = object
433 .sections()
434 .filter(|section| section.kind() == SectionKind::Text)
435 .filter_map(|section| {
436 let start = section.address();
437 let data = section.data().ok()?;
438 if data.is_empty() {
439 return None;
440 }
441 let owned = data.to_vec();
442 if owned.is_empty() {
443 None
444 } else {
445 Some(CodeRegion { start, data: owned })
446 }
447 })
448 .collect::<Vec<_>>();
449
450 if regions.is_empty() {
451 return Err(anyhow!("no text sections available for capstone"));
452 }
453 Ok(Self { cs, regions })
454 }
455
456 fn slice_for_address(&self, address: u64, size_hint: usize) -> Option<&[u8]> {
457 self.regions.iter().find_map(|region| {
458 let offset = address.checked_sub(region.start)? as usize;
459 if offset >= region.data.len() {
460 return None;
461 }
462 let end = if size_hint > 0 {
463 (offset + size_hint).min(region.data.len())
464 } else {
465 region.data.len()
466 };
467 Some(®ion.data[offset..end])
468 })
469 }
470}
471
472impl Disassembler for CapstoneDisassembler {
473 fn disassemble(
474 &self,
475 _path: &Path,
476 functions: &[FunctionSymbol],
477 max_instructions_per_function: usize,
478 ) -> Result<HashMap<String, Vec<Instruction>>> {
479 if functions.is_empty() {
480 return Ok(HashMap::new());
481 }
482
483 let mut sorted = functions.to_vec();
484 sorted.sort_by_key(|f| f.address);
485
486 let mut out = HashMap::new();
487 for (idx, function) in sorted.iter().enumerate() {
488 let next_addr = sorted
489 .iter()
490 .skip(idx + 1)
491 .find(|f| f.address > function.address)
492 .map(|f| f.address);
493
494 let size_hint = if function.size > 0 {
495 function.size as usize
496 } else {
497 next_addr
498 .and_then(|next| next.checked_sub(function.address))
499 .unwrap_or(64) as usize
500 };
501
502 let Some(code) = self.slice_for_address(function.address, size_hint.max(1)) else {
503 continue;
504 };
505 let Ok(insns) = self.cs.disasm_all(code, function.address) else {
506 continue;
507 };
508
509 let instructions = insns
510 .iter()
511 .take(max_instructions_per_function)
512 .map(|insn| {
513 let mut text = String::new();
514 if let Some(mnemonic) = insn.mnemonic() {
515 text.push_str(mnemonic);
516 }
517 if let Some(op_str) = insn.op_str() {
518 if !text.is_empty() && !op_str.is_empty() {
519 text.push(' ');
520 }
521 text.push_str(op_str);
522 }
523 Instruction {
524 address: insn.address(),
525 text,
526 }
527 })
528 .collect::<Vec<_>>();
529
530 if !instructions.is_empty() {
531 out.insert(function.name.clone(), instructions);
532 }
533 }
534
535 if out.is_empty() {
536 return Err(anyhow!(
537 "capstone produced no disassembly for target functions"
538 ));
539 }
540
541 Ok(out)
542 }
543}
544
/// Disassembler that shells out to an `objdump`-compatible tool.
#[derive(Debug, Clone)]
struct ObjdumpDisassembler {
    /// Name of the executable to run (`"llvm-objdump"` or `"objdump"`).
    tool: String,
}
549
550impl ObjdumpDisassembler {
551 fn new() -> Result<Self> {
552 for tool in ["llvm-objdump", "objdump"] {
553 if Command::new(tool).arg("--version").output().is_ok() {
554 return Ok(Self {
555 tool: tool.to_string(),
556 });
557 }
558 }
559 Err(anyhow!("neither llvm-objdump nor objdump is available"))
560 }
561
562 fn parse_output(stdout: &str, max_per_function: usize) -> Vec<DisasmBlock> {
563 let mut current: Option<DisasmBlock> = None;
564 let mut blocks = Vec::new();
565
566 for raw_line in stdout.lines() {
567 let line = raw_line.trim_end();
568
569 if let Some((address, name)) = parse_block_header(line) {
570 if let Some(prev) = current.take() {
571 blocks.push(prev);
572 }
573 current = Some(DisasmBlock {
574 name,
575 address,
576 instructions: Vec::new(),
577 });
578 continue;
579 }
580
581 let Some(block) = current.as_mut() else {
582 continue;
583 };
584
585 let trimmed = line.trim_start();
586 if trimmed.is_empty() {
587 continue;
588 }
589
590 let Some((addr_part, inst_part)) = trimmed.split_once(':') else {
591 continue;
592 };
593 let Ok(address) = u64::from_str_radix(addr_part.trim(), 16) else {
594 continue;
595 };
596
597 let text = inst_part.trim();
598 if text.is_empty() || block.instructions.len() >= max_per_function {
599 continue;
600 }
601
602 block.instructions.push(Instruction {
603 address,
604 text: text.to_string(),
605 });
606 }
607
608 if let Some(last) = current {
609 blocks.push(last);
610 }
611
612 blocks
613 }
614}
615
616impl Disassembler for ObjdumpDisassembler {
617 fn disassemble(
618 &self,
619 path: &Path,
620 functions: &[FunctionSymbol],
621 max_instructions_per_function: usize,
622 ) -> Result<HashMap<String, Vec<Instruction>>> {
623 if functions.is_empty() {
624 return Ok(HashMap::new());
625 }
626
627 let output = Command::new(&self.tool)
628 .arg("-d")
629 .arg("--no-show-raw-insn")
630 .arg(path)
631 .output()
632 .with_context(|| format!("failed to execute {}", self.tool))?;
633
634 if !output.status.success() {
635 let stderr = String::from_utf8_lossy(&output.stderr);
636 return Err(anyhow!(
637 "{} failed with status {}: {}",
638 self.tool,
639 output.status,
640 stderr.trim()
641 ));
642 }
643
644 let stdout = String::from_utf8_lossy(&output.stdout);
645 let blocks = Self::parse_output(&stdout, max_instructions_per_function);
646 Ok(map_blocks_to_functions(functions, &blocks))
647 }
648}
649
650#[derive(Debug, Clone)]
651struct DisasmBlock {
652 name: String,
653 address: u64,
654 instructions: Vec<Instruction>,
655}
656
/// Parses an objdump block header of the form `<hex address> <name>:`.
///
/// Returns the address and the name found between the first `<` and the
/// trailing `>:`. Returns `None` for any other line, including instruction
/// lines that merely mention a symbol (`call 1139 <main>`).
fn parse_block_header(line: &str) -> Option<(u64, String)> {
    let (address_part, rest) = line.trim().split_once('<')?;

    let address_part = address_part.trim();
    // `from_str_radix` would also accept a leading sign; a header address is
    // plain hexadecimal digits only.
    if address_part.is_empty() || !address_part.bytes().all(|b| b.is_ascii_hexdigit()) {
        return None;
    }
    let address = u64::from_str_radix(address_part, 16).ok()?;

    let name = rest.strip_suffix(':')?.strip_suffix('>')?.trim();
    if name.is_empty() {
        return None;
    }
    Some((address, name.to_string()))
}
673
/// Reduces a symbol name to a form comparable across tools.
///
/// Surrounding whitespace is trimmed, a version suffix is cut at the first
/// `@@` (or, failing that, the first `@`), and a trailing `::h` followed by
/// exactly 16 hexadecimal digits is removed.
fn normalize_symbol_name(name: &str) -> String {
    let trimmed = name.trim();

    let unversioned = trimmed
        .split_once("@@")
        .or_else(|| trimmed.split_once('@'))
        .map_or(trimmed, |(base, _)| base);

    // Work on slices throughout so the only allocation is the final `String`.
    let unhashed = match unversioned.rsplit_once("::h") {
        Some((prefix, hash))
            if hash.len() == 16 && hash.bytes().all(|b| b.is_ascii_hexdigit()) =>
        {
            prefix
        }
        _ => unversioned,
    };

    unhashed.to_string()
}
692
693fn map_blocks_to_functions(
694 functions: &[FunctionSymbol],
695 blocks: &[DisasmBlock],
696) -> HashMap<String, Vec<Instruction>> {
697 let mut out = HashMap::new();
698 if blocks.is_empty() || functions.is_empty() {
699 return out;
700 }
701
702 let mut sorted_functions = functions.to_vec();
703 sorted_functions.sort_by_key(|f| f.address);
704
705 let mut next_addr_by_name = HashMap::new();
706 for (idx, function) in sorted_functions.iter().enumerate() {
707 let next = sorted_functions
708 .iter()
709 .skip(idx + 1)
710 .find(|f| f.address > function.address)
711 .map(|f| f.address);
712 next_addr_by_name.insert(function.name.clone(), next);
713 }
714
715 let mut blocks_by_normalized_name: HashMap<String, Vec<usize>> = HashMap::new();
716 for (idx, block) in blocks.iter().enumerate() {
717 blocks_by_normalized_name
718 .entry(normalize_symbol_name(&block.name))
719 .or_default()
720 .push(idx);
721 }
722
723 let mut used_block_indices = HashSet::new();
724
725 for function in functions {
726 let mut selected = blocks
727 .iter()
728 .enumerate()
729 .filter(|(_, block)| block.address == function.address)
730 .max_by_key(|(_, block)| block.instructions.len())
731 .map(|(idx, _)| idx);
732
733 if selected.is_none() {
734 let end = if function.size > 0 {
735 function.address.saturating_add(function.size)
736 } else {
737 next_addr_by_name
738 .get(&function.name)
739 .and_then(|v| *v)
740 .unwrap_or(function.address.saturating_add(1))
741 };
742
743 selected = blocks
744 .iter()
745 .enumerate()
746 .filter(|(_, block)| block.address >= function.address && block.address < end)
747 .min_by_key(|(_, block)| block.address.saturating_sub(function.address))
748 .map(|(idx, _)| idx);
749 }
750
751 if selected.is_none() {
752 let normalized = normalize_symbol_name(&function.name);
753 if let Some(candidates) = blocks_by_normalized_name.get(&normalized) {
754 selected = candidates
755 .iter()
756 .copied()
757 .find(|idx| !used_block_indices.contains(idx))
758 .or_else(|| candidates.first().copied());
759 }
760 }
761
762 if selected.is_none() {
763 selected = blocks
764 .iter()
765 .enumerate()
766 .min_by_key(|(_, block)| block.address.abs_diff(function.address))
767 .map(|(idx, _)| idx);
768 }
769
770 if let Some(idx) = selected {
771 used_block_indices.insert(idx);
772 out.insert(function.name.clone(), blocks[idx].instructions.clone());
773 }
774 }
775
776 out
777}
778
#[cfg(test)]
mod tests {
    use super::*;

    /// Metadata extraction must work on the test binary itself, without disassembly.
    #[test]
    fn analyzes_current_executable_metadata() {
        let exe = std::env::current_exe().expect("current exe");
        let options = AnalyzeOptions {
            include_disassembly: false,
            ..Default::default()
        };

        let report = BinaryAnalyzer::analyze_path(&exe, &options).expect("analysis should succeed");

        assert!(report.size_bytes > 0);
        assert!(!report.sections.is_empty());
        assert!(!report.format.is_empty());
    }

    /// Headers open blocks and the following address lines become instructions.
    #[test]
    fn parses_objdump_style_function_blocks() {
        let disasm = r"
0000000000001139 <main>:
    1139: push %rbp
    113a: mov %rsp,%rbp

0000000000001140 <helper>:
    1140: ret
";

        let blocks = ObjdumpDisassembler::parse_output(disasm, 10);
        let main = blocks
            .iter()
            .find(|b| b.name == "main")
            .expect("main exists");

        assert_eq!(main.instructions.len(), 2);
        assert_eq!(main.instructions[0].address, 0x1139);
        assert_eq!(main.instructions[0].text, "push %rbp");
        assert!(blocks.iter().any(|b| b.name == "helper"));
    }

    /// Blocks are matched to functions even when the printed names carry
    /// version or offset decorations.
    #[test]
    fn maps_blocks_to_functions_by_address_range_and_name_normalization() {
        let disasm = r"
0000000000001200 <helper@@GLIBC_2.2.5>:
    1200: ret
0000000000001300 <main+0x0>:
    1300: push %rbp
    1301: mov %rsp,%rbp
";
        let blocks = ObjdumpDisassembler::parse_output(disasm, 10);
        let functions = vec![
            FunctionSymbol {
                name: "main".to_string(),
                address: 0x1300,
                size: 32,
            },
            FunctionSymbol {
                name: "helper".to_string(),
                address: 0x1200,
                size: 16,
            },
        ];

        let mapped = map_blocks_to_functions(&functions, &blocks);
        let main = mapped.get("main").expect("main mapped");
        assert_eq!(main.len(), 2);
        assert_eq!(main[0].address, 0x1300);

        let helper = mapped.get("helper").expect("helper mapped");
        assert_eq!(helper.len(), 1);
        assert_eq!(helper[0].address, 0x1200);
    }

    /// Instruction lines must never be mistaken for block headers.
    #[test]
    fn block_header_rejects_instruction_lines() {
        assert_eq!(
            parse_block_header("0000000000001139 <main>:"),
            Some((0x1139, "main".to_string()))
        );
        assert!(parse_block_header("    1139: push %rbp").is_none());
        assert!(parse_block_header("    1150: call 1139 <main>").is_none());
    }

    /// Version suffixes and Rust symbol hashes are removed by normalization.
    #[test]
    fn normalizes_versioned_and_hashed_symbol_names() {
        assert_eq!(normalize_symbol_name("helper@@GLIBC_2.2.5"), "helper");
        assert_eq!(
            normalize_symbol_name("core::fmt::write::h0123456789abcdef"),
            "core::fmt::write"
        );
    }
}