1use crate::binary::{BinaryAnalysis, CodeSection};
2use capstone::prelude::*;
3use petgraph::Direction;
4use petgraph::algo::{dijkstra, dominators};
5use petgraph::graph::{DiGraph, NodeIndex};
6use petgraph::visit::EdgeRef;
7use serde::{Deserialize, Serialize};
8use std::collections::{HashMap, HashSet};
9
10#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
12pub enum BasicBlockType {
13 FunctionEntry { function_name: String, address: u64 },
15 Sequential {
17 start_address: u64,
18 end_address: u64,
19 },
20 ConditionalBranch { address: u64, condition: String },
22 UnconditionalJump { address: u64, target: u64 },
24 FunctionCall { address: u64, target: String },
26 Return { address: u64 },
28 ExceptionHandler { address: u64, handler_type: String },
30}
31
32#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
34pub enum EdgeType {
35 Sequential,
37 ConditionalTrue,
39 ConditionalFalse,
41 Jump,
43 Call,
45 Return,
47 Exception,
49}
50
51#[derive(Debug, Clone, Serialize, Deserialize)]
53pub struct BasicBlock {
54 pub block_type: BasicBlockType,
55 pub instructions: Vec<Instruction>,
56 pub size_bytes: usize,
57 pub execution_count: Option<u64>, }
59
60#[derive(Debug, Clone, Serialize, Deserialize)]
62pub struct Instruction {
63 pub address: u64,
64 pub mnemonic: String,
65 pub operands: String,
66 pub bytes: Vec<u8>,
67 pub size: u32,
68 pub is_branch: bool,
69 pub is_call: bool,
70 pub is_return: bool,
71 pub branch_target: Option<u64>,
72}
73
74#[derive(Debug)]
76pub struct ControlFlowGraph {
77 pub graph: DiGraph<BasicBlock, EdgeType>,
78 address_to_node: HashMap<u64, NodeIndex>,
80 function_entries: HashMap<String, NodeIndex>,
82 pub loops: Vec<LoopInfo>,
84 pub dominators: Option<dominators::Dominators<NodeIndex>>,
86}
87
88#[derive(Debug, Clone, Serialize, Deserialize)]
90pub struct LoopInfo {
91 pub header: u64,
92 pub back_edges: Vec<(u64, u64)>,
93 pub loop_blocks: HashSet<u64>,
94 pub nesting_level: usize,
95 pub loop_type: LoopType,
96}
97
98#[derive(Debug, Clone, Serialize, Deserialize)]
99pub enum LoopType {
100 Natural,
101 Irreducible,
102 Infinite,
103}
104
105#[derive(Debug, Clone, Serialize, Deserialize)]
107pub struct CallGraphAnalysis {
108 pub call_sites: Vec<CallSite>,
109 pub function_summaries: HashMap<String, FunctionSummary>,
110 pub recursive_functions: Vec<String>,
111 pub dead_functions: Vec<String>,
112}
113
114#[derive(Debug, Clone, Serialize, Deserialize)]
115pub struct CallSite {
116 pub caller_address: u64,
117 pub callee: String,
118 pub call_type: CallType,
119 pub arguments: Vec<String>, }
121
122#[derive(Debug, Clone, Serialize, Deserialize)]
123pub enum CallType {
124 Direct,
125 Indirect,
126 Virtual,
127 Tail,
128}
129
130#[derive(Debug, Clone, Serialize, Deserialize)]
131pub struct FunctionSummary {
132 pub entry_address: u64,
133 pub size_bytes: usize,
134 pub basic_blocks: usize,
135 pub cyclomatic_complexity: usize,
136 pub calls_made: Vec<String>,
137 pub calls_received: Vec<String>,
138 pub has_loops: bool,
139 pub max_call_depth: usize,
140}
141
142impl ControlFlowGraph {
143 pub fn new() -> Self {
144 Self {
145 graph: DiGraph::new(),
146 address_to_node: HashMap::new(),
147 function_entries: HashMap::new(),
148 loops: Vec::new(),
149 dominators: None,
150 }
151 }
152
153 pub fn build_cfg(analysis: &BinaryAnalysis) -> Result<Self, String> {
155 let mut cfg = Self::new();
156
157 let cs = cfg.init_capstone(&analysis.architecture)?;
159
160 if let Some(binary_data) = &analysis.binary_data {
162 cfg.build_basic_blocks_from_binary(analysis, &cs, binary_data)?;
163 } else {
164 cfg.build_basic_blocks_from_symbols(analysis, &cs)?;
166 }
167
168 cfg.detect_loops();
170
171 cfg.build_dominators();
173
174 Ok(cfg)
175 }
176
177 pub fn init_capstone(&self, architecture: &str) -> Result<Capstone, String> {
178 match architecture.to_lowercase().as_str() {
179 "x86_64" | "amd64" => Capstone::new()
180 .x86()
181 .mode(arch::x86::ArchMode::Mode64)
182 .syntax(arch::x86::ArchSyntax::Intel)
183 .detail(true)
184 .build()
185 .map_err(|e| format!("Failed to initialize Capstone for x86_64: {}", e)),
186 "i386" | "x86" => Capstone::new()
187 .x86()
188 .mode(arch::x86::ArchMode::Mode32)
189 .syntax(arch::x86::ArchSyntax::Intel)
190 .detail(true)
191 .build()
192 .map_err(|e| format!("Failed to initialize Capstone for x86: {}", e)),
193 "arm" => Capstone::new()
194 .arm()
195 .mode(arch::arm::ArchMode::Arm)
196 .detail(true)
197 .build()
198 .map_err(|e| format!("Failed to initialize Capstone for ARM: {}", e)),
199 "aarch64" | "arm64" => Capstone::new()
200 .arm64()
201 .mode(arch::arm64::ArchMode::Arm)
202 .detail(true)
203 .build()
204 .map_err(|e| format!("Failed to initialize Capstone for ARM64: {}", e)),
205 "arm_cortex_m" => Capstone::new()
206 .arm()
207 .mode(arch::arm::ArchMode::Thumb)
208 .detail(true)
209 .build()
210 .map_err(|e| format!("Failed to initialize Capstone for ARM Cortex-M: {}", e)),
211 _ => Err(format!("Unsupported architecture: {}", architecture)),
212 }
213 }
214
215 pub fn build_basic_blocks_from_symbols(
216 &mut self,
217 analysis: &BinaryAnalysis,
218 _cs: &Capstone,
219 ) -> Result<(), String> {
220 let entry_point_addr = 0x1000; let entry_block = BasicBlock {
227 block_type: BasicBlockType::FunctionEntry {
228 function_name: "_start".to_string(),
229 address: entry_point_addr,
230 },
231 instructions: vec![Instruction {
232 address: entry_point_addr,
233 mnemonic: "push".to_string(),
234 operands: "rbp".to_string(),
235 bytes: vec![0x55],
236 size: 1,
237 is_branch: false,
238 is_call: false,
239 is_return: false,
240 branch_target: None,
241 }],
242 size_bytes: 1,
243 execution_count: None,
244 };
245
246 let entry_node = self.graph.add_node(entry_block);
247 self.address_to_node.insert(entry_point_addr, entry_node);
248 self.function_entries
249 .insert("_start".to_string(), entry_node);
250
251 let mut current_addr = entry_point_addr + 0x100;
253
254 for symbol in &analysis.detected_symbols {
255 if !symbol.is_empty() && !symbol.starts_with("__") {
256 let func_block = BasicBlock {
257 block_type: BasicBlockType::FunctionEntry {
258 function_name: symbol.clone(),
259 address: current_addr,
260 },
261 instructions: self.create_placeholder_instructions(current_addr, symbol),
262 size_bytes: 32, execution_count: None,
264 };
265
266 let func_node = self.graph.add_node(func_block);
267 self.address_to_node.insert(current_addr, func_node);
268 self.function_entries.insert(symbol.clone(), func_node);
269
270 self.graph.add_edge(entry_node, func_node, EdgeType::Call);
272
273 current_addr += 0x100;
274 }
275 }
276
277 for import in &analysis.imports {
279 let call_addr = current_addr;
280 let call_block = BasicBlock {
281 block_type: BasicBlockType::FunctionCall {
282 address: call_addr,
283 target: import.clone(),
284 },
285 instructions: vec![Instruction {
286 address: call_addr,
287 mnemonic: "call".to_string(),
288 operands: import.clone(),
289 bytes: vec![0xE8, 0x00, 0x00, 0x00, 0x00], size: 5,
291 is_branch: false,
292 is_call: true,
293 is_return: false,
294 branch_target: None,
295 }],
296 size_bytes: 5,
297 execution_count: None,
298 };
299
300 let call_node = self.graph.add_node(call_block);
301 self.address_to_node.insert(call_addr, call_node);
302
303 current_addr += 0x10;
304 }
305
306 Ok(())
307 }
308
309 pub fn build_basic_blocks_from_binary(
310 &mut self,
311 analysis: &BinaryAnalysis,
312 cs: &Capstone,
313 binary_data: &[u8],
314 ) -> Result<(), String> {
315 tracing::info!(
316 "Performing advanced binary disassembly for {} format (size: {} bytes)",
317 analysis.format,
318 binary_data.len()
319 );
320
321 let entry_point = self.extract_entry_point_from_binary(analysis, binary_data)?;
323
324 let code_sections =
326 self.extract_code_sections_by_format(analysis, binary_data, entry_point)?;
327
328 for section in &code_sections {
330 self.disassemble_code_section(cs, binary_data, section, entry_point)?;
331 }
332
333 self.build_control_flow_edges()?;
335
336 Ok(())
337 }
338
339 fn extract_entry_point_from_binary(
340 &self,
341 analysis: &BinaryAnalysis,
342 binary_data: &[u8],
343 ) -> Result<u64, String> {
344 if let Some(ep_str) = &analysis.entry_point {
346 if let Ok(ep) = u64::from_str_radix(ep_str.trim_start_matches("0x"), 16) {
347 return Ok(ep);
348 }
349 }
350
351 match analysis.format.as_str() {
353 "ELF" => self.extract_elf_entry_point(binary_data),
354 "PE" => self.extract_pe_entry_point(binary_data),
355 "Intel HEX" => self.extract_intel_hex_entry_point(binary_data),
356 "Motorola S-record" => self.extract_srec_entry_point(binary_data),
357 "ARM Cortex-M" => self.extract_arm_cortex_m_entry_point(binary_data),
358 "Raw Firmware" => self.extract_raw_firmware_entry_point(analysis, binary_data),
359 "DICOM" => Ok(0x0), _ => {
361 tracing::warn!(
362 "Unknown binary format: {}, using default entry point",
363 analysis.format
364 );
365 Ok(0x1000)
366 }
367 }
368 }
369
370 pub fn extract_code_sections_by_format(
371 &self,
372 analysis: &BinaryAnalysis,
373 binary_data: &[u8],
374 entry_point: u64,
375 ) -> Result<Vec<CodeSection>, String> {
376 if !analysis.code_sections.is_empty() {
377 return Ok(analysis.code_sections.clone());
378 }
379
380 match analysis.format.as_str() {
382 "ELF" => self.extract_elf_code_sections(binary_data),
383 "PE" => self.extract_pe_code_sections(binary_data),
384 "Intel HEX" => self.extract_intel_hex_code_sections(binary_data, entry_point),
385 "Motorola S-record" => self.extract_srec_code_sections(binary_data, entry_point),
386 "ARM Cortex-M" => self.extract_arm_cortex_m_code_sections(binary_data, entry_point),
387 "Raw Firmware" => self.extract_raw_firmware_code_sections(binary_data, entry_point),
388 _ => {
389 Ok(vec![CodeSection {
391 name: ".text".to_string(),
392 start_address: entry_point,
393 end_address: entry_point + std::cmp::min(binary_data.len() as u64, 4096),
394 size: std::cmp::min(binary_data.len() as u64, 4096),
395 permissions: "rx".to_string(),
396 section_type: crate::binary::CodeSectionType::Text,
397 }])
398 }
399 }
400 }
401
402 pub fn extract_elf_entry_point(&self, binary_data: &[u8]) -> Result<u64, String> {
404 if binary_data.len() < 64 {
405 return Ok(0x1000);
406 }
407
408 if &binary_data[0..4] != b"\x7fELF" {
410 return Ok(0x1000);
411 }
412
413 let is_64bit = binary_data[4] == 2;
414
415 if is_64bit && binary_data.len() >= 32 {
416 let entry_bytes = &binary_data[24..32];
417 let entry_point = u64::from_le_bytes(entry_bytes.try_into().unwrap_or([0; 8]));
418 Ok(entry_point)
419 } else if binary_data.len() >= 28 {
420 let entry_bytes = &binary_data[24..28];
421 let entry_point = u32::from_le_bytes(entry_bytes.try_into().unwrap_or([0; 4])) as u64;
422 Ok(entry_point)
423 } else {
424 Ok(0x1000)
425 }
426 }
427
428 pub fn extract_elf_code_sections(&self, binary_data: &[u8]) -> Result<Vec<CodeSection>, String> {
429 if binary_data.len() < 64 || &binary_data[0..4] != b"\x7fELF" {
430 let entry_point = self.extract_elf_entry_point(binary_data)?;
431 return Ok(vec![CodeSection {
432 name: ".text".to_string(),
433 start_address: entry_point,
434 end_address: entry_point + std::cmp::min(binary_data.len() as u64 / 2, 8192),
435 size: std::cmp::min(binary_data.len() as u64 / 2, 8192),
436 permissions: "rx".to_string(),
437 section_type: crate::binary::CodeSectionType::Text,
438 }]);
439 }
440
441 let is_64bit = binary_data[4] == 2;
442 let is_little_endian = binary_data[5] == 1;
443
444 let (shoff, shentsize, shnum, shstrndx) = if is_64bit {
446 if binary_data.len() < 64 {
447 return self.extract_elf_code_sections_fallback(binary_data);
448 }
449 let shoff = if is_little_endian {
450 u64::from_le_bytes(binary_data[40..48].try_into().unwrap_or([0; 8]))
451 } else {
452 u64::from_be_bytes(binary_data[40..48].try_into().unwrap_or([0; 8]))
453 };
454 let shentsize = if is_little_endian {
455 u16::from_le_bytes(binary_data[58..60].try_into().unwrap_or([0; 2]))
456 } else {
457 u16::from_be_bytes(binary_data[58..60].try_into().unwrap_or([0; 2]))
458 };
459 let shnum = if is_little_endian {
460 u16::from_le_bytes(binary_data[60..62].try_into().unwrap_or([0; 2]))
461 } else {
462 u16::from_be_bytes(binary_data[60..62].try_into().unwrap_or([0; 2]))
463 };
464 let shstrndx = if is_little_endian {
465 u16::from_le_bytes(binary_data[62..64].try_into().unwrap_or([0; 2]))
466 } else {
467 u16::from_be_bytes(binary_data[62..64].try_into().unwrap_or([0; 2]))
468 };
469 (shoff, shentsize, shnum, shstrndx)
470 } else {
471 if binary_data.len() < 52 {
472 return self.extract_elf_code_sections_fallback(binary_data);
473 }
474 let shoff = if is_little_endian {
475 u32::from_le_bytes(binary_data[32..36].try_into().unwrap_or([0; 4])) as u64
476 } else {
477 u32::from_be_bytes(binary_data[32..36].try_into().unwrap_or([0; 4])) as u64
478 };
479 let shentsize = if is_little_endian {
480 u16::from_le_bytes(binary_data[46..48].try_into().unwrap_or([0; 2]))
481 } else {
482 u16::from_be_bytes(binary_data[46..48].try_into().unwrap_or([0; 2]))
483 };
484 let shnum = if is_little_endian {
485 u16::from_le_bytes(binary_data[48..50].try_into().unwrap_or([0; 2]))
486 } else {
487 u16::from_be_bytes(binary_data[48..50].try_into().unwrap_or([0; 2]))
488 };
489 let shstrndx = if is_little_endian {
490 u16::from_le_bytes(binary_data[50..52].try_into().unwrap_or([0; 2]))
491 } else {
492 u16::from_be_bytes(binary_data[50..52].try_into().unwrap_or([0; 2]))
493 };
494 (shoff, shentsize, shnum, shstrndx)
495 };
496
497 if shoff == 0 || shnum == 0 || shentsize == 0 {
499 return self.extract_elf_code_sections_fallback(binary_data);
500 }
501
502 if shoff as usize + (shnum as usize * shentsize as usize) > binary_data.len() {
503 return self.extract_elf_code_sections_fallback(binary_data);
504 }
505
506 let string_table = if shstrndx != 0 && (shstrndx as usize) < shnum as usize {
508 self.get_elf_string_table(
509 binary_data,
510 shoff,
511 shentsize,
512 shstrndx,
513 is_64bit,
514 is_little_endian,
515 )
516 } else {
517 None
518 };
519
520 let mut code_sections = Vec::new();
521
522 for i in 0..shnum {
524 let section_offset = shoff as usize + (i as usize * shentsize as usize);
525 if section_offset + shentsize as usize > binary_data.len() {
526 break;
527 }
528
529 let section_data = &binary_data[section_offset..section_offset + shentsize as usize];
530
531 let (sh_name, sh_type, sh_flags, sh_addr, sh_size) = if is_64bit {
532 if section_data.len() < 64 {
533 continue;
534 }
535 let sh_name = if is_little_endian {
536 u32::from_le_bytes(section_data[0..4].try_into().unwrap_or([0; 4]))
537 } else {
538 u32::from_be_bytes(section_data[0..4].try_into().unwrap_or([0; 4]))
539 };
540 let sh_type = if is_little_endian {
541 u32::from_le_bytes(section_data[4..8].try_into().unwrap_or([0; 4]))
542 } else {
543 u32::from_be_bytes(section_data[4..8].try_into().unwrap_or([0; 4]))
544 };
545 let sh_flags = if is_little_endian {
546 u64::from_le_bytes(section_data[8..16].try_into().unwrap_or([0; 8]))
547 } else {
548 u64::from_be_bytes(section_data[8..16].try_into().unwrap_or([0; 8]))
549 };
550 let sh_addr = if is_little_endian {
551 u64::from_le_bytes(section_data[16..24].try_into().unwrap_or([0; 8]))
552 } else {
553 u64::from_be_bytes(section_data[16..24].try_into().unwrap_or([0; 8]))
554 };
555 let sh_size = if is_little_endian {
556 u64::from_le_bytes(section_data[32..40].try_into().unwrap_or([0; 8]))
557 } else {
558 u64::from_be_bytes(section_data[32..40].try_into().unwrap_or([0; 8]))
559 };
560 (sh_name, sh_type, sh_flags, sh_addr, sh_size)
561 } else {
562 if section_data.len() < 40 {
563 continue;
564 }
565 let sh_name = if is_little_endian {
566 u32::from_le_bytes(section_data[0..4].try_into().unwrap_or([0; 4]))
567 } else {
568 u32::from_be_bytes(section_data[0..4].try_into().unwrap_or([0; 4]))
569 };
570 let sh_type = if is_little_endian {
571 u32::from_le_bytes(section_data[4..8].try_into().unwrap_or([0; 4]))
572 } else {
573 u32::from_be_bytes(section_data[4..8].try_into().unwrap_or([0; 4]))
574 };
575 let sh_flags = if is_little_endian {
576 u32::from_le_bytes(section_data[8..12].try_into().unwrap_or([0; 4])) as u64
577 } else {
578 u32::from_be_bytes(section_data[8..12].try_into().unwrap_or([0; 4])) as u64
579 };
580 let sh_addr = if is_little_endian {
581 u32::from_le_bytes(section_data[12..16].try_into().unwrap_or([0; 4])) as u64
582 } else {
583 u32::from_be_bytes(section_data[12..16].try_into().unwrap_or([0; 4])) as u64
584 };
585 let sh_size = if is_little_endian {
586 u32::from_le_bytes(section_data[20..24].try_into().unwrap_or([0; 4])) as u64
587 } else {
588 u32::from_be_bytes(section_data[20..24].try_into().unwrap_or([0; 4])) as u64
589 };
590 (sh_name, sh_type, sh_flags, sh_addr, sh_size)
591 };
592
593 const SHT_PROGBITS: u32 = 1;
595 const SHF_EXECINSTR: u64 = 0x4;
596
597 if sh_type == SHT_PROGBITS && (sh_flags & SHF_EXECINSTR) != 0 && sh_size > 0 {
598 let section_name = if let Some(ref strtab) = string_table {
600 self.get_string_from_table(strtab, sh_name as usize)
601 .unwrap_or_else(|| format!(".section_{}", i))
602 } else {
603 format!(".section_{}", i)
604 };
605
606 code_sections.push(CodeSection {
607 name: section_name,
608 start_address: sh_addr,
609 end_address: sh_addr + sh_size,
610 size: sh_size,
611 permissions: "rx".to_string(),
612 section_type: crate::binary::CodeSectionType::Text,
613 });
614 }
615 }
616
617 if code_sections.is_empty() {
618 return self.extract_elf_code_sections_fallback(binary_data);
619 }
620
621 Ok(code_sections)
622 }
623
624 fn extract_elf_code_sections_fallback(
625 &self,
626 binary_data: &[u8],
627 ) -> Result<Vec<CodeSection>, String> {
628 let entry_point = self.extract_elf_entry_point(binary_data)?;
629 Ok(vec![CodeSection {
630 name: ".text".to_string(),
631 start_address: entry_point,
632 end_address: entry_point + std::cmp::min(binary_data.len() as u64 / 2, 8192),
633 size: std::cmp::min(binary_data.len() as u64 / 2, 8192),
634 permissions: "rx".to_string(),
635 section_type: crate::binary::CodeSectionType::Text,
636 }])
637 }
638
639 pub fn get_elf_string_table(
640 &self,
641 binary_data: &[u8],
642 shoff: u64,
643 shentsize: u16,
644 shstrndx: u16,
645 is_64bit: bool,
646 is_little_endian: bool,
647 ) -> Option<Vec<u8>> {
648 let strtab_section_offset = shoff as usize + (shstrndx as usize * shentsize as usize);
649 if strtab_section_offset + shentsize as usize > binary_data.len() {
650 return None;
651 }
652
653 let section_data =
654 &binary_data[strtab_section_offset..strtab_section_offset + shentsize as usize];
655
656 let (sh_offset, sh_size) = if is_64bit {
657 if section_data.len() < 64 {
658 return None;
659 }
660 let sh_offset = if is_little_endian {
661 u64::from_le_bytes(section_data[24..32].try_into().ok()?)
662 } else {
663 u64::from_be_bytes(section_data[24..32].try_into().ok()?)
664 };
665 let sh_size = if is_little_endian {
666 u64::from_le_bytes(section_data[32..40].try_into().ok()?)
667 } else {
668 u64::from_be_bytes(section_data[32..40].try_into().ok()?)
669 };
670 (sh_offset, sh_size)
671 } else {
672 if section_data.len() < 40 {
673 return None;
674 }
675 let sh_offset = if is_little_endian {
676 u32::from_le_bytes(section_data[16..20].try_into().ok()?) as u64
677 } else {
678 u32::from_be_bytes(section_data[16..20].try_into().ok()?) as u64
679 };
680 let sh_size = if is_little_endian {
681 u32::from_le_bytes(section_data[20..24].try_into().ok()?) as u64
682 } else {
683 u32::from_be_bytes(section_data[20..24].try_into().ok()?) as u64
684 };
685 (sh_offset, sh_size)
686 };
687
688 if sh_offset as usize + sh_size as usize > binary_data.len() {
689 return None;
690 }
691
692 Some(binary_data[sh_offset as usize..(sh_offset + sh_size) as usize].to_vec())
693 }
694
695 pub fn get_string_from_table(&self, string_table: &[u8], offset: usize) -> Option<String> {
696 if offset >= string_table.len() {
697 return None;
698 }
699
700 let mut end = offset;
701 while end < string_table.len() && string_table[end] != 0 {
702 end += 1;
703 }
704
705 String::from_utf8(string_table[offset..end].to_vec()).ok()
706 }
707
708 pub fn extract_pe_entry_point(&self, binary_data: &[u8]) -> Result<u64, String> {
710 if binary_data.len() < 64 || &binary_data[0..2] != b"MZ" {
711 return Ok(0x401000); }
713
714 let pe_offset =
715 u32::from_le_bytes(binary_data[0x3C..0x3C + 4].try_into().unwrap_or([0; 4])) as usize;
716
717 if pe_offset + 40 > binary_data.len() || &binary_data[pe_offset..pe_offset + 4] != b"PE\0\0"
718 {
719 return Ok(0x401000);
720 }
721
722 let entry_rva_offset = pe_offset + 24 + 16;
723 if entry_rva_offset + 4 <= binary_data.len() {
724 let entry_rva = u32::from_le_bytes(
725 binary_data[entry_rva_offset..entry_rva_offset + 4]
726 .try_into()
727 .unwrap_or([0; 4]),
728 );
729 Ok(entry_rva as u64 + 0x400000)
730 } else {
731 Ok(0x401000)
732 }
733 }
734
735 pub fn extract_pe_code_sections(&self, binary_data: &[u8]) -> Result<Vec<CodeSection>, String> {
736 let entry_point = self.extract_pe_entry_point(binary_data)?;
737 Ok(vec![CodeSection {
738 name: ".text".to_string(),
739 start_address: entry_point,
740 end_address: entry_point + std::cmp::min(binary_data.len() as u64 / 2, 8192),
741 size: std::cmp::min(binary_data.len() as u64 / 2, 8192),
742 permissions: "rx".to_string(),
743 section_type: crate::binary::CodeSectionType::Text,
744 }])
745 }
746
747 pub fn extract_intel_hex_entry_point(&self, binary_data: &[u8]) -> Result<u64, String> {
749 let hex_content = String::from_utf8_lossy(binary_data);
750 let mut lowest_address = u64::MAX;
751 let mut entry_point_found = None;
752
753 for line in hex_content.lines() {
754 if !line.starts_with(':') || line.len() < 11 {
755 continue;
756 }
757
758 if let Ok(record_type) = u8::from_str_radix(&line[7..9], 16) {
759 match record_type {
760 0x00 => {
761 if let Ok(address) = u16::from_str_radix(&line[3..7], 16) {
763 let addr = address as u64;
764 if addr < lowest_address {
765 lowest_address = addr;
766 }
767 }
768 }
769 0x05 => {
770 if line.len() >= 19 {
772 if let Ok(entry) = u32::from_str_radix(&line[9..17], 16) {
773 entry_point_found = Some(entry as u64);
774 }
775 }
776 }
777 0x03 => {
778 if line.len() >= 15 {
780 if let Ok(cs) = u16::from_str_radix(&line[9..13], 16) {
781 if let Ok(ip) = u16::from_str_radix(&line[13..17], 16) {
782 entry_point_found = Some((cs as u64) * 16 + (ip as u64));
783 }
784 }
785 }
786 }
787 _ => {}
788 }
789 }
790 }
791
792 Ok(entry_point_found.unwrap_or(if lowest_address != u64::MAX {
793 lowest_address
794 } else {
795 0x0000
796 }))
797 }
798
799 pub fn extract_intel_hex_code_sections(
800 &self,
801 binary_data: &[u8],
802 entry_point: u64,
803 ) -> Result<Vec<CodeSection>, String> {
804 let hex_content = String::from_utf8_lossy(binary_data);
805 let mut memory_regions = Vec::new();
806 let mut current_base_address = 0u64;
807
808 for line in hex_content.lines() {
809 if !line.starts_with(':') || line.len() < 11 {
810 continue;
811 }
812
813 if let Ok(record_type) = u8::from_str_radix(&line[7..9], 16) {
814 match record_type {
815 0x00 => {
816 if let Ok(byte_count) = u8::from_str_radix(&line[1..3], 16) {
818 if let Ok(address) = u16::from_str_radix(&line[3..7], 16) {
819 let full_address = current_base_address + (address as u64);
820 memory_regions.push((full_address, byte_count as u64));
821 }
822 }
823 }
824 0x04 => {
825 if let Ok(base) = u16::from_str_radix(&line[9..13], 16) {
827 current_base_address = (base as u64) << 16;
828 }
829 }
830 _ => {}
831 }
832 }
833 }
834
835 if memory_regions.is_empty() {
836 return Ok(vec![CodeSection {
837 name: "flash".to_string(),
838 start_address: entry_point,
839 end_address: entry_point + 1024,
840 size: 1024,
841 permissions: "rx".to_string(),
842 section_type: crate::binary::CodeSectionType::Text,
843 }]);
844 }
845
846 memory_regions.sort_by_key(|&(addr, _)| addr);
847 let total_size: u64 = memory_regions.iter().map(|&(_, size)| size).sum();
848
849 Ok(vec![CodeSection {
850 name: "flash".to_string(),
851 start_address: memory_regions[0].0,
852 end_address: memory_regions[0].0 + total_size,
853 size: total_size,
854 permissions: "rx".to_string(),
855 section_type: crate::binary::CodeSectionType::Text,
856 }])
857 }
858
859 pub fn extract_srec_entry_point(&self, binary_data: &[u8]) -> Result<u64, String> {
861 let srec_content = String::from_utf8_lossy(binary_data);
862 let mut entry_point = None;
863 let mut lowest_address = u64::MAX;
864
865 for line in srec_content.lines() {
866 if !line.starts_with('S') || line.len() < 4 {
867 continue;
868 }
869
870 let record_type = &line[1..2];
871 match record_type {
872 "1" | "2" | "3" => {
873 if let Ok(_byte_count) = u8::from_str_radix(&line[2..4], 16) {
875 let addr_len = match record_type {
876 "1" => 4,
877 "2" => 6,
878 "3" => 8,
879 _ => 4,
880 };
881
882 if line.len() >= 4 + addr_len {
883 if let Ok(address) = u64::from_str_radix(&line[4..4 + addr_len], 16) {
884 if address < lowest_address {
885 lowest_address = address;
886 }
887 }
888 }
889 }
890 }
891 "7" | "8" | "9" => {
892 let addr_len = match record_type {
894 "9" => 4,
895 "8" => 6,
896 "7" => 8,
897 _ => 4,
898 };
899
900 if line.len() >= 4 + addr_len {
901 if let Ok(start_addr) = u64::from_str_radix(&line[4..4 + addr_len], 16) {
902 entry_point = Some(start_addr);
903 }
904 }
905 }
906 _ => {}
907 }
908 }
909
910 Ok(entry_point.unwrap_or(if lowest_address != u64::MAX {
911 lowest_address
912 } else {
913 0x0000
914 }))
915 }
916
917 pub fn extract_srec_code_sections(
918 &self,
919 binary_data: &[u8],
920 entry_point: u64,
921 ) -> Result<Vec<CodeSection>, String> {
922 let srec_content = String::from_utf8_lossy(binary_data);
923 let mut memory_regions = Vec::new();
924
925 for line in srec_content.lines() {
926 if !line.starts_with('S') || line.len() < 4 {
927 continue;
928 }
929
930 let record_type = &line[1..2];
931 if matches!(record_type, "1" | "2" | "3") {
932 if let Ok(byte_count) = u8::from_str_radix(&line[2..4], 16) {
933 let addr_len = match record_type {
934 "1" => 4,
935 "2" => 6,
936 "3" => 8,
937 _ => 4,
938 };
939
940 if line.len() >= 4 + addr_len {
941 if let Ok(address) = u64::from_str_radix(&line[4..4 + addr_len], 16) {
942 let data_bytes = byte_count - (addr_len as u8 / 2) - 1;
943 memory_regions.push((address, data_bytes as u64));
944 }
945 }
946 }
947 }
948 }
949
950 if memory_regions.is_empty() {
951 return Ok(vec![CodeSection {
952 name: "program".to_string(),
953 start_address: entry_point,
954 end_address: entry_point + 1024,
955 size: 1024,
956 permissions: "rx".to_string(),
957 section_type: crate::binary::CodeSectionType::Text,
958 }]);
959 }
960
961 memory_regions.sort_by_key(|&(addr, _)| addr);
962 let total_size: u64 = memory_regions.iter().map(|&(_, size)| size).sum();
963
964 Ok(vec![CodeSection {
965 name: "program".to_string(),
966 start_address: memory_regions[0].0,
967 end_address: memory_regions[0].0 + total_size,
968 size: total_size,
969 permissions: "rx".to_string(),
970 section_type: crate::binary::CodeSectionType::Text,
971 }])
972 }
973
974 pub fn extract_arm_cortex_m_entry_point(&self, binary_data: &[u8]) -> Result<u64, String> {
976 if binary_data.len() < 8 {
977 return Ok(0x00000000);
978 }
979
980 let reset_vector = u32::from_le_bytes(binary_data[4..8].try_into().unwrap_or([0; 4]));
982
983 Ok((reset_vector & !1) as u64)
985 }
986
987 fn extract_arm_cortex_m_code_sections(
988 &self,
989 binary_data: &[u8],
990 entry_point: u64,
991 ) -> Result<Vec<CodeSection>, String> {
992 let flash_base = if entry_point >= 0x08000000 {
993 0x08000000
994 } else {
995 0x00000000
996 };
997
998 Ok(vec![CodeSection {
999 name: "flash".to_string(),
1000 start_address: flash_base,
1001 end_address: flash_base + binary_data.len() as u64,
1002 size: binary_data.len() as u64,
1003 permissions: "rx".to_string(),
1004 section_type: crate::binary::CodeSectionType::Text,
1005 }])
1006 }
1007
1008 pub fn extract_raw_firmware_entry_point(
1010 &self,
1011 analysis: &BinaryAnalysis,
1012 _binary_data: &[u8],
1013 ) -> Result<u64, String> {
1014 match analysis.architecture.as_str() {
1015 "ARM" | "ARM64" | "AARCH64" => {
1016 if analysis
1017 .detected_symbols
1018 .iter()
1019 .any(|s| s.contains("cortex") || s.contains("thumb"))
1020 {
1021 Ok(0x08000000) } else {
1023 Ok(0x00000000) }
1025 }
1026 "x86" | "x86_64" => Ok(0x00100000), "MIPS" => Ok(0xBFC00000), "PowerPC" => Ok(0xFFF00000), "RISC-V" => Ok(0x80000000), _ => Ok(0x00000000), }
1032 }
1033
1034 pub fn extract_raw_firmware_code_sections(
1035 &self,
1036 binary_data: &[u8],
1037 entry_point: u64,
1038 ) -> Result<Vec<CodeSection>, String> {
1039 Ok(vec![CodeSection {
1040 name: "firmware".to_string(),
1041 start_address: entry_point,
1042 end_address: entry_point + binary_data.len() as u64,
1043 size: binary_data.len() as u64,
1044 permissions: "rx".to_string(),
1045 section_type: crate::binary::CodeSectionType::Text,
1046 }])
1047 }
1048
1049 pub fn disassemble_code_section(
1050 &mut self,
1051 cs: &Capstone,
1052 binary_data: &[u8],
1053 section: &CodeSection,
1054 entry_point: u64,
1055 ) -> Result<(), String> {
1056 let section_start = section.start_address as usize;
1057 let section_size = section.size as usize;
1058
1059 if section_start >= binary_data.len() {
1061 return Ok(()); }
1063
1064 let actual_size = std::cmp::min(section_size, binary_data.len() - section_start);
1065 let section_data = &binary_data[section_start..section_start + actual_size];
1066
1067 tracing::info!(
1068 "Disassembling section {} at 0x{:x} (size: {})",
1069 section.name,
1070 section.start_address,
1071 actual_size
1072 );
1073
1074 let instructions = cs
1076 .disasm_all(section_data, section.start_address)
1077 .map_err(|e| format!("Disassembly failed: {}", e))?;
1078
1079 self.build_basic_blocks_from_instructions(&instructions, entry_point)?;
1081
1082 Ok(())
1083 }
1084
1085 pub fn build_basic_blocks_from_instructions(
1086 &mut self,
1087 instructions: &capstone::Instructions,
1088 entry_point: u64,
1089 ) -> Result<(), String> {
1090 if instructions.is_empty() {
1091 return Ok(());
1092 }
1093
1094 let mut block_starts = std::collections::HashSet::new();
1096 block_starts.insert(entry_point);
1097
1098 for insn in instructions.iter() {
1100 let addr = insn.address();
1101
1102 if self.is_block_boundary_instruction(&insn) {
1104 block_starts.insert(addr);
1105
1106 if let Some(next_insn) = instructions.iter().find(|i| i.address() > addr) {
1108 block_starts.insert(next_insn.address());
1109 }
1110 }
1111
1112 if let Some(target) = self.get_branch_target(&insn) {
1114 block_starts.insert(target);
1115 }
1116 }
1117
1118 let mut starts: Vec<u64> = block_starts.into_iter().collect();
1120 starts.sort();
1121
1122 for i in 0..starts.len() {
1124 let start_addr = starts[i];
1125 let end_addr = if i + 1 < starts.len() {
1126 starts[i + 1]
1127 } else {
1128 instructions
1129 .iter()
1130 .last()
1131 .map(|i| i.address() + i.bytes().len() as u64)
1132 .unwrap_or(start_addr + 1)
1133 };
1134
1135 let block_instructions: Vec<Instruction> = instructions
1137 .iter()
1138 .filter(|insn| insn.address() >= start_addr && insn.address() < end_addr)
1139 .map(|insn| Instruction {
1140 address: insn.address(),
1141 mnemonic: insn.mnemonic().unwrap_or("").to_string(),
1142 operands: insn.op_str().unwrap_or("").to_string(),
1143 bytes: insn.bytes().to_vec(),
1144 size: insn.bytes().len() as u32,
1145 is_branch: self.is_branch_instruction(&insn),
1146 is_call: self.is_call_instruction(&insn),
1147 is_return: self.is_return_instruction(&insn),
1148 branch_target: self.get_branch_target(&insn),
1149 })
1150 .collect();
1151
1152 if block_instructions.is_empty() {
1153 continue;
1154 }
1155
1156 let block_type = if start_addr == entry_point {
1158 BasicBlockType::FunctionEntry {
1159 function_name: format!("entry_{:x}", start_addr),
1160 address: start_addr,
1161 }
1162 } else if self.is_function_entry_heuristic(start_addr, &block_instructions) {
1163 BasicBlockType::FunctionEntry {
1164 function_name: format!("func_{:x}", start_addr),
1165 address: start_addr,
1166 }
1167 } else if self.is_conditional_block(&block_instructions) {
1168 BasicBlockType::ConditionalBranch {
1169 address: start_addr,
1170 condition: self.extract_condition(&block_instructions),
1171 }
1172 } else if self.is_call_block(&block_instructions) {
1173 BasicBlockType::FunctionCall {
1174 address: start_addr,
1175 target: self.extract_call_target(&block_instructions),
1176 }
1177 } else {
1178 BasicBlockType::Sequential {
1179 start_address: start_addr,
1180 end_address: end_addr,
1181 }
1182 };
1183
1184 let basic_block = BasicBlock {
1185 instructions: block_instructions,
1186 block_type,
1187 size_bytes: (end_addr - start_addr) as usize,
1188 execution_count: None,
1189 };
1190
1191 let function_name = if let BasicBlockType::FunctionEntry { function_name, .. } =
1193 &basic_block.block_type
1194 {
1195 Some(function_name.clone())
1196 } else {
1197 None
1198 };
1199
1200 let node = self.graph.add_node(basic_block);
1201 self.address_to_node.insert(start_addr, node);
1202
1203 if let Some(name) = function_name {
1204 self.function_entries.insert(name, node);
1205 }
1206 }
1207
1208 Ok(())
1209 }
1210
1211 pub fn get_block_end_address(&self, block: &BasicBlock) -> u64 {
1212 if let Some(last_insn) = block.instructions.last() {
1213 last_insn.address + last_insn.size as u64
1214 } else {
1215 match &block.block_type {
1216 BasicBlockType::FunctionEntry { address, .. } => *address,
1217 BasicBlockType::Sequential { end_address, .. } => *end_address,
1218 BasicBlockType::ConditionalBranch { address, .. } => *address,
1219 BasicBlockType::UnconditionalJump { address, .. } => *address,
1220 BasicBlockType::FunctionCall { address, .. } => *address,
1221 BasicBlockType::Return { address } => *address,
1222 BasicBlockType::ExceptionHandler { address, .. } => *address,
1223 }
1224 }
1225 }
1226
1227 pub fn build_control_flow_edges(&mut self) -> Result<(), String> {
1228 let nodes: Vec<_> = self.graph.node_indices().collect();
1229
1230 for node_idx in nodes {
1231 if let Some(block) = self.graph.node_weight(node_idx).cloned() {
1232 let end_address = self.get_block_end_address(&block);
1233 match &block.block_type {
1234 BasicBlockType::Sequential { .. } => {
1235 if let Some(next_node) = self.find_block_at_address(end_address) {
1236 self.graph
1237 .add_edge(node_idx, next_node, EdgeType::Sequential);
1238 }
1239 }
1240 BasicBlockType::ConditionalBranch { .. } => {
1241 if let Some(next_node) = self.find_block_at_address(end_address) {
1243 self.graph
1244 .add_edge(node_idx, next_node, EdgeType::ConditionalFalse);
1245 }
1246 }
1248 BasicBlockType::FunctionCall { .. } => {
1249 if let Some(return_node) = self.find_block_at_address(end_address) {
1250 self.graph.add_edge(node_idx, return_node, EdgeType::Call);
1251 }
1252 }
1253 _ => {
1254 if let Some(next_node) = self.find_block_at_address(end_address) {
1255 self.graph
1256 .add_edge(node_idx, next_node, EdgeType::Sequential);
1257 }
1258 }
1259 }
1260 }
1261 }
1262
1263 Ok(())
1264 }
1265
1266 pub fn find_block_at_address(&self, address: u64) -> Option<NodeIndex> {
1267 self.address_to_node.get(&address).copied()
1268 }
1269
1270 pub fn is_function_entry_heuristic(&self, addr: u64, instructions: &[Instruction]) -> bool {
1272 if instructions.is_empty() {
1273 return false;
1274 }
1275
1276 let first_insn = &instructions[0];
1277
1278 match first_insn.mnemonic.as_str() {
1280 "push" if first_insn.operands.contains("bp") || first_insn.operands.contains("rbp") => {
1282 true
1283 }
1284 "mov" if first_insn.operands.contains("bp") || first_insn.operands.contains("rbp") => {
1285 true
1286 }
1287 "sub" if first_insn.operands.contains("sp") || first_insn.operands.contains("rsp") => {
1288 true
1289 }
1290
1291 "push" if first_insn.operands.contains("lr") => true,
1293 "stmdb" if first_insn.operands.contains("sp!") => true,
1294 "str" if first_insn.operands.contains("lr") => true,
1295
1296 "addi" if first_insn.operands.contains("sp") => true,
1298 "sd" if first_insn.operands.contains("ra") => true,
1299
1300 _ => {
1301 addr % 4 == 0 || addr % 8 == 0
1303 }
1304 }
1305 }
1306
1307 pub fn is_block_boundary_instruction(&self, insn: &capstone::Insn) -> bool {
1308 if let Some(mnemonic) = insn.mnemonic() {
1309 matches!(
1310 mnemonic,
1311 "ret" | "retq" | "retn" | "jmp" | "jmpq" | "je" | "jne" | "jz" | "jnz" | "jl" | "jle" | "jg" | "jge" |
1316 "ja" | "jae" | "jb" | "jbe" |
1317 "jo" | "jno" | "js" | "jns" |
1318 "call" | "callq" | "bx" | "blx" | "b" | "bl" |
1322 "bmi" | "bpl" | "bvs" | "bvc" | "bhi" | "bls" |
1323
1324 "beq" | "bne" | "blt" | "ble" | "bgt" | "bge" |
1326
1327 "bltu" | "bgeu" | "jal" | "jalr" |
1329 "j" | "jr" | "bgtz" | "blez" | "bltz" | "bgez"
1330 )
1331 } else {
1332 false
1333 }
1334 }
1335
1336 pub fn is_branch_instruction(&self, insn: &capstone::Insn) -> bool {
1337 if let Some(mnemonic) = insn.mnemonic() {
1338 matches!(
1339 mnemonic,
1340 "jmp"
1341 | "jmpq"
1342 | "je"
1343 | "jne"
1344 | "jz"
1345 | "jnz"
1346 | "jl"
1347 | "jle"
1348 | "jg"
1349 | "jge"
1350 | "ja"
1351 | "jae"
1352 | "jb"
1353 | "jbe"
1354 | "jo"
1355 | "jno"
1356 | "js"
1357 | "jns"
1358 | "b"
1359 | "bl"
1360 | "bx"
1361 | "blx"
1362 | "beq"
1363 | "bne"
1364 | "blt"
1365 | "ble"
1366 | "bgt"
1367 | "bge"
1368 | "bmi"
1369 | "bpl"
1370 | "bvs"
1371 | "bvc"
1372 | "bhi"
1373 | "bls"
1374 | "jal"
1375 | "jalr"
1376 | "j"
1377 | "jr"
1378 | "bgtz"
1379 | "blez"
1380 | "bltz"
1381 | "bgez"
1382 | "bltu"
1383 | "bgeu"
1384 )
1385 } else {
1386 false
1387 }
1388 }
1389
1390 pub fn is_call_instruction(&self, insn: &capstone::Insn) -> bool {
1391 if let Some(mnemonic) = insn.mnemonic() {
1392 matches!(mnemonic, "call" | "callq" | "bl" | "blx" | "jal" | "jalr")
1393 } else {
1394 false
1395 }
1396 }
1397
1398 pub fn is_return_instruction(&self, insn: &capstone::Insn) -> bool {
1399 if let Some(mnemonic) = insn.mnemonic() {
1400 matches!(mnemonic, "ret" | "retq" | "retn" | "bx" | "jr")
1401 } else {
1402 false
1403 }
1404 }
1405
1406 pub fn get_branch_target(&self, insn: &capstone::Insn) -> Option<u64> {
1407 if let Some(op_str) = insn.op_str() {
1408 if op_str.starts_with("0x") {
1410 if let Ok(addr) = u64::from_str_radix(&op_str[2..], 16) {
1411 return Some(addr);
1412 }
1413 }
1414 if let Ok(addr) = op_str.parse::<u64>() {
1416 return Some(addr);
1417 }
1418 }
1419 None
1420 }
1421
1422 pub fn is_conditional_block(&self, instructions: &[Instruction]) -> bool {
1423 instructions.iter().any(|insn| {
1424 matches!(
1425 insn.mnemonic.as_str(),
1426 "je" | "jne" | "jz" | "jnz" |
1428 "jl" | "jle" | "jg" | "jge" |
1429 "ja" | "jae" | "jb" | "jbe" |
1430 "jo" | "jno" | "js" | "jns" |
1431
1432 "bmi" | "bpl" | "bvs" | "bvc" | "bhi" | "bls" |
1434
1435 "beq" | "bne" | "blt" | "ble" | "bgt" | "bge" |
1437
1438 "bltu" | "bgeu" | "bgtz" | "blez" | "bltz" | "bgez"
1440 )
1441 })
1442 }
1443
1444 pub fn is_call_block(&self, instructions: &[Instruction]) -> bool {
1445 instructions.iter().any(|insn| {
1446 matches!(
1447 insn.mnemonic.as_str(),
1448 "call" | "callq" | "bl" | "blx" | "jal" | "jalr" )
1452 })
1453 }
1454
1455 pub fn extract_condition(&self, instructions: &[Instruction]) -> String {
1456 for insn in instructions {
1457 if matches!(
1458 insn.mnemonic.as_str(),
1459 "je" | "jne"
1460 | "jz"
1461 | "jnz"
1462 | "jl"
1463 | "jle"
1464 | "jg"
1465 | "jge"
1466 | "ja"
1467 | "jae"
1468 | "jb"
1469 | "jbe"
1470 | "jo"
1471 | "jno"
1472 | "js"
1473 | "jns"
1474 | "beq"
1475 | "bne"
1476 | "blt"
1477 | "ble"
1478 | "bgt"
1479 | "bge"
1480 | "bmi"
1481 | "bpl"
1482 | "bvs"
1483 | "bvc"
1484 | "bhi"
1485 | "bls"
1486 | "bgtz"
1487 | "blez"
1488 | "bltz"
1489 | "bgez"
1490 | "bltu"
1491 | "bgeu"
1492 ) {
1493 return format!("{} {}", insn.mnemonic, insn.operands);
1494 }
1495 }
1496 "unknown".to_string()
1497 }
1498
1499 pub fn extract_call_target(&self, instructions: &[Instruction]) -> String {
1500 for insn in instructions {
1501 if matches!(
1502 insn.mnemonic.as_str(),
1503 "call" | "callq" | "bl" | "blx" | "jal" | "jalr"
1504 ) {
1505 return insn.operands.clone();
1506 }
1507 }
1508 "unknown".to_string()
1509 }
1510
1511 pub fn create_placeholder_instructions(
1512 &self,
1513 start_addr: u64,
1514 function_name: &str,
1515 ) -> Vec<Instruction> {
1516 let mut instructions = Vec::new();
1518 let mut addr = start_addr;
1519
1520 instructions.push(Instruction {
1522 address: addr,
1523 mnemonic: "push".to_string(),
1524 operands: "rbp".to_string(),
1525 bytes: vec![0x55],
1526 size: 1,
1527 is_branch: false,
1528 is_call: false,
1529 is_return: false,
1530 branch_target: None,
1531 });
1532 addr += 1;
1533
1534 instructions.push(Instruction {
1535 address: addr,
1536 mnemonic: "mov".to_string(),
1537 operands: "rbp, rsp".to_string(),
1538 bytes: vec![0x48, 0x89, 0xE5],
1539 size: 3,
1540 is_branch: false,
1541 is_call: false,
1542 is_return: false,
1543 branch_target: None,
1544 });
1545 addr += 3;
1546
1547 if function_name.contains("malloc") || function_name.contains("alloc") {
1549 instructions.push(Instruction {
1550 address: addr,
1551 mnemonic: "test".to_string(),
1552 operands: "rdi, rdi".to_string(),
1553 bytes: vec![0x48, 0x85, 0xFF],
1554 size: 3,
1555 is_branch: false,
1556 is_call: false,
1557 is_return: false,
1558 branch_target: None,
1559 });
1560 addr += 3;
1561
1562 instructions.push(Instruction {
1563 address: addr,
1564 mnemonic: "jz".to_string(),
1565 operands: format!("0x{:x}", addr + 10),
1566 bytes: vec![0x74, 0x08],
1567 size: 2,
1568 is_branch: true,
1569 is_call: false,
1570 is_return: false,
1571 branch_target: Some(addr + 10),
1572 });
1573 }
1574
1575 addr += 10;
1577 instructions.push(Instruction {
1578 address: addr,
1579 mnemonic: "pop".to_string(),
1580 operands: "rbp".to_string(),
1581 bytes: vec![0x5D],
1582 size: 1,
1583 is_branch: false,
1584 is_call: false,
1585 is_return: false,
1586 branch_target: None,
1587 });
1588 addr += 1;
1589
1590 instructions.push(Instruction {
1591 address: addr,
1592 mnemonic: "ret".to_string(),
1593 operands: "".to_string(),
1594 bytes: vec![0xC3],
1595 size: 1,
1596 is_branch: false,
1597 is_call: false,
1598 is_return: true,
1599 branch_target: None,
1600 });
1601
1602 instructions
1603 }
1604
1605 pub fn detect_loops(&mut self) {
1606 for node_idx in self.graph.node_indices() {
1610 if let Some(block) = self.graph.node_weight(node_idx) {
1611 for edge in self.graph.edges_directed(node_idx, Direction::Outgoing) {
1613 let target = edge.target();
1614
1615 if let (Some(current_addr), Some(target_addr)) = (
1617 self.get_block_address(block),
1618 self.get_block_address_by_node(target),
1619 ) {
1620 if target_addr < current_addr {
1621 let loop_info = LoopInfo {
1623 header: target_addr,
1624 back_edges: vec![(current_addr, target_addr)],
1625 loop_blocks: HashSet::from([current_addr, target_addr]),
1626 nesting_level: 1,
1627 loop_type: LoopType::Natural,
1628 };
1629
1630 self.loops.push(loop_info);
1631 }
1632 }
1633 }
1634 }
1635 }
1636 }
1637
1638 pub fn build_dominators(&mut self) {
1639 if let Some(entry_node) = self.graph.node_indices().next() {
1640 self.dominators = Some(dominators::simple_fast(&self.graph, entry_node));
1641 }
1642 }
1643
1644 pub fn get_block_address(&self, block: &BasicBlock) -> Option<u64> {
1645 match &block.block_type {
1646 BasicBlockType::FunctionEntry { address, .. } => Some(*address),
1647 BasicBlockType::Sequential { start_address, .. } => Some(*start_address),
1648 BasicBlockType::ConditionalBranch { address, .. } => Some(*address),
1649 BasicBlockType::UnconditionalJump { address, .. } => Some(*address),
1650 BasicBlockType::FunctionCall { address, .. } => Some(*address),
1651 BasicBlockType::Return { address } => Some(*address),
1652 BasicBlockType::ExceptionHandler { address, .. } => Some(*address),
1653 }
1654 }
1655
1656 pub fn get_block_address_by_node(&self, node_idx: NodeIndex) -> Option<u64> {
1657 self.graph
1658 .node_weight(node_idx)
1659 .and_then(|block| self.get_block_address(block))
1660 }
1661
1662 pub fn analyze_call_graph(&self, analysis: &BinaryAnalysis) -> CallGraphAnalysis {
1664 let mut call_sites = Vec::new();
1665 let mut function_summaries = HashMap::new();
1666 let mut recursive_functions = Vec::new();
1667 let mut dead_functions = Vec::new();
1668
1669 for (func_name, &node_idx) in &self.function_entries {
1671 let mut calls_made = Vec::new();
1672 let mut total_size = 0;
1673
1674 let reachable = dijkstra(&self.graph, node_idx, None, |_| 1);
1676 let basic_blocks = reachable.len();
1677
1678 for edge in self.graph.edges_directed(node_idx, Direction::Outgoing) {
1680 if matches!(edge.weight(), EdgeType::Call) {
1681 if let Some(target_block) = self.graph.node_weight(edge.target()) {
1682 if let BasicBlockType::FunctionCall { target, .. } =
1683 &target_block.block_type
1684 {
1685 calls_made.push(target.clone());
1686
1687 call_sites.push(CallSite {
1688 caller_address: self.get_block_address(target_block).unwrap_or(0),
1689 callee: target.clone(),
1690 call_type: CallType::Direct,
1691 arguments: vec![], });
1693 }
1694 }
1695 }
1696 }
1697
1698 let mut complexity = 1; for reachable_node in reachable.keys() {
1701 if let Some(block) = self.graph.node_weight(*reachable_node) {
1702 total_size += block.size_bytes;
1703
1704 match &block.block_type {
1705 BasicBlockType::ConditionalBranch { .. } => complexity += 1,
1706 _ => {}
1707 }
1708 }
1709 }
1710
1711 if calls_made.contains(func_name) {
1713 recursive_functions.push(func_name.clone());
1714 }
1715
1716 function_summaries.insert(
1717 func_name.clone(),
1718 FunctionSummary {
1719 entry_address: self.get_block_address_by_node(node_idx).unwrap_or(0),
1720 size_bytes: total_size,
1721 basic_blocks,
1722 cyclomatic_complexity: complexity,
1723 calls_made,
1724 calls_received: vec![], has_loops: self.loops.iter().any(|l| {
1726 l.loop_blocks
1727 .contains(&self.get_block_address_by_node(node_idx).unwrap_or(0))
1728 }),
1729 max_call_depth: 1, },
1731 );
1732 }
1733
1734 for symbol in &analysis.detected_symbols {
1736 if !call_sites.iter().any(|cs| cs.callee == *symbol)
1737 && symbol != "_start"
1738 && symbol != "main"
1739 {
1740 dead_functions.push(symbol.clone());
1741 }
1742 }
1743
1744 CallGraphAnalysis {
1745 call_sites,
1746 function_summaries,
1747 recursive_functions,
1748 dead_functions,
1749 }
1750 }
1751
1752 #[allow(dead_code)]
1754 pub fn get_loop_headers(&self) -> Vec<u64> {
1755 self.loops.iter().map(|l| l.header).collect()
1756 }
1757
1758 #[allow(dead_code)]
1760 pub fn is_in_loop(&self, address: u64) -> bool {
1761 self.loops.iter().any(|l| l.loop_blocks.contains(&address))
1762 }
1763
1764 #[allow(dead_code)]
1766 pub fn get_dominators(&self, node: NodeIndex) -> Option<Vec<NodeIndex>> {
1767 self.dominators.as_ref().map(|dom| {
1768 let mut dominators = Vec::new();
1769 let mut current = Some(node);
1770
1771 while let Some(n) = current {
1772 dominators.push(n);
1773 current = dom.immediate_dominator(n);
1774 }
1775
1776 dominators
1777 })
1778 }
1779}
1780
/// Outcome of checking whether a sink function is reachable from a set of source
/// functions in a control-flow graph; produced by `ExploitabilityAnalysis::analyze`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExploitabilityAnalysis {
    /// `true` when `analyze` found the sink's entry node reachable from the entry node
    /// of at least one source.
    pub is_reachable: bool,
    /// When reachable, `analyze` stores the source name followed by the sink name;
    /// `None` otherwise.
    pub path: Option<Vec<String>>,
    /// Name of the sink function that was analyzed.
    pub sink: String,
    /// `analyze` sets this to 0.8 when the sink is reachable and to 0.0 otherwise.
    pub confidence: f32,
    /// `analyze` adds one entry for the first source that reaches the sink; empty
    /// when no source does.
    pub attack_vectors: Vec<AttackVector>,
}
1790
/// Describes one way a sink may be reached, as reported in
/// `ExploitabilityAnalysis::attack_vectors`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AttackVector {
    /// Free-form category label; `ExploitabilityAnalysis::analyze` uses `"Control Flow"`.
    pub vector_type: String,
    /// Block addresses where the path starts; `analyze` stores the source function's
    /// entry-block address (0 when it cannot be resolved).
    pub entry_points: Vec<u64>,
    /// Block addresses on the path; `analyze` stores only the sink's entry-block
    /// address (0 when it cannot be resolved).
    pub vulnerable_path: Vec<u64>,
    /// Free-form conditions required for the vector to apply.
    pub prerequisites: Vec<String>,
    /// Free-form description of the consequence.
    pub impact: String,
}
1799
1800impl ExploitabilityAnalysis {
1801 pub fn analyze(cfg: &ControlFlowGraph, sources: &[String], sink: &str) -> Self {
1802 let mut is_reachable = false;
1804 let mut path = None;
1805 let mut attack_vectors = Vec::new();
1806
1807 if let Some(&sink_node) = cfg.function_entries.get(sink) {
1809 for source in sources {
1811 if let Some(&source_node) = cfg.function_entries.get(source) {
1812 let paths = dijkstra(&cfg.graph, source_node, Some(sink_node), |_| 1);
1813
1814 if paths.contains_key(&sink_node) {
1815 is_reachable = true;
1816
1817 let mut path_nodes = Vec::new();
1819 let current = sink_node;
1820 while let Some(block) = cfg.graph.node_weight(current) {
1821 if let Some(addr) = cfg.get_block_address(block) {
1822 path_nodes.push(format!("0x{:x}", addr));
1823 break;
1825 }
1826 }
1827
1828 path = Some(vec![source.clone(), sink.to_string()]);
1829
1830 attack_vectors.push(AttackVector {
1832 vector_type: "Control Flow".to_string(),
1833 entry_points: vec![
1834 cfg.get_block_address_by_node(source_node).unwrap_or(0),
1835 ],
1836 vulnerable_path: vec![
1837 cfg.get_block_address_by_node(sink_node).unwrap_or(0),
1838 ],
1839 prerequisites: vec!["User input control".to_string()],
1840 impact: "Code execution".to_string(),
1841 });
1842
1843 break;
1844 }
1845 }
1846 }
1847 }
1848
1849 ExploitabilityAnalysis {
1850 is_reachable,
1851 path,
1852 sink: sink.to_string(),
1853 confidence: if is_reachable { 0.8 } else { 0.0 },
1854 attack_vectors,
1855 }
1856 }
1857}