nabla_cli/enterprise/secure/
control_flow.rs

1use crate::binary::{BinaryAnalysis, CodeSection};
2use capstone::prelude::*;
3use petgraph::Direction;
4use petgraph::algo::{dijkstra, dominators};
5use petgraph::graph::{DiGraph, NodeIndex};
6use petgraph::visit::EdgeRef;
7use serde::{Deserialize, Serialize};
8use std::collections::{HashMap, HashSet};
9
/// Represents different types of basic blocks in the Control Flow Graph.
///
/// Every variant records the address (or address range) of the block; some
/// additionally carry a name, a target, or a descriptive string.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum BasicBlockType {
    /// Entry point of a function, identified by its name and address.
    FunctionEntry { function_name: String, address: u64 },
    /// Regular basic block with sequential instructions, given as an address range.
    // NOTE(review): whether `end_address` is inclusive or exclusive is not
    // visible in this part of the file — confirm against the disassembler.
    Sequential {
        start_address: u64,
        end_address: u64,
    },
    /// Conditional branch block; the condition is stored as a string.
    ConditionalBranch { address: u64, condition: String },
    /// Unconditional jump from `address` to `target`.
    UnconditionalJump { address: u64, target: u64 },
    /// Function call site; `target` is the callee name.
    FunctionCall { address: u64, target: String },
    /// Return statement
    Return { address: u64 },
    /// Exception handler; the handler kind is stored as a string.
    ExceptionHandler { address: u64, handler_type: String },
}
31
/// Represents edge types in the CFG.
///
/// Used as the edge weight of `ControlFlowGraph::graph`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum EdgeType {
    /// Sequential execution (fall-through)
    Sequential,
    /// Conditional branch taken
    ConditionalTrue,
    /// Conditional branch not taken
    ConditionalFalse,
    /// Unconditional jump
    Jump,
    /// Function call
    Call,
    /// Function return
    Return,
    /// Exception flow
    Exception,
}
50
/// Basic block containing instructions and metadata.
///
/// Used as the node weight of `ControlFlowGraph::graph`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BasicBlock {
    /// Kind of block, together with its address information.
    pub block_type: BasicBlockType,
    /// Instructions that make up the block.
    pub instructions: Vec<Instruction>,
    /// Size of the block in bytes.
    pub size_bytes: usize,
    /// Optional execution count; the builders in this file leave it as `None`.
    pub execution_count: Option<u64>, // For profiling integration
}
59
/// Disassembled instruction
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Instruction {
    /// Address of the instruction.
    pub address: u64,
    /// Instruction mnemonic (for example `"push"` or `"call"`).
    pub mnemonic: String,
    /// Operand text (for example `"rbp"`).
    pub operands: String,
    /// Raw encoded bytes of the instruction.
    pub bytes: Vec<u8>,
    /// Encoded length; the builders in this file set it to the length of `bytes`.
    pub size: u32,
    /// Flags the instruction as a branch.
    pub is_branch: bool,
    /// Flags the instruction as a call.
    pub is_call: bool,
    /// Flags the instruction as a return.
    pub is_return: bool,
    /// Branch destination, when one is recorded.
    pub branch_target: Option<u64>,
}
73
/// Control Flow Graph with proper basic block analysis.
///
/// Basic blocks are the nodes of `graph`; two private maps index those nodes
/// by address and by function name.
#[derive(Debug)]
pub struct ControlFlowGraph {
    /// Directed graph of basic blocks connected by typed edges.
    pub graph: DiGraph<BasicBlock, EdgeType>,
    /// Maps addresses to node indices for quick lookup
    address_to_node: HashMap<u64, NodeIndex>,
    /// Maps function names to their entry blocks
    function_entries: HashMap<String, NodeIndex>,
    /// Detected loops in the CFG
    pub loops: Vec<LoopInfo>,
    /// Dominator tree information; `None` in a freshly created graph.
    pub dominators: Option<dominators::Dominators<NodeIndex>>,
}
87
/// Information about detected loops
// NOTE(review): the `u64` values below are presumably basic-block addresses —
// confirm against the loop detection code, which is outside this view.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LoopInfo {
    /// Loop header.
    pub header: u64,
    /// Back edges of the loop as pairs.
    // NOTE(review): pair order is assumed to be (source, target) — confirm.
    pub back_edges: Vec<(u64, u64)>,
    /// Set of blocks that belong to the loop.
    pub loop_blocks: HashSet<u64>,
    /// Nesting level of the loop.
    pub nesting_level: usize,
    /// Classification of the loop.
    pub loop_type: LoopType,
}
97
/// Classification stored in `LoopInfo::loop_type`.
// NOTE(review): the criteria that select each variant live in the loop
// detection code, which is outside this view.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum LoopType {
    Natural,
    Irreducible,
    Infinite,
}
104
/// Results of inter-procedural analysis
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CallGraphAnalysis {
    /// Call sites that were found.
    pub call_sites: Vec<CallSite>,
    /// Per-function summaries keyed by a string (presumably the function name).
    pub function_summaries: HashMap<String, FunctionSummary>,
    /// Functions reported as recursive.
    pub recursive_functions: Vec<String>,
    /// Functions reported as dead.
    pub dead_functions: Vec<String>,
}
113
/// A single call site recorded by the call graph analysis.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CallSite {
    /// Address on the caller side.
    pub caller_address: u64,
    /// Name of the called function.
    pub callee: String,
    /// Kind of call.
    pub call_type: CallType,
    /// Detected argument patterns, stored as strings.
    pub arguments: Vec<String>, // Detected argument patterns
}
121
/// Kind of call stored in `CallSite::call_type`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum CallType {
    Direct,
    Indirect,
    Virtual,
    Tail,
}
129
/// Per-function summary stored in `CallGraphAnalysis::function_summaries`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunctionSummary {
    /// Entry address of the function.
    pub entry_address: u64,
    /// Size of the function in bytes.
    pub size_bytes: usize,
    /// Number of basic blocks.
    pub basic_blocks: usize,
    /// Cyclomatic complexity value.
    pub cyclomatic_complexity: usize,
    /// Names of functions this function calls.
    pub calls_made: Vec<String>,
    /// Names of functions that call this function.
    pub calls_received: Vec<String>,
    /// Whether the function contains loops.
    pub has_loops: bool,
    /// Maximum call depth recorded for the function.
    pub max_call_depth: usize,
}
141
142impl ControlFlowGraph {
143    pub fn new() -> Self {
144        Self {
145            graph: DiGraph::new(),
146            address_to_node: HashMap::new(),
147            function_entries: HashMap::new(),
148            loops: Vec::new(),
149            dominators: None,
150        }
151    }
152
153    /// Build CFG from binary analysis with Capstone disassembly
154    pub fn build_cfg(analysis: &BinaryAnalysis) -> Result<Self, String> {
155        let mut cfg = Self::new();
156
157        // Initialize Capstone based on architecture
158        let cs = cfg.init_capstone(&analysis.architecture)?;
159
160        // Use binary data for advanced analysis if available
161        if let Some(binary_data) = &analysis.binary_data {
162            cfg.build_basic_blocks_from_binary(analysis, &cs, binary_data)?;
163        } else {
164            // Fallback to symbol-based analysis
165            cfg.build_basic_blocks_from_symbols(analysis, &cs)?;
166        }
167
168        // Detect loops
169        cfg.detect_loops();
170
171        // Build dominator tree
172        cfg.build_dominators();
173
174        Ok(cfg)
175    }
176
177    pub fn init_capstone(&self, architecture: &str) -> Result<Capstone, String> {
178        match architecture.to_lowercase().as_str() {
179            "x86_64" | "amd64" => Capstone::new()
180                .x86()
181                .mode(arch::x86::ArchMode::Mode64)
182                .syntax(arch::x86::ArchSyntax::Intel)
183                .detail(true)
184                .build()
185                .map_err(|e| format!("Failed to initialize Capstone for x86_64: {}", e)),
186            "i386" | "x86" => Capstone::new()
187                .x86()
188                .mode(arch::x86::ArchMode::Mode32)
189                .syntax(arch::x86::ArchSyntax::Intel)
190                .detail(true)
191                .build()
192                .map_err(|e| format!("Failed to initialize Capstone for x86: {}", e)),
193            "arm" => Capstone::new()
194                .arm()
195                .mode(arch::arm::ArchMode::Arm)
196                .detail(true)
197                .build()
198                .map_err(|e| format!("Failed to initialize Capstone for ARM: {}", e)),
199            "aarch64" | "arm64" => Capstone::new()
200                .arm64()
201                .mode(arch::arm64::ArchMode::Arm)
202                .detail(true)
203                .build()
204                .map_err(|e| format!("Failed to initialize Capstone for ARM64: {}", e)),
205            "arm_cortex_m" => Capstone::new()
206                .arm()
207                .mode(arch::arm::ArchMode::Thumb)
208                .detail(true)
209                .build()
210                .map_err(|e| format!("Failed to initialize Capstone for ARM Cortex-M: {}", e)),
211            _ => Err(format!("Unsupported architecture: {}", architecture)),
212        }
213    }
214
215    pub fn build_basic_blocks_from_symbols(
216        &mut self,
217        analysis: &BinaryAnalysis,
218        _cs: &Capstone,
219    ) -> Result<(), String> {
220        // For now, create basic blocks from function symbols
221        // In a real implementation, we'd disassemble the actual binary data
222
223        let entry_point_addr = 0x1000; // Placeholder - would extract from binary
224
225        // Create entry point basic block
226        let entry_block = BasicBlock {
227            block_type: BasicBlockType::FunctionEntry {
228                function_name: "_start".to_string(),
229                address: entry_point_addr,
230            },
231            instructions: vec![Instruction {
232                address: entry_point_addr,
233                mnemonic: "push".to_string(),
234                operands: "rbp".to_string(),
235                bytes: vec![0x55],
236                size: 1,
237                is_branch: false,
238                is_call: false,
239                is_return: false,
240                branch_target: None,
241            }],
242            size_bytes: 1,
243            execution_count: None,
244        };
245
246        let entry_node = self.graph.add_node(entry_block);
247        self.address_to_node.insert(entry_point_addr, entry_node);
248        self.function_entries
249            .insert("_start".to_string(), entry_node);
250
251        // Create basic blocks for detected symbols (functions)
252        let mut current_addr = entry_point_addr + 0x100;
253
254        for symbol in &analysis.detected_symbols {
255            if !symbol.is_empty() && !symbol.starts_with("__") {
256                let func_block = BasicBlock {
257                    block_type: BasicBlockType::FunctionEntry {
258                        function_name: symbol.clone(),
259                        address: current_addr,
260                    },
261                    instructions: self.create_placeholder_instructions(current_addr, symbol),
262                    size_bytes: 32, // Placeholder
263                    execution_count: None,
264                };
265
266                let func_node = self.graph.add_node(func_block);
267                self.address_to_node.insert(current_addr, func_node);
268                self.function_entries.insert(symbol.clone(), func_node);
269
270                // Add edge from entry point to this function (simplified)
271                self.graph.add_edge(entry_node, func_node, EdgeType::Call);
272
273                current_addr += 0x100;
274            }
275        }
276
277        // Create call sites for imported functions
278        for import in &analysis.imports {
279            let call_addr = current_addr;
280            let call_block = BasicBlock {
281                block_type: BasicBlockType::FunctionCall {
282                    address: call_addr,
283                    target: import.clone(),
284                },
285                instructions: vec![Instruction {
286                    address: call_addr,
287                    mnemonic: "call".to_string(),
288                    operands: import.clone(),
289                    bytes: vec![0xE8, 0x00, 0x00, 0x00, 0x00], // Placeholder
290                    size: 5,
291                    is_branch: false,
292                    is_call: true,
293                    is_return: false,
294                    branch_target: None,
295                }],
296                size_bytes: 5,
297                execution_count: None,
298            };
299
300            let call_node = self.graph.add_node(call_block);
301            self.address_to_node.insert(call_addr, call_node);
302
303            current_addr += 0x10;
304        }
305
306        Ok(())
307    }
308
309   pub fn build_basic_blocks_from_binary(
310        &mut self,
311        analysis: &BinaryAnalysis,
312        cs: &Capstone,
313        binary_data: &[u8],
314    ) -> Result<(), String> {
315        tracing::info!(
316            "Performing advanced binary disassembly for {} format (size: {} bytes)",
317            analysis.format,
318            binary_data.len()
319        );
320
321        // Extract entry point based on binary format
322        let entry_point = self.extract_entry_point_from_binary(analysis, binary_data)?;
323
324        // Get executable code sections based on format
325        let code_sections =
326            self.extract_code_sections_by_format(analysis, binary_data, entry_point)?;
327
328        // Disassemble each code section
329        for section in &code_sections {
330            self.disassemble_code_section(cs, binary_data, section, entry_point)?;
331        }
332
333        // Build edges between basic blocks
334        self.build_control_flow_edges()?;
335
336        Ok(())
337    }
338
339    fn extract_entry_point_from_binary(
340        &self,
341        analysis: &BinaryAnalysis,
342        binary_data: &[u8],
343    ) -> Result<u64, String> {
344        // First check if entry point is explicitly provided
345        if let Some(ep_str) = &analysis.entry_point {
346            if let Ok(ep) = u64::from_str_radix(ep_str.trim_start_matches("0x"), 16) {
347                return Ok(ep);
348            }
349        }
350
351        // Extract entry point based on binary format
352        match analysis.format.as_str() {
353            "ELF" => self.extract_elf_entry_point(binary_data),
354            "PE" => self.extract_pe_entry_point(binary_data),
355            "Intel HEX" => self.extract_intel_hex_entry_point(binary_data),
356            "Motorola S-record" => self.extract_srec_entry_point(binary_data),
357            "ARM Cortex-M" => self.extract_arm_cortex_m_entry_point(binary_data),
358            "Raw Firmware" => self.extract_raw_firmware_entry_point(analysis, binary_data),
359            "DICOM" => Ok(0x0), // DICOM doesn't have traditional entry points
360            _ => {
361                tracing::warn!(
362                    "Unknown binary format: {}, using default entry point",
363                    analysis.format
364                );
365                Ok(0x1000)
366            }
367        }
368    }
369
370    pub fn extract_code_sections_by_format(
371        &self,
372        analysis: &BinaryAnalysis,
373        binary_data: &[u8],
374        entry_point: u64,
375    ) -> Result<Vec<CodeSection>, String> {
376        if !analysis.code_sections.is_empty() {
377            return Ok(analysis.code_sections.clone());
378        }
379
380        // Generate code sections based on binary format
381        match analysis.format.as_str() {
382            "ELF" => self.extract_elf_code_sections(binary_data),
383            "PE" => self.extract_pe_code_sections(binary_data),
384            "Intel HEX" => self.extract_intel_hex_code_sections(binary_data, entry_point),
385            "Motorola S-record" => self.extract_srec_code_sections(binary_data, entry_point),
386            "ARM Cortex-M" => self.extract_arm_cortex_m_code_sections(binary_data, entry_point),
387            "Raw Firmware" => self.extract_raw_firmware_code_sections(binary_data, entry_point),
388            _ => {
389                // Default single code section
390                Ok(vec![CodeSection {
391                    name: ".text".to_string(),
392                    start_address: entry_point,
393                    end_address: entry_point + std::cmp::min(binary_data.len() as u64, 4096),
394                    size: std::cmp::min(binary_data.len() as u64, 4096),
395                    permissions: "rx".to_string(),
396                    section_type: crate::binary::CodeSectionType::Text,
397                }])
398            }
399        }
400    }
401
402    // ELF Entry Point and Code Section Extraction
403   pub fn extract_elf_entry_point(&self, binary_data: &[u8]) -> Result<u64, String> {
404        if binary_data.len() < 64 {
405            return Ok(0x1000);
406        }
407
408        // Check ELF magic
409        if &binary_data[0..4] != b"\x7fELF" {
410            return Ok(0x1000);
411        }
412
413        let is_64bit = binary_data[4] == 2;
414
415        if is_64bit && binary_data.len() >= 32 {
416            let entry_bytes = &binary_data[24..32];
417            let entry_point = u64::from_le_bytes(entry_bytes.try_into().unwrap_or([0; 8]));
418            Ok(entry_point)
419        } else if binary_data.len() >= 28 {
420            let entry_bytes = &binary_data[24..28];
421            let entry_point = u32::from_le_bytes(entry_bytes.try_into().unwrap_or([0; 4])) as u64;
422            Ok(entry_point)
423        } else {
424            Ok(0x1000)
425        }
426    }
427
428    pub fn extract_elf_code_sections(&self, binary_data: &[u8]) -> Result<Vec<CodeSection>, String> {
429        if binary_data.len() < 64 || &binary_data[0..4] != b"\x7fELF" {
430            let entry_point = self.extract_elf_entry_point(binary_data)?;
431            return Ok(vec![CodeSection {
432                name: ".text".to_string(),
433                start_address: entry_point,
434                end_address: entry_point + std::cmp::min(binary_data.len() as u64 / 2, 8192),
435                size: std::cmp::min(binary_data.len() as u64 / 2, 8192),
436                permissions: "rx".to_string(),
437                section_type: crate::binary::CodeSectionType::Text,
438            }]);
439        }
440
441        let is_64bit = binary_data[4] == 2;
442        let is_little_endian = binary_data[5] == 1;
443
444        // Parse ELF header to get section header table
445        let (shoff, shentsize, shnum, shstrndx) = if is_64bit {
446            if binary_data.len() < 64 {
447                return self.extract_elf_code_sections_fallback(binary_data);
448            }
449            let shoff = if is_little_endian {
450                u64::from_le_bytes(binary_data[40..48].try_into().unwrap_or([0; 8]))
451            } else {
452                u64::from_be_bytes(binary_data[40..48].try_into().unwrap_or([0; 8]))
453            };
454            let shentsize = if is_little_endian {
455                u16::from_le_bytes(binary_data[58..60].try_into().unwrap_or([0; 2]))
456            } else {
457                u16::from_be_bytes(binary_data[58..60].try_into().unwrap_or([0; 2]))
458            };
459            let shnum = if is_little_endian {
460                u16::from_le_bytes(binary_data[60..62].try_into().unwrap_or([0; 2]))
461            } else {
462                u16::from_be_bytes(binary_data[60..62].try_into().unwrap_or([0; 2]))
463            };
464            let shstrndx = if is_little_endian {
465                u16::from_le_bytes(binary_data[62..64].try_into().unwrap_or([0; 2]))
466            } else {
467                u16::from_be_bytes(binary_data[62..64].try_into().unwrap_or([0; 2]))
468            };
469            (shoff, shentsize, shnum, shstrndx)
470        } else {
471            if binary_data.len() < 52 {
472                return self.extract_elf_code_sections_fallback(binary_data);
473            }
474            let shoff = if is_little_endian {
475                u32::from_le_bytes(binary_data[32..36].try_into().unwrap_or([0; 4])) as u64
476            } else {
477                u32::from_be_bytes(binary_data[32..36].try_into().unwrap_or([0; 4])) as u64
478            };
479            let shentsize = if is_little_endian {
480                u16::from_le_bytes(binary_data[46..48].try_into().unwrap_or([0; 2]))
481            } else {
482                u16::from_be_bytes(binary_data[46..48].try_into().unwrap_or([0; 2]))
483            };
484            let shnum = if is_little_endian {
485                u16::from_le_bytes(binary_data[48..50].try_into().unwrap_or([0; 2]))
486            } else {
487                u16::from_be_bytes(binary_data[48..50].try_into().unwrap_or([0; 2]))
488            };
489            let shstrndx = if is_little_endian {
490                u16::from_le_bytes(binary_data[50..52].try_into().unwrap_or([0; 2]))
491            } else {
492                u16::from_be_bytes(binary_data[50..52].try_into().unwrap_or([0; 2]))
493            };
494            (shoff, shentsize, shnum, shstrndx)
495        };
496
497        // Validate section header table parameters
498        if shoff == 0 || shnum == 0 || shentsize == 0 {
499            return self.extract_elf_code_sections_fallback(binary_data);
500        }
501
502        if shoff as usize + (shnum as usize * shentsize as usize) > binary_data.len() {
503            return self.extract_elf_code_sections_fallback(binary_data);
504        }
505
506        // Get string table section for section names
507        let string_table = if shstrndx != 0 && (shstrndx as usize) < shnum as usize {
508            self.get_elf_string_table(
509                binary_data,
510                shoff,
511                shentsize,
512                shstrndx,
513                is_64bit,
514                is_little_endian,
515            )
516        } else {
517            None
518        };
519
520        let mut code_sections = Vec::new();
521
522        // Parse section headers
523        for i in 0..shnum {
524            let section_offset = shoff as usize + (i as usize * shentsize as usize);
525            if section_offset + shentsize as usize > binary_data.len() {
526                break;
527            }
528
529            let section_data = &binary_data[section_offset..section_offset + shentsize as usize];
530
531            let (sh_name, sh_type, sh_flags, sh_addr, sh_size) = if is_64bit {
532                if section_data.len() < 64 {
533                    continue;
534                }
535                let sh_name = if is_little_endian {
536                    u32::from_le_bytes(section_data[0..4].try_into().unwrap_or([0; 4]))
537                } else {
538                    u32::from_be_bytes(section_data[0..4].try_into().unwrap_or([0; 4]))
539                };
540                let sh_type = if is_little_endian {
541                    u32::from_le_bytes(section_data[4..8].try_into().unwrap_or([0; 4]))
542                } else {
543                    u32::from_be_bytes(section_data[4..8].try_into().unwrap_or([0; 4]))
544                };
545                let sh_flags = if is_little_endian {
546                    u64::from_le_bytes(section_data[8..16].try_into().unwrap_or([0; 8]))
547                } else {
548                    u64::from_be_bytes(section_data[8..16].try_into().unwrap_or([0; 8]))
549                };
550                let sh_addr = if is_little_endian {
551                    u64::from_le_bytes(section_data[16..24].try_into().unwrap_or([0; 8]))
552                } else {
553                    u64::from_be_bytes(section_data[16..24].try_into().unwrap_or([0; 8]))
554                };
555                let sh_size = if is_little_endian {
556                    u64::from_le_bytes(section_data[32..40].try_into().unwrap_or([0; 8]))
557                } else {
558                    u64::from_be_bytes(section_data[32..40].try_into().unwrap_or([0; 8]))
559                };
560                (sh_name, sh_type, sh_flags, sh_addr, sh_size)
561            } else {
562                if section_data.len() < 40 {
563                    continue;
564                }
565                let sh_name = if is_little_endian {
566                    u32::from_le_bytes(section_data[0..4].try_into().unwrap_or([0; 4]))
567                } else {
568                    u32::from_be_bytes(section_data[0..4].try_into().unwrap_or([0; 4]))
569                };
570                let sh_type = if is_little_endian {
571                    u32::from_le_bytes(section_data[4..8].try_into().unwrap_or([0; 4]))
572                } else {
573                    u32::from_be_bytes(section_data[4..8].try_into().unwrap_or([0; 4]))
574                };
575                let sh_flags = if is_little_endian {
576                    u32::from_le_bytes(section_data[8..12].try_into().unwrap_or([0; 4])) as u64
577                } else {
578                    u32::from_be_bytes(section_data[8..12].try_into().unwrap_or([0; 4])) as u64
579                };
580                let sh_addr = if is_little_endian {
581                    u32::from_le_bytes(section_data[12..16].try_into().unwrap_or([0; 4])) as u64
582                } else {
583                    u32::from_be_bytes(section_data[12..16].try_into().unwrap_or([0; 4])) as u64
584                };
585                let sh_size = if is_little_endian {
586                    u32::from_le_bytes(section_data[20..24].try_into().unwrap_or([0; 4])) as u64
587                } else {
588                    u32::from_be_bytes(section_data[20..24].try_into().unwrap_or([0; 4])) as u64
589                };
590                (sh_name, sh_type, sh_flags, sh_addr, sh_size)
591            };
592
593            // Check if this is an executable section (SHT_PROGBITS with SHF_EXECINSTR)
594            const SHT_PROGBITS: u32 = 1;
595            const SHF_EXECINSTR: u64 = 0x4;
596
597            if sh_type == SHT_PROGBITS && (sh_flags & SHF_EXECINSTR) != 0 && sh_size > 0 {
598                // Get section name from string table
599                let section_name = if let Some(ref strtab) = string_table {
600                    self.get_string_from_table(strtab, sh_name as usize)
601                        .unwrap_or_else(|| format!(".section_{}", i))
602                } else {
603                    format!(".section_{}", i)
604                };
605
606                code_sections.push(CodeSection {
607                    name: section_name,
608                    start_address: sh_addr,
609                    end_address: sh_addr + sh_size,
610                    size: sh_size,
611                    permissions: "rx".to_string(),
612                    section_type: crate::binary::CodeSectionType::Text,
613                });
614            }
615        }
616
617        if code_sections.is_empty() {
618            return self.extract_elf_code_sections_fallback(binary_data);
619        }
620
621        Ok(code_sections)
622    }
623
624    fn extract_elf_code_sections_fallback(
625        &self,
626        binary_data: &[u8],
627    ) -> Result<Vec<CodeSection>, String> {
628        let entry_point = self.extract_elf_entry_point(binary_data)?;
629        Ok(vec![CodeSection {
630            name: ".text".to_string(),
631            start_address: entry_point,
632            end_address: entry_point + std::cmp::min(binary_data.len() as u64 / 2, 8192),
633            size: std::cmp::min(binary_data.len() as u64 / 2, 8192),
634            permissions: "rx".to_string(),
635            section_type: crate::binary::CodeSectionType::Text,
636        }])
637    }
638
639    pub fn get_elf_string_table(
640        &self,
641        binary_data: &[u8],
642        shoff: u64,
643        shentsize: u16,
644        shstrndx: u16,
645        is_64bit: bool,
646        is_little_endian: bool,
647    ) -> Option<Vec<u8>> {
648        let strtab_section_offset = shoff as usize + (shstrndx as usize * shentsize as usize);
649        if strtab_section_offset + shentsize as usize > binary_data.len() {
650            return None;
651        }
652
653        let section_data =
654            &binary_data[strtab_section_offset..strtab_section_offset + shentsize as usize];
655
656        let (sh_offset, sh_size) = if is_64bit {
657            if section_data.len() < 64 {
658                return None;
659            }
660            let sh_offset = if is_little_endian {
661                u64::from_le_bytes(section_data[24..32].try_into().ok()?)
662            } else {
663                u64::from_be_bytes(section_data[24..32].try_into().ok()?)
664            };
665            let sh_size = if is_little_endian {
666                u64::from_le_bytes(section_data[32..40].try_into().ok()?)
667            } else {
668                u64::from_be_bytes(section_data[32..40].try_into().ok()?)
669            };
670            (sh_offset, sh_size)
671        } else {
672            if section_data.len() < 40 {
673                return None;
674            }
675            let sh_offset = if is_little_endian {
676                u32::from_le_bytes(section_data[16..20].try_into().ok()?) as u64
677            } else {
678                u32::from_be_bytes(section_data[16..20].try_into().ok()?) as u64
679            };
680            let sh_size = if is_little_endian {
681                u32::from_le_bytes(section_data[20..24].try_into().ok()?) as u64
682            } else {
683                u32::from_be_bytes(section_data[20..24].try_into().ok()?) as u64
684            };
685            (sh_offset, sh_size)
686        };
687
688        if sh_offset as usize + sh_size as usize > binary_data.len() {
689            return None;
690        }
691
692        Some(binary_data[sh_offset as usize..(sh_offset + sh_size) as usize].to_vec())
693    }
694
695    pub fn get_string_from_table(&self, string_table: &[u8], offset: usize) -> Option<String> {
696        if offset >= string_table.len() {
697            return None;
698        }
699
700        let mut end = offset;
701        while end < string_table.len() && string_table[end] != 0 {
702            end += 1;
703        }
704
705        String::from_utf8(string_table[offset..end].to_vec()).ok()
706    }
707
708    // PE Entry Point and Code Section Extraction
709    pub fn extract_pe_entry_point(&self, binary_data: &[u8]) -> Result<u64, String> {
710        if binary_data.len() < 64 || &binary_data[0..2] != b"MZ" {
711            return Ok(0x401000); // Default PE image base
712        }
713
714        let pe_offset =
715            u32::from_le_bytes(binary_data[0x3C..0x3C + 4].try_into().unwrap_or([0; 4])) as usize;
716
717        if pe_offset + 40 > binary_data.len() || &binary_data[pe_offset..pe_offset + 4] != b"PE\0\0"
718        {
719            return Ok(0x401000);
720        }
721
722        let entry_rva_offset = pe_offset + 24 + 16;
723        if entry_rva_offset + 4 <= binary_data.len() {
724            let entry_rva = u32::from_le_bytes(
725                binary_data[entry_rva_offset..entry_rva_offset + 4]
726                    .try_into()
727                    .unwrap_or([0; 4]),
728            );
729            Ok(entry_rva as u64 + 0x400000)
730        } else {
731            Ok(0x401000)
732        }
733    }
734
735    pub fn extract_pe_code_sections(&self, binary_data: &[u8]) -> Result<Vec<CodeSection>, String> {
736        let entry_point = self.extract_pe_entry_point(binary_data)?;
737        Ok(vec![CodeSection {
738            name: ".text".to_string(),
739            start_address: entry_point,
740            end_address: entry_point + std::cmp::min(binary_data.len() as u64 / 2, 8192),
741            size: std::cmp::min(binary_data.len() as u64 / 2, 8192),
742            permissions: "rx".to_string(),
743            section_type: crate::binary::CodeSectionType::Text,
744        }])
745    }
746
747    // Intel HEX Entry Point and Code Section Extraction
748    pub fn extract_intel_hex_entry_point(&self, binary_data: &[u8]) -> Result<u64, String> {
749        let hex_content = String::from_utf8_lossy(binary_data);
750        let mut lowest_address = u64::MAX;
751        let mut entry_point_found = None;
752
753        for line in hex_content.lines() {
754            if !line.starts_with(':') || line.len() < 11 {
755                continue;
756            }
757
758            if let Ok(record_type) = u8::from_str_radix(&line[7..9], 16) {
759                match record_type {
760                    0x00 => {
761                        // Data record
762                        if let Ok(address) = u16::from_str_radix(&line[3..7], 16) {
763                            let addr = address as u64;
764                            if addr < lowest_address {
765                                lowest_address = addr;
766                            }
767                        }
768                    }
769                    0x05 => {
770                        // Start Linear Address
771                        if line.len() >= 19 {
772                            if let Ok(entry) = u32::from_str_radix(&line[9..17], 16) {
773                                entry_point_found = Some(entry as u64);
774                            }
775                        }
776                    }
777                    0x03 => {
778                        // Start Segment Address
779                        if line.len() >= 15 {
780                            if let Ok(cs) = u16::from_str_radix(&line[9..13], 16) {
781                                if let Ok(ip) = u16::from_str_radix(&line[13..17], 16) {
782                                    entry_point_found = Some((cs as u64) * 16 + (ip as u64));
783                                }
784                            }
785                        }
786                    }
787                    _ => {}
788                }
789            }
790        }
791
792        Ok(entry_point_found.unwrap_or(if lowest_address != u64::MAX {
793            lowest_address
794        } else {
795            0x0000
796        }))
797    }
798
799    pub fn extract_intel_hex_code_sections(
800        &self,
801        binary_data: &[u8],
802        entry_point: u64,
803    ) -> Result<Vec<CodeSection>, String> {
804        let hex_content = String::from_utf8_lossy(binary_data);
805        let mut memory_regions = Vec::new();
806        let mut current_base_address = 0u64;
807
808        for line in hex_content.lines() {
809            if !line.starts_with(':') || line.len() < 11 {
810                continue;
811            }
812
813            if let Ok(record_type) = u8::from_str_radix(&line[7..9], 16) {
814                match record_type {
815                    0x00 => {
816                        // Data record
817                        if let Ok(byte_count) = u8::from_str_radix(&line[1..3], 16) {
818                            if let Ok(address) = u16::from_str_radix(&line[3..7], 16) {
819                                let full_address = current_base_address + (address as u64);
820                                memory_regions.push((full_address, byte_count as u64));
821                            }
822                        }
823                    }
824                    0x04 => {
825                        // Extended Linear Address
826                        if let Ok(base) = u16::from_str_radix(&line[9..13], 16) {
827                            current_base_address = (base as u64) << 16;
828                        }
829                    }
830                    _ => {}
831                }
832            }
833        }
834
835        if memory_regions.is_empty() {
836            return Ok(vec![CodeSection {
837                name: "flash".to_string(),
838                start_address: entry_point,
839                end_address: entry_point + 1024,
840                size: 1024,
841                permissions: "rx".to_string(),
842                section_type: crate::binary::CodeSectionType::Text,
843            }]);
844        }
845
846        memory_regions.sort_by_key(|&(addr, _)| addr);
847        let total_size: u64 = memory_regions.iter().map(|&(_, size)| size).sum();
848
849        Ok(vec![CodeSection {
850            name: "flash".to_string(),
851            start_address: memory_regions[0].0,
852            end_address: memory_regions[0].0 + total_size,
853            size: total_size,
854            permissions: "rx".to_string(),
855            section_type: crate::binary::CodeSectionType::Text,
856        }])
857    }
858
859    // Motorola S-Record Entry Point and Code Section Extraction
860    pub fn extract_srec_entry_point(&self, binary_data: &[u8]) -> Result<u64, String> {
861        let srec_content = String::from_utf8_lossy(binary_data);
862        let mut entry_point = None;
863        let mut lowest_address = u64::MAX;
864
865        for line in srec_content.lines() {
866            if !line.starts_with('S') || line.len() < 4 {
867                continue;
868            }
869
870            let record_type = &line[1..2];
871            match record_type {
872                "1" | "2" | "3" => {
873                    // Data records
874                    if let Ok(_byte_count) = u8::from_str_radix(&line[2..4], 16) {
875                        let addr_len = match record_type {
876                            "1" => 4,
877                            "2" => 6,
878                            "3" => 8,
879                            _ => 4,
880                        };
881
882                        if line.len() >= 4 + addr_len {
883                            if let Ok(address) = u64::from_str_radix(&line[4..4 + addr_len], 16) {
884                                if address < lowest_address {
885                                    lowest_address = address;
886                                }
887                            }
888                        }
889                    }
890                }
891                "7" | "8" | "9" => {
892                    // Start address records
893                    let addr_len = match record_type {
894                        "9" => 4,
895                        "8" => 6,
896                        "7" => 8,
897                        _ => 4,
898                    };
899
900                    if line.len() >= 4 + addr_len {
901                        if let Ok(start_addr) = u64::from_str_radix(&line[4..4 + addr_len], 16) {
902                            entry_point = Some(start_addr);
903                        }
904                    }
905                }
906                _ => {}
907            }
908        }
909
910        Ok(entry_point.unwrap_or(if lowest_address != u64::MAX {
911            lowest_address
912        } else {
913            0x0000
914        }))
915    }
916
917    pub fn extract_srec_code_sections(
918        &self,
919        binary_data: &[u8],
920        entry_point: u64,
921    ) -> Result<Vec<CodeSection>, String> {
922        let srec_content = String::from_utf8_lossy(binary_data);
923        let mut memory_regions = Vec::new();
924
925        for line in srec_content.lines() {
926            if !line.starts_with('S') || line.len() < 4 {
927                continue;
928            }
929
930            let record_type = &line[1..2];
931            if matches!(record_type, "1" | "2" | "3") {
932                if let Ok(byte_count) = u8::from_str_radix(&line[2..4], 16) {
933                    let addr_len = match record_type {
934                        "1" => 4,
935                        "2" => 6,
936                        "3" => 8,
937                        _ => 4,
938                    };
939
940                    if line.len() >= 4 + addr_len {
941                        if let Ok(address) = u64::from_str_radix(&line[4..4 + addr_len], 16) {
942                            let data_bytes = byte_count - (addr_len as u8 / 2) - 1;
943                            memory_regions.push((address, data_bytes as u64));
944                        }
945                    }
946                }
947            }
948        }
949
950        if memory_regions.is_empty() {
951            return Ok(vec![CodeSection {
952                name: "program".to_string(),
953                start_address: entry_point,
954                end_address: entry_point + 1024,
955                size: 1024,
956                permissions: "rx".to_string(),
957                section_type: crate::binary::CodeSectionType::Text,
958            }]);
959        }
960
961        memory_regions.sort_by_key(|&(addr, _)| addr);
962        let total_size: u64 = memory_regions.iter().map(|&(_, size)| size).sum();
963
964        Ok(vec![CodeSection {
965            name: "program".to_string(),
966            start_address: memory_regions[0].0,
967            end_address: memory_regions[0].0 + total_size,
968            size: total_size,
969            permissions: "rx".to_string(),
970            section_type: crate::binary::CodeSectionType::Text,
971        }])
972    }
973
974    // ARM Cortex-M Entry Point and Code Section Extraction
975    pub fn extract_arm_cortex_m_entry_point(&self, binary_data: &[u8]) -> Result<u64, String> {
976        if binary_data.len() < 8 {
977            return Ok(0x00000000);
978        }
979
980        // Reset vector is at offset 4 (little-endian)
981        let reset_vector = u32::from_le_bytes(binary_data[4..8].try_into().unwrap_or([0; 4]));
982
983        // ARM Cortex-M addresses have bit 0 set for Thumb mode, clear it
984        Ok((reset_vector & !1) as u64)
985    }
986
987    fn extract_arm_cortex_m_code_sections(
988        &self,
989        binary_data: &[u8],
990        entry_point: u64,
991    ) -> Result<Vec<CodeSection>, String> {
992        let flash_base = if entry_point >= 0x08000000 {
993            0x08000000
994        } else {
995            0x00000000
996        };
997
998        Ok(vec![CodeSection {
999            name: "flash".to_string(),
1000            start_address: flash_base,
1001            end_address: flash_base + binary_data.len() as u64,
1002            size: binary_data.len() as u64,
1003            permissions: "rx".to_string(),
1004            section_type: crate::binary::CodeSectionType::Text,
1005        }])
1006    }
1007
1008    // Raw Firmware Entry Point and Code Section Extraction
1009    pub fn extract_raw_firmware_entry_point(
1010        &self,
1011        analysis: &BinaryAnalysis,
1012        _binary_data: &[u8],
1013    ) -> Result<u64, String> {
1014        match analysis.architecture.as_str() {
1015            "ARM" | "ARM64" | "AARCH64" => {
1016                if analysis
1017                    .detected_symbols
1018                    .iter()
1019                    .any(|s| s.contains("cortex") || s.contains("thumb"))
1020                {
1021                    Ok(0x08000000) // STM32-style
1022                } else {
1023                    Ok(0x00000000) // Generic ARM
1024                }
1025            }
1026            "x86" | "x86_64" => Ok(0x00100000), // Common x86 firmware base
1027            "MIPS" => Ok(0xBFC00000),           // MIPS boot ROM
1028            "PowerPC" => Ok(0xFFF00000),        // PowerPC boot vector
1029            "RISC-V" => Ok(0x80000000),         // RISC-V common base
1030            _ => Ok(0x00000000),                // Default
1031        }
1032    }
1033
1034    pub fn extract_raw_firmware_code_sections(
1035        &self,
1036        binary_data: &[u8],
1037        entry_point: u64,
1038    ) -> Result<Vec<CodeSection>, String> {
1039        Ok(vec![CodeSection {
1040            name: "firmware".to_string(),
1041            start_address: entry_point,
1042            end_address: entry_point + binary_data.len() as u64,
1043            size: binary_data.len() as u64,
1044            permissions: "rx".to_string(),
1045            section_type: crate::binary::CodeSectionType::Text,
1046        }])
1047    }
1048
1049    pub fn disassemble_code_section(
1050        &mut self,
1051        cs: &Capstone,
1052        binary_data: &[u8],
1053        section: &CodeSection,
1054        entry_point: u64,
1055    ) -> Result<(), String> {
1056        let section_start = section.start_address as usize;
1057        let section_size = section.size as usize;
1058
1059        // Bounds checking
1060        if section_start >= binary_data.len() {
1061            return Ok(()); // Section is beyond binary data
1062        }
1063
1064        let actual_size = std::cmp::min(section_size, binary_data.len() - section_start);
1065        let section_data = &binary_data[section_start..section_start + actual_size];
1066
1067        tracing::info!(
1068            "Disassembling section {} at 0x{:x} (size: {})",
1069            section.name,
1070            section.start_address,
1071            actual_size
1072        );
1073
1074        // Disassemble the section
1075        let instructions = cs
1076            .disasm_all(section_data, section.start_address)
1077            .map_err(|e| format!("Disassembly failed: {}", e))?;
1078
1079        // Build basic blocks from disassembled instructions
1080        self.build_basic_blocks_from_instructions(&instructions, entry_point)?;
1081
1082        Ok(())
1083    }
1084
1085    pub fn build_basic_blocks_from_instructions(
1086        &mut self,
1087        instructions: &capstone::Instructions,
1088        entry_point: u64,
1089    ) -> Result<(), String> {
1090        if instructions.is_empty() {
1091            return Ok(());
1092        }
1093
1094        // Find basic block boundaries
1095        let mut block_starts = std::collections::HashSet::new();
1096        block_starts.insert(entry_point);
1097
1098        // Add instruction addresses that start basic blocks
1099        for insn in instructions.iter() {
1100            let addr = insn.address();
1101
1102            // Check if this is a jump target or function start
1103            if self.is_block_boundary_instruction(&insn) {
1104                block_starts.insert(addr);
1105
1106                // Also mark the next instruction as a block start
1107                if let Some(next_insn) = instructions.iter().find(|i| i.address() > addr) {
1108                    block_starts.insert(next_insn.address());
1109                }
1110            }
1111
1112            // Mark targets of branch instructions
1113            if let Some(target) = self.get_branch_target(&insn) {
1114                block_starts.insert(target);
1115            }
1116        }
1117
1118        // Convert to sorted vector
1119        let mut starts: Vec<u64> = block_starts.into_iter().collect();
1120        starts.sort();
1121
1122        // Build basic blocks
1123        for i in 0..starts.len() {
1124            let start_addr = starts[i];
1125            let end_addr = if i + 1 < starts.len() {
1126                starts[i + 1]
1127            } else {
1128                instructions
1129                    .iter()
1130                    .last()
1131                    .map(|i| i.address() + i.bytes().len() as u64)
1132                    .unwrap_or(start_addr + 1)
1133            };
1134
1135            // Collect instructions for this block
1136            let block_instructions: Vec<Instruction> = instructions
1137                .iter()
1138                .filter(|insn| insn.address() >= start_addr && insn.address() < end_addr)
1139                .map(|insn| Instruction {
1140                    address: insn.address(),
1141                    mnemonic: insn.mnemonic().unwrap_or("").to_string(),
1142                    operands: insn.op_str().unwrap_or("").to_string(),
1143                    bytes: insn.bytes().to_vec(),
1144                    size: insn.bytes().len() as u32,
1145                    is_branch: self.is_branch_instruction(&insn),
1146                    is_call: self.is_call_instruction(&insn),
1147                    is_return: self.is_return_instruction(&insn),
1148                    branch_target: self.get_branch_target(&insn),
1149                })
1150                .collect();
1151
1152            if block_instructions.is_empty() {
1153                continue;
1154            }
1155
1156            // Determine block type with improved heuristics
1157            let block_type = if start_addr == entry_point {
1158                BasicBlockType::FunctionEntry {
1159                    function_name: format!("entry_{:x}", start_addr),
1160                    address: start_addr,
1161                }
1162            } else if self.is_function_entry_heuristic(start_addr, &block_instructions) {
1163                BasicBlockType::FunctionEntry {
1164                    function_name: format!("func_{:x}", start_addr),
1165                    address: start_addr,
1166                }
1167            } else if self.is_conditional_block(&block_instructions) {
1168                BasicBlockType::ConditionalBranch {
1169                    address: start_addr,
1170                    condition: self.extract_condition(&block_instructions),
1171                }
1172            } else if self.is_call_block(&block_instructions) {
1173                BasicBlockType::FunctionCall {
1174                    address: start_addr,
1175                    target: self.extract_call_target(&block_instructions),
1176                }
1177            } else {
1178                BasicBlockType::Sequential {
1179                    start_address: start_addr,
1180                    end_address: end_addr,
1181                }
1182            };
1183
1184            let basic_block = BasicBlock {
1185                instructions: block_instructions,
1186                block_type,
1187                size_bytes: (end_addr - start_addr) as usize,
1188                execution_count: None,
1189            };
1190
1191            // Track function entries before moving basic_block
1192            let function_name = if let BasicBlockType::FunctionEntry { function_name, .. } =
1193                &basic_block.block_type
1194            {
1195                Some(function_name.clone())
1196            } else {
1197                None
1198            };
1199
1200            let node = self.graph.add_node(basic_block);
1201            self.address_to_node.insert(start_addr, node);
1202
1203            if let Some(name) = function_name {
1204                self.function_entries.insert(name, node);
1205            }
1206        }
1207
1208        Ok(())
1209    }
1210
1211    pub fn get_block_end_address(&self, block: &BasicBlock) -> u64 {
1212        if let Some(last_insn) = block.instructions.last() {
1213            last_insn.address + last_insn.size as u64
1214        } else {
1215            match &block.block_type {
1216                BasicBlockType::FunctionEntry { address, .. } => *address,
1217                BasicBlockType::Sequential { end_address, .. } => *end_address,
1218                BasicBlockType::ConditionalBranch { address, .. } => *address,
1219                BasicBlockType::UnconditionalJump { address, .. } => *address,
1220                BasicBlockType::FunctionCall { address, .. } => *address,
1221                BasicBlockType::Return { address } => *address,
1222                BasicBlockType::ExceptionHandler { address, .. } => *address,
1223            }
1224        }
1225    }
1226
1227    pub fn build_control_flow_edges(&mut self) -> Result<(), String> {
1228        let nodes: Vec<_> = self.graph.node_indices().collect();
1229
1230        for node_idx in nodes {
1231            if let Some(block) = self.graph.node_weight(node_idx).cloned() {
1232                let end_address = self.get_block_end_address(&block);
1233                match &block.block_type {
1234                    BasicBlockType::Sequential { .. } => {
1235                        if let Some(next_node) = self.find_block_at_address(end_address) {
1236                            self.graph
1237                                .add_edge(node_idx, next_node, EdgeType::Sequential);
1238                        }
1239                    }
1240                    BasicBlockType::ConditionalBranch { .. } => {
1241                        // Connect to both fall-through and branch target
1242                        if let Some(next_node) = self.find_block_at_address(end_address) {
1243                            self.graph
1244                                .add_edge(node_idx, next_node, EdgeType::ConditionalFalse);
1245                        }
1246                        // TODO: Extract and connect to branch target
1247                    }
1248                    BasicBlockType::FunctionCall { .. } => {
1249                        if let Some(return_node) = self.find_block_at_address(end_address) {
1250                            self.graph.add_edge(node_idx, return_node, EdgeType::Call);
1251                        }
1252                    }
1253                    _ => {
1254                        if let Some(next_node) = self.find_block_at_address(end_address) {
1255                            self.graph
1256                                .add_edge(node_idx, next_node, EdgeType::Sequential);
1257                        }
1258                    }
1259                }
1260            }
1261        }
1262
1263        Ok(())
1264    }
1265
1266    pub fn find_block_at_address(&self, address: u64) -> Option<NodeIndex> {
1267        self.address_to_node.get(&address).copied()
1268    }
1269
1270    // Helper methods for instruction analysis
1271    pub fn is_function_entry_heuristic(&self, addr: u64, instructions: &[Instruction]) -> bool {
1272        if instructions.is_empty() {
1273            return false;
1274        }
1275
1276        let first_insn = &instructions[0];
1277
1278        // Common function prologue patterns
1279        match first_insn.mnemonic.as_str() {
1280            // x86/x64 function prologues
1281            "push" if first_insn.operands.contains("bp") || first_insn.operands.contains("rbp") => {
1282                true
1283            }
1284            "mov" if first_insn.operands.contains("bp") || first_insn.operands.contains("rbp") => {
1285                true
1286            }
1287            "sub" if first_insn.operands.contains("sp") || first_insn.operands.contains("rsp") => {
1288                true
1289            }
1290
1291            // ARM function prologues
1292            "push" if first_insn.operands.contains("lr") => true,
1293            "stmdb" if first_insn.operands.contains("sp!") => true,
1294            "str" if first_insn.operands.contains("lr") => true,
1295
1296            // RISC-V function prologues
1297            "addi" if first_insn.operands.contains("sp") => true,
1298            "sd" if first_insn.operands.contains("ra") => true,
1299
1300            _ => {
1301                // Check if address is aligned (common for function entries)
1302                addr % 4 == 0 || addr % 8 == 0
1303            }
1304        }
1305    }
1306
1307    pub fn is_block_boundary_instruction(&self, insn: &capstone::Insn) -> bool {
1308        if let Some(mnemonic) = insn.mnemonic() {
1309            matches!(
1310                mnemonic,
1311                // x86/x64
1312                "ret" | "retq" | "retn" |           // Returns
1313                "jmp" | "jmpq" |                    // Unconditional jumps
1314                "je" | "jne" | "jz" | "jnz" |       // Conditional jumps
1315                "jl" | "jle" | "jg" | "jge" |
1316                "ja" | "jae" | "jb" | "jbe" |
1317                "jo" | "jno" | "js" | "jns" |
1318                "call" | "callq" |                  // Function calls
1319                
1320                // ARM
1321                "bx" | "blx" | "b" | "bl" |
1322                "bmi" | "bpl" | "bvs" | "bvc" | "bhi" | "bls" |
1323                
1324                // RISC-V/ARM/MIPS shared conditional branches
1325                "beq" | "bne" | "blt" | "ble" | "bgt" | "bge" |
1326                
1327                // RISC-V/MIPS specific
1328                "bltu" | "bgeu" | "jal" | "jalr" |
1329                "j" | "jr" | "bgtz" | "blez" | "bltz" | "bgez"
1330            )
1331        } else {
1332            false
1333        }
1334    }
1335
1336    pub fn is_branch_instruction(&self, insn: &capstone::Insn) -> bool {
1337        if let Some(mnemonic) = insn.mnemonic() {
1338            matches!(
1339                mnemonic,
1340                "jmp"
1341                    | "jmpq"
1342                    | "je"
1343                    | "jne"
1344                    | "jz"
1345                    | "jnz"
1346                    | "jl"
1347                    | "jle"
1348                    | "jg"
1349                    | "jge"
1350                    | "ja"
1351                    | "jae"
1352                    | "jb"
1353                    | "jbe"
1354                    | "jo"
1355                    | "jno"
1356                    | "js"
1357                    | "jns"
1358                    | "b"
1359                    | "bl"
1360                    | "bx"
1361                    | "blx"
1362                    | "beq"
1363                    | "bne"
1364                    | "blt"
1365                    | "ble"
1366                    | "bgt"
1367                    | "bge"
1368                    | "bmi"
1369                    | "bpl"
1370                    | "bvs"
1371                    | "bvc"
1372                    | "bhi"
1373                    | "bls"
1374                    | "jal"
1375                    | "jalr"
1376                    | "j"
1377                    | "jr"
1378                    | "bgtz"
1379                    | "blez"
1380                    | "bltz"
1381                    | "bgez"
1382                    | "bltu"
1383                    | "bgeu"
1384            )
1385        } else {
1386            false
1387        }
1388    }
1389
1390    pub fn is_call_instruction(&self, insn: &capstone::Insn) -> bool {
1391        if let Some(mnemonic) = insn.mnemonic() {
1392            matches!(mnemonic, "call" | "callq" | "bl" | "blx" | "jal" | "jalr")
1393        } else {
1394            false
1395        }
1396    }
1397
1398    pub fn is_return_instruction(&self, insn: &capstone::Insn) -> bool {
1399        if let Some(mnemonic) = insn.mnemonic() {
1400            matches!(mnemonic, "ret" | "retq" | "retn" | "bx" | "jr")
1401        } else {
1402            false
1403        }
1404    }
1405
1406    pub fn get_branch_target(&self, insn: &capstone::Insn) -> Option<u64> {
1407        if let Some(op_str) = insn.op_str() {
1408            // Try to parse hex addresses (e.g., "0x401000")
1409            if op_str.starts_with("0x") {
1410                if let Ok(addr) = u64::from_str_radix(&op_str[2..], 16) {
1411                    return Some(addr);
1412                }
1413            }
1414            // Try to parse decimal addresses
1415            if let Ok(addr) = op_str.parse::<u64>() {
1416                return Some(addr);
1417            }
1418        }
1419        None
1420    }
1421
1422    pub fn is_conditional_block(&self, instructions: &[Instruction]) -> bool {
1423        instructions.iter().any(|insn| {
1424            matches!(
1425                insn.mnemonic.as_str(),
1426                // x86/x64 conditional jumps
1427                "je" | "jne" | "jz" | "jnz" |
1428                "jl" | "jle" | "jg" | "jge" |
1429                "ja" | "jae" | "jb" | "jbe" |
1430                "jo" | "jno" | "js" | "jns" |
1431                
1432                // ARM specific
1433                "bmi" | "bpl" | "bvs" | "bvc" | "bhi" | "bls" |
1434                
1435                // Shared conditional branches (ARM/RISC-V/MIPS)
1436                "beq" | "bne" | "blt" | "ble" | "bgt" | "bge" |
1437                
1438                // RISC-V/MIPS specific
1439                "bltu" | "bgeu" | "bgtz" | "blez" | "bltz" | "bgez"
1440            )
1441        })
1442    }
1443
1444   pub fn is_call_block(&self, instructions: &[Instruction]) -> bool {
1445        instructions.iter().any(|insn| {
1446            matches!(
1447                insn.mnemonic.as_str(),
1448                "call" | "callq" |      // x86/x64
1449                "bl" | "blx" |          // ARM
1450                "jal" | "jalr" // RISC-V
1451            )
1452        })
1453    }
1454
1455    pub fn extract_condition(&self, instructions: &[Instruction]) -> String {
1456        for insn in instructions {
1457            if matches!(
1458                insn.mnemonic.as_str(),
1459                "je" | "jne"
1460                    | "jz"
1461                    | "jnz"
1462                    | "jl"
1463                    | "jle"
1464                    | "jg"
1465                    | "jge"
1466                    | "ja"
1467                    | "jae"
1468                    | "jb"
1469                    | "jbe"
1470                    | "jo"
1471                    | "jno"
1472                    | "js"
1473                    | "jns"
1474                    | "beq"
1475                    | "bne"
1476                    | "blt"
1477                    | "ble"
1478                    | "bgt"
1479                    | "bge"
1480                    | "bmi"
1481                    | "bpl"
1482                    | "bvs"
1483                    | "bvc"
1484                    | "bhi"
1485                    | "bls"
1486                    | "bgtz"
1487                    | "blez"
1488                    | "bltz"
1489                    | "bgez"
1490                    | "bltu"
1491                    | "bgeu"
1492            ) {
1493                return format!("{} {}", insn.mnemonic, insn.operands);
1494            }
1495        }
1496        "unknown".to_string()
1497    }
1498
1499    pub fn extract_call_target(&self, instructions: &[Instruction]) -> String {
1500        for insn in instructions {
1501            if matches!(
1502                insn.mnemonic.as_str(),
1503                "call" | "callq" | "bl" | "blx" | "jal" | "jalr"
1504            ) {
1505                return insn.operands.clone();
1506            }
1507        }
1508        "unknown".to_string()
1509    }
1510
1511    pub fn create_placeholder_instructions(
1512        &self,
1513        start_addr: u64,
1514        function_name: &str,
1515    ) -> Vec<Instruction> {
1516        // Create realistic instruction sequences based on function names
1517        let mut instructions = Vec::new();
1518        let mut addr = start_addr;
1519
1520        // Function prologue
1521        instructions.push(Instruction {
1522            address: addr,
1523            mnemonic: "push".to_string(),
1524            operands: "rbp".to_string(),
1525            bytes: vec![0x55],
1526            size: 1,
1527            is_branch: false,
1528            is_call: false,
1529            is_return: false,
1530            branch_target: None,
1531        });
1532        addr += 1;
1533
1534        instructions.push(Instruction {
1535            address: addr,
1536            mnemonic: "mov".to_string(),
1537            operands: "rbp, rsp".to_string(),
1538            bytes: vec![0x48, 0x89, 0xE5],
1539            size: 3,
1540            is_branch: false,
1541            is_call: false,
1542            is_return: false,
1543            branch_target: None,
1544        });
1545        addr += 3;
1546
1547        // Add function-specific instructions based on name patterns
1548        if function_name.contains("malloc") || function_name.contains("alloc") {
1549            instructions.push(Instruction {
1550                address: addr,
1551                mnemonic: "test".to_string(),
1552                operands: "rdi, rdi".to_string(),
1553                bytes: vec![0x48, 0x85, 0xFF],
1554                size: 3,
1555                is_branch: false,
1556                is_call: false,
1557                is_return: false,
1558                branch_target: None,
1559            });
1560            addr += 3;
1561
1562            instructions.push(Instruction {
1563                address: addr,
1564                mnemonic: "jz".to_string(),
1565                operands: format!("0x{:x}", addr + 10),
1566                bytes: vec![0x74, 0x08],
1567                size: 2,
1568                is_branch: true,
1569                is_call: false,
1570                is_return: false,
1571                branch_target: Some(addr + 10),
1572            });
1573        }
1574
1575        // Function epilogue
1576        addr += 10;
1577        instructions.push(Instruction {
1578            address: addr,
1579            mnemonic: "pop".to_string(),
1580            operands: "rbp".to_string(),
1581            bytes: vec![0x5D],
1582            size: 1,
1583            is_branch: false,
1584            is_call: false,
1585            is_return: false,
1586            branch_target: None,
1587        });
1588        addr += 1;
1589
1590        instructions.push(Instruction {
1591            address: addr,
1592            mnemonic: "ret".to_string(),
1593            operands: "".to_string(),
1594            bytes: vec![0xC3],
1595            size: 1,
1596            is_branch: false,
1597            is_call: false,
1598            is_return: true,
1599            branch_target: None,
1600        });
1601
1602        instructions
1603    }
1604
1605    pub fn detect_loops(&mut self) {
1606        // Implement natural loop detection using dominator analysis
1607        // This is a simplified version - would need proper back-edge detection
1608
1609        for node_idx in self.graph.node_indices() {
1610            if let Some(block) = self.graph.node_weight(node_idx) {
1611                // Look for back edges (edges to blocks that dominate the current block)
1612                for edge in self.graph.edges_directed(node_idx, Direction::Outgoing) {
1613                    let target = edge.target();
1614
1615                    // Simple heuristic: if target address < current address, it might be a back edge
1616                    if let (Some(current_addr), Some(target_addr)) = (
1617                        self.get_block_address(block),
1618                        self.get_block_address_by_node(target),
1619                    ) {
1620                        if target_addr < current_addr {
1621                            // Potential back edge - create loop info
1622                            let loop_info = LoopInfo {
1623                                header: target_addr,
1624                                back_edges: vec![(current_addr, target_addr)],
1625                                loop_blocks: HashSet::from([current_addr, target_addr]),
1626                                nesting_level: 1,
1627                                loop_type: LoopType::Natural,
1628                            };
1629
1630                            self.loops.push(loop_info);
1631                        }
1632                    }
1633                }
1634            }
1635        }
1636    }
1637
1638    pub fn build_dominators(&mut self) {
1639        if let Some(entry_node) = self.graph.node_indices().next() {
1640            self.dominators = Some(dominators::simple_fast(&self.graph, entry_node));
1641        }
1642    }
1643
1644    pub fn get_block_address(&self, block: &BasicBlock) -> Option<u64> {
1645        match &block.block_type {
1646            BasicBlockType::FunctionEntry { address, .. } => Some(*address),
1647            BasicBlockType::Sequential { start_address, .. } => Some(*start_address),
1648            BasicBlockType::ConditionalBranch { address, .. } => Some(*address),
1649            BasicBlockType::UnconditionalJump { address, .. } => Some(*address),
1650            BasicBlockType::FunctionCall { address, .. } => Some(*address),
1651            BasicBlockType::Return { address } => Some(*address),
1652            BasicBlockType::ExceptionHandler { address, .. } => Some(*address),
1653        }
1654    }
1655
1656   pub fn get_block_address_by_node(&self, node_idx: NodeIndex) -> Option<u64> {
1657        self.graph
1658            .node_weight(node_idx)
1659            .and_then(|block| self.get_block_address(block))
1660    }
1661
1662    /// Analyze call graph relationships
1663    pub fn analyze_call_graph(&self, analysis: &BinaryAnalysis) -> CallGraphAnalysis {
1664        let mut call_sites = Vec::new();
1665        let mut function_summaries = HashMap::new();
1666        let mut recursive_functions = Vec::new();
1667        let mut dead_functions = Vec::new();
1668
1669        // Build function summaries
1670        for (func_name, &node_idx) in &self.function_entries {
1671            let mut calls_made = Vec::new();
1672            let mut total_size = 0;
1673
1674            // Traverse all blocks reachable from this function entry
1675            let reachable = dijkstra(&self.graph, node_idx, None, |_| 1);
1676            let basic_blocks = reachable.len();
1677
1678            // Analyze outgoing call edges
1679            for edge in self.graph.edges_directed(node_idx, Direction::Outgoing) {
1680                if matches!(edge.weight(), EdgeType::Call) {
1681                    if let Some(target_block) = self.graph.node_weight(edge.target()) {
1682                        if let BasicBlockType::FunctionCall { target, .. } =
1683                            &target_block.block_type
1684                        {
1685                            calls_made.push(target.clone());
1686
1687                            call_sites.push(CallSite {
1688                                caller_address: self.get_block_address(target_block).unwrap_or(0),
1689                                callee: target.clone(),
1690                                call_type: CallType::Direct,
1691                                arguments: vec![], // Would analyze calling convention
1692                            });
1693                        }
1694                    }
1695                }
1696            }
1697
1698            // Calculate cyclomatic complexity (simplified)
1699            let mut complexity = 1; // Base complexity
1700            for reachable_node in reachable.keys() {
1701                if let Some(block) = self.graph.node_weight(*reachable_node) {
1702                    total_size += block.size_bytes;
1703
1704                    match &block.block_type {
1705                        BasicBlockType::ConditionalBranch { .. } => complexity += 1,
1706                        _ => {}
1707                    }
1708                }
1709            }
1710
1711            // Check for recursion
1712            if calls_made.contains(func_name) {
1713                recursive_functions.push(func_name.clone());
1714            }
1715
1716            function_summaries.insert(
1717                func_name.clone(),
1718                FunctionSummary {
1719                    entry_address: self.get_block_address_by_node(node_idx).unwrap_or(0),
1720                    size_bytes: total_size,
1721                    basic_blocks,
1722                    cyclomatic_complexity: complexity,
1723                    calls_made,
1724                    calls_received: vec![], // Would be filled by reverse analysis
1725                    has_loops: self.loops.iter().any(|l| {
1726                        l.loop_blocks
1727                            .contains(&self.get_block_address_by_node(node_idx).unwrap_or(0))
1728                    }),
1729                    max_call_depth: 1, // Would require recursive analysis
1730                },
1731            );
1732        }
1733
1734        // Find dead functions (not called by anyone)
1735        for symbol in &analysis.detected_symbols {
1736            if !call_sites.iter().any(|cs| cs.callee == *symbol)
1737                && symbol != "_start"
1738                && symbol != "main"
1739            {
1740                dead_functions.push(symbol.clone());
1741            }
1742        }
1743
1744        CallGraphAnalysis {
1745            call_sites,
1746            function_summaries,
1747            recursive_functions,
1748            dead_functions,
1749        }
1750    }
1751
1752    /// Get basic blocks that are loop headers
1753    #[allow(dead_code)]
1754    pub fn get_loop_headers(&self) -> Vec<u64> {
1755        self.loops.iter().map(|l| l.header).collect()
1756    }
1757
1758    /// Check if an address is in a loop
1759    #[allow(dead_code)]
1760    pub fn is_in_loop(&self, address: u64) -> bool {
1761        self.loops.iter().any(|l| l.loop_blocks.contains(&address))
1762    }
1763
1764    /// Get dominator information for a block
1765    #[allow(dead_code)]
1766    pub fn get_dominators(&self, node: NodeIndex) -> Option<Vec<NodeIndex>> {
1767        self.dominators.as_ref().map(|dom| {
1768            let mut dominators = Vec::new();
1769            let mut current = Some(node);
1770
1771            while let Some(n) = current {
1772                dominators.push(n);
1773                current = dom.immediate_dominator(n);
1774            }
1775
1776            dominators
1777        })
1778    }
1779}
1780
/// Separate exploitability analysis that uses the CFG
///
/// Produced by `ExploitabilityAnalysis::analyze`, which checks whether a sink
/// function entry can be reached from any of a list of source function
/// entries.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExploitabilityAnalysis {
    /// True when the sink was reachable from at least one source.
    pub is_reachable: bool,
    /// `Some([source, sink])` (names, not addresses) for the first source
    /// that reaches the sink; `None` when no source does.
    pub path: Option<Vec<String>>,
    /// Name of the sink that was analyzed.
    pub sink: String,
    /// Fixed score set by `analyze`: 0.8 when reachable, 0.0 otherwise.
    pub confidence: f32,
    /// Attack vectors derived from the reachable source/sink pair; empty when
    /// the sink is not reachable.
    pub attack_vectors: Vec<AttackVector>,
}
1790
/// Description of one way a sink may be reached, as recorded by
/// `ExploitabilityAnalysis::analyze`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AttackVector {
    /// Free-form category label (`analyze` uses "Control Flow").
    pub vector_type: String,
    /// Block addresses where the vector starts (`analyze` stores the source
    /// entry block's address, or 0 when it has none).
    pub entry_points: Vec<u64>,
    /// Block addresses on the vulnerable path (`analyze` stores only the sink
    /// entry block's address, or 0 when it has none).
    pub vulnerable_path: Vec<u64>,
    /// Human-readable conditions required for the vector to apply.
    pub prerequisites: Vec<String>,
    /// Human-readable description of the consequence.
    pub impact: String,
}
1799
1800impl ExploitabilityAnalysis {
1801    pub fn analyze(cfg: &ControlFlowGraph, sources: &[String], sink: &str) -> Self {
1802        // Use the CFG to perform more sophisticated reachability analysis
1803        let mut is_reachable = false;
1804        let mut path = None;
1805        let mut attack_vectors = Vec::new();
1806
1807        // Find sink node
1808        if let Some(&sink_node) = cfg.function_entries.get(sink) {
1809            // Check reachability from each source
1810            for source in sources {
1811                if let Some(&source_node) = cfg.function_entries.get(source) {
1812                    let paths = dijkstra(&cfg.graph, source_node, Some(sink_node), |_| 1);
1813
1814                    if paths.contains_key(&sink_node) {
1815                        is_reachable = true;
1816
1817                        // Build path description
1818                        let mut path_nodes = Vec::new();
1819                        let current = sink_node;
1820                        while let Some(block) = cfg.graph.node_weight(current) {
1821                            if let Some(addr) = cfg.get_block_address(block) {
1822                                path_nodes.push(format!("0x{:x}", addr));
1823                                // This is simplified - would need proper path reconstruction
1824                                break;
1825                            }
1826                        }
1827
1828                        path = Some(vec![source.clone(), sink.to_string()]);
1829
1830                        // Create attack vector
1831                        attack_vectors.push(AttackVector {
1832                            vector_type: "Control Flow".to_string(),
1833                            entry_points: vec![
1834                                cfg.get_block_address_by_node(source_node).unwrap_or(0),
1835                            ],
1836                            vulnerable_path: vec![
1837                                cfg.get_block_address_by_node(sink_node).unwrap_or(0),
1838                            ],
1839                            prerequisites: vec!["User input control".to_string()],
1840                            impact: "Code execution".to_string(),
1841                        });
1842
1843                        break;
1844                    }
1845                }
1846            }
1847        }
1848
1849        ExploitabilityAnalysis {
1850            is_reachable,
1851            path,
1852            sink: sink.to_string(),
1853            confidence: if is_reachable { 0.8 } else { 0.0 },
1854            attack_vectors,
1855        }
1856    }
1857}