threatflux_binary_analysis/
lib.rs

1#![allow(clippy::uninlined_format_args)]
2//! # ThreatFlux Binary Analysis Library
3//!
4//! A comprehensive binary analysis framework for security research, reverse engineering,
5//! and threat detection. Supports multiple binary formats with advanced analysis capabilities.
6//!
7//! ## Features
8//!
9//! - **Multi-format Support**: ELF, PE, Mach-O, Java, WASM
10//! - **Disassembly**: Multi-architecture support via Capstone and iced-x86
11//! - **Control Flow Analysis**: CFG construction, complexity metrics, anomaly detection
12//! - **Symbol Resolution**: Debug info parsing, demangling, cross-references
13//! - **Entropy Analysis**: Statistical analysis, packing detection
14//! - **Security Analysis**: Vulnerability patterns, malware indicators
15//!
16//! ## Quick Start
17//!
18//! ```rust
19//! use threatflux_binary_analysis::BinaryAnalyzer;
20//!
21//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
22//! // Example with minimal data - analysis may fail for incomplete binaries
23//! let data = vec![0x7f, 0x45, 0x4c, 0x46]; // ELF magic
24//!
25//! let analyzer = BinaryAnalyzer::new();
26//! match analyzer.analyze(&data) {
27//!     Ok(analysis) => {
28//!         println!("Format: {:?}", analysis.format);
29//!         println!("Architecture: {:?}", analysis.architecture);
30//!     }
31//!     Err(e) => {
32//!         println!("Analysis failed: {}", e);
33//!     }
34//! }
35//! # Ok(())
36//! # }
37//! ```
38
39pub mod analysis;
40pub mod error;
41pub mod formats;
42pub mod types;
43
44#[cfg(any(feature = "disasm-capstone", feature = "disasm-iced"))]
45pub mod disasm;
46
47pub mod utils;
48
49// Re-export main types
50#[cfg(any(feature = "disasm-capstone", feature = "disasm-iced"))]
51pub use disasm::DisassemblyEngine;
52pub use error::{BinaryError, Result};
53pub use types::{
54    AnalysisResult, Architecture, BasicBlock, BinaryFormat, BinaryFormatParser, BinaryFormatTrait,
55    BinaryMetadata, CallGraph, CallGraphConfig, CallGraphEdge, CallGraphNode, CallGraphStatistics,
56    ComplexityMetrics, ControlFlowGraph, EnhancedControlFlowAnalysis, EntropyAnalysis, Export,
57    Function, HalsteadMetrics, Import, Instruction, Loop, LoopType, NodeType, Section,
58    SecurityIndicators, Symbol,
59};
60
61/// Main entry point for binary analysis
62pub struct BinaryAnalyzer {
63    config: AnalysisConfig,
64}
65
66/// Configuration for binary analysis
67#[derive(Debug, Clone)]
68pub struct AnalysisConfig {
69    /// Enable disassembly analysis
70    pub enable_disassembly: bool,
71    /// Preferred disassembly engine
72    #[cfg(any(feature = "disasm-capstone", feature = "disasm-iced"))]
73    pub disassembly_engine: DisassemblyEngine,
74    /// Enable control flow analysis
75    pub enable_control_flow: bool,
76    /// Enable call graph analysis
77    pub enable_call_graph: bool,
78    /// Enable cognitive complexity calculation
79    pub enable_cognitive_complexity: bool,
80    /// Enable advanced loop analysis
81    pub enable_advanced_loops: bool,
82    /// Enable entropy analysis
83    pub enable_entropy: bool,
84    /// Enable symbol resolution
85    pub enable_symbols: bool,
86    /// Maximum bytes to analyze for large files
87    pub max_analysis_size: usize,
88    /// Architecture hint (None for auto-detection)
89    pub architecture_hint: Option<Architecture>,
90    /// Call graph configuration
91    pub call_graph_config: Option<CallGraphConfig>,
92}
93
94impl Default for AnalysisConfig {
95    fn default() -> Self {
96        Self {
97            enable_disassembly: true,
98            #[cfg(any(feature = "disasm-capstone", feature = "disasm-iced"))]
99            disassembly_engine: DisassemblyEngine::Auto,
100            enable_control_flow: true,
101            enable_call_graph: false,
102            enable_cognitive_complexity: true,
103            enable_advanced_loops: true,
104            enable_entropy: true,
105            enable_symbols: true,
106            max_analysis_size: 100 * 1024 * 1024, // 100MB
107            architecture_hint: None,
108            call_graph_config: None,
109        }
110    }
111}
112
113impl BinaryAnalyzer {
114    /// Create a new analyzer with default configuration
115    pub fn new() -> Self {
116        Self::with_config(AnalysisConfig::default())
117    }
118
119    /// Create a new analyzer with custom configuration
120    pub fn with_config(config: AnalysisConfig) -> Self {
121        Self { config }
122    }
123
124    /// Get a reference to the analysis configuration
125    pub fn config(&self) -> &AnalysisConfig {
126        &self.config
127    }
128
129    /// Analyze a binary file from raw data
130    pub fn analyze(&self, data: &[u8]) -> Result<AnalysisResult> {
131        let binary_file = BinaryFile::parse(data)?;
132        self.analyze_binary(&binary_file)
133    }
134
135    /// Analyze a parsed binary file
136    pub fn analyze_binary(&self, binary: &BinaryFile) -> Result<AnalysisResult> {
137        #[allow(unused_mut)] // mut needed when optional analysis features are enabled
138        let mut result = AnalysisResult {
139            format: binary.format(),
140            architecture: binary.architecture(),
141            entry_point: binary.entry_point(),
142            sections: binary.sections().to_vec(),
143            symbols: binary.symbols().to_vec(),
144            imports: binary.imports().to_vec(),
145            exports: binary.exports().to_vec(),
146            metadata: binary.metadata().clone(),
147            ..Default::default()
148        };
149
150        // Perform optional analyses based on configuration
151        if self.config.enable_disassembly {
152            #[cfg(any(feature = "disasm-capstone", feature = "disasm-iced"))]
153            {
154                result.disassembly = Some(self.perform_disassembly(binary)?);
155            }
156        }
157
158        if self.config.enable_control_flow {
159            #[cfg(feature = "control-flow")]
160            {
161                result.control_flow = Some(self.perform_control_flow_analysis(binary)?);
162            }
163        }
164
165        if self.config.enable_call_graph {
166            #[cfg(feature = "control-flow")]
167            {
168                result.call_graph = Some(self.perform_call_graph_analysis(binary)?);
169            }
170        }
171
172        if self.config.enable_cognitive_complexity || self.config.enable_advanced_loops {
173            #[cfg(feature = "control-flow")]
174            {
175                result.enhanced_control_flow =
176                    Some(self.perform_enhanced_control_flow_analysis(binary)?);
177            }
178        }
179
180        if self.config.enable_entropy {
181            #[cfg(feature = "entropy-analysis")]
182            {
183                result.entropy = Some(self.perform_entropy_analysis(binary)?);
184            }
185        }
186
187        #[cfg(feature = "symbol-resolution")]
188        {
189            if self.config.enable_symbols {
190                analysis::symbols::demangle_symbols(&mut result.symbols);
191            }
192        }
193
194        Ok(result)
195    }
196
197    #[cfg(any(feature = "disasm-capstone", feature = "disasm-iced"))]
198    fn perform_disassembly(&self, binary: &BinaryFile) -> Result<Vec<Instruction>> {
199        disasm::disassemble_binary(binary, &self.config)
200    }
201
202    #[cfg(feature = "control-flow")]
203    fn perform_control_flow_analysis(&self, binary: &BinaryFile) -> Result<Vec<ControlFlowGraph>> {
204        analysis::control_flow::analyze_binary(binary)
205    }
206
207    #[cfg(feature = "control-flow")]
208    fn perform_call_graph_analysis(&self, binary: &BinaryFile) -> Result<CallGraph> {
209        let config = self.config.call_graph_config.clone().unwrap_or_default();
210        analysis::call_graph::analyze_binary_with_config(binary, config)
211    }
212
213    #[cfg(feature = "control-flow")]
214    fn perform_enhanced_control_flow_analysis(
215        &self,
216        binary: &BinaryFile,
217    ) -> Result<EnhancedControlFlowAnalysis> {
218        // Build enhanced control flow graphs
219        let control_flow_config = analysis::control_flow::AnalysisConfig {
220            max_instructions: 10000,
221            max_depth: 100,
222            detect_loops: true,
223            calculate_metrics: true,
224            enable_call_graph: false,
225            enable_cognitive_complexity: self.config.enable_cognitive_complexity,
226            enable_advanced_loops: self.config.enable_advanced_loops,
227            call_graph_config: None,
228        };
229
230        let analyzer = analysis::control_flow::ControlFlowAnalyzer::with_config(
231            binary.architecture(),
232            control_flow_config,
233        );
234        let control_flow_graphs = analyzer.analyze_binary(binary)?;
235
236        // Compute summary statistics
237        let mut total_cognitive_complexity = 0;
238        let mut max_cognitive_complexity = 0;
239        let mut most_complex_function = None;
240        let mut functions_analyzed = 0;
241
242        let mut total_loops = 0;
243        let mut natural_loops = 0;
244        let mut irreducible_loops = 0;
245        let mut nested_loops = 0;
246        let mut max_nesting_depth = 0;
247        let mut loops_by_type = std::collections::HashMap::new();
248
249        for cfg in &control_flow_graphs {
250            functions_analyzed += 1;
251
252            // Cognitive complexity stats
253            let cognitive = cfg.complexity.cognitive_complexity;
254            total_cognitive_complexity += cognitive;
255            if cognitive > max_cognitive_complexity {
256                max_cognitive_complexity = cognitive;
257                most_complex_function = Some(cfg.function.name.clone());
258            }
259
260            // Loop stats
261            total_loops += cfg.loops.len();
262            for loop_info in &cfg.loops {
263                match loop_info.loop_type {
264                    LoopType::Natural => natural_loops += 1,
265                    LoopType::Irreducible => irreducible_loops += 1,
266                    _ => {}
267                }
268
269                if loop_info.nesting_level > 1 {
270                    nested_loops += 1;
271                }
272
273                if loop_info.nesting_level > max_nesting_depth {
274                    max_nesting_depth = loop_info.nesting_level;
275                }
276
277                *loops_by_type
278                    .entry(loop_info.loop_type.clone())
279                    .or_insert(0) += 1;
280            }
281        }
282
283        let average_cognitive_complexity = if functions_analyzed > 0 {
284            total_cognitive_complexity as f64 / functions_analyzed as f64
285        } else {
286            0.0
287        };
288
289        let cognitive_complexity_summary = types::CognitiveComplexityStats {
290            total_cognitive_complexity,
291            average_cognitive_complexity,
292            max_cognitive_complexity,
293            most_complex_function,
294            functions_analyzed,
295        };
296
297        let loop_analysis_summary = types::LoopAnalysisStats {
298            total_loops,
299            natural_loops,
300            irreducible_loops,
301            nested_loops,
302            max_nesting_depth,
303            loops_by_type,
304        };
305
306        Ok(EnhancedControlFlowAnalysis {
307            control_flow_graphs,
308            cognitive_complexity_summary,
309            loop_analysis_summary,
310        })
311    }
312
313    #[cfg(feature = "entropy-analysis")]
314    fn perform_entropy_analysis(&self, binary: &BinaryFile) -> Result<EntropyAnalysis> {
315        analysis::entropy::analyze_binary(binary)
316    }
317}
318
319impl Default for BinaryAnalyzer {
320    fn default() -> Self {
321        Self::new()
322    }
323}
324
325/// Parsed binary file representation
326pub struct BinaryFile {
327    data: Vec<u8>,
328    parsed: Box<dyn BinaryFormatTrait>,
329}
330
331impl BinaryFile {
332    /// Parse binary data and detect format
333    pub fn parse(data: &[u8]) -> Result<Self> {
334        let format = formats::detect_format(data)?;
335        let parsed = formats::parse_binary(data, format)?;
336
337        Ok(Self {
338            data: data.to_vec(),
339            parsed,
340        })
341    }
342
343    /// Get the binary format type
344    pub fn format(&self) -> BinaryFormat {
345        self.parsed.format_type()
346    }
347
348    /// Get the target architecture
349    pub fn architecture(&self) -> Architecture {
350        self.parsed.architecture()
351    }
352
353    /// Get the entry point address
354    pub fn entry_point(&self) -> Option<u64> {
355        self.parsed.entry_point()
356    }
357
358    /// Get binary sections
359    pub fn sections(&self) -> &[Section] {
360        self.parsed.sections()
361    }
362
363    /// Get symbol table
364    pub fn symbols(&self) -> &[Symbol] {
365        self.parsed.symbols()
366    }
367
368    /// Get imports
369    pub fn imports(&self) -> &[Import] {
370        self.parsed.imports()
371    }
372
373    /// Get exports
374    pub fn exports(&self) -> &[Export] {
375        self.parsed.exports()
376    }
377
378    /// Get binary metadata
379    pub fn metadata(&self) -> &BinaryMetadata {
380        self.parsed.metadata()
381    }
382
383    /// Get raw binary data
384    pub fn data(&self) -> &[u8] {
385        &self.data
386    }
387}
388
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly created analyzer enables every pass except the call graph.
    #[test]
    fn test_analyzer_creation() {
        let analyzer = BinaryAnalyzer::new();
        let cfg = analyzer.config();

        assert!(cfg.enable_disassembly);
        assert!(cfg.enable_control_flow);
        assert!(!cfg.enable_call_graph);
        assert!(cfg.enable_cognitive_complexity);
        assert!(cfg.enable_advanced_loops);
        assert!(cfg.enable_entropy);
        assert!(cfg.enable_symbols);
    }

    /// A custom configuration is stored unchanged by `with_config`.
    #[test]
    fn test_custom_config() {
        let analyzer = BinaryAnalyzer::with_config(AnalysisConfig {
            enable_disassembly: false,
            #[cfg(any(feature = "disasm-capstone", feature = "disasm-iced"))]
            disassembly_engine: DisassemblyEngine::Auto,
            enable_control_flow: true,
            enable_call_graph: true,
            enable_cognitive_complexity: false,
            enable_advanced_loops: true,
            enable_entropy: false,
            enable_symbols: true,
            max_analysis_size: 1024,
            architecture_hint: Some(Architecture::X86_64),
            call_graph_config: Some(CallGraphConfig::default()),
        });
        let cfg = analyzer.config();

        assert!(!cfg.enable_disassembly);
        assert!(cfg.enable_control_flow);
        assert!(cfg.enable_call_graph);
        assert!(!cfg.enable_cognitive_complexity);
        assert!(cfg.enable_advanced_loops);
        assert!(!cfg.enable_entropy);
        assert!(cfg.enable_symbols);
        assert_eq!(cfg.max_analysis_size, 1024);
        assert!(cfg.call_graph_config.is_some());
    }
}