threatflux_binary_analysis/
types.rs

1//! Core types and data structures for binary analysis
2
3use std::collections::HashMap;
4
5#[cfg(feature = "serde-support")]
6use serde::{Deserialize, Serialize};
7
8// Type aliases to reduce complexity
9pub type BinaryResult<T> = crate::Result<T>;
10pub type ParsedBinary = Box<dyn BinaryFormatTrait>;
11pub type ParseResult = BinaryResult<ParsedBinary>;
12pub type ImportExportResult = BinaryResult<(Vec<Import>, Vec<Export>)>;
13pub type ByteSliceResult<'a> = BinaryResult<&'a [u8]>;
14pub type PatternMatchMap =
15    HashMap<crate::utils::patterns::PatternCategory, Vec<crate::utils::patterns::PatternMatch>>;
16pub type HexPatternResult = BinaryResult<Vec<Option<u8>>>;
17pub type HexPattern = Vec<Option<u8>>;
18
/// Supported binary formats.
///
/// The default value is [`BinaryFormat::Unknown`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
#[cfg_attr(feature = "serde-support", derive(Serialize, Deserialize))]
pub enum BinaryFormat {
    /// Executable and Linkable Format (Linux/Unix)
    Elf,
    /// Portable Executable (Windows)
    Pe,
    /// Mach Object (macOS/iOS)
    MachO,
    /// Java Class file
    Java,
    /// WebAssembly
    Wasm,
    /// Raw binary data
    Raw,
    /// Unknown format
    #[default]
    Unknown,
}

impl std::fmt::Display for BinaryFormat {
    /// Writes the human-readable format name (e.g. `"ELF"`, `"Mach-O"`).
    ///
    /// The name is emitted through [`std::fmt::Formatter::pad`], so width,
    /// fill, alignment and precision flags such as `{:>8}` are honoured.
    /// Plain `{}` output is unchanged.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            BinaryFormat::Elf => "ELF",
            BinaryFormat::Pe => "PE",
            BinaryFormat::MachO => "Mach-O",
            BinaryFormat::Java => "Java",
            BinaryFormat::Wasm => "WebAssembly",
            BinaryFormat::Raw => "Raw",
            BinaryFormat::Unknown => "Unknown",
        };
        // `pad` applies the caller's formatting flags; `write!` would drop them.
        f.pad(label)
    }
}
53
/// Supported architectures.
///
/// The default value is [`Architecture::Unknown`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
#[cfg_attr(feature = "serde-support", derive(Serialize, Deserialize))]
pub enum Architecture {
    /// x86 32-bit
    X86,
    /// x86 64-bit
    X86_64,
    /// ARM 32-bit
    Arm,
    /// ARM 64-bit
    Arm64,
    /// MIPS
    Mips,
    /// MIPS 64-bit
    Mips64,
    /// PowerPC
    PowerPC,
    /// PowerPC 64-bit
    PowerPC64,
    /// RISC-V
    RiscV,
    /// RISC-V 64-bit
    RiscV64,
    /// WebAssembly
    Wasm,
    /// Java Virtual Machine
    Jvm,
    /// Unknown architecture
    #[default]
    Unknown,
}

impl std::fmt::Display for Architecture {
    /// Writes the human-readable architecture name (e.g. `"x86-64"`, `"ARM64"`).
    ///
    /// The name is emitted through [`std::fmt::Formatter::pad`], so width,
    /// fill, alignment and precision flags such as `{:<10}` are honoured.
    /// Plain `{}` output is unchanged.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Architecture::X86 => "x86",
            Architecture::X86_64 => "x86-64",
            Architecture::Arm => "ARM",
            Architecture::Arm64 => "ARM64",
            Architecture::Mips => "MIPS",
            Architecture::Mips64 => "MIPS64",
            Architecture::PowerPC => "PowerPC",
            Architecture::PowerPC64 => "PowerPC64",
            Architecture::RiscV => "RISC-V",
            Architecture::RiscV64 => "RISC-V64",
            Architecture::Wasm => "WebAssembly",
            Architecture::Jvm => "JVM",
            Architecture::Unknown => "Unknown",
        };
        // `pad` applies the caller's formatting flags; `write!` would drop them.
        f.pad(label)
    }
}
106
107/// Binary metadata
108#[derive(Debug, Clone)]
109#[cfg_attr(feature = "serde-support", derive(Serialize, Deserialize))]
110pub struct BinaryMetadata {
111    /// File size in bytes
112    pub size: usize,
113    /// Detected format
114    pub format: BinaryFormat,
115    /// Target architecture
116    pub architecture: Architecture,
117    /// Entry point address
118    pub entry_point: Option<u64>,
119    /// Base address for loading
120    pub base_address: Option<u64>,
121    /// Compilation timestamp
122    pub timestamp: Option<u64>,
123    /// Compiler information
124    pub compiler_info: Option<String>,
125    /// Endianness
126    pub endian: Endianness,
127    /// Security features
128    pub security_features: SecurityFeatures,
129}
130
/// Byte order of a binary.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde-support", derive(Serialize, Deserialize))]
pub enum Endianness {
    /// Little-endian byte order.
    Little,
    /// Big-endian byte order.
    Big,
}
138
/// Flags for the security features detected in a binary.
///
/// The derived default has every flag set to `false`.
#[derive(Clone, Debug, Default)]
#[cfg_attr(feature = "serde-support", derive(Serialize, Deserialize))]
pub struct SecurityFeatures {
    /// Data Execution Prevention / No-Execute bit.
    pub nx_bit: bool,
    /// Address Space Layout Randomization.
    pub aslr: bool,
    /// Stack canaries / stack protection.
    pub stack_canary: bool,
    /// Control Flow Integrity.
    pub cfi: bool,
    /// Fortify source.
    pub fortify: bool,
    /// Position Independent Executable.
    pub pie: bool,
    /// Relocation Read-Only.
    pub relro: bool,
    /// Whether the binary is signed.
    pub signed: bool,
}
160
161/// Binary section information
162#[derive(Debug, Clone)]
163#[cfg_attr(feature = "serde-support", derive(Serialize, Deserialize))]
164pub struct Section {
165    /// Section name
166    pub name: String,
167    /// Virtual address
168    pub address: u64,
169    /// Size in bytes
170    pub size: u64,
171    /// File offset
172    pub offset: u64,
173    /// Section permissions
174    pub permissions: SectionPermissions,
175    /// Section type
176    pub section_type: SectionType,
177    /// Raw data (optional, for small sections)
178    pub data: Option<Vec<u8>>,
179}
180
/// Access permissions of a section.
///
/// The derived default has every permission set to `false`.
#[derive(Clone, Debug, Default)]
#[cfg_attr(feature = "serde-support", derive(Serialize, Deserialize))]
pub struct SectionPermissions {
    /// Read permission flag.
    pub read: bool,
    /// Write permission flag.
    pub write: bool,
    /// Execute permission flag.
    pub execute: bool,
}
189
/// Kinds of section.
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde-support", derive(Serialize, Deserialize))]
pub enum SectionType {
    /// Code section.
    Code,
    /// Data section.
    Data,
    /// Read-only data section.
    ReadOnlyData,
    /// BSS section.
    Bss,
    /// Debug section.
    Debug,
    /// Symbol section.
    Symbol,
    /// String section.
    String,
    /// Relocation section.
    Relocation,
    /// Dynamic section.
    Dynamic,
    /// Note section.
    Note,
    /// Any other kind, carrying a free-form string.
    Other(String),
}
206
207/// Symbol information
208#[derive(Debug, Clone)]
209#[cfg_attr(feature = "serde-support", derive(Serialize, Deserialize))]
210pub struct Symbol {
211    /// Symbol name
212    pub name: String,
213    /// Demangled name (if applicable)
214    pub demangled_name: Option<String>,
215    /// Address
216    pub address: u64,
217    /// Size
218    pub size: u64,
219    /// Symbol type
220    pub symbol_type: SymbolType,
221    /// Binding
222    pub binding: SymbolBinding,
223    /// Visibility
224    pub visibility: SymbolVisibility,
225    /// Section index
226    pub section_index: Option<usize>,
227}
228
/// Kinds of symbol.
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde-support", derive(Serialize, Deserialize))]
pub enum SymbolType {
    /// Function symbol.
    Function,
    /// Object symbol.
    Object,
    /// Section symbol.
    Section,
    /// File symbol.
    File,
    /// Common symbol.
    Common,
    /// Thread symbol.
    Thread,
    /// Any other kind, carrying a free-form string.
    Other(String),
}
241
/// Binding of a symbol.
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde-support", derive(Serialize, Deserialize))]
pub enum SymbolBinding {
    /// Local binding.
    Local,
    /// Global binding.
    Global,
    /// Weak binding.
    Weak,
    /// Any other binding, carrying a free-form string.
    Other(String),
}
251
/// Visibility of a symbol.
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde-support", derive(Serialize, Deserialize))]
pub enum SymbolVisibility {
    /// Default visibility.
    Default,
    /// Internal visibility.
    Internal,
    /// Hidden visibility.
    Hidden,
    /// Protected visibility.
    Protected,
}
261
/// Description of one imported function or symbol.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde-support", derive(Serialize, Deserialize))]
pub struct Import {
    /// Name of the imported function or symbol.
    pub name: String,
    /// Name of the library, if any.
    pub library: Option<String>,
    /// Address, present once resolved.
    pub address: Option<u64>,
    /// Ordinal (used for PE files).
    pub ordinal: Option<u16>,
}
275
/// Description of one exported function or symbol.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde-support", derive(Serialize, Deserialize))]
pub struct Export {
    /// Name of the exported function or symbol.
    pub name: String,
    /// Address of the export.
    pub address: u64,
    /// Ordinal (used for PE files).
    pub ordinal: Option<u16>,
    /// Forwarded name, where applicable.
    pub forwarded_name: Option<String>,
}
289
290/// Disassembled instruction
291#[derive(Debug, Clone)]
292#[cfg_attr(feature = "serde-support", derive(Serialize, Deserialize))]
293pub struct Instruction {
294    /// Instruction address
295    pub address: u64,
296    /// Raw instruction bytes
297    pub bytes: Vec<u8>,
298    /// Assembly mnemonic
299    pub mnemonic: String,
300    /// Operand string
301    pub operands: String,
302    /// Instruction category
303    pub category: InstructionCategory,
304    /// Control flow information
305    pub flow: ControlFlow,
306    /// Size in bytes
307    pub size: usize,
308}
309
/// Categories an instruction can be assigned to.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde-support", derive(Serialize, Deserialize))]
pub enum InstructionCategory {
    /// Arithmetic instruction.
    Arithmetic,
    /// Logic instruction.
    Logic,
    /// Memory instruction.
    Memory,
    /// Control instruction.
    Control,
    /// System instruction.
    System,
    /// Crypto instruction.
    Crypto,
    /// Vector instruction.
    Vector,
    /// Float instruction.
    Float,
    /// Category not known.
    Unknown,
}
324
/// Control flow effect of an instruction.
// NOTE(review): the `u64` payloads are presumably target addresses — confirm with the disassembler.
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde-support", derive(Serialize, Deserialize))]
pub enum ControlFlow {
    /// Normal sequential flow.
    Sequential,
    /// Unconditional jump.
    Jump(u64),
    /// Conditional jump.
    ConditionalJump(u64),
    /// Function call.
    Call(u64),
    /// Function return.
    Return,
    /// Interrupt or system call.
    Interrupt,
    /// Unknown or indirect flow.
    Unknown,
}
344
345/// Basic block in control flow graph
346#[derive(Debug, Clone)]
347#[cfg_attr(feature = "serde-support", derive(Serialize, Deserialize))]
348pub struct BasicBlock {
349    /// Block ID
350    pub id: usize,
351    /// Start address
352    pub start_address: u64,
353    /// End address
354    pub end_address: u64,
355    /// Instructions in this block
356    pub instructions: Vec<Instruction>,
357    /// Successor blocks
358    pub successors: Vec<usize>,
359    /// Predecessor blocks
360    pub predecessors: Vec<usize>,
361}
362
363/// Control flow graph
364#[derive(Debug, Clone)]
365#[cfg_attr(feature = "serde-support", derive(Serialize, Deserialize))]
366pub struct ControlFlowGraph {
367    /// Function information
368    pub function: Function,
369    /// Basic blocks
370    pub basic_blocks: Vec<BasicBlock>,
371    /// Complexity metrics
372    pub complexity: ComplexityMetrics,
373}
374
375/// Function information
376#[derive(Debug, Clone)]
377#[cfg_attr(feature = "serde-support", derive(Serialize, Deserialize))]
378pub struct Function {
379    /// Function name
380    pub name: String,
381    /// Start address
382    pub start_address: u64,
383    /// End address
384    pub end_address: u64,
385    /// Size in bytes
386    pub size: u64,
387    /// Function type
388    pub function_type: FunctionType,
389    /// Calling convention
390    pub calling_convention: Option<String>,
391    /// Parameters (if available)
392    pub parameters: Vec<Parameter>,
393    /// Return type (if available)
394    pub return_type: Option<String>,
395}
396
/// Kinds of function.
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde-support", derive(Serialize, Deserialize))]
pub enum FunctionType {
    /// Normal function.
    Normal,
    /// Constructor.
    Constructor,
    /// Destructor.
    Destructor,
    /// Operator.
    Operator,
    /// Main function.
    Main,
    /// Entry point.
    Entrypoint,
    /// Import.
    Import,
    /// Export.
    Export,
    /// Thunk.
    Thunk,
    /// Kind not known.
    Unknown,
}
412
413/// Function parameter
414#[derive(Debug, Clone)]
415#[cfg_attr(feature = "serde-support", derive(Serialize, Deserialize))]
416pub struct Parameter {
417    /// Parameter name
418    pub name: Option<String>,
419    /// Parameter type
420    pub param_type: String,
421    /// Register or stack location
422    pub location: ParameterLocation,
423}
424
/// Where a parameter is located.
#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde-support", derive(Serialize, Deserialize))]
pub enum ParameterLocation {
    /// Located in a register, identified by a string.
    Register(String),
    /// Located on the stack, identified by a signed integer.
    // NOTE(review): presumably a stack offset — confirm the reference point with the producer.
    Stack(i64),
    /// Location not known.
    Unknown,
}
433
/// Complexity metrics for control flow.
///
/// The derived default has every metric set to `0`.
#[derive(Clone, Debug, Default)]
#[cfg_attr(feature = "serde-support", derive(Serialize, Deserialize))]
pub struct ComplexityMetrics {
    /// Cyclomatic complexity.
    pub cyclomatic_complexity: u32,
    /// Count of basic blocks.
    pub basic_block_count: u32,
    /// Count of edges.
    pub edge_count: u32,
    /// Nesting depth.
    pub nesting_depth: u32,
    /// Count of loops.
    pub loop_count: u32,
}
449
450/// Entropy analysis results
451#[derive(Debug, Clone)]
452#[cfg_attr(feature = "serde-support", derive(Serialize, Deserialize))]
453pub struct EntropyAnalysis {
454    /// Overall entropy score (0.0 - 8.0)
455    pub overall_entropy: f64,
456    /// Section-wise entropy
457    pub section_entropy: HashMap<String, f64>,
458    /// High entropy regions
459    pub high_entropy_regions: Vec<EntropyRegion>,
460    /// Packing indicators
461    pub packing_indicators: PackingIndicators,
462}
463
/// One region of high entropy.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde-support", derive(Serialize, Deserialize))]
pub struct EntropyRegion {
    /// Offset where the region starts.
    pub start: u64,
    /// Offset where the region ends.
    pub end: u64,
    /// Entropy value of the region.
    pub entropy: f64,
    /// Possible explanation for the region.
    pub description: String,
}
477
478/// Packing indicators
479#[derive(Debug, Clone, Default)]
480#[cfg_attr(feature = "serde-support", derive(Serialize, Deserialize))]
481pub struct PackingIndicators {
482    /// Likely packed
483    pub is_packed: bool,
484    /// Detected packer (if any)
485    pub packer_name: Option<String>,
486    /// Compression ratio estimate
487    pub compression_ratio: Option<f64>,
488    /// Obfuscation indicators
489    pub obfuscation_level: ObfuscationLevel,
490}
491
/// Level of obfuscation.
///
/// The default value is [`ObfuscationLevel::None`].
#[derive(Clone, Debug, Default, PartialEq, Eq)]
#[cfg_attr(feature = "serde-support", derive(Serialize, Deserialize))]
pub enum ObfuscationLevel {
    /// No obfuscation.
    #[default]
    None,
    /// Low level.
    Low,
    /// Medium level.
    Medium,
    /// High level.
    High,
    /// Extreme level.
    Extreme,
}
503
/// Security-relevant indicators, each kept as a list of strings.
///
/// The derived default has every list empty.
#[derive(Clone, Debug, Default)]
#[cfg_attr(feature = "serde-support", derive(Serialize, Deserialize))]
pub struct SecurityIndicators {
    /// Suspicious API calls.
    pub suspicious_apis: Vec<String>,
    /// Anti-debugging techniques.
    pub anti_debug: Vec<String>,
    /// Anti-VM techniques.
    pub anti_vm: Vec<String>,
    /// Cryptographic indicators.
    pub crypto_indicators: Vec<String>,
    /// Network indicators.
    pub network_indicators: Vec<String>,
    /// File system indicators.
    pub filesystem_indicators: Vec<String>,
    /// Registry indicators (Windows).
    pub registry_indicators: Vec<String>,
}
523
524/// Complete analysis result
525#[derive(Debug, Clone, Default)]
526#[cfg_attr(feature = "serde-support", derive(Serialize, Deserialize))]
527pub struct AnalysisResult {
528    /// Binary format
529    pub format: BinaryFormat,
530    /// Target architecture
531    pub architecture: Architecture,
532    /// Entry point
533    pub entry_point: Option<u64>,
534    /// Binary metadata
535    pub metadata: BinaryMetadata,
536    /// Sections
537    pub sections: Vec<Section>,
538    /// Symbols
539    pub symbols: Vec<Symbol>,
540    /// Imports
541    pub imports: Vec<Import>,
542    /// Exports
543    pub exports: Vec<Export>,
544    /// Disassembly (optional)
545    pub disassembly: Option<Vec<Instruction>>,
546    /// Control flow graphs (optional)
547    pub control_flow: Option<Vec<ControlFlowGraph>>,
548    /// Entropy analysis (optional)
549    pub entropy: Option<EntropyAnalysis>,
550    /// Security indicators (optional)
551    pub security: Option<SecurityIndicators>,
552}
553
554impl Default for BinaryMetadata {
555    fn default() -> Self {
556        Self {
557            size: 0,
558            format: BinaryFormat::Unknown,
559            architecture: Architecture::Unknown,
560            entry_point: None,
561            base_address: None,
562            timestamp: None,
563            compiler_info: None,
564            endian: Endianness::Little,
565            security_features: SecurityFeatures::default(),
566        }
567    }
568}
569
570/// Trait for binary format parsers
571pub trait BinaryFormatParser {
572    /// Parse binary data
573    fn parse(data: &[u8]) -> ParseResult;
574
575    /// Check if this parser can handle the data
576    fn can_parse(data: &[u8]) -> bool;
577}
578
579/// Trait implemented by all binary formats
580pub trait BinaryFormatTrait: Send + Sync {
581    /// Get format type
582    fn format_type(&self) -> BinaryFormat;
583
584    /// Get target architecture
585    fn architecture(&self) -> Architecture;
586
587    /// Get entry point
588    fn entry_point(&self) -> Option<u64>;
589
590    /// Get sections
591    fn sections(&self) -> &[Section];
592
593    /// Get symbols
594    fn symbols(&self) -> &[Symbol];
595
596    /// Get imports
597    fn imports(&self) -> &[Import];
598
599    /// Get exports
600    fn exports(&self) -> &[Export];
601
602    /// Get metadata
603    fn metadata(&self) -> &BinaryMetadata;
604}