threatflux_binary_analysis/formats/
elf.rs

1//! ELF format parser
2
3use crate::{
4    types::{
5        Architecture, BinaryFormat as Format, BinaryMetadata, Endianness, Export, Import, Section,
6        SectionPermissions, SectionType, SecurityFeatures, Symbol, SymbolBinding, SymbolType,
7        SymbolVisibility,
8    },
9    BinaryFormatParser, BinaryFormatTrait, Result,
10};
11use goblin::elf::Elf;
12
13/// ELF format parser
14pub struct ElfParser;
15
16impl BinaryFormatParser for ElfParser {
17    fn parse(data: &[u8]) -> Result<Box<dyn BinaryFormatTrait>> {
18        let elf = Elf::parse(data)?;
19        Ok(Box::new(ElfBinary::new(elf, data)?))
20    }
21
22    fn can_parse(data: &[u8]) -> bool {
23        data.len() >= 4 && &data[0..4] == b"\x7fELF"
24    }
25}
26
27/// Parsed ELF binary
28pub struct ElfBinary {
29    #[allow(dead_code)]
30    elf: Elf<'static>,
31    #[allow(dead_code)]
32    data: Vec<u8>,
33    metadata: BinaryMetadata,
34    sections: Vec<Section>,
35    symbols: Vec<Symbol>,
36    imports: Vec<Import>,
37    exports: Vec<Export>,
38}
39
40impl ElfBinary {
41    fn new(elf: Elf<'_>, data: &[u8]) -> Result<Self> {
42        let data = data.to_vec();
43
44        // Convert architecture
45        let architecture = match elf.header.e_machine {
46            goblin::elf::header::EM_386 => Architecture::X86,
47            goblin::elf::header::EM_X86_64 => Architecture::X86_64,
48            goblin::elf::header::EM_ARM => Architecture::Arm,
49            goblin::elf::header::EM_AARCH64 => Architecture::Arm64,
50            goblin::elf::header::EM_MIPS => Architecture::Mips,
51            goblin::elf::header::EM_PPC => Architecture::PowerPC,
52            goblin::elf::header::EM_PPC64 => Architecture::PowerPC64,
53            goblin::elf::header::EM_RISCV => Architecture::RiscV,
54            _ => Architecture::Unknown,
55        };
56
57        // Detect endianness
58        let endian = match elf.header.endianness()? {
59            goblin::container::Endian::Little => Endianness::Little,
60            goblin::container::Endian::Big => Endianness::Big,
61        };
62
63        // Analyze security features
64        let security_features = analyze_security_features(&elf, &data);
65
66        let metadata = BinaryMetadata {
67            size: data.len(),
68            format: Format::Elf,
69            architecture,
70            entry_point: if elf.entry != 0 {
71                Some(elf.entry)
72            } else {
73                None
74            },
75            base_address: None, // ELF doesn't have a fixed base address
76            timestamp: None,    // Not available in ELF headers
77            compiler_info: extract_compiler_info(&elf, &data),
78            endian,
79            security_features,
80        };
81
82        // Parse sections
83        let sections = parse_sections(&elf, &data)?;
84
85        // Parse symbols
86        let symbols = parse_symbols(&elf)?;
87
88        // Parse imports and exports
89        let (imports, exports) = parse_imports_exports(&elf)?;
90
91        // We need to handle lifetime issues with the Elf struct
92        // For now, we'll store the essential data and reconstruct what we need
93        let elf_owned = unsafe { std::mem::transmute::<Elf<'_>, Elf<'static>>(elf) };
94
95        Ok(Self {
96            elf: elf_owned,
97            data,
98            metadata,
99            sections,
100            symbols,
101            imports,
102            exports,
103        })
104    }
105}
106
107impl BinaryFormatTrait for ElfBinary {
108    fn format_type(&self) -> Format {
109        Format::Elf
110    }
111
112    fn architecture(&self) -> Architecture {
113        self.metadata.architecture
114    }
115
116    fn entry_point(&self) -> Option<u64> {
117        self.metadata.entry_point
118    }
119
120    fn sections(&self) -> &[Section] {
121        &self.sections
122    }
123
124    fn symbols(&self) -> &[Symbol] {
125        &self.symbols
126    }
127
128    fn imports(&self) -> &[Import] {
129        &self.imports
130    }
131
132    fn exports(&self) -> &[Export] {
133        &self.exports
134    }
135
136    fn metadata(&self) -> &BinaryMetadata {
137        &self.metadata
138    }
139}
140
141fn parse_sections(elf: &Elf, data: &[u8]) -> Result<Vec<Section>> {
142    let mut sections = Vec::new();
143
144    for (i, section_header) in elf.section_headers.iter().enumerate() {
145        let name = elf
146            .shdr_strtab
147            .get_at(section_header.sh_name)
148            .unwrap_or(&format!(".section_{}", i))
149            .to_string();
150
151        let section_type = match section_header.sh_type {
152            goblin::elf::section_header::SHT_PROGBITS => {
153                // Check for debug sections by name first
154                if name.starts_with(".debug_") || name.starts_with(".zdebug_") {
155                    SectionType::Debug
156                } else if section_header.sh_flags
157                    & (goblin::elf::section_header::SHF_EXECINSTR as u64)
158                    != 0
159                {
160                    SectionType::Code
161                } else if section_header.sh_flags & (goblin::elf::section_header::SHF_WRITE as u64)
162                    != 0
163                {
164                    SectionType::Data
165                } else {
166                    SectionType::ReadOnlyData
167                }
168            }
169            goblin::elf::section_header::SHT_NOBITS => SectionType::Bss,
170            goblin::elf::section_header::SHT_SYMTAB => SectionType::Symbol,
171            goblin::elf::section_header::SHT_STRTAB => SectionType::String,
172            goblin::elf::section_header::SHT_RELA | goblin::elf::section_header::SHT_REL => {
173                SectionType::Relocation
174            }
175            goblin::elf::section_header::SHT_DYNAMIC => SectionType::Dynamic,
176            goblin::elf::section_header::SHT_NOTE => SectionType::Note,
177            _ => SectionType::Other(format!("SHT_{}", section_header.sh_type)),
178        };
179
180        let permissions = SectionPermissions {
181            read: true, // ELF sections are generally readable
182            write: section_header.sh_flags & (goblin::elf::section_header::SHF_WRITE as u64) != 0,
183            execute: section_header.sh_flags & (goblin::elf::section_header::SHF_EXECINSTR as u64)
184                != 0,
185        };
186
187        // Extract small section data
188        let section_data = if section_header.sh_size <= 1024
189            && section_header.sh_type != goblin::elf::section_header::SHT_NOBITS
190        {
191            let start = section_header.sh_offset as usize;
192            let end = start + section_header.sh_size as usize;
193            if end <= data.len() {
194                Some(data[start..end].to_vec())
195            } else {
196                None
197            }
198        } else {
199            None
200        };
201
202        sections.push(Section {
203            name,
204            address: section_header.sh_addr,
205            size: section_header.sh_size,
206            offset: section_header.sh_offset,
207            permissions,
208            section_type,
209            data: section_data,
210        });
211    }
212
213    Ok(sections)
214}
215
216fn parse_symbols(elf: &Elf) -> Result<Vec<Symbol>> {
217    let mut symbols = Vec::new();
218
219    for sym in &elf.syms {
220        let name = elf
221            .strtab
222            .get_at(sym.st_name)
223            .unwrap_or("unknown")
224            .to_string();
225
226        // Skip empty names
227        if name.is_empty() {
228            continue;
229        }
230
231        let symbol_type = match sym.st_type() {
232            goblin::elf::sym::STT_FUNC => SymbolType::Function,
233            goblin::elf::sym::STT_OBJECT => SymbolType::Object,
234            goblin::elf::sym::STT_SECTION => SymbolType::Section,
235            goblin::elf::sym::STT_FILE => SymbolType::File,
236            goblin::elf::sym::STT_COMMON => SymbolType::Common,
237            goblin::elf::sym::STT_TLS => SymbolType::Thread,
238            _ => SymbolType::Other(format!("STT_{}", sym.st_type())),
239        };
240
241        let binding = match sym.st_bind() {
242            goblin::elf::sym::STB_LOCAL => SymbolBinding::Local,
243            goblin::elf::sym::STB_GLOBAL => SymbolBinding::Global,
244            goblin::elf::sym::STB_WEAK => SymbolBinding::Weak,
245            _ => SymbolBinding::Other(format!("STB_{}", sym.st_bind())),
246        };
247
248        let visibility = match sym.st_visibility() {
249            goblin::elf::sym::STV_DEFAULT => SymbolVisibility::Default,
250            goblin::elf::sym::STV_INTERNAL => SymbolVisibility::Internal,
251            goblin::elf::sym::STV_HIDDEN => SymbolVisibility::Hidden,
252            goblin::elf::sym::STV_PROTECTED => SymbolVisibility::Protected,
253            _ => SymbolVisibility::Default,
254        };
255
256        let section_index = if sym.st_shndx == (goblin::elf::section_header::SHN_UNDEF as usize) {
257            None
258        } else {
259            Some(sym.st_shndx)
260        };
261
262        symbols.push(Symbol {
263            name: name.clone(),
264            demangled_name: try_demangle(&name),
265            address: sym.st_value,
266            size: sym.st_size,
267            symbol_type,
268            binding,
269            visibility,
270            section_index,
271        });
272    }
273
274    Ok(symbols)
275}
276
277fn parse_imports_exports(elf: &Elf) -> crate::types::ImportExportResult {
278    let mut imports = Vec::new();
279    let mut exports = Vec::new();
280
281    // Parse dynamic symbols for imports/exports
282    for sym in &elf.dynsyms {
283        let name = elf
284            .dynstrtab
285            .get_at(sym.st_name)
286            .unwrap_or("unknown")
287            .to_string();
288
289        if name.is_empty() {
290            continue;
291        }
292
293        if sym.st_shndx == (goblin::elf::section_header::SHN_UNDEF as usize) {
294            // This is an import
295            imports.push(Import {
296                name,
297                library: None, // Library name would need to be resolved from dynamic entries
298                address: None,
299                ordinal: None,
300            });
301        } else if sym.st_bind() == goblin::elf::sym::STB_GLOBAL {
302            // This is an export
303            exports.push(Export {
304                name,
305                address: sym.st_value,
306                ordinal: None,
307                forwarded_name: None,
308            });
309        }
310    }
311
312    Ok((imports, exports))
313}
314
315fn analyze_security_features(elf: &Elf, _data: &[u8]) -> SecurityFeatures {
316    let mut features = SecurityFeatures::default();
317
318    // Check for NX bit (GNU_STACK segment)
319    for phdr in &elf.program_headers {
320        if phdr.p_type == goblin::elf::program_header::PT_GNU_STACK {
321            features.nx_bit = (phdr.p_flags & goblin::elf::program_header::PF_X) == 0;
322        }
323    }
324
325    // Check for PIE (Position Independent Executable)
326    features.pie = elf.header.e_type == goblin::elf::header::ET_DYN;
327
328    // Check for RELRO
329    for phdr in &elf.program_headers {
330        if phdr.p_type == goblin::elf::program_header::PT_GNU_RELRO {
331            features.relro = true;
332        }
333    }
334
335    // Other features would need more complex analysis
336    features.aslr = features.pie; // PIE enables ASLR
337
338    features
339}
340
341fn extract_compiler_info(elf: &Elf, data: &[u8]) -> Option<String> {
342    // Look for compiler information in .comment section
343    for section in &elf.section_headers {
344        if let Some(name) = elf.shdr_strtab.get_at(section.sh_name) {
345            if name == ".comment" {
346                let offset = section.sh_offset as usize;
347                let size = section.sh_size as usize;
348
349                if offset + size <= data.len() {
350                    let section_data = &data[offset..offset + size];
351
352                    // Parse null-terminated strings from the comment section
353                    let comment_str = String::from_utf8_lossy(section_data);
354                    let comment = comment_str.trim_end_matches('\0').trim();
355
356                    if !comment.is_empty() {
357                        return Some(comment.to_string());
358                    }
359                }
360            }
361        }
362    }
363
364    // Also look for Go build info
365    for section in &elf.section_headers {
366        if let Some(name) = elf.shdr_strtab.get_at(section.sh_name) {
367            if name == ".go.buildinfo" || name.contains("go.") {
368                return Some("Go compiler".to_string());
369            }
370        }
371    }
372
373    // Look for Rust-specific sections
374    for section in &elf.section_headers {
375        if let Some(name) = elf.shdr_strtab.get_at(section.sh_name) {
376            if name.starts_with(".rustc") || name.contains("rust") {
377                return Some("Rust compiler".to_string());
378            }
379        }
380    }
381
382    None
383}
384
/// Placeholder "demangler" for Itanium-style C++ symbol names.
///
/// No real demangling is performed: a name starting with `_Z` is returned
/// with the literal prefix `demangled_` prepended; any other name yields
/// `None`.
fn try_demangle(name: &str) -> Option<String> {
    // `_Z` is the prefix that marks a mangled C++ name.
    let looks_mangled = name.starts_with("_Z");
    looks_mangled.then(|| format!("demangled_{}", name))
}