nabla_cli/binary/
binary_analysis.rs

1use super::{BinaryAnalysis, extract_license_info, extract_version_info};
2use blake3::Hasher;
3use chrono::Utc;
4use goblin::{
5    Object as GoblinObject,
6    elf::Elf,
7    mach::{MachO, constants::S_ATTR_PURE_INSTRUCTIONS, load_command::CommandVariant},
8    pe::PE,
9};
10use infer;
11use object::{Object, ObjectSymbol};
12use sha2::{Digest, Sha256};
13use std::collections::HashSet;
14use uuid::Uuid;
15use wasmparser::{Parser, Payload};
16
17// Import specialized format parsers
18use capstone::prelude::*;
19
20pub async fn analyze_binary(file_name: &str, contents: &[u8]) -> anyhow::Result<BinaryAnalysis> {
21    tracing::info!(
22        "Starting binary analysis for '{}' ({} bytes)",
23        file_name,
24        contents.len()
25    );
26
27    // Early validation for very small files
28    if contents.len() < 50 {
29        tracing::warn!(
30            "File is very small ({} bytes), analyzing as raw firmware blob",
31            contents.len()
32        );
33        let sha256_hash = Sha256::digest(contents);
34        let mut hasher = Hasher::new();
35        hasher.update(contents);
36        let alternative_hash = hasher.finalize();
37
38        let mut analysis = BinaryAnalysis {
39            id: Uuid::new_v4(),
40            file_name: file_name.to_string(),
41            format: "unknown".to_string(),
42            architecture: "unknown".to_string(),
43            languages: Vec::new(),
44            detected_symbols: Vec::new(),
45            embedded_strings: Vec::new(),
46            suspected_secrets: Vec::new(),
47            imports: Vec::new(),
48            exports: Vec::new(),
49            hash_sha256: format!("{:x}", sha256_hash),
50            hash_blake3: Some(hex::encode(alternative_hash.as_bytes())),
51            size_bytes: contents.len() as u64,
52            linked_libraries: Vec::new(),
53            static_linked: false,
54            version_info: None,
55            license_info: None,
56            metadata: serde_json::json!({}),
57            created_at: Utc::now(),
58            sbom: None,
59            binary_data: Some(contents.to_vec()),
60            entry_point: None,
61            code_sections: Vec::new(),
62        };
63
64        analyze_raw_firmware_blob(&mut analysis, contents)?;
65
66        // Extract version and license information
67        analysis.version_info = Some(extract_version_info(
68            contents,
69            &analysis.embedded_strings,
70            &analysis.format,
71        ));
72        analysis.license_info = Some(extract_license_info(&analysis.embedded_strings));
73
74        return Ok(analysis);
75    }
76
77    let sha256_hash = Sha256::digest(contents);
78    let mut hasher = Hasher::new();
79    hasher.update(contents);
80    let alternative_hash = hasher.finalize();
81
82    // Detect file type with more detailed logging
83    let detected_type = infer::get(contents);
84    let file_type = if let Some(kind) = detected_type {
85        tracing::info!(
86            "Detected file type: {} ({})",
87            kind.mime_type(),
88            kind.extension()
89        );
90        kind.mime_type().to_string()
91    } else {
92        tracing::debug!("Could not detect file type, using fallback");
93        detect_file_type_fallback(file_name, contents)
94    };
95
96    let mut analysis = BinaryAnalysis {
97        id: Uuid::new_v4(),
98        file_name: file_name.to_string(),
99        format: file_type.clone(),
100        architecture: "unknown".to_string(),
101        languages: Vec::new(),
102        detected_symbols: Vec::new(),
103        embedded_strings: extract_strings(contents),
104        suspected_secrets: Vec::new(),
105        imports: Vec::new(),
106        exports: Vec::new(),
107        hash_sha256: format!("{:x}", sha256_hash),
108        hash_blake3: Some(hex::encode(alternative_hash.as_bytes())),
109        size_bytes: contents.len() as u64,
110        linked_libraries: Vec::new(),
111        static_linked: false,
112        version_info: None,
113        license_info: None,
114        metadata: serde_json::json!({}),
115        created_at: Utc::now(),
116        sbom: None,
117        binary_data: Some(contents.to_vec()),
118        entry_point: None,
119        code_sections: Vec::new(),
120    };
121
122    // Try different parsing strategies based on file type and magic bytes
123    let mut parsed_successfully = false;
124
125    if contents.len() >= 4 {
126        // Explicitly check for compressed formats first
127        match &contents[0..4.min(contents.len())] {
128            [0x1F, 0x8B, _, _] => {
129                tracing::info!("GZIP magic detected");
130                analysis.format = "compressed-firmware".to_string();
131                analysis.languages.push("Compressed Binary".to_string());
132                parsed_successfully = true;
133            }
134            [0x04, 0x22, 0x4D, 0x18] => {
135                tracing::info!("LZ4 magic detected");
136                analysis.format = "compressed-firmware".to_string();
137                analysis.languages.push("Compressed Binary".to_string());
138                parsed_successfully = true;
139            }
140            [0x42, 0x5A, 0x68, _] => {
141                tracing::info!("BZIP2 magic detected");
142                analysis.format = "compressed-firmware".to_string();
143                analysis.languages.push("Compressed Binary".to_string());
144                parsed_successfully = true;
145            }
146            [0xFD, 0x37, 0x7A, 0x58] => {
147                tracing::info!("XZ magic detected");
148                analysis.format = "compressed-firmware".to_string();
149                analysis.languages.push("Compressed Binary".to_string());
150                parsed_successfully = true;
151            }
152            [0x28, 0xB5, 0x2F, 0xFD] => {
153                tracing::info!("ZSTD magic detected");
154                analysis.format = "compressed-firmware".to_string();
155                analysis.languages.push("Compressed Binary".to_string());
156                parsed_successfully = true;
157            }
158            [0x43, 0x4F, 0x4D, 0x50] => {
159                // "COMP" magic for custom IoT firmware
160                tracing::info!("Custom IoT compressed firmware magic (COMP) detected");
161                analysis.format = "compressed-firmware".to_string();
162                analysis.languages.push("Compressed Binary".to_string());
163                parsed_successfully = true;
164            }
165            _ => {}
166        }
167
168        // If not a compressed format, proceed with other magic checks
169        if !parsed_successfully {
170            match &contents[0..4] {
171                [0x7f, b'E', b'L', b'F'] => {
172                    tracing::info!("ELF magic detected, using goblin ELF parser");
173                    if let Ok(GoblinObject::Elf(elf)) = GoblinObject::parse(contents) {
174                        analyze_elf(&mut analysis, &elf, contents)?;
175                        parsed_successfully = true;
176                    }
177                }
178                [b'M', b'Z', _, _] => {
179                    tracing::info!("PE magic detected, using goblin PE parser");
180                    if let Ok(GoblinObject::PE(pe)) = GoblinObject::parse(contents) {
181                        analyze_pe(&mut analysis, &pe, contents)?;
182                        parsed_successfully = true;
183                    }
184                }
185                [0xfe, 0xed, 0xfa, 0xce] | [0xce, 0xfa, 0xed, 0xfe] => {
186                    tracing::info!("Mach-O magic detected, using goblin Mach-O parser");
187                    if let Ok(GoblinObject::Mach(mach)) = GoblinObject::parse(contents) {
188                        match mach {
189                            goblin::mach::Mach::Fat(_) => {
190                                analysis.format = "macho-fat".to_string();
191                                analysis.architecture = "multi".to_string();
192                            }
193                            goblin::mach::Mach::Binary(macho) => {
194                                analyze_macho(&mut analysis, &macho, contents)?
195                            }
196                        }
197                        parsed_successfully = true;
198                    }
199                }
200                [0x00, 0x61, 0x73, 0x6d] => {
201                    tracing::info!("WASM magic detected, using wasmparser");
202                    if analyze_wasm(&mut analysis, contents).is_ok() {
203                        parsed_successfully = true;
204                    }
205                }
206                _ => {
207                    // Check for ar archive magic
208                    if contents.len() >= 8 && &contents[0..8] == b"!<arch>\n" {
209                        tracing::info!("AR archive magic detected");
210                        analysis.format = "archive".to_string();
211                        parsed_successfully = true;
212                    }
213                    // Check for DICOM files (DICM at offset 128)
214                    else if contents.len() >= 132 && &contents[128..132] == b"DICM" {
215                        tracing::info!("DICOM magic detected, using DICOM parser");
216                        if analyze_dicom_medical_imaging(&mut analysis, contents).is_ok() {
217                            parsed_successfully = true;
218                        }
219                    }
220                }
221            }
222        }
223    }
224
225    // Check for firmware file formats before goblin parsing
226    if !parsed_successfully {
227        let text_content = String::from_utf8_lossy(contents);
228        let first_few_lines: Vec<&str> = text_content.lines().take(5).collect();
229
230        // Check for Intel HEX format (starts with :)
231        if first_few_lines
232            .iter()
233            .any(|line| line.trim().starts_with(':'))
234            && first_few_lines.iter().all(|line| {
235                let trimmed = line.trim();
236                trimmed.is_empty()
237                    || trimmed.starts_with(':')
238                    || trimmed.chars().all(|c| c.is_ascii_hexdigit() || c == ':')
239            })
240        {
241            tracing::info!("Detected Intel HEX format, using Intel HEX parser");
242            if analyze_intel_hex(&mut analysis, contents).is_ok() {
243                parsed_successfully = true;
244            }
245        }
246
247        // Check for Motorola S-Record format (starts with S)
248        if !parsed_successfully
249            && first_few_lines
250                .iter()
251                .any(|line| line.trim().starts_with('S'))
252            && first_few_lines.iter().all(|line| {
253                let trimmed = line.trim();
254                trimmed.is_empty()
255                    || (trimmed.starts_with('S')
256                        && trimmed.len() >= 4
257                        && trimmed.chars().skip(1).all(|c| c.is_ascii_hexdigit()))
258            })
259        {
260            tracing::info!("Detected Motorola S-Record format, using S-Record parser");
261            if analyze_srec(&mut analysis, contents).is_ok() {
262                parsed_successfully = true;
263            }
264        }
265    }
266
267    if !parsed_successfully {
268        tracing::debug!("No specific magic bytes found, attempting generic goblin parsing...");
269        match GoblinObject::parse(contents) {
270            Ok(obj) => {
271                tracing::info!("Successfully parsed with goblin (generic)");
272                match obj {
273                    GoblinObject::Elf(elf) => {
274                        tracing::info!("Detected ELF binary (generic)");
275                        analyze_elf(&mut analysis, &elf, contents)?;
276                        parsed_successfully = true;
277                    }
278                    GoblinObject::PE(pe) => {
279                        tracing::info!("Detected PE binary (generic)");
280                        analyze_pe(&mut analysis, &pe, contents)?;
281                        parsed_successfully = true;
282                    }
283                    GoblinObject::Mach(mach) => {
284                        tracing::info!("Detected Mach-O binary (generic)");
285                        match mach {
286                            goblin::mach::Mach::Fat(_) => {
287                                analysis.format = "macho-fat".to_string();
288                                analysis.architecture = "multi".to_string();
289                            }
290                            goblin::mach::Mach::Binary(macho) => {
291                                analyze_macho(&mut analysis, &macho, contents)?
292                            }
293                        }
294                        parsed_successfully = true;
295                    }
296                    GoblinObject::Archive(_) => {
297                        tracing::info!("Detected archive");
298                        analysis.format = "archive".to_string();
299                        parsed_successfully = true;
300                    }
301                    _ => {
302                        tracing::debug!("Unknown goblin object type");
303                    }
304                }
305            }
306            Err(e) => {
307                tracing::debug!("Goblin parsing failed: {}, trying WebAssembly", e);
308                if analyze_wasm(&mut analysis, contents).is_ok() {
309                    tracing::info!("Successfully parsed as WebAssembly");
310                    parsed_successfully = true;
311                }
312            }
313        }
314    }
315
316    // Check for raw firmware blobs before giving up
317    if !parsed_successfully {
318        // Try ARM Cortex-M firmware detection (look for vector table patterns)
319        if contents.len() >= 8 {
320            let sp_bytes = [contents[0], contents[1], contents[2], contents[3]];
321            let reset_bytes = [contents[4], contents[5], contents[6], contents[7]];
322            let sp_value = u32::from_le_bytes(sp_bytes);
323            let reset_value = u32::from_le_bytes(reset_bytes);
324
325            // Check if this looks like ARM Cortex-M vector table
326            if sp_value >= 0x20000000 && sp_value <= 0x20100000 && // Stack in SRAM
327               reset_value >= 0x08000000 && reset_value <= 0x08100000 && // Reset in Flash
328               (reset_value & 1) == 1
329            {
330                // Thumb mode bit set
331                tracing::info!("Detected ARM Cortex-M firmware blob, using ARM Cortex-M parser");
332                if analyze_arm_cortex_m(&mut analysis, contents).is_ok() {
333                    parsed_successfully = true;
334                }
335            }
336        }
337    }
338
339    // Extract version and license information
340    tracing::debug!("Extracting version and license metadata");
341    analysis.version_info = Some(extract_version_info(
342        contents,
343        &analysis.embedded_strings,
344        &analysis.format,
345    ));
346    analysis.license_info = Some(extract_license_info(&analysis.embedded_strings));
347
348    tracing::info!(
349        "Metadata extraction complete: version_confidence={:.2}, license_confidence={:.2}",
350        analysis
351            .version_info
352            .as_ref()
353            .map(|v| v.confidence)
354            .unwrap_or(0.0),
355        analysis
356            .license_info
357            .as_ref()
358            .map(|l| l.confidence)
359            .unwrap_or(0.0)
360    );
361
362    if !parsed_successfully {
363        tracing::info!("All specialized parsers failed, using raw firmware blob analysis");
364        analyze_raw_firmware_blob(&mut analysis, contents)?;
365    } else {
366        tracing::info!("Successfully analyzed {} as {}", file_name, analysis.format);
367    }
368
369    Ok(analysis)
370}
371
372fn analyze_macho(
373    analysis: &mut BinaryAnalysis,
374    macho: &MachO,
375    contents: &[u8],
376) -> anyhow::Result<()> {
377    analysis.format = "macho".to_string();
378
379    // Determine architecture
380    analysis.architecture = match macho.header.cputype() {
381        goblin::mach::constants::cputype::CPU_TYPE_X86_64 => "x86_64".to_string(),
382        goblin::mach::constants::cputype::CPU_TYPE_ARM64 => "aarch64".to_string(),
383        goblin::mach::constants::cputype::CPU_TYPE_X86 => "i386".to_string(),
384        _ => format!("unknown({})", macho.header.cputype()),
385    };
386
387    // Extract symbols (both regular and dynamic)
388    let mut symbol_set = HashSet::new();
389    if let Some(symbols) = &macho.symbols {
390        for symbol in symbols.iter() {
391            if let Ok((name, _)) = symbol {
392                if !name.is_empty() {
393                    symbol_set.insert(name.to_string());
394                    analysis.detected_symbols.push(name.to_string());
395                }
396            }
397        }
398    }
399
400    // Extract libraries and frameworks
401    for lib in &macho.libs {
402        let lib_name = lib.to_string();
403        analysis.linked_libraries.push(lib_name.clone());
404        analysis.imports.push(lib_name.clone());
405        // Add to embedded strings for version extraction
406        analysis.embedded_strings.push(lib_name.clone());
407        // Extract potential version info from library name (e.g., libcrypto.1.1.dylib)
408        if let Some(version) = extract_version_from_lib_name(&lib_name) {
409            analysis.embedded_strings.push(version);
410        }
411    }
412
413    // Use object crate for detailed import/export analysis
414    if let Ok(obj_file) = object::File::parse(contents) {
415        for symbol in obj_file.symbols() {
416            if let Ok(name) = symbol.name() {
417                if !name.is_empty() {
418                    if symbol.is_undefined() {
419                        analysis.imports.push(name.to_string());
420                        analysis.embedded_strings.push(name.to_string());
421                    } else if symbol.is_global() {
422                        analysis.exports.push(name.to_string());
423                    }
424                    symbol_set.insert(name.to_string());
425                }
426            }
427        }
428    }
429
430    // Extract additional metadata from load commands
431    let mut metadata = serde_json::json!({
432        "analysis_type": "macho",
433        "load_commands": [],
434        "frameworks": [],
435        "min_os_version": null,
436    });
437
438    // Process load commands for frameworks and version info
439    for lc in macho.load_commands.iter() {
440        match lc.command {
441            CommandVariant::LoadDylib(ref dylib) => {
442                let offset = dylib.dylib.name as usize;
443                if offset < contents.len() {
444                    let name_bytes = &contents[offset..];
445                    if let Some(end) = name_bytes.iter().position(|&b| b == 0) {
446                        if let Ok(name_str) = std::str::from_utf8(&name_bytes[..end]) {
447                            if name_str.contains(".framework") {
448                                metadata["frameworks"]
449                                    .as_array_mut()
450                                    .unwrap()
451                                    .push(serde_json::Value::String(name_str.to_string()));
452                                analysis.embedded_strings.push(name_str.to_string());
453                            }
454                        }
455                    }
456                }
457            }
458            CommandVariant::VersionMinMacosx(ref ver) => {
459                let (major, minor) = unpack_version(ver.version);
460                metadata["min_os_version"] =
461                    serde_json::Value::String(format!("{}.{}", major, minor));
462            }
463            CommandVariant::BuildVersion(ref build) => {
464                let (major, minor) = unpack_version(build.minos);
465                metadata["min_os_version"] =
466                    serde_json::Value::String(format!("{}.{}", major, minor));
467            }
468            _ => {}
469        }
470        metadata["load_commands"]
471            .as_array_mut()
472            .unwrap()
473            .push(serde_json::Value::String(format!("{:?}", lc.command)));
474    }
475
476    // Extract entry point from Mach-O header
477    if macho.entry != 0 {
478        analysis.entry_point = Some(format!("0x{:08X}", macho.entry));
479        tracing::debug!("Mach-O entry point: 0x{:08X}", macho.entry);
480    }
481
482    // Extract code sections
483    for segment in &macho.segments {
484        if let Ok(sections) = segment.sections() {
485            for (section, _data) in sections {
486                analysis.code_sections.push(super::CodeSection {
487                    name: section.name().unwrap_or("").to_string(),
488                    size: section.size,
489                    start_address: section.addr,
490                    end_address: section.addr + section.size,
491                    permissions: if (section.flags & S_ATTR_PURE_INSTRUCTIONS) != 0 {
492                        "r-x"
493                    } else {
494                        "rw-"
495                    }
496                    .to_string(),
497                    section_type: super::CodeSectionType::Text,
498                });
499            }
500        }
501    }
502
503    // Detect static linking
504    analysis.static_linked = macho.libs.is_empty() && symbol_set.iter().any(|s| s.contains("main"));
505
506    // Extract potential CPE identifiers for CVE matching
507    let cpe_candidates = extract_cpe_candidates(
508        &analysis.linked_libraries,
509        &analysis.imports,
510        &analysis.detected_symbols,
511    );
512    analysis.metadata = serde_json::json!({
513        "macho_metadata": metadata,
514        "cpe_candidates": cpe_candidates,
515    });
516
517    tracing::info!(
518        "Mach-O analysis complete: {} symbols, {} libraries, {} imports, {} exports",
519        analysis.detected_symbols.len(),
520        analysis.linked_libraries.len(),
521        analysis.imports.len(),
522        analysis.exports.len()
523    );
524
525    Ok(())
526}
527
/// Extracts a dotted version from a library file name.
///
/// The name is split on `.` and the first run of consecutive, non-empty,
/// all-ASCII-digit components is returned joined by `.`:
///
/// * `libcrypto.1.1.dylib` -> `Some("1.1")`
/// * `libssl.so.1.0.0`     -> `Some("1.0.0")`
/// * `libSystem.B.dylib`   -> `None`
///
/// Empty components (from a leading `.` or from `..`) are never treated as
/// numeric, so the result is never `Some("")`.
fn extract_version_from_lib_name(lib_name: &str) -> Option<String> {
    fn is_numeric_component(part: &str) -> bool {
        !part.is_empty() && part.bytes().all(|b| b.is_ascii_digit())
    }

    let version_parts: Vec<&str> = lib_name
        .split('.')
        .skip_while(|part| !is_numeric_component(part))
        .take_while(|part| is_numeric_component(part))
        .collect();

    if version_parts.is_empty() {
        None
    } else {
        Some(version_parts.join("."))
    }
}
538
/// Splits a packed Mach-O version word into `(major, minor)`.
///
/// The upper 16 bits hold the major version and the next 8 bits the minor
/// version; the lowest 8 bits are ignored.
fn unpack_version(version: u32) -> (u32, u32) {
    let [major_hi, major_lo, minor, _lowest] = version.to_be_bytes();
    let major = u16::from_be_bytes([major_hi, major_lo]);
    (u32::from(major), u32::from(minor))
}
545
546// Helper function to generate CPE-like identifiers
547fn extract_cpe_candidates(libs: &[String], imports: &[String], symbols: &[String]) -> Vec<String> {
548    let mut cpes = HashSet::new();
549    for item in libs.iter().chain(imports.iter()).chain(symbols.iter()) {
550        let item_lower = item.to_lowercase();
551        // Example: Convert "libcrypto.1.1.dylib" to "cpe:2.3:a:openssl:openssl:1.1:*:*:*:*:*:*:*"
552        if item_lower.contains("openssl")
553            || item_lower.contains("libcrypto")
554            || item_lower.contains("libssl")
555        {
556            if let Some(version) = extract_version_from_lib_name(&item_lower) {
557                cpes.insert(format!(
558                    "cpe:2.3:a:openssl:openssl:{}:*:*:*:*:*:*:*",
559                    version
560                ));
561            } else {
562                cpes.insert("cpe:2.3:a:openssl:openssl:*:*:*:*:*:*:*:*".to_string());
563            }
564        }
565        // Add more CPE patterns for common libraries (e.g., zlib, curl)
566        if item_lower.contains("zlib") {
567            if let Some(version) = extract_version_from_lib_name(&item_lower) {
568                cpes.insert(format!("cpe:2.3:a:zlib:zlib:{}:*:*:*:*:*:*:*", version));
569            }
570        }
571        if item_lower.contains("curl") || item_lower.contains("libcurl") {
572            if let Some(version) = extract_version_from_lib_name(&item_lower) {
573                cpes.insert(format!("cpe:2.3:a:curl:curl:{}:*:*:*:*:*:*:*", version));
574            }
575        }
576    }
577    cpes.into_iter().collect()
578}
579
580fn analyze_elf(analysis: &mut BinaryAnalysis, elf: &Elf, contents: &[u8]) -> anyhow::Result<()> {
581    analysis.format = "elf".to_string();
582
583    // Determine architecture
584    analysis.architecture = match elf.header.e_machine {
585        goblin::elf::header::EM_X86_64 => "x86_64".to_string(),
586        goblin::elf::header::EM_386 => "i386".to_string(),
587        goblin::elf::header::EM_ARM => "arm".to_string(),
588        goblin::elf::header::EM_AARCH64 => "aarch64".to_string(),
589        goblin::elf::header::EM_RISCV => "riscv".to_string(),
590        _ => format!("unknown({})", elf.header.e_machine),
591    };
592
593    // Extract symbols
594    for sym in &elf.syms {
595        if let Some(name) = elf.strtab.get_at(sym.st_name) {
596            if !name.is_empty() {
597                analysis.detected_symbols.push(name.to_string());
598            }
599        }
600    }
601
602    // Extract dynamic symbols
603    for sym in &elf.dynsyms {
604        if let Some(name) = elf.dynstrtab.get_at(sym.st_name) {
605            if !name.is_empty() {
606                analysis.detected_symbols.push(name.to_string());
607            }
608        }
609    }
610
611    // Extract libraries
612    for lib in &elf.libraries {
613        analysis.linked_libraries.push(lib.to_string());
614        // Store library name for regex-based version extraction later
615        analysis.embedded_strings.push(lib.to_string());
616    }
617
618    // Extract entry point
619    if elf.header.e_entry != 0 {
620        analysis.entry_point = Some(format!("0x{:08X}", elf.header.e_entry));
621        tracing::debug!("ELF entry point: 0x{:08X}", elf.header.e_entry);
622    }
623
624    // Determine if statically linked
625    analysis.static_linked =
626        elf.libraries.is_empty() && elf.header.e_type == goblin::elf::header::ET_EXEC;
627
628    // Extract imports/exports using object crate for more detailed analysis
629    if let Ok(obj_file) = object::File::parse(contents) {
630        for symbol in obj_file.symbols() {
631            if let Ok(name) = symbol.name() {
632                if symbol.is_undefined() {
633                    analysis.imports.push(name.to_string());
634                } else if symbol.is_global() {
635                    analysis.exports.push(name.to_string());
636                }
637            }
638        }
639    }
640
641    Ok(())
642}
643
644fn analyze_pe(analysis: &mut BinaryAnalysis, pe: &PE, _contents: &[u8]) -> anyhow::Result<()> {
645    analysis.format = "pe".to_string();
646
647    // Determine architecture
648    analysis.architecture = match pe.header.coff_header.machine {
649        goblin::pe::header::COFF_MACHINE_X86_64 => "x86_64".to_string(),
650        goblin::pe::header::COFF_MACHINE_X86 => "i386".to_string(),
651        goblin::pe::header::COFF_MACHINE_ARM64 => "aarch64".to_string(),
652        _ => format!("unknown({})", pe.header.coff_header.machine),
653    };
654
655    // Extract exports
656    for export in &pe.exports {
657        if let Some(name) = &export.name {
658            analysis.exports.push(name.to_string());
659        }
660    }
661
662    // Extract imports
663    for import in &pe.imports {
664        analysis.imports.push(import.name.to_string());
665        // Add import name to embedded strings for version extraction heuristics
666        analysis.embedded_strings.push(import.name.to_string());
667        if !analysis.linked_libraries.contains(&import.dll.to_string()) {
668            analysis.linked_libraries.push(import.dll.to_string());
669            // Include DLL name in embedded strings so version like "vcruntime140.dll" can be parsed
670            analysis.embedded_strings.push(import.dll.to_string());
671        }
672    }
673
674    // Extract entry point
675    if let Some(optional_header) = &pe.header.optional_header {
676        let entry_point = optional_header.standard_fields.address_of_entry_point;
677        if entry_point != 0 {
678            // Add image base to get virtual address
679            let image_base = optional_header.windows_fields.image_base;
680            let virtual_entry_point = image_base + entry_point as u64;
681            analysis.entry_point = Some(format!("0x{:08X}", virtual_entry_point));
682            tracing::debug!(
683                "PE entry point: 0x{:08X} (RVA: 0x{:08X})",
684                virtual_entry_point,
685                entry_point
686            );
687        }
688    }
689
690    // PE files are typically dynamically linked if they have imports
691    analysis.static_linked = pe.imports.is_empty();
692
693    Ok(())
694}
695
696fn analyze_wasm(analysis: &mut BinaryAnalysis, contents: &[u8]) -> anyhow::Result<()> {
697    tracing::info!("Starting WASM analysis");
698    analysis.format = "application/wasm".to_string();
699    analysis.architecture = "wasm32".to_string();
700    analysis.languages.push("WebAssembly".to_string());
701
702    let parser = Parser::new(0);
703    let mut imports = HashSet::new();
704    let mut exports = HashSet::new();
705    let mut function_count = 0;
706    let mut memory_info = Vec::new();
707    let mut table_info = Vec::new();
708
709    for payload in parser.parse_all(contents) {
710        use wasmparser::Payload as WasmPayload;
711        match payload {
712            Ok(payload) => {
713                match payload {
714                    Payload::Version { num, .. } => {
715                        tracing::debug!("WASM version: {}", num);
716                    }
717                    Payload::ImportSection(reader) => {
718                        for import in reader {
719                            match import {
720                                Ok(import) => {
721                                    let import_name = format!("{}::{}", import.module, import.name);
722                                    imports.insert(import_name);
723                                    tracing::debug!(
724                                        "Found import: {}::{}",
725                                        import.module,
726                                        import.name
727                                    );
728                                }
729                                Err(e) => tracing::warn!("Failed to parse import: {}", e),
730                            }
731                        }
732                    }
733                    Payload::ExportSection(reader) => {
734                        for export in reader {
735                            match export {
736                                Ok(export) => {
737                                    exports.insert(export.name.to_string());
738                                    tracing::debug!("Found export: {}", export.name);
739                                }
740                                Err(e) => tracing::warn!("Failed to parse export: {}", e),
741                            }
742                        }
743                    }
744                    Payload::FunctionSection(reader) => {
745                        function_count = reader.count();
746                        tracing::debug!("Function count: {}", function_count);
747                    }
748                    Payload::MemorySection(reader) => {
749                        for memory in reader {
750                            match memory {
751                                Ok(memory) => {
752                                    memory_info.push(format!(
753                                        "initial: {}, maximum: {:?}",
754                                        memory.initial, memory.maximum
755                                    ));
756                                }
757                                Err(e) => tracing::warn!("Failed to parse memory: {}", e),
758                            }
759                        }
760                    }
761                    Payload::TableSection(reader) => {
762                        for table in reader {
763                            match table {
764                                Ok(table) => {
765                                    table_info.push(format!(
766                                        "element_type: {:?}, initial: {}, maximum: {:?}",
767                                        table.ty.element_type, table.ty.initial, table.ty.maximum
768                                    ));
769                                }
770                                Err(e) => tracing::warn!("Failed to parse table: {}", e),
771                            }
772                        }
773                    }
774                    WasmPayload::CustomSection(custom) => {
775                        if let Ok(bytes_str) = std::str::from_utf8(custom.data()) {
776                            for s in extract_strings(bytes_str.as_bytes()) {
777                                analysis.embedded_strings.push(s);
778                            }
779                        }
780                    }
781                    Payload::TypeSection(reader) => {
782                        tracing::debug!("Type section with {} types", reader.count());
783                    }
784                    _ => {
785                        // tracing::debug!("Skipping WASM section: {:?}", payload);
786                    }
787                }
788            }
789            Err(e) => {
790                tracing::warn!("WASM parsing error: {}", e);
791                break;
792            }
793        }
794    }
795
796    analysis.imports = imports.into_iter().collect();
797    analysis.exports = exports.into_iter().collect();
798    analysis.static_linked = true; // WASM modules are self-contained
799
800    // Add WASM-specific metadata
801    analysis.metadata = serde_json::json!({
802        "wasm_version": "1.0",
803        "function_count": function_count,
804        "memory_sections": memory_info,
805        "table_sections": table_info,
806        "import_count": analysis.imports.len(),
807        "export_count": analysis.exports.len(),
808        "analysis_type": "wasm"
809    });
810
811    tracing::info!(
812        "WASM analysis complete: {} imports, {} exports, {} functions",
813        analysis.imports.len(),
814        analysis.exports.len(),
815        function_count
816    );
817
818    Ok(())
819}
820
821fn analyze_intel_hex(analysis: &mut BinaryAnalysis, contents: &[u8]) -> anyhow::Result<()> {
822    tracing::info!("Starting Intel HEX analysis using manual parsing");
823    analysis.format = "intel-hex".to_string();
824    analysis.architecture = "embedded".to_string();
825    analysis.languages.push("Firmware".to_string());
826
827    let hex_content = String::from_utf8_lossy(contents);
828    let mut memory_segments = Vec::new();
829    let mut entry_points = Vec::new();
830    let mut total_data_bytes = 0;
831    let mut start_address = None;
832    let mut firmware_data = Vec::new();
833    let mut min_address = None;
834    let mut max_address = None;
835    let mut extended_linear_address = 0u32;
836    let mut extended_segment_address = 0u32;
837
838    // Parse Intel HEX manually for better control
839    for (line_num, line) in hex_content.lines().enumerate() {
840        let line = line.trim();
841        if line.is_empty() || !line.starts_with(':') {
842            continue;
843        }
844
845        if line.len() < 11 {
846            tracing::warn!(
847                "Invalid Intel HEX record at line {}: too short",
848                line_num + 1
849            );
850            continue;
851        }
852
853        // Parse Intel HEX: :LLAAAATT[DD...]CC
854        // LL = byte count, AAAA = address, TT = type, DD = data, CC = checksum
855
856        let byte_count = match u8::from_str_radix(&line[1..3], 16) {
857            Ok(count) => count,
858            Err(_) => continue,
859        };
860
861        let address = match u16::from_str_radix(&line[3..7], 16) {
862            Ok(addr) => addr,
863            Err(_) => continue,
864        };
865
866        let record_type = match u8::from_str_radix(&line[7..9], 16) {
867            Ok(rt) => rt,
868            Err(_) => continue,
869        };
870
871        // Calculate expected line length
872        let expected_len = 11 + (byte_count as usize * 2);
873        if line.len() != expected_len {
874            tracing::warn!(
875                "Invalid Intel HEX record at line {}: wrong length",
876                line_num + 1
877            );
878            continue;
879        }
880
881        // Extract data bytes
882        let mut data_bytes = Vec::new();
883        for i in 0..byte_count {
884            let start_idx = 9 + (i as usize * 2);
885            let end_idx = start_idx + 2;
886            if let Ok(byte) = u8::from_str_radix(&line[start_idx..end_idx], 16) {
887                data_bytes.push(byte);
888            }
889        }
890
891        match record_type {
892            0x00 => {
893                // Data record
894                let full_address =
895                    extended_linear_address + extended_segment_address + (address as u32);
896                total_data_bytes += data_bytes.len() as u32;
897                firmware_data.extend_from_slice(&data_bytes);
898
899                min_address =
900                    Some(min_address.map_or(full_address, |min: u32| min.min(full_address)));
901                max_address = Some(
902                    max_address.map_or(full_address + data_bytes.len() as u32, |max: u32| {
903                        max.max(full_address + data_bytes.len() as u32)
904                    }),
905                );
906
907                memory_segments.push(serde_json::json!({
908                    "address": format!("0x{:08X}", full_address),
909                    "size": data_bytes.len(),
910                    "type": "data"
911                }));
912            }
913            0x01 => {
914                // End of file record
915                tracing::debug!("Found end of file record");
916                break;
917            }
918            0x02 => {
919                // Extended segment address
920                if data_bytes.len() >= 2 {
921                    extended_segment_address =
922                        ((data_bytes[0] as u32) << 12) | ((data_bytes[1] as u32) << 4);
923                    tracing::debug!(
924                        "Extended segment address: 0x{:08X}",
925                        extended_segment_address
926                    );
927                }
928            }
929            0x03 => {
930                // Start segment address (CS:IP)
931                if data_bytes.len() >= 4 {
932                    let cs = ((data_bytes[0] as u32) << 8) | (data_bytes[1] as u32);
933                    let ip = ((data_bytes[2] as u32) << 8) | (data_bytes[3] as u32);
934                    let start_addr = (cs << 4) + ip;
935                    start_address = Some(start_addr);
936                    entry_points.push(format!("0x{:08X}", start_addr));
937                    analysis.entry_point = Some(format!("0x{:08X}", start_addr));
938                    tracing::debug!("Start segment address: CS=0x{:04X}, IP=0x{:04X}", cs, ip);
939                }
940            }
941            0x04 => {
942                // Extended linear address
943                if data_bytes.len() >= 2 {
944                    extended_linear_address =
945                        ((data_bytes[0] as u32) << 24) | ((data_bytes[1] as u32) << 16);
946                    tracing::debug!("Extended linear address: 0x{:08X}", extended_linear_address);
947                }
948            }
949            0x05 => {
950                // Start linear address
951                if data_bytes.len() >= 4 {
952                    let start_addr = ((data_bytes[0] as u32) << 24)
953                        | ((data_bytes[1] as u32) << 16)
954                        | ((data_bytes[2] as u32) << 8)
955                        | (data_bytes[3] as u32);
956                    start_address = Some(start_addr);
957                    entry_points.push(format!("0x{:08X}", start_addr));
958                    analysis.entry_point = Some(format!("0x{:08X}", start_addr));
959                    tracing::debug!("Start linear address: 0x{:08X}", start_addr);
960                }
961            }
962            _ => {
963                tracing::debug!("Unknown Intel HEX record type: 0x{:02X}", record_type);
964            }
965        }
966    }
967
968    // Extract strings from firmware data
969    let firmware_strings = extract_strings(&firmware_data);
970    analysis.embedded_strings.extend(firmware_strings);
971
972    // Detect potential microcontroller/bootloader patterns
973    let mut device_hints = Vec::new();
974    for string in &analysis.embedded_strings {
975        let lower = string.to_lowercase();
976        if lower.contains("bootloader") || lower.contains("boot") {
977            device_hints.push("bootloader");
978        }
979        if lower.contains("interrupt") || lower.contains("isr") {
980            device_hints.push("interrupt_handler");
981        }
982        if lower.contains("uart") || lower.contains("spi") || lower.contains("i2c") {
983            device_hints.push("peripheral_driver");
984        }
985        if lower.contains("atmega") || lower.contains("avr") {
986            analysis.architecture = "avr".to_string();
987            device_hints.push("avr_microcontroller");
988        }
989        if lower.contains("stm32") || lower.contains("cortex") {
990            analysis.architecture = "arm_cortex_m".to_string();
991            device_hints.push("arm_cortex_m");
992        }
993        if lower.contains("pic") && (lower.contains("16") || lower.contains("18")) {
994            analysis.architecture = "pic".to_string();
995            device_hints.push("pic_microcontroller");
996        }
997    }
998
999    analysis.static_linked = true; // Firmware is typically self-contained
1000
1001    // Calculate memory utilization
1002    let memory_span = if let (Some(min), Some(max)) = (min_address, max_address) {
1003        max - min
1004    } else {
1005        0
1006    };
1007
1008    // Add Intel HEX specific metadata
1009    analysis.metadata = serde_json::json!({
1010        "hex_format": "intel_hex",
1011        "total_data_bytes": total_data_bytes,
1012        "memory_segments": memory_segments,
1013        "entry_points": entry_points,
1014        "start_address": start_address.map(|addr| format!("0x{:08X}", addr)),
1015        "memory_range": {
1016            "min_address": min_address.map(|addr| format!("0x{:08X}", addr)),
1017            "max_address": max_address.map(|addr| format!("0x{:08X}", addr)),
1018            "span_bytes": memory_span
1019        },
1020        "device_hints": device_hints,
1021        "analysis_type": "intel_hex_firmware"
1022    });
1023
1024    tracing::info!(
1025        "Intel HEX analysis complete: {} data bytes, {} memory segments, memory span: {} bytes",
1026        total_data_bytes,
1027        memory_segments.len(),
1028        memory_span
1029    );
1030
1031    Ok(())
1032}
1033
1034fn analyze_srec(analysis: &mut BinaryAnalysis, contents: &[u8]) -> anyhow::Result<()> {
1035    tracing::info!("Starting Motorola S-Record analysis using srec library");
1036    analysis.format = "motorola-srec".to_string();
1037    analysis.architecture = "embedded".to_string();
1038    analysis.languages.push("Firmware".to_string());
1039
1040    let srec_content = String::from_utf8_lossy(contents);
1041    let mut memory_segments = Vec::new();
1042    let mut entry_points = Vec::new();
1043    let mut total_data_bytes = 0;
1044    let mut start_address = None;
1045    let mut firmware_data = Vec::new();
1046    let mut min_address = None;
1047    let mut max_address = None;
1048    let mut header_info = None;
1049
1050    // Parse S-Record manually for better control
1051    for (line_num, line) in srec_content.lines().enumerate() {
1052        let line = line.trim();
1053        if line.is_empty() || !line.starts_with('S') {
1054            continue;
1055        }
1056
1057        if line.len() < 4 {
1058            tracing::warn!("Invalid S-Record at line {}: too short", line_num + 1);
1059            continue;
1060        }
1061
1062        // Parse S-Record: STYCC[AAAA...][DD...]CC
1063        let record_type = match line.chars().nth(1) {
1064            Some(c) => c,
1065            None => continue,
1066        };
1067
1068        let byte_count = match u8::from_str_radix(&line[2..4], 16) {
1069            Ok(count) => count,
1070            Err(_) => continue,
1071        };
1072
1073        match record_type {
1074            '0' => {
1075                // Header record
1076                if line.len() >= 8 {
1077                    let data_start = 8;
1078                    let data_end = line.len().saturating_sub(2);
1079                    if data_end > data_start {
1080                        let data_hex = &line[data_start..data_end];
1081                        let mut header_data = Vec::new();
1082                        for i in (0..data_hex.len()).step_by(2) {
1083                            if i + 1 < data_hex.len() {
1084                                if let Ok(byte_val) = u8::from_str_radix(&data_hex[i..i + 2], 16) {
1085                                    header_data.push(byte_val);
1086                                }
1087                            }
1088                        }
1089                        let header_string = String::from_utf8_lossy(&header_data);
1090                        if !header_string.trim().is_empty() {
1091                            header_info = Some(header_string.trim().to_string());
1092                        }
1093                    }
1094                }
1095            }
1096            '1' => {
1097                // 16-bit address data record
1098                if line.len() >= 8 {
1099                    if let Ok(address) = u16::from_str_radix(&line[4..8], 16) {
1100                        let data_bytes = byte_count.saturating_sub(3);
1101                        total_data_bytes += data_bytes as u32;
1102
1103                        let addr32 = address as u32;
1104                        min_address = Some(min_address.map_or(addr32, |min: u32| min.min(addr32)));
1105                        max_address =
1106                            Some(max_address.map_or(addr32 + data_bytes as u32, |max: u32| {
1107                                max.max(addr32 + data_bytes as u32)
1108                            }));
1109
1110                        // Extract actual data bytes
1111                        if line.len() >= 8 + (data_bytes as usize * 2) {
1112                            let data_hex = &line[8..8 + (data_bytes as usize * 2)];
1113                            for i in (0..data_hex.len()).step_by(2) {
1114                                if i + 1 < data_hex.len() {
1115                                    if let Ok(byte_val) =
1116                                        u8::from_str_radix(&data_hex[i..i + 2], 16)
1117                                    {
1118                                        firmware_data.push(byte_val);
1119                                    }
1120                                }
1121                            }
1122                        }
1123
1124                        memory_segments.push(serde_json::json!({
1125                            "address": format!("0x{:04X}", address),
1126                            "size": data_bytes,
1127                            "type": "data_16bit"
1128                        }));
1129                    }
1130                }
1131            }
1132            '7' => {
1133                // 32-bit start address
1134                if line.len() >= 12 {
1135                    if let Ok(address) = u32::from_str_radix(&line[4..12], 16) {
1136                        start_address = Some(address);
1137                        entry_points.push(format!("0x{:08X}", address));
1138                        analysis.entry_point = Some(format!("0x{:08X}", address));
1139                    }
1140                }
1141            }
1142            '8' => {
1143                // 24-bit start address
1144                if line.len() >= 10 {
1145                    if let Ok(address) = u32::from_str_radix(&line[4..10], 16) {
1146                        start_address = Some(address & 0x00FFFFFF);
1147                        entry_points.push(format!("0x{:06X}", address & 0x00FFFFFF));
1148                        analysis.entry_point = Some(format!("0x{:06X}", address & 0x00FFFFFF));
1149                    }
1150                }
1151            }
1152            '9' => {
1153                // 16-bit start address
1154                if line.len() >= 8 {
1155                    if let Ok(address) = u16::from_str_radix(&line[4..8], 16) {
1156                        start_address = Some(address as u32);
1157                        entry_points.push(format!("0x{:04X}", address));
1158                        analysis.entry_point = Some(format!("0x{:04X}", address));
1159                    }
1160                }
1161            }
1162            _ => {}
1163        }
1164    }
1165
1166    // Extract strings from firmware data
1167    let firmware_strings = extract_strings(&firmware_data);
1168    analysis.embedded_strings.extend(firmware_strings);
1169
1170    // Add header info if available
1171    if let Some(header) = header_info {
1172        analysis.embedded_strings.push(header);
1173    }
1174
1175    // Detect potential microcontroller/bootloader patterns
1176    let mut device_hints = Vec::new();
1177    for string in &analysis.embedded_strings {
1178        let lower = string.to_lowercase();
1179        if lower.contains("bootloader") || lower.contains("boot") {
1180            device_hints.push("bootloader");
1181        }
1182        if lower.contains("interrupt") || lower.contains("isr") {
1183            device_hints.push("interrupt_handler");
1184        }
1185        if lower.contains("can") || lower.contains("ecu") || lower.contains("automotive") {
1186            device_hints.push("automotive_ecu");
1187            analysis.architecture = "automotive".to_string();
1188        }
1189        if lower.contains("motorola") || lower.contains("freescale") || lower.contains("nxp") {
1190            device_hints.push("motorola_mcu");
1191        }
1192        if lower.contains("68k") || lower.contains("68000") {
1193            analysis.architecture = "m68k".to_string();
1194            device_hints.push("motorola_68k");
1195        }
1196        if lower.contains("coldfire") {
1197            analysis.architecture = "coldfire".to_string();
1198            device_hints.push("coldfire_mcu");
1199        }
1200        if lower.contains("powerpc") || lower.contains("ppc") {
1201            analysis.architecture = "powerpc".to_string();
1202            device_hints.push("powerpc_mcu");
1203        }
1204    }
1205
1206    analysis.static_linked = true; // Firmware is typically self-contained
1207
1208    // Calculate memory utilization
1209    let memory_span = if let (Some(min), Some(max)) = (min_address, max_address) {
1210        max - min
1211    } else {
1212        0
1213    };
1214
1215    // Add S-Record specific metadata
1216    analysis.metadata = serde_json::json!({
1217        "record_format": "motorola_srec",
1218        "total_data_bytes": total_data_bytes,
1219        "memory_segments": memory_segments,
1220        "entry_points": entry_points,
1221        "start_address": start_address.map(|addr| format!("0x{:08X}", addr)),
1222        "memory_range": {
1223            "min_address": min_address.map(|addr| format!("0x{:08X}", addr)),
1224            "max_address": max_address.map(|addr| format!("0x{:08X}", addr)),
1225            "span_bytes": memory_span
1226        },
1227        "device_hints": device_hints,
1228        "analysis_type": "srec_firmware"
1229    });
1230
1231    tracing::info!(
1232        "S-Record analysis complete: {} data bytes, {} memory segments, memory span: {} bytes",
1233        total_data_bytes,
1234        memory_segments.len(),
1235        memory_span
1236    );
1237
1238    Ok(())
1239}
1240
1241fn analyze_arm_cortex_m(analysis: &mut BinaryAnalysis, contents: &[u8]) -> anyhow::Result<()> {
1242    tracing::info!("Starting ARM Cortex-M firmware analysis with Capstone disassembly");
1243    analysis.format = "arm-cortex-m-firmware".to_string();
1244    analysis.architecture = "arm_cortex_m".to_string();
1245    analysis.languages.push("ARM Assembly".to_string());
1246    analysis.languages.push("C/C++".to_string());
1247
1248    let mut vector_table = Vec::new();
1249    let mut interrupt_handlers = Vec::new();
1250    let mut rtos_indicators = Vec::new();
1251    let mut memory_regions = Vec::new();
1252    let mut stack_pointer = None;
1253    let mut reset_handler = None;
1254    let mut disassembly_info = Vec::new();
1255
1256    // Initialize Capstone disassembler for ARM Thumb
1257    let cs = match Capstone::new()
1258        .arm()
1259        .mode(arch::arm::ArchMode::Thumb)
1260        .detail(true)
1261        .build()
1262    {
1263        Ok(cs) => Some(cs),
1264        Err(e) => {
1265            tracing::warn!("Failed to initialize Capstone disassembler: {}", e);
1266            None
1267        }
1268    };
1269
1270    // Parse ARM Cortex-M vector table (first 1KB typically)
1271    if contents.len() >= 256 {
1272        // Vector table starts at offset 0x00000000
1273        // First entry: Initial Stack Pointer (MSP)
1274        // Second entry: Reset Handler
1275
1276        // Extract initial stack pointer (first 4 bytes, little endian)
1277        if contents.len() >= 4 {
1278            let sp_bytes = [contents[0], contents[1], contents[2], contents[3]];
1279            let sp_value = u32::from_le_bytes(sp_bytes);
1280            stack_pointer = Some(sp_value);
1281
1282            // Validate that stack pointer looks reasonable (usually in RAM region)
1283            if sp_value >= 0x20000000 && sp_value <= 0x20100000 {
1284                tracing::debug!("Valid ARM Cortex-M stack pointer found: 0x{:08X}", sp_value);
1285                memory_regions.push(serde_json::json!({
1286                    "type": "RAM",
1287                    "start_address": "0x20000000",
1288                    "size_estimate": format!("{}KB", (sp_value - 0x20000000) / 1024),
1289                    "purpose": "SRAM"
1290                }));
1291            }
1292        }
1293
1294        // Extract reset handler (second 4 bytes, little endian)
1295        if contents.len() >= 8 {
1296            let reset_bytes = [contents[4], contents[5], contents[6], contents[7]];
1297            let reset_addr = u32::from_le_bytes(reset_bytes);
1298
1299            // ARM Cortex-M addresses have LSB set for Thumb mode
1300            let actual_reset_addr = reset_addr & 0xFFFFFFFE;
1301
1302            if actual_reset_addr > 0 && actual_reset_addr < 0x08100000 {
1303                reset_handler = Some(actual_reset_addr);
1304                analysis.entry_point = Some(format!("0x{:08X}", actual_reset_addr));
1305
1306                // Try to disassemble first few instructions at reset handler
1307                let mut reset_analysis = serde_json::json!({
1308                    "name": "Reset_Handler",
1309                    "address": format!("0x{:08X}", actual_reset_addr),
1310                    "thumb_mode": (reset_addr & 1) == 1
1311                });
1312
1313                if let Some(ref cs) = cs {
1314                    // Try to find the reset handler code in the binary
1315                    // Assume it's near the beginning for now
1316                    let code_start = if actual_reset_addr >= 0x08000000
1317                        && actual_reset_addr < 0x08000000 + contents.len() as u32
1318                    {
1319                        (actual_reset_addr - 0x08000000) as usize
1320                    } else {
1321                        0x200 // Common offset after vector table
1322                    };
1323
1324                    if code_start < contents.len() && contents.len() > code_start + 32 {
1325                        let code_slice = &contents
1326                            [code_start..code_start.min(contents.len()).min(code_start + 32)];
1327                        match cs.disasm_all(code_slice, actual_reset_addr as u64) {
1328                            Ok(insns) => {
1329                                let mut reset_instructions = Vec::new();
1330                                for insn in insns.iter().take(8) {
1331                                    // First 8 instructions
1332                                    reset_instructions.push(serde_json::json!({
1333                                        "address": format!("0x{:08X}", insn.address()),
1334                                        "mnemonic": insn.mnemonic().unwrap_or(""),
1335                                        "op_str": insn.op_str().unwrap_or("")
1336                                    }));
1337                                }
1338                                reset_analysis["disassembly"] =
1339                                    serde_json::json!(reset_instructions);
1340                                disassembly_info.push("Reset handler disassembled");
1341                            }
1342                            Err(e) => {
1343                                tracing::debug!("Failed to disassemble reset handler: {}", e);
1344                            }
1345                        }
1346                    }
1347                }
1348
1349                interrupt_handlers.push(reset_analysis);
1350                tracing::debug!("Reset handler found at: 0x{:08X}", actual_reset_addr);
1351            }
1352        }
1353
1354        // Parse standard ARM Cortex-M vector table entries
1355        let vector_names = [
1356            "Initial_SP",
1357            "Reset_Handler",
1358            "NMI_Handler",
1359            "HardFault_Handler",
1360            "MemManage_Handler",
1361            "BusFault_Handler",
1362            "UsageFault_Handler",
1363            "Reserved",
1364            "Reserved",
1365            "Reserved",
1366            "Reserved",
1367            "SVC_Handler",
1368            "DebugMon_Handler",
1369            "Reserved",
1370            "PendSV_Handler",
1371            "SysTick_Handler",
1372        ];
1373
1374        for (i, &name) in vector_names.iter().enumerate() {
1375            let offset = i * 4;
1376            if offset + 4 <= contents.len() && offset + 4 <= 64 {
1377                // Standard vectors are first 16 entries
1378                let addr_bytes = [
1379                    contents[offset],
1380                    contents[offset + 1],
1381                    contents[offset + 2],
1382                    contents[offset + 3],
1383                ];
1384                let addr_value = u32::from_le_bytes(addr_bytes);
1385
1386                vector_table.push(serde_json::json!({
1387                    "index": i,
1388                    "name": name,
1389                    "address": format!("0x{:08X}", addr_value),
1390                    "raw_value": format!("0x{:08X}", addr_value)
1391                }));
1392
1393                // Check for valid interrupt handler addresses
1394                if i > 0 && addr_value > 0 && addr_value != 0xFFFFFFFF {
1395                    let actual_addr = addr_value & 0xFFFFFFFE;
1396                    if actual_addr < 0x08100000 && actual_addr > 0x08000000 {
1397                        interrupt_handlers.push(serde_json::json!({
1398                            "name": name,
1399                            "address": format!("0x{:08X}", actual_addr),
1400                            "thumb_mode": (addr_value & 1) == 1,
1401                            "vector_index": i
1402                        }));
1403                    }
1404                }
1405            }
1406        }
1407    }
1408
1409    // Look for RTOS patterns in the firmware
1410    let firmware_strings = extract_strings(contents);
1411    for string in &firmware_strings {
1412        let lower = string.to_lowercase();
1413
1414        // FreeRTOS indicators
1415        if lower.contains("freertos")
1416            || lower.contains("xTaskCreate")
1417            || lower.contains("vTaskDelay")
1418            || lower.contains("xQueueCreate")
1419        {
1420            rtos_indicators.push("FreeRTOS");
1421        }
1422
1423        // RTX indicators
1424        if lower.contains("rtx")
1425            || lower.contains("osKernelStart")
1426            || lower.contains("osThreadCreate")
1427        {
1428            rtos_indicators.push("ARM RTX");
1429        }
1430
1431        // ThreadX indicators
1432        if lower.contains("threadx") || lower.contains("tx_thread_create") {
1433            rtos_indicators.push("ThreadX");
1434        }
1435
1436        // Zephyr indicators
1437        if lower.contains("zephyr") || lower.contains("k_thread_create") {
1438            rtos_indicators.push("Zephyr RTOS");
1439        }
1440
1441        // CMSIS indicators
1442        if lower.contains("cmsis") || lower.contains("__main") || lower.contains("SystemInit") {
1443            rtos_indicators.push("CMSIS");
1444        }
1445
1446        // Hardware abstraction layer indicators
1447        if lower.contains("hal_") || lower.contains("stm32") {
1448            rtos_indicators.push("STM32 HAL");
1449        }
1450    }
1451
1452    // Add firmware strings to analysis
1453    analysis.embedded_strings.extend(firmware_strings);
1454
1455    // Identify common ARM Cortex-M memory regions
1456    memory_regions.push(serde_json::json!({
1457        "type": "Flash",
1458        "start_address": "0x08000000",
1459        "purpose": "Program Flash Memory",
1460        "typical_size": "64KB-2MB"
1461    }));
1462
1463    memory_regions.push(serde_json::json!({
1464        "type": "System",
1465        "start_address": "0xE0000000",
1466        "purpose": "System Control Space",
1467        "contains": ["SysTick", "NVIC", "SCB", "MPU", "FPU"]
1468    }));
1469
1470    // Look for peripheral register access patterns
1471    let mut peripheral_indicators = Vec::new();
1472
1473    // Check for common STM32 peripheral base addresses in the binary
1474    let peripheral_bases: &[(u32, &str)] = &[
1475        (0x40000000, "APB1 Peripherals"),
1476        (0x40010000, "APB2 Peripherals"),
1477        (0x40020000, "AHB1 Peripherals"),
1478        (0x50000000, "AHB2 Peripherals"),
1479        (0xE0000000, "Cortex-M System"),
1480    ];
1481
1482    for (base_addr, name) in peripheral_bases {
1483        // Look for this address in the binary (little endian)
1484        let addr_bytes = base_addr.to_le_bytes();
1485        if contents.windows(4).any(|window| window == addr_bytes) {
1486            peripheral_indicators.push(serde_json::json!({
1487                "base_address": format!("0x{:08X}", base_addr),
1488                "name": name
1489            }));
1490        }
1491    }
1492
1493    // Remove duplicates from RTOS indicators
1494    rtos_indicators.sort();
1495    rtos_indicators.dedup();
1496
1497    analysis.static_linked = true; // Firmware is self-contained
1498
1499    // Calculate useful statistics
1500    let vector_table_size = vector_table.len() * 4;
1501    let total_handlers = interrupt_handlers.len();
1502
1503    // Add ARM Cortex-M specific metadata
1504    analysis.metadata = serde_json::json!({
1505        "firmware_type": "arm_cortex_m",
1506        "vector_table": {
1507            "entries": vector_table,
1508            "size_bytes": vector_table_size,
1509            "total_vectors": vector_table.len()
1510        },
1511        "interrupt_handlers": interrupt_handlers,
1512        "stack_pointer": stack_pointer.map(|sp| format!("0x{:08X}", sp)),
1513        "reset_handler": reset_handler.map(|rh| format!("0x{:08X}", rh)),
1514        "rtos_detected": rtos_indicators,
1515        "memory_regions": memory_regions,
1516        "peripheral_indicators": peripheral_indicators,
1517        "analysis_type": "arm_cortex_m_firmware",
1518        "disassembly": {
1519            "capstone_available": cs.is_some(),
1520            "analysis_info": disassembly_info
1521        },
1522        "statistics": {
1523            "total_interrupt_handlers": total_handlers,
1524            "has_rtos": !rtos_indicators.is_empty(),
1525            "has_hal": rtos_indicators.iter().any(|s| s.contains("HAL")),
1526            "disassembly_performed": !disassembly_info.is_empty()
1527        }
1528    });
1529
1530    tracing::info!(
1531        "ARM Cortex-M analysis complete: {} interrupt handlers, {} RTOS indicators, stack at 0x{:08X}",
1532        total_handlers,
1533        rtos_indicators.len(),
1534        stack_pointer.unwrap_or(0)
1535    );
1536
1537    Ok(())
1538}
1539
1540fn analyze_raw_firmware_blob(analysis: &mut BinaryAnalysis, contents: &[u8]) -> anyhow::Result<()> {
1541    tracing::info!(
1542        "Starting raw firmware blob analysis ({} bytes)",
1543        contents.len()
1544    );
1545
1546    let text_ratio = contents
1547        .iter()
1548        .filter(|&&b| b.is_ascii_graphic() || b.is_ascii_whitespace())
1549        .count() as f64
1550        / contents.len() as f64;
1551
1552    let mut architecture_hints = Vec::new();
1553    let mut firmware_indicators = Vec::new();
1554    let mut compression_detected = Vec::new();
1555    let mut crypto_indicators = Vec::new();
1556
1557    // Check for various firmware signatures and patterns
1558
1559    // 1. Architecture detection by instruction patterns
1560    if contents.len() >= 4 {
1561        // ARM Thumb instructions (common in Cortex-M)
1562        let thumb_patterns = [
1563            [0x00, 0xBF], // NOP (Thumb)
1564            [0x70, 0x47], // BX LR (Thumb)
1565            [0x08, 0x68], // LDR r0, [r1] (Thumb)
1566        ];
1567
1568        for pattern in &thumb_patterns {
1569            if contents.windows(2).any(|w| w == pattern) {
1570                architecture_hints.push("ARM Thumb");
1571                break;
1572            }
1573        }
1574
1575        // x86 patterns
1576        let x86_patterns: &[&[u8]] = &[
1577            &[0x55u8, 0x89, 0xE5], // push ebp; mov ebp, esp
1578            &[0x48u8, 0x89, 0xE5], // mov rbp, rsp (x86-64)
1579            &[0xEBu8, 0xFE],       // jmp $ (infinite loop)
1580        ];
1581
1582        for pattern in x86_patterns {
1583            if contents.windows(pattern.len()).any(|w| w == *pattern) {
1584                architecture_hints.push("x86");
1585                break;
1586            }
1587        }
1588
1589        // MIPS patterns
1590        if contents
1591            .windows(4)
1592            .any(|w| matches!(w, [0x27, 0xBD, _, _] | [_, _, 0xBD, 0x27]))
1593        {
1594            architecture_hints.push("MIPS");
1595        }
1596
1597        // PowerPC patterns
1598        if contents
1599            .windows(4)
1600            .any(|w| matches!(w, [0x94, 0x21, _, _] | [_, _, 0x21, 0x94]))
1601        {
1602            architecture_hints.push("PowerPC");
1603        }
1604    }
1605
1606    // 2. Bootloader detection
1607    let bootloader_strings = [
1608        "U-Boot",
1609        "GRUB",
1610        "bootloader",
1611        "BOOT",
1612        "loader",
1613        "SPL",
1614        "MLO",
1615        "bootstrap",
1616        "uboot",
1617    ];
1618
1619    for &pattern in &bootloader_strings {
1620        if contents.windows(pattern.len()).any(|w| {
1621            String::from_utf8_lossy(w)
1622                .to_lowercase()
1623                .contains(&pattern.to_lowercase())
1624        }) {
1625            firmware_indicators.push("bootloader");
1626            break;
1627        }
1628    }
1629
1630    // 3. Compression detection
1631    if contents.len() >= 4 {
1632        match &contents[0..4.min(contents.len())] {
1633            [0x1F, 0x8B, _, _] => compression_detected.push("gzip"),
1634            [0x42, 0x5A, 0x68, _] => compression_detected.push("bzip2"),
1635            [0xFD, 0x37, 0x7A, 0x58] => compression_detected.push("xz"),
1636            [0x28, 0xB5, 0x2F, 0xFD] => compression_detected.push("zstd"),
1637            [0x04, 0x22, 0x4D, 0x18] => compression_detected.push("lz4"),
1638            _ => {}
1639        }
1640    }
1641
1642    // 4. Cryptographic signatures
1643    let crypto_patterns: &[(&str, &[u8])] = &[
1644        ("AES", b"AES"),
1645        ("RSA", b"RSA"),
1646        ("SHA", b"SHA"),
1647        ("OpenSSL", b"OpenSSL"),
1648        ("mbedtls", b"mbedtls"),
1649        ("WolfSSL", b"wolfSSL"),
1650    ];
1651
1652    for (name, pattern) in crypto_patterns {
1653        if contents.windows(pattern.len()).any(|w| w == *pattern) {
1654            crypto_indicators.push(*name);
1655        }
1656    }
1657
1658    // 5. Device-specific patterns
1659    let device_patterns = [
1660        ("ESP32", b"ESP32" as &[u8]),
1661        ("Arduino", b"Arduino"),
1662        ("Raspberry Pi", b"Raspberry Pi"),
1663        ("STM32", b"STM32"),
1664        ("Nordic", b"Nordic"),
1665        ("Qualcomm", b"Qualcomm"),
1666        ("Broadcom", b"Broadcom"),
1667    ];
1668
1669    for (device, pattern) in &device_patterns {
1670        if contents.windows(pattern.len()).any(|w| w == *pattern) {
1671            firmware_indicators.push(*device);
1672        }
1673    }
1674
1675    // If format is still unknown, determine it based on raw analysis
1676    if analysis.format == "unknown" {
1677        if text_ratio > 0.8 {
1678            analysis.format = if contents.len() < 1024 {
1679                "text/small"
1680            } else {
1681                "text"
1682            }
1683            .to_string();
1684
1685            let text = String::from_utf8_lossy(contents);
1686            if text.starts_with("#!") {
1687                analysis.format = "script".to_string();
1688                analysis.languages.push("script".to_string());
1689            }
1690
1691            // Look for programming language patterns
1692            if text.contains("function") || text.contains("def ") {
1693                analysis.languages.push("script".to_string());
1694            }
1695            if text.contains("#include") || text.contains("int main") {
1696                analysis.languages.push("C/C++".to_string());
1697            }
1698            if text.contains("pub fn") || text.contains("fn main") {
1699                analysis.languages.push("Rust".to_string());
1700            }
1701        } else if !compression_detected.is_empty() {
1702            analysis.format = "compressed-firmware".to_string();
1703            analysis.languages.push("Compressed Binary".to_string());
1704        } else if !firmware_indicators.is_empty() {
1705            analysis.format = "firmware-blob".to_string();
1706            analysis.languages.push("Firmware".to_string());
1707        } else if contents.len() < 50 {
1708            analysis.format = "micro-binary".to_string();
1709        } else {
1710            analysis.format = "raw-binary".to_string();
1711        }
1712    }
1713
1714    // Set architecture based on hints
1715    analysis.architecture = if architecture_hints.is_empty() {
1716        "unknown".to_string()
1717    } else {
1718        architecture_hints.join(", ")
1719    };
1720
1721    // Extract strings for further analysis
1722    let extracted_strings = extract_strings(contents);
1723    analysis.embedded_strings.extend(extracted_strings);
1724
1725    // Look for version patterns in strings
1726    let mut version_hints = Vec::new();
1727    for string in &analysis.embedded_strings {
1728        if string.len() > 2 && string.len() < 20 {
1729            // Look for version-like patterns (e.g., "1.2.3", "v2.0", "Rev 1.0")
1730            if string.chars().any(|c| c.is_ascii_digit())
1731                && (string.contains('.')
1732                    || string.to_lowercase().contains('v')
1733                    || string.to_lowercase().contains("rev"))
1734            {
1735                version_hints.push(string.clone());
1736            }
1737        }
1738    }
1739
1740    // Firmware-specific analysis
1741    analysis.static_linked = !firmware_indicators.is_empty() || text_ratio < 0.1;
1742
1743    // Calculate entropy to detect encryption/compression
1744    let entropy = calculate_entropy(contents);
1745    let is_likely_encrypted = entropy > 7.5;
1746    let is_likely_compressed = entropy > 7.0 && compression_detected.is_empty();
1747
1748    // Build comprehensive metadata
1749    analysis.metadata = serde_json::json!({
1750        "analysis_type": "raw_firmware_blob",
1751        "file_characteristics": {
1752            "size_bytes": contents.len(),
1753            "ascii_ratio": text_ratio,
1754            "entropy": entropy,
1755            "likely_encrypted": is_likely_encrypted,
1756            "likely_compressed": is_likely_compressed
1757        },
1758        "architecture_hints": architecture_hints,
1759        "firmware_indicators": firmware_indicators,
1760        "compression_detected": compression_detected,
1761        "crypto_indicators": crypto_indicators,
1762        "version_hints": version_hints,
1763        "detection_confidence": {
1764            "architecture": if architecture_hints.is_empty() { "low" } else { "medium" },
1765            "firmware_type": if firmware_indicators.is_empty() { "low" } else { "high" },
1766            "format": if text_ratio > 0.8 { "high" } else if !firmware_indicators.is_empty() { "medium" } else { "low" }
1767        }
1768    });
1769
1770    tracing::info!(
1771        "Raw firmware blob analysis complete: format={}, arch={}, {} indicators, entropy={:.2}",
1772        analysis.format,
1773        analysis.architecture,
1774        firmware_indicators.len(),
1775        entropy
1776    );
1777
1778    Ok(())
1779}
1780
/// Computes the Shannon entropy of `data`, in bits per byte.
///
/// Returns `0.0` for empty input and for input consisting of a single
/// repeated byte value; the maximum, `8.0`, is reached when all 256 byte
/// values occur equally often.
fn calculate_entropy(data: &[u8]) -> f64 {
    if data.is_empty() {
        return 0.0;
    }

    // `usize` counters cannot overflow: no bucket can exceed `data.len()`.
    let mut counts = [0usize; 256];
    for &byte in data {
        counts[usize::from(byte)] += 1;
    }

    let len = data.len() as f64;

    // Sum -p * log2(p) over the byte values that actually occur.
    counts
        .iter()
        .filter(|&&count| count > 0)
        .fold(0.0_f64, |entropy, &count| {
            let p = count as f64 / len;
            entropy - p * p.log2()
        })
}
1800
1801fn analyze_dicom_medical_imaging(
1802    analysis: &mut BinaryAnalysis,
1803    contents: &[u8],
1804) -> anyhow::Result<()> {
1805    tracing::info!("Starting DICOM medical imaging analysis using dicom library");
1806    analysis.format = "dicom-medical-imaging".to_string();
1807    analysis.architecture = "medical-device".to_string();
1808    analysis.languages.push("Medical Software".to_string());
1809
1810    let mut dicom_tags = Vec::new();
1811    let mut fda_compliance_indicators = Vec::new();
1812    let mut medical_protocols = Vec::new();
1813    let mut embedded_software_components = Vec::new();
1814    let mut security_features = Vec::new();
1815    let mut patient_data_detected = false;
1816
1817    // Check for DICOM file format manually since API is complex
1818    let has_dicom_preamble = contents.len() >= 132 && &contents[128..132] == b"DICM";
1819    let dicom_obj = if has_dicom_preamble {
1820        Some(()) // Just indicate we found DICOM format
1821    } else {
1822        None
1823    };
1824
1825    if dicom_obj.is_some() {
1826        analysis.format = "dicom-file".to_string();
1827
1828        // Basic DICOM tag parsing - look for common patterns in the data after preamble
1829        if contents.len() > 132 {
1830            let dicom_data = &contents[132..];
1831
1832            // Look for common DICOM tags manually
1833            for i in (0..dicom_data.len().saturating_sub(8)).step_by(2) {
1834                if i + 8 <= dicom_data.len() {
1835                    let group = u16::from_le_bytes([dicom_data[i], dicom_data[i + 1]]);
1836                    let element = u16::from_le_bytes([dicom_data[i + 2], dicom_data[i + 3]]);
1837
1838                    // Check for patient data tags
1839                    if group == 0x0010 && (element == 0x0010 || element == 0x0020) {
1840                        patient_data_detected = true;
1841                        medical_protocols.push("Patient Data");
1842                    }
1843
1844                    // Check for manufacturer info
1845                    if group == 0x0008 && element == 0x0070 {
1846                        medical_protocols.push("Manufacturer");
1847                    }
1848
1849                    // Limit our search to avoid performance issues
1850                    if dicom_tags.len() > 20 {
1851                        break;
1852                    }
1853
1854                    dicom_tags.push(serde_json::json!({
1855                        "group": format!("0x{:04X}", group),
1856                        "element": format!("0x{:04X}", element),
1857                        "tag": format!("({:04X},{:04X})", group, element)
1858                    }));
1859                }
1860            }
1861        }
1862    } else {
1863        // Not a DICOM file, analyze as medical imaging software
1864        analysis.format = "medical-imaging-software".to_string();
1865    }
1866
1867    // Look for medical device software indicators in strings
1868    let medical_strings = extract_strings(contents);
1869    analysis.embedded_strings.extend(medical_strings);
1870
1871    // Analyze embedded strings for medical software patterns
1872    for string in &analysis.embedded_strings {
1873        let lower = string.to_lowercase();
1874
1875        // FDA compliance indicators
1876        if lower.contains("fda") || lower.contains("510k") || lower.contains("pma") {
1877            fda_compliance_indicators.push("FDA Regulatory");
1878        }
1879        if lower.contains("ce mark") || lower.contains("ce marked") {
1880            fda_compliance_indicators.push("CE Marking");
1881        }
1882        if lower.contains("iso 13485") || lower.contains("iso13485") {
1883            fda_compliance_indicators.push("ISO 13485");
1884        }
1885        if lower.contains("iec 62304") || lower.contains("iec62304") {
1886            fda_compliance_indicators.push("IEC 62304");
1887        }
1888        if lower.contains("hipaa") {
1889            fda_compliance_indicators.push("HIPAA Compliance");
1890        }
1891
1892        // Medical protocols and standards
1893        if lower.contains("dicom") {
1894            medical_protocols.push("DICOM Protocol");
1895        }
1896        if lower.contains("hl7") || lower.contains("fhir") {
1897            medical_protocols.push("HL7/FHIR");
1898        }
1899        if lower.contains("pacs") {
1900            medical_protocols.push("PACS System");
1901        }
1902        if lower.contains("modality") || lower.contains("worklist") {
1903            medical_protocols.push("Modality Worklist");
1904        }
1905        if lower.contains("mpps") {
1906            medical_protocols.push("MPPS Protocol");
1907        }
1908
1909        // Embedded software components
1910        if lower.contains("qt") || lower.contains("qtcore") {
1911            embedded_software_components.push("Qt Framework");
1912        }
1913        if lower.contains("opencv") {
1914            embedded_software_components.push("OpenCV");
1915        }
1916        if lower.contains("vtk") {
1917            embedded_software_components.push("VTK Visualization");
1918        }
1919        if lower.contains("itk") {
1920            embedded_software_components.push("ITK Medical Imaging");
1921        }
1922        if lower.contains("gdcm") {
1923            embedded_software_components.push("GDCM DICOM Library");
1924        }
1925        if lower.contains("dcmtk") {
1926            embedded_software_components.push("DCMTK DICOM Toolkit");
1927        }
1928        if lower.contains("cornerstone") {
1929            embedded_software_components.push("Cornerstone Medical Imaging");
1930        }
1931
1932        // Security features
1933        if lower.contains("encryption") || lower.contains("encrypt") {
1934            security_features.push("Data Encryption");
1935        }
1936        if lower.contains("digital signature") || lower.contains("signature") {
1937            security_features.push("Digital Signatures");
1938        }
1939        if lower.contains("audit log") || lower.contains("audit trail") {
1940            security_features.push("Audit Logging");
1941        }
1942        if lower.contains("access control") || lower.contains("authentication") {
1943            security_features.push("Access Control");
1944        }
1945        if lower.contains("tls") || lower.contains("ssl") {
1946            security_features.push("TLS/SSL");
1947        }
1948    }
1949
1950    // Look for medical device identifiers
1951    let mut device_identifiers = Vec::new();
1952    let manufacturers = [
1953        ("GE Healthcare", b"GE Medical" as &[u8]),
1954        ("Siemens", b"Siemens"),
1955        ("Philips", b"Philips"),
1956        ("Canon Medical", b"Canon"),
1957        ("Fujifilm", b"Fujifilm"),
1958        ("Hologic", b"Hologic"),
1959        ("Carestream", b"Carestream"),
1960        ("Agfa", b"Agfa"),
1961    ];
1962
1963    for (name, pattern) in &manufacturers {
1964        if contents
1965            .windows(pattern.len())
1966            .any(|w| w.to_ascii_lowercase() == pattern.to_ascii_lowercase())
1967        {
1968            device_identifiers.push(*name);
1969        }
1970    }
1971
1972    // Remove duplicates
1973    fda_compliance_indicators.sort();
1974    fda_compliance_indicators.dedup();
1975    medical_protocols.sort();
1976    medical_protocols.dedup();
1977    embedded_software_components.sort();
1978    embedded_software_components.dedup();
1979    security_features.sort();
1980    security_features.dedup();
1981
1982    analysis.static_linked = false; // Medical software often uses shared libraries
1983
1984    // Risk assessment based on found indicators
1985    let risk_level = if fda_compliance_indicators.len() >= 2 && security_features.len() >= 2 {
1986        "Low" // Has compliance and security features
1987    } else if fda_compliance_indicators.len() >= 1 {
1988        "Medium" // Has some compliance indicators
1989    } else if patient_data_detected {
1990        "High" // Has patient data but lacks compliance indicators
1991    } else {
1992        "Medium" // General medical software
1993    };
1994
1995    // Add DICOM medical imaging specific metadata
1996    analysis.metadata = serde_json::json!({
1997        "medical_device_type": "dicom_medical_imaging",
1998        "dicom_analysis": {
1999            "is_dicom_file": dicom_obj.is_some(),
2000            "dicom_tags_found": dicom_tags.len(),
2001            "sample_tags": dicom_tags.into_iter().take(10).collect::<Vec<_>>(),
2002            "patient_data_detected": patient_data_detected
2003        },
2004        "compliance_indicators": {
2005            "fda_compliance": fda_compliance_indicators,
2006            "medical_protocols": medical_protocols,
2007            "risk_assessment": risk_level
2008        },
2009        "embedded_components": embedded_software_components,
2010        "security_features": security_features,
2011        "device_identifiers": device_identifiers,
2012        "analysis_type": "dicom_medical_imaging",
2013        "regulatory_notes": {
2014            "requires_fda_clearance": fda_compliance_indicators.is_empty(),
2015            "hipaa_relevant": patient_data_detected || security_features.iter().any(|s| s.contains("Encryption") || s.contains("Access Control")),
2016            "dicom_compliant": medical_protocols.iter().any(|p| p.contains("DICOM"))
2017        }
2018    });
2019
2020    tracing::info!(
2021        "DICOM medical imaging analysis complete: {} compliance indicators, {} protocols, {} security features",
2022        fda_compliance_indicators.len(),
2023        medical_protocols.len(),
2024        security_features.len()
2025    );
2026
2027    Ok(())
2028}
2029
/// Guesses a MIME type for a file from its leading bytes and, failing that,
/// from its file name extension.
///
/// Magic numbers take precedence over the extension and are only consulted
/// when at least four bytes are available. Recognized formats are ELF, PE
/// (`MZ`), 32- and 64-bit Mach-O in either byte order, and WebAssembly.
///
/// The extension is the text after the last `.` in `file_name`, compared
/// case-insensitively; a name without any `.` has no extension. Anything
/// unrecognized (including `.bin`) yields `"application/octet-stream"`.
fn detect_file_type_fallback(file_name: &str, contents: &[u8]) -> String {
    // Classifies by the first four bytes, if present.
    fn mime_from_magic(contents: &[u8]) -> Option<&'static str> {
        match contents.get(..4) {
            Some([0x7f, b'E', b'L', b'F']) => Some("application/x-elf"),
            Some([b'M', b'Z', _, _]) => Some("application/x-msdownload"), // PE
            // Mach-O: 32-bit (…ce) and 64-bit (…cf) magic, both byte orders.
            Some([0xfe, 0xed, 0xfa, 0xce])
            | Some([0xce, 0xfa, 0xed, 0xfe])
            | Some([0xfe, 0xed, 0xfa, 0xcf])
            | Some([0xcf, 0xfa, 0xed, 0xfe]) => Some("application/x-mach-binary"),
            Some([0x00, 0x61, 0x73, 0x6d]) => Some("application/wasm"), // WASM
            _ => None,
        }
    }

    // Classifies by extension; requires an actual '.' so that a bare name
    // such as "exe" or "so" is not mistaken for an extension.
    fn mime_from_extension(file_name: &str) -> Option<&'static str> {
        let (_, ext) = file_name.rsplit_once('.')?;
        match ext.to_lowercase().as_str() {
            "exe" | "dll" => Some("application/x-msdownload"),
            "so" | "a" => Some("application/x-sharedlib"),
            "wasm" => Some("application/wasm"),
            _ => None,
        }
    }

    mime_from_magic(contents)
        .or_else(|| mime_from_extension(file_name))
        .unwrap_or("application/octet-stream")
        .to_string()
}
2057
2058fn extract_strings(contents: &[u8]) -> Vec<String> {
2059    let mut strings = Vec::new();
2060    let mut current_string = Vec::new();
2061
2062    tracing::debug!("Extracting strings from {} bytes", contents.len());
2063
2064    for &byte in contents {
2065        if byte.is_ascii_graphic() || byte == b' ' || byte == b'\t' {
2066            current_string.push(byte);
2067        } else {
2068            if current_string.len() >= 3 {
2069                // Reduced minimum for small files
2070                if let Ok(s) = String::from_utf8(current_string.clone()) {
2071                    // Filter out very common/useless strings
2072                    if !s.trim().is_empty() && !is_junk_string(&s) {
2073                        strings.push(s.trim().to_string());
2074                    }
2075                }
2076            }
2077            current_string.clear();
2078        }
2079    }
2080
2081    // Process any remaining string
2082    if current_string.len() >= 3 {
2083        if let Ok(s) = String::from_utf8(current_string) {
2084            if !s.trim().is_empty() && !is_junk_string(&s) {
2085                strings.push(s.trim().to_string());
2086            }
2087        }
2088    }
2089
2090    // Deduplicate and limit
2091    strings.sort();
2092    strings.dedup();
2093    strings.truncate(50);
2094
2095    tracing::debug!("Extracted {} strings", strings.len());
2096    strings
2097}
2098
/// Reports whether `s` looks like padding or noise rather than a useful string.
///
/// A string is junk when it is longer than 200 bytes, or consists solely of
/// NUL and space characters, or consists solely of ASCII punctuation. The
/// empty string satisfies the latter two conditions and is therefore junk.
fn is_junk_string(s: &str) -> bool {
    // Very long strings are often noise.
    if s.len() > 200 {
        return true;
    }

    let only_padding = s.bytes().all(|b| matches!(b, 0 | b' '));
    let only_punctuation = s.bytes().all(|b| b.is_ascii_punctuation());

    only_padding || only_punctuation
}
2105
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty input must still produce an analysis that echoes the file
    /// name and reports a size of zero bytes.
    #[tokio::test]
    async fn test_analyze_empty() {
        let no_bytes: &[u8] = &[];

        let outcome = analyze_binary("test.bin", no_bytes).await;
        assert!(outcome.is_ok());

        let report = outcome.unwrap();
        assert_eq!(report.file_name, "test.bin");
        assert_eq!(report.size_bytes, 0);
    }
}