1use super::{BinaryAnalysis, extract_license_info, extract_version_info};
2use blake3::Hasher;
3use chrono::Utc;
4use goblin::{
5 Object as GoblinObject,
6 elf::Elf,
7 mach::{MachO, constants::S_ATTR_PURE_INSTRUCTIONS, load_command::CommandVariant},
8 pe::PE,
9};
10use infer;
11use object::{Object, ObjectSymbol};
12use sha2::{Digest, Sha256};
13use std::collections::HashSet;
14use uuid::Uuid;
15use wasmparser::{Parser, Payload};
16
17use capstone::prelude::*;
19
20pub async fn analyze_binary(file_name: &str, contents: &[u8]) -> anyhow::Result<BinaryAnalysis> {
21 tracing::info!(
22 "Starting binary analysis for '{}' ({} bytes)",
23 file_name,
24 contents.len()
25 );
26
27 if contents.len() < 50 {
29 tracing::warn!(
30 "File is very small ({} bytes), analyzing as raw firmware blob",
31 contents.len()
32 );
33 let sha256_hash = Sha256::digest(contents);
34 let mut hasher = Hasher::new();
35 hasher.update(contents);
36 let alternative_hash = hasher.finalize();
37
38 let mut analysis = BinaryAnalysis {
39 id: Uuid::new_v4(),
40 file_name: file_name.to_string(),
41 format: "unknown".to_string(),
42 architecture: "unknown".to_string(),
43 languages: Vec::new(),
44 detected_symbols: Vec::new(),
45 embedded_strings: Vec::new(),
46 suspected_secrets: Vec::new(),
47 imports: Vec::new(),
48 exports: Vec::new(),
49 hash_sha256: format!("{:x}", sha256_hash),
50 hash_blake3: Some(hex::encode(alternative_hash.as_bytes())),
51 size_bytes: contents.len() as u64,
52 linked_libraries: Vec::new(),
53 static_linked: false,
54 version_info: None,
55 license_info: None,
56 metadata: serde_json::json!({}),
57 created_at: Utc::now(),
58 sbom: None,
59 binary_data: Some(contents.to_vec()),
60 entry_point: None,
61 code_sections: Vec::new(),
62 };
63
64 analyze_raw_firmware_blob(&mut analysis, contents)?;
65
66 analysis.version_info = Some(extract_version_info(
68 contents,
69 &analysis.embedded_strings,
70 &analysis.format,
71 ));
72 analysis.license_info = Some(extract_license_info(&analysis.embedded_strings));
73
74 return Ok(analysis);
75 }
76
77 let sha256_hash = Sha256::digest(contents);
78 let mut hasher = Hasher::new();
79 hasher.update(contents);
80 let alternative_hash = hasher.finalize();
81
82 let detected_type = infer::get(contents);
84 let file_type = if let Some(kind) = detected_type {
85 tracing::info!(
86 "Detected file type: {} ({})",
87 kind.mime_type(),
88 kind.extension()
89 );
90 kind.mime_type().to_string()
91 } else {
92 tracing::debug!("Could not detect file type, using fallback");
93 detect_file_type_fallback(file_name, contents)
94 };
95
96 let mut analysis = BinaryAnalysis {
97 id: Uuid::new_v4(),
98 file_name: file_name.to_string(),
99 format: file_type.clone(),
100 architecture: "unknown".to_string(),
101 languages: Vec::new(),
102 detected_symbols: Vec::new(),
103 embedded_strings: extract_strings(contents),
104 suspected_secrets: Vec::new(),
105 imports: Vec::new(),
106 exports: Vec::new(),
107 hash_sha256: format!("{:x}", sha256_hash),
108 hash_blake3: Some(hex::encode(alternative_hash.as_bytes())),
109 size_bytes: contents.len() as u64,
110 linked_libraries: Vec::new(),
111 static_linked: false,
112 version_info: None,
113 license_info: None,
114 metadata: serde_json::json!({}),
115 created_at: Utc::now(),
116 sbom: None,
117 binary_data: Some(contents.to_vec()),
118 entry_point: None,
119 code_sections: Vec::new(),
120 };
121
122 let mut parsed_successfully = false;
124
125 if contents.len() >= 4 {
126 match &contents[0..4.min(contents.len())] {
128 [0x1F, 0x8B, _, _] => {
129 tracing::info!("GZIP magic detected");
130 analysis.format = "compressed-firmware".to_string();
131 analysis.languages.push("Compressed Binary".to_string());
132 parsed_successfully = true;
133 }
134 [0x04, 0x22, 0x4D, 0x18] => {
135 tracing::info!("LZ4 magic detected");
136 analysis.format = "compressed-firmware".to_string();
137 analysis.languages.push("Compressed Binary".to_string());
138 parsed_successfully = true;
139 }
140 [0x42, 0x5A, 0x68, _] => {
141 tracing::info!("BZIP2 magic detected");
142 analysis.format = "compressed-firmware".to_string();
143 analysis.languages.push("Compressed Binary".to_string());
144 parsed_successfully = true;
145 }
146 [0xFD, 0x37, 0x7A, 0x58] => {
147 tracing::info!("XZ magic detected");
148 analysis.format = "compressed-firmware".to_string();
149 analysis.languages.push("Compressed Binary".to_string());
150 parsed_successfully = true;
151 }
152 [0x28, 0xB5, 0x2F, 0xFD] => {
153 tracing::info!("ZSTD magic detected");
154 analysis.format = "compressed-firmware".to_string();
155 analysis.languages.push("Compressed Binary".to_string());
156 parsed_successfully = true;
157 }
158 [0x43, 0x4F, 0x4D, 0x50] => {
159 tracing::info!("Custom IoT compressed firmware magic (COMP) detected");
161 analysis.format = "compressed-firmware".to_string();
162 analysis.languages.push("Compressed Binary".to_string());
163 parsed_successfully = true;
164 }
165 _ => {}
166 }
167
168 if !parsed_successfully {
170 match &contents[0..4] {
171 [0x7f, b'E', b'L', b'F'] => {
172 tracing::info!("ELF magic detected, using goblin ELF parser");
173 if let Ok(GoblinObject::Elf(elf)) = GoblinObject::parse(contents) {
174 analyze_elf(&mut analysis, &elf, contents)?;
175 parsed_successfully = true;
176 }
177 }
178 [b'M', b'Z', _, _] => {
179 tracing::info!("PE magic detected, using goblin PE parser");
180 if let Ok(GoblinObject::PE(pe)) = GoblinObject::parse(contents) {
181 analyze_pe(&mut analysis, &pe, contents)?;
182 parsed_successfully = true;
183 }
184 }
185 [0xfe, 0xed, 0xfa, 0xce] | [0xce, 0xfa, 0xed, 0xfe] => {
186 tracing::info!("Mach-O magic detected, using goblin Mach-O parser");
187 if let Ok(GoblinObject::Mach(mach)) = GoblinObject::parse(contents) {
188 match mach {
189 goblin::mach::Mach::Fat(_) => {
190 analysis.format = "macho-fat".to_string();
191 analysis.architecture = "multi".to_string();
192 }
193 goblin::mach::Mach::Binary(macho) => {
194 analyze_macho(&mut analysis, &macho, contents)?
195 }
196 }
197 parsed_successfully = true;
198 }
199 }
200 [0x00, 0x61, 0x73, 0x6d] => {
201 tracing::info!("WASM magic detected, using wasmparser");
202 if analyze_wasm(&mut analysis, contents).is_ok() {
203 parsed_successfully = true;
204 }
205 }
206 _ => {
207 if contents.len() >= 8 && &contents[0..8] == b"!<arch>\n" {
209 tracing::info!("AR archive magic detected");
210 analysis.format = "archive".to_string();
211 parsed_successfully = true;
212 }
213 else if contents.len() >= 132 && &contents[128..132] == b"DICM" {
215 tracing::info!("DICOM magic detected, using DICOM parser");
216 if analyze_dicom_medical_imaging(&mut analysis, contents).is_ok() {
217 parsed_successfully = true;
218 }
219 }
220 }
221 }
222 }
223 }
224
225 if !parsed_successfully {
227 let text_content = String::from_utf8_lossy(contents);
228 let first_few_lines: Vec<&str> = text_content.lines().take(5).collect();
229
230 if first_few_lines
232 .iter()
233 .any(|line| line.trim().starts_with(':'))
234 && first_few_lines.iter().all(|line| {
235 let trimmed = line.trim();
236 trimmed.is_empty()
237 || trimmed.starts_with(':')
238 || trimmed.chars().all(|c| c.is_ascii_hexdigit() || c == ':')
239 })
240 {
241 tracing::info!("Detected Intel HEX format, using Intel HEX parser");
242 if analyze_intel_hex(&mut analysis, contents).is_ok() {
243 parsed_successfully = true;
244 }
245 }
246
247 if !parsed_successfully
249 && first_few_lines
250 .iter()
251 .any(|line| line.trim().starts_with('S'))
252 && first_few_lines.iter().all(|line| {
253 let trimmed = line.trim();
254 trimmed.is_empty()
255 || (trimmed.starts_with('S')
256 && trimmed.len() >= 4
257 && trimmed.chars().skip(1).all(|c| c.is_ascii_hexdigit()))
258 })
259 {
260 tracing::info!("Detected Motorola S-Record format, using S-Record parser");
261 if analyze_srec(&mut analysis, contents).is_ok() {
262 parsed_successfully = true;
263 }
264 }
265 }
266
267 if !parsed_successfully {
268 tracing::debug!("No specific magic bytes found, attempting generic goblin parsing...");
269 match GoblinObject::parse(contents) {
270 Ok(obj) => {
271 tracing::info!("Successfully parsed with goblin (generic)");
272 match obj {
273 GoblinObject::Elf(elf) => {
274 tracing::info!("Detected ELF binary (generic)");
275 analyze_elf(&mut analysis, &elf, contents)?;
276 parsed_successfully = true;
277 }
278 GoblinObject::PE(pe) => {
279 tracing::info!("Detected PE binary (generic)");
280 analyze_pe(&mut analysis, &pe, contents)?;
281 parsed_successfully = true;
282 }
283 GoblinObject::Mach(mach) => {
284 tracing::info!("Detected Mach-O binary (generic)");
285 match mach {
286 goblin::mach::Mach::Fat(_) => {
287 analysis.format = "macho-fat".to_string();
288 analysis.architecture = "multi".to_string();
289 }
290 goblin::mach::Mach::Binary(macho) => {
291 analyze_macho(&mut analysis, &macho, contents)?
292 }
293 }
294 parsed_successfully = true;
295 }
296 GoblinObject::Archive(_) => {
297 tracing::info!("Detected archive");
298 analysis.format = "archive".to_string();
299 parsed_successfully = true;
300 }
301 _ => {
302 tracing::debug!("Unknown goblin object type");
303 }
304 }
305 }
306 Err(e) => {
307 tracing::debug!("Goblin parsing failed: {}, trying WebAssembly", e);
308 if analyze_wasm(&mut analysis, contents).is_ok() {
309 tracing::info!("Successfully parsed as WebAssembly");
310 parsed_successfully = true;
311 }
312 }
313 }
314 }
315
316 if !parsed_successfully {
318 if contents.len() >= 8 {
320 let sp_bytes = [contents[0], contents[1], contents[2], contents[3]];
321 let reset_bytes = [contents[4], contents[5], contents[6], contents[7]];
322 let sp_value = u32::from_le_bytes(sp_bytes);
323 let reset_value = u32::from_le_bytes(reset_bytes);
324
325 if sp_value >= 0x20000000 && sp_value <= 0x20100000 && reset_value >= 0x08000000 && reset_value <= 0x08100000 && (reset_value & 1) == 1
329 {
330 tracing::info!("Detected ARM Cortex-M firmware blob, using ARM Cortex-M parser");
332 if analyze_arm_cortex_m(&mut analysis, contents).is_ok() {
333 parsed_successfully = true;
334 }
335 }
336 }
337 }
338
339 tracing::debug!("Extracting version and license metadata");
341 analysis.version_info = Some(extract_version_info(
342 contents,
343 &analysis.embedded_strings,
344 &analysis.format,
345 ));
346 analysis.license_info = Some(extract_license_info(&analysis.embedded_strings));
347
348 tracing::info!(
349 "Metadata extraction complete: version_confidence={:.2}, license_confidence={:.2}",
350 analysis
351 .version_info
352 .as_ref()
353 .map(|v| v.confidence)
354 .unwrap_or(0.0),
355 analysis
356 .license_info
357 .as_ref()
358 .map(|l| l.confidence)
359 .unwrap_or(0.0)
360 );
361
362 if !parsed_successfully {
363 tracing::info!("All specialized parsers failed, using raw firmware blob analysis");
364 analyze_raw_firmware_blob(&mut analysis, contents)?;
365 } else {
366 tracing::info!("Successfully analyzed {} as {}", file_name, analysis.format);
367 }
368
369 Ok(analysis)
370}
371
372fn analyze_macho(
373 analysis: &mut BinaryAnalysis,
374 macho: &MachO,
375 contents: &[u8],
376) -> anyhow::Result<()> {
377 analysis.format = "macho".to_string();
378
379 analysis.architecture = match macho.header.cputype() {
381 goblin::mach::constants::cputype::CPU_TYPE_X86_64 => "x86_64".to_string(),
382 goblin::mach::constants::cputype::CPU_TYPE_ARM64 => "aarch64".to_string(),
383 goblin::mach::constants::cputype::CPU_TYPE_X86 => "i386".to_string(),
384 _ => format!("unknown({})", macho.header.cputype()),
385 };
386
387 let mut symbol_set = HashSet::new();
389 if let Some(symbols) = &macho.symbols {
390 for symbol in symbols.iter() {
391 if let Ok((name, _)) = symbol {
392 if !name.is_empty() {
393 symbol_set.insert(name.to_string());
394 analysis.detected_symbols.push(name.to_string());
395 }
396 }
397 }
398 }
399
400 for lib in &macho.libs {
402 let lib_name = lib.to_string();
403 analysis.linked_libraries.push(lib_name.clone());
404 analysis.imports.push(lib_name.clone());
405 analysis.embedded_strings.push(lib_name.clone());
407 if let Some(version) = extract_version_from_lib_name(&lib_name) {
409 analysis.embedded_strings.push(version);
410 }
411 }
412
413 if let Ok(obj_file) = object::File::parse(contents) {
415 for symbol in obj_file.symbols() {
416 if let Ok(name) = symbol.name() {
417 if !name.is_empty() {
418 if symbol.is_undefined() {
419 analysis.imports.push(name.to_string());
420 analysis.embedded_strings.push(name.to_string());
421 } else if symbol.is_global() {
422 analysis.exports.push(name.to_string());
423 }
424 symbol_set.insert(name.to_string());
425 }
426 }
427 }
428 }
429
430 let mut metadata = serde_json::json!({
432 "analysis_type": "macho",
433 "load_commands": [],
434 "frameworks": [],
435 "min_os_version": null,
436 });
437
438 for lc in macho.load_commands.iter() {
440 match lc.command {
441 CommandVariant::LoadDylib(ref dylib) => {
442 let offset = dylib.dylib.name as usize;
443 if offset < contents.len() {
444 let name_bytes = &contents[offset..];
445 if let Some(end) = name_bytes.iter().position(|&b| b == 0) {
446 if let Ok(name_str) = std::str::from_utf8(&name_bytes[..end]) {
447 if name_str.contains(".framework") {
448 metadata["frameworks"]
449 .as_array_mut()
450 .unwrap()
451 .push(serde_json::Value::String(name_str.to_string()));
452 analysis.embedded_strings.push(name_str.to_string());
453 }
454 }
455 }
456 }
457 }
458 CommandVariant::VersionMinMacosx(ref ver) => {
459 let (major, minor) = unpack_version(ver.version);
460 metadata["min_os_version"] =
461 serde_json::Value::String(format!("{}.{}", major, minor));
462 }
463 CommandVariant::BuildVersion(ref build) => {
464 let (major, minor) = unpack_version(build.minos);
465 metadata["min_os_version"] =
466 serde_json::Value::String(format!("{}.{}", major, minor));
467 }
468 _ => {}
469 }
470 metadata["load_commands"]
471 .as_array_mut()
472 .unwrap()
473 .push(serde_json::Value::String(format!("{:?}", lc.command)));
474 }
475
476 if macho.entry != 0 {
478 analysis.entry_point = Some(format!("0x{:08X}", macho.entry));
479 tracing::debug!("Mach-O entry point: 0x{:08X}", macho.entry);
480 }
481
482 for segment in &macho.segments {
484 if let Ok(sections) = segment.sections() {
485 for (section, _data) in sections {
486 analysis.code_sections.push(super::CodeSection {
487 name: section.name().unwrap_or("").to_string(),
488 size: section.size,
489 start_address: section.addr,
490 end_address: section.addr + section.size,
491 permissions: if (section.flags & S_ATTR_PURE_INSTRUCTIONS) != 0 {
492 "r-x"
493 } else {
494 "rw-"
495 }
496 .to_string(),
497 section_type: super::CodeSectionType::Text,
498 });
499 }
500 }
501 }
502
503 analysis.static_linked = macho.libs.is_empty() && symbol_set.iter().any(|s| s.contains("main"));
505
506 let cpe_candidates = extract_cpe_candidates(
508 &analysis.linked_libraries,
509 &analysis.imports,
510 &analysis.detected_symbols,
511 );
512 analysis.metadata = serde_json::json!({
513 "macho_metadata": metadata,
514 "cpe_candidates": cpe_candidates,
515 });
516
517 tracing::info!(
518 "Mach-O analysis complete: {} symbols, {} libraries, {} imports, {} exports",
519 analysis.detected_symbols.len(),
520 analysis.linked_libraries.len(),
521 analysis.imports.len(),
522 analysis.exports.len()
523 );
524
525 Ok(())
526}
527
/// Extracts a dotted numeric version from a library name or path.
///
/// The name is split on `.`; the first run of consecutive components that
/// consist solely of ASCII digits is joined back with `.` and returned, so
/// `libssl.1.1.dylib` yields `1.1`. Empty components (as produced by `./x` or
/// `a..b`) never count as a version. Returns `None` when no component is
/// numeric.
fn extract_version_from_lib_name(lib_name: &str) -> Option<String> {
    let is_numeric = |part: &str| !part.is_empty() && part.bytes().all(|b| b.is_ascii_digit());

    let version: Vec<&str> = lib_name
        .split('.')
        .skip_while(|part| !is_numeric(*part))
        .take_while(|part| is_numeric(*part))
        .collect();

    if version.is_empty() {
        None
    } else {
        Some(version.join("."))
    }
}
538
/// Splits a packed 32-bit version word into `(major, minor)`.
///
/// The upper 16 bits are the major number and the next 8 bits the minor
/// number; the lowest 8 bits are ignored.
fn unpack_version(version: u32) -> (u32, u32) {
    let [major_hi, major_lo, minor, _low_byte] = version.to_be_bytes();
    (
        u32::from(u16::from_be_bytes([major_hi, major_lo])),
        u32::from(minor),
    )
}
545
546fn extract_cpe_candidates(libs: &[String], imports: &[String], symbols: &[String]) -> Vec<String> {
548 let mut cpes = HashSet::new();
549 for item in libs.iter().chain(imports.iter()).chain(symbols.iter()) {
550 let item_lower = item.to_lowercase();
551 if item_lower.contains("openssl")
553 || item_lower.contains("libcrypto")
554 || item_lower.contains("libssl")
555 {
556 if let Some(version) = extract_version_from_lib_name(&item_lower) {
557 cpes.insert(format!(
558 "cpe:2.3:a:openssl:openssl:{}:*:*:*:*:*:*:*",
559 version
560 ));
561 } else {
562 cpes.insert("cpe:2.3:a:openssl:openssl:*:*:*:*:*:*:*:*".to_string());
563 }
564 }
565 if item_lower.contains("zlib") {
567 if let Some(version) = extract_version_from_lib_name(&item_lower) {
568 cpes.insert(format!("cpe:2.3:a:zlib:zlib:{}:*:*:*:*:*:*:*", version));
569 }
570 }
571 if item_lower.contains("curl") || item_lower.contains("libcurl") {
572 if let Some(version) = extract_version_from_lib_name(&item_lower) {
573 cpes.insert(format!("cpe:2.3:a:curl:curl:{}:*:*:*:*:*:*:*", version));
574 }
575 }
576 }
577 cpes.into_iter().collect()
578}
579
580fn analyze_elf(analysis: &mut BinaryAnalysis, elf: &Elf, contents: &[u8]) -> anyhow::Result<()> {
581 analysis.format = "elf".to_string();
582
583 analysis.architecture = match elf.header.e_machine {
585 goblin::elf::header::EM_X86_64 => "x86_64".to_string(),
586 goblin::elf::header::EM_386 => "i386".to_string(),
587 goblin::elf::header::EM_ARM => "arm".to_string(),
588 goblin::elf::header::EM_AARCH64 => "aarch64".to_string(),
589 goblin::elf::header::EM_RISCV => "riscv".to_string(),
590 _ => format!("unknown({})", elf.header.e_machine),
591 };
592
593 for sym in &elf.syms {
595 if let Some(name) = elf.strtab.get_at(sym.st_name) {
596 if !name.is_empty() {
597 analysis.detected_symbols.push(name.to_string());
598 }
599 }
600 }
601
602 for sym in &elf.dynsyms {
604 if let Some(name) = elf.dynstrtab.get_at(sym.st_name) {
605 if !name.is_empty() {
606 analysis.detected_symbols.push(name.to_string());
607 }
608 }
609 }
610
611 for lib in &elf.libraries {
613 analysis.linked_libraries.push(lib.to_string());
614 analysis.embedded_strings.push(lib.to_string());
616 }
617
618 if elf.header.e_entry != 0 {
620 analysis.entry_point = Some(format!("0x{:08X}", elf.header.e_entry));
621 tracing::debug!("ELF entry point: 0x{:08X}", elf.header.e_entry);
622 }
623
624 analysis.static_linked =
626 elf.libraries.is_empty() && elf.header.e_type == goblin::elf::header::ET_EXEC;
627
628 if let Ok(obj_file) = object::File::parse(contents) {
630 for symbol in obj_file.symbols() {
631 if let Ok(name) = symbol.name() {
632 if symbol.is_undefined() {
633 analysis.imports.push(name.to_string());
634 } else if symbol.is_global() {
635 analysis.exports.push(name.to_string());
636 }
637 }
638 }
639 }
640
641 Ok(())
642}
643
644fn analyze_pe(analysis: &mut BinaryAnalysis, pe: &PE, _contents: &[u8]) -> anyhow::Result<()> {
645 analysis.format = "pe".to_string();
646
647 analysis.architecture = match pe.header.coff_header.machine {
649 goblin::pe::header::COFF_MACHINE_X86_64 => "x86_64".to_string(),
650 goblin::pe::header::COFF_MACHINE_X86 => "i386".to_string(),
651 goblin::pe::header::COFF_MACHINE_ARM64 => "aarch64".to_string(),
652 _ => format!("unknown({})", pe.header.coff_header.machine),
653 };
654
655 for export in &pe.exports {
657 if let Some(name) = &export.name {
658 analysis.exports.push(name.to_string());
659 }
660 }
661
662 for import in &pe.imports {
664 analysis.imports.push(import.name.to_string());
665 analysis.embedded_strings.push(import.name.to_string());
667 if !analysis.linked_libraries.contains(&import.dll.to_string()) {
668 analysis.linked_libraries.push(import.dll.to_string());
669 analysis.embedded_strings.push(import.dll.to_string());
671 }
672 }
673
674 if let Some(optional_header) = &pe.header.optional_header {
676 let entry_point = optional_header.standard_fields.address_of_entry_point;
677 if entry_point != 0 {
678 let image_base = optional_header.windows_fields.image_base;
680 let virtual_entry_point = image_base + entry_point as u64;
681 analysis.entry_point = Some(format!("0x{:08X}", virtual_entry_point));
682 tracing::debug!(
683 "PE entry point: 0x{:08X} (RVA: 0x{:08X})",
684 virtual_entry_point,
685 entry_point
686 );
687 }
688 }
689
690 analysis.static_linked = pe.imports.is_empty();
692
693 Ok(())
694}
695
696fn analyze_wasm(analysis: &mut BinaryAnalysis, contents: &[u8]) -> anyhow::Result<()> {
697 tracing::info!("Starting WASM analysis");
698 analysis.format = "application/wasm".to_string();
699 analysis.architecture = "wasm32".to_string();
700 analysis.languages.push("WebAssembly".to_string());
701
702 let parser = Parser::new(0);
703 let mut imports = HashSet::new();
704 let mut exports = HashSet::new();
705 let mut function_count = 0;
706 let mut memory_info = Vec::new();
707 let mut table_info = Vec::new();
708
709 for payload in parser.parse_all(contents) {
710 use wasmparser::Payload as WasmPayload;
711 match payload {
712 Ok(payload) => {
713 match payload {
714 Payload::Version { num, .. } => {
715 tracing::debug!("WASM version: {}", num);
716 }
717 Payload::ImportSection(reader) => {
718 for import in reader {
719 match import {
720 Ok(import) => {
721 let import_name = format!("{}::{}", import.module, import.name);
722 imports.insert(import_name);
723 tracing::debug!(
724 "Found import: {}::{}",
725 import.module,
726 import.name
727 );
728 }
729 Err(e) => tracing::warn!("Failed to parse import: {}", e),
730 }
731 }
732 }
733 Payload::ExportSection(reader) => {
734 for export in reader {
735 match export {
736 Ok(export) => {
737 exports.insert(export.name.to_string());
738 tracing::debug!("Found export: {}", export.name);
739 }
740 Err(e) => tracing::warn!("Failed to parse export: {}", e),
741 }
742 }
743 }
744 Payload::FunctionSection(reader) => {
745 function_count = reader.count();
746 tracing::debug!("Function count: {}", function_count);
747 }
748 Payload::MemorySection(reader) => {
749 for memory in reader {
750 match memory {
751 Ok(memory) => {
752 memory_info.push(format!(
753 "initial: {}, maximum: {:?}",
754 memory.initial, memory.maximum
755 ));
756 }
757 Err(e) => tracing::warn!("Failed to parse memory: {}", e),
758 }
759 }
760 }
761 Payload::TableSection(reader) => {
762 for table in reader {
763 match table {
764 Ok(table) => {
765 table_info.push(format!(
766 "element_type: {:?}, initial: {}, maximum: {:?}",
767 table.ty.element_type, table.ty.initial, table.ty.maximum
768 ));
769 }
770 Err(e) => tracing::warn!("Failed to parse table: {}", e),
771 }
772 }
773 }
774 WasmPayload::CustomSection(custom) => {
775 if let Ok(bytes_str) = std::str::from_utf8(custom.data()) {
776 for s in extract_strings(bytes_str.as_bytes()) {
777 analysis.embedded_strings.push(s);
778 }
779 }
780 }
781 Payload::TypeSection(reader) => {
782 tracing::debug!("Type section with {} types", reader.count());
783 }
784 _ => {
785 }
787 }
788 }
789 Err(e) => {
790 tracing::warn!("WASM parsing error: {}", e);
791 break;
792 }
793 }
794 }
795
796 analysis.imports = imports.into_iter().collect();
797 analysis.exports = exports.into_iter().collect();
798 analysis.static_linked = true; analysis.metadata = serde_json::json!({
802 "wasm_version": "1.0",
803 "function_count": function_count,
804 "memory_sections": memory_info,
805 "table_sections": table_info,
806 "import_count": analysis.imports.len(),
807 "export_count": analysis.exports.len(),
808 "analysis_type": "wasm"
809 });
810
811 tracing::info!(
812 "WASM analysis complete: {} imports, {} exports, {} functions",
813 analysis.imports.len(),
814 analysis.exports.len(),
815 function_count
816 );
817
818 Ok(())
819}
820
821fn analyze_intel_hex(analysis: &mut BinaryAnalysis, contents: &[u8]) -> anyhow::Result<()> {
822 tracing::info!("Starting Intel HEX analysis using manual parsing");
823 analysis.format = "intel-hex".to_string();
824 analysis.architecture = "embedded".to_string();
825 analysis.languages.push("Firmware".to_string());
826
827 let hex_content = String::from_utf8_lossy(contents);
828 let mut memory_segments = Vec::new();
829 let mut entry_points = Vec::new();
830 let mut total_data_bytes = 0;
831 let mut start_address = None;
832 let mut firmware_data = Vec::new();
833 let mut min_address = None;
834 let mut max_address = None;
835 let mut extended_linear_address = 0u32;
836 let mut extended_segment_address = 0u32;
837
838 for (line_num, line) in hex_content.lines().enumerate() {
840 let line = line.trim();
841 if line.is_empty() || !line.starts_with(':') {
842 continue;
843 }
844
845 if line.len() < 11 {
846 tracing::warn!(
847 "Invalid Intel HEX record at line {}: too short",
848 line_num + 1
849 );
850 continue;
851 }
852
853 let byte_count = match u8::from_str_radix(&line[1..3], 16) {
857 Ok(count) => count,
858 Err(_) => continue,
859 };
860
861 let address = match u16::from_str_radix(&line[3..7], 16) {
862 Ok(addr) => addr,
863 Err(_) => continue,
864 };
865
866 let record_type = match u8::from_str_radix(&line[7..9], 16) {
867 Ok(rt) => rt,
868 Err(_) => continue,
869 };
870
871 let expected_len = 11 + (byte_count as usize * 2);
873 if line.len() != expected_len {
874 tracing::warn!(
875 "Invalid Intel HEX record at line {}: wrong length",
876 line_num + 1
877 );
878 continue;
879 }
880
881 let mut data_bytes = Vec::new();
883 for i in 0..byte_count {
884 let start_idx = 9 + (i as usize * 2);
885 let end_idx = start_idx + 2;
886 if let Ok(byte) = u8::from_str_radix(&line[start_idx..end_idx], 16) {
887 data_bytes.push(byte);
888 }
889 }
890
891 match record_type {
892 0x00 => {
893 let full_address =
895 extended_linear_address + extended_segment_address + (address as u32);
896 total_data_bytes += data_bytes.len() as u32;
897 firmware_data.extend_from_slice(&data_bytes);
898
899 min_address =
900 Some(min_address.map_or(full_address, |min: u32| min.min(full_address)));
901 max_address = Some(
902 max_address.map_or(full_address + data_bytes.len() as u32, |max: u32| {
903 max.max(full_address + data_bytes.len() as u32)
904 }),
905 );
906
907 memory_segments.push(serde_json::json!({
908 "address": format!("0x{:08X}", full_address),
909 "size": data_bytes.len(),
910 "type": "data"
911 }));
912 }
913 0x01 => {
914 tracing::debug!("Found end of file record");
916 break;
917 }
918 0x02 => {
919 if data_bytes.len() >= 2 {
921 extended_segment_address =
922 ((data_bytes[0] as u32) << 12) | ((data_bytes[1] as u32) << 4);
923 tracing::debug!(
924 "Extended segment address: 0x{:08X}",
925 extended_segment_address
926 );
927 }
928 }
929 0x03 => {
930 if data_bytes.len() >= 4 {
932 let cs = ((data_bytes[0] as u32) << 8) | (data_bytes[1] as u32);
933 let ip = ((data_bytes[2] as u32) << 8) | (data_bytes[3] as u32);
934 let start_addr = (cs << 4) + ip;
935 start_address = Some(start_addr);
936 entry_points.push(format!("0x{:08X}", start_addr));
937 analysis.entry_point = Some(format!("0x{:08X}", start_addr));
938 tracing::debug!("Start segment address: CS=0x{:04X}, IP=0x{:04X}", cs, ip);
939 }
940 }
941 0x04 => {
942 if data_bytes.len() >= 2 {
944 extended_linear_address =
945 ((data_bytes[0] as u32) << 24) | ((data_bytes[1] as u32) << 16);
946 tracing::debug!("Extended linear address: 0x{:08X}", extended_linear_address);
947 }
948 }
949 0x05 => {
950 if data_bytes.len() >= 4 {
952 let start_addr = ((data_bytes[0] as u32) << 24)
953 | ((data_bytes[1] as u32) << 16)
954 | ((data_bytes[2] as u32) << 8)
955 | (data_bytes[3] as u32);
956 start_address = Some(start_addr);
957 entry_points.push(format!("0x{:08X}", start_addr));
958 analysis.entry_point = Some(format!("0x{:08X}", start_addr));
959 tracing::debug!("Start linear address: 0x{:08X}", start_addr);
960 }
961 }
962 _ => {
963 tracing::debug!("Unknown Intel HEX record type: 0x{:02X}", record_type);
964 }
965 }
966 }
967
968 let firmware_strings = extract_strings(&firmware_data);
970 analysis.embedded_strings.extend(firmware_strings);
971
972 let mut device_hints = Vec::new();
974 for string in &analysis.embedded_strings {
975 let lower = string.to_lowercase();
976 if lower.contains("bootloader") || lower.contains("boot") {
977 device_hints.push("bootloader");
978 }
979 if lower.contains("interrupt") || lower.contains("isr") {
980 device_hints.push("interrupt_handler");
981 }
982 if lower.contains("uart") || lower.contains("spi") || lower.contains("i2c") {
983 device_hints.push("peripheral_driver");
984 }
985 if lower.contains("atmega") || lower.contains("avr") {
986 analysis.architecture = "avr".to_string();
987 device_hints.push("avr_microcontroller");
988 }
989 if lower.contains("stm32") || lower.contains("cortex") {
990 analysis.architecture = "arm_cortex_m".to_string();
991 device_hints.push("arm_cortex_m");
992 }
993 if lower.contains("pic") && (lower.contains("16") || lower.contains("18")) {
994 analysis.architecture = "pic".to_string();
995 device_hints.push("pic_microcontroller");
996 }
997 }
998
999 analysis.static_linked = true; let memory_span = if let (Some(min), Some(max)) = (min_address, max_address) {
1003 max - min
1004 } else {
1005 0
1006 };
1007
1008 analysis.metadata = serde_json::json!({
1010 "hex_format": "intel_hex",
1011 "total_data_bytes": total_data_bytes,
1012 "memory_segments": memory_segments,
1013 "entry_points": entry_points,
1014 "start_address": start_address.map(|addr| format!("0x{:08X}", addr)),
1015 "memory_range": {
1016 "min_address": min_address.map(|addr| format!("0x{:08X}", addr)),
1017 "max_address": max_address.map(|addr| format!("0x{:08X}", addr)),
1018 "span_bytes": memory_span
1019 },
1020 "device_hints": device_hints,
1021 "analysis_type": "intel_hex_firmware"
1022 });
1023
1024 tracing::info!(
1025 "Intel HEX analysis complete: {} data bytes, {} memory segments, memory span: {} bytes",
1026 total_data_bytes,
1027 memory_segments.len(),
1028 memory_span
1029 );
1030
1031 Ok(())
1032}
1033
1034fn analyze_srec(analysis: &mut BinaryAnalysis, contents: &[u8]) -> anyhow::Result<()> {
1035 tracing::info!("Starting Motorola S-Record analysis using srec library");
1036 analysis.format = "motorola-srec".to_string();
1037 analysis.architecture = "embedded".to_string();
1038 analysis.languages.push("Firmware".to_string());
1039
1040 let srec_content = String::from_utf8_lossy(contents);
1041 let mut memory_segments = Vec::new();
1042 let mut entry_points = Vec::new();
1043 let mut total_data_bytes = 0;
1044 let mut start_address = None;
1045 let mut firmware_data = Vec::new();
1046 let mut min_address = None;
1047 let mut max_address = None;
1048 let mut header_info = None;
1049
1050 for (line_num, line) in srec_content.lines().enumerate() {
1052 let line = line.trim();
1053 if line.is_empty() || !line.starts_with('S') {
1054 continue;
1055 }
1056
1057 if line.len() < 4 {
1058 tracing::warn!("Invalid S-Record at line {}: too short", line_num + 1);
1059 continue;
1060 }
1061
1062 let record_type = match line.chars().nth(1) {
1064 Some(c) => c,
1065 None => continue,
1066 };
1067
1068 let byte_count = match u8::from_str_radix(&line[2..4], 16) {
1069 Ok(count) => count,
1070 Err(_) => continue,
1071 };
1072
1073 match record_type {
1074 '0' => {
1075 if line.len() >= 8 {
1077 let data_start = 8;
1078 let data_end = line.len().saturating_sub(2);
1079 if data_end > data_start {
1080 let data_hex = &line[data_start..data_end];
1081 let mut header_data = Vec::new();
1082 for i in (0..data_hex.len()).step_by(2) {
1083 if i + 1 < data_hex.len() {
1084 if let Ok(byte_val) = u8::from_str_radix(&data_hex[i..i + 2], 16) {
1085 header_data.push(byte_val);
1086 }
1087 }
1088 }
1089 let header_string = String::from_utf8_lossy(&header_data);
1090 if !header_string.trim().is_empty() {
1091 header_info = Some(header_string.trim().to_string());
1092 }
1093 }
1094 }
1095 }
1096 '1' => {
1097 if line.len() >= 8 {
1099 if let Ok(address) = u16::from_str_radix(&line[4..8], 16) {
1100 let data_bytes = byte_count.saturating_sub(3);
1101 total_data_bytes += data_bytes as u32;
1102
1103 let addr32 = address as u32;
1104 min_address = Some(min_address.map_or(addr32, |min: u32| min.min(addr32)));
1105 max_address =
1106 Some(max_address.map_or(addr32 + data_bytes as u32, |max: u32| {
1107 max.max(addr32 + data_bytes as u32)
1108 }));
1109
1110 if line.len() >= 8 + (data_bytes as usize * 2) {
1112 let data_hex = &line[8..8 + (data_bytes as usize * 2)];
1113 for i in (0..data_hex.len()).step_by(2) {
1114 if i + 1 < data_hex.len() {
1115 if let Ok(byte_val) =
1116 u8::from_str_radix(&data_hex[i..i + 2], 16)
1117 {
1118 firmware_data.push(byte_val);
1119 }
1120 }
1121 }
1122 }
1123
1124 memory_segments.push(serde_json::json!({
1125 "address": format!("0x{:04X}", address),
1126 "size": data_bytes,
1127 "type": "data_16bit"
1128 }));
1129 }
1130 }
1131 }
1132 '7' => {
1133 if line.len() >= 12 {
1135 if let Ok(address) = u32::from_str_radix(&line[4..12], 16) {
1136 start_address = Some(address);
1137 entry_points.push(format!("0x{:08X}", address));
1138 analysis.entry_point = Some(format!("0x{:08X}", address));
1139 }
1140 }
1141 }
1142 '8' => {
1143 if line.len() >= 10 {
1145 if let Ok(address) = u32::from_str_radix(&line[4..10], 16) {
1146 start_address = Some(address & 0x00FFFFFF);
1147 entry_points.push(format!("0x{:06X}", address & 0x00FFFFFF));
1148 analysis.entry_point = Some(format!("0x{:06X}", address & 0x00FFFFFF));
1149 }
1150 }
1151 }
1152 '9' => {
1153 if line.len() >= 8 {
1155 if let Ok(address) = u16::from_str_radix(&line[4..8], 16) {
1156 start_address = Some(address as u32);
1157 entry_points.push(format!("0x{:04X}", address));
1158 analysis.entry_point = Some(format!("0x{:04X}", address));
1159 }
1160 }
1161 }
1162 _ => {}
1163 }
1164 }
1165
1166 let firmware_strings = extract_strings(&firmware_data);
1168 analysis.embedded_strings.extend(firmware_strings);
1169
1170 if let Some(header) = header_info {
1172 analysis.embedded_strings.push(header);
1173 }
1174
1175 let mut device_hints = Vec::new();
1177 for string in &analysis.embedded_strings {
1178 let lower = string.to_lowercase();
1179 if lower.contains("bootloader") || lower.contains("boot") {
1180 device_hints.push("bootloader");
1181 }
1182 if lower.contains("interrupt") || lower.contains("isr") {
1183 device_hints.push("interrupt_handler");
1184 }
1185 if lower.contains("can") || lower.contains("ecu") || lower.contains("automotive") {
1186 device_hints.push("automotive_ecu");
1187 analysis.architecture = "automotive".to_string();
1188 }
1189 if lower.contains("motorola") || lower.contains("freescale") || lower.contains("nxp") {
1190 device_hints.push("motorola_mcu");
1191 }
1192 if lower.contains("68k") || lower.contains("68000") {
1193 analysis.architecture = "m68k".to_string();
1194 device_hints.push("motorola_68k");
1195 }
1196 if lower.contains("coldfire") {
1197 analysis.architecture = "coldfire".to_string();
1198 device_hints.push("coldfire_mcu");
1199 }
1200 if lower.contains("powerpc") || lower.contains("ppc") {
1201 analysis.architecture = "powerpc".to_string();
1202 device_hints.push("powerpc_mcu");
1203 }
1204 }
1205
1206 analysis.static_linked = true; let memory_span = if let (Some(min), Some(max)) = (min_address, max_address) {
1210 max - min
1211 } else {
1212 0
1213 };
1214
1215 analysis.metadata = serde_json::json!({
1217 "record_format": "motorola_srec",
1218 "total_data_bytes": total_data_bytes,
1219 "memory_segments": memory_segments,
1220 "entry_points": entry_points,
1221 "start_address": start_address.map(|addr| format!("0x{:08X}", addr)),
1222 "memory_range": {
1223 "min_address": min_address.map(|addr| format!("0x{:08X}", addr)),
1224 "max_address": max_address.map(|addr| format!("0x{:08X}", addr)),
1225 "span_bytes": memory_span
1226 },
1227 "device_hints": device_hints,
1228 "analysis_type": "srec_firmware"
1229 });
1230
1231 tracing::info!(
1232 "S-Record analysis complete: {} data bytes, {} memory segments, memory span: {} bytes",
1233 total_data_bytes,
1234 memory_segments.len(),
1235 memory_span
1236 );
1237
1238 Ok(())
1239}
1240
1241fn analyze_arm_cortex_m(analysis: &mut BinaryAnalysis, contents: &[u8]) -> anyhow::Result<()> {
1242 tracing::info!("Starting ARM Cortex-M firmware analysis with Capstone disassembly");
1243 analysis.format = "arm-cortex-m-firmware".to_string();
1244 analysis.architecture = "arm_cortex_m".to_string();
1245 analysis.languages.push("ARM Assembly".to_string());
1246 analysis.languages.push("C/C++".to_string());
1247
1248 let mut vector_table = Vec::new();
1249 let mut interrupt_handlers = Vec::new();
1250 let mut rtos_indicators = Vec::new();
1251 let mut memory_regions = Vec::new();
1252 let mut stack_pointer = None;
1253 let mut reset_handler = None;
1254 let mut disassembly_info = Vec::new();
1255
1256 let cs = match Capstone::new()
1258 .arm()
1259 .mode(arch::arm::ArchMode::Thumb)
1260 .detail(true)
1261 .build()
1262 {
1263 Ok(cs) => Some(cs),
1264 Err(e) => {
1265 tracing::warn!("Failed to initialize Capstone disassembler: {}", e);
1266 None
1267 }
1268 };
1269
1270 if contents.len() >= 256 {
1272 if contents.len() >= 4 {
1278 let sp_bytes = [contents[0], contents[1], contents[2], contents[3]];
1279 let sp_value = u32::from_le_bytes(sp_bytes);
1280 stack_pointer = Some(sp_value);
1281
1282 if sp_value >= 0x20000000 && sp_value <= 0x20100000 {
1284 tracing::debug!("Valid ARM Cortex-M stack pointer found: 0x{:08X}", sp_value);
1285 memory_regions.push(serde_json::json!({
1286 "type": "RAM",
1287 "start_address": "0x20000000",
1288 "size_estimate": format!("{}KB", (sp_value - 0x20000000) / 1024),
1289 "purpose": "SRAM"
1290 }));
1291 }
1292 }
1293
1294 if contents.len() >= 8 {
1296 let reset_bytes = [contents[4], contents[5], contents[6], contents[7]];
1297 let reset_addr = u32::from_le_bytes(reset_bytes);
1298
1299 let actual_reset_addr = reset_addr & 0xFFFFFFFE;
1301
1302 if actual_reset_addr > 0 && actual_reset_addr < 0x08100000 {
1303 reset_handler = Some(actual_reset_addr);
1304 analysis.entry_point = Some(format!("0x{:08X}", actual_reset_addr));
1305
1306 let mut reset_analysis = serde_json::json!({
1308 "name": "Reset_Handler",
1309 "address": format!("0x{:08X}", actual_reset_addr),
1310 "thumb_mode": (reset_addr & 1) == 1
1311 });
1312
1313 if let Some(ref cs) = cs {
1314 let code_start = if actual_reset_addr >= 0x08000000
1317 && actual_reset_addr < 0x08000000 + contents.len() as u32
1318 {
1319 (actual_reset_addr - 0x08000000) as usize
1320 } else {
1321 0x200 };
1323
1324 if code_start < contents.len() && contents.len() > code_start + 32 {
1325 let code_slice = &contents
1326 [code_start..code_start.min(contents.len()).min(code_start + 32)];
1327 match cs.disasm_all(code_slice, actual_reset_addr as u64) {
1328 Ok(insns) => {
1329 let mut reset_instructions = Vec::new();
1330 for insn in insns.iter().take(8) {
1331 reset_instructions.push(serde_json::json!({
1333 "address": format!("0x{:08X}", insn.address()),
1334 "mnemonic": insn.mnemonic().unwrap_or(""),
1335 "op_str": insn.op_str().unwrap_or("")
1336 }));
1337 }
1338 reset_analysis["disassembly"] =
1339 serde_json::json!(reset_instructions);
1340 disassembly_info.push("Reset handler disassembled");
1341 }
1342 Err(e) => {
1343 tracing::debug!("Failed to disassemble reset handler: {}", e);
1344 }
1345 }
1346 }
1347 }
1348
1349 interrupt_handlers.push(reset_analysis);
1350 tracing::debug!("Reset handler found at: 0x{:08X}", actual_reset_addr);
1351 }
1352 }
1353
1354 let vector_names = [
1356 "Initial_SP",
1357 "Reset_Handler",
1358 "NMI_Handler",
1359 "HardFault_Handler",
1360 "MemManage_Handler",
1361 "BusFault_Handler",
1362 "UsageFault_Handler",
1363 "Reserved",
1364 "Reserved",
1365 "Reserved",
1366 "Reserved",
1367 "SVC_Handler",
1368 "DebugMon_Handler",
1369 "Reserved",
1370 "PendSV_Handler",
1371 "SysTick_Handler",
1372 ];
1373
1374 for (i, &name) in vector_names.iter().enumerate() {
1375 let offset = i * 4;
1376 if offset + 4 <= contents.len() && offset + 4 <= 64 {
1377 let addr_bytes = [
1379 contents[offset],
1380 contents[offset + 1],
1381 contents[offset + 2],
1382 contents[offset + 3],
1383 ];
1384 let addr_value = u32::from_le_bytes(addr_bytes);
1385
1386 vector_table.push(serde_json::json!({
1387 "index": i,
1388 "name": name,
1389 "address": format!("0x{:08X}", addr_value),
1390 "raw_value": format!("0x{:08X}", addr_value)
1391 }));
1392
1393 if i > 0 && addr_value > 0 && addr_value != 0xFFFFFFFF {
1395 let actual_addr = addr_value & 0xFFFFFFFE;
1396 if actual_addr < 0x08100000 && actual_addr > 0x08000000 {
1397 interrupt_handlers.push(serde_json::json!({
1398 "name": name,
1399 "address": format!("0x{:08X}", actual_addr),
1400 "thumb_mode": (addr_value & 1) == 1,
1401 "vector_index": i
1402 }));
1403 }
1404 }
1405 }
1406 }
1407 }
1408
1409 let firmware_strings = extract_strings(contents);
1411 for string in &firmware_strings {
1412 let lower = string.to_lowercase();
1413
1414 if lower.contains("freertos")
1416 || lower.contains("xTaskCreate")
1417 || lower.contains("vTaskDelay")
1418 || lower.contains("xQueueCreate")
1419 {
1420 rtos_indicators.push("FreeRTOS");
1421 }
1422
1423 if lower.contains("rtx")
1425 || lower.contains("osKernelStart")
1426 || lower.contains("osThreadCreate")
1427 {
1428 rtos_indicators.push("ARM RTX");
1429 }
1430
1431 if lower.contains("threadx") || lower.contains("tx_thread_create") {
1433 rtos_indicators.push("ThreadX");
1434 }
1435
1436 if lower.contains("zephyr") || lower.contains("k_thread_create") {
1438 rtos_indicators.push("Zephyr RTOS");
1439 }
1440
1441 if lower.contains("cmsis") || lower.contains("__main") || lower.contains("SystemInit") {
1443 rtos_indicators.push("CMSIS");
1444 }
1445
1446 if lower.contains("hal_") || lower.contains("stm32") {
1448 rtos_indicators.push("STM32 HAL");
1449 }
1450 }
1451
1452 analysis.embedded_strings.extend(firmware_strings);
1454
1455 memory_regions.push(serde_json::json!({
1457 "type": "Flash",
1458 "start_address": "0x08000000",
1459 "purpose": "Program Flash Memory",
1460 "typical_size": "64KB-2MB"
1461 }));
1462
1463 memory_regions.push(serde_json::json!({
1464 "type": "System",
1465 "start_address": "0xE0000000",
1466 "purpose": "System Control Space",
1467 "contains": ["SysTick", "NVIC", "SCB", "MPU", "FPU"]
1468 }));
1469
1470 let mut peripheral_indicators = Vec::new();
1472
1473 let peripheral_bases: &[(u32, &str)] = &[
1475 (0x40000000, "APB1 Peripherals"),
1476 (0x40010000, "APB2 Peripherals"),
1477 (0x40020000, "AHB1 Peripherals"),
1478 (0x50000000, "AHB2 Peripherals"),
1479 (0xE0000000, "Cortex-M System"),
1480 ];
1481
1482 for (base_addr, name) in peripheral_bases {
1483 let addr_bytes = base_addr.to_le_bytes();
1485 if contents.windows(4).any(|window| window == addr_bytes) {
1486 peripheral_indicators.push(serde_json::json!({
1487 "base_address": format!("0x{:08X}", base_addr),
1488 "name": name
1489 }));
1490 }
1491 }
1492
1493 rtos_indicators.sort();
1495 rtos_indicators.dedup();
1496
1497 analysis.static_linked = true; let vector_table_size = vector_table.len() * 4;
1501 let total_handlers = interrupt_handlers.len();
1502
1503 analysis.metadata = serde_json::json!({
1505 "firmware_type": "arm_cortex_m",
1506 "vector_table": {
1507 "entries": vector_table,
1508 "size_bytes": vector_table_size,
1509 "total_vectors": vector_table.len()
1510 },
1511 "interrupt_handlers": interrupt_handlers,
1512 "stack_pointer": stack_pointer.map(|sp| format!("0x{:08X}", sp)),
1513 "reset_handler": reset_handler.map(|rh| format!("0x{:08X}", rh)),
1514 "rtos_detected": rtos_indicators,
1515 "memory_regions": memory_regions,
1516 "peripheral_indicators": peripheral_indicators,
1517 "analysis_type": "arm_cortex_m_firmware",
1518 "disassembly": {
1519 "capstone_available": cs.is_some(),
1520 "analysis_info": disassembly_info
1521 },
1522 "statistics": {
1523 "total_interrupt_handlers": total_handlers,
1524 "has_rtos": !rtos_indicators.is_empty(),
1525 "has_hal": rtos_indicators.iter().any(|s| s.contains("HAL")),
1526 "disassembly_performed": !disassembly_info.is_empty()
1527 }
1528 });
1529
1530 tracing::info!(
1531 "ARM Cortex-M analysis complete: {} interrupt handlers, {} RTOS indicators, stack at 0x{:08X}",
1532 total_handlers,
1533 rtos_indicators.len(),
1534 stack_pointer.unwrap_or(0)
1535 );
1536
1537 Ok(())
1538}
1539
1540fn analyze_raw_firmware_blob(analysis: &mut BinaryAnalysis, contents: &[u8]) -> anyhow::Result<()> {
1541 tracing::info!(
1542 "Starting raw firmware blob analysis ({} bytes)",
1543 contents.len()
1544 );
1545
1546 let text_ratio = contents
1547 .iter()
1548 .filter(|&&b| b.is_ascii_graphic() || b.is_ascii_whitespace())
1549 .count() as f64
1550 / contents.len() as f64;
1551
1552 let mut architecture_hints = Vec::new();
1553 let mut firmware_indicators = Vec::new();
1554 let mut compression_detected = Vec::new();
1555 let mut crypto_indicators = Vec::new();
1556
1557 if contents.len() >= 4 {
1561 let thumb_patterns = [
1563 [0x00, 0xBF], [0x70, 0x47], [0x08, 0x68], ];
1567
1568 for pattern in &thumb_patterns {
1569 if contents.windows(2).any(|w| w == pattern) {
1570 architecture_hints.push("ARM Thumb");
1571 break;
1572 }
1573 }
1574
1575 let x86_patterns: &[&[u8]] = &[
1577 &[0x55u8, 0x89, 0xE5], &[0x48u8, 0x89, 0xE5], &[0xEBu8, 0xFE], ];
1581
1582 for pattern in x86_patterns {
1583 if contents.windows(pattern.len()).any(|w| w == *pattern) {
1584 architecture_hints.push("x86");
1585 break;
1586 }
1587 }
1588
1589 if contents
1591 .windows(4)
1592 .any(|w| matches!(w, [0x27, 0xBD, _, _] | [_, _, 0xBD, 0x27]))
1593 {
1594 architecture_hints.push("MIPS");
1595 }
1596
1597 if contents
1599 .windows(4)
1600 .any(|w| matches!(w, [0x94, 0x21, _, _] | [_, _, 0x21, 0x94]))
1601 {
1602 architecture_hints.push("PowerPC");
1603 }
1604 }
1605
1606 let bootloader_strings = [
1608 "U-Boot",
1609 "GRUB",
1610 "bootloader",
1611 "BOOT",
1612 "loader",
1613 "SPL",
1614 "MLO",
1615 "bootstrap",
1616 "uboot",
1617 ];
1618
1619 for &pattern in &bootloader_strings {
1620 if contents.windows(pattern.len()).any(|w| {
1621 String::from_utf8_lossy(w)
1622 .to_lowercase()
1623 .contains(&pattern.to_lowercase())
1624 }) {
1625 firmware_indicators.push("bootloader");
1626 break;
1627 }
1628 }
1629
1630 if contents.len() >= 4 {
1632 match &contents[0..4.min(contents.len())] {
1633 [0x1F, 0x8B, _, _] => compression_detected.push("gzip"),
1634 [0x42, 0x5A, 0x68, _] => compression_detected.push("bzip2"),
1635 [0xFD, 0x37, 0x7A, 0x58] => compression_detected.push("xz"),
1636 [0x28, 0xB5, 0x2F, 0xFD] => compression_detected.push("zstd"),
1637 [0x04, 0x22, 0x4D, 0x18] => compression_detected.push("lz4"),
1638 _ => {}
1639 }
1640 }
1641
1642 let crypto_patterns: &[(&str, &[u8])] = &[
1644 ("AES", b"AES"),
1645 ("RSA", b"RSA"),
1646 ("SHA", b"SHA"),
1647 ("OpenSSL", b"OpenSSL"),
1648 ("mbedtls", b"mbedtls"),
1649 ("WolfSSL", b"wolfSSL"),
1650 ];
1651
1652 for (name, pattern) in crypto_patterns {
1653 if contents.windows(pattern.len()).any(|w| w == *pattern) {
1654 crypto_indicators.push(*name);
1655 }
1656 }
1657
1658 let device_patterns = [
1660 ("ESP32", b"ESP32" as &[u8]),
1661 ("Arduino", b"Arduino"),
1662 ("Raspberry Pi", b"Raspberry Pi"),
1663 ("STM32", b"STM32"),
1664 ("Nordic", b"Nordic"),
1665 ("Qualcomm", b"Qualcomm"),
1666 ("Broadcom", b"Broadcom"),
1667 ];
1668
1669 for (device, pattern) in &device_patterns {
1670 if contents.windows(pattern.len()).any(|w| w == *pattern) {
1671 firmware_indicators.push(*device);
1672 }
1673 }
1674
1675 if analysis.format == "unknown" {
1677 if text_ratio > 0.8 {
1678 analysis.format = if contents.len() < 1024 {
1679 "text/small"
1680 } else {
1681 "text"
1682 }
1683 .to_string();
1684
1685 let text = String::from_utf8_lossy(contents);
1686 if text.starts_with("#!") {
1687 analysis.format = "script".to_string();
1688 analysis.languages.push("script".to_string());
1689 }
1690
1691 if text.contains("function") || text.contains("def ") {
1693 analysis.languages.push("script".to_string());
1694 }
1695 if text.contains("#include") || text.contains("int main") {
1696 analysis.languages.push("C/C++".to_string());
1697 }
1698 if text.contains("pub fn") || text.contains("fn main") {
1699 analysis.languages.push("Rust".to_string());
1700 }
1701 } else if !compression_detected.is_empty() {
1702 analysis.format = "compressed-firmware".to_string();
1703 analysis.languages.push("Compressed Binary".to_string());
1704 } else if !firmware_indicators.is_empty() {
1705 analysis.format = "firmware-blob".to_string();
1706 analysis.languages.push("Firmware".to_string());
1707 } else if contents.len() < 50 {
1708 analysis.format = "micro-binary".to_string();
1709 } else {
1710 analysis.format = "raw-binary".to_string();
1711 }
1712 }
1713
1714 analysis.architecture = if architecture_hints.is_empty() {
1716 "unknown".to_string()
1717 } else {
1718 architecture_hints.join(", ")
1719 };
1720
1721 let extracted_strings = extract_strings(contents);
1723 analysis.embedded_strings.extend(extracted_strings);
1724
1725 let mut version_hints = Vec::new();
1727 for string in &analysis.embedded_strings {
1728 if string.len() > 2 && string.len() < 20 {
1729 if string.chars().any(|c| c.is_ascii_digit())
1731 && (string.contains('.')
1732 || string.to_lowercase().contains('v')
1733 || string.to_lowercase().contains("rev"))
1734 {
1735 version_hints.push(string.clone());
1736 }
1737 }
1738 }
1739
1740 analysis.static_linked = !firmware_indicators.is_empty() || text_ratio < 0.1;
1742
1743 let entropy = calculate_entropy(contents);
1745 let is_likely_encrypted = entropy > 7.5;
1746 let is_likely_compressed = entropy > 7.0 && compression_detected.is_empty();
1747
1748 analysis.metadata = serde_json::json!({
1750 "analysis_type": "raw_firmware_blob",
1751 "file_characteristics": {
1752 "size_bytes": contents.len(),
1753 "ascii_ratio": text_ratio,
1754 "entropy": entropy,
1755 "likely_encrypted": is_likely_encrypted,
1756 "likely_compressed": is_likely_compressed
1757 },
1758 "architecture_hints": architecture_hints,
1759 "firmware_indicators": firmware_indicators,
1760 "compression_detected": compression_detected,
1761 "crypto_indicators": crypto_indicators,
1762 "version_hints": version_hints,
1763 "detection_confidence": {
1764 "architecture": if architecture_hints.is_empty() { "low" } else { "medium" },
1765 "firmware_type": if firmware_indicators.is_empty() { "low" } else { "high" },
1766 "format": if text_ratio > 0.8 { "high" } else if !firmware_indicators.is_empty() { "medium" } else { "low" }
1767 }
1768 });
1769
1770 tracing::info!(
1771 "Raw firmware blob analysis complete: format={}, arch={}, {} indicators, entropy={:.2}",
1772 analysis.format,
1773 analysis.architecture,
1774 firmware_indicators.len(),
1775 entropy
1776 );
1777
1778 Ok(())
1779}
1780
/// Computes the Shannon entropy of `data` in bits per byte.
///
/// The result lies between `0.0` (every byte identical, or empty input) and
/// `8.0` (all 256 byte values equally frequent).
fn calculate_entropy(data: &[u8]) -> f64 {
    if data.is_empty() {
        return 0.0;
    }

    // `usize` counters cannot overflow: no value can occur more than
    // `data.len()` times, whereas `u32` would wrap on inputs of 4 GiB or more.
    let mut counts = [0usize; 256];
    for &byte in data {
        counts[usize::from(byte)] += 1;
    }

    let len = data.len() as f64;
    counts
        .iter()
        .filter(|&&count| count > 0)
        .fold(0.0_f64, |entropy, &count| {
            let p = count as f64 / len;
            entropy - p * p.log2()
        })
}
1800
1801fn analyze_dicom_medical_imaging(
1802 analysis: &mut BinaryAnalysis,
1803 contents: &[u8],
1804) -> anyhow::Result<()> {
1805 tracing::info!("Starting DICOM medical imaging analysis using dicom library");
1806 analysis.format = "dicom-medical-imaging".to_string();
1807 analysis.architecture = "medical-device".to_string();
1808 analysis.languages.push("Medical Software".to_string());
1809
1810 let mut dicom_tags = Vec::new();
1811 let mut fda_compliance_indicators = Vec::new();
1812 let mut medical_protocols = Vec::new();
1813 let mut embedded_software_components = Vec::new();
1814 let mut security_features = Vec::new();
1815 let mut patient_data_detected = false;
1816
1817 let has_dicom_preamble = contents.len() >= 132 && &contents[128..132] == b"DICM";
1819 let dicom_obj = if has_dicom_preamble {
1820 Some(()) } else {
1822 None
1823 };
1824
1825 if dicom_obj.is_some() {
1826 analysis.format = "dicom-file".to_string();
1827
1828 if contents.len() > 132 {
1830 let dicom_data = &contents[132..];
1831
1832 for i in (0..dicom_data.len().saturating_sub(8)).step_by(2) {
1834 if i + 8 <= dicom_data.len() {
1835 let group = u16::from_le_bytes([dicom_data[i], dicom_data[i + 1]]);
1836 let element = u16::from_le_bytes([dicom_data[i + 2], dicom_data[i + 3]]);
1837
1838 if group == 0x0010 && (element == 0x0010 || element == 0x0020) {
1840 patient_data_detected = true;
1841 medical_protocols.push("Patient Data");
1842 }
1843
1844 if group == 0x0008 && element == 0x0070 {
1846 medical_protocols.push("Manufacturer");
1847 }
1848
1849 if dicom_tags.len() > 20 {
1851 break;
1852 }
1853
1854 dicom_tags.push(serde_json::json!({
1855 "group": format!("0x{:04X}", group),
1856 "element": format!("0x{:04X}", element),
1857 "tag": format!("({:04X},{:04X})", group, element)
1858 }));
1859 }
1860 }
1861 }
1862 } else {
1863 analysis.format = "medical-imaging-software".to_string();
1865 }
1866
1867 let medical_strings = extract_strings(contents);
1869 analysis.embedded_strings.extend(medical_strings);
1870
1871 for string in &analysis.embedded_strings {
1873 let lower = string.to_lowercase();
1874
1875 if lower.contains("fda") || lower.contains("510k") || lower.contains("pma") {
1877 fda_compliance_indicators.push("FDA Regulatory");
1878 }
1879 if lower.contains("ce mark") || lower.contains("ce marked") {
1880 fda_compliance_indicators.push("CE Marking");
1881 }
1882 if lower.contains("iso 13485") || lower.contains("iso13485") {
1883 fda_compliance_indicators.push("ISO 13485");
1884 }
1885 if lower.contains("iec 62304") || lower.contains("iec62304") {
1886 fda_compliance_indicators.push("IEC 62304");
1887 }
1888 if lower.contains("hipaa") {
1889 fda_compliance_indicators.push("HIPAA Compliance");
1890 }
1891
1892 if lower.contains("dicom") {
1894 medical_protocols.push("DICOM Protocol");
1895 }
1896 if lower.contains("hl7") || lower.contains("fhir") {
1897 medical_protocols.push("HL7/FHIR");
1898 }
1899 if lower.contains("pacs") {
1900 medical_protocols.push("PACS System");
1901 }
1902 if lower.contains("modality") || lower.contains("worklist") {
1903 medical_protocols.push("Modality Worklist");
1904 }
1905 if lower.contains("mpps") {
1906 medical_protocols.push("MPPS Protocol");
1907 }
1908
1909 if lower.contains("qt") || lower.contains("qtcore") {
1911 embedded_software_components.push("Qt Framework");
1912 }
1913 if lower.contains("opencv") {
1914 embedded_software_components.push("OpenCV");
1915 }
1916 if lower.contains("vtk") {
1917 embedded_software_components.push("VTK Visualization");
1918 }
1919 if lower.contains("itk") {
1920 embedded_software_components.push("ITK Medical Imaging");
1921 }
1922 if lower.contains("gdcm") {
1923 embedded_software_components.push("GDCM DICOM Library");
1924 }
1925 if lower.contains("dcmtk") {
1926 embedded_software_components.push("DCMTK DICOM Toolkit");
1927 }
1928 if lower.contains("cornerstone") {
1929 embedded_software_components.push("Cornerstone Medical Imaging");
1930 }
1931
1932 if lower.contains("encryption") || lower.contains("encrypt") {
1934 security_features.push("Data Encryption");
1935 }
1936 if lower.contains("digital signature") || lower.contains("signature") {
1937 security_features.push("Digital Signatures");
1938 }
1939 if lower.contains("audit log") || lower.contains("audit trail") {
1940 security_features.push("Audit Logging");
1941 }
1942 if lower.contains("access control") || lower.contains("authentication") {
1943 security_features.push("Access Control");
1944 }
1945 if lower.contains("tls") || lower.contains("ssl") {
1946 security_features.push("TLS/SSL");
1947 }
1948 }
1949
1950 let mut device_identifiers = Vec::new();
1952 let manufacturers = [
1953 ("GE Healthcare", b"GE Medical" as &[u8]),
1954 ("Siemens", b"Siemens"),
1955 ("Philips", b"Philips"),
1956 ("Canon Medical", b"Canon"),
1957 ("Fujifilm", b"Fujifilm"),
1958 ("Hologic", b"Hologic"),
1959 ("Carestream", b"Carestream"),
1960 ("Agfa", b"Agfa"),
1961 ];
1962
1963 for (name, pattern) in &manufacturers {
1964 if contents
1965 .windows(pattern.len())
1966 .any(|w| w.to_ascii_lowercase() == pattern.to_ascii_lowercase())
1967 {
1968 device_identifiers.push(*name);
1969 }
1970 }
1971
1972 fda_compliance_indicators.sort();
1974 fda_compliance_indicators.dedup();
1975 medical_protocols.sort();
1976 medical_protocols.dedup();
1977 embedded_software_components.sort();
1978 embedded_software_components.dedup();
1979 security_features.sort();
1980 security_features.dedup();
1981
1982 analysis.static_linked = false; let risk_level = if fda_compliance_indicators.len() >= 2 && security_features.len() >= 2 {
1986 "Low" } else if fda_compliance_indicators.len() >= 1 {
1988 "Medium" } else if patient_data_detected {
1990 "High" } else {
1992 "Medium" };
1994
1995 analysis.metadata = serde_json::json!({
1997 "medical_device_type": "dicom_medical_imaging",
1998 "dicom_analysis": {
1999 "is_dicom_file": dicom_obj.is_some(),
2000 "dicom_tags_found": dicom_tags.len(),
2001 "sample_tags": dicom_tags.into_iter().take(10).collect::<Vec<_>>(),
2002 "patient_data_detected": patient_data_detected
2003 },
2004 "compliance_indicators": {
2005 "fda_compliance": fda_compliance_indicators,
2006 "medical_protocols": medical_protocols,
2007 "risk_assessment": risk_level
2008 },
2009 "embedded_components": embedded_software_components,
2010 "security_features": security_features,
2011 "device_identifiers": device_identifiers,
2012 "analysis_type": "dicom_medical_imaging",
2013 "regulatory_notes": {
2014 "requires_fda_clearance": fda_compliance_indicators.is_empty(),
2015 "hipaa_relevant": patient_data_detected || security_features.iter().any(|s| s.contains("Encryption") || s.contains("Access Control")),
2016 "dicom_compliant": medical_protocols.iter().any(|p| p.contains("DICOM"))
2017 }
2018 });
2019
2020 tracing::info!(
2021 "DICOM medical imaging analysis complete: {} compliance indicators, {} protocols, {} security features",
2022 fda_compliance_indicators.len(),
2023 medical_protocols.len(),
2024 security_features.len()
2025 );
2026
2027 Ok(())
2028}
2029
/// Guesses a MIME type for a file from its leading magic bytes, falling back to
/// the file-name extension.
///
/// Magic bytes win over the extension. Recognized magics: ELF, `MZ`
/// executables, Mach-O (32- and 64-bit, either byte order) and WebAssembly.
/// The extension is the text after the last `.` in `file_name`, compared
/// case-insensitively; a name without any `.` has no extension. Anything
/// unrecognized yields `"application/octet-stream"`.
fn detect_file_type_fallback(file_name: &str, contents: &[u8]) -> String {
    const OCTET_STREAM: &str = "application/octet-stream";

    let from_magic = match contents.get(..4) {
        Some([0x7f, b'E', b'L', b'F']) => Some("application/x-elf"),
        Some([b'M', b'Z', _, _]) => Some("application/x-msdownload"),
        // 32-bit Mach-O (FEEDFACE) in both byte orders.
        Some([0xfe, 0xed, 0xfa, 0xce]) | Some([0xce, 0xfa, 0xed, 0xfe]) => {
            Some("application/x-mach-binary")
        }
        // 64-bit Mach-O (FEEDFACF) in both byte orders.
        Some([0xfe, 0xed, 0xfa, 0xcf]) | Some([0xcf, 0xfa, 0xed, 0xfe]) => {
            Some("application/x-mach-binary")
        }
        Some([0x00, 0x61, 0x73, 0x6d]) => Some("application/wasm"),
        _ => None,
    };
    if let Some(mime) = from_magic {
        return mime.to_string();
    }

    // Require an actual '.' so that a bare name such as "exe" or "a" is not
    // mistaken for an extension. '.' is one byte, so `dot + 1` is a valid
    // character boundary.
    let extension = file_name
        .rfind('.')
        .map(|dot| file_name[dot + 1..].to_lowercase());

    let mime = match extension.as_deref() {
        Some("exe") | Some("dll") => "application/x-msdownload",
        Some("so") | Some("a") => "application/x-sharedlib",
        Some("wasm") => "application/wasm",
        _ => OCTET_STREAM,
    };
    mime.to_string()
}
2057
2058fn extract_strings(contents: &[u8]) -> Vec<String> {
2059 let mut strings = Vec::new();
2060 let mut current_string = Vec::new();
2061
2062 tracing::debug!("Extracting strings from {} bytes", contents.len());
2063
2064 for &byte in contents {
2065 if byte.is_ascii_graphic() || byte == b' ' || byte == b'\t' {
2066 current_string.push(byte);
2067 } else {
2068 if current_string.len() >= 3 {
2069 if let Ok(s) = String::from_utf8(current_string.clone()) {
2071 if !s.trim().is_empty() && !is_junk_string(&s) {
2073 strings.push(s.trim().to_string());
2074 }
2075 }
2076 }
2077 current_string.clear();
2078 }
2079 }
2080
2081 if current_string.len() >= 3 {
2083 if let Ok(s) = String::from_utf8(current_string) {
2084 if !s.trim().is_empty() && !is_junk_string(&s) {
2085 strings.push(s.trim().to_string());
2086 }
2087 }
2088 }
2089
2090 strings.sort();
2092 strings.dedup();
2093 strings.truncate(50);
2094
2095 tracing::debug!("Extracted {} strings", strings.len());
2096 strings
2097}
2098
/// Reports whether `s` is not worth keeping as an extracted string.
///
/// A string is junk when it is longer than 200 bytes, consists solely of NUL
/// and space characters, or consists solely of ASCII punctuation. The empty
/// string satisfies both "solely" conditions and is therefore junk.
fn is_junk_string(s: &str) -> bool {
    if s.len() > 200 {
        return true;
    }

    // Byte-wise checks are enough: every byte of a non-ASCII character is
    // >= 0x80, so it is neither NUL, a space, nor ASCII punctuation.
    let only_padding = !s.bytes().any(|b| b != 0 && b != b' ');
    let only_punctuation = !s.bytes().any(|b| !b.is_ascii_punctuation());

    only_padding || only_punctuation
}
2105
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty input must still produce a successful analysis that echoes the
    /// file name and reports a size of zero bytes.
    #[tokio::test]
    async fn test_analyze_empty() {
        let no_bytes: &[u8] = &[];

        let analysis = analyze_binary("test.bin", no_bytes)
            .await
            .expect("analysis of an empty file should succeed");

        assert_eq!(analysis.file_name, "test.bin");
        assert_eq!(analysis.size_bytes, 0);
    }
}