1use super::{BinaryAnalysis, extract_license_info, extract_version_info};
2use crate::enterprise::crypto::CryptoProvider;
3use chrono::Utc;
4use goblin::{
5 Object as GoblinObject,
6 elf::Elf,
7 mach::{MachO, load_command::CommandVariant},
8 pe::PE,
9};
10use infer;
11use object::{Object, ObjectSymbol};
12use sha2::{Digest, Sha256};
13use std::collections::HashSet;
14use uuid::Uuid;
15use wasmparser::{Parser, Payload};
16
17pub async fn analyze_binary(
18 file_name: &str,
19 contents: &[u8],
20 crypto_provider: &CryptoProvider,
21) -> anyhow::Result<BinaryAnalysis> {
22 tracing::info!(
23 "Starting binary analysis for '{}' ({} bytes)",
24 file_name,
25 contents.len()
26 );
27
28 if contents.len() < 50 {
30 tracing::warn!(
31 "File is very small ({} bytes), likely not a binary executable",
32 contents.len()
33 );
34 return analyze_small_file(file_name, contents, crypto_provider);
35 }
36
37 let sha256_hash = Sha256::digest(contents);
38 let alternative_hash = crypto_provider.hash_alternative(contents)?;
39
40 let detected_type = infer::get(contents);
42 let file_type = if let Some(kind) = detected_type {
43 tracing::info!(
44 "Detected file type: {} ({})",
45 kind.mime_type(),
46 kind.extension()
47 );
48 kind.mime_type().to_string()
49 } else {
50 tracing::debug!("Could not detect file type, using fallback");
51 detect_file_type_fallback(file_name, contents)
52 };
53
54 let mut analysis = BinaryAnalysis {
55 id: Uuid::new_v4(),
56 file_name: file_name.to_string(),
57 format: file_type.clone(),
58 architecture: "unknown".to_string(),
59 languages: Vec::new(),
60 detected_symbols: Vec::new(),
61 embedded_strings: extract_strings(contents),
62 suspected_secrets: Vec::new(),
63 imports: Vec::new(),
64 exports: Vec::new(),
65 hash_sha256: format!("{:x}", sha256_hash),
66 hash_blake3: Some(hex::encode(&alternative_hash)),
67 size_bytes: contents.len() as u64,
68 linked_libraries: Vec::new(),
69 static_linked: false,
70 version_info: None,
71 license_info: None,
72 metadata: serde_json::json!({
73 "fips_mode": crypto_provider.fips_enabled,
74 "hash_algorithm": if crypto_provider.fips_enabled { "SHA-512" } else { "Blake3" }
75 }),
76 created_at: Utc::now(),
77 sbom: None,
78 };
79
80 let mut parsed_successfully = false;
82
83 if contents.len() >= 4 {
84 match &contents[0..4] {
85 [0x7f, b'E', b'L', b'F'] => {
86 tracing::info!("ELF magic detected, using goblin ELF parser");
87 if let Ok(GoblinObject::Elf(elf)) = GoblinObject::parse(contents) {
88 analyze_elf(&mut analysis, &elf, contents)?;
89 parsed_successfully = true;
90 }
91 }
92 [b'M', b'Z', _, _] => {
93 tracing::info!("PE magic detected, using goblin PE parser");
94 if let Ok(GoblinObject::PE(pe)) = GoblinObject::parse(contents) {
95 analyze_pe(&mut analysis, &pe, contents)?;
96 parsed_successfully = true;
97 }
98 }
99 [0xfe, 0xed, 0xfa, 0xce] | [0xce, 0xfa, 0xed, 0xfe] => {
100 tracing::info!("Mach-O magic detected, using goblin Mach-O parser");
101 if let Ok(GoblinObject::Mach(mach)) = GoblinObject::parse(contents) {
102 match mach {
103 goblin::mach::Mach::Fat(_) => {
104 analysis.format = "macho-fat".to_string();
105 analysis.architecture = "multi".to_string();
106 }
107 goblin::mach::Mach::Binary(macho) => {
108 analyze_macho(&mut analysis, &macho, contents)?
109 }
110 }
111 parsed_successfully = true;
112 }
113 }
114 [0x00, 0x61, 0x73, 0x6d] => {
115 tracing::info!("WASM magic detected, using wasmparser");
116 if analyze_wasm(&mut analysis, contents).is_ok() {
117 parsed_successfully = true;
118 }
119 }
120 _ => {}
121 }
122 }
123
124 if !parsed_successfully {
125 tracing::debug!("No specific magic bytes found, attempting generic goblin parsing...");
126 match GoblinObject::parse(contents) {
127 Ok(obj) => {
128 tracing::info!("Successfully parsed with goblin (generic)");
129 match obj {
130 GoblinObject::Elf(elf) => {
131 tracing::info!("Detected ELF binary (generic)");
132 analyze_elf(&mut analysis, &elf, contents)?;
133 parsed_successfully = true;
134 }
135 GoblinObject::PE(pe) => {
136 tracing::info!("Detected PE binary (generic)");
137 analyze_pe(&mut analysis, &pe, contents)?;
138 parsed_successfully = true;
139 }
140 GoblinObject::Mach(mach) => {
141 tracing::info!("Detected Mach-O binary (generic)");
142 match mach {
143 goblin::mach::Mach::Fat(_) => {
144 analysis.format = "macho-fat".to_string();
145 analysis.architecture = "multi".to_string();
146 }
147 goblin::mach::Mach::Binary(macho) => {
148 analyze_macho(&mut analysis, &macho, contents)?
149 }
150 }
151 parsed_successfully = true;
152 }
153 GoblinObject::Archive(_) => {
154 tracing::info!("Detected archive");
155 analysis.format = "archive".to_string();
156 parsed_successfully = true;
157 }
158 _ => {
159 tracing::debug!("Unknown goblin object type");
160 }
161 }
162 }
163 Err(e) => {
164 tracing::debug!("Goblin parsing failed: {}, trying WebAssembly", e);
165 if analyze_wasm(&mut analysis, contents).is_ok() {
166 tracing::info!("Successfully parsed as WebAssembly");
167 parsed_successfully = true;
168 }
169 }
170 }
171 }
172
173 if !parsed_successfully {
174 tracing::info!("All specialized parsers failed, using generic analysis");
175 analyze_unknown_binary(&mut analysis, contents)?;
176 } else {
177 tracing::info!("Successfully analyzed {} as {}", file_name, analysis.format);
178 }
179
180 tracing::debug!("Extracting version and license metadata");
182 analysis.version_info = Some(extract_version_info(
183 contents,
184 &analysis.embedded_strings,
185 &analysis.format,
186 ));
187 analysis.license_info = Some(extract_license_info(&analysis.embedded_strings));
188
189 tracing::info!(
190 "Metadata extraction complete: version_confidence={:.2}, license_confidence={:.2}",
191 analysis
192 .version_info
193 .as_ref()
194 .map(|v| v.confidence)
195 .unwrap_or(0.0),
196 analysis
197 .license_info
198 .as_ref()
199 .map(|l| l.confidence)
200 .unwrap_or(0.0)
201 );
202
203 Ok(analysis)
204}
205
206fn analyze_macho(
207 analysis: &mut BinaryAnalysis,
208 macho: &MachO,
209 contents: &[u8],
210) -> anyhow::Result<()> {
211 analysis.format = "macho".to_string();
212
213 analysis.architecture = match macho.header.cputype() {
215 goblin::mach::constants::cputype::CPU_TYPE_X86_64 => "x86_64".to_string(),
216 goblin::mach::constants::cputype::CPU_TYPE_ARM64 => "aarch64".to_string(),
217 goblin::mach::constants::cputype::CPU_TYPE_X86 => "i386".to_string(),
218 _ => format!("unknown({})", macho.header.cputype()),
219 };
220
221 let mut symbol_set = HashSet::new();
223 if let Some(symbols) = &macho.symbols {
224 for symbol in symbols.iter() {
225 if let Ok((name, _)) = symbol {
226 if !name.is_empty() {
227 symbol_set.insert(name.to_string());
228 analysis.detected_symbols.push(name.to_string());
229 }
230 }
231 }
232 }
233
234 for lib in &macho.libs {
236 let lib_name = lib.to_string();
237 analysis.linked_libraries.push(lib_name.clone());
238 analysis.embedded_strings.push(lib_name.clone());
240 if let Some(version) = extract_version_from_lib_name(&lib_name) {
242 analysis.embedded_strings.push(version);
243 }
244 }
245
246 if let Ok(obj_file) = object::File::parse(contents) {
248 for symbol in obj_file.symbols() {
249 if let Ok(name) = symbol.name() {
250 if !name.is_empty() {
251 if symbol.is_undefined() {
252 analysis.imports.push(name.to_string());
253 analysis.embedded_strings.push(name.to_string());
254 } else if symbol.is_global() {
255 analysis.exports.push(name.to_string());
256 }
257 symbol_set.insert(name.to_string());
258 }
259 }
260 }
261 }
262
263 let mut metadata = serde_json::json!({
265 "analysis_type": "macho",
266 "load_commands": [],
267 "frameworks": [],
268 "min_os_version": null,
269 });
270
271 for lc in macho.load_commands.iter() {
273 match lc.command {
274 CommandVariant::LoadDylib(ref dylib) => {
275 let offset = dylib.dylib.name as usize;
276 if offset < contents.len() {
277 let name_bytes = &contents[offset..];
278 if let Some(end) = name_bytes.iter().position(|&b| b == 0) {
279 if let Ok(name_str) = std::str::from_utf8(&name_bytes[..end]) {
280 if name_str.contains(".framework") {
281 metadata["frameworks"]
282 .as_array_mut()
283 .unwrap()
284 .push(serde_json::Value::String(name_str.to_string()));
285 analysis.embedded_strings.push(name_str.to_string());
286 }
287 }
288 }
289 }
290 }
291 CommandVariant::VersionMinMacosx(ref ver) => {
292 let (major, minor) = unpack_version(ver.version);
293 metadata["min_os_version"] =
294 serde_json::Value::String(format!("{}.{}", major, minor));
295 }
296 CommandVariant::BuildVersion(ref build) => {
297 let (major, minor) = unpack_version(build.minos);
298 metadata["min_os_version"] =
299 serde_json::Value::String(format!("{}.{}", major, minor));
300 }
301 _ => {}
302 }
303 metadata["load_commands"]
304 .as_array_mut()
305 .unwrap()
306 .push(serde_json::Value::String(format!("{:?}", lc.command)));
307 }
308
309 analysis.static_linked = macho.libs.is_empty() && symbol_set.iter().any(|s| s.contains("main"));
311
312 let cpe_candidates = extract_cpe_candidates(
314 &analysis.linked_libraries,
315 &analysis.imports,
316 &analysis.detected_symbols,
317 );
318 analysis.metadata = serde_json::json!({
319 "macho_metadata": metadata,
320 "cpe_candidates": cpe_candidates,
321 });
322
323 tracing::info!(
324 "Mach-O analysis complete: {} symbols, {} libraries, {} imports, {} exports",
325 analysis.detected_symbols.len(),
326 analysis.linked_libraries.len(),
327 analysis.imports.len(),
328 analysis.exports.len()
329 );
330
331 Ok(())
332}
333
/// Extracts a dotted numeric version from a library file name.
///
/// The name is split on `.` and the first run of consecutive all-digit components is
/// returned joined by `.`: `"libssl.so.1.1"` yields `"1.1"` and
/// `"libcrypto.1.0.0.dylib"` yields `"1.0.0"`.
///
/// Returns `None` when no component consists solely of ASCII digits. Empty components
/// (from a leading, trailing or doubled dot) never count as a version.
fn extract_version_from_lib_name(lib_name: &str) -> Option<String> {
    fn is_numeric(part: &str) -> bool {
        !part.is_empty() && part.bytes().all(|b| b.is_ascii_digit())
    }

    let version: Vec<&str> = lib_name
        .split('.')
        .skip_while(|part| !is_numeric(part))
        .take_while(|part| is_numeric(part))
        .collect();

    if version.is_empty() {
        None
    } else {
        Some(version.join("."))
    }
}
344
/// Splits a packed version word into `(major, minor)`.
///
/// The upper 16 bits hold the major number and the next 8 bits the minor number; the
/// lowest 8 bits are ignored.
fn unpack_version(version: u32) -> (u32, u32) {
    let [major_hi, major_lo, minor, _lowest] = version.to_be_bytes();
    let major = u16::from_be_bytes([major_hi, major_lo]);
    (u32::from(major), u32::from(minor))
}
351
352fn extract_cpe_candidates(libs: &[String], imports: &[String], symbols: &[String]) -> Vec<String> {
354 let mut cpes = HashSet::new();
355 for item in libs.iter().chain(imports.iter()).chain(symbols.iter()) {
356 let item_lower = item.to_lowercase();
357 if item_lower.contains("openssl")
359 || item_lower.contains("libcrypto")
360 || item_lower.contains("libssl")
361 {
362 if let Some(version) = extract_version_from_lib_name(&item_lower) {
363 cpes.insert(format!(
364 "cpe:2.3:a:openssl:openssl:{}:*:*:*:*:*:*:*",
365 version
366 ));
367 } else {
368 cpes.insert("cpe:2.3:a:openssl:openssl:*:*:*:*:*:*:*:*".to_string());
369 }
370 }
371 if item_lower.contains("zlib") {
373 if let Some(version) = extract_version_from_lib_name(&item_lower) {
374 cpes.insert(format!("cpe:2.3:a:zlib:zlib:{}:*:*:*:*:*:*:*", version));
375 }
376 }
377 if item_lower.contains("curl") || item_lower.contains("libcurl") {
378 if let Some(version) = extract_version_from_lib_name(&item_lower) {
379 cpes.insert(format!("cpe:2.3:a:curl:curl:{}:*:*:*:*:*:*:*", version));
380 }
381 }
382 }
383 cpes.into_iter().collect()
384}
385
386fn analyze_elf(analysis: &mut BinaryAnalysis, elf: &Elf, contents: &[u8]) -> anyhow::Result<()> {
387 analysis.format = "elf".to_string();
388
389 analysis.architecture = match elf.header.e_machine {
391 goblin::elf::header::EM_X86_64 => "x86_64".to_string(),
392 goblin::elf::header::EM_386 => "i386".to_string(),
393 goblin::elf::header::EM_ARM => "arm".to_string(),
394 goblin::elf::header::EM_AARCH64 => "aarch64".to_string(),
395 goblin::elf::header::EM_RISCV => "riscv".to_string(),
396 _ => format!("unknown({})", elf.header.e_machine),
397 };
398
399 for sym in &elf.syms {
401 if let Some(name) = elf.strtab.get_at(sym.st_name) {
402 if !name.is_empty() {
403 analysis.detected_symbols.push(name.to_string());
404 }
405 }
406 }
407
408 for sym in &elf.dynsyms {
410 if let Some(name) = elf.dynstrtab.get_at(sym.st_name) {
411 if !name.is_empty() {
412 analysis.detected_symbols.push(name.to_string());
413 }
414 }
415 }
416
417 for lib in &elf.libraries {
419 analysis.linked_libraries.push(lib.to_string());
420 analysis.embedded_strings.push(lib.to_string());
422 }
423
424 analysis.static_linked =
426 elf.libraries.is_empty() && elf.header.e_type == goblin::elf::header::ET_EXEC;
427
428 if let Ok(obj_file) = object::File::parse(contents) {
430 for symbol in obj_file.symbols() {
431 if let Ok(name) = symbol.name() {
432 if symbol.is_undefined() {
433 analysis.imports.push(name.to_string());
434 } else if symbol.is_global() {
435 analysis.exports.push(name.to_string());
436 }
437 }
438 }
439 }
440
441 Ok(())
442}
443
444fn analyze_pe(analysis: &mut BinaryAnalysis, pe: &PE, _contents: &[u8]) -> anyhow::Result<()> {
445 analysis.format = "pe".to_string();
446
447 analysis.architecture = match pe.header.coff_header.machine {
449 goblin::pe::header::COFF_MACHINE_X86_64 => "x86_64".to_string(),
450 goblin::pe::header::COFF_MACHINE_X86 => "i386".to_string(),
451 goblin::pe::header::COFF_MACHINE_ARM64 => "aarch64".to_string(),
452 _ => format!("unknown({})", pe.header.coff_header.machine),
453 };
454
455 for export in &pe.exports {
457 if let Some(name) = &export.name {
458 analysis.exports.push(name.to_string());
459 }
460 }
461
462 for import in &pe.imports {
464 analysis.imports.push(import.name.to_string());
465 analysis.embedded_strings.push(import.name.to_string());
467 if !analysis.linked_libraries.contains(&import.dll.to_string()) {
468 analysis.linked_libraries.push(import.dll.to_string());
469 analysis.embedded_strings.push(import.dll.to_string());
471 }
472 }
473
474 analysis.static_linked = pe.imports.is_empty();
476
477 Ok(())
478}
479
480fn analyze_wasm(analysis: &mut BinaryAnalysis, contents: &[u8]) -> anyhow::Result<()> {
481 tracing::info!("Starting WASM analysis");
482 analysis.format = "application/wasm".to_string();
483 analysis.architecture = "wasm32".to_string();
484 analysis.languages.push("WebAssembly".to_string());
485
486 let parser = Parser::new(0);
487 let mut imports = HashSet::new();
488 let mut exports = HashSet::new();
489 let mut function_count = 0;
490 let mut memory_info = Vec::new();
491 let mut table_info = Vec::new();
492
493 for payload in parser.parse_all(contents) {
494 use wasmparser::Payload as WasmPayload;
495 match payload {
496 Ok(payload) => {
497 match payload {
498 Payload::Version { num, .. } => {
499 tracing::debug!("WASM version: {}", num);
500 }
501 Payload::ImportSection(reader) => {
502 for import in reader {
503 match import {
504 Ok(import) => {
505 let import_name = format!("{}::{}", import.module, import.name);
506 imports.insert(import_name);
507 tracing::debug!(
508 "Found import: {}::{}",
509 import.module,
510 import.name
511 );
512 }
513 Err(e) => tracing::warn!("Failed to parse import: {}", e),
514 }
515 }
516 }
517 Payload::ExportSection(reader) => {
518 for export in reader {
519 match export {
520 Ok(export) => {
521 exports.insert(export.name.to_string());
522 tracing::debug!("Found export: {}", export.name);
523 }
524 Err(e) => tracing::warn!("Failed to parse export: {}", e),
525 }
526 }
527 }
528 Payload::FunctionSection(reader) => {
529 function_count = reader.count();
530 tracing::debug!("Function count: {}", function_count);
531 }
532 Payload::MemorySection(reader) => {
533 for memory in reader {
534 match memory {
535 Ok(memory) => {
536 memory_info.push(format!(
537 "initial: {}, maximum: {:?}",
538 memory.initial, memory.maximum
539 ));
540 }
541 Err(e) => tracing::warn!("Failed to parse memory: {}", e),
542 }
543 }
544 }
545 Payload::TableSection(reader) => {
546 for table in reader {
547 match table {
548 Ok(table) => {
549 table_info.push(format!(
550 "element_type: {:?}, initial: {}, maximum: {:?}",
551 table.ty.element_type, table.ty.initial, table.ty.maximum
552 ));
553 }
554 Err(e) => tracing::warn!("Failed to parse table: {}", e),
555 }
556 }
557 }
558 WasmPayload::CustomSection(custom) => {
559 if let Ok(bytes_str) = std::str::from_utf8(custom.data()) {
560 for s in extract_strings(bytes_str.as_bytes()) {
561 analysis.embedded_strings.push(s);
562 }
563 }
564 }
565 Payload::TypeSection(reader) => {
566 tracing::debug!("Type section with {} types", reader.count());
567 }
568 _ => {
569 }
571 }
572 }
573 Err(e) => {
574 tracing::warn!("WASM parsing error: {}", e);
575 break;
576 }
577 }
578 }
579
580 analysis.imports = imports.into_iter().collect();
581 analysis.exports = exports.into_iter().collect();
582 analysis.static_linked = true; analysis.metadata = serde_json::json!({
586 "wasm_version": "1.0",
587 "function_count": function_count,
588 "memory_sections": memory_info,
589 "table_sections": table_info,
590 "import_count": analysis.imports.len(),
591 "export_count": analysis.exports.len(),
592 "analysis_type": "wasm"
593 });
594
595 tracing::info!(
596 "WASM analysis complete: {} imports, {} exports, {} functions",
597 analysis.imports.len(),
598 analysis.exports.len(),
599 function_count
600 );
601
602 Ok(())
603}
604
605fn analyze_unknown_binary(analysis: &mut BinaryAnalysis, contents: &[u8]) -> anyhow::Result<()> {
606 tracing::debug!("Performing generic binary analysis");
607
608 let text_ratio = contents
610 .iter()
611 .filter(|&&b| b.is_ascii_graphic() || b.is_ascii_whitespace())
612 .count() as f64
613 / contents.len() as f64;
614
615 if text_ratio > 0.7 {
616 analysis.format = "text".to_string();
617 tracing::debug!(
618 "Detected text file ({}% ASCII)",
619 (text_ratio * 100.0) as u32
620 );
621
622 let text = String::from_utf8_lossy(contents);
624 {
625 if text.starts_with("#!") {
627 analysis.format = "script".to_string();
628 analysis.languages.push("script".to_string());
629 }
630
631 if text.contains("function") || text.contains("def ") {
633 analysis.languages.push("script".to_string());
634 }
635 if text.contains("#include") || text.contains("int main") {
636 analysis.languages.push("C/C++".to_string());
637 }
638 if text.contains("pub fn") || text.contains("fn main") {
639 analysis.languages.push("Rust".to_string());
640 }
641 }
642 } else {
643 analysis.format = "binary".to_string();
644 tracing::debug!(
645 "Detected binary file ({}% ASCII)",
646 (text_ratio * 100.0) as u32
647 );
648 }
649
650 analysis.architecture = "unknown".to_string();
651
652 analysis.metadata = serde_json::json!({
654 "ascii_ratio": text_ratio,
655 "analysis_type": "generic"
656 });
657
658 Ok(())
659}
660
661fn analyze_small_file(
662 file_name: &str,
663 contents: &[u8],
664 crypto_provider: &CryptoProvider,
665) -> anyhow::Result<BinaryAnalysis> {
666 tracing::info!(
667 "Analyzing small file '{}' ({} bytes)",
668 file_name,
669 contents.len()
670 );
671
672 let sha256_hash = Sha256::digest(contents);
673 let alternative_hash = crypto_provider.hash_alternative(contents)?;
674
675 let strings = extract_strings(contents);
677 let text_content = String::from_utf8_lossy(contents);
678
679 let text_ratio = contents
681 .iter()
682 .filter(|&&b| b.is_ascii_graphic() || b.is_ascii_whitespace())
683 .count() as f64
684 / contents.len() as f64;
685
686 let format = if text_ratio > 0.8 {
687 "text/plain"
688 } else {
689 "application/octet-stream"
690 }
691 .to_string();
692
693 let mut languages = Vec::new();
695 let mut analysis_notes = Vec::new();
696
697 if strings.iter().any(|s| s.ends_with(".wasm")) {
698 analysis_notes.push("Contains WASM module reference".to_string());
699 languages.push("WebAssembly".to_string());
700 }
701
702 if strings
703 .iter()
704 .any(|s| s.ends_with(".dll") || s.ends_with(".exe"))
705 {
706 analysis_notes.push("Contains Windows executable reference".to_string());
707 }
708
709 if text_content.starts_with("#!") {
710 languages.push("Script".to_string());
711 analysis_notes.push("Shell script or executable script".to_string());
712 }
713
714 let metadata = serde_json::json!({
715 "ascii_ratio": text_ratio,
716 "analysis_type": "small_file",
717 "notes": analysis_notes,
718 "content_preview": text_content.chars().take(50).collect::<String>()
719 });
720
721 let version_info = extract_version_info(contents, &strings, &format);
722 let license_info = extract_license_info(&strings);
723
724 Ok(BinaryAnalysis {
725 id: Uuid::new_v4(),
726 file_name: file_name.to_string(),
727 format,
728 architecture: "n/a".to_string(),
729 languages,
730 detected_symbols: Vec::new(),
731 embedded_strings: strings,
732 suspected_secrets: Vec::new(),
733 imports: Vec::new(),
734 exports: Vec::new(),
735 hash_sha256: format!("{:x}", sha256_hash),
736 hash_blake3: Some(hex::encode(&alternative_hash)),
737 size_bytes: contents.len() as u64,
738 linked_libraries: Vec::new(),
739 static_linked: false,
740 version_info: Some(version_info),
741 license_info: Some(license_info),
742 metadata,
743 created_at: Utc::now(),
744 sbom: None,
745 })
746}
747
/// Guesses a MIME type from magic bytes, then from the file extension.
///
/// The first four bytes are checked for ELF, PE (`MZ`), Mach-O (32- and 64-bit, either
/// byte order) and WebAssembly signatures. If none matches, the text after the last `.`
/// in `file_name` is compared case-insensitively against a short list of known
/// extensions. A name without any `.` has no extension and is never matched.
///
/// Returns `application/octet-stream` when nothing matches.
fn detect_file_type_fallback(file_name: &str, contents: &[u8]) -> String {
    let by_magic = match contents.get(..4) {
        Some([0x7f, b'E', b'L', b'F']) => Some("application/x-elf"),
        Some([b'M', b'Z', _, _]) => Some("application/x-msdownload"),
        // MH_MAGIC / MH_CIGAM (32-bit) and MH_MAGIC_64 / MH_CIGAM_64 (64-bit).
        Some([0xfe, 0xed, 0xfa, 0xce])
        | Some([0xce, 0xfa, 0xed, 0xfe])
        | Some([0xfe, 0xed, 0xfa, 0xcf])
        | Some([0xcf, 0xfa, 0xed, 0xfe]) => Some("application/x-mach-binary"),
        Some([0x00, 0x61, 0x73, 0x6d]) => Some("application/wasm"),
        _ => None,
    };

    let by_extension = || {
        file_name
            .rsplit_once('.')
            .and_then(|(_, ext)| match ext.to_lowercase().as_str() {
                "exe" | "dll" => Some("application/x-msdownload"),
                "so" | "a" => Some("application/x-sharedlib"),
                "wasm" => Some("application/wasm"),
                "bin" => Some("application/octet-stream"),
                _ => None,
            })
    };

    by_magic
        .or_else(by_extension)
        .unwrap_or("application/octet-stream")
        .to_string()
}
775
776fn extract_strings(contents: &[u8]) -> Vec<String> {
777 let mut strings = Vec::new();
778 let mut current_string = Vec::new();
779
780 tracing::debug!("Extracting strings from {} bytes", contents.len());
781
782 for &byte in contents {
783 if byte.is_ascii_graphic() || byte == b' ' || byte == b'\t' {
784 current_string.push(byte);
785 } else {
786 if current_string.len() >= 3 {
787 if let Ok(s) = String::from_utf8(current_string.clone()) {
789 if !s.trim().is_empty() && !is_junk_string(&s) {
791 strings.push(s.trim().to_string());
792 }
793 }
794 }
795 current_string.clear();
796 }
797 }
798
799 if current_string.len() >= 3 {
801 if let Ok(s) = String::from_utf8(current_string) {
802 if !s.trim().is_empty() && !is_junk_string(&s) {
803 strings.push(s.trim().to_string());
804 }
805 }
806 }
807
808 strings.sort();
810 strings.dedup();
811 strings.truncate(50);
812
813 tracing::debug!("Extracted {} strings", strings.len());
814 strings
815}
816
/// Reports whether `s` is noise rather than a meaningful string.
///
/// A string is junk when it is longer than 200 bytes, consists only of NUL and space
/// characters, or consists only of ASCII punctuation. The empty string is junk.
fn is_junk_string(s: &str) -> bool {
    if s.len() > 200 {
        return true;
    }

    let raw = s.as_bytes();
    let only_padding = raw.iter().all(|&b| b == 0 || b == b' ');
    let only_punctuation = raw.iter().all(|b| b.is_ascii_punctuation());

    only_padding || only_punctuation
}
823
#[cfg(test)]
mod tests {
    use super::*;

    /// A zero-byte upload must still yield an analysis that echoes the file name and
    /// reports a size of zero.
    #[tokio::test]
    async fn test_analyze_empty() {
        let provider = CryptoProvider::new(false, false).unwrap();
        let empty: &[u8] = &[];

        let outcome = analyze_binary("test.bin", empty, &provider).await;
        assert!(outcome.is_ok());

        let report = outcome.unwrap();
        assert_eq!(report.file_name, "test.bin");
        assert_eq!(report.size_bytes, 0);
    }
}