Skip to main content

rch_common/
binary_hash.rs

1//! Binary hash computation utility for verifying remote compilation correctness.
2//!
3//! This module provides deterministic hashing of compiled binaries by focusing on
4//! code sections (.text, .rodata) while ignoring non-deterministic metadata like
5//! timestamps and paths.
6
7use anyhow::{Context, Result, anyhow};
8use blake3::Hasher;
9use object::read::macho::{FatArch, MachOFatFile32, MachOFatFile64};
10use object::{Architecture, Object, ObjectSection};
11use serde::{Deserialize, Serialize};
12use std::fs::File;
13use std::io::{BufReader, Read};
14use std::path::Path;
15use tracing::info;
16
17/// Result of computing hashes for a binary file.
18#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
19pub struct BinaryHashResult {
20    /// Hash of the entire binary file (includes non-deterministic elements).
21    pub full_hash: String,
22    /// Hash of only code sections (.text, .rodata) - deterministic across builds.
23    pub code_hash: String,
24    /// Size of the .text section in bytes.
25    pub text_section_size: u64,
26    /// Whether the binary contains debug information.
27    pub is_debug: bool,
28}
29
30/// Compute hash of the entire binary file using BLAKE3.
31///
32/// This hash includes all non-deterministic elements (timestamps, paths, etc.)
33/// and will differ between builds even of identical source code.
34fn compute_full_hash(path: &Path) -> Result<String> {
35    let file = File::open(path).with_context(|| format!("Failed to open binary: {:?}", path))?;
36    let mut reader = BufReader::new(file);
37    let mut hasher = Hasher::new();
38    let mut buffer = [0u8; 65536];
39
40    loop {
41        let bytes_read = reader.read(&mut buffer)?;
42        if bytes_read == 0 {
43            break;
44        }
45        hasher.update(&buffer[..bytes_read]);
46    }
47
48    Ok(hasher.finalize().to_hex().to_string())
49}
50
51/// Get the current architecture for selecting the right slice from a fat binary.
52#[cfg(target_arch = "x86_64")]
53fn native_architecture() -> Architecture {
54    Architecture::X86_64
55}
56
57#[cfg(target_arch = "aarch64")]
58fn native_architecture() -> Architecture {
59    Architecture::Aarch64
60}
61
62#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
63fn native_architecture() -> Architecture {
64    Architecture::Unknown
65}
66
67/// Try to extract a slice from a fat binary using the given arch list.
68fn try_extract_from_fat<'a, F: FatArch>(
69    data: &'a [u8],
70    arches: &[F],
71    native_arch: Architecture,
72) -> Option<&'a [u8]> {
73    // First try to find a slice matching our native architecture
74    for arch in arches {
75        if arch.architecture() == native_arch
76            && let Ok(slice) = arch.data(data)
77        {
78            info!("Extracted {:?} slice from fat binary", native_arch);
79            return Some(slice);
80        }
81    }
82
83    // No exact match found, use the first valid slice
84    if let Some(arch) = arches.first()
85        && let Ok(slice) = arch.data(data)
86    {
87        info!("Using first available slice from fat binary");
88        return Some(slice);
89    }
90
91    None
92}
93
94/// Extract the appropriate binary slice from a fat/universal binary.
95///
96/// macOS universal binaries (fat binaries) contain multiple architecture slices.
97/// This function extracts the slice matching the current native architecture,
98/// or the first available slice if no exact match is found.
99///
100/// For non-fat binaries, returns the original data unchanged.
101fn extract_binary_slice(data: &[u8]) -> Result<&[u8]> {
102    // Try parsing as 32-bit fat binary first (more common)
103    if let Ok(fat) = MachOFatFile32::parse(data) {
104        let arches = fat.arches();
105        if arches.is_empty() {
106            return Err(anyhow!("Fat binary contains no architecture slices"));
107        }
108
109        let native_arch = native_architecture();
110        if let Some(slice) = try_extract_from_fat(data, arches, native_arch) {
111            return Ok(slice);
112        }
113        return Err(anyhow!("Failed to extract any valid slice from fat binary"));
114    }
115
116    // Try parsing as 64-bit fat binary
117    if let Ok(fat) = MachOFatFile64::parse(data) {
118        let arches = fat.arches();
119        if arches.is_empty() {
120            return Err(anyhow!("Fat64 binary contains no architecture slices"));
121        }
122
123        let native_arch = native_architecture();
124        if let Some(slice) = try_extract_from_fat(data, arches, native_arch) {
125            return Ok(slice);
126        }
127        return Err(anyhow!(
128            "Failed to extract any valid slice from fat64 binary"
129        ));
130    }
131
132    // Not a fat binary, return as-is
133    Ok(data)
134}
135
/// Decide whether a section name denotes executable code or read-only data.
///
/// Accepted ELF names are `.text` and `.rodata`, plus any name beginning with
/// `.text.` or `.rodata.`. Accepted Mach-O names are `__text`, `__const`,
/// `__cstring`, `__stubs` and `__stub_helper`. Matching is case-sensitive.
fn is_code_section(name: &str) -> bool {
    // Mach-O section names that count as code / read-only data.
    const MACHO_CODE_SECTIONS: [&str; 5] =
        ["__text", "__const", "__cstring", "__stubs", "__stub_helper"];

    let is_elf_code = matches!(name, ".text" | ".rodata")
        || name.starts_with(".text.")
        || name.starts_with(".rodata.");

    is_elf_code || MACHO_CODE_SECTIONS.contains(&name)
}
152
153/// Compute hash of code sections only using BLAKE3.
154///
155/// This hash focuses on executable code and read-only data sections,
156/// ignoring non-deterministic metadata. This provides a more reliable
157/// comparison between builds of the same source code.
158///
159/// Supports both ELF (Linux) and Mach-O (macOS) binary formats,
160/// including macOS universal (fat) binaries.
161fn compute_code_hash(data: &[u8]) -> Result<String> {
162    // Handle fat/universal binaries by extracting the appropriate slice
163    let binary_data = extract_binary_slice(data)?;
164    let file = object::File::parse(binary_data).context("Failed to parse binary format")?;
165    let mut hasher = Hasher::new();
166    let mut sections_hashed = 0;
167
168    // Hash only executable code and read-only data sections
169    for section in file.sections() {
170        let name = section.name().unwrap_or("");
171
172        // Include code sections from both ELF and Mach-O formats
173        if is_code_section(name)
174            && let Ok(section_data) = section.data()
175        {
176            hasher.update(section_data);
177            sections_hashed += 1;
178        }
179    }
180
181    if sections_hashed == 0 {
182        return Err(anyhow!("No code sections found in binary"));
183    }
184
185    Ok(hasher.finalize().to_hex().to_string())
186}
187
/// Decide whether a section name is the main code section: `.text` on ELF or
/// `__text` on Mach-O. Sub-sections such as `.text.foo` do not match.
fn is_text_section(name: &str) -> bool {
    matches!(name, ".text" | "__text")
}
193
/// Decide whether a section name denotes debug information.
///
/// Matches any name starting with `.debug` (ELF) or `__debug` (Mach-O DWARF
/// sections), as well as the exact name `__DWARF`.
fn is_debug_section(name: &str) -> bool {
    const DEBUG_PREFIXES: [&str; 2] = [".debug", "__debug"];

    name == "__DWARF" || DEBUG_PREFIXES.iter().any(|prefix| name.starts_with(prefix))
}
202
203/// Extract metadata about the binary.
204///
205/// Returns (text_section_size, has_debug_info).
206/// Supports both ELF (Linux) and Mach-O (macOS) binary formats,
207/// including macOS universal (fat) binaries.
208fn extract_metadata(data: &[u8]) -> Result<(u64, bool)> {
209    // Handle fat/universal binaries by extracting the appropriate slice
210    let binary_data = extract_binary_slice(data)?;
211    let file = object::File::parse(binary_data).context("Failed to parse binary for metadata")?;
212
213    let text_size: u64 = file
214        .sections()
215        .filter(|s| is_text_section(s.name().unwrap_or("")))
216        .map(|s| s.size())
217        .sum();
218
219    let has_debug = file
220        .sections()
221        .any(|s| is_debug_section(s.name().unwrap_or("")));
222
223    Ok((text_size, has_debug))
224}
225
226/// Compute comprehensive hash information for a binary file.
227///
228/// This function returns both a full file hash and a deterministic code hash
229/// that can be used to verify remote compilation correctness.
230///
231/// # Arguments
232/// * `path` - Path to the binary file
233///
234/// # Returns
235/// A `BinaryHashResult` containing:
236/// - `full_hash`: Complete file hash (non-deterministic)
237/// - `code_hash`: Hash of code sections only (deterministic)
238/// - `text_section_size`: Size of executable code
239/// - `is_debug`: Whether debug symbols are present
240pub fn compute_binary_hash(path: &Path) -> Result<BinaryHashResult> {
241    info!("Computing binary hash for {:?}", path);
242
243    // Compute full file hash
244    let full_hash = compute_full_hash(path)?;
245    info!("Full hash computed: {}", &full_hash[..16]);
246
247    // Read file for section analysis
248    let data = std::fs::read(path).with_context(|| format!("Failed to read binary: {:?}", path))?;
249
250    // Compute code-only hash
251    let code_hash = compute_code_hash(&data)?;
252    info!("Code hash computed: {}", &code_hash[..16]);
253
254    // Extract metadata
255    let (text_section_size, is_debug) = extract_metadata(&data)?;
256    info!(
257        "Metadata: text_size={}, is_debug={}",
258        text_section_size, is_debug
259    );
260
261    Ok(BinaryHashResult {
262        full_hash,
263        code_hash,
264        text_section_size,
265        is_debug,
266    })
267}
268
269/// Check if a binary contains a specific marker string.
270///
271/// This is used to verify that the remote worker actually compiled the code
272/// by checking for a unique test marker that was added to the source.
273///
274/// # Arguments
275/// * `path` - Path to the binary file
276/// * `marker` - The marker string to search for
277///
278/// # Returns
279/// `true` if the marker is found in the binary's string data
280pub fn binary_contains_marker(path: &Path, marker: &str) -> Result<bool> {
281    info!("Searching for marker '{}' in {:?}", marker, path);
282
283    // Empty marker is trivially contained in any file (consistent with str::contains(""))
284    if marker.is_empty() {
285        return Ok(true);
286    }
287
288    let data = std::fs::read(path).with_context(|| format!("Failed to read binary: {:?}", path))?;
289
290    // Search for the marker in the raw binary data
291    // The marker should appear as a string literal in .rodata or similar sections
292    let marker_bytes = marker.as_bytes();
293    let contains = data
294        .windows(marker_bytes.len())
295        .any(|window| window == marker_bytes);
296
297    info!("Marker search result: found={}", contains);
298    Ok(contains)
299}
300
301/// Compare two binaries for functional equivalence.
302///
303/// Two binaries are considered equivalent if they have:
304/// - Matching code hashes (same executable code)
305/// - Matching text section sizes
306/// - Matching debug status
307///
308/// The full hash may differ due to timestamps and paths, so it is not used
309/// for equivalence checking.
310///
311/// # Arguments
312/// * `local` - Hash result from local build
313/// * `remote` - Hash result from remote build
314///
315/// # Returns
316/// `true` if the binaries are functionally equivalent
317pub fn binaries_equivalent(local: &BinaryHashResult, remote: &BinaryHashResult) -> bool {
318    // Code hash must match exactly
319    if local.code_hash != remote.code_hash {
320        info!(
321            "Code hash mismatch: local={}, remote={}",
322            &local.code_hash[..local.code_hash.len().min(16)],
323            &remote.code_hash[..remote.code_hash.len().min(16)]
324        );
325        return false;
326    }
327
328    // Text section size should match
329    if local.text_section_size != remote.text_section_size {
330        info!(
331            "Text section size mismatch: local={}, remote={}",
332            local.text_section_size, remote.text_section_size
333        );
334        return false;
335    }
336
337    // Debug status should match
338    if local.is_debug != remote.is_debug {
339        info!(
340            "Debug status mismatch: local={}, remote={}",
341            local.is_debug, remote.is_debug
342        );
343        return false;
344    }
345
346    true
347}
348
349#[cfg(test)]
350mod tests {
351    use super::*;
352    use std::path::PathBuf;
353
354    fn init_test_logging() {
355        let _ = tracing_subscriber::fmt()
356            .with_test_writer()
357            .with_max_level(tracing::Level::INFO)
358            .try_init();
359    }
360
361    /// Find a binary in the project for testing
362    fn find_test_binary() -> Option<PathBuf> {
363        // Try release binary first, then debug
364        let candidates = [
365            "target/release/rch",
366            "target/debug/rch",
367            "target/release/rchd",
368            "target/debug/rchd",
369            "/bin/ls",  // Fallback to system binary
370            "/bin/cat", // Another fallback
371            "/usr/bin/ls",
372        ];
373
374        for candidate in candidates {
375            let path = PathBuf::from(candidate);
376            if path.exists() {
377                return Some(path);
378            }
379        }
380        None
381    }
382
383    #[test]
384    fn test_hash_same_binary_twice() {
385        init_test_logging();
386        info!("TEST START: test_hash_same_binary_twice");
387
388        let binary_path = match find_test_binary() {
389            Some(p) => p,
390            None => {
391                info!("SKIP: No test binary found");
392                return;
393            }
394        };
395
396        info!("INPUT: compute_binary_hash({:?}) twice", binary_path);
397
398        let hash1 = compute_binary_hash(&binary_path).unwrap();
399        let hash2 = compute_binary_hash(&binary_path).unwrap();
400
401        info!(
402            "RESULT: hash1.code_hash={}, hash2.code_hash={}",
403            &hash1.code_hash[..16],
404            &hash2.code_hash[..16]
405        );
406
407        assert_eq!(hash1.code_hash, hash2.code_hash, "Code hash should match");
408        assert_eq!(hash1.full_hash, hash2.full_hash, "Full hash should match");
409        assert_eq!(
410            hash1.text_section_size, hash2.text_section_size,
411            "Text section size should match"
412        );
413        assert_eq!(hash1.is_debug, hash2.is_debug, "Debug status should match");
414
415        info!("VERIFY: Same binary produces identical hashes");
416        info!("TEST PASS: test_hash_same_binary_twice");
417    }
418
419    #[test]
420    fn test_binaries_equivalent_matching() {
421        init_test_logging();
422        info!("TEST START: test_binaries_equivalent_matching");
423
424        let local = BinaryHashResult {
425            full_hash: "abc123def456".into(),
426            code_hash: "xyz789abc".into(),
427            text_section_size: 12345,
428            is_debug: false,
429        };
430        let remote = BinaryHashResult {
431            full_hash: "different_full_hash".into(), // Full hash may differ
432            code_hash: "xyz789abc".into(),           // Code hash matches
433            text_section_size: 12345,
434            is_debug: false,
435        };
436
437        info!(
438            "INPUT: local.code_hash={}, remote.code_hash={}",
439            local.code_hash, remote.code_hash
440        );
441
442        let result = binaries_equivalent(&local, &remote);
443        info!("RESULT: binaries_equivalent = {}", result);
444
445        assert!(
446            result,
447            "Binaries with matching code hash should be equivalent"
448        );
449        info!("VERIFY: Binaries with matching code hash are equivalent");
450        info!("TEST PASS: test_binaries_equivalent_matching");
451    }
452
453    #[test]
454    fn test_binaries_not_equivalent_different_code_hash() {
455        init_test_logging();
456        info!("TEST START: test_binaries_not_equivalent_different_code_hash");
457
458        let local = BinaryHashResult {
459            full_hash: "abc123".into(),
460            code_hash: "hash_v1".into(),
461            text_section_size: 12345,
462            is_debug: false,
463        };
464        let remote = BinaryHashResult {
465            full_hash: "abc123".into(),
466            code_hash: "hash_v2".into(), // Different code hash
467            text_section_size: 12345,
468            is_debug: false,
469        };
470
471        info!(
472            "INPUT: local.code_hash={}, remote.code_hash={}",
473            local.code_hash, remote.code_hash
474        );
475
476        let result = binaries_equivalent(&local, &remote);
477        info!("RESULT: binaries_equivalent = {}", result);
478
479        assert!(
480            !result,
481            "Binaries with different code hash should not be equivalent"
482        );
483        info!("VERIFY: Different code hash makes binaries non-equivalent");
484        info!("TEST PASS: test_binaries_not_equivalent_different_code_hash");
485    }
486
487    #[test]
488    fn test_binaries_not_equivalent_different_size() {
489        init_test_logging();
490        info!("TEST START: test_binaries_not_equivalent_different_size");
491
492        let local = BinaryHashResult {
493            full_hash: "abc123".into(),
494            code_hash: "same_hash".into(),
495            text_section_size: 12345,
496            is_debug: false,
497        };
498        let remote = BinaryHashResult {
499            full_hash: "abc123".into(),
500            code_hash: "same_hash".into(),
501            text_section_size: 54321, // Different size
502            is_debug: false,
503        };
504
505        info!(
506            "INPUT: local.text_size={}, remote.text_size={}",
507            local.text_section_size, remote.text_section_size
508        );
509
510        let result = binaries_equivalent(&local, &remote);
511        info!("RESULT: binaries_equivalent = {}", result);
512
513        assert!(
514            !result,
515            "Binaries with different text size should not be equivalent"
516        );
517        info!("VERIFY: Different text section size makes binaries non-equivalent");
518        info!("TEST PASS: test_binaries_not_equivalent_different_size");
519    }
520
521    #[test]
522    fn test_binaries_not_equivalent_different_debug_status() {
523        init_test_logging();
524        info!("TEST START: test_binaries_not_equivalent_different_debug_status");
525
526        let local = BinaryHashResult {
527            full_hash: "abc123".into(),
528            code_hash: "same_hash".into(),
529            text_section_size: 12345,
530            is_debug: false,
531        };
532        let remote = BinaryHashResult {
533            full_hash: "abc123".into(),
534            code_hash: "same_hash".into(),
535            text_section_size: 12345,
536            is_debug: true, // Different debug status
537        };
538
539        info!(
540            "INPUT: local.is_debug={}, remote.is_debug={}",
541            local.is_debug, remote.is_debug
542        );
543
544        let result = binaries_equivalent(&local, &remote);
545        info!("RESULT: binaries_equivalent = {}", result);
546
547        assert!(
548            !result,
549            "Binaries with different debug status should not be equivalent"
550        );
551        info!("VERIFY: Different debug status makes binaries non-equivalent");
552        info!("TEST PASS: test_binaries_not_equivalent_different_debug_status");
553    }
554
555    #[test]
556    fn test_compute_binary_hash_nonexistent_file() {
557        init_test_logging();
558        info!("TEST START: test_compute_binary_hash_nonexistent_file");
559
560        let path = Path::new("/nonexistent/path/to/binary");
561        info!("INPUT: compute_binary_hash({:?})", path);
562
563        let result = compute_binary_hash(path);
564        info!("RESULT: is_err = {}", result.is_err());
565
566        assert!(result.is_err(), "Should fail for nonexistent file");
567        info!("VERIFY: Nonexistent file returns error");
568        info!("TEST PASS: test_compute_binary_hash_nonexistent_file");
569    }
570
571    #[test]
572    fn test_compute_binary_hash_invalid_file() {
573        init_test_logging();
574        info!("TEST START: test_compute_binary_hash_invalid_file");
575
576        // Try to hash a text file which is not a valid binary
577        let path = Path::new("Cargo.toml");
578        if !path.exists() {
579            info!("SKIP: Cargo.toml not found");
580            return;
581        }
582
583        info!("INPUT: compute_binary_hash({:?}) on text file", path);
584
585        let result = compute_binary_hash(path);
586        info!("RESULT: is_err = {}", result.is_err());
587
588        assert!(result.is_err(), "Should fail for non-binary file");
589        info!("VERIFY: Non-binary file returns error");
590        info!("TEST PASS: test_compute_binary_hash_invalid_file");
591    }
592
593    #[test]
594    fn test_binary_hash_result_fields() {
595        init_test_logging();
596        info!("TEST START: test_binary_hash_result_fields");
597
598        let binary_path = match find_test_binary() {
599            Some(p) => p,
600            None => {
601                info!("SKIP: No test binary found");
602                return;
603            }
604        };
605
606        info!("INPUT: compute_binary_hash({:?})", binary_path);
607
608        let result = compute_binary_hash(&binary_path).unwrap();
609
610        info!("RESULT: full_hash_len={}", result.full_hash.len());
611        info!("RESULT: code_hash_len={}", result.code_hash.len());
612        info!("RESULT: text_section_size={}", result.text_section_size);
613        info!("RESULT: is_debug={}", result.is_debug);
614
615        // BLAKE3 produces 64-character hex string
616        assert_eq!(
617            result.full_hash.len(),
618            64,
619            "Full hash should be 64 hex chars"
620        );
621        assert_eq!(
622            result.code_hash.len(),
623            64,
624            "Code hash should be 64 hex chars"
625        );
626        assert!(
627            result.text_section_size > 0,
628            "Text section should have content"
629        );
630
631        info!("VERIFY: All fields have valid values");
632        info!("TEST PASS: test_binary_hash_result_fields");
633    }
634
635    #[test]
636    fn test_binary_contains_marker_found() {
637        init_test_logging();
638        info!("TEST START: test_binary_contains_marker_found");
639
640        let binary_path = match find_test_binary() {
641            Some(p) => p,
642            None => {
643                info!("SKIP: No test binary found");
644                return;
645            }
646        };
647
648        // Look for a common string that should exist in any binary
649        // ELF binaries contain "ELF", Mach-O binaries don't have that marker
650        // so we test for common strings that appear in most system binaries
651        #[cfg(target_os = "linux")]
652        let marker = "ELF";
653        #[cfg(target_os = "macos")]
654        let marker = "__TEXT"; // Mach-O segment name commonly found in binaries
655        #[cfg(target_os = "windows")]
656        let marker = "PE"; // PE header marker (though this test may be skipped on Windows)
657        #[cfg(not(any(target_os = "linux", target_os = "macos", target_os = "windows")))]
658        let marker = "ELF";
659
660        info!(
661            "INPUT: binary_contains_marker({:?}, '{}')",
662            binary_path, marker
663        );
664
665        let result = binary_contains_marker(&binary_path, marker).unwrap();
666        info!("RESULT: contains_marker = {}", result);
667
668        // The marker should be found in the binary
669        assert!(result, "Binary should contain '{}' string", marker);
670        info!("VERIFY: Marker '{}' found in binary", marker);
671        info!("TEST PASS: test_binary_contains_marker_found");
672    }
673
674    #[test]
675    fn test_binary_contains_marker_not_found() {
676        init_test_logging();
677        info!("TEST START: test_binary_contains_marker_not_found");
678
679        let binary_path = match find_test_binary() {
680            Some(p) => p,
681            None => {
682                info!("SKIP: No test binary found");
683                return;
684            }
685        };
686
687        // Look for a unique string that should NOT exist in any binary
688        let marker = "RCH_TEST_MARKER_UNIQUE_12345_XYZ";
689        info!(
690            "INPUT: binary_contains_marker({:?}, '{}')",
691            binary_path, marker
692        );
693
694        let result = binary_contains_marker(&binary_path, marker).unwrap();
695        info!("RESULT: contains_marker = {}", result);
696
697        assert!(!result, "Binary should not contain made-up marker");
698        info!("VERIFY: Unique marker not found in binary");
699        info!("TEST PASS: test_binary_contains_marker_not_found");
700    }
701
702    #[test]
703    fn test_binary_contains_marker_nonexistent_file() {
704        init_test_logging();
705        info!("TEST START: test_binary_contains_marker_nonexistent_file");
706
707        let path = Path::new("/nonexistent/path/to/binary");
708        let marker = "test";
709        info!("INPUT: binary_contains_marker({:?}, '{}')", path, marker);
710
711        let result = binary_contains_marker(path, marker);
712        info!("RESULT: is_err = {}", result.is_err());
713
714        assert!(result.is_err(), "Should fail for nonexistent file");
715        info!("VERIFY: Nonexistent file returns error");
716        info!("TEST PASS: test_binary_contains_marker_nonexistent_file");
717    }
718
719    // ===================================================================
720    // Empty / non-binary file edge cases
721    // ===================================================================
722
723    #[test]
724    fn test_compute_binary_hash_empty_file() {
725        init_test_logging();
726        info!("TEST START: test_compute_binary_hash_empty_file");
727
728        let dir = std::env::temp_dir().join("rch-binary-hash-test-empty");
729        let _ = std::fs::create_dir_all(&dir);
730        let empty_path = dir.join("empty_binary");
731        std::fs::write(&empty_path, b"").unwrap();
732
733        let result = compute_binary_hash(&empty_path);
734        assert!(
735            result.is_err(),
736            "empty file should fail binary hash (not a valid binary format)"
737        );
738
739        let _ = std::fs::remove_dir_all(&dir);
740        info!("TEST PASS: test_compute_binary_hash_empty_file");
741    }
742
743    #[test]
744    fn test_compute_binary_hash_text_file() {
745        init_test_logging();
746        info!("TEST START: test_compute_binary_hash_text_file");
747
748        let dir = std::env::temp_dir().join("rch-binary-hash-test-text");
749        let _ = std::fs::create_dir_all(&dir);
750        let text_path = dir.join("not_a_binary.txt");
751        std::fs::write(&text_path, b"Hello, this is not a binary file.").unwrap();
752
753        let result = compute_binary_hash(&text_path);
754        assert!(
755            result.is_err(),
756            "text file should fail binary hash computation"
757        );
758
759        let _ = std::fs::remove_dir_all(&dir);
760        info!("TEST PASS: test_compute_binary_hash_text_file");
761    }
762
763    #[test]
764    fn test_compute_binary_hash_truncated_elf_header() {
765        init_test_logging();
766        info!("TEST START: test_compute_binary_hash_truncated_elf_header");
767
768        let dir = std::env::temp_dir().join("rch-binary-hash-test-truncated");
769        let _ = std::fs::create_dir_all(&dir);
770        let trunc_path = dir.join("truncated_elf");
771        // ELF magic followed by truncated header
772        std::fs::write(&trunc_path, b"\x7fELF\x02\x01\x01\x00").unwrap();
773
774        let result = compute_binary_hash(&trunc_path);
775        assert!(
776            result.is_err(),
777            "truncated ELF should fail binary hash computation"
778        );
779
780        let _ = std::fs::remove_dir_all(&dir);
781        info!("TEST PASS: test_compute_binary_hash_truncated_elf_header");
782    }
783
784    // ===================================================================
785    // BinaryHashResult equivalence edge cases
786    // ===================================================================
787
788    #[test]
789    fn test_binaries_equivalent_ignores_full_hash() {
790        init_test_logging();
791
792        let result1 = BinaryHashResult {
793            full_hash: "aaaa".to_string(),
794            code_hash: "same_code".to_string(),
795            text_section_size: 1024,
796            is_debug: false,
797        };
798        let result2 = BinaryHashResult {
799            full_hash: "bbbb".to_string(), // different full hash
800            code_hash: "same_code".to_string(),
801            text_section_size: 1024,
802            is_debug: false,
803        };
804
805        assert!(
806            binaries_equivalent(&result1, &result2),
807            "equivalence should only consider code_hash, text_section_size, is_debug"
808        );
809    }
810
811    #[test]
812    fn test_binaries_not_equivalent_debug_vs_release() {
813        init_test_logging();
814
815        let debug = BinaryHashResult {
816            full_hash: "a".to_string(),
817            code_hash: "same".to_string(),
818            text_section_size: 1024,
819            is_debug: true,
820        };
821        let release = BinaryHashResult {
822            full_hash: "a".to_string(),
823            code_hash: "same".to_string(),
824            text_section_size: 1024,
825            is_debug: false,
826        };
827
828        assert!(
829            !binaries_equivalent(&debug, &release),
830            "debug vs release should not be equivalent"
831        );
832    }
833
834    #[test]
835    fn test_binaries_not_equivalent_different_text_size() {
836        init_test_logging();
837
838        let small = BinaryHashResult {
839            full_hash: "a".to_string(),
840            code_hash: "same".to_string(),
841            text_section_size: 1024,
842            is_debug: false,
843        };
844        let large = BinaryHashResult {
845            full_hash: "a".to_string(),
846            code_hash: "same".to_string(),
847            text_section_size: 2048,
848            is_debug: false,
849        };
850
851        assert!(
852            !binaries_equivalent(&small, &large),
853            "different text section sizes should not be equivalent"
854        );
855    }
856
857    // ===================================================================
858    // BinaryHashResult serialization
859    // ===================================================================
860
861    #[test]
862    fn test_binary_hash_result_serialization_round_trip() {
863        let result = BinaryHashResult {
864            full_hash: "abc123def456".to_string(),
865            code_hash: "789ghi012".to_string(),
866            text_section_size: 65536,
867            is_debug: true,
868        };
869
870        let json = serde_json::to_string(&result).expect("should serialize");
871        let deserialized: BinaryHashResult =
872            serde_json::from_str(&json).expect("should deserialize");
873        assert_eq!(result, deserialized);
874    }
875
876    // ===================================================================
877    // Marker search edge cases
878    // ===================================================================
879
880    #[test]
881    fn test_binary_contains_marker_empty_marker() {
882        init_test_logging();
883
884        let binary_path = match find_test_binary() {
885            Some(p) => p,
886            None => {
887                info!("SKIP: No test binary found");
888                return;
889            }
890        };
891
892        // Empty marker is trivially contained (consistent with str::contains(""))
893        let result = binary_contains_marker(&binary_path, "");
894        assert!(result.is_ok(), "empty marker should not panic");
895        assert!(result.unwrap(), "empty marker should be trivially found");
896    }
897
898    #[test]
899    fn test_binary_contains_marker_very_long_marker() {
900        init_test_logging();
901
902        let binary_path = match find_test_binary() {
903            Some(p) => p,
904            None => {
905                info!("SKIP: No test binary found");
906                return;
907            }
908        };
909
910        // Very long marker that surely doesn't exist
911        let marker = "X".repeat(1024);
912        let result = binary_contains_marker(&binary_path, &marker).unwrap();
913        assert!(
914            !result,
915            "very long marker should not be found in any binary"
916        );
917    }
918
919    // ===================================================================
920    // Hash determinism verification
921    // ===================================================================
922
923    #[test]
924    fn test_full_hash_determinism() {
925        init_test_logging();
926
927        let binary_path = match find_test_binary() {
928            Some(p) => p,
929            None => return,
930        };
931
932        let result1 = compute_binary_hash(&binary_path).unwrap();
933        let result2 = compute_binary_hash(&binary_path).unwrap();
934
935        assert_eq!(
936            result1.full_hash, result2.full_hash,
937            "full_hash must be deterministic"
938        );
939        assert_eq!(
940            result1.code_hash, result2.code_hash,
941            "code_hash must be deterministic"
942        );
943        assert_eq!(
944            result1.text_section_size, result2.text_section_size,
945            "text_section_size must be deterministic"
946        );
947        assert_eq!(
948            result1.is_debug, result2.is_debug,
949            "is_debug must be deterministic"
950        );
951    }
952
953    #[test]
954    fn test_code_hash_differs_from_full_hash() {
955        init_test_logging();
956
957        let binary_path = match find_test_binary() {
958            Some(p) => p,
959            None => return,
960        };
961
962        let result = compute_binary_hash(&binary_path).unwrap();
963        assert_ne!(
964            result.code_hash, result.full_hash,
965            "code_hash should differ from full_hash (unless binary has only code sections)"
966        );
967    }
968
969    #[test]
970    fn test_text_section_size_nonzero_for_real_binary() {
971        init_test_logging();
972
973        let binary_path = match find_test_binary() {
974            Some(p) => p,
975            None => return,
976        };
977
978        let result = compute_binary_hash(&binary_path).unwrap();
979        assert!(
980            result.text_section_size > 0,
981            "real binary should have non-zero text section"
982        );
983    }
984
985    #[test]
986    fn test_hash_format_is_hex_string() {
987        init_test_logging();
988
989        let binary_path = match find_test_binary() {
990            Some(p) => p,
991            None => return,
992        };
993
994        let result = compute_binary_hash(&binary_path).unwrap();
995        assert!(
996            result.full_hash.chars().all(|c| c.is_ascii_hexdigit()),
997            "full_hash should be hex string"
998        );
999        assert!(
1000            result.code_hash.chars().all(|c| c.is_ascii_hexdigit()),
1001            "code_hash should be hex string"
1002        );
1003        // BLAKE3 produces 64-char hex (256 bits)
1004        assert_eq!(
1005            result.full_hash.len(),
1006            64,
1007            "BLAKE3 hash should be 64 hex chars"
1008        );
1009        assert_eq!(
1010            result.code_hash.len(),
1011            64,
1012            "BLAKE3 hash should be 64 hex chars"
1013        );
1014    }
1015}