rsleigh_fid/lib.rs
1//! Function ID database — operand-masked body hashing for function
2//! identification across stripped binaries. Ghidra FID semantic clone
3//! with pure-Rust ingest/match pipeline.
4//!
5//! # Algorithm
6//! 1. Disassemble function linearly.
7//! 2. For each instruction: keep opcode + prefix bytes, zero operand slots
8//! (registers, immediates, displacements) per arch mask table.
9//! 3. Hash masked byte stream with xxh3-64 → `full_hash`.
10//! 4. Combine with hashes of direct call targets → `specific_hash`.
11//! 5. Persist rows (full_hash, specific_hash, name, lib_id) in compact
12//! binary format; ship as gzipped blob for runtime match pass.
13
14pub mod db;
15pub mod hash;
16pub mod ingest;
17pub mod mask;
18
19pub use db::{FidDb, FidEntry};
20pub use hash::FidHashQuad;
21
22/// Load all bundled FID databases matching the given architecture.
23/// Returns a list of (library_name, db) pairs. Empty if no bundled
24/// DBs ship for the architecture (e.g. ARM32, MIPS32, RISC-V today).
25pub fn bundled_dbs(arch: rsleigh_api::Architecture) -> Vec<(&'static str, FidDb)> {
26 let mut out = Vec::new();
27 let blobs: &[(&str, &[u8])] = match arch {
28 rsleigh_api::Architecture::X86_64 => &[
29 ("glibc", include_bytes!("../data/glibc-x86_64.fidb")),
30 ("libstdcxx", include_bytes!("../data/libstdcxx-x86_64.fidb")),
31 ("musl", include_bytes!("../data/musl-x86_64.fidb")),
32 ("zlib", include_bytes!("../data/zlib-x86_64.fidb")),
33 ("openssl", include_bytes!("../data/openssl-x86_64.fidb")),
34 ],
35 rsleigh_api::Architecture::AArch64 => &[
36 ("glibc", include_bytes!("../data/glibc-aarch64.fidb")),
37 (
38 "libstdcxx",
39 include_bytes!("../data/libstdcxx-aarch64.fidb"),
40 ),
41 ("musl", include_bytes!("../data/musl-aarch64.fidb")),
42 ("zlib", include_bytes!("../data/zlib-aarch64.fidb")),
43 ("openssl", include_bytes!("../data/openssl-aarch64.fidb")),
44 ],
45 _ => &[],
46 };
47 for (name, bytes) in blobs {
48 if let Ok(db) = FidDb::read(std::io::Cursor::new(bytes)) {
49 out.push((*name, db));
50 }
51 }
52 out
53}
54
55/// Convenience: fingerprint a function body and return matching name(s)
56/// from the database. Prefers `specific_hash` (callee-aware) matches;
57/// falls back to `full_hash` if specific yields nothing.
58///
59/// Returns `None` if the body is too small to fingerprint, or if no
60/// match exists. Returns `Some(&name)` when exactly one entry matches
61/// (unambiguous rename). Multi-match caller should use `FidDb` directly
62/// and apply additional disambiguation (e.g. library preference).
63pub fn identify<'a>(
64 arch: rsleigh_api::Architecture,
65 body: &[u8],
66 addr: u64,
67 db: &'a FidDb,
68) -> Option<&'a str> {
69 let hq = ingest::fingerprint(arch, body, addr, |_| None)?;
70 // Specific hash will only match when the callee graph lines up —
71 // without cross-function fingerprints during match, fall back to full.
72 // Accept multi-match when:
73 // - every hit has the identical name (weak-alias duplicates), OR
74 // - all hits are Itanium C++ ABI ctor/dtor variants of the same
75 // function (C1/C2 complete/base ctors, D0/D1/D2 dtors share a body
76 // by design). Prefer C1/D1 (complete-object) in that case.
77 let resolve = |idxs: &[usize]| -> Option<&'a str> {
78 if idxs.is_empty() {
79 return None;
80 }
81 let first = &db.entries[idxs[0]].name;
82 if idxs.iter().all(|i| &db.entries[*i].name == first) {
83 return Some(first);
84 }
85 if idxs
86 .iter()
87 .all(|i| is_cxx_abi_variant(&db.entries[*i].name, first))
88 {
89 // Pick complete-object variant if present, else first.
90 if let Some(i) = idxs.iter().find(|i| {
91 let n = &db.entries[**i].name;
92 n.contains("C1E") || n.contains("D1E")
93 }) {
94 return Some(&db.entries[*i].name);
95 }
96 return Some(first);
97 }
98 None
99 };
100 resolve(db.match_specific(hq.specific)).or_else(|| resolve(db.match_full(hq.full)))
101}
102
/// Are `a` and `b` Itanium C++ ABI ctor/dtor variants of the same
/// function? Variants share a body by spec:
/// - `C1` (complete), `C2` (base), `C3` (allocating) ctors
/// - `D0` (deleting), `D1` (complete), `D2` (base) dtors
///
/// Identical names count as variants. Otherwise the two names must have
/// the same length and differ in exactly one byte: the digit of a
/// `C<n>E` / `D<n>E` marker, with both digits valid for that marker's
/// group. A ctor is never a variant of a dtor.
///
/// The check is anchored on the byte that actually differs rather than on
/// the first textual occurrence of a marker, so names that contain a
/// marker-like substring elsewhere (a class called `ABC1E`, or a local
/// class declared inside a constructor) are still recognised.
fn is_cxx_abi_variant(a: &str, b: &str) -> bool {
    if a == b {
        return true;
    }
    let (a, b) = (a.as_bytes(), b.as_bytes());
    if a.len() != b.len() {
        return false;
    }

    // Variants differ only in the marker digit, so require exactly one
    // differing byte and remember where it is.
    let mut diffs = a
        .iter()
        .zip(b)
        .enumerate()
        .filter(|(_, (x, y))| x != y)
        .map(|(at, _)| at);
    let at = match (diffs.next(), diffs.next()) {
        (Some(at), None) => at,
        _ => return false,
    };

    // The digit needs a group letter before it and an `E` after it.
    if at == 0 || at + 1 >= a.len() {
        return false;
    }

    // Bytes around `at` are identical in both names, so inspecting `a`
    // alone is enough for the group letter and the trailing `E`.
    let digits: &[u8] = match a[at - 1] {
        b'C' => b"123",
        b'D' => b"012",
        _ => return false,
    };
    a[at + 1] == b'E' && digits.contains(&a[at]) && digits.contains(&b[at])
}