1use anyhow::{Context, Result};
2use chrono;
3use prost::Message;
4use rusqlite::types::{FromSql, FromSqlResult, ValueRef};
5use rusqlite::{params, Connection};
6use serde::{Deserialize, Serialize};
7use std::path::Path;
8
/// Code pulled in at compile time from `binexport.rs` inside the build's
/// `OUT_DIR` directory.
///
/// NOTE(review): presumably this file is generated by the build script from the
/// BinExport2 protobuf schema — the generator is not visible here; confirm in
/// `build.rs`.
pub mod binexport {
    include!(concat!(env!("OUT_DIR"), "/binexport.rs"));
}
12
/// One row of the `file` table, as loaded by `BinDiff::read_file`.
///
/// The fields are declared in the same order in which `read_file` reads the
/// table's columns (positions 0 through 12).
///
/// NOTE(review): the meaning of the individual counters is inferred from the
/// field names only; confirm against the BinDiff database schema.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct File {
    /// Column 0 of the `file` table.
    pub id: i64,
    /// Column 1 of the `file` table.
    pub filename: String,
    /// Column 2 of the `file` table.
    pub exe_filename: String,
    /// Column 3 of the `file` table; stored as text.
    pub hash: String,
    /// Column 4 of the `file` table.
    pub functions: i64,
    /// Column 5; presumably the library-function share of `functions` — TODO confirm.
    pub lib_functions: i64,
    /// Column 6 of the `file` table.
    pub calls: i64,
    /// Column 7 of the `file` table.
    pub basic_blocks: i64,
    /// Column 8; presumably the library share of `basic_blocks` — TODO confirm.
    pub lib_basic_blocks: i64,
    /// Column 9 of the `file` table.
    pub edges: i64,
    /// Column 10; presumably the library share of `edges` — TODO confirm.
    pub lib_edges: i64,
    /// Column 11 of the `file` table.
    pub instructions: i64,
    /// Column 12; presumably the library share of `instructions` — TODO confirm.
    pub lib_instructions: i64,
}
29
30impl std::fmt::Display for File {
31 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
32 write!(
33 f,
34 "FILE:\n \
35 id: {}\n \
36 filename: {}\n \
37 exe_filename: {}\n \
38 hash: {}\n \
39 functions: {}\n \
40 lib_functions: {}\n \
41 calls: {}\n \
42 basic_blocks: {}\n \
43 lib_basic_blocks: {}\n \
44 edges: {}\n \
45 lib_edges: {}\n \
46 instructions: {}\n \
47 lib_instructions: {}\n",
48 self.id,
49 self.filename,
50 self.exe_filename,
51 self.hash,
52 self.functions,
53 self.lib_functions,
54 self.calls,
55 self.basic_blocks,
56 self.lib_basic_blocks,
57 self.edges,
58 self.lib_edges,
59 self.instructions,
60 self.lib_instructions
61 )
62 }
63}
64
/// The row of the `metadata` table, as loaded by `BinDiff::read_metadata`.
///
/// The fields are declared in the same order in which `read_metadata` reads
/// the table's columns (positions 0 through 7).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Metadata {
    /// Column 0 of the `metadata` table; stored as text.
    pub version: String,
    /// Column 1; presumably the id of the first compared `file` row — TODO confirm.
    pub file1: i64,
    /// Column 2; presumably the id of the second compared `file` row — TODO confirm.
    pub file2: i64,
    /// Column 3 of the `metadata` table.
    pub description: String,
    /// Column 4, decoded as a UTC timestamp.
    pub created: chrono::DateTime<chrono::Utc>,
    /// Column 5, decoded as a UTC timestamp.
    pub modified: chrono::DateTime<chrono::Utc>,
    /// Column 6; printed with two decimals by the `Display` impl.
    pub similarity: f64,
    /// Column 7; printed with two decimals by the `Display` impl.
    pub confidence: f64,
}
76
77impl std::fmt::Display for Metadata {
78 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
79 write!(
80 f,
81 "METADATA:\n \
82 version: {}\n \
83 file1: {}\n \
84 file2: {}\n \
85 description: {}\n \
86 created: {}\n \
87 modified: {}\n \
88 similarity: {:.2}\n \
89 confidence: {:.2}\n",
90 self.version,
91 self.file1,
92 self.file2,
93 self.description,
94 self.created.format("%Y-%m-%d %H:%M:%S"),
95 self.modified.format("%Y-%m-%d %H:%M:%S"),
96 self.similarity,
97 self.confidence
98 )
99 }
100}
101
/// Algorithm recorded for a function match.
///
/// The unit variants are declared in the order of the integer ids `0..=19`
/// that the `FromSql` impl in this file maps onto them (`None` = 0,
/// `Manual` = 19). The `Display` impl supplies the human-readable name of each
/// variant.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum FunctionAlgorithm {
    None,
    NameHashMatching,
    HashMatching,
    EdgesFlowgraphMdIndex,
    EdgesCallgraphMdIndex,
    MdIndexMatchingFlowgraphTopDown,
    MdIndexMatchingFlowgraphBottomUp,
    PrimeSignatureMatching,
    MdIndexMatchingCallGraphTopDown,
    MdIndexMatchingCallGraphBottomUp,
    RelaxedMdIndexMatching,
    InstructionCount,
    AddressSequence,
    StringReferences,
    LoopCountMatching,
    CallSequenceMatchingExact,
    CallSequenceMatchingTopology,
    CallSequenceMatchingSequence,
    CallReferenceMatching,
    Manual,
    /// Any id without a dedicated variant; holds the id rendered as decimal text.
    Other(String),
}
128
129impl std::fmt::Display for FunctionAlgorithm {
130 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
131 match self {
132 FunctionAlgorithm::None => write!(f, "none"),
133 FunctionAlgorithm::NameHashMatching => write!(f, "name hash matching"),
134 FunctionAlgorithm::HashMatching => write!(f, "hash matching"),
135 FunctionAlgorithm::EdgesFlowgraphMdIndex => write!(f, "edges flowgraph MD index"),
136 FunctionAlgorithm::EdgesCallgraphMdIndex => write!(f, "edges callgraph MD index"),
137 FunctionAlgorithm::MdIndexMatchingFlowgraphTopDown => {
138 write!(f, "MD index matching (flowgraph MD index, top down)")
139 }
140 FunctionAlgorithm::MdIndexMatchingFlowgraphBottomUp => {
141 write!(f, "MD index matching (flowgraph MD index, bottom up)")
142 }
143 FunctionAlgorithm::PrimeSignatureMatching => write!(f, "signature matching"),
144 FunctionAlgorithm::MdIndexMatchingCallGraphTopDown => {
145 write!(f, "MD index matching (callGraph MD index, top down)")
146 }
147 FunctionAlgorithm::MdIndexMatchingCallGraphBottomUp => {
148 write!(f, "MD index matching (callGraph MD index, bottom up)")
149 }
150 FunctionAlgorithm::RelaxedMdIndexMatching => write!(f, "MD index matching"),
151 FunctionAlgorithm::InstructionCount => write!(f, "instruction count"),
152 FunctionAlgorithm::AddressSequence => write!(f, "address sequence"),
153 FunctionAlgorithm::StringReferences => write!(f, "string references"),
154 FunctionAlgorithm::LoopCountMatching => write!(f, "loop count matching"),
155 FunctionAlgorithm::CallSequenceMatchingExact => {
156 write!(f, "call sequence matching(exact)")
157 }
158 FunctionAlgorithm::CallSequenceMatchingTopology => {
159 write!(f, "call sequence matching(topology)")
160 }
161 FunctionAlgorithm::CallSequenceMatchingSequence => {
162 write!(f, "call sequence matching(sequence)")
163 }
164 FunctionAlgorithm::CallReferenceMatching => write!(f, "call references matching"),
165 FunctionAlgorithm::Manual => write!(f, "manual"),
166 FunctionAlgorithm::Other(s) => write!(f, "other({})", s),
167 }
168 }
169}
170
171impl FromSql for FunctionAlgorithm {
172 fn column_result(value: ValueRef<'_>) -> FromSqlResult<Self> {
173 let algorithm_id: i32 = value.as_i64()? as i32;
174 match algorithm_id {
175 0 => Ok(FunctionAlgorithm::None),
176 1 => Ok(FunctionAlgorithm::NameHashMatching),
177 2 => Ok(FunctionAlgorithm::HashMatching),
178 3 => Ok(FunctionAlgorithm::EdgesFlowgraphMdIndex),
179 4 => Ok(FunctionAlgorithm::EdgesCallgraphMdIndex),
180 5 => Ok(FunctionAlgorithm::MdIndexMatchingFlowgraphTopDown),
181 6 => Ok(FunctionAlgorithm::MdIndexMatchingFlowgraphBottomUp),
182 7 => Ok(FunctionAlgorithm::PrimeSignatureMatching),
183 8 => Ok(FunctionAlgorithm::MdIndexMatchingCallGraphTopDown),
184 9 => Ok(FunctionAlgorithm::MdIndexMatchingCallGraphBottomUp),
185 10 => Ok(FunctionAlgorithm::RelaxedMdIndexMatching),
186 11 => Ok(FunctionAlgorithm::InstructionCount),
187 12 => Ok(FunctionAlgorithm::AddressSequence),
188 13 => Ok(FunctionAlgorithm::StringReferences),
189 14 => Ok(FunctionAlgorithm::LoopCountMatching),
190 15 => Ok(FunctionAlgorithm::CallSequenceMatchingExact),
191 16 => Ok(FunctionAlgorithm::CallSequenceMatchingTopology),
192 17 => Ok(FunctionAlgorithm::CallSequenceMatchingSequence),
193 18 => Ok(FunctionAlgorithm::CallReferenceMatching),
194 19 => Ok(FunctionAlgorithm::Manual),
195 other => Ok(FunctionAlgorithm::Other(other.to_string())),
196 }
197 }
198}
199
/// One row of the `function` table, as loaded by
/// `BinDiff::read_function_matches`.
///
/// The fields are declared in the same order in which that method reads the
/// table's columns (positions 0 through 13).
///
/// NOTE(review): the `1`/`2` suffixes presumably refer to the first and second
/// compared binary — confirm against the BinDiff schema.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunctionMatch {
    /// Column 0 of the `function` table.
    pub id: i64,
    /// Column 1 of the `function` table.
    pub address1: i64,
    /// Column 2; compared with `name2` by the `Display` impl.
    pub name1: String,
    /// Column 3 of the `function` table.
    pub address2: i64,
    /// Column 4; compared with `name1` by the `Display` impl.
    pub name2: String,
    /// Column 5; printed with two decimals by the `Display` impl.
    pub similarity: f64,
    /// Column 6; printed with two decimals by the `Display` impl.
    pub confidence: f64,
    /// Column 7 of the `function` table.
    pub flags: i64,
    /// Column 8, decoded through the `FromSql` impl of [`FunctionAlgorithm`].
    pub algorithm: FunctionAlgorithm,
    /// Column 9 of the `function` table.
    pub evaluate: bool,
    /// Column 10 of the `function` table.
    pub comment_supported: bool,
    /// Column 11 of the `function` table.
    pub basic_blocks: i64,
    /// Column 12 of the `function` table.
    pub edges: i64,
    /// Column 13 of the `function` table.
    pub instructions: i64,
}
217
218impl std::fmt::Display for FunctionMatch {
219 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
220 if self.name1 != self.name2 {
221 write!(
222 f,
223 "{} -> {}\tsimilarity: {:.2}, confidence: {:.2}, algorithm: {}",
224 self.name1, self.name2, self.similarity, self.confidence, self.algorithm
225 )
226 } else {
227 write!(
228 f,
229 "{}:\tsimilarity: {:.2}, confidence: {:.2}",
230 self.name1, self.similarity, self.confidence
231 )
232 }
233 }
234}
235
/// Algorithm recorded for a basic-block match.
///
/// The unit variants are declared in the order of the integer ids `0..=20`
/// that the `FromSql` impl in this file maps onto them (`None` = 0,
/// `Manual` = 20). The `Display` impl supplies the human-readable name of each
/// variant.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum BasicBlockAlgorithm {
    None,
    EdgesPrimeProduct,
    HashMatchingFourInstMin,
    PrimeMatchingFourInstMin,
    CallReferenceMatching,
    StringReferencesMatching,
    EdgesMdIndexTopDown,
    MdIndexMatchingTopDown,
    EdgesMdIndexBottomUp,
    MdIndexMatchingBottomUp,
    RelaxedMdIndexMatching,
    PrimeMatchingNoInstMin,
    EdgesLengauerTarjanDominated,
    LoopEntryMatching,
    SelfLoopMatching,
    EntryPointMatching,
    ExitPointMatching,
    InstructionCountMatching,
    JumpSequenceMatching,
    PropagationSizeOne,
    Manual,
    /// Any id without a dedicated variant; holds the id rendered as decimal text.
    Other(String),
}
263
264impl std::fmt::Display for BasicBlockAlgorithm {
265 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
266 match self {
267 BasicBlockAlgorithm::None => write!(f, "none"),
268 BasicBlockAlgorithm::EdgesPrimeProduct => write!(f, "edges prime product"),
269 BasicBlockAlgorithm::HashMatchingFourInstMin => {
270 write!(f, "hash matching (4 instructions minimum)")
271 }
272 BasicBlockAlgorithm::PrimeMatchingFourInstMin => {
273 write!(f, "prime matching (4 instructions minimum)")
274 }
275 BasicBlockAlgorithm::CallReferenceMatching => write!(f, "call reference matching"),
276 BasicBlockAlgorithm::StringReferencesMatching => write!(f, "string reference matching"),
277 BasicBlockAlgorithm::EdgesMdIndexTopDown => write!(f, "edges MD index (top down)"),
278 BasicBlockAlgorithm::MdIndexMatchingTopDown => {
279 write!(f, "MD index matching (top down)")
280 }
281 BasicBlockAlgorithm::EdgesMdIndexBottomUp => write!(f, "edges MD index (bottom up)"),
282 BasicBlockAlgorithm::MdIndexMatchingBottomUp => {
283 write!(f, "MD index matching (bottom up)")
284 }
285 BasicBlockAlgorithm::RelaxedMdIndexMatching => write!(f, "relaxed MD index matching"),
286 BasicBlockAlgorithm::PrimeMatchingNoInstMin => {
287 write!(f, "prime matching (0 instructions minimum)")
288 }
289 BasicBlockAlgorithm::EdgesLengauerTarjanDominated => {
290 write!(f, "edges Lengauer Tarjan dominated")
291 }
292 BasicBlockAlgorithm::LoopEntryMatching => write!(f, "loop entry matching"),
293 BasicBlockAlgorithm::SelfLoopMatching => write!(f, "self loop matching"),
294 BasicBlockAlgorithm::EntryPointMatching => write!(f, "entry point matching"),
295 BasicBlockAlgorithm::ExitPointMatching => write!(f, "exit point matching"),
296 BasicBlockAlgorithm::InstructionCountMatching => {
297 write!(f, "instruction count matching")
298 }
299 BasicBlockAlgorithm::JumpSequenceMatching => write!(f, "jump sequence matching"),
300 BasicBlockAlgorithm::PropagationSizeOne => write!(f, "propagation (size==1)"),
301 BasicBlockAlgorithm::Manual => write!(f, "manual"),
302 BasicBlockAlgorithm::Other(s) => write!(f, "other({})", s),
303 }
304 }
305}
306
307impl FromSql for BasicBlockAlgorithm {
308 fn column_result(value: ValueRef<'_>) -> FromSqlResult<Self> {
309 let algorithm_id: i32 = value.as_i64()? as i32;
310 match algorithm_id {
311 0 => Ok(BasicBlockAlgorithm::None),
312 1 => Ok(BasicBlockAlgorithm::EdgesPrimeProduct),
313 2 => Ok(BasicBlockAlgorithm::HashMatchingFourInstMin),
314 3 => Ok(BasicBlockAlgorithm::PrimeMatchingFourInstMin),
315 4 => Ok(BasicBlockAlgorithm::CallReferenceMatching),
316 5 => Ok(BasicBlockAlgorithm::StringReferencesMatching),
317 6 => Ok(BasicBlockAlgorithm::EdgesMdIndexTopDown),
318 7 => Ok(BasicBlockAlgorithm::MdIndexMatchingTopDown),
319 8 => Ok(BasicBlockAlgorithm::EdgesMdIndexBottomUp),
320 9 => Ok(BasicBlockAlgorithm::MdIndexMatchingBottomUp),
321 10 => Ok(BasicBlockAlgorithm::RelaxedMdIndexMatching),
322 11 => Ok(BasicBlockAlgorithm::PrimeMatchingNoInstMin),
323 12 => Ok(BasicBlockAlgorithm::EdgesLengauerTarjanDominated),
324 13 => Ok(BasicBlockAlgorithm::LoopEntryMatching),
325 14 => Ok(BasicBlockAlgorithm::SelfLoopMatching),
326 15 => Ok(BasicBlockAlgorithm::EntryPointMatching),
327 16 => Ok(BasicBlockAlgorithm::ExitPointMatching),
328 17 => Ok(BasicBlockAlgorithm::InstructionCountMatching),
329 18 => Ok(BasicBlockAlgorithm::JumpSequenceMatching),
330 19 => Ok(BasicBlockAlgorithm::PropagationSizeOne),
331 20 => Ok(BasicBlockAlgorithm::Manual),
332 other => Ok(BasicBlockAlgorithm::Other(other.to_string())),
333 }
334 }
335}
336
/// One row of the `basicblock` table, as loaded by
/// `BinDiff::read_basic_block_matches`.
///
/// The fields are declared in the same order in which that method reads the
/// table's columns (positions 0 through 5).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BasicBlockMatch {
    /// Column 0 of the `basicblock` table.
    pub id: i64,
    /// Column 1; presumably the `id` of the owning `function` row — TODO confirm.
    pub function_id: i64,
    /// Column 2; printed in hexadecimal by the `Display` impl.
    pub address1: i64,
    /// Column 3; printed in hexadecimal by the `Display` impl.
    pub address2: i64,
    /// Column 4, decoded through the `FromSql` impl of [`BasicBlockAlgorithm`].
    pub algorithm: BasicBlockAlgorithm,
    /// Column 5 of the `basicblock` table.
    pub evaluate: bool,
}
346
347impl std::fmt::Display for BasicBlockMatch {
348 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
349 write!(
350 f,
351 "{:#x} -> {:#x} ({})",
352 self.address1, self.address2, self.algorithm
353 )
354 }
355}
356
/// One row of the `instruction` table, as loaded by
/// `BinDiff::read_instruction_matches`.
///
/// The fields are declared in the same order in which that method reads the
/// table's columns (positions 0 through 2).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Instruction {
    /// Column 0 of the `instruction` table.
    pub id: i64,
    /// Column 1; printed in decimal by the `Display` impl.
    pub address1: i64,
    /// Column 2; printed in decimal by the `Display` impl.
    pub address2: i64,
}
363
364impl std::fmt::Display for Instruction {
365 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
366 write!(f, "{} -> {}", self.address1, self.address2)
367 }
368}
369
/// Reader for a BinDiff results database stored as a SQLite file.
///
/// Create one with `BinDiff::open`; every `read_*` and `count_*` method runs
/// its query against the wrapped connection.
pub struct BinDiff {
    /// SQLite connection that all queries are issued on.
    connection: Connection,
}
374
375impl BinDiff {
376 pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
378 let connection = Connection::open(path).context("Failed to open SQLite database")?;
379 Ok(Self { connection })
380 }
381
382 pub fn close(self) -> std::result::Result<(), rusqlite::Error> {
383 self.connection.close().map_err(|(_, err)| err)
384 }
385
386 pub fn read_metadata(&self) -> Result<Metadata> {
387 let mut stmt = self
388 .connection
389 .prepare("SELECT * FROM metadata")
390 .context("Failed to prepare metadata statement")?;
391
392 stmt.query_row(params![], |row| {
393 Ok(Metadata {
394 version: row.get(0)?,
395 file1: row.get(1)?,
396 file2: row.get(2)?,
397 description: row.get(3)?,
398 created: row.get(4)?,
399 modified: row.get(5)?,
400 similarity: row.get(6)?,
401 confidence: row.get(7)?,
402 })
403 })
404 .context("Failed to query metadata row")
405 }
406
407 pub fn read_file(&self) -> Result<File> {
409 let mut stmt = self
410 .connection
411 .prepare("SELECT * FROM file")
412 .context("Failed to prepare file statement")?;
413
414 stmt.query_row(params![], |row| {
415 Ok(File {
416 id: row.get(0)?,
417 filename: row.get(1)?,
418 exe_filename: row.get(2)?,
419 hash: row.get(3)?,
420 functions: row.get(4)?,
421 lib_functions: row.get(5)?,
422 calls: row.get(6)?,
423 basic_blocks: row.get(7)?,
424 lib_basic_blocks: row.get(8)?,
425 edges: row.get(9)?,
426 lib_edges: row.get(10)?,
427 instructions: row.get(11)?,
428 lib_instructions: row.get(12)?,
429 })
430 })
431 .context("Failed to query file row")
432 }
433
434 pub fn count_function_matches(&self) -> Result<usize> {
436 let count: i64 = self
437 .connection
438 .query_row("SELECT COUNT(*) FROM function", params![], |row| row.get(0))
439 .context("Failed to count function matches")?;
440
441 Ok(count as usize)
442 }
443
444 pub fn read_function_matches(&self) -> Result<Vec<FunctionMatch>> {
445 let mut stmt = self
446 .connection
447 .prepare("SELECT * FROM function")
448 .context("Failed to prepare function statement")?;
449
450 let matches = stmt
451 .query_map(params![], |row| {
452 Ok(FunctionMatch {
453 id: row.get(0)?,
454 address1: row.get(1)?,
455 name1: row.get(2)?,
456 address2: row.get(3)?,
457 name2: row.get(4)?,
458 similarity: row.get(5)?,
459 confidence: row.get(6)?,
460 flags: row.get(7)?,
461 algorithm: row.get(8)?,
462 evaluate: row.get(9)?,
463 comment_supported: row.get(10)?,
464 basic_blocks: row.get(11)?,
465 edges: row.get(12)?,
466 instructions: row.get(13)?,
467 })
468 })
469 .context("Failed to query function row")?
470 .collect::<Result<Vec<FunctionMatch>, _>>()?;
471
472 Ok(matches)
473 }
474
475 pub fn count_basic_block_matches(&self) -> Result<usize> {
477 let count: i64 = self
478 .connection
479 .query_row("SELECT COUNT(*) FROM basicblock", params![], |row| {
480 row.get(0)
481 })
482 .context("Failed to count basic block matches")?;
483
484 Ok(count as usize)
485 }
486
487 pub fn read_basic_block_matches(&self) -> Result<Vec<BasicBlockMatch>> {
488 let mut stmt = self
489 .connection
490 .prepare("SELECT * FROM basicblock")
491 .context("Failed to prepare basicblock statement")?;
492
493 let matches = stmt
494 .query_map(params![], |row| {
495 Ok(BasicBlockMatch {
496 id: row.get(0)?,
497 function_id: row.get(1)?,
498 address1: row.get(2)?,
499 address2: row.get(3)?,
500 algorithm: row.get(4)?,
501 evaluate: row.get(5)?,
502 })
503 })
504 .context("Failed to query basicblock row")?
505 .collect::<Result<Vec<BasicBlockMatch>, _>>()?;
506
507 Ok(matches)
508 }
509
510 pub fn count_instruction_matches(&self) -> Result<usize> {
512 let count: i64 = self
513 .connection
514 .query_row("SELECT COUNT(*) FROM instruction", params![], |row| {
515 row.get(0)
516 })
517 .context("Failed to count instruction matches")?;
518
519 Ok(count as usize)
520 }
521
522 pub fn read_instruction_matches(&self) -> Result<Vec<Instruction>> {
523 let mut stmt = self
524 .connection
525 .prepare("SELECT * FROM instruction")
526 .context("Failed to prepare instruction statement")?;
527
528 let matches = stmt
529 .query_map(params![], |row| {
530 Ok(Instruction {
531 id: row.get(0)?,
532 address1: row.get(1)?,
533 address2: row.get(2)?,
534 })
535 })
536 .context("Failed to query instruction row")?
537 .collect::<Result<Vec<Instruction>, _>>()?;
538
539 Ok(matches)
540 }
541}
542
/// A decoded BinExport file.
///
/// Wraps the `BinExport2` message type from the [`binexport`] module; build
/// one with `BinExport::open`.
#[derive(Debug, Clone)]
pub struct BinExport {
    /// The decoded message, exposed as-is for direct access to its fields.
    pub binexport: binexport::BinExport2,
}
547
548impl BinExport {
549 pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
550 let file = std::fs::read(&path).with_context(|| {
551 format!("Failed to read BinExport file: {}", path.as_ref().display())
552 })?;
553 let binexport = binexport::BinExport2::decode(&file[..])
554 .context("Failed to decode BinExport protobuf")?;
555 Ok(Self { binexport })
556 }
557
558 pub fn executable_name(&self) -> Result<String> {
559 let executable_name = self
560 .binexport
561 .meta_information
562 .as_ref()
563 .context("No meta information available")?
564 .executable_name
565 .clone()
566 .unwrap_or_else(|| "unknown executable".to_string());
567 Ok(executable_name)
568 }
569
570 }
572
#[cfg(test)]
mod tests {
    use super::*;

    /// Walks every reader of `BinDiff` against a fixture database and checks
    /// that each `count_*` agrees with the length of the matching `read_*`.
    ///
    /// The test returns early (and passes) when the fixture is not present.
    #[test]
    fn test_database_operations() -> Result<()> {
        let fixture = "tests/kernel.release_vs_kernel.release.BinDiff";

        let fixture_present = std::path::Path::new(fixture).exists();
        if !fixture_present {
            println!("Test file {} not found. Skipping test.", fixture);
            return Ok(());
        }

        let database = BinDiff::open(fixture)?;

        let file_row = database.read_file()?;
        println!("{}", file_row);

        let metadata_row = database.read_metadata()?;
        println!("{}", metadata_row);

        // Function matches: count, then read and print each one.
        let function_total = database.count_function_matches()?;
        println!("Total Matches: {}", function_total);

        let function_matches = database.read_function_matches()?;
        assert_eq!(function_matches.len(), function_total);
        function_matches
            .into_iter()
            .for_each(|entry| println!("{}", entry));

        // Basic-block matches.
        let basic_block_total = database.count_basic_block_matches()?;
        println!("Total Basic Block Matches: {}", basic_block_total);

        let basic_block_rows = database.read_basic_block_matches()?;
        assert_eq!(basic_block_rows.len(), basic_block_total);

        // Instruction matches.
        let instruction_total = database.count_instruction_matches()?;
        println!("Total Instruction Matches: {}", instruction_total);

        let instruction_rows = database.read_instruction_matches()?;
        assert_eq!(instruction_rows.len(), instruction_total);

        database.close()?;

        Ok(())
    }

    /// Decodes a fixture BinExport file and prints its executable name.
    ///
    /// The test returns early (and passes) when the fixture is not present.
    #[test]
    fn test_read_binexport() -> Result<()> {
        let fixture = "tests/kernel.release.t6020.BinExport";

        let fixture_present = std::path::Path::new(fixture).exists();
        if !fixture_present {
            println!("Test file {} not found. Skipping test.", fixture);
            return Ok(());
        }

        let export = BinExport::open(fixture)?;
        let name = export.executable_name()?;
        println!("executable_name: {}", name);

        Ok(())
    }
}