bindiff_rs/
lib.rs

1use anyhow::{Context, Result};
2use chrono;
3use prost::Message;
4use rusqlite::types::{FromSql, FromSqlResult, ValueRef};
5use rusqlite::{params, Connection};
6use serde::{Deserialize, Serialize};
7use std::path::Path;
8
/// Items textually included from `binexport.rs` in the build's `OUT_DIR`.
///
/// NOTE(review): presumably the protobuf bindings generated by a build script
/// (the crate decodes `BinExport2` through `prost::Message`) — confirm against
/// `build.rs`.
pub mod binexport {
    include!(concat!(env!("OUT_DIR"), "/binexport.rs"));
}
12
13#[derive(Debug, Clone, Serialize, Deserialize)]
14pub struct File {
15    pub id: i64,
16    pub filename: String,
17    pub exe_filename: String,
18    pub hash: String,
19    pub functions: i64,
20    pub lib_functions: i64,
21    pub calls: i64,
22    pub basic_blocks: i64,
23    pub lib_basic_blocks: i64,
24    pub edges: i64,
25    pub lib_edges: i64,
26    pub instructions: i64,
27    pub lib_instructions: i64,
28}
29
30impl std::fmt::Display for File {
31    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
32        write!(
33            f,
34            "FILE:\n  \
35            id:               {}\n  \
36            filename:         {}\n  \
37            exe_filename:     {}\n  \
38            hash:             {}\n  \
39            functions:        {}\n  \
40            lib_functions:    {}\n  \
41            calls:            {}\n  \
42            basic_blocks:     {}\n  \
43            lib_basic_blocks: {}\n  \
44            edges:            {}\n  \
45            lib_edges:        {}\n  \
46            instructions:     {}\n  \
47            lib_instructions: {}\n",
48            self.id,
49            self.filename,
50            self.exe_filename,
51            self.hash,
52            self.functions,
53            self.lib_functions,
54            self.calls,
55            self.basic_blocks,
56            self.lib_basic_blocks,
57            self.edges,
58            self.lib_edges,
59            self.instructions,
60            self.lib_instructions
61        )
62    }
63}
64
65#[derive(Debug, Clone, Serialize, Deserialize)]
66pub struct Metadata {
67    pub version: String,
68    pub file1: i64,
69    pub file2: i64,
70    pub description: String,
71    pub created: chrono::DateTime<chrono::Utc>,
72    pub modified: chrono::DateTime<chrono::Utc>,
73    pub similarity: f64,
74    pub confidence: f64,
75}
76
77impl std::fmt::Display for Metadata {
78    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
79        write!(
80            f,
81            "METADATA:\n  \
82            version:      {}\n  \
83            file1:        {}\n  \
84            file2:        {}\n  \
85            description:  {}\n  \
86            created:      {}\n  \
87            modified:     {}\n  \
88            similarity:   {:.2}\n  \
89            confidence:   {:.2}\n",
90            self.version,
91            self.file1,
92            self.file2,
93            self.description,
94            self.created.format("%Y-%m-%d %H:%M:%S"),
95            self.modified.format("%Y-%m-%d %H:%M:%S"),
96            self.similarity,
97            self.confidence
98        )
99    }
100}
101
102/// Enum representing the different function matching algorithms used in BinDiff
103#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
104pub enum FunctionAlgorithm {
105    None,
106    NameHashMatching,
107    HashMatching,
108    EdgesFlowgraphMdIndex,
109    EdgesCallgraphMdIndex,
110    MdIndexMatchingFlowgraphTopDown,
111    MdIndexMatchingFlowgraphBottomUp,
112    PrimeSignatureMatching,
113    MdIndexMatchingCallGraphTopDown,
114    MdIndexMatchingCallGraphBottomUp,
115    RelaxedMdIndexMatching,
116    InstructionCount,
117    AddressSequence,
118    StringReferences,
119    LoopCountMatching,
120    CallSequenceMatchingExact,
121    CallSequenceMatchingTopology,
122    CallSequenceMatchingSequence,
123    CallReferenceMatching,
124    Manual,
125    /// Unknown or custom algorithm
126    Other(String),
127}
128
129impl std::fmt::Display for FunctionAlgorithm {
130    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
131        match self {
132            FunctionAlgorithm::None => write!(f, "none"),
133            FunctionAlgorithm::NameHashMatching => write!(f, "name hash matching"),
134            FunctionAlgorithm::HashMatching => write!(f, "hash matching"),
135            FunctionAlgorithm::EdgesFlowgraphMdIndex => write!(f, "edges flowgraph MD index"),
136            FunctionAlgorithm::EdgesCallgraphMdIndex => write!(f, "edges callgraph MD index"),
137            FunctionAlgorithm::MdIndexMatchingFlowgraphTopDown => {
138                write!(f, "MD index matching (flowgraph MD index, top down)")
139            }
140            FunctionAlgorithm::MdIndexMatchingFlowgraphBottomUp => {
141                write!(f, "MD index matching (flowgraph MD index, bottom up)")
142            }
143            FunctionAlgorithm::PrimeSignatureMatching => write!(f, "signature matching"),
144            FunctionAlgorithm::MdIndexMatchingCallGraphTopDown => {
145                write!(f, "MD index matching (callGraph MD index, top down)")
146            }
147            FunctionAlgorithm::MdIndexMatchingCallGraphBottomUp => {
148                write!(f, "MD index matching (callGraph MD index, bottom up)")
149            }
150            FunctionAlgorithm::RelaxedMdIndexMatching => write!(f, "MD index matching"),
151            FunctionAlgorithm::InstructionCount => write!(f, "instruction count"),
152            FunctionAlgorithm::AddressSequence => write!(f, "address sequence"),
153            FunctionAlgorithm::StringReferences => write!(f, "string references"),
154            FunctionAlgorithm::LoopCountMatching => write!(f, "loop count matching"),
155            FunctionAlgorithm::CallSequenceMatchingExact => {
156                write!(f, "call sequence matching(exact)")
157            }
158            FunctionAlgorithm::CallSequenceMatchingTopology => {
159                write!(f, "call sequence matching(topology)")
160            }
161            FunctionAlgorithm::CallSequenceMatchingSequence => {
162                write!(f, "call sequence matching(sequence)")
163            }
164            FunctionAlgorithm::CallReferenceMatching => write!(f, "call references matching"),
165            FunctionAlgorithm::Manual => write!(f, "manual"),
166            FunctionAlgorithm::Other(s) => write!(f, "other({})", s),
167        }
168    }
169}
170
171impl FromSql for FunctionAlgorithm {
172    fn column_result(value: ValueRef<'_>) -> FromSqlResult<Self> {
173        let algorithm_id: i32 = value.as_i64()? as i32;
174        match algorithm_id {
175            0 => Ok(FunctionAlgorithm::None),
176            1 => Ok(FunctionAlgorithm::NameHashMatching),
177            2 => Ok(FunctionAlgorithm::HashMatching),
178            3 => Ok(FunctionAlgorithm::EdgesFlowgraphMdIndex),
179            4 => Ok(FunctionAlgorithm::EdgesCallgraphMdIndex),
180            5 => Ok(FunctionAlgorithm::MdIndexMatchingFlowgraphTopDown),
181            6 => Ok(FunctionAlgorithm::MdIndexMatchingFlowgraphBottomUp),
182            7 => Ok(FunctionAlgorithm::PrimeSignatureMatching),
183            8 => Ok(FunctionAlgorithm::MdIndexMatchingCallGraphTopDown),
184            9 => Ok(FunctionAlgorithm::MdIndexMatchingCallGraphBottomUp),
185            10 => Ok(FunctionAlgorithm::RelaxedMdIndexMatching),
186            11 => Ok(FunctionAlgorithm::InstructionCount),
187            12 => Ok(FunctionAlgorithm::AddressSequence),
188            13 => Ok(FunctionAlgorithm::StringReferences),
189            14 => Ok(FunctionAlgorithm::LoopCountMatching),
190            15 => Ok(FunctionAlgorithm::CallSequenceMatchingExact),
191            16 => Ok(FunctionAlgorithm::CallSequenceMatchingTopology),
192            17 => Ok(FunctionAlgorithm::CallSequenceMatchingSequence),
193            18 => Ok(FunctionAlgorithm::CallReferenceMatching),
194            19 => Ok(FunctionAlgorithm::Manual),
195            other => Ok(FunctionAlgorithm::Other(other.to_string())),
196        }
197    }
198}
199
200#[derive(Debug, Clone, Serialize, Deserialize)]
201pub struct FunctionMatch {
202    pub id: i64,
203    pub address1: i64,
204    pub name1: String,
205    pub address2: i64,
206    pub name2: String,
207    pub similarity: f64,
208    pub confidence: f64,
209    pub flags: i64,
210    pub algorithm: FunctionAlgorithm,
211    pub evaluate: bool,
212    pub comment_supported: bool,
213    pub basic_blocks: i64,
214    pub edges: i64,
215    pub instructions: i64,
216}
217
218impl std::fmt::Display for FunctionMatch {
219    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
220        if self.name1 != self.name2 {
221            write!(
222                f,
223                "{} -> {}\tsimilarity: {:.2}, confidence: {:.2}, algorithm: {}",
224                self.name1, self.name2, self.similarity, self.confidence, self.algorithm
225            )
226        } else {
227            write!(
228                f,
229                "{}:\tsimilarity: {:.2}, confidence: {:.2}",
230                self.name1, self.similarity, self.confidence
231            )
232        }
233    }
234}
235
236/// Enum representing the different basic block matching algorithms used in BinDiff
237#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
238pub enum BasicBlockAlgorithm {
239    None,
240    EdgesPrimeProduct,
241    HashMatchingFourInstMin,
242    PrimeMatchingFourInstMin,
243    CallReferenceMatching,
244    StringReferencesMatching,
245    EdgesMdIndexTopDown,
246    MdIndexMatchingTopDown,
247    EdgesMdIndexBottomUp,
248    MdIndexMatchingBottomUp,
249    RelaxedMdIndexMatching,
250    PrimeMatchingNoInstMin,
251    EdgesLengauerTarjanDominated,
252    LoopEntryMatching,
253    SelfLoopMatching,
254    EntryPointMatching,
255    ExitPointMatching,
256    InstructionCountMatching,
257    JumpSequenceMatching,
258    PropagationSizeOne,
259    Manual,
260    /// Unknown or custom algorithm
261    Other(String),
262}
263
264impl std::fmt::Display for BasicBlockAlgorithm {
265    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
266        match self {
267            BasicBlockAlgorithm::None => write!(f, "none"),
268            BasicBlockAlgorithm::EdgesPrimeProduct => write!(f, "edges prime product"),
269            BasicBlockAlgorithm::HashMatchingFourInstMin => {
270                write!(f, "hash matching (4 instructions minimum)")
271            }
272            BasicBlockAlgorithm::PrimeMatchingFourInstMin => {
273                write!(f, "prime matching (4 instructions minimum)")
274            }
275            BasicBlockAlgorithm::CallReferenceMatching => write!(f, "call reference matching"),
276            BasicBlockAlgorithm::StringReferencesMatching => write!(f, "string reference matching"),
277            BasicBlockAlgorithm::EdgesMdIndexTopDown => write!(f, "edges MD index (top down)"),
278            BasicBlockAlgorithm::MdIndexMatchingTopDown => {
279                write!(f, "MD index matching (top down)")
280            }
281            BasicBlockAlgorithm::EdgesMdIndexBottomUp => write!(f, "edges MD index (bottom up)"),
282            BasicBlockAlgorithm::MdIndexMatchingBottomUp => {
283                write!(f, "MD index matching (bottom up)")
284            }
285            BasicBlockAlgorithm::RelaxedMdIndexMatching => write!(f, "relaxed MD index matching"),
286            BasicBlockAlgorithm::PrimeMatchingNoInstMin => {
287                write!(f, "prime matching (0 instructions minimum)")
288            }
289            BasicBlockAlgorithm::EdgesLengauerTarjanDominated => {
290                write!(f, "edges Lengauer Tarjan dominated")
291            }
292            BasicBlockAlgorithm::LoopEntryMatching => write!(f, "loop entry matching"),
293            BasicBlockAlgorithm::SelfLoopMatching => write!(f, "self loop matching"),
294            BasicBlockAlgorithm::EntryPointMatching => write!(f, "entry point matching"),
295            BasicBlockAlgorithm::ExitPointMatching => write!(f, "exit point matching"),
296            BasicBlockAlgorithm::InstructionCountMatching => {
297                write!(f, "instruction count matching")
298            }
299            BasicBlockAlgorithm::JumpSequenceMatching => write!(f, "jump sequence matching"),
300            BasicBlockAlgorithm::PropagationSizeOne => write!(f, "propagation (size==1)"),
301            BasicBlockAlgorithm::Manual => write!(f, "manual"),
302            BasicBlockAlgorithm::Other(s) => write!(f, "other({})", s),
303        }
304    }
305}
306
307impl FromSql for BasicBlockAlgorithm {
308    fn column_result(value: ValueRef<'_>) -> FromSqlResult<Self> {
309        let algorithm_id: i32 = value.as_i64()? as i32;
310        match algorithm_id {
311            0 => Ok(BasicBlockAlgorithm::None),
312            1 => Ok(BasicBlockAlgorithm::EdgesPrimeProduct),
313            2 => Ok(BasicBlockAlgorithm::HashMatchingFourInstMin),
314            3 => Ok(BasicBlockAlgorithm::PrimeMatchingFourInstMin),
315            4 => Ok(BasicBlockAlgorithm::CallReferenceMatching),
316            5 => Ok(BasicBlockAlgorithm::StringReferencesMatching),
317            6 => Ok(BasicBlockAlgorithm::EdgesMdIndexTopDown),
318            7 => Ok(BasicBlockAlgorithm::MdIndexMatchingTopDown),
319            8 => Ok(BasicBlockAlgorithm::EdgesMdIndexBottomUp),
320            9 => Ok(BasicBlockAlgorithm::MdIndexMatchingBottomUp),
321            10 => Ok(BasicBlockAlgorithm::RelaxedMdIndexMatching),
322            11 => Ok(BasicBlockAlgorithm::PrimeMatchingNoInstMin),
323            12 => Ok(BasicBlockAlgorithm::EdgesLengauerTarjanDominated),
324            13 => Ok(BasicBlockAlgorithm::LoopEntryMatching),
325            14 => Ok(BasicBlockAlgorithm::SelfLoopMatching),
326            15 => Ok(BasicBlockAlgorithm::EntryPointMatching),
327            16 => Ok(BasicBlockAlgorithm::ExitPointMatching),
328            17 => Ok(BasicBlockAlgorithm::InstructionCountMatching),
329            18 => Ok(BasicBlockAlgorithm::JumpSequenceMatching),
330            19 => Ok(BasicBlockAlgorithm::PropagationSizeOne),
331            20 => Ok(BasicBlockAlgorithm::Manual),
332            other => Ok(BasicBlockAlgorithm::Other(other.to_string())),
333        }
334    }
335}
336
337#[derive(Debug, Clone, Serialize, Deserialize)]
338pub struct BasicBlockMatch {
339    pub id: i64,
340    pub function_id: i64,
341    pub address1: i64,
342    pub address2: i64,
343    pub algorithm: BasicBlockAlgorithm,
344    pub evaluate: bool,
345}
346
347impl std::fmt::Display for BasicBlockMatch {
348    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
349        write!(
350            f,
351            "{:#x} -> {:#x} ({})",
352            self.address1, self.address2, self.algorithm
353        )
354    }
355}
356
357#[derive(Debug, Clone, Serialize, Deserialize)]
358pub struct Instruction {
359    pub id: i64,
360    pub address1: i64,
361    pub address2: i64,
362}
363
364impl std::fmt::Display for Instruction {
365    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
366        write!(f, "{} -> {}", self.address1, self.address2)
367    }
368}
369
370/// Struct to handle SQLite database operations
371pub struct BinDiff {
372    connection: Connection,
373}
374
375impl BinDiff {
376    /// Open a connection to the SQLite database
377    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
378        let connection = Connection::open(path).context("Failed to open SQLite database")?;
379        Ok(Self { connection })
380    }
381
382    pub fn close(self) -> std::result::Result<(), rusqlite::Error> {
383        self.connection.close().map_err(|(_, err)| err)
384    }
385
386    pub fn read_metadata(&self) -> Result<Metadata> {
387        let mut stmt = self
388            .connection
389            .prepare("SELECT * FROM metadata")
390            .context("Failed to prepare metadata statement")?;
391
392        stmt.query_row(params![], |row| {
393            Ok(Metadata {
394                version: row.get(0)?,
395                file1: row.get(1)?,
396                file2: row.get(2)?,
397                description: row.get(3)?,
398                created: row.get(4)?,
399                modified: row.get(5)?,
400                similarity: row.get(6)?,
401                confidence: row.get(7)?,
402            })
403        })
404        .context("Failed to query metadata row")
405    }
406
407    /// Read all function matches from the database
408    pub fn read_file(&self) -> Result<File> {
409        let mut stmt = self
410            .connection
411            .prepare("SELECT * FROM file")
412            .context("Failed to prepare file statement")?;
413
414        stmt.query_row(params![], |row| {
415            Ok(File {
416                id: row.get(0)?,
417                filename: row.get(1)?,
418                exe_filename: row.get(2)?,
419                hash: row.get(3)?,
420                functions: row.get(4)?,
421                lib_functions: row.get(5)?,
422                calls: row.get(6)?,
423                basic_blocks: row.get(7)?,
424                lib_basic_blocks: row.get(8)?,
425                edges: row.get(9)?,
426                lib_edges: row.get(10)?,
427                instructions: row.get(11)?,
428                lib_instructions: row.get(12)?,
429            })
430        })
431        .context("Failed to query file row")
432    }
433
434    /// Count the number of function matches
435    pub fn count_function_matches(&self) -> Result<usize> {
436        let count: i64 = self
437            .connection
438            .query_row("SELECT COUNT(*) FROM function", params![], |row| row.get(0))
439            .context("Failed to count function matches")?;
440
441        Ok(count as usize)
442    }
443
444    pub fn read_function_matches(&self) -> Result<Vec<FunctionMatch>> {
445        let mut stmt = self
446            .connection
447            .prepare("SELECT * FROM function")
448            .context("Failed to prepare function statement")?;
449
450        let matches = stmt
451            .query_map(params![], |row| {
452                Ok(FunctionMatch {
453                    id: row.get(0)?,
454                    address1: row.get(1)?,
455                    name1: row.get(2)?,
456                    address2: row.get(3)?,
457                    name2: row.get(4)?,
458                    similarity: row.get(5)?,
459                    confidence: row.get(6)?,
460                    flags: row.get(7)?,
461                    algorithm: row.get(8)?,
462                    evaluate: row.get(9)?,
463                    comment_supported: row.get(10)?,
464                    basic_blocks: row.get(11)?,
465                    edges: row.get(12)?,
466                    instructions: row.get(13)?,
467                })
468            })
469            .context("Failed to query function row")?
470            .collect::<Result<Vec<FunctionMatch>, _>>()?;
471
472        Ok(matches)
473    }
474
475    /// Count the number of basic block matches
476    pub fn count_basic_block_matches(&self) -> Result<usize> {
477        let count: i64 = self
478            .connection
479            .query_row("SELECT COUNT(*) FROM basicblock", params![], |row| {
480                row.get(0)
481            })
482            .context("Failed to count basic block matches")?;
483
484        Ok(count as usize)
485    }
486
487    pub fn read_basic_block_matches(&self) -> Result<Vec<BasicBlockMatch>> {
488        let mut stmt = self
489            .connection
490            .prepare("SELECT * FROM basicblock")
491            .context("Failed to prepare basicblock statement")?;
492
493        let matches = stmt
494            .query_map(params![], |row| {
495                Ok(BasicBlockMatch {
496                    id: row.get(0)?,
497                    function_id: row.get(1)?,
498                    address1: row.get(2)?,
499                    address2: row.get(3)?,
500                    algorithm: row.get(4)?,
501                    evaluate: row.get(5)?,
502                })
503            })
504            .context("Failed to query basicblock row")?
505            .collect::<Result<Vec<BasicBlockMatch>, _>>()?;
506
507        Ok(matches)
508    }
509
510    /// Count the number of instruction matches
511    pub fn count_instruction_matches(&self) -> Result<usize> {
512        let count: i64 = self
513            .connection
514            .query_row("SELECT COUNT(*) FROM instruction", params![], |row| {
515                row.get(0)
516            })
517            .context("Failed to count instruction matches")?;
518
519        Ok(count as usize)
520    }
521
522    pub fn read_instruction_matches(&self) -> Result<Vec<Instruction>> {
523        let mut stmt = self
524            .connection
525            .prepare("SELECT * FROM instruction")
526            .context("Failed to prepare instruction statement")?;
527
528        let matches = stmt
529            .query_map(params![], |row| {
530                Ok(Instruction {
531                    id: row.get(0)?,
532                    address1: row.get(1)?,
533                    address2: row.get(2)?,
534                })
535            })
536            .context("Failed to query instruction row")?
537            .collect::<Result<Vec<Instruction>, _>>()?;
538
539        Ok(matches)
540    }
541}
542
/// Wrapper around a decoded [`binexport::BinExport2`] message.
#[derive(Debug, Clone)]
pub struct BinExport {
    /// The decoded message, as produced by [`BinExport::open`].
    pub binexport: binexport::BinExport2,
}
547
548impl BinExport {
549    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
550        let file = std::fs::read(&path).with_context(|| {
551            format!("Failed to read BinExport file: {}", path.as_ref().display())
552        })?;
553        let binexport = binexport::BinExport2::decode(&file[..])
554            .context("Failed to decode BinExport protobuf")?;
555        Ok(Self { binexport })
556    }
557
558    pub fn executable_name(&self) -> Result<String> {
559        let executable_name = self
560            .binexport
561            .meta_information
562            .as_ref()
563            .context("No meta information available")?
564            .executable_name
565            .clone()
566            .unwrap_or_else(|| "unknown executable".to_string());
567        Ok(executable_name)
568    }
569
570    // TODO: Add more methods to handle the BinExport protobuf
571}
572
// Usage examples, written as tests against optional fixture files.
#[cfg(test)]
mod tests {
    use super::*;

    /// Reports whether the fixture at `path` exists, printing the skip notice
    /// when it does not.
    fn fixture_present(path: &str) -> bool {
        if std::path::Path::new(path).exists() {
            return true;
        }
        println!("Test file {} not found. Skipping test.", path);
        false
    }

    /// Opens the BinDiff fixture and checks that each `count_*` query agrees
    /// with the number of rows returned by the matching `read_*` query.
    #[test]
    fn test_database_operations() -> Result<()> {
        let test_file_path = "tests/kernel.release_vs_kernel.release.BinDiff";

        // Without the fixture the test passes without checking anything.
        if !fixture_present(test_file_path) {
            return Ok(());
        }

        let db = BinDiff::open(test_file_path)?;

        println!("{}", db.read_file()?);
        println!("{}", db.read_metadata()?);

        let function_total = db.count_function_matches()?;
        println!("Total Matches: {}", function_total);

        let function_matches = db.read_function_matches()?;
        assert_eq!(function_matches.len(), function_total);
        function_matches
            .iter()
            .for_each(|func_match| println!("{}", func_match));

        // Basic block and instruction matches are counted but not printed one
        // by one.
        let block_total = db.count_basic_block_matches()?;
        println!("Total Basic Block Matches: {}", block_total);
        assert_eq!(db.read_basic_block_matches()?.len(), block_total);

        let instruction_total = db.count_instruction_matches()?;
        println!("Total Instruction Matches: {}", instruction_total);
        assert_eq!(db.read_instruction_matches()?.len(), instruction_total);

        db.close()?;

        Ok(())
    }

    /// Decodes the BinExport fixture and prints its executable name.
    #[test]
    fn test_read_binexport() -> Result<()> {
        let test_file_path = "tests/kernel.release.t6020.BinExport";

        // Without the fixture the test passes without checking anything.
        if !fixture_present(test_file_path) {
            return Ok(());
        }

        let export = BinExport::open(test_file_path)?;
        println!("executable_name: {}", export.executable_name()?);

        Ok(())
    }
}