fugue_db/
database.rs

1use std::fs::File;
2use std::io::{BufReader, Read, Write};
3use std::path::{Path, PathBuf};
4
5use fs_extra::file::{copy as copy_file, CopyOptions};
6
7use fugue_ir::LanguageDB;
8
9use fugue_ir::Translator;
10use iset::IntervalMap;
11use unicase::UniCase;
12use url::Url;
13
14use crate::architecture::{self, ArchitectureDef};
15use crate::BasicBlock;
16use crate::Function;
17use crate::Id;
18use crate::Metadata;
19use crate::Segment;
20
21use crate::backend::{Backend, DatabaseImporterBackend, Imported};
22use crate::error::Error;
23use crate::schema;
24
/// Self-referential storage that backs [`Database`].
///
/// The struct is generated through `ouroboros`: `functions` holds values that
/// borrow from the boxed `segments` and `translators` stored alongside it.
///
/// The derived `Debug`, `PartialEq`, `Eq` and `Hash` implementations skip
/// `translators` and `segments` (see the `ignore` markers on those fields),
/// so they only take `functions` and `metadata` into account.
25#[ouroboros::self_referencing(chain_hack)]
26#[derive(educe::Educe)]
27#[educe(Debug, Clone, PartialEq, Eq, Hash)]
28pub struct DatabaseImpl {
    // Translators owned by the database; borrowed by `functions`.
29    #[educe(Debug(ignore), PartialEq(ignore), Eq(ignore), Hash(ignore))]
30    translators: Box<Vec<Translator>>,
    // Segments keyed by address interval; borrowed by `functions`.
31    #[educe(Debug(ignore), PartialEq(ignore), Eq(ignore), Hash(ignore))]
32    segments: Box<IntervalMap<u64, Segment>>,
    // Functions; these borrow from the two boxed fields declared above.
33    #[borrows(segments, translators)]
34    #[covariant]
35    functions: Vec<Function<'this>>,
    // Metadata stored next to the database contents.
36    metadata: Metadata,
37}
38
/// Public handle to a loaded database.
///
/// A transparent newtype over [`DatabaseImpl`]; all accessors on this type
/// go through the wrapped value.
39#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)]
40#[repr(transparent)]
41pub struct Database(DatabaseImpl);
42
43impl Default for DatabaseImpl {
44    fn default() -> Self {
45        DatabaseImpl::new(
46            Box::new(Vec::new()),
47            Box::new(IntervalMap::new()),
48            |_, _| Vec::new(),
49            Metadata::default(),
50        )
51    }
52}
53
54impl Database {
55    pub fn architectures(&self) -> impl Iterator<Item = &ArchitectureDef> {
56        self.0
57            .borrow_translators()
58            .iter()
59            .map(Translator::architecture)
60    }
61
62    pub fn default_translator(&self) -> Translator {
63        self.0
64            .borrow_translators()
65            .first()
66            .map(|t| t.clone())
67            .expect("default translator")
68    }
69
70    pub fn translators(&self) -> impl Iterator<Item = &Translator> {
71        self.0.borrow_translators().iter()
72    }
73
74    pub fn segments(&self) -> &IntervalMap<u64, Segment> {
75        self.0.borrow_segments()
76    }
77
78    pub fn segment<S: AsRef<str>>(&self, name: S) -> Option<&Segment> {
79        let name = name.as_ref();
80        self.segments()
81            .iter(..)
82            .find_map(|(_, s)| if s.name() == name { Some(s) } else { None })
83    }
84
85    pub fn functions(&self) -> &[Function] {
86        self.0.borrow_functions()
87    }
88
89    pub fn functions_in<S: AsRef<str>>(
90        &self,
91        segment: S,
92    ) -> Option<impl Iterator<Item = &Function>> {
93        let segment = self.segment(segment)?.id();
94        Some(
95            self.functions()
96                .iter()
97                .filter(move |f| f.segment_id() == segment),
98        )
99    }
100
101    pub fn function_with<F>(&self, f: F) -> Option<&Function>
102    where
103        F: FnMut(&Function) -> bool,
104    {
105        let mut f = f;
106        self.0.borrow_functions().iter().find(|&fun| f(fun))
107    }
108
109    pub fn function<S: AsRef<str>>(&self, name: S) -> Option<&Function> {
110        let name = name.as_ref();
111        self.0.borrow_functions().iter().find(|f| f.name() == name)
112    }
113
114    pub fn externals(&self) -> Option<impl Iterator<Item = &Function>> {
115        self.functions_in(".extern") // Binary Ninja
116            .or_else(|| self.functions_in("extern")) // IDA Pro
117            .or_else(|| self.functions_in("EXTERNAL")) // Ghidra
118    }
119
120    pub fn blocks(&self) -> impl Iterator<Item = &BasicBlock> {
121        self.functions().iter().map(Function::blocks).flatten()
122    }
123
124    pub fn block_count(&self) -> usize {
125        self.functions().iter().map(|f| f.blocks().len()).sum()
126    }
127
128    pub fn edge_count(&self) -> usize {
129        self.functions()
130            .iter()
131            .map(|f| {
132                f.blocks()
133                    .iter()
134                    .map(|b| b.predecessors().len())
135                    .sum::<usize>()
136                    + f.references().len()
137            })
138            .sum()
139    }
140
141    pub fn blocks_in<S: AsRef<str>>(
142        &self,
143        name: S,
144    ) -> Option<impl Iterator<Item = (&BasicBlock, &[u8])>> {
145        let segment = self.segment(name)?.id();
146        Some(self.blocks().filter_map(move |b| {
147            if b.segment().id() == segment {
148                Some((b, b.bytes()))
149            } else {
150                None
151            }
152        }))
153    }
154
155    pub fn metadata(&self) -> &Metadata {
156        self.0.borrow_metadata()
157    }
158
159    pub fn from_bytes(bytes: &[u8], language_db: &LanguageDB) -> Result<Self, Error> {
160        let reader = schema::root_as_project(&bytes).map_err(Error::Deserialisation)?;
161
162        Self::from_reader(reader, language_db)
163    }
164
165    pub fn from_file<P: AsRef<Path>>(path: P, language_db: &LanguageDB) -> Result<Self, Error> {
166        let path = path.as_ref();
167        let mut file = BufReader::new(File::open(path).map_err(Error::CannotReadFile)?);
168
169        let mut bytes = Vec::new();
170        file.read_to_end(&mut bytes)
171            .map_err(Error::CannotReadFile)?;
172
173        Self::from_bytes(&bytes, language_db)
174    }
175
176    fn from_reader<'a>(
177        database: schema::Project<'a>,
178        language_db: &LanguageDB,
179    ) -> Result<Self, Error> {
180        let metadata = Metadata::from_reader(
181            database
182                .metadata()
183                .ok_or(Error::DeserialiseField("metadata"))?,
184        )?;
185
186        let architectures = database
187            .architectures()
188            .ok_or(Error::DeserialiseField("architectures"))?
189            .into_iter()
190            .map(|r| architecture::from_reader(&r))
191            .collect::<Result<Vec<_>, _>>()?;
192
193        let translators = architectures
194            .into_iter()
195            .map(|arch| {
196                language_db
197                    .lookup(arch.processor(), arch.endian(), arch.bits(), arch.variant())
198                    .ok_or_else(|| Error::UnsupportedArchitecture(arch))?
199                    .build()
200                    .map_err(Error::Translator)
201            })
202            .collect::<Result<Vec<_>, _>>()?;
203
204        let mut segment_id = 0usize;
205        let segments = database
206            .segments()
207            .ok_or(Error::DeserialiseField("segments"))?
208            .into_iter()
209            .filter_map(|r| match Segment::from_reader(Id::from(segment_id), &r) {
210                Ok(seg) if seg.len() != 0 => {
211                    segment_id += 1;
212                    Some(Ok((
213                        seg.address()..(seg.address() + seg.len() as u64),
214                        seg,
215                    )))
216                }
217                Ok(_) => None,
218                Err(e) => Some(Err(e)),
219            })
220            .collect::<Result<IntervalMap<_, _>, Error>>()?;
221
222        Ok(Self(DatabaseImpl::try_new(
223            Box::new(translators),
224            Box::new(segments),
225            |segments, translators| {
226                database
227                    .functions()
228                    .ok_or(Error::DeserialiseField("functions"))?
229                    .into_iter()
230                    .map(|r| Function::from_reader(r, segments, translators))
231                    .collect::<Result<Vec<_>, _>>()
232            },
233            metadata,
234        )?))
235    }
236
237    pub fn to_file<P: AsRef<Path>>(&self, path: P) -> Result<(), Error> {
238        let path = path.as_ref();
239        let mut file = File::create(path).map_err(Error::CannotWriteFile)?;
240
241        let mut builder = flatbuffers::FlatBufferBuilder::new();
242
243        let project = self.to_builder(&mut builder)?;
244        schema::finish_project_buffer(&mut builder, project);
245
246        file.write_all(builder.finished_data())
247            .map_err(Error::CannotWriteFile)?;
248
249        Ok(())
250    }
251
252    pub(crate) fn to_builder<'a: 'b, 'b>(
253        &self,
254        builder: &'b mut flatbuffers::FlatBufferBuilder<'a>,
255    ) -> Result<flatbuffers::WIPOffset<schema::Project<'a>>, Error> {
256        let architectures = self
257            .architectures()
258            .map(|r| architecture::to_builder(r, builder))
259            .collect::<Result<Vec<_>, _>>()?;
260        let avec = builder.create_vector_from_iter(architectures.into_iter());
261
262        let segments = self
263            .segments()
264            .values(..)
265            .map(|r| r.to_builder(builder))
266            .collect::<Result<Vec<_>, _>>()?;
267        let svec = builder.create_vector_from_iter(segments.into_iter());
268
269        let functions = self
270            .functions()
271            .iter()
272            .map(|r| r.to_builder(builder))
273            .collect::<Result<Vec<_>, _>>()?;
274        let fvec = builder.create_vector_from_iter(functions.into_iter());
275
276        let meta = self.metadata().to_builder(builder)?;
277
278        let mut dbuilder = schema::ProjectBuilder::new(builder);
279
280        dbuilder.add_architectures(avec);
281        dbuilder.add_segments(svec);
282        dbuilder.add_functions(fvec);
283        dbuilder.add_metadata(meta);
284
285        Ok(dbuilder.finish())
286    }
287}
288
289pub struct DatabaseImporter {
290    program: Option<url::Url>,
291    fdb_path: Option<PathBuf>,
292    overwrite_fdb: bool,
293    backend_pref: Option<String>,
294    backends: Vec<DatabaseImporterBackend>,
295}
296
297impl Default for DatabaseImporter {
298    fn default() -> Self {
299        Self {
300            program: None,
301            fdb_path: None,
302            overwrite_fdb: false,
303            backend_pref: None,
304            backends: Vec::default(),
305        }
306    }
307}
308
309impl DatabaseImporter {
310    pub fn new<P: AsRef<Path>>(program: P) -> DatabaseImporter {
311        Self::new_url(Self::url_from_path(program))
312    }
313
314    pub fn new_url<U: Into<Url>>(program_url: U) -> DatabaseImporter {
315        Self {
316            program: Some(program_url.into()),
317            ..Default::default()
318        }
319    }
320
321    fn url_from_path<P: AsRef<Path>>(path: P) -> Url {
322        let path = path.as_ref();
323        if path.is_absolute() {
324            Url::from_file_path(path).unwrap()
325        } else {
326            let apath = std::env::current_dir().unwrap().join(path);
327            Url::from_file_path(apath).unwrap()
328        }
329    }
330
331    pub fn available_backends(&self) -> impl Iterator<Item = &DatabaseImporterBackend> {
332        self.backends.iter()
333    }
334
335    pub fn prefer_backend<N: Into<String>>(&mut self, backend: N) -> &mut Self {
336        self.backend_pref = Some(backend.into());
337        self
338    }
339
340    pub fn register_backend<B, E>(&mut self, backend: B) -> &mut Self
341    where
342        B: Backend<Error = E> + 'static,
343        E: Into<Error> + 'static,
344    {
345        self.backends.push(DatabaseImporterBackend::new(backend));
346        self
347    }
348
349    pub fn program<P: AsRef<Path>>(&mut self, program: P) -> &mut Self {
350        self.program = Some(Self::url_from_path(program));
351        self
352    }
353
354    pub fn remote<U: Into<Url>>(&mut self, url: U) -> &mut Self {
355        self.program = Some(url.into());
356        self
357    }
358
359    pub fn export_to<P: AsRef<Path>>(&mut self, database: P) -> &mut Self {
360        self.fdb_path = Some(database.as_ref().to_owned());
361        self
362    }
363
364    pub fn overwrite(&mut self, overwrite: bool) -> &mut Self {
365        self.overwrite_fdb = overwrite;
366        self
367    }
368
369    pub fn import(&self, language_db: &LanguageDB) -> Result<Database, Error> {
370        let program = if let Some(ref program) = self.program {
371            program.clone()
372        } else {
373            return Err(Error::NoImportUrl);
374        };
375
376        if let Some(ref fdb_path) = self.fdb_path {
377            if fdb_path.exists() && !self.overwrite_fdb {
378                return Err(Error::ExportPathExists(fdb_path.to_owned()));
379            }
380        }
381
382        if program.scheme() == "file" {
383            let program = program
384                .to_file_path()
385                .map_err(|_| Error::InvalidLocalImportUrl(program.clone()))?;
386
387            // importing from an existing database
388            if program.extension().map(|e| e == "fdb").unwrap_or(false) {
389                if let Ok(db) = Database::from_file(&program, language_db) {
390                    return Ok(db);
391                }
392            };
393        }
394
395        let mut backends = self
396            .available_backends()
397            .filter_map(|b| {
398                if !b.is_available() {
399                    None
400                } else if let Some(pref) = b.is_preferred_for(&program) {
401                    Some((if pref { 5 } else { 1 }, b))
402                } else {
403                    None
404                }
405            })
406            .collect::<Vec<_>>();
407
408        if backends.is_empty() {
409            return Err(Error::NoBackendsAvailable);
410        }
411
412        backends.sort_by_key(|(base_score, b)| {
413            -if let Some(ref pref) = self.backend_pref {
414                *base_score
415                    + if UniCase::new(pref) == UniCase::new(b.name()) {
416                        1
417                    } else {
418                        0
419                    }
420            } else {
421                *base_score
422            }
423        });
424
425        // Try all backends
426        let mut res = Err(Error::NoBackendsAvailable);
427        for (_, backend) in backends {
428            // log::debug!("Trying backend {}", backend.name());
429            res = backend.import(&program);
430            if res.is_ok() {
431                break;
432            }
433        }
434
435        match res {
436            Ok(Imported::File(ref path)) => {
437                let db = Database::from_file(path, language_db)?;
438                if let Some(ref fdb_path) = self.fdb_path {
439                    if path != fdb_path {
440                        // copy it
441                        copy_file(
442                            path,
443                            fdb_path,
444                            &CopyOptions {
445                                overwrite: true,
446                                skip_exist: false,
447                                ..Default::default()
448                            },
449                        )
450                        .map_err(Error::ExportViaCopy)?;
451                    }
452                }
453                Ok(db)
454            }
455            Ok(Imported::Bytes(ref bytes)) => {
456                let db = Database::from_bytes(bytes, language_db)?;
457                if let Some(ref fdb_path) = self.fdb_path {
458                    db.to_file(fdb_path)?;
459                }
460                Ok(db)
461            }
462            Err(e) => Err(e),
463        }
464    }
465}