compiledfiles/
lib.rs

1//! # compiledfiles
2//!
3//! A library to get a list of all files that were used to compile the given
4//! binary.
5//!
6//! This library currently only supports the following formats:
7//!
8//! * ELF files
9//! * PDB files
10//!
11//! The following file formats are a work in progress
12//!
13//! * Mach-O files
14//!
15//! This library currently only supports files generated by the following compilers:
16//!
17//! * GCC
18//! * LLVM
19//! * MSVC
20//!
21//! This library has currently only been tested with the following languages:
22//!
23//! * C/C++
24//!
25//! The following languages are a work in progress
26//!
27//! * Rust
28//! * Go
29//!
30//! Help is welcome for supporting any future formats.
31//!
32//! # Examples
33//!
34//! ```no_run
35//! let elf_file = std::fs::File::open("path_to_binary").unwrap();
36//! let files = compiledfiles::parse(elf_file).unwrap();
37//! for file in files {
38//!     println!("{:?}", file);
39//! }
40//! ```
41use gimli::Dwarf;
42use object::{Object, ObjectSection};
43use pdb::FallibleIterator;
44
45use std::cmp::Ordering;
46use std::io::Read;
47use std::io::Seek;
48use std::path::PathBuf;
49use std::vec::Vec;
50use std::{borrow::Cow, path::Path};
51
/// Checksum of the source file's content
///
/// Checksums order by variant as declared (`Md5 < Sha1 < Sha256`), then
/// byte-wise within the same variant.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum FileCheckSum {
    /// 16-byte MD5 digest
    Md5([u8; 16]),
    /// 20-byte SHA-1 digest
    Sha1([u8; 20]),
    /// 32-byte SHA-256 digest
    Sha256([u8; 32]),
}

/// Basic information stored for each source file. Only the path is required.
///
/// Entries sort by `path` first; `size`, `timestamp` and `checksum` are used
/// as tie-breakers so that the ordering agrees with equality.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct FileInfo {
    /// Recorded path to the source file
    pub path: PathBuf,

    /// Size of the source file in bytes
    pub size: Option<u64>,

    /// Last modified timestamp of the source file
    pub timestamp: Option<u64>,

    /// Checksum of the source file
    pub checksum: Option<FileCheckSum>,
}

impl PartialOrd for FileInfo {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        // Delegate to `Ord` so both orderings can never disagree.
        Some(self.cmp(other))
    }
}

impl Ord for FileInfo {
    /// Compares by path, then by the remaining fields.
    ///
    /// Comparing the path alone would report `Equal` for entries that `Eq`
    /// considers different. A stable `sort()` would then leave such entries in
    /// input order, and `dedup()` (which only removes *adjacent* equal items)
    /// could miss real duplicates.
    fn cmp(&self, other: &Self) -> Ordering {
        self.path
            .cmp(&other.path)
            .then_with(|| self.size.cmp(&other.size))
            .then_with(|| self.timestamp.cmp(&other.timestamp))
            .then_with(|| self.checksum.cmp(&other.checksum))
    }
}
87
88/// Possible errors for attempting to list all sources
89#[derive(thiserror::Error, Debug)]
90pub enum Error {
91    /// The binary file is a valid file format, but does not contain debug
92    /// symbols.
93    #[error("File was missing debug symbols")]
94    MissingDebugSymbols,
95
96    /// The format of the file past is a unknown format
97    #[error("File format was unrecognized")]
98    UnrecognizedFileFormat,
99
100    /// An IO error occurred
101    #[error("Error occured reading input data")]
102    Io {
103        #[from]
104        source: std::io::Error,
105    },
106
107    /// There was an error parsing the Dwarf information
108    #[error("Error occured while parsing Dwarf information")]
109    Dwarf {
110        #[from]
111        source: gimli::Error,
112    },
113
114    /// There was an error parsing an ELF or Mach-O file
115    #[error("Error occured while parsing ELF or Macho-O file")]
116    Object {
117        #[from]
118        source: object::Error,
119    },
120
121    /// There was an error parsing a PDB file
122    #[error("Error occured while parsing PDB file")]
123    Pdb {
124        #[from]
125        source: pdb::Error,
126    },
127}
128
129type Result<T> = ::std::result::Result<T, Error>;
130
131fn convert_pdb_checksum_to_checksum(pdb_checksum: pdb::FileChecksum) -> Option<FileCheckSum> {
132    match pdb_checksum {
133        pdb::FileChecksum::Md5(data) => {
134            let mut hash: [u8; 16] = [0; 16];
135            hash.copy_from_slice(data);
136            Some(FileCheckSum::Md5(hash))
137        }
138        pdb::FileChecksum::Sha1(data) => {
139            let mut hash: [u8; 20] = [0; 20];
140            hash.copy_from_slice(data);
141            Some(FileCheckSum::Sha1(hash))
142        }
143        pdb::FileChecksum::Sha256(data) => {
144            let mut hash: [u8; 32] = [0; 32];
145            hash.copy_from_slice(data);
146            Some(FileCheckSum::Sha256(hash))
147        }
148        pdb::FileChecksum::None => None,
149    }
150}
151
152/// Parses out the source file information from a file
153///
154/// # Arguments
155///
156/// * `source` - The source from which to read the bytes want to parse
157///
158/// # Example
159///
160/// ```no_run
161/// let elf_file = std::fs::File::open("path_to_binary").unwrap();
162/// let files = compiledfiles::parse(elf_file).unwrap();
163/// for file in files {
164///     println!("{:?}", file);
165/// }
166/// ```
167pub fn parse<S: Read + Seek + std::fmt::Debug>(mut source: S) -> Result<Vec<FileInfo>> {
168    // try parsing a PDB first
169    match pdb::PDB::open(&mut source) {
170        Ok(pdb) => return parse_pdb(pdb),
171        Err(e) => match e {
172            pdb::Error::UnrecognizedFileFormat => {
173                // continue
174            }
175            _ => return Err(Error::Pdb { source: e }),
176        },
177    };
178
179    source.rewind()?;
180
181    // Now try elf or mach-o
182    let mut contents = vec![];
183    source.read_to_end(&mut contents)?;
184
185    match object::File::parse(&contents[..]) {
186        Ok(obj) => parse_object(&obj),
187        Err(e) => Err(Error::Object { source: e }),
188    }
189}
190
191/// Parses out the source file information from a file at a given path
192///
193/// # Arguments
194///
195/// * `path` - The path of the file to read the source info from
196///
197/// # Example
198///
199/// ```no_run
200/// let elf_file = std::path::PathBuf::from("path_to_binary");
201/// let files = compiledfiles::parse_path(&elf_file).unwrap();
202/// for file in files {
203///     println!("{:?}", file);
204/// }
205/// ```
206pub fn parse_path<P>(path: P) -> Result<Vec<FileInfo>>
207where
208    P: AsRef<Path>,
209{
210    let file = std::fs::File::open(path)?;
211    parse(file)
212}
213
214fn parse_pdb<'s, S: pdb::Source<'s> + 's>(mut pdb: pdb::PDB<'s, S>) -> Result<Vec<FileInfo>> {
215    let mut files = vec![];
216
217    let dbi = pdb.debug_information()?;
218    let string_table = pdb.string_table()?;
219
220    let mut modules = dbi.modules()?;
221
222    while let Some(module) = modules.next()? {
223        if let Some(mod_info) = pdb.module_info(&module)? {
224            let line_program = mod_info.line_program()?;
225            let mut mod_files = line_program.files();
226            while let Some(file) = mod_files.next()? {
227                let path_str = file.name.to_raw_string(&string_table)?;
228                let file_checksum = file.checksum;
229                let path = PathBuf::from(path_str.to_string().as_ref());
230                let info = FileInfo {
231                    path,
232                    size: None,
233                    timestamp: None,
234                    checksum: convert_pdb_checksum_to_checksum(file_checksum),
235                };
236                files.push(info);
237            }
238        }
239    }
240
241    files.sort();
242    files.dedup();
243
244    Ok(files)
245}
246
247fn parse_object(file: &object::File) -> Result<Vec<FileInfo>> {
248    let endianness = if file.is_little_endian() {
249        gimli::RunTimeEndian::Little
250    } else {
251        gimli::RunTimeEndian::Big
252    };
253
254    if file.has_debug_symbols() {
255        match file.format() {
256            object::BinaryFormat::Elf => parse_elf_file(file, endianness),
257            object::BinaryFormat::Coff => Err(Error::MissingDebugSymbols),
258            object::BinaryFormat::MachO => parse_elf_file(file, endianness),
259            object::BinaryFormat::Pe => Err(Error::MissingDebugSymbols),
260            object::BinaryFormat::Wasm => unimplemented!(),
261            _ => Err(Error::UnrecognizedFileFormat),
262        }
263    } else {
264        Err(Error::MissingDebugSymbols)
265    }
266}
267
268fn parse_elf_file(file: &object::File, endianness: gimli::RunTimeEndian) -> Result<Vec<FileInfo>> {
269    // Load a section and return as `Cow<[u8]>`.
270    let load_section = |id: gimli::SectionId| -> Result<Cow<[u8]>> {
271        let data = match file.section_by_name(id.name()) {
272            Some(ref section) => section
273                .uncompressed_data()
274                .unwrap_or_else(|_| Cow::Owned(Vec::with_capacity(1))),
275            None => Cow::Owned(Vec::with_capacity(1)),
276        };
277        Ok(data)
278    };
279
280    // Load all of the sections.
281    let dwarf_cow = Dwarf::load(&load_section)?;
282
283    // Borrow a `Cow<[u8]>` to create an `EndianSlice`.
284    let borrow_section: &dyn for<'a> Fn(
285        &'a Cow<[u8]>,
286    ) -> gimli::EndianSlice<'a, gimli::RunTimeEndian> =
287        &|section| gimli::EndianSlice::new(section, endianness);
288
289    // Create `EndianSlice`s for all of the sections.
290    let dwarf = dwarf_cow.borrow(&borrow_section);
291
292    // Iterate over the compilation units.
293    let mut iter = dwarf.units();
294
295    let mut files = vec![];
296
297    while let Some(header) = iter.next()? {
298        let unit = dwarf.unit(header)?;
299
300        if let Some(ref program) = unit.line_program {
301            for file in program.header().file_names() {
302                let dir_attr = file.directory(program.header()).unwrap();
303                let dir_string = dwarf.attr_string(&unit, dir_attr)?.to_string_lossy();
304                let dir_str = dir_string.as_ref();
305                let mut path = PathBuf::from(dir_str);
306                if path.is_relative() {
307                    if let Some(ref comp_dir) = unit.comp_dir {
308                        let comp_dir =
309                            std::path::PathBuf::from(comp_dir.to_string_lossy().into_owned());
310                        path = comp_dir.join(path);
311                    }
312                }
313                let mut info = FileInfo {
314                    path,
315                    size: None,
316                    timestamp: None,
317                    checksum: None,
318                };
319
320                let filename_string = dwarf
321                    .attr_string(&unit, file.path_name())?
322                    .to_string_lossy();
323                let filename_str = filename_string.as_ref();
324                info.path.push(filename_str);
325
326                if program.header().file_has_timestamp() {
327                    info.timestamp = match file.timestamp() {
328                        0 => None,
329                        x => Some(x),
330                    };
331                }
332
333                if program.header().file_has_size() {
334                    info.size = match file.size() {
335                        0 => None,
336                        x => Some(x),
337                    };
338                }
339
340                if program.header().file_has_md5() {
341                    info.checksum = Some(FileCheckSum::Md5(*file.md5()));
342                }
343
344                // GCC will stick in a pseudo filename "<built-in>" for source
345                // built into GCC.
346                if !filename_str.starts_with('<') {
347                    files.push(info);
348                }
349            }
350        }
351    }
352
353    files.sort();
354    files.dedup();
355    Ok(files)
356}
357
#[cfg(test)]
mod tests {
    /// Placeholder sanity check; it does not exercise any parsing code.
    #[test]
    fn it_works() {
        let sum = 2 + 2;
        assert_eq!(sum, 4);
    }
}