1use gimli::Dwarf;
42use object::{Object, ObjectSection};
43use pdb::FallibleIterator;
44
45use std::cmp::Ordering;
46use std::io::Read;
47use std::io::Seek;
48use std::path::PathBuf;
49use std::vec::Vec;
50use std::{borrow::Cow, path::Path};
51
/// Checksum of a source file as recorded in the debug information.
///
/// Variants are ordered as declared (`Md5 < Sha1 < Sha256`), then by digest bytes.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum FileCheckSum {
    /// 16-byte MD5 digest.
    Md5([u8; 16]),
    /// 20-byte SHA-1 digest.
    Sha1([u8; 20]),
    /// 32-byte SHA-256 digest.
    Sha256([u8; 32]),
}

/// A source file referenced by the debug information of a binary.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct FileInfo {
    /// Path of the source file as recorded in the debug information.
    pub path: PathBuf,

    /// Size of the source file, when the debug information records one.
    pub size: Option<u64>,

    /// Timestamp of the source file, when the debug information records one.
    pub timestamp: Option<u64>,

    /// Checksum of the source file, when the debug information records one.
    pub checksum: Option<FileCheckSum>,
}

impl PartialOrd for FileInfo {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        // Delegate to `Ord` so both orderings can never disagree.
        Some(self.cmp(other))
    }
}

impl Ord for FileInfo {
    /// Orders by `path` first, then by `size`, `timestamp` and `checksum`.
    ///
    /// The tie-breakers keep the ordering consistent with the derived `Eq`
    /// (`a.cmp(&b) == Equal` exactly when `a == b`). Without them, sorting a list
    /// could leave equal entries non-adjacent, so a following `dedup()` would miss
    /// them.
    fn cmp(&self, other: &Self) -> Ordering {
        self.path
            .cmp(&other.path)
            .then_with(|| self.size.cmp(&other.size))
            .then_with(|| self.timestamp.cmp(&other.timestamp))
            .then_with(|| self.checksum.cmp(&other.checksum))
    }
}
87
88#[derive(thiserror::Error, Debug)]
90pub enum Error {
91 #[error("File was missing debug symbols")]
94 MissingDebugSymbols,
95
96 #[error("File format was unrecognized")]
98 UnrecognizedFileFormat,
99
100 #[error("Error occured reading input data")]
102 Io {
103 #[from]
104 source: std::io::Error,
105 },
106
107 #[error("Error occured while parsing Dwarf information")]
109 Dwarf {
110 #[from]
111 source: gimli::Error,
112 },
113
114 #[error("Error occured while parsing ELF or Macho-O file")]
116 Object {
117 #[from]
118 source: object::Error,
119 },
120
121 #[error("Error occured while parsing PDB file")]
123 Pdb {
124 #[from]
125 source: pdb::Error,
126 },
127}
128
129type Result<T> = ::std::result::Result<T, Error>;
130
131fn convert_pdb_checksum_to_checksum(pdb_checksum: pdb::FileChecksum) -> Option<FileCheckSum> {
132 match pdb_checksum {
133 pdb::FileChecksum::Md5(data) => {
134 let mut hash: [u8; 16] = [0; 16];
135 hash.copy_from_slice(data);
136 Some(FileCheckSum::Md5(hash))
137 }
138 pdb::FileChecksum::Sha1(data) => {
139 let mut hash: [u8; 20] = [0; 20];
140 hash.copy_from_slice(data);
141 Some(FileCheckSum::Sha1(hash))
142 }
143 pdb::FileChecksum::Sha256(data) => {
144 let mut hash: [u8; 32] = [0; 32];
145 hash.copy_from_slice(data);
146 Some(FileCheckSum::Sha256(hash))
147 }
148 pdb::FileChecksum::None => None,
149 }
150}
151
152pub fn parse<S: Read + Seek + std::fmt::Debug>(mut source: S) -> Result<Vec<FileInfo>> {
168 match pdb::PDB::open(&mut source) {
170 Ok(pdb) => return parse_pdb(pdb),
171 Err(e) => match e {
172 pdb::Error::UnrecognizedFileFormat => {
173 }
175 _ => return Err(Error::Pdb { source: e }),
176 },
177 };
178
179 source.rewind()?;
180
181 let mut contents = vec![];
183 source.read_to_end(&mut contents)?;
184
185 match object::File::parse(&contents[..]) {
186 Ok(obj) => parse_object(&obj),
187 Err(e) => Err(Error::Object { source: e }),
188 }
189}
190
191pub fn parse_path<P>(path: P) -> Result<Vec<FileInfo>>
207where
208 P: AsRef<Path>,
209{
210 let file = std::fs::File::open(path)?;
211 parse(file)
212}
213
214fn parse_pdb<'s, S: pdb::Source<'s> + 's>(mut pdb: pdb::PDB<'s, S>) -> Result<Vec<FileInfo>> {
215 let mut files = vec![];
216
217 let dbi = pdb.debug_information()?;
218 let string_table = pdb.string_table()?;
219
220 let mut modules = dbi.modules()?;
221
222 while let Some(module) = modules.next()? {
223 if let Some(mod_info) = pdb.module_info(&module)? {
224 let line_program = mod_info.line_program()?;
225 let mut mod_files = line_program.files();
226 while let Some(file) = mod_files.next()? {
227 let path_str = file.name.to_raw_string(&string_table)?;
228 let file_checksum = file.checksum;
229 let path = PathBuf::from(path_str.to_string().as_ref());
230 let info = FileInfo {
231 path,
232 size: None,
233 timestamp: None,
234 checksum: convert_pdb_checksum_to_checksum(file_checksum),
235 };
236 files.push(info);
237 }
238 }
239 }
240
241 files.sort();
242 files.dedup();
243
244 Ok(files)
245}
246
247fn parse_object(file: &object::File) -> Result<Vec<FileInfo>> {
248 let endianness = if file.is_little_endian() {
249 gimli::RunTimeEndian::Little
250 } else {
251 gimli::RunTimeEndian::Big
252 };
253
254 if file.has_debug_symbols() {
255 match file.format() {
256 object::BinaryFormat::Elf => parse_elf_file(file, endianness),
257 object::BinaryFormat::Coff => Err(Error::MissingDebugSymbols),
258 object::BinaryFormat::MachO => parse_elf_file(file, endianness),
259 object::BinaryFormat::Pe => Err(Error::MissingDebugSymbols),
260 object::BinaryFormat::Wasm => unimplemented!(),
261 _ => Err(Error::UnrecognizedFileFormat),
262 }
263 } else {
264 Err(Error::MissingDebugSymbols)
265 }
266}
267
268fn parse_elf_file(file: &object::File, endianness: gimli::RunTimeEndian) -> Result<Vec<FileInfo>> {
269 let load_section = |id: gimli::SectionId| -> Result<Cow<[u8]>> {
271 let data = match file.section_by_name(id.name()) {
272 Some(ref section) => section
273 .uncompressed_data()
274 .unwrap_or_else(|_| Cow::Owned(Vec::with_capacity(1))),
275 None => Cow::Owned(Vec::with_capacity(1)),
276 };
277 Ok(data)
278 };
279
280 let dwarf_cow = Dwarf::load(&load_section)?;
282
283 let borrow_section: &dyn for<'a> Fn(
285 &'a Cow<[u8]>,
286 ) -> gimli::EndianSlice<'a, gimli::RunTimeEndian> =
287 &|section| gimli::EndianSlice::new(section, endianness);
288
289 let dwarf = dwarf_cow.borrow(&borrow_section);
291
292 let mut iter = dwarf.units();
294
295 let mut files = vec![];
296
297 while let Some(header) = iter.next()? {
298 let unit = dwarf.unit(header)?;
299
300 if let Some(ref program) = unit.line_program {
301 for file in program.header().file_names() {
302 let dir_attr = file.directory(program.header()).unwrap();
303 let dir_string = dwarf.attr_string(&unit, dir_attr)?.to_string_lossy();
304 let dir_str = dir_string.as_ref();
305 let mut path = PathBuf::from(dir_str);
306 if path.is_relative() {
307 if let Some(ref comp_dir) = unit.comp_dir {
308 let comp_dir =
309 std::path::PathBuf::from(comp_dir.to_string_lossy().into_owned());
310 path = comp_dir.join(path);
311 }
312 }
313 let mut info = FileInfo {
314 path,
315 size: None,
316 timestamp: None,
317 checksum: None,
318 };
319
320 let filename_string = dwarf
321 .attr_string(&unit, file.path_name())?
322 .to_string_lossy();
323 let filename_str = filename_string.as_ref();
324 info.path.push(filename_str);
325
326 if program.header().file_has_timestamp() {
327 info.timestamp = match file.timestamp() {
328 0 => None,
329 x => Some(x),
330 };
331 }
332
333 if program.header().file_has_size() {
334 info.size = match file.size() {
335 0 => None,
336 x => Some(x),
337 };
338 }
339
340 if program.header().file_has_md5() {
341 info.checksum = Some(FileCheckSum::Md5(*file.md5()));
342 }
343
344 if !filename_str.starts_with('<') {
347 files.push(info);
348 }
349 }
350 }
351 }
352
353 files.sort();
354 files.dedup();
355 Ok(files)
356}
357
#[cfg(test)]
mod tests {
    /// Placeholder check that only confirms the test harness runs; it does not
    /// exercise any item of this file.
    #[test]
    fn it_works() {
        let sum = 2 + 2;
        assert_eq!(sum, 4);
    }
}