1use crate::*;
2use std::fs::File;
3use std::io::Read;
4
5use serde::Deserialize;
6use thiserror::Error as TError;
7
8#[derive(TError, Debug)]
9pub enum ReadELFError {
10 #[error("input file `{file_path}` is not an ELF file")]
11 NotELF { file_path: String },
12 #[error("can't parse elf header => `{k}`")]
13 CantParseELFHeader { k: Box<dyn std::error::Error> },
14 #[error("can't parse section header => `{k}`")]
15 CantParseSectionHeader { k: Box<dyn std::error::Error> },
16 #[error("can't parse program header => `{k}`")]
17 CantParseProgramHeader { k: Box<dyn std::error::Error> },
18 #[error("can't parse symbol => `{k}`")]
19 CantParseSymbol { k: Box<dyn std::error::Error> },
20}
21
22pub fn parse_elf64(file_path: &str) -> Result<file::ELF64, Box<dyn std::error::Error>> {
24 Ok(parse_elf(file_path)?.as_64bit())
25}
26
27pub fn parse_elf32(file_path: &str) -> Result<file::ELF32, Box<dyn std::error::Error>> {
29 Ok(parse_elf(file_path)?.as_32bit())
30}
31
32pub fn parse_elf(file_path: &str) -> Result<file::ELF, Box<dyn std::error::Error>> {
34 let mut f = File::open(file_path)?;
35 let mut buf = Vec::new();
36 let _ = f.read_to_end(&mut buf);
37
38 let _ = check_elf_magic(file_path, &buf[..4])?;
39
40 let elf_class = header::Class::from(buf[header::Class::INDEX]);
42
43 let elf_header = parse_elf_header(elf_class, &buf)?;
44 let phdr_table_exists = elf_header.pht_exists();
45
46 let mut sections = read_sht(elf_class, elf_header.shnum(), elf_header.sht_start(), &buf)?;
47 let mut segments = Vec::new();
48
49 if phdr_table_exists {
50 segments = read_pht(elf_class, elf_header.phnum(), elf_header.pht_start(), &buf)?;
51 }
52
53 naming_sections_from_shstrtab(elf_header.shstrndx(), &mut sections);
56
57 naming_symbols(&mut sections);
60
61 match elf_class {
62 header::Class::Bit64 => Ok(file::ELF::ELF64(file::ELF64 {
63 ehdr: elf_header.as_64bit(),
64 sections: sections.iter().map(|sct| sct.as_64bit()).collect(),
65 segments: segments.iter().map(|sgt| sgt.as_64bit()).collect(),
66 })),
67 header::Class::Bit32 => Ok(file::ELF::ELF32(file::ELF32 {
68 ehdr: elf_header.as_32bit(),
69 sections: sections.iter().map(|sct| sct.as_32bit()).collect(),
70 segments: segments.iter().map(|sgt| sgt.as_32bit()).collect(),
71 })),
72 _ => todo!(),
73 }
74}
75
76fn read_sht(
78 class: header::Class,
79
80 section_number: usize,
81 sht_offset: usize,
82 buf: &[u8],
83) -> Result<Vec<section::Section>, Box<dyn std::error::Error>> {
84 let mut sections = Vec::with_capacity(50);
85 let shdr_size = match class {
86 header::Class::Bit32 => section::Shdr32::SIZE,
87 header::Class::Bit64 => section::Shdr64::SIZE,
88 _ => todo!(),
89 };
90
91 for sct_idx in 0..section_number {
92 let header_start = sht_offset + shdr_size * sct_idx;
93 let shdr = match class {
94 header::Class::Bit32 => {
95 section::Shdr::Shdr32(bincode::deserialize(&buf[header_start..])?)
96 }
97 header::Class::Bit64 => {
98 section::Shdr::Shdr64(bincode::deserialize(&buf[header_start..])?)
99 }
100 _ => todo!(),
101 };
102
103 let mut sct = section::Section::new(shdr);
104 let section_type = sct.ty();
105
106 if section_type != section::Type::NoBits {
107 let section_offset = sct.offset();
108 let section_raw_contents =
109 buf[section_offset..section_offset + sct.size() as usize].to_vec();
110
111 sct.contents = match section_type {
112 section::Type::StrTab => parse_string_table(class, §ion_raw_contents),
113 section::Type::SymTab | section::Type::DynSym => {
114 parse_symbol_table(class, &sct, §ion_raw_contents)
115 }
116 section::Type::Rela => parse_rela_symbol_table(class, &sct, §ion_raw_contents),
117 section::Type::Dynamic => {
118 parse_dynamic_information(class, &sct, §ion_raw_contents)
119 }
120 _ => match class {
121 header::Class::Bit32 => section::Contents::Contents32(
122 section::Contents32::Raw(section_raw_contents),
123 ),
124 header::Class::Bit64 => section::Contents::Contents64(
125 section::Contents64::Raw(section_raw_contents),
126 ),
127 _ => todo!(),
128 },
129 }
130 }
131
132 sections.push(sct);
133 }
134
135 Ok(sections)
136}
137
138fn parse_string_table(class: header::Class, section_raw_contents: &Vec<u8>) -> section::Contents {
139 let mut strs: Vec<section::StrTabEntry> = Default::default();
140 let mut name_idx = 0;
141 loop {
142 if name_idx >= section_raw_contents.len() {
143 break;
144 }
145
146 if section_raw_contents[name_idx] == 0x00 {
147 name_idx += 1;
148 continue;
149 }
150
151 let nul_range_end = section_raw_contents[name_idx..]
152 .iter()
153 .position(|&c| c == b'\0')
154 .unwrap_or(section_raw_contents.len());
155 let s = std::str::from_utf8(§ion_raw_contents[name_idx..name_idx + nul_range_end])
156 .unwrap()
157 .to_string();
158
159 let idx = name_idx;
160 name_idx += s.len();
161 strs.push(section::StrTabEntry { v: s, idx });
162 }
163
164 match class {
165 header::Class::Bit32 => section::Contents::Contents32(section::Contents32::StrTab(strs)),
166 header::Class::Bit64 => section::Contents::Contents64(section::Contents64::StrTab(strs)),
167 _ => todo!(),
168 }
169}
170
171fn parse_rela_symbol_table(
172 class: header::Class,
173 sct: §ion::Section,
174 raw_symtab: &Vec<u8>,
175) -> section::Contents {
176 let entry_size = sct.entry_size();
177 let entry_number = sct.size() / entry_size;
178 match class {
179 header::Class::Bit32 => section::Contents::Contents32(section::Contents32::RelaSymbols(
180 parse_table(entry_size, entry_number, raw_symtab),
181 )),
182 header::Class::Bit64 => section::Contents::Contents64(section::Contents64::RelaSymbols(
183 parse_table(entry_size, entry_number, raw_symtab),
184 )),
185 _ => todo!(),
186 }
187}
188
189fn parse_dynamic_information(
190 class: header::Class,
191 sct: §ion::Section,
192 raw_symtab: &Vec<u8>,
193) -> section::Contents {
194 let entry_size = sct.entry_size();
195 let entry_number = sct.size() / entry_size;
196 match class {
197 header::Class::Bit32 => section::Contents::Contents32(section::Contents32::Dynamics(
198 parse_table(entry_size, entry_number, raw_symtab),
199 )),
200 header::Class::Bit64 => section::Contents::Contents64(section::Contents64::Dynamics(
201 parse_table(entry_size, entry_number, raw_symtab),
202 )),
203 _ => todo!(),
204 }
205}
206
207fn parse_symbol_table(
208 class: header::Class,
209 sct: §ion::Section,
210 raw_symtab: &Vec<u8>,
211) -> section::Contents {
212 let entry_size = sct.entry_size();
213 let entry_number = sct.size() / entry_size;
214 match class {
215 header::Class::Bit32 => section::Contents::Contents32(section::Contents32::Symbols(
216 parse_table(entry_size, entry_number, raw_symtab),
217 )),
218 header::Class::Bit64 => section::Contents::Contents64(section::Contents64::Symbols(
219 parse_table(entry_size, entry_number, raw_symtab),
220 )),
221 _ => todo!(),
222 }
223}
224
225fn parse_table<'a, T: Deserialize<'a>>(
226 entry_size: usize,
227 entry_number: usize,
228 buf: &'a [u8],
229) -> Vec<T> {
230 let mut table = Vec::new();
231 for idx in 0..entry_number {
232 let start = idx * entry_size;
233 let end = (idx + 1) * entry_size;
234 let entry = bincode::deserialize(&buf[start..end]).unwrap();
235 table.push(entry);
236 }
237 table
238}
239
240fn read_pht(
242 class: header::Class,
243 phnum: usize,
244 pht_start: usize,
245 buf: &[u8],
246) -> Result<Vec<segment::Segment>, Box<dyn std::error::Error>> {
247 let mut segments = Vec::with_capacity(10);
248 let phdr_size = match class {
249 header::Class::Bit32 => segment::Phdr32::SIZE,
250 header::Class::Bit64 => segment::Phdr64::SIZE,
251 _ => todo!(),
252 };
253
254 for seg_idx in 0..phnum {
255 let header_start = pht_start as usize + phdr_size * seg_idx;
256 let phdr = match class {
257 header::Class::Bit32 => {
258 segment::Phdr::Phdr32(segment::Phdr32::deserialize(buf, header_start)?)
259 }
260 header::Class::Bit64 => {
261 segment::Phdr::Phdr64(segment::Phdr64::deserialize(buf, header_start)?)
262 }
263 _ => todo!(),
264 };
265
266 let seg = segment::Segment { phdr };
267 segments.push(seg);
268 }
269
270 Ok(segments)
271}
272
273fn naming_sections_from_shstrtab(shstrndx: usize, sections: &mut Vec<section::Section>) {
276 let shstrtab = sections[shstrndx].contents.as_strtab();
277
278 for sct in sections.iter_mut() {
279 let name_idx = sct.name_idx();
280 if name_idx == 0 {
281 continue;
282 }
283
284 let s = shstrtab
285 .iter()
286 .find(|&s| s.idx <= name_idx && name_idx <= s.idx + s.v.len())
287 .unwrap();
288
289 let (_, name) = s.v.split_at(name_idx - s.idx);
290 sct.name = name.to_string();
291 }
292}
293
294fn naming_symbols(sections: &mut Vec<section::Section>) {
297 let section_number = sections.len();
298 for sct_idx in 0..section_number {
299 let sct = §ions[sct_idx];
300 if sct.ty() != section::Type::SymTab && sct.ty() != section::Type::DynSym {
301 continue;
302 }
303
304 let strtab = sections[sct.link()].contents.as_strtab();
305
306 match &mut sections[sct_idx].contents {
307 section::Contents::Contents32(c) => {
308 if let section::Contents32::Symbols(ref mut symbols) = c {
309 for sym in symbols.iter_mut() {
310 let name_idx = sym.st_name as usize;
311 if name_idx == 0 {
312 continue;
313 }
314
315 let s = strtab
316 .iter()
317 .find(|s| s.idx <= name_idx && name_idx <= s.idx + s.v.len())
318 .unwrap();
319 let (_, name) = s.v.split_at(name_idx - s.idx);
320
321 sym.symbol_name = name.to_string();
322 }
323 }
324 }
325
326 section::Contents::Contents64(c) => {
327 if let section::Contents64::Symbols(ref mut symbols) = c {
328 for sym in symbols.iter_mut() {
329 let name_idx = sym.st_name as usize;
330 if name_idx == 0 {
331 continue;
332 }
333
334 let s = strtab
335 .iter()
336 .find(|s| s.idx <= name_idx && name_idx <= s.idx + s.v.len())
337 .unwrap();
338 let (_, name) = s.v.split_at(name_idx - s.idx);
339
340 sym.symbol_name = name.to_string();
341 }
342 }
343 }
344 }
345 }
346}
347
348fn check_elf_magic(file_path: &str, buf: &[u8]) -> Result<(), Box<dyn std::error::Error>> {
349 assert_eq!(buf.len(), 4);
350
351 if buf[0] != 0x7f || buf[1] != 0x45 || buf[2] != 0x4c || buf[3] != 0x46 {
352 return Err(Box::new(ReadELFError::NotELF {
353 file_path: file_path.to_string(),
354 }));
355 }
356
357 Ok(())
358}
359
360fn parse_elf_header(
361 class: header::Class,
362 buf: &[u8],
363) -> Result<header::Ehdr, Box<dyn std::error::Error>> {
364 match class {
365 header::Class::Bit32 => Ok(header::Ehdr::Ehdr32(bincode::deserialize(buf)?)),
366 header::Class::Bit64 => Ok(header::Ehdr::Ehdr64(bincode::deserialize(buf)?)),
367 _ => todo!(),
368 }
369}
370
#[cfg(test)]
mod parse_tests {
    use super::*;

    use crate::section::Contents64;

    /// Only the exact four-byte ELF magic is accepted.
    #[test]
    fn check_elf_magic_test() {
        let valid: [u8; 4] = [0x7f, 0x45, 0x4c, 0x46];
        assert!(check_elf_magic("", &valid).is_ok());

        let corrupted_inputs: [[u8; 4]; 2] = [[0x7f, 0x45, 0x4b, 0x46], [0x7f, 0x42, 0x43, 0x46]];
        for corrupted in corrupted_inputs.iter() {
            assert!(check_elf_magic("", corrupted).is_err());
        }
    }

    /// A hand-written 64-byte header decodes as a 64-bit ELF header.
    #[test]
    fn parse_elf64_header_test() {
        let header_bytes: Vec<u8> = vec![
            0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x03, 0x00, 0x3e, 0x00, 0x01, 0x00, 0x00, 0x00, 0x60, 0xe1, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x1d,
            0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x38, 0x00,
            0x0c, 0x00, 0x40, 0x00, 0x2c, 0x00, 0x2b, 0x00,
        ];

        match parse_elf_header(header::Class::Bit64, &header_bytes).unwrap() {
            header::Ehdr::Ehdr64(ehdr) => {
                assert_eq!(ehdr.get_type(), header::Type::Dyn);
                assert_eq!(ehdr.e_entry, 0xe160);
                assert_eq!(ehdr.e_shnum, 44);
            }
            _ => panic!("expected a 64-bit ELF header"),
        }
    }

    /// The same byte layout with class byte 1 decodes as a 32-bit ELF header.
    #[test]
    fn parse_elf32_header_test() {
        let header_bytes: Vec<u8> = vec![
            0x7f, 0x45, 0x4c, 0x46, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x03, 0x00, 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x90, 0x10, 0x00, 0x00,
            0x34, 0x00, 0x00, 0x00, 0xe4, 0x37, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x00,
            0x20, 0x00, 0x0c, 0x00, 0x28, 0x00, 0x1f, 0x00, 0x1e, 0x00, 0x06, 0x00, 0x34, 0x00,
            0x00, 0x00, 0x40, 0x00, 0x2c, 0x00, 0x2b, 0x00,
        ];

        match parse_elf_header(header::Class::Bit32, &header_bytes).unwrap() {
            header::Ehdr::Ehdr32(ehdr) => {
                assert_eq!(ehdr.get_type(), header::Type::Dyn);
                assert_eq!(ehdr.e_entry, 0x1090);
                assert_eq!(ehdr.e_shnum, 31);
            }
            _ => panic!("expected a 32-bit ELF header"),
        }
    }

    /// End-to-end parse of the 64-bit sample binary shipped with the test data.
    #[test]
    fn read_elf64_test() {
        let f = match parse_elf("src/parser/testdata/sample") {
            Ok(file::ELF::ELF64(f)) => f,
            Ok(_) => panic!("sample should parse as a 64-bit ELF"),
            Err(e) => panic!("parse_elf failed: {}", e),
        };

        assert_eq!(f.ehdr.e_entry, 0x1040);
        assert_eq!(f.ehdr.e_shnum, 29);
        assert_eq!(f.ehdr.e_shstrndx, 28);

        assert_eq!(f.sections.len(), 29);
        assert_eq!(f.segments.len(), 13);

        let interp = &f.sections[1];
        assert_eq!(".interp", &interp.name);
        assert_eq!(interp.header.get_type(), section::Type::ProgBits);
        assert_eq!(interp.header.sh_addr, 0x318);
        assert_eq!(interp.header.sh_offset, 0x318);
        assert_eq!(interp.header.sh_addralign, 0x1);
        assert!(interp.header.get_flags().contains(&section::Flag::Alloc));
        assert_eq!(interp.header.sh_size, 0x1c);
        assert!(matches!(
            &interp.contents,
            Contents64::Raw(raw) if raw.len() == interp.header.sh_size as usize
        ));

        let note = &f.sections[2];
        assert_eq!(note.header.get_type(), section::Type::Note);
        assert_eq!(note.header.sh_addr, 0x338);
        assert!(matches!(
            &note.contents,
            Contents64::Raw(raw) if raw.len() == note.header.sh_size as usize
        ));

        let rela = &f.sections[10];
        assert_eq!(rela.header.get_type(), section::Type::Rela);
        assert!(matches!(rela.contents, Contents64::RelaSymbols(_)));

        let symtab = &f.sections[26];
        assert_eq!(symtab.header.get_type(), section::Type::SymTab);
        match &symtab.contents {
            Contents64::Symbols(symbols) => {
                assert!(symbols.len() == 62);
                assert!(symbols[26].symbol_name == "crtstuff.c");
                assert!(symbols[45].symbol_name == "_ITM_deregisterTMCloneTable");
            }
            _ => panic!("section 26 should hold parsed symbols"),
        }

        let dynamic_section = &f.sections[21];
        assert_eq!(dynamic_section.header.get_type(), section::Type::Dynamic);
        match &dynamic_section.contents {
            Contents64::Dynamics(entries) => {
                assert!(entries[1].get_type() == dynamic::EntryType::Init);
                assert!(entries[2].get_type() == dynamic::EntryType::Fini);
            }
            _ => panic!("section 21 should hold parsed dynamic entries"),
        }

        let phdr_segment = &f.segments[0];
        assert_eq!(phdr_segment.header.get_type(), segment::Type::Phdr);
        assert!(phdr_segment.header.get_flags().contains(&segment::Flag::R));
        assert_eq!(phdr_segment.header.p_align, 8);

        let interp_segment = &f.segments[1];
        assert_eq!(interp_segment.header.get_type(), segment::Type::Interp);
        assert!(interp_segment.header.get_flags().contains(&segment::Flag::R));
        assert_eq!(interp_segment.header.p_align, 1);
    }

    /// End-to-end parse of the 32-bit sample binary shipped with the test data.
    #[test]
    fn read_elf32_test() {
        let f = match parse_elf("src/parser/testdata/32bit") {
            Ok(file::ELF::ELF32(f)) => f,
            Ok(_) => panic!("32bit sample should parse as a 32-bit ELF"),
            Err(e) => panic!("parse_elf failed: {}", e),
        };

        assert_eq!(header::Type::Dyn, f.ehdr.get_type());
        assert_eq!(0x1090, f.ehdr.e_entry);
        assert_eq!(32, f.ehdr.e_phentsize);
        assert_eq!(40, f.ehdr.e_shentsize);
        assert_eq!(30, f.ehdr.e_shstrndx);

        let interp = &f.sections[1];
        assert_eq!(".interp", interp.name);
        assert_eq!(0x1b4, interp.header.sh_addr);
        assert_eq!(0x13, interp.header.sh_size);

        let abi_tag = &f.sections[4];
        assert_eq!(".note.ABI-tag", abi_tag.name);
        assert_eq!(0x208, abi_tag.header.sh_addr);
    }
}