elf_utilities/parser/
parse.rs

1use crate::*;
2use std::fs::File;
3use std::io::Read;
4
5use serde::Deserialize;
6use thiserror::Error as TError;
7
/// Errors that can be reported while reading an ELF file.
#[derive(TError, Debug)]
pub enum ReadELFError {
    /// The input does not start with the ELF magic number.
    #[error("input file `{file_path}` is not an ELF file")]
    NotELF { file_path: String },
    /// The ELF header could not be parsed; `k` holds the underlying error.
    #[error("can't parse elf header => `{k}`")]
    CantParseELFHeader { k: Box<dyn std::error::Error> },
    /// A section header could not be parsed; `k` holds the underlying error.
    #[error("can't parse section header => `{k}`")]
    CantParseSectionHeader { k: Box<dyn std::error::Error> },
    /// A program header could not be parsed; `k` holds the underlying error.
    #[error("can't parse program header => `{k}`")]
    CantParseProgramHeader { k: Box<dyn std::error::Error> },
    /// A symbol could not be parsed; `k` holds the underlying error.
    #[error("can't parse symbol => `{k}`")]
    CantParseSymbol { k: Box<dyn std::error::Error> },
}
21
22/// parse 64bit ELF
23pub fn parse_elf64(file_path: &str) -> Result<file::ELF64, Box<dyn std::error::Error>> {
24    Ok(parse_elf(file_path)?.as_64bit())
25}
26
27/// parse 32bit ELF
28pub fn parse_elf32(file_path: &str) -> Result<file::ELF32, Box<dyn std::error::Error>> {
29    Ok(parse_elf(file_path)?.as_32bit())
30}
31
32/// parse ELF and construct `file::ELF`
33pub fn parse_elf(file_path: &str) -> Result<file::ELF, Box<dyn std::error::Error>> {
34    let mut f = File::open(file_path)?;
35    let mut buf = Vec::new();
36    let _ = f.read_to_end(&mut buf);
37
38    let _ = check_elf_magic(file_path, &buf[..4])?;
39
40    // 32bit/64bitでパース処理を共通化するため,classを取っておく
41    let elf_class = header::Class::from(buf[header::Class::INDEX]);
42
43    let elf_header = parse_elf_header(elf_class, &buf)?;
44    let phdr_table_exists = elf_header.pht_exists();
45
46    let mut sections = read_sht(elf_class, elf_header.shnum(), elf_header.sht_start(), &buf)?;
47    let mut segments = Vec::new();
48
49    if phdr_table_exists {
50        segments = read_pht(elf_class, elf_header.phnum(), elf_header.pht_start(), &buf)?;
51    }
52
53    // セクション名の設定
54    // .shstrtabセクションは大抵SHTの末尾にあるため,read_sht() 後に行う必要がある
55    naming_sections_from_shstrtab(elf_header.shstrndx(), &mut sections);
56
57    // シンボル名の設定
58    // これもセクション名の設定と同様,SHTパース後に実行する必要があるため切り離している
59    naming_symbols(&mut sections);
60
61    match elf_class {
62        header::Class::Bit64 => Ok(file::ELF::ELF64(file::ELF64 {
63            ehdr: elf_header.as_64bit(),
64            sections: sections.iter().map(|sct| sct.as_64bit()).collect(),
65            segments: segments.iter().map(|sgt| sgt.as_64bit()).collect(),
66        })),
67        header::Class::Bit32 => Ok(file::ELF::ELF32(file::ELF32 {
68            ehdr: elf_header.as_32bit(),
69            sections: sections.iter().map(|sct| sct.as_32bit()).collect(),
70            segments: segments.iter().map(|sgt| sgt.as_32bit()).collect(),
71        })),
72        _ => todo!(),
73    }
74}
75
76/// セクションヘッダテーブルのパース
77fn read_sht(
78    class: header::Class,
79
80    section_number: usize,
81    sht_offset: usize,
82    buf: &[u8],
83) -> Result<Vec<section::Section>, Box<dyn std::error::Error>> {
84    let mut sections = Vec::with_capacity(50);
85    let shdr_size = match class {
86        header::Class::Bit32 => section::Shdr32::SIZE,
87        header::Class::Bit64 => section::Shdr64::SIZE,
88        _ => todo!(),
89    };
90
91    for sct_idx in 0..section_number {
92        let header_start = sht_offset + shdr_size * sct_idx;
93        let shdr = match class {
94            header::Class::Bit32 => {
95                section::Shdr::Shdr32(bincode::deserialize(&buf[header_start..])?)
96            }
97            header::Class::Bit64 => {
98                section::Shdr::Shdr64(bincode::deserialize(&buf[header_start..])?)
99            }
100            _ => todo!(),
101        };
102
103        let mut sct = section::Section::new(shdr);
104        let section_type = sct.ty();
105
106        if section_type != section::Type::NoBits {
107            let section_offset = sct.offset();
108            let section_raw_contents =
109                buf[section_offset..section_offset + sct.size() as usize].to_vec();
110
111            sct.contents = match section_type {
112                section::Type::StrTab => parse_string_table(class, &section_raw_contents),
113                section::Type::SymTab | section::Type::DynSym => {
114                    parse_symbol_table(class, &sct, &section_raw_contents)
115                }
116                section::Type::Rela => parse_rela_symbol_table(class, &sct, &section_raw_contents),
117                section::Type::Dynamic => {
118                    parse_dynamic_information(class, &sct, &section_raw_contents)
119                }
120                _ => match class {
121                    header::Class::Bit32 => section::Contents::Contents32(
122                        section::Contents32::Raw(section_raw_contents),
123                    ),
124                    header::Class::Bit64 => section::Contents::Contents64(
125                        section::Contents64::Raw(section_raw_contents),
126                    ),
127                    _ => todo!(),
128                },
129            }
130        }
131
132        sections.push(sct);
133    }
134
135    Ok(sections)
136}
137
138fn parse_string_table(class: header::Class, section_raw_contents: &Vec<u8>) -> section::Contents {
139    let mut strs: Vec<section::StrTabEntry> = Default::default();
140    let mut name_idx = 0;
141    loop {
142        if name_idx >= section_raw_contents.len() {
143            break;
144        }
145
146        if section_raw_contents[name_idx] == 0x00 {
147            name_idx += 1;
148            continue;
149        }
150
151        let nul_range_end = section_raw_contents[name_idx..]
152            .iter()
153            .position(|&c| c == b'\0')
154            .unwrap_or(section_raw_contents.len());
155        let s = std::str::from_utf8(&section_raw_contents[name_idx..name_idx + nul_range_end])
156            .unwrap()
157            .to_string();
158
159        let idx = name_idx;
160        name_idx += s.len();
161        strs.push(section::StrTabEntry { v: s, idx });
162    }
163
164    match class {
165        header::Class::Bit32 => section::Contents::Contents32(section::Contents32::StrTab(strs)),
166        header::Class::Bit64 => section::Contents::Contents64(section::Contents64::StrTab(strs)),
167        _ => todo!(),
168    }
169}
170
171fn parse_rela_symbol_table(
172    class: header::Class,
173    sct: &section::Section,
174    raw_symtab: &Vec<u8>,
175) -> section::Contents {
176    let entry_size = sct.entry_size();
177    let entry_number = sct.size() / entry_size;
178    match class {
179        header::Class::Bit32 => section::Contents::Contents32(section::Contents32::RelaSymbols(
180            parse_table(entry_size, entry_number, raw_symtab),
181        )),
182        header::Class::Bit64 => section::Contents::Contents64(section::Contents64::RelaSymbols(
183            parse_table(entry_size, entry_number, raw_symtab),
184        )),
185        _ => todo!(),
186    }
187}
188
189fn parse_dynamic_information(
190    class: header::Class,
191    sct: &section::Section,
192    raw_symtab: &Vec<u8>,
193) -> section::Contents {
194    let entry_size = sct.entry_size();
195    let entry_number = sct.size() / entry_size;
196    match class {
197        header::Class::Bit32 => section::Contents::Contents32(section::Contents32::Dynamics(
198            parse_table(entry_size, entry_number, raw_symtab),
199        )),
200        header::Class::Bit64 => section::Contents::Contents64(section::Contents64::Dynamics(
201            parse_table(entry_size, entry_number, raw_symtab),
202        )),
203        _ => todo!(),
204    }
205}
206
207fn parse_symbol_table(
208    class: header::Class,
209    sct: &section::Section,
210    raw_symtab: &Vec<u8>,
211) -> section::Contents {
212    let entry_size = sct.entry_size();
213    let entry_number = sct.size() / entry_size;
214    match class {
215        header::Class::Bit32 => section::Contents::Contents32(section::Contents32::Symbols(
216            parse_table(entry_size, entry_number, raw_symtab),
217        )),
218        header::Class::Bit64 => section::Contents::Contents64(section::Contents64::Symbols(
219            parse_table(entry_size, entry_number, raw_symtab),
220        )),
221        _ => todo!(),
222    }
223}
224
225fn parse_table<'a, T: Deserialize<'a>>(
226    entry_size: usize,
227    entry_number: usize,
228    buf: &'a [u8],
229) -> Vec<T> {
230    let mut table = Vec::new();
231    for idx in 0..entry_number {
232        let start = idx * entry_size;
233        let end = (idx + 1) * entry_size;
234        let entry = bincode::deserialize(&buf[start..end]).unwrap();
235        table.push(entry);
236    }
237    table
238}
239
240/// プログラムヘッダテーブルのパース
241fn read_pht(
242    class: header::Class,
243    phnum: usize,
244    pht_start: usize,
245    buf: &[u8],
246) -> Result<Vec<segment::Segment>, Box<dyn std::error::Error>> {
247    let mut segments = Vec::with_capacity(10);
248    let phdr_size = match class {
249        header::Class::Bit32 => segment::Phdr32::SIZE,
250        header::Class::Bit64 => segment::Phdr64::SIZE,
251        _ => todo!(),
252    };
253
254    for seg_idx in 0..phnum {
255        let header_start = pht_start as usize + phdr_size * seg_idx;
256        let phdr = match class {
257            header::Class::Bit32 => {
258                segment::Phdr::Phdr32(segment::Phdr32::deserialize(buf, header_start)?)
259            }
260            header::Class::Bit64 => {
261                segment::Phdr::Phdr64(segment::Phdr64::deserialize(buf, header_start)?)
262            }
263            _ => todo!(),
264        };
265
266        let seg = segment::Segment { phdr };
267        segments.push(seg);
268    }
269
270    Ok(segments)
271}
272
273/// セクション名を.shstrtabから探して,Section構造体に書き込む
274/// このようにしているのは,SHTのパースがすべて終わってからでないとshstrtabを使用できない為
275fn naming_sections_from_shstrtab(shstrndx: usize, sections: &mut Vec<section::Section>) {
276    let shstrtab = sections[shstrndx].contents.as_strtab();
277
278    for sct in sections.iter_mut() {
279        let name_idx = sct.name_idx();
280        if name_idx == 0 {
281            continue;
282        }
283
284        let s = shstrtab
285            .iter()
286            .find(|&s| s.idx <= name_idx && name_idx <= s.idx + s.v.len())
287            .unwrap();
288
289        let (_, name) = s.v.split_at(name_idx - s.idx);
290        sct.name = name.to_string();
291    }
292}
293
294/// シンボル名をsh_linkが指す文字列テーブルから探して割り当てる
295/// このようにしているのは,SHTのパースがすべて終わってからでないとshstrtabを使用できない為
296fn naming_symbols(sections: &mut Vec<section::Section>) {
297    let section_number = sections.len();
298    for sct_idx in 0..section_number {
299        let sct = &sections[sct_idx];
300        if sct.ty() != section::Type::SymTab && sct.ty() != section::Type::DynSym {
301            continue;
302        }
303
304        let strtab = sections[sct.link()].contents.as_strtab();
305
306        match &mut sections[sct_idx].contents {
307            section::Contents::Contents32(c) => {
308                if let section::Contents32::Symbols(ref mut symbols) = c {
309                    for sym in symbols.iter_mut() {
310                        let name_idx = sym.st_name as usize;
311                        if name_idx == 0 {
312                            continue;
313                        }
314
315                        let s = strtab
316                            .iter()
317                            .find(|s| s.idx <= name_idx && name_idx <= s.idx + s.v.len())
318                            .unwrap();
319                        let (_, name) = s.v.split_at(name_idx - s.idx);
320
321                        sym.symbol_name = name.to_string();
322                    }
323                }
324            }
325
326            section::Contents::Contents64(c) => {
327                if let section::Contents64::Symbols(ref mut symbols) = c {
328                    for sym in symbols.iter_mut() {
329                        let name_idx = sym.st_name as usize;
330                        if name_idx == 0 {
331                            continue;
332                        }
333
334                        let s = strtab
335                            .iter()
336                            .find(|s| s.idx <= name_idx && name_idx <= s.idx + s.v.len())
337                            .unwrap();
338                        let (_, name) = s.v.split_at(name_idx - s.idx);
339
340                        sym.symbol_name = name.to_string();
341                    }
342                }
343            }
344        }
345    }
346}
347
348fn check_elf_magic(file_path: &str, buf: &[u8]) -> Result<(), Box<dyn std::error::Error>> {
349    assert_eq!(buf.len(), 4);
350
351    if buf[0] != 0x7f || buf[1] != 0x45 || buf[2] != 0x4c || buf[3] != 0x46 {
352        return Err(Box::new(ReadELFError::NotELF {
353            file_path: file_path.to_string(),
354        }));
355    }
356
357    Ok(())
358}
359
360fn parse_elf_header(
361    class: header::Class,
362    buf: &[u8],
363) -> Result<header::Ehdr, Box<dyn std::error::Error>> {
364    match class {
365        header::Class::Bit32 => Ok(header::Ehdr::Ehdr32(bincode::deserialize(buf)?)),
366        header::Class::Bit64 => Ok(header::Ehdr::Ehdr64(bincode::deserialize(buf)?)),
367        _ => todo!(),
368    }
369}
370
/// Unit tests for the parser: magic-number check, header decoding from
/// in-memory bytes, and end-to-end parsing of fixture binaries.
#[cfg(test)]
mod parse_tests {
    use crate::section::Contents64;

    use super::*;

    /// The exact magic number is accepted; any altered byte is rejected.
    #[test]
    fn check_elf_magic_test() {
        assert!(check_elf_magic("", &[0x7f, 0x45, 0x4c, 0x46]).is_ok());
        assert!(check_elf_magic("", &[0x7f, 0x45, 0x4b, 0x46]).is_err());
        assert!(check_elf_magic("", &[0x7f, 0x42, 0x43, 0x46]).is_err());
    }

    /// Decodes a hand-written 64-bit header and checks a few of its fields.
    #[test]
    fn parse_elf64_header_test() {
        let header_bytes = vec![
            0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x03, 0x00, 0x3e, 0x00, 0x01, 0x00, 0x00, 0x00, 0x60, 0xe1, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x1d,
            0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x38, 0x00,
            0x0c, 0x00, 0x40, 0x00, 0x2c, 0x00, 0x2b, 0x00,
        ];
        let hdr_result = parse_elf_header(header::Class::Bit64, &header_bytes).unwrap();
        assert!(matches!(hdr_result, header::Ehdr::Ehdr64(_)));

        if let header::Ehdr::Ehdr64(ehdr) = hdr_result {
            assert_eq!(ehdr.get_type(), header::Type::Dyn);
            assert_eq!(ehdr.e_entry, 0xe160);
            assert_eq!(ehdr.e_shnum, 44);
        }
    }

    /// Decodes a hand-written 32-bit header and checks a few of its fields.
    #[test]
    fn parse_elf32_header_test() {
        let header_bytes = vec![
            0x7f, 0x45, 0x4c, 0x46, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x03, 0x00, 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x90, 0x10, 0x00, 0x00,
            0x34, 0x00, 0x00, 0x00, 0xe4, 0x37, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x00,
            0x20, 0x00, 0x0c, 0x00, 0x28, 0x00, 0x1f, 0x00, 0x1e, 0x00, 0x06, 0x00, 0x34, 0x00,
            0x00, 0x00, 0x40, 0x00, 0x2c, 0x00, 0x2b, 0x00,
        ];
        let hdr_result = parse_elf_header(header::Class::Bit32, &header_bytes).unwrap();
        assert!(matches!(hdr_result, header::Ehdr::Ehdr32(_)));

        if let header::Ehdr::Ehdr32(ehdr) = hdr_result {
            assert_eq!(ehdr.get_type(), header::Type::Dyn);
            assert_eq!(ehdr.e_entry, 0x1090);
            assert_eq!(ehdr.e_shnum, 31);
        }
    }

    /// Parses the `sample` fixture end to end and spot-checks the header,
    /// several sections (names, types, decoded contents) and two segments.
    #[test]
    fn read_elf64_test() {
        let f_result = parse_elf("src/parser/testdata/sample");
        assert!(f_result.is_ok());
        let f = f_result.unwrap();
        assert!(matches!(f, file::ELF::ELF64(_)));
        if let file::ELF::ELF64(f) = f {
            // ELF header fields.
            assert_eq!(f.ehdr.e_entry, 0x1040);
            assert_eq!(f.ehdr.e_shnum, 29);
            assert_eq!(f.ehdr.e_shstrndx, 28);

            // Table sizes.
            assert_eq!(f.sections.len(), 29);
            assert_eq!(f.segments.len(), 13);

            // Section 1: name resolved from the string table, raw contents
            // as long as the header's sh_size.
            assert_eq!(".interp", &f.sections[1].name);
            assert_eq!(f.sections[1].header.get_type(), section::Type::ProgBits);
            assert_eq!(f.sections[1].header.sh_addr, 0x318);
            assert_eq!(f.sections[1].header.sh_offset, 0x318);
            assert_eq!(f.sections[1].header.sh_addralign, 0x1);
            assert!(f.sections[1]
                .header
                .get_flags()
                .contains(&section::Flag::Alloc));
            assert_eq!(f.sections[1].header.sh_size, 0x1c);
            assert!(
                matches!(&f.sections[1].contents, Contents64::Raw(x) if x.len() == f.sections[1].header.sh_size as usize )
            );

            // Section 2: a note section kept as raw bytes.
            assert_eq!(f.sections[2].header.get_type(), section::Type::Note);
            assert_eq!(f.sections[2].header.sh_addr, 0x338);
            assert!(
                matches!(&f.sections[2].contents, Contents64::Raw(x) if x.len() == f.sections[2].header.sh_size as usize )
            );

            // Relocation and symbol tables are decoded into typed entries,
            // and symbol names are resolved.
            assert_eq!(f.sections[10].header.get_type(), section::Type::Rela);
            assert!(matches!(
                f.sections[10].contents,
                Contents64::RelaSymbols(_)
            ));
            assert_eq!(f.sections[26].header.get_type(), section::Type::SymTab);
            assert!(matches!(
                &f.sections[26].contents,
                Contents64::Symbols(x) if x.len() == 62
            ));
            assert!(matches!(
                &f.sections[26].contents,
                Contents64::Symbols(x) if x[26].symbol_name == "crtstuff.c"
            ));
            assert!(matches!(
                &f.sections[26].contents,
                Contents64::Symbols(x) if x[45].symbol_name == "_ITM_deregisterTMCloneTable"
            ));

            // The dynamic section is decoded into typed entries.
            assert_eq!(f.sections[21].header.get_type(), section::Type::Dynamic);
            assert!(matches!(
                &f.sections[21].contents,
                Contents64::Dynamics(x) if x[1].get_type() == dynamic::EntryType::Init
            ));
            assert!(matches!(
                &f.sections[21].contents,
                Contents64::Dynamics(x) if x[2].get_type() == dynamic::EntryType::Fini
            ));

            // First two program headers.
            assert_eq!(f.segments[0].header.get_type(), segment::Type::Phdr);
            assert!(f.segments[0].header.get_flags().contains(&segment::Flag::R));
            assert_eq!(f.segments[0].header.p_align, 8);

            assert_eq!(f.segments[1].header.get_type(), segment::Type::Interp);
            assert!(f.segments[1].header.get_flags().contains(&segment::Flag::R));
            assert_eq!(f.segments[1].header.p_align, 1);
        }
    }

    /// Parses the `32bit` fixture end to end and spot-checks the header and
    /// two named sections.
    #[test]
    fn read_elf32_test() {
        let f_result = parse_elf("src/parser/testdata/32bit");
        assert!(f_result.is_ok());

        let f = f_result.unwrap();
        assert!(matches!(f, file::ELF::ELF32(_)));

        if let file::ELF::ELF32(f) = f {
            assert_eq!(header::Type::Dyn, f.ehdr.get_type());
            assert_eq!(0x1090, f.ehdr.e_entry);
            assert_eq!(32, f.ehdr.e_phentsize);
            assert_eq!(40, f.ehdr.e_shentsize);
            assert_eq!(30, f.ehdr.e_shstrndx);

            assert_eq!(".interp", f.sections[1].name);
            assert_eq!(0x1b4, f.sections[1].header.sh_addr);
            assert_eq!(0x13, f.sections[1].header.sh_size);

            assert_eq!(".note.ABI-tag", f.sections[4].name);
            assert_eq!(0x208, f.sections[4].header.sh_addr);
        }
    }
}
518}