xabc_lib/
abc.rs

1use memmap2::Mmap;
2use std::collections::{HashMap, HashSet};
3use std::{
4    fs::{self, File},
5    io::Read,
6    path::Path,
7};
8
9use crate::bytecode::BytecodeParser;
10use crate::class::{Class, ForeignClass};
11use crate::code::Code;
12use crate::header::Header;
13use crate::lnp::LineNumberProgramIndex;
14use crate::region::{
15    ClassRegionIndex, FieldRegionIndex, FieldType, MethodStringLiteralRegionIndex,
16    ProtoRegionIndex, Region, RegionHeader,
17};
18use crate::source::Source;
19use crate::string::ABCString;
20use crate::{error, init_logging, literal};
21
22use super::uint32_t;
23
24use scroll::Pread;
25
/// Public-facing handle to a parsed ABC file.
pub struct AbcFile<T> {
    /// Backing data that every parsing routine reads from.
    source: Source<T>,
    /// File header, decoded from offset 0 of `source`.
    header: Header,
    /// Non-foreign classes, keyed by the class offset read from the class index.
    classes: HashMap<uint32_t, Class>,
    /// Foreign classes, keyed by the class offset read from the class index.
    foreign_classes: HashMap<uint32_t, ForeignClass>,
    /// Regions, in the order their headers were parsed.
    regions: Vec<Region>,
    /// Output of `literal::parse_literal_array_index`; its keys are compared
    /// against byte offsets when strings are collected.
    literal_array_map: HashMap<usize, String>,
}
35
36impl<T> AbcFile<T>
37where
38    T: AsRef<[u8]>,
39{
    /// Returns a reference to the parsed file header.
    pub fn header(&self) -> &Header {
        &self.header
    }
43
44    fn parse_header(&mut self) {
45        self.header = self.source.pread::<Header>(0).unwrap();
46    }
47
    /// Returns the parsed non-foreign classes, keyed by class offset.
    pub fn classes(&self) -> &HashMap<uint32_t, Class> {
        &self.classes
    }
51
    /// Runs every parsing stage; the call order matters.
    fn init(&mut self) {
        // The header supplies the offsets and sizes used by all later stages.
        self.parse_header();
        // Classes come before regions: region parsing resolves class names by offset.
        self.parse_class_index();
        self.parse_region_index();
        // Literal arrays come last because the parser is handed the parsed regions.
        self.parse_literal_array_index();
    }
58
59    /// 解析 LiteralArray 并将数据存放起来
60    fn parse_literal_array_index(&mut self) {
61        self.literal_array_map = literal::parse_literal_array_index(
62            self.source.as_ref(),
63            self.header.literalarray_idx_off(),
64            self.header.literalarrays_size(),
65            &self.regions,
66        );
67    }
68
69    fn get_region(&self, offset: usize) -> Option<&Region> {
70        let mut result: Option<&Region> = None;
71        for one in self.regions.iter() {
72            let is_it = one.is_here(offset);
73            if is_it {
74                result = Some(one);
75                break;
76            }
77        }
78
79        result
80    }
81
82    pub fn parse_method(&self, name: String) {
83        let arr = name.split("->").collect::<Vec<&str>>();
84        let target_clazz = arr[0];
85        let target_method = arr[1];
86
87        let bytecode_map = BytecodeParser::new();
88        for item in &self.classes {
89            let offset = item.0;
90            let region = self.get_region(*offset as usize).unwrap();
91            let clazz = item.1;
92
93            let class_name = clazz.name().str();
94            if target_clazz == class_name {
95                for (_offset, method) in clazz.method_map().iter() {
96                    let _name = self.get_string_by_off(*method.name_off());
97                    if target_method == _name {
98                        let data = method.method_data();
99                        let code_off = data.code_off();
100                        let code = self
101                            .source
102                            .as_ref()
103                            .pread::<Code>(*code_off as usize)
104                            .unwrap();
105                        bytecode_map.parse(
106                            &code,
107                            region,
108                            self.source.as_ref(),
109                            &self.literal_array_map,
110                        );
111
112                        break;
113                    }
114                }
115
116                break;
117            }
118        }
119    }
120    /// 解析 Code,按需解析
121    // TODO: 解析整个文件,则输出到文件中?
122    // TODO: 解析指定类?
123
124    pub fn parse_code(&mut self) {
125        let bytecode_map = BytecodeParser::new();
126        for item in &self.classes {
127            let offset = item.0;
128            let region = self.get_region(*offset as usize).unwrap();
129            let clazz = item.1;
130
131            let class_name = clazz.name().str();
132            tracing::debug!("Class Name -> {}", class_name);
133            for field in clazz.fields() {
134                let off = field.name_off();
135                let type_idx = *field.type_idx();
136                let type_name = region.get_class_name(type_idx as usize);
137                tracing::debug!(
138                    "Field Name -> {}:{}",
139                    self.get_string_by_off(*off),
140                    type_name
141                );
142                // TODO: 解析 Field 的值
143            }
144
145            // TODO: 调整代码的输出
146            for (_offset, method) in clazz.method_map().iter() {
147                let name = self.get_string_by_off(*method.name_off());
148                println!("\n[方法]{}->{}", class_name, name);
149                let data = method.method_data();
150                let code_off = data.code_off();
151                let code = self
152                    .source
153                    .as_ref()
154                    .pread::<Code>(*code_off as usize)
155                    .unwrap();
156                bytecode_map.parse(&code, region, self.source.as_ref(), &self.literal_array_map);
157            }
158        }
159    }
160
161    /// 解析 Class
162    fn parse_class_index(&mut self) {
163        let num_classes = self.header.classes_size() as usize;
164        let class_idx_off = self.header.class_idx_off() as usize;
165
166        // 一次性解析所有的Class
167        for i in 0..num_classes {
168            let off = class_idx_off + i * 4;
169            let class_idx_off = self.source.pread::<uint32_t>(off).unwrap();
170
171            let is_foreign_class = self.is_foreign_off(class_idx_off);
172
173            if is_foreign_class {
174                let class = self
175                    .source
176                    .as_ref()
177                    .pread::<ForeignClass>(class_idx_off as usize)
178                    .unwrap();
179                self.foreign_classes.insert(class_idx_off, class);
180            } else {
181                let class = self
182                    .source
183                    .as_ref()
184                    .pread::<Class>(class_idx_off as usize)
185                    .unwrap();
186                self.classes.insert(class_idx_off, class);
187            }
188        }
189    }
190
191    #[allow(dead_code)]
192    fn parse_lnp_idx(&mut self) {
193        // NOTE: 解析行号程序,未来再说。
194        let mut lnp_idx = LineNumberProgramIndex::default();
195        let num_lnp = self.header().num_lnps() as usize;
196        let lnp_off = self.header().lnp_idx_off() as usize;
197        for i in 0..num_lnp {
198            let offset = self
199                .source
200                .as_ref()
201                .pread::<uint32_t>(lnp_off + i * 4)
202                .unwrap();
203
204            lnp_idx.push(offset);
205        }
206    }
207
208    fn get_class_name_by_offset(&self, idx: uint32_t) -> ABCString {
209        if self.is_foreign_off(idx) {
210            let v = self.foreign_classes.get(&idx).unwrap();
211            return v.name().clone();
212        }
213
214        self.classes.get(&idx).unwrap().name().clone()
215    }
216
217    /// 获取基本类型
218    fn get_primitive_type(&self, i: uint32_t) -> FieldType {
219        let names = [
220            "i8", "u8", "i16", "u16", "i32", "u32", "f32", "f64", "i64", "u64", "any",
221        ];
222
223        let n = names[i as usize];
224        FieldType {
225            name: n.to_string(),
226        }
227    }
228
229    /// 解析字段类型
230    fn parse_field_type(&mut self, idx: uint32_t) -> FieldType {
231        if idx <= 0xb {
232            return self.get_primitive_type(idx);
233        }
234
235        let item = self.get_class_name_by_offset(idx).to_string();
236        FieldType { name: item }
237    }
238
239    fn get_string_by_off(&self, off: uint32_t) -> String {
240        self.source
241            .as_ref()
242            .pread::<ABCString>(off as usize)
243            .unwrap()
244            .str()
245    }
246
247    /// 解析 RegionIndex
248    fn parse_region_index(&mut self) {
249        for i in 0..self.header().region_size() as usize {
250            let off = self.header().region_off() as usize + i * 4;
251            let region_header = self.source.pread::<RegionHeader>(off).unwrap();
252
253            // 解析 ClassRegionIndex
254            let mut class_region_idx = ClassRegionIndex::default();
255            let class_idx_off = region_header.class_idx_off() as usize;
256            for i in 0..region_header.class_idx_size() as usize {
257                let off = class_idx_off + i * 4;
258
259                // 一个FiedType 大小是u32
260                let class_offset = self.source.pread_with::<uint32_t>(off, scroll::LE).unwrap();
261                let f = self.parse_field_type(class_offset);
262                // tracing::debug!("FieldType: {} -> {:?}", off, &f);
263                class_region_idx.push(f);
264            }
265
266            // 解析 MethodStringLiteralRegionIndex
267            let msl_off = region_header.method_string_literal_region_idx_off() as usize;
268            let mut mslr_idx = MethodStringLiteralRegionIndex::default();
269            for i in 0..region_header.method_string_literal_region_idx_size() as usize {
270                let offset = self.source.pread::<uint32_t>(msl_off + i * 4).unwrap();
271                mslr_idx.push(offset);
272            }
273
274            // 解析 FieldRegionIndex
275            let mut field_idx = FieldRegionIndex::default();
276            let field_idx_off = region_header.field_idx_off() as usize;
277            let field_idx_size = region_header.field_idx_size() as usize;
278            if field_idx_size <= 65536 {
279                for i in 0..region_header.field_idx_size() as usize {
280                    let offset = self
281                        .source
282                        .pread_with::<uint32_t>(field_idx_off + i * 4, scroll::LE)
283                        .unwrap();
284                    field_idx.push(offset);
285                }
286            }
287
288            // 解析 ProtoRegionIndex
289            let mut proto_idx = ProtoRegionIndex::default();
290            let proto_idx_off = region_header.proto_idx_off() as usize;
291            let proto_idx_size = region_header.proto_idx_size() as usize;
292            if proto_idx_size <= 65536 {
293                for i in 0..region_header.proto_idx_size() as usize {
294                    let offset = self
295                        .source
296                        .pread_with::<uint32_t>(proto_idx_off + i * 4, scroll::LE)
297                        .unwrap();
298                    proto_idx.push(offset);
299                }
300            }
301
302            let region = Region::new(
303                region_header,
304                class_region_idx,
305                mslr_idx,
306                field_idx,
307                proto_idx,
308            );
309            self.regions.push(region);
310        }
311    }
312
313    /// 判断数据是否在外部区域
314    fn is_foreign_off(&self, class_idx: u32) -> bool {
315        let start = self.header().foreign_off();
316        let end = start + self.header().foreign_size();
317        start <= class_idx && class_idx <= end
318    }
319
320    /// 获取所有的类名
321    pub fn get_class_names(&self) -> Vec<String> {
322        let mut class_names = Vec::new();
323        for clz in self.classes.values() {
324            let class_name = clz.name().str();
325            class_names.push(class_name);
326        }
327        class_names
328    }
329
330    /// 获取所有的方法名
331    pub fn get_method_names(&self) -> Vec<String> {
332        let mut method_names = Vec::new();
333        for clz in self.classes.values() {
334            let class_name = clz.name().str();
335            for (_, method) in clz.method_map().iter() {
336                let name = self.get_string_by_off(*method.name_off());
337                method_names.push(class_name.to_string() + "->" + &name);
338            }
339        }
340        method_names
341    }
342
343    /// 获取所有的字符串
344    pub fn get_strings(&self) -> Vec<String> {
345        let mut method_offsets = HashSet::new();
346        for clz in self.classes.values() {
347            for (offset, _) in clz.method_map().iter() {
348                method_offsets.insert(offset);
349            }
350        }
351
352        let mut strings = Vec::new();
353        for region in self.regions.iter() {
354            let offsets = region.method_string_literal_region_idx().offsets();
355            for offset in offsets {
356                let offset = *offset as usize;
357                if method_offsets.contains(&offset) {
358                    continue;
359                }
360
361                if self.literal_array_map.contains_key(&offset) {
362                    continue;
363                }
364
365                // tracing::debug!("string offset -> {}", offset);
366                let string = self.get_string_by_off(offset as u32);
367                if string == "-utf8-error-" {
368                    tracing::warn!("{} -> 解析错误,不是字符串", offset);
369                    continue;
370                }
371                tracing::debug!("{} -> {}", offset, string);
372                strings.push(string);
373            }
374        }
375
376        strings
377    }
378}
379
/// Reads `Abc` files and builds [`AbcFile`] values from a path, a byte slice,
/// or an owned buffer.
pub struct AbcReader {}
382
/// Size threshold in bytes (100 MiB); files larger than this are treated as
/// large files and read through a memory map.
const LARGE_FILE: u64 = 100 * 1024 * 1024;
385
386impl AbcReader {
387    fn read_file_to_vec<P: AsRef<Path>>(path: P) -> Result<Vec<u8>, error::Error> {
388        let metadata = fs::metadata(path.as_ref())?;
389        let file_size = metadata.len();
390
391        if file_size > LARGE_FILE {
392            let file = File::open(path.as_ref())?;
393            let mmap = unsafe { Mmap::map(&file)? };
394            return Ok(Vec::from(&mmap[..]));
395        }
396
397        let mut file = File::open(path.as_ref())?;
398        let mut buffer = Vec::new();
399        file.read_to_end(&mut buffer)?;
400        Ok(buffer)
401    }
402
403    /// 从文件中加载 Abc 文件到内存
404    pub fn from_file<P>(file: P) -> Result<AbcFile<Vec<u8>>, error::Error>
405    where
406        P: AsRef<Path>,
407    {
408        let buf = AbcReader::read_file_to_vec(file)?;
409        AbcReader::from_vec(buf)
410    }
411
412    pub fn from_array(buf: &[u8]) -> Result<AbcFile<Vec<u8>>, error::Error> {
413        let buf = Vec::from(buf);
414        AbcReader::from_vec(buf)
415    }
416
417    pub fn from_vec(buf: Vec<u8>) -> Result<AbcFile<Vec<u8>>, error::Error> {
418        init_logging();
419
420        let source = Source::new(buf);
421        let mut abc_file = AbcFile {
422            source: source.clone(),
423            header: Header::default(),
424            classes: HashMap::new(),
425            foreign_classes: HashMap::new(),
426            regions: Vec::new(),
427            literal_array_map: HashMap::new(),
428        };
429        abc_file.init();
430
431        Ok(abc_file)
432    }
433}