Skip to main content

jvm_assembler/formats/class/reader/
mod.rs

1#![doc = include_str!("readme.md")]
2use crate::{
3    formats::class::{view::ClassInfo, ClassReadConfig},
4    program::JvmProgram,
5};
6use gaia_binary::{BigEndian, BinaryReader, Fixed};
7use gaia_types::{GaiaDiagnostics, GaiaError};
8use std::{
9    cell::{OnceCell, RefCell},
10    io::{Read, Seek},
11};
12
13/// JVM Class 文件读取器
14///
15/// 提供了按需读取 Class 文件内容的能力,支持延迟加载。
16pub struct ClassReader<'config, R: Read + Seek> {
17    config: &'config ClassReadConfig,
18    reader: RefCell<BinaryReader<R, Fixed<BigEndian>>>,
19    /// 缓存的程序
20    program: OnceCell<JvmProgram>,
21    /// 缓存类信息
22    info: OnceCell<ClassInfo>,
23}
24
25impl ClassReadConfig {
26    /// 创建一个 ClassReader 实例
27    pub fn as_reader<R: Read + Seek>(&self, reader: R) -> ClassReader<'_, R> {
28        ClassReader::new(reader, self)
29    }
30}
31
32impl<'config, R: Read + Seek> ClassReader<'config, R> {
33    /// 创建一个新的 ClassReader 实例
34    pub fn new(reader: R, config: &'config ClassReadConfig) -> Self {
35        Self {
36            reader: RefCell::new(BinaryReader::<R, Fixed<BigEndian>>::new(reader)),
37            program: Default::default(),
38            info: Default::default(),
39            config: config,
40        }
41    }
42
43    /// 获取解析后的 JvmProgram
44    pub fn get_program(&self) -> Result<&JvmProgram, GaiaError> {
45        self.program.get_or_try_init(|| self.read_program())
46    }
47
48    /// 获取解析后的 ClassInfo
49    pub fn get_info(&self) -> Result<&ClassInfo, GaiaError> {
50        self.info.get_or_try_init(|| self.read_view())
51    }
52}
53
54impl<'config, R: Read + Seek> ClassReader<'config, R> {
55    /// 读取整个 Class 文件并返回 JvmProgram
56    pub fn read(mut self) -> GaiaDiagnostics<JvmProgram> {
57        match self.get_program() {
58            Ok(_) => {
59                let errors = self.reader.borrow_mut().take_errors();
60                GaiaDiagnostics { result: self.program.take().ok_or(GaiaError::unreachable()), diagnostics: errors }
61            }
62            Err(e) => {
63                let errors = self.reader.borrow_mut().take_errors();
64                GaiaDiagnostics { result: Err(e), diagnostics: errors }
65            }
66        }
67    }
68    fn read_program(&self) -> Result<JvmProgram, GaiaError> {
69        let mut reader = self.reader.borrow_mut();
70
71        // 从文件开头开始读取
72        reader.seek(std::io::SeekFrom::Start(0))?;
73
74        // 读取类文件头
75        let (minor_version, major_version) = self.read_class_header(&mut reader)?;
76
77        // 读取常量池
78        let constant_pool_entries = self.read_constant_pool(&mut reader)?;
79
80        // 读取类信息
81        let (access_flags, this_class, super_class) = self.read_class_info(&mut reader)?;
82
83        // 解析类名和超类名
84        let class_name = self.resolve_class_name(&constant_pool_entries, this_class);
85        let super_class_name =
86            if super_class == 0 { None } else { Some(self.resolve_class_name(&constant_pool_entries, super_class)) };
87
88        // 跳过接口
89        self.skip_interfaces(&mut reader)?;
90
91        // 读取字段
92        let fields = self.read_fields(&mut reader, &constant_pool_entries)?;
93
94        // 读取方法
95        let methods = self.read_methods(&mut reader, &constant_pool_entries)?;
96
97        // 创建常量池
98        let mut constant_pool = crate::program::JvmConstantPool::new();
99        for entry in constant_pool_entries.iter() {
100            if let Some(value) = entry {
101                let cp_entry = crate::program::JvmConstantPoolEntry::Utf8(value.clone());
102                constant_pool.entries.push(cp_entry);
103            }
104            else {
105                constant_pool.entries.push(crate::program::JvmConstantPoolEntry::Nop);
106            }
107        }
108
109        // 创建 JvmProgram
110        let mut program = crate::program::JvmProgram::new(class_name);
111        program.version = crate::program::JvmVersion { major: major_version, minor: minor_version };
112        program.access_flags = crate::program::JvmAccessFlags::from_flags(access_flags);
113        program.super_class = super_class_name;
114        program.fields = fields;
115        program.methods = methods;
116        program.constant_pool = constant_pool;
117
118        Ok(program)
119    }
120
121    /// 读取并验证 class 文件头
122    fn read_class_header(
123        &self,
124        reader: &mut BinaryReader<impl Read + Seek, Fixed<BigEndian>>,
125    ) -> Result<(u16, u16), GaiaError> {
126        let magic = reader.read_u32()?;
127        if magic != 0xCAFEBABE {
128            return Err(GaiaError::invalid_data("Invalid class file magic number"));
129        }
130
131        let minor_version = reader.read_u16()?;
132        let major_version = reader.read_u16()?;
133
134        Ok((minor_version, major_version))
135    }
136
137    /// 读取常量池
138    fn read_constant_pool(
139        &self,
140        reader: &mut BinaryReader<impl Read + Seek, Fixed<BigEndian>>,
141    ) -> Result<Vec<Option<String>>, GaiaError> {
142        let constant_pool_count = reader.read_u16()?;
143        let mut constant_pool_entries: Vec<Option<String>> = vec![None; constant_pool_count as usize];
144
145        // 解析常量池,特别关注 UTF8 和 Class 条目
146        let mut i = 1;
147        while i < constant_pool_count {
148            let tag = reader.read_u8()?;
149            match tag {
150                0 => {
151                    // 占位符条目(通常在 Long/Double 之后)
152                    // 不需要读取任何数据,只是跳过
153                }
154                1 => {
155                    // UTF8
156                    let length = reader.read_u16()?;
157                    let mut bytes = vec![0u8; length as usize];
158                    reader.read_exact(&mut bytes)?;
159                    let utf8_string = String::from_utf8_lossy(&bytes).to_string();
160                    constant_pool_entries[i as usize] = Some(utf8_string);
161                }
162                3 => {
163                    reader.read_u32()?;
164                } // Integer
165                4 => {
166                    reader.read_u32()?;
167                } // Float
168                5 => {
169                    // Long - 占用两个常量池位置
170                    reader.read_u64()?;
171                    i += 1; // 跳过下一个位置
172                }
173                6 => {
174                    // Double - 占用两个常量池位置
175                    reader.read_u64()?;
176                    i += 1; // 跳过下一个位置
177                }
178                7 => {
179                    // Class - 存储对 UTF8 条目的引用
180                    let name_index = reader.read_u16()?;
181                    constant_pool_entries[i as usize] = Some(format!("CLASS_REF:{}", name_index));
182                }
183                8 => {
184                    reader.read_u16()?;
185                } // String
186                9 | 10 | 11 => {
187                    // Fieldref, Methodref, InterfaceMethodref
188                    reader.read_u16()?;
189                    reader.read_u16()?;
190                }
191                12 => {
192                    // NameAndType
193                    reader.read_u16()?;
194                    reader.read_u16()?;
195                }
196                15 => {
197                    // MethodHandle
198                    reader.read_u8()?;
199                    reader.read_u16()?;
200                }
201                16 => {
202                    reader.read_u16()?;
203                } // MethodType
204                17 | 18 => {
205                    // Dynamic, InvokeDynamic
206                    reader.read_u16()?;
207                    reader.read_u16()?;
208                }
209                19 | 20 => {
210                    reader.read_u16()?;
211                } // Module, Package
212                _ => {
213                    return Err(GaiaError::invalid_data(&format!("Unknown constant pool tag: {}", tag)));
214                }
215            };
216            i += 1;
217        }
218
219        Ok(constant_pool_entries)
220    }
221
222    /// 读取类基本信息
223    fn read_class_info(
224        &self,
225        reader: &mut BinaryReader<impl Read + Seek, Fixed<BigEndian>>,
226    ) -> Result<(u16, u16, u16), GaiaError> {
227        let access_flags = reader.read_u16()?;
228        let this_class = reader.read_u16()?;
229        let super_class = reader.read_u16()?;
230
231        Ok((access_flags, this_class, super_class))
232    }
233
234    /// 解析类名
235    fn resolve_class_name(&self, constant_pool_entries: &[Option<String>], this_class: u16) -> String {
236        if let Some(Some(class_ref)) = constant_pool_entries.get(this_class as usize) {
237            if let Some(class_ref_str) = class_ref.strip_prefix("CLASS_REF:") {
238                if let Ok(name_index) = class_ref_str.parse::<u16>() {
239                    if let Some(Some(name)) = constant_pool_entries.get(name_index as usize) {
240                        return name.clone();
241                    }
242                }
243            }
244        }
245        "UnknownClass".to_string()
246    }
247
248    /// 跳过接口
249    fn skip_interfaces(&self, reader: &mut BinaryReader<impl Read + Seek, Fixed<BigEndian>>) -> Result<(), GaiaError> {
250        let interfaces_count = reader.read_u16()?;
251        for _ in 0..interfaces_count {
252            reader.read_u16()?;
253        }
254        Ok(())
255    }
256
257    /// 读取字段
258    fn read_fields(
259        &self,
260        reader: &mut BinaryReader<impl Read + Seek, Fixed<BigEndian>>,
261        constant_pool_entries: &[Option<String>],
262    ) -> Result<Vec<crate::program::JvmField>, GaiaError> {
263        let fields_count = reader.read_u16()?;
264        let mut fields = Vec::new();
265
266        for _ in 0..fields_count {
267            let field_access_flags = reader.read_u16()?;
268            let name_index = reader.read_u16()?;
269            let descriptor_index = reader.read_u16()?;
270
271            // 获取字段名和描述符
272            let field_name = self.get_string_from_pool(constant_pool_entries, name_index, "UnknownField");
273            let field_descriptor = self.get_string_from_pool(constant_pool_entries, descriptor_index, "UnknownDescriptor");
274
275            let mut field = crate::program::JvmField::new(field_name, field_descriptor);
276            field.access_flags = crate::program::JvmAccessFlags::from_flags(field_access_flags);
277
278            // 跳过字段属性
279            self.skip_attributes(reader)?;
280
281            fields.push(field);
282        }
283
284        Ok(fields)
285    }
286
287    /// 读取方法
288    fn read_methods(
289        &self,
290        reader: &mut BinaryReader<impl Read + Seek, Fixed<BigEndian>>,
291        constant_pool_entries: &[Option<String>],
292    ) -> Result<Vec<crate::program::JvmMethod>, GaiaError> {
293        let methods_count = reader.read_u16()?;
294        let mut methods = Vec::new();
295
296        for _ in 0..methods_count {
297            let method_access_flags = reader.read_u16()?;
298            let name_index = reader.read_u16()?;
299            let descriptor_index = reader.read_u16()?;
300
301            // 获取方法名和描述符
302            let method_name = self.get_string_from_pool(constant_pool_entries, name_index, "UnknownMethod");
303            let method_descriptor = self.get_string_from_pool(constant_pool_entries, descriptor_index, "UnknownDescriptor");
304
305            let mut method = crate::program::JvmMethod::new(method_name, method_descriptor);
306            method.access_flags = crate::program::JvmAccessFlags::from_flags(method_access_flags);
307
308            // 跳过方法属性
309            self.skip_attributes(reader)?;
310
311            methods.push(method);
312        }
313
314        Ok(methods)
315    }
316
317    /// 从常量池获取字符串
318    fn get_string_from_pool(&self, constant_pool_entries: &[Option<String>], index: u16, default: &str) -> String {
319        constant_pool_entries.get(index as usize).and_then(|opt| opt.as_ref()).unwrap_or(&default.to_string()).clone()
320    }
321
322    /// 跳过属性
323    fn skip_attributes(&self, reader: &mut BinaryReader<impl Read + Seek, Fixed<BigEndian>>) -> Result<(), GaiaError> {
324        let attributes_count = reader.read_u16()?;
325        for _ in 0..attributes_count {
326            reader.read_u16()?; // attribute_name_index
327            let attribute_length = reader.read_u32()?;
328            let mut attribute_data = vec![0u8; attribute_length as usize];
329            reader.read_exact(&mut attribute_data)?;
330        }
331        Ok(())
332    }
333
334    fn read_view(&self) -> Result<ClassInfo, GaiaError> {
335        let mut reader = self.reader.borrow_mut();
336
337        // 重新定位到文件开头
338        reader.seek(std::io::SeekFrom::Start(0))?;
339
340        // 读取并验证 class 文件头
341        let (minor_version, major_version) = self.read_class_header(&mut reader)?;
342
343        // 读取常量池
344        let constant_pool_entries = self.read_constant_pool(&mut reader)?;
345
346        // 读取类基本信息
347        let (access_flags, this_class, super_class) = self.read_class_info(&mut reader)?;
348
349        // 解析类名和超类名
350        let class_name = self.resolve_class_name(&constant_pool_entries, this_class);
351        let super_class_name =
352            if super_class == 0 { None } else { Some(self.resolve_class_name(&constant_pool_entries, super_class)) };
353
354        Ok(ClassInfo {
355            magic: 0xCAFEBABE,
356            version: crate::program::JvmVersion { major: major_version, minor: minor_version },
357            access_flags: crate::program::JvmAccessFlags::from_flags(access_flags),
358            this_class: class_name,
359            super_class: super_class_name,
360        })
361    }
362}