jvm_assembler/formats/class/reader/
mod.rs

1use crate::{
2    formats::class::{view::ClassInfo, ClassReadConfig},
3    program::JvmProgram,
4};
5use byteorder::BigEndian;
6use gaia_types::{BinaryReader, GaiaDiagnostics, GaiaError};
7use std::{
8    cell::{OnceCell, RefCell},
9    io::{Read, Seek},
10};
11
12/// jvm class lazy reader
13///
14/// 可以只读取必要的部分
15pub struct ClassReader<'config, R: Read + Seek> {
16    _config: &'config ClassReadConfig,
17    reader: RefCell<BinaryReader<R, BigEndian>>,
18    /// 缓存的程序
19    program: OnceCell<JvmProgram>,
20    /// 缓存的类信息
21    info: OnceCell<ClassInfo>,
22}
23
24impl ClassReadConfig {
25    pub fn as_reader<R: Read + Seek>(&self, reader: R) -> ClassReader<'_, R> {
26        ClassReader::new(reader, self)
27    }
28}
29
30impl<'config, R: Read + Seek> ClassReader<'config, R> {
31    pub fn new(reader: R, config: &'config ClassReadConfig) -> Self {
32        Self { _config: config, reader: RefCell::new(BinaryReader::new(reader)), program: Default::default(), info: Default::default() }
33    }
34
35    pub fn get_program(&self) -> Result<&JvmProgram, GaiaError> {
36        self.program.get_or_try_init(|| self.read_program())
37    }
38    pub fn get_info(&self) -> Result<&ClassInfo, GaiaError> {
39        self.info.get_or_try_init(|| self.read_view())
40    }
41}
42
43impl<'config, R: Read + Seek> ClassReader<'config, R> {
44    pub fn read(mut self) -> GaiaDiagnostics<JvmProgram> {
45        match self.get_program() {
46            Ok(_) => {
47                let errors = self.reader.borrow_mut().take_errors();
48                GaiaDiagnostics { result: self.program.take().ok_or(GaiaError::unreachable()), diagnostics: errors }
49            }
50            Err(e) => {
51                let errors = self.reader.borrow_mut().take_errors();
52                GaiaDiagnostics { result: Err(e), diagnostics: errors }
53            }
54        }
55    }
56    fn read_program(&self) -> Result<JvmProgram, GaiaError> {
57        let mut reader = self.reader.borrow_mut();
58
59        // 从文件开头开始读取
60        reader.seek(std::io::SeekFrom::Start(0))?;
61
62        // 读取类文件头
63        let (minor_version, major_version) = self.read_class_header(&mut reader)?;
64        
65        // 读取常量池
66        let constant_pool_entries = self.read_constant_pool(&mut reader)?;
67
68        // 读取类信息
69        let (access_flags, this_class, super_class) = self.read_class_info(&mut reader)?;
70        
71        // 解析类名和超类名
72        let class_name = self.resolve_class_name(&constant_pool_entries, this_class);
73        let super_class_name = if super_class == 0 {
74            None
75        } else {
76            Some(self.resolve_class_name(&constant_pool_entries, super_class))
77        };
78
79        // 跳过接口
80        self.skip_interfaces(&mut reader)?;
81
82        // 读取字段
83        let fields = self.read_fields(&mut reader, &constant_pool_entries)?;
84
85        // 读取方法
86        let methods = self.read_methods(&mut reader, &constant_pool_entries)?;
87
88        // 创建常量池
89        let mut constant_pool = crate::program::JvmConstantPool::new();
90        for entry in constant_pool_entries.iter() {
91            if let Some(value) = entry {
92                let cp_entry = crate::program::JvmConstantPoolEntry::Utf8 {
93                    value: value.clone(),
94                };
95                constant_pool.entries.push(cp_entry);
96            } else {
97                constant_pool.entries.push(crate::program::JvmConstantPoolEntry::Nop);
98            }
99        }
100
101        // 创建 JvmProgram
102        let mut program = crate::program::JvmProgram::new(class_name);
103        program.version = crate::program::JvmVersion { major: major_version, minor: minor_version };
104        program.access_flags = crate::program::JvmAccessFlags::from_flags(access_flags);
105        program.super_class = super_class_name;
106        program.fields = fields;
107        program.methods = methods;
108        program.constant_pool = constant_pool;
109
110        Ok(program)
111    }
112
113    /// 读取并验证 class 文件头
114    fn read_class_header(&self, reader: &mut gaia_types::BinaryReader<impl Read + Seek, BigEndian>) -> Result<(u16, u16), GaiaError> {
115        let magic = reader.read_u32()?;
116        if magic != 0xCAFEBABE {
117            return Err(GaiaError::invalid_data("Invalid class file magic number"));
118        }
119
120        let minor_version = reader.read_u16()?;
121        let major_version = reader.read_u16()?;
122        
123        Ok((minor_version, major_version))
124    }
125
126    /// 读取常量池
127    fn read_constant_pool(&self, reader: &mut gaia_types::BinaryReader<impl Read + Seek, BigEndian>) -> Result<Vec<Option<String>>, GaiaError> {
128        let constant_pool_count = reader.read_u16()?;
129        let mut constant_pool_entries: Vec<Option<String>> = vec![None; constant_pool_count as usize];
130
131        // 解析常量池,特别关注 UTF8 和 Class 条目
132        let mut i = 1;
133        while i < constant_pool_count {
134            let tag = reader.read_u8()?;
135            match tag {
136                0 => {
137                    // 占位符条目(通常在 Long/Double 之后)
138                    // 不需要读取任何数据,只是跳过
139                }
140                1 => {
141                    // UTF8
142                    let length = reader.read_u16()?;
143                    let mut bytes = vec![0u8; length as usize];
144                    reader.read_exact(&mut bytes)?;
145                    let utf8_string = String::from_utf8_lossy(&bytes).to_string();
146                    constant_pool_entries[i as usize] = Some(utf8_string);
147                }
148                3 => { reader.read_u32()?; }, // Integer
149                4 => { reader.read_u32()?; }, // Float
150                5 => { 
151                    // Long - 占用两个常量池位置
152                    reader.read_u64()?; 
153                    i += 1; // 跳过下一个位置
154                }, 
155                6 => { 
156                    // Double - 占用两个常量池位置
157                    reader.read_u64()?; 
158                    i += 1; // 跳过下一个位置
159                }, 
160                7 => {
161                    // Class - 存储对 UTF8 条目的引用
162                    let name_index = reader.read_u16()?;
163                    constant_pool_entries[i as usize] = Some(format!("CLASS_REF:{}", name_index));
164                }
165                8 => { reader.read_u16()?; }, // String
166                9 | 10 | 11 => {
167                    // Fieldref, Methodref, InterfaceMethodref
168                    reader.read_u16()?;
169                    reader.read_u16()?;
170                }
171                12 => {
172                    // NameAndType
173                    reader.read_u16()?;
174                    reader.read_u16()?;
175                }
176                15 => {
177                    // MethodHandle
178                    reader.read_u8()?;
179                    reader.read_u16()?;
180                }
181                16 => { reader.read_u16()?; }, // MethodType
182                17 | 18 => {
183                    // Dynamic, InvokeDynamic
184                    reader.read_u16()?;
185                    reader.read_u16()?;
186                }
187                19 | 20 => { reader.read_u16()?; }, // Module, Package
188                _ => {
189                    return Err(GaiaError::invalid_data(&format!("Unknown constant pool tag: {}", tag)));
190                }
191            };
192            i += 1;
193        }
194
195        Ok(constant_pool_entries)
196    }
197
198    /// 读取类基本信息
199    fn read_class_info(&self, reader: &mut gaia_types::BinaryReader<impl Read + Seek, BigEndian>) -> Result<(u16, u16, u16), GaiaError> {
200        let access_flags = reader.read_u16()?;
201        let this_class = reader.read_u16()?;
202        let super_class = reader.read_u16()?;
203        
204        Ok((access_flags, this_class, super_class))
205    }
206
207    /// 解析类名
208    fn resolve_class_name(&self, constant_pool_entries: &[Option<String>], this_class: u16) -> String {
209        if let Some(Some(class_ref)) = constant_pool_entries.get(this_class as usize) {
210            if let Some(class_ref_str) = class_ref.strip_prefix("CLASS_REF:") {
211                if let Ok(name_index) = class_ref_str.parse::<u16>() {
212                    if let Some(Some(name)) = constant_pool_entries.get(name_index as usize) {
213                        return name.clone();
214                    }
215                }
216            }
217        }
218        "UnknownClass".to_string()
219    }
220
221    /// 跳过接口
222    fn skip_interfaces(&self, reader: &mut gaia_types::BinaryReader<impl Read + Seek, BigEndian>) -> Result<(), GaiaError> {
223        let interfaces_count = reader.read_u16()?;
224        for _ in 0..interfaces_count {
225            reader.read_u16()?;
226        }
227        Ok(())
228    }
229
230    /// 读取字段
231    fn read_fields(&self, reader: &mut gaia_types::BinaryReader<impl Read + Seek, BigEndian>, constant_pool_entries: &[Option<String>]) -> Result<Vec<crate::program::JvmField>, GaiaError> {
232        let fields_count = reader.read_u16()?;
233        let mut fields = Vec::new();
234        
235        for _ in 0..fields_count {
236            let field_access_flags = reader.read_u16()?;
237            let name_index = reader.read_u16()?;
238            let descriptor_index = reader.read_u16()?;
239            
240            // 获取字段名和描述符
241            let field_name = self.get_string_from_pool(constant_pool_entries, name_index, "UnknownField");
242            let field_descriptor = self.get_string_from_pool(constant_pool_entries, descriptor_index, "UnknownDescriptor");
243            
244            let mut field = crate::program::JvmField::new(field_name, field_descriptor);
245            field.access_flags = crate::program::JvmAccessFlags::from_flags(field_access_flags);
246            
247            // 跳过字段属性
248            self.skip_attributes(reader)?;
249            
250            fields.push(field);
251        }
252        
253        Ok(fields)
254    }
255
256    /// 读取方法
257    fn read_methods(&self, reader: &mut gaia_types::BinaryReader<impl Read + Seek, BigEndian>, constant_pool_entries: &[Option<String>]) -> Result<Vec<crate::program::JvmMethod>, GaiaError> {
258        let methods_count = reader.read_u16()?;
259        let mut methods = Vec::new();
260        
261        for _ in 0..methods_count {
262            let method_access_flags = reader.read_u16()?;
263            let name_index = reader.read_u16()?;
264            let descriptor_index = reader.read_u16()?;
265            
266            // 获取方法名和描述符
267            let method_name = self.get_string_from_pool(constant_pool_entries, name_index, "UnknownMethod");
268            let method_descriptor = self.get_string_from_pool(constant_pool_entries, descriptor_index, "UnknownDescriptor");
269            
270            let mut method = crate::program::JvmMethod::new(method_name, method_descriptor);
271            method.access_flags = crate::program::JvmAccessFlags::from_flags(method_access_flags);
272            
273            // 跳过方法属性
274            self.skip_attributes(reader)?;
275            
276            methods.push(method);
277        }
278        
279        Ok(methods)
280    }
281
282    /// 从常量池获取字符串
283    fn get_string_from_pool(&self, constant_pool_entries: &[Option<String>], index: u16, default: &str) -> String {
284        constant_pool_entries.get(index as usize)
285            .and_then(|opt| opt.as_ref())
286            .unwrap_or(&default.to_string())
287            .clone()
288    }
289
290    /// 跳过属性
291    fn skip_attributes(&self, reader: &mut gaia_types::BinaryReader<impl Read + Seek, BigEndian>) -> Result<(), GaiaError> {
292        let attributes_count = reader.read_u16()?;
293        for _ in 0..attributes_count {
294            reader.read_u16()?; // attribute_name_index
295            let attribute_length = reader.read_u32()?;
296            let mut attribute_data = vec![0u8; attribute_length as usize];
297            reader.read_exact(&mut attribute_data)?;
298        }
299        Ok(())
300    }
301
302    fn read_view(&self) -> Result<ClassInfo, GaiaError> {
303        let mut reader = self.reader.borrow_mut();
304
305        // 重新定位到文件开头
306        reader.seek(std::io::SeekFrom::Start(0))?;
307
308        // 读取并验证 class 文件头
309        let (minor_version, major_version) = self.read_class_header(&mut reader)?;
310
311        // 读取常量池
312        let constant_pool_entries = self.read_constant_pool(&mut reader)?;
313
314        // 读取类基本信息
315        let (access_flags, this_class, super_class) = self.read_class_info(&mut reader)?;
316
317        // 解析类名和超类名
318        let class_name = self.resolve_class_name(&constant_pool_entries, this_class);
319        let super_class_name = if super_class == 0 {
320            None
321        } else {
322            Some(self.resolve_class_name(&constant_pool_entries, super_class))
323        };
324
325        Ok(ClassInfo {
326            magic: 0xCAFEBABE,
327            version: crate::program::JvmVersion { major: major_version, minor: minor_version },
328            access_flags: crate::program::JvmAccessFlags::from_flags(access_flags),
329            this_class: class_name,
330            super_class: super_class_name,
331        })
332    }
333}