javaclass/
lib.rs

1use serde::{Deserialize, Serialize};
2use std::collections::HashMap;
3use std::convert::From;
4use std::error::Error;
5use std::fmt::{Display, Formatter, Result as FmtResult};
6use std::io::Read;
7
/// Decoder for the "modified UTF-8" byte strings stored in `Utf8` constant pool entries.
mod mutf8 {
    /// Reasons a byte sequence can fail to decode.
    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    pub enum MUtf8Error {
        /// The input ended in the middle of a multi-byte sequence.
        MissingByte,
        /// A byte does not have the bit pattern required at its position.
        UnknownByte,
        /// The decoded value is not a valid Unicode scalar value
        /// (for example an unpaired surrogate).
        InvalidChar,
    }

    /// Pulls the next byte, requires the `10xx_xxxx` continuation pattern and
    /// returns its six payload bits.
    fn continuation<I: Iterator<Item = u8>>(bytes: &mut I) -> Result<u32, MUtf8Error> {
        let byte = bytes.next().ok_or(MUtf8Error::MissingByte)?;
        if byte & 0b1100_0000 != 0b1000_0000 {
            return Err(MUtf8Error::UnknownByte);
        }
        Ok(u32::from(byte & 0b0011_1111))
    }

    /// Decodes modified UTF-8 `bytes` into a `String`.
    ///
    /// One-, two- and three-byte forms are decoded as in ordinary UTF-8
    /// (so `0xC0 0x80` yields U+0000). A three-byte form that decodes to a
    /// high surrogate (U+D800..=U+DBFF) must be followed immediately by a
    /// three-byte low surrogate (`0xED 0xB? 0x??`); the pair is combined into
    /// one supplementary character.
    ///
    /// # Errors
    ///
    /// * [`MUtf8Error::MissingByte`] if the input ends inside a sequence.
    /// * [`MUtf8Error::UnknownByte`] if a lead or continuation byte has an
    ///   unexpected bit pattern.
    /// * [`MUtf8Error::InvalidChar`] if the decoded value is not a valid `char`.
    pub fn to_string<T: IntoIterator<Item = u8>>(bytes: T) -> Result<String, MUtf8Error> {
        let mut s = String::new();
        let mut iterator = bytes.into_iter();
        // `while let` rather than `for`: the body pulls further bytes itself.
        while let Some(b) = iterator.next() {
            let codepoint = if b & 0b1000_0000 == 0 {
                u32::from(b)
            } else if b & 0b1110_0000 == 0b1100_0000 {
                let low = continuation(&mut iterator)?;
                (u32::from(b & 0b0001_1111) << 6) | low
            } else if b & 0b1111_0000 == 0b1110_0000 {
                let mid = continuation(&mut iterator)?;
                let low = continuation(&mut iterator)?;
                let unit = (u32::from(b & 0b0000_1111) << 12) | (mid << 6) | low;
                if (0xD800..=0xDBFF).contains(&unit) {
                    // High surrogate: the low surrogate must follow as
                    // 0xED, 1011_xxxx, 10xx_xxxx.
                    if iterator.next().ok_or(MUtf8Error::MissingByte)? != 0b1110_1101 {
                        return Err(MUtf8Error::UnknownByte);
                    }
                    let b5 = iterator.next().ok_or(MUtf8Error::MissingByte)?;
                    if b5 & 0b1111_0000 != 0b1011_0000 {
                        return Err(MUtf8Error::UnknownByte);
                    }
                    let b6 = continuation(&mut iterator)?;
                    0x10000 + ((unit & 0x3FF) << 10) + ((u32::from(b5 & 0x0F) << 6) | b6)
                } else {
                    // Ordinary three-byte character; a lone low surrogate is
                    // rejected by `from_u32` below.
                    unit
                }
            } else {
                return Err(MUtf8Error::UnknownByte);
            };
            s.push(std::char::from_u32(codepoint).ok_or(MUtf8Error::InvalidChar)?);
        }
        Ok(s)
    }
}
78
/// Everything that can go wrong while parsing a class file.
///
/// The variants carry no payload, so the type is cheap to copy and compare.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ClassFileError {
    /// The file does not start with the expected magic value.
    InvalidMagic,
    /// The underlying reader reported an I/O error.
    Read,
    /// A constant pool entry has an unrecognised tag byte.
    InvalidCPType,
    /// A constant pool index is absent or refers to an entry of the wrong kind.
    InvalidCPEntry,
    /// A `Utf8` constant could not be decoded as modified UTF-8.
    MUtf8Format,
    /// The input ended before the expected data was read.
    EndOfFile,
    /// Bytes remain after the expected end of the class file.
    MoreData,
}
89
90impl From<std::io::Error> for ClassFileError {
91    fn from(_: std::io::Error) -> Self {
92        ClassFileError::Read
93    }
94}
95
96impl From<mutf8::MUtf8Error> for ClassFileError {
97    fn from(_: mutf8::MUtf8Error) -> Self {
98        ClassFileError::MUtf8Format
99    }
100}
101
102impl Error for ClassFileError {}
103
104impl Display for ClassFileError {
105    fn fmt(&self, f: &mut Formatter) -> FmtResult {
106        write!(
107            f,
108            "{}",
109            match self {
110                ClassFileError::InvalidMagic => "invalid magic value",
111                ClassFileError::Read => "error reading input",
112                ClassFileError::InvalidCPType => "invalid constant pool type",
113                ClassFileError::InvalidCPEntry => "invalid index into constant pool",
114                ClassFileError::MUtf8Format => "error in mutf8 format",
115                ClassFileError::EndOfFile => "end of file",
116                ClassFileError::MoreData => "more data after expected end of file",
117            }
118        )
119    }
120}
121
122fn read_u8<T: Read>(data: &mut T) -> Result<u8, ClassFileError> {
123    let mut buf = [0_u8; 1];
124    let amt = data.read(&mut buf)?;
125    if amt < 1 {
126        return Err(ClassFileError::EndOfFile);
127    }
128    Ok(buf[0])
129}
130
131fn read_u16<T: Read>(data: &mut T) -> Result<u16, ClassFileError> {
132    let mut buf = [0_u8; 2];
133    let amt = data.read(&mut buf)?;
134    if amt < 2 {
135        return Err(ClassFileError::EndOfFile);
136    }
137    let r: u16 = unsafe { std::mem::transmute(buf) };
138    Ok(r.to_be())
139}
140
141fn read_u32<T: Read>(data: &mut T) -> Result<u32, ClassFileError> {
142    let mut buf = [0_u8; 4];
143    let amt = data.read(&mut buf)?;
144    if amt < 4 {
145        return Err(ClassFileError::EndOfFile);
146    }
147    let r: u32 = unsafe { std::mem::transmute(buf) };
148    Ok(r.to_be())
149}
150
151#[derive(Debug, Serialize, Deserialize, Clone)]
152pub enum ConstantPoolInfo {
153    Class {
154        name_index: u16,
155    },
156    Fieldref {
157        class_index: u16,
158        name_and_type_index: u16,
159    },
160    Methodref {
161        class_index: u16,
162        name_and_type_index: u16,
163    },
164    InterfaceMethodref {
165        class_index: u16,
166        name_and_type_index: u16,
167    },
168    String {
169        string_index: u16,
170    },
171    Integer {
172        data: i32,
173    },
174    Float {
175        data: f32,
176    },
177    Long {
178        data: i64,
179    },
180    Double {
181        data: f64,
182    },
183    NameAndType {
184        name_index: u16,
185        descriptor_index: u16,
186    },
187    Utf8 {
188        length: u16,
189        string: String,
190    },
191    MethodHandle {
192        reference_kind: u8,
193        reference_index: u16,
194    },
195    MethodType {
196        descriptor_index: u16,
197    },
198    InvokeDynamic {
199        bootstrap_method_attr_index: u16,
200        name_and_type_index: u16,
201    },
202}
203
204#[derive(Debug, Serialize, Deserialize)]
205pub struct ConstantPool {
206    data: HashMap<u16, ConstantPoolInfo>,
207}
208
209impl ConstantPool {
210    pub fn get_entry(&self, index: u16) -> Result<ConstantPoolInfo, ClassFileError> {
211        Ok(self
212            .data
213            .get(&index)
214            .ok_or(ClassFileError::InvalidCPEntry)?
215            .clone())
216    }
217
218    pub fn get_utf8_entry(&self, index: u16) -> Result<String, ClassFileError> {
219        if let ConstantPoolInfo::Utf8 { length: _, string } = self.get_entry(index)? {
220            Ok(string)
221        } else {
222            Err(ClassFileError::InvalidCPEntry)
223        }
224    }
225}
226
227fn read_constant_pool<T: Read>(data: &mut T) -> Result<ConstantPool, ClassFileError> {
228    let constant_pool_count = read_u16(data)?;
229    let mut constant_pool = HashMap::new();
230    let mut i = 1;
231    while i < constant_pool_count {
232        let cp_type = read_u8(data)?;
233        let entry = match cp_type {
234            7 => ConstantPoolInfo::Class {
235                name_index: read_u16(data)?,
236            },
237            9 => ConstantPoolInfo::Fieldref {
238                class_index: read_u16(data)?,
239                name_and_type_index: read_u16(data)?,
240            },
241            10 => ConstantPoolInfo::Methodref {
242                class_index: read_u16(data)?,
243                name_and_type_index: read_u16(data)?,
244            },
245            11 => ConstantPoolInfo::InterfaceMethodref {
246                class_index: read_u16(data)?,
247                name_and_type_index: read_u16(data)?,
248            },
249            8 => ConstantPoolInfo::String {
250                string_index: read_u16(data)?,
251            },
252            3 => ConstantPoolInfo::Integer {
253                data: unsafe { std::mem::transmute(read_u32(data)?) },
254            },
255            4 => ConstantPoolInfo::Float {
256                data: unsafe { std::mem::transmute(read_u32(data)?) },
257            },
258            5 => {
259                let high = read_u32(data)?;
260                let low = read_u32(data)?;
261                ConstantPoolInfo::Long {
262                    data: unsafe { std::mem::transmute([low, high]) },
263                }
264            }
265            6 => {
266                let high = read_u32(data)?;
267                let low = read_u32(data)?;
268                ConstantPoolInfo::Double {
269                    data: unsafe { std::mem::transmute([low, high]) },
270                }
271            }
272            12 => ConstantPoolInfo::NameAndType {
273                name_index: read_u16(data)?,
274                descriptor_index: read_u16(data)?,
275            },
276            1 => {
277                let length = read_u16(data)?;
278                let bytes_result: Result<Vec<_>, _> =
279                    (0..length).into_iter().map(|_| read_u8(data)).collect();
280                ConstantPoolInfo::Utf8 {
281                    length,
282                    string: mutf8::to_string(bytes_result?)?,
283                }
284            }
285            15 => ConstantPoolInfo::MethodHandle {
286                reference_kind: read_u8(data)?,
287                reference_index: read_u16(data)?,
288            },
289            16 => ConstantPoolInfo::MethodType {
290                descriptor_index: read_u16(data)?,
291            },
292            18 => ConstantPoolInfo::InvokeDynamic {
293                bootstrap_method_attr_index: read_u16(data)?,
294                name_and_type_index: read_u16(data)?,
295            },
296            _ => return Err(ClassFileError::InvalidCPType),
297        };
298        constant_pool.insert(i, entry);
299        i += 1;
300        if cp_type == 5 || cp_type == 6 {
301            i += 1;
302        }
303    }
304    Ok(ConstantPool {
305        data: constant_pool,
306    })
307}
308
309#[derive(Debug, Serialize, Deserialize)]
310pub struct ClassAccessFlags {
311    pub acc_public: bool,
312    pub acc_final: bool,
313    pub acc_super: bool,
314    pub acc_interface: bool,
315    pub acc_abstract: bool,
316    pub acc_synthetic: bool,
317    pub acc_annotation: bool,
318    pub acc_enum: bool,
319}
320
321fn read_class_access_flags<T: Read>(data: &mut T) -> Result<ClassAccessFlags, ClassFileError> {
322    let flags = read_u16(data)?;
323    Ok(ClassAccessFlags {
324        acc_public: flags & 0x0001 > 0,
325        acc_final: flags & 0x0010 > 0,
326        acc_super: flags & 0x0020 > 0,
327        acc_interface: flags & 0x0200 > 0,
328        acc_abstract: flags & 0x0400 > 0,
329        acc_synthetic: flags & 0x1000 > 0,
330        acc_annotation: flags & 0x2000 > 0,
331        acc_enum: flags & 0x4000 > 0,
332    })
333}
334
335fn read_interfaces<T: Read>(data: &mut T) -> Result<Vec<u16>, ClassFileError> {
336    let interaces_count = read_u16(data)?;
337    let interaces_result: Result<Vec<_>, _> = (0..interaces_count)
338        .into_iter()
339        .map(|_| read_u16(data))
340        .collect();
341    Ok(interaces_result?)
342}
343
344#[derive(Debug, Serialize, Deserialize)]
345pub struct ExceptionTableInfo {
346    start_pc: u16,
347    end_pc: u16,
348    handler_pc: u16,
349    catch_type: u16,
350}
351
352#[derive(Debug, Serialize, Deserialize)]
353pub enum AttributeInfo {
354    Raw {
355        attribute_name: String,
356        info: Vec<u8>,
357    },
358    ConstantValue {
359        constant_value_index: u16,
360    },
361    Code {
362        max_stack: u16,
363        max_locals: u16,
364        code: Vec<u8>,
365        exception_table: Vec<ExceptionTableInfo>,
366        attributes: Vec<AttributeInfo>,
367    },
368    SourceFile {
369        sourcefile_index: u16,
370    },
371}
372
373fn read_attributes<T: Read>(
374    data: &mut T,
375    constant_pool: &ConstantPool,
376) -> Result<Vec<AttributeInfo>, ClassFileError> {
377    let attributes_count = read_u16(data)?;
378    let mut attributes = Vec::new();
379
380    for _ in 0..attributes_count {
381        let attribute_name_index = read_u16(data)?;
382        let attribute_length = read_u32(data)?;
383        let attribute_name = constant_pool.get_utf8_entry(attribute_name_index)?;
384
385        let attribute = match attribute_name.as_str() {
386            "ConstantValue" => AttributeInfo::ConstantValue {
387                constant_value_index: read_u16(data)?,
388            },
389            "SourceFile" => AttributeInfo::SourceFile {
390                sourcefile_index: read_u16(data)?,
391            },
392            "Code" => {
393                let max_stack = read_u16(data)?;
394                let max_locals = read_u16(data)?;
395                let code_length = read_u32(data)?;
396                let code_result: Result<Vec<_>, _> = (0..code_length)
397                    .into_iter()
398                    .map(|_| read_u8(data))
399                    .collect();
400                let exception_table_length = read_u16(data)?;
401                let mut exception_table = Vec::with_capacity(exception_table_length as usize);
402                for _ in 0..exception_table_length {
403                    exception_table.push(ExceptionTableInfo {
404                        start_pc: read_u16(data)?,
405                        end_pc: read_u16(data)?,
406                        handler_pc: read_u16(data)?,
407                        catch_type: read_u16(data)?,
408                    });
409                }
410                let inner_attributes = read_attributes(data, constant_pool)?;
411                AttributeInfo::Code {
412                    max_stack,
413                    max_locals,
414                    code: code_result?,
415                    exception_table,
416                    attributes: inner_attributes,
417                }
418            }
419            _ => {
420                let bytes_result: Result<Vec<_>, _> = (0..attribute_length)
421                    .into_iter()
422                    .map(|_| read_u8(data))
423                    .collect();
424                AttributeInfo::Raw {
425                    attribute_name,
426                    info: bytes_result?,
427                }
428            }
429        };
430        attributes.push(attribute);
431    }
432    Ok(attributes)
433}
434
435#[derive(Debug, Serialize, Deserialize)]
436pub struct FieldAccessFlags {
437    pub acc_public: bool,
438    pub acc_private: bool,
439    pub acc_protected: bool,
440    pub acc_static: bool,
441    pub acc_final: bool,
442    pub acc_volatile: bool,
443    pub acc_transient: bool,
444    pub acc_synthetic: bool,
445    pub acc_enum: bool,
446}
447
448fn read_field_access_flags<T: Read>(data: &mut T) -> Result<FieldAccessFlags, ClassFileError> {
449    let flags = read_u16(data)?;
450    Ok(FieldAccessFlags {
451        acc_public: flags & 0x0001 > 0,
452        acc_private: flags & 0x0002 > 0,
453        acc_protected: flags & 0x0004 > 0,
454        acc_static: flags & 0x0008 > 0,
455        acc_final: flags & 0x0010 > 0,
456        acc_volatile: flags & 0x0040 > 0,
457        acc_transient: flags & 0x0080 > 0,
458        acc_synthetic: flags & 0x1000 > 0,
459        acc_enum: flags & 0x4000 > 0,
460    })
461}
462
463#[derive(Debug, Serialize, Deserialize)]
464pub struct FieldInfo {
465    pub access_flags: FieldAccessFlags,
466    pub name_index: u16,
467    pub descriptor_index: u16,
468    pub attributes: Vec<AttributeInfo>,
469}
470
471fn read_fields<T: Read>(
472    data: &mut T,
473    constant_pool: &ConstantPool,
474) -> Result<Vec<FieldInfo>, ClassFileError> {
475    let fields_count = read_u16(data)?;
476    let mut fields = Vec::new();
477    for _ in 0..fields_count {
478        let access_flags = read_field_access_flags(data)?;
479        let name_index = read_u16(data)?;
480        let descriptor_index = read_u16(data)?;
481        let attributes = read_attributes(data, constant_pool)?;
482        let field = FieldInfo {
483            access_flags,
484            name_index,
485            descriptor_index,
486            attributes,
487        };
488        fields.push(field);
489    }
490    Ok(fields)
491}
492
493#[derive(Debug, Serialize, Deserialize)]
494pub struct MethodAccessFlags {
495    pub acc_public: bool,
496    pub acc_private: bool,
497    pub acc_protected: bool,
498    pub acc_static: bool,
499    pub acc_final: bool,
500    pub acc_synchronized: bool,
501    pub acc_bridge: bool,
502    pub acc_varargs: bool,
503    pub acc_native: bool,
504    pub acc_abstract: bool,
505    pub acc_strict: bool,
506    pub acc_synthetic: bool,
507}
508
509fn read_method_access_flags<T: Read>(data: &mut T) -> Result<MethodAccessFlags, ClassFileError> {
510    let flags = read_u16(data)?;
511    Ok(MethodAccessFlags {
512        acc_public: flags & 0x0001 > 0,
513        acc_private: flags & 0x0002 > 0,
514        acc_protected: flags & 0x0004 > 0,
515        acc_static: flags & 0x0008 > 0,
516        acc_final: flags & 0x0010 > 0,
517        acc_synchronized: flags & 0x0020 > 0,
518        acc_bridge: flags & 0x0040 > 0,
519        acc_varargs: flags & 0x0080 > 0,
520        acc_native: flags & 0x0100 > 0,
521        acc_abstract: flags & 0x0400 > 0,
522        acc_strict: flags & 0x0800 > 0,
523        acc_synthetic: flags & 0x1000 > 0,
524    })
525}
526
527#[derive(Debug, Serialize, Deserialize)]
528pub struct MethodInfo {
529    pub access_flags: MethodAccessFlags,
530    pub name_index: u16,
531    pub descriptor_index: u16,
532    pub attributes: Vec<AttributeInfo>,
533}
534
535fn read_methods<T: Read>(
536    data: &mut T,
537    constant_pool: &ConstantPool,
538) -> Result<Vec<MethodInfo>, ClassFileError> {
539    let methods_count = read_u16(data)?;
540    let mut methods = Vec::new();
541    for _ in 0..methods_count {
542        let access_flags = read_method_access_flags(data)?;
543        let name_index = read_u16(data)?;
544        let descriptor_index = read_u16(data)?;
545        let attributes = read_attributes(data, constant_pool)?;
546        let field = MethodInfo {
547            access_flags,
548            name_index,
549            descriptor_index,
550            attributes,
551        };
552        methods.push(field);
553    }
554    Ok(methods)
555}
556
557#[derive(Debug, Serialize, Deserialize)]
558pub struct ClassFile {
559    pub major_version: u16,
560    pub minor_version: u16,
561    pub constant_pool: ConstantPool,
562    pub access_flags: ClassAccessFlags,
563    pub this_class: u16,
564    pub super_class: u16,
565    pub interfaces: Vec<u16>,
566    pub fields: Vec<FieldInfo>,
567    pub methods: Vec<MethodInfo>,
568    pub attributes: Vec<AttributeInfo>,
569}
570
571pub fn read_classfile<T: Read>(data: &mut T) -> Result<ClassFile, ClassFileError> {
572    if read_u32(data)? != 0xcafebabe {
573        return Err(ClassFileError::InvalidMagic);
574    }
575    let minor_version = read_u16(data)?;
576    let major_version = read_u16(data)?;
577
578    let constant_pool = read_constant_pool(data)?;
579
580    let access_flags = read_class_access_flags(data)?;
581
582    let this_class = read_u16(data)?;
583    let super_class = read_u16(data)?;
584
585    let interfaces = read_interfaces(data)?;
586    let fields = read_fields(data, &constant_pool)?;
587    let methods = read_methods(data, &constant_pool)?;
588    let attributes = read_attributes(data, &constant_pool)?;
589
590    if let Ok(_) = read_u8(data) {
591        return Err(ClassFileError::MoreData);
592    }
593
594    Ok(ClassFile {
595        major_version,
596        minor_version,
597        constant_pool,
598        access_flags,
599        this_class,
600        super_class,
601        interfaces,
602        fields,
603        methods,
604        attributes,
605    })
606}