classfmt/
parser.rs

1use std::convert::TryFrom;
2use std::str;
3
4use crate::attribute::{
5    BootstrapMethodAttribute, ExceptionTableEntry, InnerClassAttribute, LineNumberTableEntry,
6    ParameterAttribute
7};
8use crate::error::ErrorType;
9use crate::{Attribute, Constant, ConstantTag, Field, Method, Opcode, RawClass, ReferenceKind};
10
11use crate::access_flags::{
12    ClassAccessFlag, FieldAccessFlag, InnerClassAccessFlag, MethodAccessFlag, ParameterAccessFlag
13};
14use crate::opcode::Instruction;
15
/// The class parser. Used to construct instances of [`RawClass`]
///
/// The parser borrows the input for `'c` and keeps a cursor into it; every
/// `read_*` helper consumes bytes starting at that cursor and advances it.
pub struct ClassParser<'c> {
    /// The raw input being parsed.
    bytes: &'c [u8],
    /// Index into `bytes` of the next unread byte.
    offset: usize
}
21
22impl<'c> ClassParser<'c> {
23    /// Creates a new parser from given bytes
24    pub fn from_bytes(bytes: &'c [u8]) -> ClassParser<'c> {
25        ClassParser { bytes, offset: 0 }
26    }
27
28    /// Parses the provided bytes and tries to construct a new instance of [`RawClass`]
29    pub fn parse(&mut self) -> Result<RawClass<'c>, ErrorType> {
30        let magic = self.read_u32_be();
31        let minor_version = self.read_u16_be();
32        let major_version = self.read_u16_be();
33        let constant_pool_count = self.read_u16_be();
34        let constant_pool = self.read_constant_pool(constant_pool_count)?;
35        let access_flags = ClassAccessFlag::from_bits(self.read_u16_be()).unwrap();
36        let this_class = self.read_u16_be();
37        let super_class = self.read_u16_be();
38        let interface_count = self.read_u16_be();
39        let field_count = self.read_u16_be();
40        let fields = self.read_fields(field_count, &constant_pool)?;
41        let methods_count = self.read_u16_be();
42        let methods = self.read_methods(methods_count, &constant_pool)?;
43        let attributes_count = self.read_u16_be();
44        let attributes = self.read_attributes(attributes_count, &constant_pool)?;
45
46        Ok(RawClass {
47            magic,
48            minor_version,
49            major_version,
50            constant_pool_count,
51            constant_pool,
52            access_flags,
53            this_class,
54            super_class,
55            interface_count,
56            field_count,
57            fields,
58            methods_count,
59            methods,
60            attributes_count,
61            attributes
62        })
63    }
64
65    fn read_u32_be(&mut self) -> u32 {
66        let off = self.offset;
67        self.offset += 4;
68        u32::from_be_bytes([
69            self.bytes[off],
70            self.bytes[off + 1],
71            self.bytes[off + 2],
72            self.bytes[off + 3]
73        ])
74    }
75
76    fn read_i32_be(&mut self) -> i32 {
77        let off = self.offset;
78        self.offset += 4;
79        i32::from_be_bytes([
80            self.bytes[off],
81            self.bytes[off + 1],
82            self.bytes[off + 2],
83            self.bytes[off + 3]
84        ])
85    }
86
87    fn read_u16_be(&mut self) -> u16 {
88        let off = self.offset;
89        self.offset += 2;
90        u16::from_be_bytes([self.bytes[off], self.bytes[off + 1]])
91    }
92
93    fn read_constant_pool(
94        &mut self,
95        constant_pool_count: u16
96    ) -> Result<Vec<Constant<'c>>, ErrorType> {
97        let mut i = 1;
98        let mut constant_pool = Vec::with_capacity(constant_pool_count as usize);
99
100        while i < constant_pool_count {
101            let tag = ConstantTag::try_from(self.bytes[self.offset])?;
102            self.offset += 1;
103
104            let constant = match tag {
105                ConstantTag::Methodref => {
106                    let class_index = self.read_u16_be();
107                    let name_and_type_index = self.read_u16_be();
108
109                    Constant::Methodref {
110                        tag,
111                        class_index,
112                        name_and_type_index
113                    }
114                }
115                ConstantTag::Fieldref => {
116                    let class_index = self.read_u16_be();
117                    let name_and_type_index = self.read_u16_be();
118
119                    Constant::Fieldref {
120                        tag,
121                        class_index,
122                        name_and_type_index
123                    }
124                }
125                ConstantTag::String => {
126                    let string_index = self.read_u16_be();
127
128                    Constant::String { tag, string_index }
129                }
130                ConstantTag::Class => {
131                    let name_index = self.read_u16_be();
132
133                    Constant::Class { tag, name_index }
134                }
135                ConstantTag::Utf8 => {
136                    let length = self.read_u16_be();
137                    let end = self.offset + length as usize;
138                    let bytes = &self.bytes[self.offset..end];
139                    self.offset += length as usize;
140
141                    Constant::Utf8 { tag, length, bytes }
142                }
143                ConstantTag::NameAndType => {
144                    let name_index = self.read_u16_be();
145                    let descriptor_index = self.read_u16_be();
146
147                    Constant::NameAndType {
148                        tag,
149                        name_index,
150                        descriptor_index
151                    }
152                }
153                ConstantTag::Integer => {
154                    let value = self.read_i32_be();
155
156                    Constant::Integer { tag, value }
157                }
158                ConstantTag::MethodHandle => {
159                    let reference_kind = ReferenceKind::try_from(self.bytes[self.offset])?;
160                    self.offset += 1;
161                    let reference_index = self.read_u16_be();
162
163                    Constant::MethodHandle {
164                        tag,
165                        reference_kind,
166                        reference_index
167                    }
168                }
169                ConstantTag::InvokeDynamic => {
170                    let bootstrap_method_attr_index = self.read_u16_be();
171                    let name_index = self.read_u16_be();
172
173                    Constant::InvokeDynamic {
174                        tag,
175                        bootstrap_method_attr_index,
176                        name_index
177                    }
178                }
179                _ => unimplemented!("Unsupported constant tag {:?}", tag)
180            };
181
182            constant_pool.push(constant);
183            i += 1;
184        }
185
186        Ok(constant_pool)
187    }
188
189    fn read_fields(
190        &mut self,
191        field_count: u16,
192        constant_pool: &[Constant]
193    ) -> Result<Vec<Field>, ErrorType> {
194        let mut i = 0;
195        let mut fields = Vec::with_capacity(field_count as usize);
196
197        while i < field_count {
198            let access_flags = FieldAccessFlag::from_bits(self.read_u16_be()).unwrap();
199            let name_index = self.read_u16_be();
200            let descriptor_index = self.read_u16_be();
201            let attributes_count = self.read_u16_be();
202
203            let attributes = self.read_attributes(attributes_count, constant_pool)?;
204
205            let field = Field {
206                access_flags,
207                name_index,
208                descriptor_index,
209                attributes_count,
210                attributes
211            };
212
213            fields.push(field);
214
215            i += 1;
216        }
217
218        Ok(fields)
219    }
220
221    fn read_methods(
222        &mut self,
223        method_count: u16,
224        constant_pool: &[Constant]
225    ) -> Result<Vec<Method>, ErrorType> {
226        let mut i = 0;
227        let mut methods = Vec::with_capacity(method_count as usize);
228
229        while i < method_count {
230            let access_flags = MethodAccessFlag::from_bits(self.read_u16_be()).unwrap();
231            let name_index = self.read_u16_be();
232            let descriptor_index = self.read_u16_be();
233            let attributes_count = self.read_u16_be();
234
235            let attributes = self.read_attributes(attributes_count, constant_pool)?;
236
237            let method = Method {
238                access_flags,
239                name_index,
240                descriptor_index,
241                attributes_count,
242                attributes
243            };
244
245            methods.push(method);
246
247            i += 1;
248        }
249
250        Ok(methods)
251    }
252
253    fn read_attributes(
254        &mut self,
255        attribute_count: u16,
256        constant_pool: &[Constant]
257    ) -> Result<Vec<Attribute>, ErrorType> {
258        let mut i = 0;
259        let mut attributes = Vec::with_capacity(attribute_count as usize);
260
261        while i < attribute_count {
262            let attribute_name_index = self.read_u16_be();
263            let attribute_length = self.read_u32_be();
264
265            if let Constant::Utf8 {
266                tag: _,
267                length: _,
268                bytes
269            } = &constant_pool[(attribute_name_index - 1) as usize]
270            {
271                let s = str::from_utf8(bytes)?;
272
273                let attribute = match s {
274                    "ConstantValue" => {
275                        let constantvalue_index = self.read_u16_be();
276
277                        Attribute::ConstantValue {
278                            attribute_name_index,
279                            attribute_length,
280                            constantvalue_index
281                        }
282                    }
283                    "Code" => self.read_code_attribute(
284                        attribute_name_index,
285                        attribute_length,
286                        constant_pool
287                    )?,
288                    "InnerClasses" => {
289                        let number_of_classes = self.read_u16_be();
290                        let classes = self.read_inner_class_attributes(number_of_classes);
291
292                        Attribute::InnerClasses {
293                            attribute_name_index,
294                            attribute_length,
295                            number_of_classes,
296                            classes
297                        }
298                    }
299                    "LineNumberTable" => self
300                        .read_line_number_table_attribute(attribute_name_index, attribute_length)?,
301                    "SourceFile" => {
302                        let sourcefile_index = self.read_u16_be();
303
304                        Attribute::SourceFile {
305                            attribute_name_index,
306                            attribute_length,
307                            sourcefile_index
308                        }
309                    }
310                    "BootstrapMethods" => {
311                        let num_bootstrap_methods = self.read_u16_be();
312                        let bootstrap_methods =
313                            self.read_bootstrap_method_attributes(num_bootstrap_methods);
314
315                        Attribute::BootstrapMethods {
316                            attribute_name_index,
317                            attribute_length,
318                            num_bootstrap_methods,
319                            bootstrap_methods
320                        }
321                    }
322                    "MethodParameters" => {
323                        let parameters_count = self.bytes[self.offset];
324                        self.offset += 1;
325                        let parameters = self.read_method_parameter_attributes(parameters_count);
326
327                        Attribute::MethodParameters {
328                            attribute_name_index,
329                            attribute_length,
330                            parameters_count,
331                            parameters
332                        }
333                    }
334                    "NestMembers" => {
335                        let number_of_classes = self.read_u16_be();
336                        let mut classes = Vec::with_capacity(number_of_classes as usize);
337
338                        let mut i = 0;
339                        while i < number_of_classes {
340                            classes.push(self.read_u16_be());
341                            i += 1;
342                        }
343
344                        Attribute::NestMembers {
345                            attribute_name_index,
346                            attribute_length,
347                            number_of_classes,
348                            classes
349                        }
350                    }
351                    _ => panic!("unknown tag: `{}`", s)
352                };
353
354                attributes.push(attribute);
355            } else {
356                return Err(ErrorType::InvalidNameIndex);
357            }
358
359            i += 1;
360        }
361
362        Ok(attributes)
363    }
364
365    fn read_code_attribute(
366        &mut self,
367        attribute_name_index: u16,
368        attribute_length: u32,
369        constant_pool: &[Constant]
370    ) -> Result<Attribute, ErrorType> {
371        let max_stack = self.read_u16_be();
372        let max_locals = self.read_u16_be();
373        let code_length = self.read_u32_be();
374        let (offset, code) = self.read_instructions(code_length)?;
375        self.offset = offset;
376
377        let exception_table_length = self.read_u16_be();
378        let mut exception_table = Vec::with_capacity(exception_table_length as usize);
379        let mut i = 0;
380
381        while i < exception_table_length {
382            let start_pc = self.read_u16_be();
383            let end_pc = self.read_u16_be();
384            let handler_pc = self.read_u16_be();
385            let catch_type = self.read_u16_be();
386
387            let entry = ExceptionTableEntry {
388                start_pc,
389                end_pc,
390                handler_pc,
391                catch_type
392            };
393
394            exception_table.push(entry);
395            i += 1;
396        }
397
398        let attributes_count = self.read_u16_be();
399        let attributes = self.read_attributes(attributes_count, constant_pool)?;
400
401        Ok(Attribute::Code {
402            attribute_name_index,
403            attribute_length,
404            max_stack,
405            max_locals,
406            code_length,
407            code,
408            exception_table_length,
409            exception_table,
410            attributes_count,
411            attributes
412        })
413    }
414
415    fn read_inner_class_attributes(&mut self, number_of_classes: u16) -> Vec<InnerClassAttribute> {
416        let mut classes = Vec::with_capacity(number_of_classes as usize);
417        let mut i = 0;
418
419        while i < number_of_classes {
420            let inner_class_info_index = self.read_u16_be();
421            let outer_class_info_index = self.read_u16_be();
422            let inner_name_index = self.read_u16_be();
423            let inner_class_access_flags =
424                InnerClassAccessFlag::from_bits(self.read_u16_be()).unwrap();
425
426            classes.push(InnerClassAttribute {
427                inner_class_info_index,
428                outer_class_info_index,
429                inner_name_index,
430                inner_class_access_flags
431            });
432            i += 1;
433        }
434
435        classes
436    }
437
438    fn read_method_parameter_attributes(
439        &mut self,
440        parameters_count: u8
441    ) -> Vec<ParameterAttribute> {
442        let mut parameters = Vec::with_capacity(parameters_count as usize);
443        let mut i = 0;
444
445        while i < parameters_count {
446            let name_index = self.read_u16_be();
447            let access_flags = ParameterAccessFlag::from_bits(self.read_u16_be()).unwrap();
448
449            parameters.push(ParameterAttribute {
450                name_index,
451                access_flags
452            });
453            i += 1;
454        }
455
456        parameters
457    }
458
459    fn read_bootstrap_method_attributes(
460        &mut self,
461        num_bootstrap_methods: u16
462    ) -> Vec<BootstrapMethodAttribute> {
463        let mut methods = Vec::with_capacity(num_bootstrap_methods as usize);
464        let mut i = 0;
465
466        while i < num_bootstrap_methods {
467            let bootstrap_method_ref = self.read_u16_be();
468            let num_bootstrap_arguments = self.read_u16_be();
469            let mut bootstrap_arguments = Vec::with_capacity(num_bootstrap_arguments as usize);
470            let mut j = 0;
471
472            while j < num_bootstrap_arguments {
473                bootstrap_arguments.push(self.read_u16_be());
474                j += 1;
475            }
476
477            methods.push(BootstrapMethodAttribute {
478                bootstrap_method_ref,
479                num_bootstrap_arguments,
480                bootstrap_arguments
481            });
482
483            i += 1;
484        }
485
486        methods
487    }
488
489    fn read_line_number_table_attribute(
490        &mut self,
491        attribute_name_index: u16,
492        attribute_length: u32
493    ) -> Result<Attribute, ErrorType> {
494        let line_number_table_length = self.read_u16_be();
495        let mut i = 0;
496        let mut line_number_table = Vec::with_capacity(line_number_table_length as usize);
497
498        while i < line_number_table_length {
499            let start_pc = self.read_u16_be();
500            let line_number = self.read_u16_be();
501
502            let entry = LineNumberTableEntry {
503                start_pc,
504                line_number
505            };
506
507            line_number_table.push(entry);
508            i += 1;
509        }
510
511        Ok(Attribute::LineNumberTable {
512            attribute_name_index,
513            attribute_length,
514            line_number_table_length,
515            line_number_table
516        })
517    }
518
519    fn read_instructions(
520        &mut self,
521        code_length: u32
522    ) -> Result<(usize, Vec<Instruction>), ErrorType> {
523        let mut offset = self.offset;
524        let mut instructions = Vec::with_capacity(code_length as usize);
525
526        while offset < self.offset + code_length as usize {
527            let opcode = Opcode::try_from(self.bytes[offset])?;
528            offset += 1;
529
530            let ins = match opcode {
531                Opcode::aload_0 => Instruction::aload_0,
532                Opcode::aload_1 => Instruction::aload_1,
533                Opcode::aload_2 => Instruction::aload_2,
534                Opcode::aload_3 => Instruction::aload_3,
535                Opcode::astore => {
536                    let index = self.bytes[offset];
537                    offset += 1;
538
539                    Instruction::astore { index }
540                }
541                Opcode::astore_0 => Instruction::astore_0,
542                Opcode::astore_1 => Instruction::astore_1,
543                Opcode::astore_2 => Instruction::astore_2,
544                Opcode::astore_3 => Instruction::astore_3,
545                Opcode::dup => Instruction::dup,
546                Opcode::bipush => {
547                    let byte = self.bytes[offset];
548                    offset += 1;
549
550                    Instruction::bipush { byte }
551                }
552                Opcode::new => {
553                    let indexbyte1 = self.bytes[offset];
554                    let indexbyte2 = self.bytes[offset + 1];
555                    offset += 2;
556
557                    Instruction::new {
558                        indexbyte1,
559                        indexbyte2
560                    }
561                }
562                Opcode::r#eturn => Instruction::r#eturn,
563                Opcode::invokedynamic => {
564                    let indexbyte1 = self.bytes[offset];
565                    let indexbyte2 = self.bytes[offset + 1];
566                    let byte3 = self.bytes[offset + 2];
567                    let byte4 = self.bytes[offset + 3];
568                    offset += 4;
569
570                    if byte3 != 0 || byte4 != 0 {
571                        return Err(ErrorType::ParseError);
572                    }
573
574                    Instruction::invokedynamic {
575                        indexbyte1,
576                        indexbyte2,
577                        byte3,
578                        byte4
579                    }
580                }
581                Opcode::invokestatic => {
582                    let indexbyte1 = self.bytes[offset];
583                    let indexbyte2 = self.bytes[offset + 1];
584                    offset += 2;
585
586                    Instruction::invokestatic {
587                        indexbyte1,
588                        indexbyte2
589                    }
590                }
591                Opcode::invokespecial => {
592                    let indexbyte1 = self.bytes[offset];
593                    let indexbyte2 = self.bytes[offset + 1];
594                    offset += 2;
595
596                    Instruction::invokespecial {
597                        indexbyte1,
598                        indexbyte2
599                    }
600                }
601                Opcode::invokevirtual => {
602                    let indexbyte1 = self.bytes[offset];
603                    let indexbyte2 = self.bytes[offset + 1];
604                    offset += 2;
605
606                    Instruction::invokevirtual {
607                        indexbyte1,
608                        indexbyte2
609                    }
610                }
611                Opcode::getstatic => {
612                    let indexbyte1 = self.bytes[offset];
613                    let indexbyte2 = self.bytes[offset + 1];
614                    offset += 2;
615
616                    Instruction::getstatic {
617                        indexbyte1,
618                        indexbyte2
619                    }
620                }
621                Opcode::ldc => {
622                    let index = self.bytes[offset];
623                    offset += 1;
624
625                    Instruction::ldc { index }
626                }
627                _ => unimplemented!("Unsupported opcode {:?}", opcode)
628            };
629
630            instructions.push(ins);
631        }
632
633        Ok((offset, instructions))
634    }
635}
636
#[cfg(test)]
mod test {
    use super::ClassParser;
    use crate::attribute::Attribute;
    use crate::constant_pool::Constant;
    use crate::error::ErrorType;
    use std::{fs::File, io::Read, path::Path, str};

    /// Loads the file at `p` into memory. Panics if it cannot be opened or read.
    fn read_class_file(p: &str) -> Result<Vec<u8>, ErrorType> {
        let mut contents = Vec::with_capacity(64);
        let mut file = File::open(Path::new(p)).unwrap();

        file.read_to_end(&mut contents).unwrap();
        Ok(contents)
    }

    /// Resolves a class constant at one-based `class_index` to its UTF-8 name.
    fn class_name<'p>(pool: &'p [Constant], class_index: u16) -> &'p str {
        let name_index = match &pool[(class_index - 1) as usize] {
            Constant::Class { name_index, .. } => *name_index,
            _ => panic!("expected Constant::Class")
        };

        match &pool[(name_index - 1) as usize] {
            Constant::Utf8 { bytes, .. } => str::from_utf8(bytes).unwrap(),
            _ => panic!("expected Constant::Utf8")
        }
    }

    /// Parses `Hello.class` and checks the header counts plus the names of the class
    /// and its superclass.
    #[test]
    fn parse_simple() {
        let buf = read_class_file("./tests/Hello.class").unwrap();
        let class = ClassParser::from_bytes(&buf).parse().unwrap();

        assert_eq!(class.magic, 0xCAFEBABE);
        assert_eq!(class.methods_count, 2);
        assert_eq!(class.field_count, 0);

        let constant_pool = class.constant_pool;

        assert_eq!(class_name(&constant_pool, class.this_class), "Hello");
        assert_eq!(
            class_name(&constant_pool, class.super_class),
            "java/lang/Object"
        );
    }

    /// Parses `Fields.class` and checks the first field's name and its
    /// `ConstantValue` attribute.
    #[test]
    fn parse_fields() {
        let buf = read_class_file("./tests/Fields.class").unwrap();
        let class = ClassParser::from_bytes(&buf).parse().unwrap();

        assert_eq!(class.field_count, 3);
        let constant_pool = class.constant_pool;
        let fields = class.fields;
        let f0 = &fields[0];

        let constant = &constant_pool[(f0.name_index - 1) as usize];
        let name = match constant {
            Constant::Utf8 { bytes, .. } => str::from_utf8(bytes).unwrap(),
            _ => panic!("expected Constant::Utf8, found {:?}", constant)
        };

        assert_eq!(name, "test");
        assert_eq!(f0.attributes_count, 1);

        let attribute = &f0.attributes[0];
        let constantvalue_index = match attribute {
            Attribute::ConstantValue {
                constantvalue_index,
                ..
            } => *constantvalue_index,
            _ => panic!("expected Attribute::ConstantValue, found {:?}", attribute)
        };

        let constant = &constant_pool[(constantvalue_index - 1) as usize];
        match constant {
            Constant::Integer { value, .. } => assert_eq!(*value, 2147483647),
            _ => panic!("expected Constant::Integer, found {:?}", constant)
        }
    }
}