1use std::convert::TryFrom;
2use std::str;
3
4use crate::attribute::{
5 BootstrapMethodAttribute, ExceptionTableEntry, InnerClassAttribute, LineNumberTableEntry,
6 ParameterAttribute
7};
8use crate::error::ErrorType;
9use crate::{Attribute, Constant, ConstantTag, Field, Method, Opcode, RawClass, ReferenceKind};
10
11use crate::access_flags::{
12 ClassAccessFlag, FieldAccessFlag, InnerClassAccessFlag, MethodAccessFlag, ParameterAccessFlag
13};
14use crate::opcode::Instruction;
15
/// Cursor-based reader over the raw bytes of a class file.
///
/// The parser borrows the input for `'c`; it keeps the slice it was given together with
/// the position of the next unread byte.
pub struct ClassParser<'c> {
    /// The complete input being parsed.
    bytes: &'c [u8],
    /// Index into `bytes` of the next byte to be consumed.
    offset: usize
}
21
22impl<'c> ClassParser<'c> {
23 pub fn from_bytes(bytes: &'c [u8]) -> ClassParser<'c> {
25 ClassParser { bytes, offset: 0 }
26 }
27
28 pub fn parse(&mut self) -> Result<RawClass<'c>, ErrorType> {
30 let magic = self.read_u32_be();
31 let minor_version = self.read_u16_be();
32 let major_version = self.read_u16_be();
33 let constant_pool_count = self.read_u16_be();
34 let constant_pool = self.read_constant_pool(constant_pool_count)?;
35 let access_flags = ClassAccessFlag::from_bits(self.read_u16_be()).unwrap();
36 let this_class = self.read_u16_be();
37 let super_class = self.read_u16_be();
38 let interface_count = self.read_u16_be();
39 let field_count = self.read_u16_be();
40 let fields = self.read_fields(field_count, &constant_pool)?;
41 let methods_count = self.read_u16_be();
42 let methods = self.read_methods(methods_count, &constant_pool)?;
43 let attributes_count = self.read_u16_be();
44 let attributes = self.read_attributes(attributes_count, &constant_pool)?;
45
46 Ok(RawClass {
47 magic,
48 minor_version,
49 major_version,
50 constant_pool_count,
51 constant_pool,
52 access_flags,
53 this_class,
54 super_class,
55 interface_count,
56 field_count,
57 fields,
58 methods_count,
59 methods,
60 attributes_count,
61 attributes
62 })
63 }
64
65 fn read_u32_be(&mut self) -> u32 {
66 let off = self.offset;
67 self.offset += 4;
68 u32::from_be_bytes([
69 self.bytes[off],
70 self.bytes[off + 1],
71 self.bytes[off + 2],
72 self.bytes[off + 3]
73 ])
74 }
75
76 fn read_i32_be(&mut self) -> i32 {
77 let off = self.offset;
78 self.offset += 4;
79 i32::from_be_bytes([
80 self.bytes[off],
81 self.bytes[off + 1],
82 self.bytes[off + 2],
83 self.bytes[off + 3]
84 ])
85 }
86
87 fn read_u16_be(&mut self) -> u16 {
88 let off = self.offset;
89 self.offset += 2;
90 u16::from_be_bytes([self.bytes[off], self.bytes[off + 1]])
91 }
92
93 fn read_constant_pool(
94 &mut self,
95 constant_pool_count: u16
96 ) -> Result<Vec<Constant<'c>>, ErrorType> {
97 let mut i = 1;
98 let mut constant_pool = Vec::with_capacity(constant_pool_count as usize);
99
100 while i < constant_pool_count {
101 let tag = ConstantTag::try_from(self.bytes[self.offset])?;
102 self.offset += 1;
103
104 let constant = match tag {
105 ConstantTag::Methodref => {
106 let class_index = self.read_u16_be();
107 let name_and_type_index = self.read_u16_be();
108
109 Constant::Methodref {
110 tag,
111 class_index,
112 name_and_type_index
113 }
114 }
115 ConstantTag::Fieldref => {
116 let class_index = self.read_u16_be();
117 let name_and_type_index = self.read_u16_be();
118
119 Constant::Fieldref {
120 tag,
121 class_index,
122 name_and_type_index
123 }
124 }
125 ConstantTag::String => {
126 let string_index = self.read_u16_be();
127
128 Constant::String { tag, string_index }
129 }
130 ConstantTag::Class => {
131 let name_index = self.read_u16_be();
132
133 Constant::Class { tag, name_index }
134 }
135 ConstantTag::Utf8 => {
136 let length = self.read_u16_be();
137 let end = self.offset + length as usize;
138 let bytes = &self.bytes[self.offset..end];
139 self.offset += length as usize;
140
141 Constant::Utf8 { tag, length, bytes }
142 }
143 ConstantTag::NameAndType => {
144 let name_index = self.read_u16_be();
145 let descriptor_index = self.read_u16_be();
146
147 Constant::NameAndType {
148 tag,
149 name_index,
150 descriptor_index
151 }
152 }
153 ConstantTag::Integer => {
154 let value = self.read_i32_be();
155
156 Constant::Integer { tag, value }
157 }
158 ConstantTag::MethodHandle => {
159 let reference_kind = ReferenceKind::try_from(self.bytes[self.offset])?;
160 self.offset += 1;
161 let reference_index = self.read_u16_be();
162
163 Constant::MethodHandle {
164 tag,
165 reference_kind,
166 reference_index
167 }
168 }
169 ConstantTag::InvokeDynamic => {
170 let bootstrap_method_attr_index = self.read_u16_be();
171 let name_index = self.read_u16_be();
172
173 Constant::InvokeDynamic {
174 tag,
175 bootstrap_method_attr_index,
176 name_index
177 }
178 }
179 _ => unimplemented!("Unsupported constant tag {:?}", tag)
180 };
181
182 constant_pool.push(constant);
183 i += 1;
184 }
185
186 Ok(constant_pool)
187 }
188
189 fn read_fields(
190 &mut self,
191 field_count: u16,
192 constant_pool: &[Constant]
193 ) -> Result<Vec<Field>, ErrorType> {
194 let mut i = 0;
195 let mut fields = Vec::with_capacity(field_count as usize);
196
197 while i < field_count {
198 let access_flags = FieldAccessFlag::from_bits(self.read_u16_be()).unwrap();
199 let name_index = self.read_u16_be();
200 let descriptor_index = self.read_u16_be();
201 let attributes_count = self.read_u16_be();
202
203 let attributes = self.read_attributes(attributes_count, constant_pool)?;
204
205 let field = Field {
206 access_flags,
207 name_index,
208 descriptor_index,
209 attributes_count,
210 attributes
211 };
212
213 fields.push(field);
214
215 i += 1;
216 }
217
218 Ok(fields)
219 }
220
221 fn read_methods(
222 &mut self,
223 method_count: u16,
224 constant_pool: &[Constant]
225 ) -> Result<Vec<Method>, ErrorType> {
226 let mut i = 0;
227 let mut methods = Vec::with_capacity(method_count as usize);
228
229 while i < method_count {
230 let access_flags = MethodAccessFlag::from_bits(self.read_u16_be()).unwrap();
231 let name_index = self.read_u16_be();
232 let descriptor_index = self.read_u16_be();
233 let attributes_count = self.read_u16_be();
234
235 let attributes = self.read_attributes(attributes_count, constant_pool)?;
236
237 let method = Method {
238 access_flags,
239 name_index,
240 descriptor_index,
241 attributes_count,
242 attributes
243 };
244
245 methods.push(method);
246
247 i += 1;
248 }
249
250 Ok(methods)
251 }
252
253 fn read_attributes(
254 &mut self,
255 attribute_count: u16,
256 constant_pool: &[Constant]
257 ) -> Result<Vec<Attribute>, ErrorType> {
258 let mut i = 0;
259 let mut attributes = Vec::with_capacity(attribute_count as usize);
260
261 while i < attribute_count {
262 let attribute_name_index = self.read_u16_be();
263 let attribute_length = self.read_u32_be();
264
265 if let Constant::Utf8 {
266 tag: _,
267 length: _,
268 bytes
269 } = &constant_pool[(attribute_name_index - 1) as usize]
270 {
271 let s = str::from_utf8(bytes)?;
272
273 let attribute = match s {
274 "ConstantValue" => {
275 let constantvalue_index = self.read_u16_be();
276
277 Attribute::ConstantValue {
278 attribute_name_index,
279 attribute_length,
280 constantvalue_index
281 }
282 }
283 "Code" => self.read_code_attribute(
284 attribute_name_index,
285 attribute_length,
286 constant_pool
287 )?,
288 "InnerClasses" => {
289 let number_of_classes = self.read_u16_be();
290 let classes = self.read_inner_class_attributes(number_of_classes);
291
292 Attribute::InnerClasses {
293 attribute_name_index,
294 attribute_length,
295 number_of_classes,
296 classes
297 }
298 }
299 "LineNumberTable" => self
300 .read_line_number_table_attribute(attribute_name_index, attribute_length)?,
301 "SourceFile" => {
302 let sourcefile_index = self.read_u16_be();
303
304 Attribute::SourceFile {
305 attribute_name_index,
306 attribute_length,
307 sourcefile_index
308 }
309 }
310 "BootstrapMethods" => {
311 let num_bootstrap_methods = self.read_u16_be();
312 let bootstrap_methods =
313 self.read_bootstrap_method_attributes(num_bootstrap_methods);
314
315 Attribute::BootstrapMethods {
316 attribute_name_index,
317 attribute_length,
318 num_bootstrap_methods,
319 bootstrap_methods
320 }
321 }
322 "MethodParameters" => {
323 let parameters_count = self.bytes[self.offset];
324 self.offset += 1;
325 let parameters = self.read_method_parameter_attributes(parameters_count);
326
327 Attribute::MethodParameters {
328 attribute_name_index,
329 attribute_length,
330 parameters_count,
331 parameters
332 }
333 }
334 "NestMembers" => {
335 let number_of_classes = self.read_u16_be();
336 let mut classes = Vec::with_capacity(number_of_classes as usize);
337
338 let mut i = 0;
339 while i < number_of_classes {
340 classes.push(self.read_u16_be());
341 i += 1;
342 }
343
344 Attribute::NestMembers {
345 attribute_name_index,
346 attribute_length,
347 number_of_classes,
348 classes
349 }
350 }
351 _ => panic!("unknown tag: `{}`", s)
352 };
353
354 attributes.push(attribute);
355 } else {
356 return Err(ErrorType::InvalidNameIndex);
357 }
358
359 i += 1;
360 }
361
362 Ok(attributes)
363 }
364
365 fn read_code_attribute(
366 &mut self,
367 attribute_name_index: u16,
368 attribute_length: u32,
369 constant_pool: &[Constant]
370 ) -> Result<Attribute, ErrorType> {
371 let max_stack = self.read_u16_be();
372 let max_locals = self.read_u16_be();
373 let code_length = self.read_u32_be();
374 let (offset, code) = self.read_instructions(code_length)?;
375 self.offset = offset;
376
377 let exception_table_length = self.read_u16_be();
378 let mut exception_table = Vec::with_capacity(exception_table_length as usize);
379 let mut i = 0;
380
381 while i < exception_table_length {
382 let start_pc = self.read_u16_be();
383 let end_pc = self.read_u16_be();
384 let handler_pc = self.read_u16_be();
385 let catch_type = self.read_u16_be();
386
387 let entry = ExceptionTableEntry {
388 start_pc,
389 end_pc,
390 handler_pc,
391 catch_type
392 };
393
394 exception_table.push(entry);
395 i += 1;
396 }
397
398 let attributes_count = self.read_u16_be();
399 let attributes = self.read_attributes(attributes_count, constant_pool)?;
400
401 Ok(Attribute::Code {
402 attribute_name_index,
403 attribute_length,
404 max_stack,
405 max_locals,
406 code_length,
407 code,
408 exception_table_length,
409 exception_table,
410 attributes_count,
411 attributes
412 })
413 }
414
415 fn read_inner_class_attributes(&mut self, number_of_classes: u16) -> Vec<InnerClassAttribute> {
416 let mut classes = Vec::with_capacity(number_of_classes as usize);
417 let mut i = 0;
418
419 while i < number_of_classes {
420 let inner_class_info_index = self.read_u16_be();
421 let outer_class_info_index = self.read_u16_be();
422 let inner_name_index = self.read_u16_be();
423 let inner_class_access_flags =
424 InnerClassAccessFlag::from_bits(self.read_u16_be()).unwrap();
425
426 classes.push(InnerClassAttribute {
427 inner_class_info_index,
428 outer_class_info_index,
429 inner_name_index,
430 inner_class_access_flags
431 });
432 i += 1;
433 }
434
435 classes
436 }
437
438 fn read_method_parameter_attributes(
439 &mut self,
440 parameters_count: u8
441 ) -> Vec<ParameterAttribute> {
442 let mut parameters = Vec::with_capacity(parameters_count as usize);
443 let mut i = 0;
444
445 while i < parameters_count {
446 let name_index = self.read_u16_be();
447 let access_flags = ParameterAccessFlag::from_bits(self.read_u16_be()).unwrap();
448
449 parameters.push(ParameterAttribute {
450 name_index,
451 access_flags
452 });
453 i += 1;
454 }
455
456 parameters
457 }
458
459 fn read_bootstrap_method_attributes(
460 &mut self,
461 num_bootstrap_methods: u16
462 ) -> Vec<BootstrapMethodAttribute> {
463 let mut methods = Vec::with_capacity(num_bootstrap_methods as usize);
464 let mut i = 0;
465
466 while i < num_bootstrap_methods {
467 let bootstrap_method_ref = self.read_u16_be();
468 let num_bootstrap_arguments = self.read_u16_be();
469 let mut bootstrap_arguments = Vec::with_capacity(num_bootstrap_arguments as usize);
470 let mut j = 0;
471
472 while j < num_bootstrap_arguments {
473 bootstrap_arguments.push(self.read_u16_be());
474 j += 1;
475 }
476
477 methods.push(BootstrapMethodAttribute {
478 bootstrap_method_ref,
479 num_bootstrap_arguments,
480 bootstrap_arguments
481 });
482
483 i += 1;
484 }
485
486 methods
487 }
488
489 fn read_line_number_table_attribute(
490 &mut self,
491 attribute_name_index: u16,
492 attribute_length: u32
493 ) -> Result<Attribute, ErrorType> {
494 let line_number_table_length = self.read_u16_be();
495 let mut i = 0;
496 let mut line_number_table = Vec::with_capacity(line_number_table_length as usize);
497
498 while i < line_number_table_length {
499 let start_pc = self.read_u16_be();
500 let line_number = self.read_u16_be();
501
502 let entry = LineNumberTableEntry {
503 start_pc,
504 line_number
505 };
506
507 line_number_table.push(entry);
508 i += 1;
509 }
510
511 Ok(Attribute::LineNumberTable {
512 attribute_name_index,
513 attribute_length,
514 line_number_table_length,
515 line_number_table
516 })
517 }
518
519 fn read_instructions(
520 &mut self,
521 code_length: u32
522 ) -> Result<(usize, Vec<Instruction>), ErrorType> {
523 let mut offset = self.offset;
524 let mut instructions = Vec::with_capacity(code_length as usize);
525
526 while offset < self.offset + code_length as usize {
527 let opcode = Opcode::try_from(self.bytes[offset])?;
528 offset += 1;
529
530 let ins = match opcode {
531 Opcode::aload_0 => Instruction::aload_0,
532 Opcode::aload_1 => Instruction::aload_1,
533 Opcode::aload_2 => Instruction::aload_2,
534 Opcode::aload_3 => Instruction::aload_3,
535 Opcode::astore => {
536 let index = self.bytes[offset];
537 offset += 1;
538
539 Instruction::astore { index }
540 }
541 Opcode::astore_0 => Instruction::astore_0,
542 Opcode::astore_1 => Instruction::astore_1,
543 Opcode::astore_2 => Instruction::astore_2,
544 Opcode::astore_3 => Instruction::astore_3,
545 Opcode::dup => Instruction::dup,
546 Opcode::bipush => {
547 let byte = self.bytes[offset];
548 offset += 1;
549
550 Instruction::bipush { byte }
551 }
552 Opcode::new => {
553 let indexbyte1 = self.bytes[offset];
554 let indexbyte2 = self.bytes[offset + 1];
555 offset += 2;
556
557 Instruction::new {
558 indexbyte1,
559 indexbyte2
560 }
561 }
562 Opcode::r#eturn => Instruction::r#eturn,
563 Opcode::invokedynamic => {
564 let indexbyte1 = self.bytes[offset];
565 let indexbyte2 = self.bytes[offset + 1];
566 let byte3 = self.bytes[offset + 2];
567 let byte4 = self.bytes[offset + 3];
568 offset += 4;
569
570 if byte3 != 0 || byte4 != 0 {
571 return Err(ErrorType::ParseError);
572 }
573
574 Instruction::invokedynamic {
575 indexbyte1,
576 indexbyte2,
577 byte3,
578 byte4
579 }
580 }
581 Opcode::invokestatic => {
582 let indexbyte1 = self.bytes[offset];
583 let indexbyte2 = self.bytes[offset + 1];
584 offset += 2;
585
586 Instruction::invokestatic {
587 indexbyte1,
588 indexbyte2
589 }
590 }
591 Opcode::invokespecial => {
592 let indexbyte1 = self.bytes[offset];
593 let indexbyte2 = self.bytes[offset + 1];
594 offset += 2;
595
596 Instruction::invokespecial {
597 indexbyte1,
598 indexbyte2
599 }
600 }
601 Opcode::invokevirtual => {
602 let indexbyte1 = self.bytes[offset];
603 let indexbyte2 = self.bytes[offset + 1];
604 offset += 2;
605
606 Instruction::invokevirtual {
607 indexbyte1,
608 indexbyte2
609 }
610 }
611 Opcode::getstatic => {
612 let indexbyte1 = self.bytes[offset];
613 let indexbyte2 = self.bytes[offset + 1];
614 offset += 2;
615
616 Instruction::getstatic {
617 indexbyte1,
618 indexbyte2
619 }
620 }
621 Opcode::ldc => {
622 let index = self.bytes[offset];
623 offset += 1;
624
625 Instruction::ldc { index }
626 }
627 _ => unimplemented!("Unsupported opcode {:?}", opcode)
628 };
629
630 instructions.push(ins);
631 }
632
633 Ok((offset, instructions))
634 }
635}
636
637#[cfg(test)]
638mod test {
639 use super::ClassParser;
640 use crate::attribute::Attribute;
641 use crate::constant_pool::Constant;
642 use crate::error::ErrorType;
643 use std::{fs::File, io::Read, path::Path, str};
644
645 fn read_class_file(p: &str) -> Result<Vec<u8>, ErrorType> {
646 let mut f = File::open(Path::new(p)).unwrap();
647 let mut buf = Vec::with_capacity(64);
648
649 f.read_to_end(&mut buf).unwrap();
650 Ok(buf)
651 }
652
// Parses `Hello.class` and checks the header counts plus the names that `this_class`
// and `super_class` resolve to through the constant pool.
#[test]
fn parse_simple() {
    let buf = read_class_file("./tests/Hello.class").unwrap();
    let mut parser = ClassParser::from_bytes(&buf);
    let class = parser.parse().unwrap();

    assert_eq!(class.magic, 0xCAFEBABE);
    assert_eq!(class.methods_count, 2);
    assert_eq!(class.field_count, 0);

    let pool = class.constant_pool;
    let expectations = [
        (class.this_class, "Hello"),
        (class.super_class, "java/lang/Object")
    ];

    // Both references follow the same chain: Class constant -> Utf8 constant -> name.
    for &(class_index, expected_name) in expectations.iter() {
        let name_index = match &pool[(class_index - 1) as usize] {
            Constant::Class { name_index, .. } => *name_index,
            _ => panic!("expected Constant::Class")
        };

        match &pool[(name_index - 1) as usize] {
            Constant::Utf8 { bytes, .. } => {
                let s = str::from_utf8(bytes).unwrap();
                assert_eq!(s, expected_name);
            }
            _ => panic!("expected Constant::Utf8")
        }
    }
}
701
// Parses `Fields.class` and checks that the first field is named `test` and carries a
// single `ConstantValue` attribute pointing at the integer 2147483647.
#[test]
fn parse_fields() {
    let buf = read_class_file("./tests/Fields.class").unwrap();
    let mut parser = ClassParser::from_bytes(&buf);
    let class = parser.parse().unwrap();

    assert_eq!(class.field_count, 3);
    let pool = class.constant_pool;
    let fields = class.fields;
    let first = &fields[0];

    let name_constant = &pool[(first.name_index - 1) as usize];
    let name_bytes = match name_constant {
        Constant::Utf8 { bytes, .. } => bytes,
        _ => panic!("expected Constant::Utf8, found {:?}", name_constant)
    };
    let s = str::from_utf8(name_bytes).unwrap();

    assert_eq!(s, "test");
    assert_eq!(first.attributes_count, 1);

    let attribute = &first.attributes[0];
    let constantvalue_index = match attribute {
        Attribute::ConstantValue {
            constantvalue_index,
            ..
        } => *constantvalue_index,
        _ => panic!("expected Attribute::ConstantValue, found {:?}", attribute)
    };

    let value_constant = &pool[(constantvalue_index - 1) as usize];
    match value_constant {
        Constant::Integer { value, .. } => assert_eq!(*value, 2147483647),
        _ => panic!("expected Constant::Integer, found {:?}", value_constant)
    }
}
747}