1use serde::{Deserialize, Serialize};
2use std::collections::HashMap;
3use std::convert::From;
4use std::error::Error;
5use std::fmt::{Display, Formatter, Result as FmtResult};
6use std::io::Read;
7
/// Decoder for the "modified UTF-8" encoding used by class-file string constants.
///
/// Compared with standard UTF-8, supplementary characters are stored as two
/// three-byte sequences (an encoded UTF-16 surrogate pair), and NUL may be
/// stored as the two-byte sequence `0xC0 0x80`.
mod mutf8 {
    /// Reasons a byte sequence cannot be decoded as modified UTF-8.
    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    pub enum MUtf8Error {
        /// The input ended in the middle of a multi-byte sequence.
        MissingByte,
        /// A byte did not have the bit pattern required at its position.
        UnknownByte,
        /// The decoded value is not a valid Unicode scalar value
        /// (for example an unpaired surrogate).
        InvalidChar,
    }

    /// Lead byte shared by every encoded UTF-16 surrogate (`1110_1101`).
    const SURROGATE_LEAD: u8 = 0b1110_1101;

    /// Pulls the next byte, checks that it is a continuation byte (`10xx_xxxx`)
    /// and returns its six payload bits.
    fn continuation_bits<I: Iterator<Item = u8>>(bytes: &mut I) -> Result<u32, MUtf8Error> {
        let byte = bytes.next().ok_or(MUtf8Error::MissingByte)?;
        if byte & 0b1100_0000 != 0b1000_0000 {
            return Err(MUtf8Error::UnknownByte);
        }
        Ok(u32::from(byte & 0b0011_1111))
    }

    /// Decodes modified UTF-8 `bytes` into a `String`.
    ///
    /// # Errors
    ///
    /// * `MissingByte` if the input stops inside a multi-byte sequence.
    /// * `UnknownByte` if a lead or continuation byte has an unexpected bit
    ///   pattern, or a high surrogate is not followed by a low surrogate.
    /// * `InvalidChar` if a decoded value is not a Unicode scalar value.
    pub fn to_string<T: IntoIterator<Item = u8>>(bytes: T) -> Result<String, MUtf8Error> {
        let mut iterator = bytes.into_iter();
        let mut s = String::with_capacity(iterator.size_hint().0);

        // `while let` rather than `for`: the body pulls further bytes itself.
        while let Some(b) = iterator.next() {
            let codepoint = if b & 0b1000_0000 == 0 {
                // One byte: 0xxx_xxxx.
                u32::from(b)
            } else if b & 0b1110_0000 == 0b1100_0000 {
                // Two bytes: 110x_xxxx 10xx_xxxx.
                let low = continuation_bits(&mut iterator)?;
                (u32::from(b & 0b0001_1111) << 6) | low
            } else if b & 0b1111_0000 == 0b1110_0000 {
                // Three bytes: 1110_xxxx 10xx_xxxx 10xx_xxxx.
                let mid = continuation_bits(&mut iterator)?;
                let low = continuation_bits(&mut iterator)?;
                let unit = (u32::from(b & 0b0000_1111) << 12) | (mid << 6) | low;

                if (0xD800..=0xDBFF).contains(&unit) {
                    // High surrogate: the next three bytes must encode the
                    // matching low surrogate. Other values with lead byte 0xED
                    // (U+D000..=U+D7FF) are ordinary characters.
                    let lead = iterator.next().ok_or(MUtf8Error::MissingByte)?;
                    if lead != SURROGATE_LEAD {
                        return Err(MUtf8Error::UnknownByte);
                    }
                    let low_mid = continuation_bits(&mut iterator)?;
                    let low_low = continuation_bits(&mut iterator)?;
                    let low_unit = 0xD000 | (low_mid << 6) | low_low;
                    if !(0xDC00..=0xDFFF).contains(&low_unit) {
                        return Err(MUtf8Error::UnknownByte);
                    }
                    0x10000 + ((unit - 0xD800) << 10) + (low_unit - 0xDC00)
                } else {
                    // Includes unpaired low surrogates, which `from_u32`
                    // rejects below as `InvalidChar`.
                    unit
                }
            } else {
                // Stray continuation byte or a four-byte lead, neither of
                // which occurs in modified UTF-8.
                return Err(MUtf8Error::UnknownByte);
            };
            s.push(std::char::from_u32(codepoint).ok_or(MUtf8Error::InvalidChar)?);
        }
        Ok(s)
    }
}
78
/// Everything that can go wrong while parsing a class file.
#[derive(Debug)]
pub enum ClassFileError {
    /// The leading magic number was not the expected value.
    InvalidMagic,
    /// The underlying reader reported an I/O error.
    Read,
    /// A constant-pool entry carried a tag this parser does not know.
    InvalidCPType,
    /// A constant-pool index was missing or referred to the wrong kind of entry.
    InvalidCPEntry,
    /// A string constant was not valid modified UTF-8.
    MUtf8Format,
    /// The input ended before a value could be read completely.
    EndOfFile,
    /// Bytes remained after the class file was fully parsed.
    MoreData,
}
89
90impl From<std::io::Error> for ClassFileError {
91 fn from(_: std::io::Error) -> Self {
92 ClassFileError::Read
93 }
94}
95
96impl From<mutf8::MUtf8Error> for ClassFileError {
97 fn from(_: mutf8::MUtf8Error) -> Self {
98 ClassFileError::MUtf8Format
99 }
100}
101
102impl Error for ClassFileError {}
103
104impl Display for ClassFileError {
105 fn fmt(&self, f: &mut Formatter) -> FmtResult {
106 write!(
107 f,
108 "{}",
109 match self {
110 ClassFileError::InvalidMagic => "invalid magic value",
111 ClassFileError::Read => "error reading input",
112 ClassFileError::InvalidCPType => "invalid constant pool type",
113 ClassFileError::InvalidCPEntry => "invalid index into constant pool",
114 ClassFileError::MUtf8Format => "error in mutf8 format",
115 ClassFileError::EndOfFile => "end of file",
116 ClassFileError::MoreData => "more data after expected end of file",
117 }
118 )
119 }
120}
121
122fn read_u8<T: Read>(data: &mut T) -> Result<u8, ClassFileError> {
123 let mut buf = [0_u8; 1];
124 let amt = data.read(&mut buf)?;
125 if amt < 1 {
126 return Err(ClassFileError::EndOfFile);
127 }
128 Ok(buf[0])
129}
130
131fn read_u16<T: Read>(data: &mut T) -> Result<u16, ClassFileError> {
132 let mut buf = [0_u8; 2];
133 let amt = data.read(&mut buf)?;
134 if amt < 2 {
135 return Err(ClassFileError::EndOfFile);
136 }
137 let r: u16 = unsafe { std::mem::transmute(buf) };
138 Ok(r.to_be())
139}
140
141fn read_u32<T: Read>(data: &mut T) -> Result<u32, ClassFileError> {
142 let mut buf = [0_u8; 4];
143 let amt = data.read(&mut buf)?;
144 if amt < 4 {
145 return Err(ClassFileError::EndOfFile);
146 }
147 let r: u32 = unsafe { std::mem::transmute(buf) };
148 Ok(r.to_be())
149}
150
151#[derive(Debug, Serialize, Deserialize, Clone)]
152pub enum ConstantPoolInfo {
153 Class {
154 name_index: u16,
155 },
156 Fieldref {
157 class_index: u16,
158 name_and_type_index: u16,
159 },
160 Methodref {
161 class_index: u16,
162 name_and_type_index: u16,
163 },
164 InterfaceMethodref {
165 class_index: u16,
166 name_and_type_index: u16,
167 },
168 String {
169 string_index: u16,
170 },
171 Integer {
172 data: i32,
173 },
174 Float {
175 data: f32,
176 },
177 Long {
178 data: i64,
179 },
180 Double {
181 data: f64,
182 },
183 NameAndType {
184 name_index: u16,
185 descriptor_index: u16,
186 },
187 Utf8 {
188 length: u16,
189 string: String,
190 },
191 MethodHandle {
192 reference_kind: u8,
193 reference_index: u16,
194 },
195 MethodType {
196 descriptor_index: u16,
197 },
198 InvokeDynamic {
199 bootstrap_method_attr_index: u16,
200 name_and_type_index: u16,
201 },
202}
203
204#[derive(Debug, Serialize, Deserialize)]
205pub struct ConstantPool {
206 data: HashMap<u16, ConstantPoolInfo>,
207}
208
209impl ConstantPool {
210 pub fn get_entry(&self, index: u16) -> Result<ConstantPoolInfo, ClassFileError> {
211 Ok(self
212 .data
213 .get(&index)
214 .ok_or(ClassFileError::InvalidCPEntry)?
215 .clone())
216 }
217
218 pub fn get_utf8_entry(&self, index: u16) -> Result<String, ClassFileError> {
219 if let ConstantPoolInfo::Utf8 { length: _, string } = self.get_entry(index)? {
220 Ok(string)
221 } else {
222 Err(ClassFileError::InvalidCPEntry)
223 }
224 }
225}
226
227fn read_constant_pool<T: Read>(data: &mut T) -> Result<ConstantPool, ClassFileError> {
228 let constant_pool_count = read_u16(data)?;
229 let mut constant_pool = HashMap::new();
230 let mut i = 1;
231 while i < constant_pool_count {
232 let cp_type = read_u8(data)?;
233 let entry = match cp_type {
234 7 => ConstantPoolInfo::Class {
235 name_index: read_u16(data)?,
236 },
237 9 => ConstantPoolInfo::Fieldref {
238 class_index: read_u16(data)?,
239 name_and_type_index: read_u16(data)?,
240 },
241 10 => ConstantPoolInfo::Methodref {
242 class_index: read_u16(data)?,
243 name_and_type_index: read_u16(data)?,
244 },
245 11 => ConstantPoolInfo::InterfaceMethodref {
246 class_index: read_u16(data)?,
247 name_and_type_index: read_u16(data)?,
248 },
249 8 => ConstantPoolInfo::String {
250 string_index: read_u16(data)?,
251 },
252 3 => ConstantPoolInfo::Integer {
253 data: unsafe { std::mem::transmute(read_u32(data)?) },
254 },
255 4 => ConstantPoolInfo::Float {
256 data: unsafe { std::mem::transmute(read_u32(data)?) },
257 },
258 5 => {
259 let high = read_u32(data)?;
260 let low = read_u32(data)?;
261 ConstantPoolInfo::Long {
262 data: unsafe { std::mem::transmute([low, high]) },
263 }
264 }
265 6 => {
266 let high = read_u32(data)?;
267 let low = read_u32(data)?;
268 ConstantPoolInfo::Double {
269 data: unsafe { std::mem::transmute([low, high]) },
270 }
271 }
272 12 => ConstantPoolInfo::NameAndType {
273 name_index: read_u16(data)?,
274 descriptor_index: read_u16(data)?,
275 },
276 1 => {
277 let length = read_u16(data)?;
278 let bytes_result: Result<Vec<_>, _> =
279 (0..length).into_iter().map(|_| read_u8(data)).collect();
280 ConstantPoolInfo::Utf8 {
281 length,
282 string: mutf8::to_string(bytes_result?)?,
283 }
284 }
285 15 => ConstantPoolInfo::MethodHandle {
286 reference_kind: read_u8(data)?,
287 reference_index: read_u16(data)?,
288 },
289 16 => ConstantPoolInfo::MethodType {
290 descriptor_index: read_u16(data)?,
291 },
292 18 => ConstantPoolInfo::InvokeDynamic {
293 bootstrap_method_attr_index: read_u16(data)?,
294 name_and_type_index: read_u16(data)?,
295 },
296 _ => return Err(ClassFileError::InvalidCPType),
297 };
298 constant_pool.insert(i, entry);
299 i += 1;
300 if cp_type == 5 || cp_type == 6 {
301 i += 1;
302 }
303 }
304 Ok(ConstantPool {
305 data: constant_pool,
306 })
307}
308
309#[derive(Debug, Serialize, Deserialize)]
310pub struct ClassAccessFlags {
311 pub acc_public: bool,
312 pub acc_final: bool,
313 pub acc_super: bool,
314 pub acc_interface: bool,
315 pub acc_abstract: bool,
316 pub acc_synthetic: bool,
317 pub acc_annotation: bool,
318 pub acc_enum: bool,
319}
320
321fn read_class_access_flags<T: Read>(data: &mut T) -> Result<ClassAccessFlags, ClassFileError> {
322 let flags = read_u16(data)?;
323 Ok(ClassAccessFlags {
324 acc_public: flags & 0x0001 > 0,
325 acc_final: flags & 0x0010 > 0,
326 acc_super: flags & 0x0020 > 0,
327 acc_interface: flags & 0x0200 > 0,
328 acc_abstract: flags & 0x0400 > 0,
329 acc_synthetic: flags & 0x1000 > 0,
330 acc_annotation: flags & 0x2000 > 0,
331 acc_enum: flags & 0x4000 > 0,
332 })
333}
334
335fn read_interfaces<T: Read>(data: &mut T) -> Result<Vec<u16>, ClassFileError> {
336 let interaces_count = read_u16(data)?;
337 let interaces_result: Result<Vec<_>, _> = (0..interaces_count)
338 .into_iter()
339 .map(|_| read_u16(data))
340 .collect();
341 Ok(interaces_result?)
342}
343
344#[derive(Debug, Serialize, Deserialize)]
345pub struct ExceptionTableInfo {
346 start_pc: u16,
347 end_pc: u16,
348 handler_pc: u16,
349 catch_type: u16,
350}
351
352#[derive(Debug, Serialize, Deserialize)]
353pub enum AttributeInfo {
354 Raw {
355 attribute_name: String,
356 info: Vec<u8>,
357 },
358 ConstantValue {
359 constant_value_index: u16,
360 },
361 Code {
362 max_stack: u16,
363 max_locals: u16,
364 code: Vec<u8>,
365 exception_table: Vec<ExceptionTableInfo>,
366 attributes: Vec<AttributeInfo>,
367 },
368 SourceFile {
369 sourcefile_index: u16,
370 },
371}
372
373fn read_attributes<T: Read>(
374 data: &mut T,
375 constant_pool: &ConstantPool,
376) -> Result<Vec<AttributeInfo>, ClassFileError> {
377 let attributes_count = read_u16(data)?;
378 let mut attributes = Vec::new();
379
380 for _ in 0..attributes_count {
381 let attribute_name_index = read_u16(data)?;
382 let attribute_length = read_u32(data)?;
383 let attribute_name = constant_pool.get_utf8_entry(attribute_name_index)?;
384
385 let attribute = match attribute_name.as_str() {
386 "ConstantValue" => AttributeInfo::ConstantValue {
387 constant_value_index: read_u16(data)?,
388 },
389 "SourceFile" => AttributeInfo::SourceFile {
390 sourcefile_index: read_u16(data)?,
391 },
392 "Code" => {
393 let max_stack = read_u16(data)?;
394 let max_locals = read_u16(data)?;
395 let code_length = read_u32(data)?;
396 let code_result: Result<Vec<_>, _> = (0..code_length)
397 .into_iter()
398 .map(|_| read_u8(data))
399 .collect();
400 let exception_table_length = read_u16(data)?;
401 let mut exception_table = Vec::with_capacity(exception_table_length as usize);
402 for _ in 0..exception_table_length {
403 exception_table.push(ExceptionTableInfo {
404 start_pc: read_u16(data)?,
405 end_pc: read_u16(data)?,
406 handler_pc: read_u16(data)?,
407 catch_type: read_u16(data)?,
408 });
409 }
410 let inner_attributes = read_attributes(data, constant_pool)?;
411 AttributeInfo::Code {
412 max_stack,
413 max_locals,
414 code: code_result?,
415 exception_table,
416 attributes: inner_attributes,
417 }
418 }
419 _ => {
420 let bytes_result: Result<Vec<_>, _> = (0..attribute_length)
421 .into_iter()
422 .map(|_| read_u8(data))
423 .collect();
424 AttributeInfo::Raw {
425 attribute_name,
426 info: bytes_result?,
427 }
428 }
429 };
430 attributes.push(attribute);
431 }
432 Ok(attributes)
433}
434
435#[derive(Debug, Serialize, Deserialize)]
436pub struct FieldAccessFlags {
437 pub acc_public: bool,
438 pub acc_private: bool,
439 pub acc_protected: bool,
440 pub acc_static: bool,
441 pub acc_final: bool,
442 pub acc_volatile: bool,
443 pub acc_transient: bool,
444 pub acc_synthetic: bool,
445 pub acc_enum: bool,
446}
447
448fn read_field_access_flags<T: Read>(data: &mut T) -> Result<FieldAccessFlags, ClassFileError> {
449 let flags = read_u16(data)?;
450 Ok(FieldAccessFlags {
451 acc_public: flags & 0x0001 > 0,
452 acc_private: flags & 0x0002 > 0,
453 acc_protected: flags & 0x0004 > 0,
454 acc_static: flags & 0x0008 > 0,
455 acc_final: flags & 0x0010 > 0,
456 acc_volatile: flags & 0x0040 > 0,
457 acc_transient: flags & 0x0080 > 0,
458 acc_synthetic: flags & 0x1000 > 0,
459 acc_enum: flags & 0x4000 > 0,
460 })
461}
462
463#[derive(Debug, Serialize, Deserialize)]
464pub struct FieldInfo {
465 pub access_flags: FieldAccessFlags,
466 pub name_index: u16,
467 pub descriptor_index: u16,
468 pub attributes: Vec<AttributeInfo>,
469}
470
471fn read_fields<T: Read>(
472 data: &mut T,
473 constant_pool: &ConstantPool,
474) -> Result<Vec<FieldInfo>, ClassFileError> {
475 let fields_count = read_u16(data)?;
476 let mut fields = Vec::new();
477 for _ in 0..fields_count {
478 let access_flags = read_field_access_flags(data)?;
479 let name_index = read_u16(data)?;
480 let descriptor_index = read_u16(data)?;
481 let attributes = read_attributes(data, constant_pool)?;
482 let field = FieldInfo {
483 access_flags,
484 name_index,
485 descriptor_index,
486 attributes,
487 };
488 fields.push(field);
489 }
490 Ok(fields)
491}
492
493#[derive(Debug, Serialize, Deserialize)]
494pub struct MethodAccessFlags {
495 pub acc_public: bool,
496 pub acc_private: bool,
497 pub acc_protected: bool,
498 pub acc_static: bool,
499 pub acc_final: bool,
500 pub acc_synchronized: bool,
501 pub acc_bridge: bool,
502 pub acc_varargs: bool,
503 pub acc_native: bool,
504 pub acc_abstract: bool,
505 pub acc_strict: bool,
506 pub acc_synthetic: bool,
507}
508
509fn read_method_access_flags<T: Read>(data: &mut T) -> Result<MethodAccessFlags, ClassFileError> {
510 let flags = read_u16(data)?;
511 Ok(MethodAccessFlags {
512 acc_public: flags & 0x0001 > 0,
513 acc_private: flags & 0x0002 > 0,
514 acc_protected: flags & 0x0004 > 0,
515 acc_static: flags & 0x0008 > 0,
516 acc_final: flags & 0x0010 > 0,
517 acc_synchronized: flags & 0x0020 > 0,
518 acc_bridge: flags & 0x0040 > 0,
519 acc_varargs: flags & 0x0080 > 0,
520 acc_native: flags & 0x0100 > 0,
521 acc_abstract: flags & 0x0400 > 0,
522 acc_strict: flags & 0x0800 > 0,
523 acc_synthetic: flags & 0x1000 > 0,
524 })
525}
526
527#[derive(Debug, Serialize, Deserialize)]
528pub struct MethodInfo {
529 pub access_flags: MethodAccessFlags,
530 pub name_index: u16,
531 pub descriptor_index: u16,
532 pub attributes: Vec<AttributeInfo>,
533}
534
535fn read_methods<T: Read>(
536 data: &mut T,
537 constant_pool: &ConstantPool,
538) -> Result<Vec<MethodInfo>, ClassFileError> {
539 let methods_count = read_u16(data)?;
540 let mut methods = Vec::new();
541 for _ in 0..methods_count {
542 let access_flags = read_method_access_flags(data)?;
543 let name_index = read_u16(data)?;
544 let descriptor_index = read_u16(data)?;
545 let attributes = read_attributes(data, constant_pool)?;
546 let field = MethodInfo {
547 access_flags,
548 name_index,
549 descriptor_index,
550 attributes,
551 };
552 methods.push(field);
553 }
554 Ok(methods)
555}
556
557#[derive(Debug, Serialize, Deserialize)]
558pub struct ClassFile {
559 pub major_version: u16,
560 pub minor_version: u16,
561 pub constant_pool: ConstantPool,
562 pub access_flags: ClassAccessFlags,
563 pub this_class: u16,
564 pub super_class: u16,
565 pub interfaces: Vec<u16>,
566 pub fields: Vec<FieldInfo>,
567 pub methods: Vec<MethodInfo>,
568 pub attributes: Vec<AttributeInfo>,
569}
570
571pub fn read_classfile<T: Read>(data: &mut T) -> Result<ClassFile, ClassFileError> {
572 if read_u32(data)? != 0xcafebabe {
573 return Err(ClassFileError::InvalidMagic);
574 }
575 let minor_version = read_u16(data)?;
576 let major_version = read_u16(data)?;
577
578 let constant_pool = read_constant_pool(data)?;
579
580 let access_flags = read_class_access_flags(data)?;
581
582 let this_class = read_u16(data)?;
583 let super_class = read_u16(data)?;
584
585 let interfaces = read_interfaces(data)?;
586 let fields = read_fields(data, &constant_pool)?;
587 let methods = read_methods(data, &constant_pool)?;
588 let attributes = read_attributes(data, &constant_pool)?;
589
590 if let Ok(_) = read_u8(data) {
591 return Err(ClassFileError::MoreData);
592 }
593
594 Ok(ClassFile {
595 major_version,
596 minor_version,
597 constant_pool,
598 access_flags,
599 this_class,
600 super_class,
601 interfaces,
602 fields,
603 methods,
604 attributes,
605 })
606}