#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef uint8_t u1;
typedef uint16_t u2;
typedef uint32_t u4;
typedef struct {
u4 magic;
u2 minor_version;
u2 major_version;
} class_header_t;
typedef struct {
u2 access_flags;
u2 this_class;
u2 super_class;
} class_info_t;
typedef struct {
u2 access_flags;
u2 name_index;
u2 descriptor_index;
u2 attributes_count;
} method_info;
typedef struct {
u2 attribute_name_index;
u4 attribute_length;
} attribute_info;
typedef struct {
u2 max_stack;
u2 max_locals;
u4 code_length;
u1 *code;
} code_t;
typedef struct {
char *name;
char *descriptor;
code_t code;
} method_t;
typedef enum {
CONSTANT_Utf8 = 1,
CONSTANT_Integer = 3,
CONSTANT_Class = 7,
CONSTANT_FieldRef = 9,
CONSTANT_MethodRef = 10,
CONSTANT_NameAndType = 12
} const_pool_tag_t;
typedef struct {
u2 string_index;
} CONSTANT_Class_info;
typedef struct {
u2 class_index;
u2 name_and_type_index;
} CONSTANT_FieldOrMethodRef_info;
typedef struct {
int32_t bytes;
} CONSTANT_Integer_info;
typedef struct {
u2 name_index;
u2 descriptor_index;
} CONSTANT_NameAndType_info;
typedef struct {
const_pool_tag_t tag;
u1 *info;
} const_pool_info;
typedef struct {
u2 constant_pool_count;
const_pool_info *constant_pool;
} constant_pool_t;
typedef struct {
constant_pool_t constant_pool;
method_t *methods;
} class_file_t;
typedef enum {
i_iconst_m1 = 0x2,
i_iconst_0 = 0x3,
i_iconst_1 = 0x4,
i_iconst_2 = 0x5,
i_iconst_3 = 0x6,
i_iconst_4 = 0x7,
i_iconst_5 = 0x8,
i_bipush = 0x10,
i_sipush = 0x11,
i_ldc = 0x12,
i_iload = 0x15,
i_iload_0 = 0x1a,
i_iload_1 = 0x1b,
i_iload_2 = 0x1c,
i_iload_3 = 0x1d,
i_istore = 0x36,
i_istore_0 = 0x3b,
i_istore_1 = 0x3c,
i_istore_2 = 0x3d,
i_istore_3 = 0x3e,
i_iadd = 0x60,
i_isub = 0x64,
i_imul = 0x68,
i_idiv = 0x6c,
i_irem = 0x70,
i_ineg = 0x74,
i_iinc = 0x84,
i_ifeq = 0x99,
i_ifne = 0x9a,
i_iflt = 0x9b,
i_ifge = 0x9c,
i_ifgt = 0x9d,
i_ifle = 0x9e,
i_if_icmpeq = 0x9f,
i_if_icmpne = 0xa0,
i_if_icmplt = 0xa1,
i_if_icmpge = 0xa2,
i_if_icmpgt = 0xa3,
i_if_icmple = 0xa4,
i_goto = 0xa7,
i_ireturn = 0xac,
i_return = 0xb1,
i_getstatic = 0xb2,
i_invokevirtual = 0xb6,
i_invokestatic = 0xb8,
} jvm_opcode_t;
u1 read_u1(FILE *class_file)
{
int result = fgetc(class_file);
assert(result != EOF && "Reached end of file prematurely");
return result;
}
u2 read_u2(FILE *class_file)
{
return (u2) read_u1(class_file) << 8 | read_u1(class_file);
}
u4 read_u4(FILE *class_file)
{
return (u4) read_u2(class_file) << 16 | read_u2(class_file);
}
const_pool_info *get_constant(constant_pool_t *constant_pool, u2 index)
{
assert(0 < index && index <= constant_pool->constant_pool_count &&
"Invalid constant pool index");
return &constant_pool->constant_pool[index - 1];
}
CONSTANT_NameAndType_info *get_method_name_and_type(constant_pool_t *cp, u2 idx)
{
const_pool_info *method = get_constant(cp, idx);
assert(method->tag == CONSTANT_MethodRef && "Expected a MethodRef");
const_pool_info *name_and_type_constant = get_constant(
cp,
((CONSTANT_FieldOrMethodRef_info *) method->info)->name_and_type_index);
assert(name_and_type_constant->tag == CONSTANT_NameAndType &&
"Expected a NameAndType");
return (CONSTANT_NameAndType_info *) name_and_type_constant->info;
}
uint16_t get_number_of_parameters(method_t *method)
{
return strlen(method->descriptor) - 3;
}
method_t *find_method(const char *name, const char *desc, class_file_t *clazz)
{
for (method_t *method = clazz->methods; method->name; method++) {
if (!(strcmp(name, method->name) || strcmp(desc, method->descriptor)))
return method;
}
return NULL;
}
method_t *find_method_from_index(uint16_t idx, class_file_t *clazz)
{
CONSTANT_NameAndType_info *name_and_type =
get_method_name_and_type(&clazz->constant_pool, idx);
const_pool_info *name =
get_constant(&clazz->constant_pool, name_and_type->name_index);
assert(name->tag == CONSTANT_Utf8 && "Expected a UTF8");
const_pool_info *descriptor =
get_constant(&clazz->constant_pool, name_and_type->descriptor_index);
assert(descriptor->tag == CONSTANT_Utf8 && "Expected a UTF8");
return find_method((char *) name->info, (char *) descriptor->info, clazz);
}
class_header_t get_class_header(FILE *class_file)
{
u4 magic = read_u4(class_file);
u2 major = read_u2(class_file);
u2 minor = read_u2(class_file);
return (class_header_t){
.magic = magic,
.major_version = major,
.minor_version = minor,
};
}
constant_pool_t get_constant_pool(FILE *class_file)
{
constant_pool_t cp = {
.constant_pool_count = read_u2(class_file) - 1,
.constant_pool =
malloc(sizeof(const_pool_info) * cp.constant_pool_count),
};
assert(cp.constant_pool && "Failed to allocate constant pool");
const_pool_info *constant = cp.constant_pool;
for (u2 i = 0; i < cp.constant_pool_count; i++, constant++) {
constant->tag = read_u1(class_file);
switch (constant->tag) {
case CONSTANT_Utf8: {
u2 length = read_u2(class_file);
char *value = malloc(length + 1);
assert(value && "Failed to allocate UTF8 constant");
size_t bytes_read = fread(value, 1, length, class_file);
assert(bytes_read == length && "Failed to read UTF8 constant");
value[length] = '\0';
constant->info = (u1 *) value;
break;
}
case CONSTANT_Integer: {
CONSTANT_Integer_info *value = malloc(sizeof(*value));
assert(value && "Failed to allocate integer constant");
value->bytes = read_u4(class_file);
constant->info = (u1 *) value;
break;
}
case CONSTANT_Class: {
CONSTANT_Class_info *value = malloc(sizeof(*value));
assert(value && "Failed to allocate class constant");
value->string_index = read_u2(class_file);
constant->info = (u1 *) value;
break;
}
case CONSTANT_MethodRef:
case CONSTANT_FieldRef: {
CONSTANT_FieldOrMethodRef_info *value = malloc(sizeof(*value));
assert(value &&
"Failed to allocate FieldRef or MethodRef constant");
value->class_index = read_u2(class_file);
value->name_and_type_index = read_u2(class_file);
constant->info = (u1 *) value;
break;
}
case CONSTANT_NameAndType: {
CONSTANT_NameAndType_info *value = malloc(sizeof(*value));
assert(value && "Failed to allocate NameAndType constant");
value->name_index = read_u2(class_file);
value->descriptor_index = read_u2(class_file);
constant->info = (u1 *) value;
break;
}
default:
fprintf(stderr, "Unknown constant type %d\n", constant->tag);
exit(1);
}
}
return cp;
}
class_info_t get_class_info(FILE *class_file)
{
class_info_t info = {
.access_flags = read_u2(class_file),
.this_class = read_u2(class_file),
.super_class = read_u2(class_file),
};
u2 interfaces_count = read_u2(class_file);
assert(!interfaces_count && "This VM does not support interfaces.");
u2 fields_count = read_u2(class_file);
assert(!fields_count && "This VM does not support fields.");
return info;
}
void read_method_attributes(FILE *class_file,
method_info *info,
code_t *code,
constant_pool_t *cp)
{
bool found_code = false;
for (u2 i = 0; i < info->attributes_count; i++) {
attribute_info ainfo = {
.attribute_name_index = read_u2(class_file),
.attribute_length = read_u4(class_file),
};
long attribute_end = ftell(class_file) + ainfo.attribute_length;
const_pool_info *type_constant =
get_constant(cp, ainfo.attribute_name_index);
assert(type_constant->tag == CONSTANT_Utf8 && "Expected a UTF8");
if (!strcmp((char *) type_constant->info, "Code")) {
assert(!found_code && "Duplicate method code");
found_code = true;
code->max_stack = read_u2(class_file);
code->max_locals = read_u2(class_file);
code->code_length = read_u4(class_file);
code->code = malloc(code->code_length);
assert(code->code && "Failed to allocate method code");
size_t bytes_read =
fread(code->code, 1, code->code_length, class_file);
assert(bytes_read == code->code_length &&
"Failed to read method code");
}
fseek(class_file, attribute_end, SEEK_SET);
}
assert(found_code && "Missing method code");
}
#define IS_STATIC 0x0008
method_t *get_methods(FILE *class_file, constant_pool_t *cp)
{
u2 method_count = read_u2(class_file);
method_t *methods = malloc(sizeof(*methods) * (method_count + 1));
assert(methods && "Failed to allocate methods");
method_t *method = methods;
for (u2 i = 0; i < method_count; i++, method++) {
method_info info = {
.access_flags = read_u2(class_file),
.name_index = read_u2(class_file),
.descriptor_index = read_u2(class_file),
.attributes_count = read_u2(class_file),
};
const_pool_info *name = get_constant(cp, info.name_index);
assert(name->tag == CONSTANT_Utf8 && "Expected a UTF8");
method->name = (char *) name->info;
const_pool_info *descriptor = get_constant(cp, info.descriptor_index);
assert(descriptor->tag == CONSTANT_Utf8 && "Expected a UTF8");
method->descriptor = (char *) descriptor->info;
if (strcmp(method->name, "<init>"))
assert((info.access_flags & IS_STATIC) &&
"Only static methods are supported by this VM.");
read_method_attributes(class_file, &info, &method->code, cp);
}
method->name = NULL;
return methods;
}
class_file_t get_class(FILE *class_file)
{
get_class_header(class_file);
class_file_t clazz = {.constant_pool = get_constant_pool(class_file)};
get_class_info(class_file);
clazz.methods = get_methods(class_file, &clazz.constant_pool);
return clazz;
}
void free_class(class_file_t *clazz)
{
constant_pool_t *cp = &clazz->constant_pool;
for (u2 i = 0; i < cp->constant_pool_count; i++)
free(cp->constant_pool[i].info);
free(cp->constant_pool);
for (method_t *method = clazz->methods; method->name; method++)
free(method->code.code);
free(clazz->methods);
}
static inline void bipush(int32_t *op_stack,
int32_t op_count,
uint32_t pc,
uint8_t *code_buf)
{
int8_t param = code_buf[pc + 1];
op_stack[op_count] = param;
}
static inline void sipush(int32_t *op_stack,
int32_t op_count,
uint32_t pc,
uint8_t *code_buf)
{
uint8_t param1 = code_buf[pc + 1], param2 = code_buf[pc + 2];
int16_t res = ((param1 << 8) | param2);
op_stack[op_count] = res;
}
static inline void iadd(int32_t *op_stack, int32_t op_count)
{
int32_t op1 = op_stack[op_count - 1], op2 = op_stack[op_count - 2];
printf("%d/%d ", op1, op2);
int32_t res = op1 + op2;
op_stack[op_count - 2] = res;
}
static inline void isub(int32_t *op_stack, int32_t op_count)
{
int32_t op1 = op_stack[op_count - 1], op2 = op_stack[op_count - 2];
op_stack[op_count - 2] = op2 - op1;
}
static inline void imul(int32_t *op_stack, int32_t op_count)
{
int32_t op1 = op_stack[op_count - 1], op2 = op_stack[op_count - 2];
op_stack[op_count - 2] = op2 * op1;
}
static inline void idiv(int32_t *op_stack, int32_t op_count)
{
int32_t op1 = op_stack[op_count - 1], op2 = op_stack[op_count - 2];
op_stack[op_count - 2] = op2 / op1;
}
static inline void irem(int32_t *op_stack, int32_t op_count)
{
int32_t op1 = op_stack[op_count - 1], op2 = op_stack[op_count - 2];
op_stack[op_count - 2] = op2 % op1;
}
static inline void ineg(int32_t *op_stack, int32_t op_count)
{
int32_t op1 = op_stack[op_count - 1];
op_stack[op_count - 1] = -1 * op1;
}
static inline void invokevirtual(int32_t *op_stack, int32_t op_count)
{
int32_t op = op_stack[op_count - 1];
printf("%d\n", op);
}
static inline void iconst(int32_t *op_stack, int32_t op_count, uint8_t current)
{
op_stack[op_count] = current - i_iconst_0;
}
int32_t *execute(method_t *method, int32_t *locals, class_file_t *clazz)
{
code_t code = method->code;
int32_t op_stack[code.max_stack];
uint32_t op_count = 0;
uint32_t pc = 0;
uint8_t *code_buf = code.code;
int loop_count = 0;
while (pc < code.code_length) {
loop_count += 1;
uint8_t current = code_buf[pc];
switch (current) {
case i_ireturn: {
int32_t *ret = malloc(sizeof(int32_t));
*ret = op_stack[op_count - 1];
return ret;
} break;
case i_return:
return NULL;
case i_invokestatic: {
uint8_t param1 = code_buf[pc + 1], param2 = code_buf[pc + 2];
uint16_t index = ((param1 << 8) | param2);
method_t *own_method = find_method_from_index(index, clazz);
uint16_t num_params = get_number_of_parameters(own_method);
int32_t own_locals[own_method->code.max_locals];
for (int i = num_params - 1; i >= 0; i--) {
own_locals[i] = op_stack[op_count - 1];
op_count -= 1;
}
int32_t *exec_res = execute(own_method, own_locals, clazz);
if (exec_res) {
op_stack[op_count] = *exec_res;
op_count += 1;
}
free(exec_res);
pc += 3;
} break;
case i_ifeq: {
uint8_t param1 = code_buf[pc + 1], param2 = code_buf[pc + 2];
int32_t conditional = op_stack[op_count - 1];
pc += 3;
if (conditional == 0) {
int16_t res = ((param1 << 8) | param2);
pc += res - 3;
}
op_count -= 1;
} break;
case i_ifne: {
uint8_t param1 = code_buf[pc + 1], param2 = code_buf[pc + 2];
int32_t conditional = op_stack[op_count - 1];
pc += 3;
if (conditional != 0) {
int16_t res = ((param1 << 8) | param2);
pc += res - 3;
}
op_count -= 1;
} break;
case i_iflt: {
uint8_t param1 = code_buf[pc + 1], param2 = code_buf[pc + 2];
int32_t conditional = op_stack[op_count - 1];
pc += 3;
if (conditional < 0) {
int16_t res = ((param1 << 8) | param2);
pc += res - 3;
}
op_count -= 1;
} break;
case i_ifge: {
uint8_t param1 = code_buf[pc + 1], param2 = code_buf[pc + 2];
int32_t conditional = op_stack[op_count - 1];
pc += 3;
if (conditional >= 0) {
int16_t res = ((param1 << 8) | param2);
pc += res - 3;
}
op_count -= 1;
} break;
case i_ifgt: {
uint8_t param1 = code_buf[pc + 1], param2 = code_buf[pc + 2];
int32_t conditional = op_stack[op_count - 1];
pc += 3;
if (conditional > 0) {
int16_t res = ((param1 << 8) | param2);
pc += res - 3;
}
op_count -= 1;
} break;
case i_ifle: {
uint8_t param1 = code_buf[pc + 1], param2 = code_buf[pc + 2];
int32_t conditional = op_stack[op_count - 1];
pc += 3;
if (conditional <= 0) {
int16_t res = ((param1 << 8) | param2);
pc += res - 3;
}
op_count -= 1;
} break;
case i_if_icmpeq: {
uint8_t param1 = code_buf[pc + 1], param2 = code_buf[pc + 2];
int32_t op1 = op_stack[op_count - 1], op2 = op_stack[op_count - 2];
pc += 3;
if (op2 == op1) {
int16_t res = ((param1 << 8) | param2);
pc += res - 3;
}
op_count -= 2;
} break;
case i_if_icmpne: {
uint8_t param1 = code_buf[pc + 1], param2 = code_buf[pc + 2];
int32_t op1 = op_stack[op_count - 1], op2 = op_stack[op_count - 2];
pc += 3;
if (op2 != op1) {
int16_t res = ((param1 << 8) | param2);
pc += res - 3;
}
op_count -= 2;
} break;
case i_if_icmplt: {
uint8_t param1 = code_buf[pc + 1], param2 = code_buf[pc + 2];
int32_t op1 = op_stack[op_count - 1], op2 = op_stack[op_count - 2];
pc += 3;
if (op2 < op1) {
int16_t res = ((param1 << 8) | param2);
pc += res - 3;
}
op_count -= 2;
} break;
case i_if_icmpge: {
uint8_t param1 = code_buf[pc + 1], param2 = code_buf[pc + 2];
int32_t op1 = op_stack[op_count - 1], op2 = op_stack[op_count - 2];
pc += 3;
if (op2 >= op1) {
int16_t res = ((param1 << 8) | param2);
pc += res - 3;
}
op_count -= 2;
} break;
case i_if_icmpgt: {
uint8_t param1 = code_buf[pc + 1], param2 = code_buf[pc + 2];
int32_t op1 = op_stack[op_count - 1], op2 = op_stack[op_count - 2];
pc += 3;
if (op2 > op1) {
int16_t res = ((param1 << 8) | param2);
pc += res - 3;
}
op_count -= 2;
} break;
case i_if_icmple: {
uint8_t param1 = code_buf[pc + 1], param2 = code_buf[pc + 2];
int32_t op1 = op_stack[op_count - 1], op2 = op_stack[op_count - 2];
pc += 3;
if (op2 <= op1) {
int16_t res = ((param1 << 8) | param2);
pc += res - 3;
}
op_count -= 2;
} break;
case i_goto: {
uint8_t param1 = code_buf[pc + 1], param2 = code_buf[pc + 2];
int16_t res = ((param1 << 8) | param2);
printf("%d/%d", pc, res);
pc += res;
} break;
case i_ldc: {
constant_pool_t constant_pool = clazz->constant_pool;
int16_t param = code_buf[pc + 1];
uint8_t *info = get_constant(&constant_pool, param)->info;
op_stack[op_count] = ((CONSTANT_Integer_info *) info)->bytes;
pc += 2;
op_count += 1;
} break;
case i_iload_0:
case i_iload_1:
case i_iload_2:
case i_iload_3: {
int32_t param = current - i_iload_0;
int32_t loaded = locals[param];
op_stack[op_count] = loaded;
pc += 1;
op_count += 1;
} break;
case i_iload: {
int32_t param = code_buf[pc + 1];
int32_t loaded = locals[param];
op_stack[op_count] = loaded;
pc += 2;
op_count += 1;
} break;
case i_istore: {
int32_t param = code_buf[pc + 1];
int32_t stored = op_stack[op_count - 1];
locals[param] = stored;
pc += 2;
op_count -= 1;
} break;
case i_istore_0:
case i_istore_1:
case i_istore_2:
case i_istore_3: {
int32_t param = current - i_istore_0;
int32_t stored = op_stack[op_count - 1];
locals[param] = stored;
pc += 1;
op_count -= 1;
} break;
case i_iinc: {
uint8_t i = code_buf[pc + 1];
int8_t b = code_buf[pc + 2];
locals[i] += b;
pc += 3;
} break;
case i_bipush: {
bipush(op_stack, op_count, pc, code_buf);
op_count += 1;
pc += 2;
} break;
case i_iadd: {
iadd(op_stack, op_count);
op_count -= 1;
pc += 1;
} break;
case i_isub: {
isub(op_stack, op_count);
op_count -= 1;
pc += 1;
} break;
case i_imul: {
imul(op_stack, op_count);
op_count -= 1;
pc += 1;
} break;
case i_idiv: {
idiv(op_stack, op_count);
op_count -= 1;
pc += 1;
} break;
case i_irem: {
irem(op_stack, op_count);
op_count -= 1;
pc += 1;
} break;
case i_ineg: {
ineg(op_stack, op_count);
pc += 1;
} break;
case i_getstatic: {
pc += 3;
} break;
case i_invokevirtual: {
invokevirtual(op_stack, op_count);
op_count -= 1;
pc += 3;
} break;
case i_iconst_m1:
case i_iconst_0:
case i_iconst_1:
case i_iconst_2:
case i_iconst_3:
case i_iconst_4:
case i_iconst_5: {
iconst(op_stack, op_count, current);
op_count += 1;
pc += 1;
} break;
case i_sipush: {
sipush(op_stack, op_count, pc, code_buf);
op_count += 1;
pc += 3;
} break;
}
}
return NULL;
}
int main(int argc, char *argv[])
{
if (argc < 2)
return -1;
FILE *class_file = fopen(argv[1], "r");
assert(class_file && "Failed to open file");
class_file_t clazz = get_class(class_file);
int error = fclose(class_file);
assert(!error && "Failed to close file");
method_t *main_method =
find_method("main", "([Ljava/lang/String;)V", &clazz);
assert(main_method && "Missing main() method");
int32_t locals[main_method->code.max_locals];
int32_t *result = execute(main_method, locals, &clazz);
assert(!result && "main() should return void");
free_class(&clazz);
return 0;
}