// quickpython 0.1.3
//
// A lightweight Python bytecode VM written in Rust.
// Documentation
use crate::bytecode::{ByteCode, Instruction};
use std::io::Write;

/// File signature written at the very start of every serialized buffer;
/// `deserialize_bytecode` rejects input whose first four bytes differ.
const MAGIC: &[u8; 4] = b"QPY\0";
/// Format version stored as a little-endian `u32` right after the magic bytes.
/// The decoder refuses any buffer whose stored version is greater than this.
const VERSION: u32 = 5;

pub fn serialize_bytecode(bytecode: &ByteCode) -> Result<Vec<u8>, String> {
    let mut buffer = Vec::new();

    buffer.write_all(MAGIC).map_err(|e| e.to_string())?;
    buffer
        .write_all(&VERSION.to_le_bytes())
        .map_err(|e| e.to_string())?;

    let count = bytecode.len() as u32;
    buffer
        .write_all(&count.to_le_bytes())
        .map_err(|e| e.to_string())?;

    for instruction in bytecode {
        serialize_instruction(&mut buffer, instruction)?;
    }

    Ok(buffer)
}

/// Decodes a buffer produced by `serialize_bytecode` back into instructions.
///
/// The 12-byte header (magic, version, instruction count) is validated first,
/// then exactly `count` instructions are decoded from the remaining bytes.
/// Bytes that follow the last counted instruction are ignored.
///
/// # Errors
///
/// Returns a message when the input is shorter than the header, the magic
/// bytes do not match, the stored version is newer than `VERSION`, or any
/// instruction fails to decode (including running out of input).
pub fn deserialize_bytecode(data: &[u8]) -> Result<ByteCode, String> {
    // Magic (4 bytes) + version (4 bytes) + instruction count (4 bytes).
    const HEADER_LEN: usize = 12;

    if data.len() < HEADER_LEN {
        return Err("Invalid bytecode: too short".to_string());
    }
    let (header, body) = data.split_at(HEADER_LEN);

    if &header[0..4] != MAGIC {
        return Err("Invalid bytecode: wrong magic number".to_string());
    }

    let read_u32 = |at: usize| {
        u32::from_le_bytes([header[at], header[at + 1], header[at + 2], header[at + 3]])
    };

    let version = read_u32(4);
    if version > VERSION {
        return Err(format!("Unsupported bytecode version: {}", version));
    }

    let count = read_u32(8) as usize;

    // The count comes from untrusted input. Every instruction occupies at
    // least one byte, so the payload length bounds how many can really follow;
    // capping the reservation stops a tiny file from forcing a huge allocation.
    let mut bytecode = Vec::with_capacity(count.min(body.len()));
    let mut remaining = body;

    for _ in 0..count {
        let (instruction, bytes_read) = deserialize_instruction(remaining)?;
        bytecode.push(instruction);
        remaining = remaining
            .get(bytes_read..)
            .ok_or_else(|| "Unexpected end of bytecode".to_string())?;
    }

    Ok(bytecode)
}

/// Appends the binary encoding of one instruction to `buffer`.
///
/// Every encoding starts with a one-byte opcode. Operands follow in
/// little-endian order: `i32` for `PushInt`, `f64` for `PushFloat`, one byte
/// for `PushBool`, a `u32` length plus UTF-8 bytes for `PushString`, a `u16`
/// length plus UTF-8 bytes for global names, a `u16` for local slots and a
/// `u32` for jump / loop targets.
///
/// # Errors
///
/// Returns a message when the instruction has no serialized form yet, or when
/// an operand is too large for its fixed-width field. Range checks run before
/// anything is written, so `buffer` is left untouched when they fail.
fn serialize_instruction(buffer: &mut Vec<u8>, instruction: &Instruction) -> Result<(), String> {
    // Writes an opcode followed by its already-encoded operand bytes.
    fn emit(buffer: &mut Vec<u8>, opcode: u8, operand: &[u8]) {
        buffer.push(opcode);
        buffer.extend_from_slice(operand);
    }

    // Writes an opcode, a u16 byte length, and the UTF-8 bytes of `name`.
    fn emit_name(buffer: &mut Vec<u8>, opcode: u8, name: &str) -> Result<(), String> {
        // A truncating cast here would write a length that no longer matches
        // the bytes that follow, corrupting everything after this instruction.
        let len = u16::try_from(name.len()).map_err(|_| {
            format!(
                "Variable name is too long to serialize: {} bytes does not fit in a u16 length",
                name.len()
            )
        })?;
        emit(buffer, opcode, &len.to_le_bytes());
        buffer.extend_from_slice(name.as_bytes());
        Ok(())
    }

    // Writes an opcode followed by a local-variable slot stored as u16.
    fn emit_local(buffer: &mut Vec<u8>, opcode: u8, index: usize) -> Result<(), String> {
        let slot = u16::try_from(index).map_err(|_| {
            format!(
                "Local variable index {} does not fit in a u16 operand",
                index
            )
        })?;
        emit(buffer, opcode, &slot.to_le_bytes());
        Ok(())
    }

    // Writes an opcode followed by a jump / loop target stored as u32.
    fn emit_target(buffer: &mut Vec<u8>, opcode: u8, offset: usize) -> Result<(), String> {
        let target = u32::try_from(offset)
            .map_err(|_| format!("Jump target {} does not fit in a u32 operand", offset))?;
        emit(buffer, opcode, &target.to_le_bytes());
        Ok(())
    }

    match instruction {
        // Constants and stack manipulation.
        Instruction::PushInt(value) => emit(buffer, 0x01, &value.to_le_bytes()),
        Instruction::PushFloat(value) => emit(buffer, 0x06, &value.to_le_bytes()),
        Instruction::PushBool(flag) => emit(buffer, 0x02, &[u8::from(*flag)]),
        Instruction::PushNone => buffer.push(0x03),
        Instruction::PushString(s) => {
            let len = u32::try_from(s.len()).map_err(|_| {
                format!(
                    "String constant is too long to serialize: {} bytes does not fit in a u32 length",
                    s.len()
                )
            })?;
            emit(buffer, 0x05, &len.to_le_bytes());
            buffer.extend_from_slice(s.as_bytes());
        }
        Instruction::Pop => buffer.push(0x04),

        // Arithmetic and comparison operators.
        Instruction::Add => buffer.push(0x10),
        Instruction::Sub => buffer.push(0x11),
        Instruction::Mul => buffer.push(0x12),
        Instruction::Div => buffer.push(0x13),
        Instruction::Mod => buffer.push(0x1B),
        Instruction::Negate => buffer.push(0x1A),
        Instruction::Eq => buffer.push(0x14),
        Instruction::Ne => buffer.push(0x15),
        Instruction::Lt => buffer.push(0x16),
        Instruction::Le => buffer.push(0x17),
        Instruction::Gt => buffer.push(0x18),
        Instruction::Ge => buffer.push(0x19),
        Instruction::Contains | Instruction::NotContains | Instruction::Is | Instruction::IsNot => {
            return Err(
                "'in', 'not in', 'is', 'is not' operator instructions cannot be serialized yet"
                    .to_string(),
            );
        }
        Instruction::Yield => {
            return Err("'yield' instruction cannot be serialized yet".to_string());
        }

        // Variable access.
        Instruction::GetGlobal(name) => emit_name(buffer, 0x20, name)?,
        Instruction::SetGlobal(name) => emit_name(buffer, 0x21, name)?,
        Instruction::GetLocal(index) => emit_local(buffer, 0x22, *index)?,
        Instruction::SetLocal(index) => emit_local(buffer, 0x23, *index)?,

        // Control flow.
        Instruction::Jump(offset) => emit_target(buffer, 0x30, *offset)?,
        Instruction::JumpIfFalse(offset) => emit_target(buffer, 0x31, *offset)?,
        Instruction::MakeFunction { .. } | Instruction::Call(_) | Instruction::Return => {
            return Err("Function instructions cannot be serialized yet".to_string());
        }
        Instruction::Print(_) => {
            return Err("Print instruction cannot be serialized yet".to_string());
        }

        // Built-in conversions.
        Instruction::Int => buffer.push(0x41),
        Instruction::Float => buffer.push(0x42),
        Instruction::Str => buffer.push(0x44),
        Instruction::Len => buffer.push(0x43),
        Instruction::BuildList(_)
        | Instruction::BuildDict(_)
        | Instruction::BuildTuple(_)
        | Instruction::UnpackSequence(_)
        | Instruction::GetItem
        | Instruction::SetItem
        | Instruction::CallMethod(_, _) => {
            return Err("List/Dict/Tuple instructions cannot be serialized yet".to_string());
        }

        // Iteration.
        Instruction::Range => buffer.push(0x50),
        Instruction::GetIter => buffer.push(0x51),
        Instruction::ForIter(offset) => emit_target(buffer, 0x52, *offset)?,
        Instruction::Break => {
            return Err(
                "Break instructions cannot be serialized (should be compiled to Jump)".to_string(),
            );
        }
        Instruction::Continue => {
            return Err(
                "Continue instructions cannot be serialized (should be compiled to Jump)"
                    .to_string(),
            );
        }

        // Everything below has no binary encoding yet.
        Instruction::JumpIfFalseOrPop(_) | Instruction::JumpIfTrueOrPop(_) | Instruction::Not => {
            return Err("Logical operator instructions cannot be serialized yet".to_string());
        }
        Instruction::MakeException(_) | Instruction::Raise => {
            return Err("Exception instructions cannot be serialized yet".to_string());
        }
        Instruction::SetupTry(_)
        | Instruction::PopTry
        | Instruction::GetExceptionType
        | Instruction::MatchException
        | Instruction::Dup
        | Instruction::SetupFinally(_)
        | Instruction::PopFinally
        | Instruction::EndFinally => {
            return Err("Try-except-finally instructions cannot be serialized yet".to_string());
        }
        Instruction::Import(_) | Instruction::GetAttr(_) => {
            return Err("Import instructions cannot be serialized yet".to_string());
        }
        Instruction::BuildSlice | Instruction::GetItemSlice => {
            return Err("Slice instructions cannot be serialized yet".to_string());
        }
        Instruction::PushType(_) | Instruction::IsInstance => {
            return Err("Type and isinstance instructions cannot be serialized yet".to_string());
        }
        Instruction::Await => {
            return Err("Await instruction cannot be serialized yet".to_string());
        }
    }
    Ok(())
}

/// Decodes the single instruction at the start of `data`.
///
/// Returns the instruction together with the number of bytes it occupied
/// (opcode included), so the caller can advance to the next instruction.
/// The opcode table mirrors `serialize_instruction`, including `0x44` for
/// `Str`.
///
/// # Errors
///
/// Returns a message when `data` is empty, the opcode is unknown, the operand
/// bytes are missing or truncated, or a string operand is not valid UTF-8.
fn deserialize_instruction(data: &[u8]) -> Result<(Instruction, usize), String> {
    // Returns the `width` operand bytes that directly follow the opcode.
    fn operand<'a>(data: &'a [u8], width: usize, name: &str) -> Result<&'a [u8], String> {
        data.get(1..1 + width)
            .ok_or_else(|| format!("Invalid {} instruction", name))
    }

    // Reads a little-endian u16 operand and widens it to usize.
    fn operand_u16(data: &[u8], name: &str) -> Result<usize, String> {
        let raw = operand(data, 2, name)?;
        Ok(usize::from(u16::from_le_bytes([raw[0], raw[1]])))
    }

    // Reads a little-endian u32 operand and widens it to usize.
    fn operand_u32(data: &[u8], name: &str) -> Result<usize, String> {
        let raw = operand(data, 4, name)?;
        Ok(u32::from_le_bytes([raw[0], raw[1], raw[2], raw[3]]) as usize)
    }

    // Reads `len` UTF-8 bytes starting at `start` (the first byte after the
    // length prefix) and returns the text plus the total instruction size.
    fn text(
        data: &[u8],
        start: usize,
        len: usize,
        name: &str,
        utf8_error: &str,
    ) -> Result<(String, usize), String> {
        // `len` is checked against what is left after the prefix rather than
        // via `start + len`, which could overflow on 32-bit targets.
        let payload = data[start..]
            .get(..len)
            .ok_or_else(|| format!("Invalid {} instruction: string too short", name))?;
        let decoded = std::str::from_utf8(payload).map_err(|_| utf8_error.to_string())?;
        Ok((decoded.to_owned(), start + len))
    }

    let opcode = match data.first() {
        Some(&byte) => byte,
        None => return Err("Unexpected end of bytecode".to_string()),
    };

    let decoded = match opcode {
        // Constants and stack manipulation.
        0x01 => {
            let raw = operand(data, 4, "PushInt")?;
            let value = i32::from_le_bytes([raw[0], raw[1], raw[2], raw[3]]);
            (Instruction::PushInt(value), 5)
        }
        0x02 => {
            let raw = operand(data, 1, "PushBool")?;
            (Instruction::PushBool(raw[0] != 0), 2)
        }
        0x03 => (Instruction::PushNone, 1),
        0x04 => (Instruction::Pop, 1),
        0x05 => {
            let len = operand_u32(data, "PushString")?;
            let (s, size) = text(data, 5, len, "PushString", "Invalid UTF-8 in string")?;
            (Instruction::PushString(s), size)
        }
        0x06 => {
            let raw = operand(data, 8, "PushFloat")?;
            let mut bits = [0u8; 8];
            bits.copy_from_slice(raw);
            (Instruction::PushFloat(f64::from_le_bytes(bits)), 9)
        }

        // Arithmetic and comparison operators.
        0x10 => (Instruction::Add, 1),
        0x11 => (Instruction::Sub, 1),
        0x12 => (Instruction::Mul, 1),
        0x13 => (Instruction::Div, 1),
        0x14 => (Instruction::Eq, 1),
        0x15 => (Instruction::Ne, 1),
        0x16 => (Instruction::Lt, 1),
        0x17 => (Instruction::Le, 1),
        0x18 => (Instruction::Gt, 1),
        0x19 => (Instruction::Ge, 1),
        0x1A => (Instruction::Negate, 1),
        0x1B => (Instruction::Mod, 1),

        // Variable access.
        0x20 => {
            let len = operand_u16(data, "GetGlobal")?;
            let (name, size) = text(data, 3, len, "GetGlobal", "Invalid UTF-8 in variable name")?;
            (Instruction::GetGlobal(name), size)
        }
        0x21 => {
            let len = operand_u16(data, "SetGlobal")?;
            let (name, size) = text(data, 3, len, "SetGlobal", "Invalid UTF-8 in variable name")?;
            (Instruction::SetGlobal(name), size)
        }
        0x22 => (Instruction::GetLocal(operand_u16(data, "GetLocal")?), 3),
        0x23 => (Instruction::SetLocal(operand_u16(data, "SetLocal")?), 3),

        // Control flow.
        0x30 => (Instruction::Jump(operand_u32(data, "Jump")?), 5),
        0x31 => (
            Instruction::JumpIfFalse(operand_u32(data, "JumpIfFalse")?),
            5,
        ),

        // Built-in conversions.
        0x41 => (Instruction::Int, 1),
        0x42 => (Instruction::Float, 1),
        0x43 => (Instruction::Len, 1),
        // The serializer writes 0x44 for `Str`; without this arm such
        // bytecode could be written but never read back.
        0x44 => (Instruction::Str, 1),

        // Iteration.
        0x50 => (Instruction::Range, 1),
        0x51 => (Instruction::GetIter, 1),
        0x52 => (Instruction::ForIter(operand_u32(data, "ForIter")?), 5),

        _ => return Err(format!("Unknown opcode: 0x{:02x}", opcode)),
    };

    Ok(decoded)
}

#[cfg(test)]
mod tests {
    use super::*;

    // Serializes `bytecode`, decodes the bytes again, and checks that the
    // decoded instructions equal the input.
    fn assert_round_trip(bytecode: Vec<Instruction>) {
        let serialized = serialize_bytecode(&bytecode).unwrap();
        let deserialized = deserialize_bytecode(&serialized).unwrap();

        assert_eq!(bytecode, deserialized);
    }

    #[test]
    fn test_serialize_deserialize() {
        assert_round_trip(vec![
            Instruction::PushInt(42),
            Instruction::PushInt(10),
            Instruction::Add,
        ]);
    }

    #[test]
    fn test_magic_number() {
        let serialized = serialize_bytecode(&vec![Instruction::PushInt(1)]).unwrap();

        assert_eq!(&serialized[0..4], b"QPY\0");
    }

    #[test]
    fn test_variable_instructions() {
        assert_round_trip(vec![
            Instruction::PushInt(42),
            Instruction::SetGlobal("x".to_string()),
            Instruction::GetGlobal("x".to_string()),
        ]);
    }

    #[test]
    fn test_bool_and_comparison() {
        // Covers both boolean payload values as well as a comparison opcode.
        assert_round_trip(vec![
            Instruction::PushInt(5),
            Instruction::PushInt(3),
            Instruction::Lt,
            Instruction::PushBool(true),
            Instruction::PushBool(false),
            Instruction::Eq,
        ]);
    }

    #[test]
    fn test_rejects_short_input() {
        // Fewer than 12 bytes cannot hold magic + version + count.
        let err = deserialize_bytecode(b"QPY").unwrap_err();

        assert_eq!(err, "Invalid bytecode: too short");
    }

    #[test]
    fn test_rejects_wrong_magic() {
        // Header-sized input whose first four bytes are not the signature.
        let err = deserialize_bytecode(b"BAD\0\x05\x00\x00\x00\x00\x00\x00\x00").unwrap_err();

        assert_eq!(err, "Invalid bytecode: wrong magic number");
    }
}