Skip to main content

move_neovm/bytecode/
parser.rs

1use anyhow::{bail, Result};
2
3use super::reader::BytecodeReader;
4use super::types::{AbilitySet, BytecodeVersion, FunctionDef, MoveModule, MoveOpcode, StructDef};
5use super::MOVE_MAGIC;
6
7/// Parse Move bytecode into a module
8pub fn parse_move_bytecode(bytes: &[u8]) -> Result<MoveModule> {
9    if bytes.len() < 8 {
10        bail!("Move bytecode too short: {} bytes", bytes.len());
11    }
12
13    // Check magic
14    if bytes[0..4] != MOVE_MAGIC {
15        bail!(
16            "Invalid Move bytecode magic: expected {:02x?}, got {:02x?}",
17            MOVE_MAGIC,
18            &bytes[0..4]
19        );
20    }
21
22    let mut reader = BytecodeReader::new(bytes);
23
24    // Skip magic
25    reader.read_bytes(4)?;
26
27    // Read version
28    let version = reader.read_u32()?;
29
30    // Parse a minimal module header (table count only). This is not a full parser
31    // but avoids rejecting valid Move artefacts while the lowering remains experimental.
32    let mut module = parse_module_tables(&mut reader, version)?;
33    let code_start = reader.position() as usize;
34
35    // Resolve identifiers table for names if present
36    let identifiers = read_identifiers(
37        &mut reader,
38        module.identifiers_offset,
39        module.identifiers_count,
40    )?;
41    module.name = identifiers
42        .first()
43        .cloned()
44        .unwrap_or_else(|| "MoveModule".to_string());
45    module.structs = parse_structs(
46        &mut reader,
47        module.struct_defs_offset,
48        module.struct_defs_count,
49        &identifiers,
50    )?;
51    module.functions = parse_functions(
52        &mut reader,
53        module.function_defs_offset,
54        module.function_defs_count,
55        module.identifiers_offset,
56        module.identifiers_count,
57        &identifiers,
58    )?;
59
60    // If no functions are present, treat remaining bytes as a single entry function.
61    if module.functions.is_empty() {
62        let bytes = reader.bytes();
63        let opcode_bytes = if code_start < bytes.len() {
64            &bytes[code_start..]
65        } else {
66            &[]
67        };
68
69        let mut code_reader = BytecodeReader::new(opcode_bytes);
70        let mut code = Vec::new();
71        while code_reader.remaining() > 0 {
72            code.push(parse_opcode(&mut code_reader)?);
73        }
74        if !code.is_empty() {
75            module.functions.push(FunctionDef {
76                name: "main".to_string(),
77                is_public: true,
78                is_entry: true,
79                parameters: Vec::new(),
80                returns: Vec::new(),
81                locals: Vec::new(),
82                code,
83            });
84        }
85    }
86
87    Ok(module)
88}
89
90fn parse_module_tables(reader: &mut BytecodeReader<'_>, version: u32) -> Result<MoveModule> {
91    // Move bytecode structure:
92    // - Table count (ULEB128)
93    // - Table headers: [kind: u8, offset: u32, count: u32]
94    // - Table contents
95
96    let table_count = reader.read_uleb128()? as usize;
97
98    let mut _module_handles_offset = 0u32;
99    let mut _module_handles_count = 0u32;
100    let mut _struct_handles_offset = 0u32;
101    let mut _struct_handles_count = 0u32;
102    let mut _function_handles_offset = 0u32;
103    let mut _function_handles_count = 0u32;
104    let mut identifiers_offset = 0u32;
105    let mut identifiers_count = 0u32;
106    let mut struct_defs_offset = 0u32;
107    let mut struct_defs_count = 0u32;
108    let mut function_defs_offset = 0u32;
109    let mut function_defs_count = 0u32;
110
111    // Read table headers
112    for _ in 0..table_count {
113        let kind = reader.read_u8()?;
114        let offset = reader.read_u32()?;
115        let count = reader.read_u32()?;
116
117        match kind {
118            0x01 => {
119                _module_handles_offset = offset;
120                _module_handles_count = count;
121            }
122            0x02 => {
123                _struct_handles_offset = offset;
124                _struct_handles_count = count;
125            }
126            0x03 => {
127                _function_handles_offset = offset;
128                _function_handles_count = count;
129            }
130            0x05 => {
131                identifiers_offset = offset;
132                identifiers_count = count;
133            }
134            0x08 => {
135                struct_defs_offset = offset;
136                struct_defs_count = count;
137            }
138            0x09 => {
139                function_defs_offset = offset;
140                function_defs_count = count;
141            }
142            _ => {} // Skip other tables
143        }
144    }
145
146    Ok(MoveModule {
147        version: BytecodeVersion(version),
148        name: "MoveModule".to_string(),
149        identifiers_offset,
150        identifiers_count,
151        struct_defs_offset,
152        struct_defs_count,
153        _function_handles_offset,
154        _function_handles_count,
155        function_defs_offset,
156        function_defs_count,
157        structs: Vec::new(),
158        functions: Vec::new(),
159    })
160}
161
162fn read_identifiers(
163    reader: &mut BytecodeReader<'_>,
164    identifiers_offset: u32,
165    identifiers_count: u32,
166) -> Result<Vec<String>> {
167    if identifiers_count == 0 {
168        return Ok(Vec::new());
169    }
170    let bytes = reader.bytes();
171    let start = identifiers_offset as usize;
172    if start > bytes.len() {
173        bail!(
174            "identifiers offset {} out of range for bytecode length {}",
175            start,
176            bytes.len()
177        );
178    }
179    let mut names = Vec::new();
180    let mut cursor = BytecodeReader::new(&bytes[start..]);
181    for _ in 0..identifiers_count {
182        names.push(cursor.read_string()?);
183    }
184    Ok(names)
185}
186
187fn parse_structs(
188    _reader: &mut BytecodeReader<'_>,
189    _offset: u32,
190    count: u32,
191    identifiers: &[String],
192) -> Result<Vec<StructDef>> {
193    if count == 0 {
194        return Ok(Vec::new());
195    }
196    let mut structs = Vec::new();
197    for idx in 0..count {
198        let name = identifiers
199            .get(idx as usize)
200            .cloned()
201            .unwrap_or_else(|| format!("Struct{}", idx));
202        structs.push(StructDef {
203            name,
204            abilities: AbilitySet::default(),
205            fields: Vec::new(),
206        });
207    }
208    Ok(structs)
209}
210
211fn parse_functions(
212    reader: &mut BytecodeReader<'_>,
213    offset: u32,
214    count: u32,
215    id_offset: u32,
216    id_count: u32,
217    identifiers: &[String],
218) -> Result<Vec<FunctionDef>> {
219    if count == 0 {
220        return Ok(Vec::new());
221    }
222    let mut funcs = Vec::new();
223    let names = if id_count == identifiers.len() as u32 {
224        identifiers.to_vec()
225    } else {
226        read_identifiers(reader, id_offset, id_count)?
227    };
228    let bytes = reader.bytes();
229    let base = offset as usize;
230    if base > bytes.len() {
231        bail!(
232            "function defs offset {} out of range for bytecode length {}",
233            base,
234            bytes.len()
235        );
236    }
237    let mut cursor = BytecodeReader::new(&bytes[base..]);
238
239    for _ in 0..count {
240        let name_idx = cursor.read_uleb128()? as usize;
241        let _flags = cursor.read_u8()?; // visibility/entry flags
242        let _params_count = cursor.read_uleb128()?;
243        let _returns_count = cursor.read_uleb128()?;
244        let _code_offset = cursor.read_u32()?;
245        let _locals_count = cursor.read_uleb128()?;
246
247        let name = names
248            .get(name_idx)
249            .cloned()
250            .unwrap_or_else(|| "fn".to_string());
251        funcs.push(FunctionDef {
252            name,
253            is_public: true,
254            is_entry: true,
255            parameters: Vec::new(),
256            returns: Vec::new(),
257            locals: Vec::new(),
258            code: Vec::new(),
259        });
260    }
261
262    Ok(funcs)
263}
264
265fn parse_opcode(reader: &mut BytecodeReader<'_>) -> Result<MoveOpcode> {
266    let op = reader.read_u8()?;
267
268    let opcode = match op {
269        0x00 => MoveOpcode::Nop,
270        0x01 => MoveOpcode::Pop,
271        0x02 => MoveOpcode::Ret,
272        0x03 => MoveOpcode::BrTrue(reader.read_u16()?),
273        0x04 => MoveOpcode::BrFalse(reader.read_u16()?),
274        0x05 => MoveOpcode::Branch(reader.read_u16()?),
275        0x06 => MoveOpcode::LdU8(reader.read_u8()?),
276        0x07 => MoveOpcode::LdU64(reader.read_u64()?),
277        0x08 => MoveOpcode::LdU128(reader.read_u128()?),
278        0x09 => MoveOpcode::CastU8,
279        0x0A => MoveOpcode::CastU64,
280        0x0B => MoveOpcode::CastU128,
281        0x0C => MoveOpcode::LdConst(reader.read_u16()?),
282        0x0D => MoveOpcode::LdTrue,
283        0x0E => MoveOpcode::LdFalse,
284        0x0F => MoveOpcode::CopyLoc(reader.read_u8()?),
285        0x10 => MoveOpcode::MoveLoc(reader.read_u8()?),
286        0x11 => MoveOpcode::StLoc(reader.read_u8()?),
287        0x12 => MoveOpcode::MutBorrowLoc(reader.read_u8()?),
288        0x13 => MoveOpcode::ImmBorrowLoc(reader.read_u8()?),
289        0x14 => MoveOpcode::MutBorrowField(reader.read_u16()?),
290        0x15 => MoveOpcode::BorrowField(reader.read_u16()?),
291        0x16 => MoveOpcode::Call(reader.read_u16()?),
292        0x17 => MoveOpcode::Pack(reader.read_u16()?),
293        0x18 => MoveOpcode::Unpack(reader.read_u16()?),
294        0x22 => MoveOpcode::Add,
295        0x23 => MoveOpcode::Sub,
296        0x24 => MoveOpcode::Mul,
297        0x25 => MoveOpcode::Mod,
298        0x26 => MoveOpcode::Div,
299        0x32 => MoveOpcode::Lt,
300        0x33 => MoveOpcode::Gt,
301        0x34 => MoveOpcode::Le,
302        0x35 => MoveOpcode::Ge,
303        0x40 => MoveOpcode::And,
304        0x41 => MoveOpcode::Or,
305        0x42 => MoveOpcode::Not,
306        0x43 => MoveOpcode::Eq,
307        0x44 => MoveOpcode::Neq,
308        0x45 => MoveOpcode::Abort,
309        0x50 => MoveOpcode::Exists(reader.read_u16()?),
310        0x51 => MoveOpcode::BorrowGlobal(reader.read_u16()?),
311        0x52 => MoveOpcode::MutBorrowGlobal(reader.read_u16()?),
312        0x53 => MoveOpcode::MoveFrom(reader.read_u16()?),
313        0x54 => MoveOpcode::MoveTo(reader.read_u16()?),
314        other => bail!("unsupported Move opcode 0x{other:02X}"),
315    };
316
317    Ok(opcode)
318}