Skip to main content

move_neovm/bytecode/
parser.rs

1// Copyright (c) 2025-2026 R3E Network
2// SPDX-License-Identifier: MIT
3
4use anyhow::{bail, Result};
5
6use super::reader::BytecodeReader;
7use super::types::{AbilitySet, BytecodeVersion, FunctionDef, MoveModule, MoveOpcode, StructDef};
8use super::MOVE_MAGIC;
9
10/// Parse Move bytecode into a module
11pub fn parse_move_bytecode(bytes: &[u8]) -> Result<MoveModule> {
12    if bytes.len() < 8 {
13        bail!("Move bytecode too short: {} bytes", bytes.len());
14    }
15
16    // Check magic
17    if bytes[0..4] != MOVE_MAGIC {
18        bail!(
19            "Invalid Move bytecode magic: expected {:02x?}, got {:02x?}",
20            MOVE_MAGIC,
21            &bytes[0..4]
22        );
23    }
24
25    let mut reader = BytecodeReader::new(bytes);
26
27    // Skip magic
28    reader.read_bytes(4)?;
29
30    // Read version
31    let version = reader.read_u32()?;
32
33    // Parse a minimal module header (table count only). This is not a full parser
34    // but avoids rejecting valid Move artefacts while the lowering remains experimental.
35    let mut module = parse_module_tables(&mut reader, version)?;
36    let code_start = reader.position() as usize;
37
38    // Resolve identifiers table for names if present
39    let identifiers = read_identifiers(
40        &mut reader,
41        module.identifiers_offset,
42        module.identifiers_count,
43    )?;
44    module.name = identifiers
45        .first()
46        .cloned()
47        .unwrap_or_else(|| "MoveModule".to_string());
48    module.structs = parse_structs(
49        &mut reader,
50        module.struct_defs_offset,
51        module.struct_defs_count,
52        &identifiers,
53    )?;
54    module.functions = parse_functions(
55        &mut reader,
56        module.function_defs_offset,
57        module.function_defs_count,
58        module.identifiers_offset,
59        module.identifiers_count,
60        &identifiers,
61    )?;
62
63    // If no functions are present, treat remaining bytes as a single entry function.
64    if module.functions.is_empty() {
65        let bytes = reader.bytes();
66        let opcode_bytes = if code_start < bytes.len() {
67            &bytes[code_start..]
68        } else {
69            &[]
70        };
71
72        let mut code_reader = BytecodeReader::new(opcode_bytes);
73        let mut code = Vec::new();
74        while code_reader.remaining() > 0 {
75            code.push(parse_opcode(&mut code_reader)?);
76        }
77        if !code.is_empty() {
78            module.functions.push(FunctionDef {
79                name: "main".to_string(),
80                is_public: true,
81                is_entry: true,
82                parameters: Vec::new(),
83                returns: Vec::new(),
84                locals: Vec::new(),
85                code,
86            });
87        }
88    }
89
90    Ok(module)
91}
92
93fn parse_module_tables(reader: &mut BytecodeReader<'_>, version: u32) -> Result<MoveModule> {
94    // Move bytecode structure:
95    // - Table count (ULEB128)
96    // - Table headers: [kind: u8, offset: u32, count: u32]
97    // - Table contents
98
99    let table_count = reader.read_uleb128()? as usize;
100
101    let mut _module_handles_offset = 0u32;
102    let mut _module_handles_count = 0u32;
103    let mut _struct_handles_offset = 0u32;
104    let mut _struct_handles_count = 0u32;
105    let mut _function_handles_offset = 0u32;
106    let mut _function_handles_count = 0u32;
107    let mut identifiers_offset = 0u32;
108    let mut identifiers_count = 0u32;
109    let mut struct_defs_offset = 0u32;
110    let mut struct_defs_count = 0u32;
111    let mut function_defs_offset = 0u32;
112    let mut function_defs_count = 0u32;
113
114    // Read table headers
115    for _ in 0..table_count {
116        let kind = reader.read_u8()?;
117        let offset = reader.read_u32()?;
118        let count = reader.read_u32()?;
119
120        match kind {
121            0x01 => {
122                _module_handles_offset = offset;
123                _module_handles_count = count;
124            }
125            0x02 => {
126                _struct_handles_offset = offset;
127                _struct_handles_count = count;
128            }
129            0x03 => {
130                _function_handles_offset = offset;
131                _function_handles_count = count;
132            }
133            0x05 => {
134                identifiers_offset = offset;
135                identifiers_count = count;
136            }
137            0x08 => {
138                struct_defs_offset = offset;
139                struct_defs_count = count;
140            }
141            0x09 => {
142                function_defs_offset = offset;
143                function_defs_count = count;
144            }
145            _ => {} // Skip other tables
146        }
147    }
148
149    Ok(MoveModule {
150        version: BytecodeVersion(version),
151        name: "MoveModule".to_string(),
152        identifiers_offset,
153        identifiers_count,
154        struct_defs_offset,
155        struct_defs_count,
156        _function_handles_offset,
157        _function_handles_count,
158        function_defs_offset,
159        function_defs_count,
160        structs: Vec::new(),
161        functions: Vec::new(),
162    })
163}
164
165fn read_identifiers(
166    reader: &mut BytecodeReader<'_>,
167    identifiers_offset: u32,
168    identifiers_count: u32,
169) -> Result<Vec<String>> {
170    if identifiers_count == 0 {
171        return Ok(Vec::new());
172    }
173    let bytes = reader.bytes();
174    let start = identifiers_offset as usize;
175    if start > bytes.len() {
176        bail!(
177            "identifiers offset {} out of range for bytecode length {}",
178            start,
179            bytes.len()
180        );
181    }
182    let mut names = Vec::new();
183    let mut cursor = BytecodeReader::new(&bytes[start..]);
184    for _ in 0..identifiers_count {
185        names.push(cursor.read_string()?);
186    }
187    Ok(names)
188}
189
190fn parse_structs(
191    _reader: &mut BytecodeReader<'_>,
192    _offset: u32,
193    count: u32,
194    identifiers: &[String],
195) -> Result<Vec<StructDef>> {
196    if count == 0 {
197        return Ok(Vec::new());
198    }
199    let mut structs = Vec::new();
200    for idx in 0..count {
201        let name = identifiers
202            .get(idx as usize)
203            .cloned()
204            .unwrap_or_else(|| format!("Struct{}", idx));
205        structs.push(StructDef {
206            name,
207            abilities: AbilitySet::default(),
208            fields: Vec::new(),
209        });
210    }
211    Ok(structs)
212}
213
214fn parse_functions(
215    reader: &mut BytecodeReader<'_>,
216    offset: u32,
217    count: u32,
218    id_offset: u32,
219    id_count: u32,
220    identifiers: &[String],
221) -> Result<Vec<FunctionDef>> {
222    if count == 0 {
223        return Ok(Vec::new());
224    }
225    let mut funcs = Vec::new();
226    let names = if id_count == identifiers.len() as u32 {
227        identifiers.to_vec()
228    } else {
229        read_identifiers(reader, id_offset, id_count)?
230    };
231    let bytes = reader.bytes();
232    let base = offset as usize;
233    if base > bytes.len() {
234        bail!(
235            "function defs offset {} out of range for bytecode length {}",
236            base,
237            bytes.len()
238        );
239    }
240    let mut cursor = BytecodeReader::new(&bytes[base..]);
241
242    for _ in 0..count {
243        let name_idx = cursor.read_uleb128()? as usize;
244        let _flags = cursor.read_u8()?; // visibility/entry flags
245        let _params_count = cursor.read_uleb128()?;
246        let _returns_count = cursor.read_uleb128()?;
247        let _code_offset = cursor.read_u32()?;
248        let _locals_count = cursor.read_uleb128()?;
249
250        let name = names
251            .get(name_idx)
252            .cloned()
253            .unwrap_or_else(|| "fn".to_string());
254        funcs.push(FunctionDef {
255            name,
256            is_public: true,
257            is_entry: true,
258            parameters: Vec::new(),
259            returns: Vec::new(),
260            locals: Vec::new(),
261            code: Vec::new(),
262        });
263    }
264
265    Ok(funcs)
266}
267
268fn parse_opcode(reader: &mut BytecodeReader<'_>) -> Result<MoveOpcode> {
269    let op = reader.read_u8()?;
270
271    let opcode = match op {
272        0x00 => MoveOpcode::Nop,
273        0x01 => MoveOpcode::Pop,
274        0x02 => MoveOpcode::Ret,
275        0x03 => MoveOpcode::BrTrue(reader.read_u16()?),
276        0x04 => MoveOpcode::BrFalse(reader.read_u16()?),
277        0x05 => MoveOpcode::Branch(reader.read_u16()?),
278        0x06 => MoveOpcode::LdU8(reader.read_u8()?),
279        0x07 => MoveOpcode::LdU64(reader.read_u64()?),
280        0x08 => MoveOpcode::LdU128(reader.read_u128()?),
281        0x09 => MoveOpcode::CastU8,
282        0x0A => MoveOpcode::CastU64,
283        0x0B => MoveOpcode::CastU128,
284        0x0C => MoveOpcode::LdConst(reader.read_u16()?),
285        0x0D => MoveOpcode::LdTrue,
286        0x0E => MoveOpcode::LdFalse,
287        0x0F => MoveOpcode::CopyLoc(reader.read_u8()?),
288        0x10 => MoveOpcode::MoveLoc(reader.read_u8()?),
289        0x11 => MoveOpcode::StLoc(reader.read_u8()?),
290        0x12 => MoveOpcode::MutBorrowLoc(reader.read_u8()?),
291        0x13 => MoveOpcode::ImmBorrowLoc(reader.read_u8()?),
292        0x14 => MoveOpcode::MutBorrowField(reader.read_u16()?),
293        0x15 => MoveOpcode::BorrowField(reader.read_u16()?),
294        0x16 => MoveOpcode::Call(reader.read_u16()?),
295        0x17 => MoveOpcode::Pack(reader.read_u16()?),
296        0x18 => MoveOpcode::Unpack(reader.read_u16()?),
297        0x22 => MoveOpcode::Add,
298        0x23 => MoveOpcode::Sub,
299        0x24 => MoveOpcode::Mul,
300        0x25 => MoveOpcode::Mod,
301        0x26 => MoveOpcode::Div,
302        0x32 => MoveOpcode::Lt,
303        0x33 => MoveOpcode::Gt,
304        0x34 => MoveOpcode::Le,
305        0x35 => MoveOpcode::Ge,
306        0x40 => MoveOpcode::And,
307        0x41 => MoveOpcode::Or,
308        0x42 => MoveOpcode::Not,
309        0x43 => MoveOpcode::Eq,
310        0x44 => MoveOpcode::Neq,
311        0x45 => MoveOpcode::Abort,
312        0x50 => MoveOpcode::Exists(reader.read_u16()?),
313        0x51 => MoveOpcode::BorrowGlobal(reader.read_u16()?),
314        0x52 => MoveOpcode::MutBorrowGlobal(reader.read_u16()?),
315        0x53 => MoveOpcode::MoveFrom(reader.read_u16()?),
316        0x54 => MoveOpcode::MoveTo(reader.read_u16()?),
317        other => bail!("unsupported Move opcode 0x{other:02X}"),
318    };
319
320    Ok(opcode)
321}