hlbc/
lib.rs

1//! [Hashlink](https://hashlink.haxe.org/) bytecode disassembler and analyzer.
2//! See [Bytecode] for an entrypoint to the library.
3//!
4//! #### Note about safety
5//! We don't deal with self-references, hence we deal with indexes into structures.
//! Be careful when calling functions on Ref* objects: no bounds checking is done beforehand and every index is
//! assumed to be valid (looking up an invalid index in the [Bytecode] pools panics).
7
8extern crate core;
9
10use std::collections::HashMap;
11use std::fmt::Debug;
12use std::ops::Index;
13
14use crate::opcodes::Opcode;
15use crate::types::{
16    ConstantDef, FunPtr, Function, Native, ObjField, RefFloat, RefFun, RefGlobal, RefInt,
17    RefString, RefType, Type, TypeObj,
18};
19
20pub mod analysis;
21pub mod fmt;
22/// Opcodes definitions.
23pub mod opcodes;
24/// All about reading bytecode
25mod read;
26/// Bytecode elements definitions.
/// All the Ref* types in this module are references to bytecode elements like constants or functions.
/// They are required since we cannot use Rust references, as that would make our structure self-referential.
/// They make the code look a bit more complicated than it actually is. Every Ref* struct is cheaply copyable.
30pub mod types;
31/// All about writing bytecode
32mod write;
33
34/// Cheaply clonable with inline storage
35// pub type Str = smol_str::SmolStr;
36// pub type Str = kstring::KStringBase<kstring::backend::RcStr>;
37pub type Str = flexstr::SharedStr;
38// pub type Str = String;
39
40pub type Result<T> = core::result::Result<T, Error>;
41
42#[derive(thiserror::Error, Debug)]
43pub enum Error {
44    #[error("Malformed bytecode: {0}")]
45    MalformedBytecode(String),
46    #[error("Unsupported bytecode version {version} (expected {min} <= version <= {max})")]
47    UnsupportedVersion { version: u8, min: u8, max: u8 },
48    #[error("Value '{value}' is too big to be serialized (|expected| < {limit})")]
49    ValueOutOfBounds { value: i32, limit: u32 },
50    #[error(transparent)]
51    IoError(#[from] std::io::Error),
52    #[error(transparent)]
53    Utf8Error(#[from] core::str::Utf8Error),
54}
55
56/// Bytecode structure containing all the information.
57/// Every field is public for flexibility, but you aren't encouraged to modify them.
58///
59/// This type is like an arena, you usually work with custom
60#[derive(Debug)]
61pub struct Bytecode {
62    /// Bytecode format version
63    pub version: u8,
64    /// Program entrypoint
65    pub entrypoint: RefFun,
66    /// i32 constant pool
67    pub ints: Vec<i32>,
68    /// f64 constant pool
69    pub floats: Vec<f64>,
70    /// String constant pool
71    pub strings: Vec<Str>,
72    /// Bytes constant pool
73    ///
74    /// *Since bytecode v5*
75    pub bytes: Option<(Vec<u8>, Vec<usize>)>,
76    /// *Debug* file names constant pool
77    pub debug_files: Option<Vec<Str>>,
78    /// Types, contains every possible types expressed in the program
79    pub types: Vec<Type>,
80    /// Globals, holding static variables and such
81    pub globals: Vec<RefType>,
82    /// Native functions references
83    pub natives: Vec<Native>,
84    /// Code functions pool
85    pub functions: Vec<Function>,
86    /// Constants, initializers for globals
87    ///
88    /// *Since bytecode v4*
89    pub constants: Option<Vec<ConstantDef>>,
90
91    // Fields below are not part of the data.
92    // Those are acceleration structures used to speed up lookup.
93    /// Acceleration structure mapping function references (findex) to functions indexes in the native or function pool.
94    findexes: Vec<RefFunKnown>,
95    /// Acceleration structure mapping function names to function indexes in the function pool
96    fnames: HashMap<Str, usize>,
97    pub globals_initializers: HashMap<RefGlobal, usize>,
98}
99
100impl Bytecode {
101    /// Get the entrypoint function.
102    pub fn entrypoint(&self) -> &Function {
103        self.get(self.entrypoint).as_fn().unwrap()
104    }
105
106    /// Get the main function.
107    /// This will panic if there is no main function in the bytecode (there should always be one)
108    pub fn main(&self) -> &Function {
109        &self.functions[*self.fnames.get("main").unwrap()]
110    }
111
112    /// Get a function by its name.
113    pub fn function_by_name(&self, name: &str) -> Option<&Function> {
114        self.fnames.get(name).map(|&i| &self.functions[i])
115    }
116
117    pub fn findex_max(&self) -> usize {
118        self.findexes.len()
119    }
120
121    pub fn functions<'a>(&'a self) -> impl Iterator<Item = FunPtr<'a>> + 'a {
122        (0..self.findex_max()).map(RefFun).map(|r| self.get(r))
123    }
124}
125
126impl Default for Bytecode {
127    fn default() -> Self {
128        Self {
129            version: 5,
130            entrypoint: Default::default(),
131            ints: vec![],
132            floats: vec![],
133            strings: vec![],
134            bytes: None,
135            debug_files: None,
136            types: vec![],
137            globals: vec![],
138            natives: vec![],
139            functions: vec![],
140            constants: None,
141            findexes: vec![],
142            fnames: Default::default(),
143            globals_initializers: Default::default(),
144        }
145    }
146}
147
/// A resolved function reference: an index into either the function pool or the native pool.
#[derive(Debug, Clone, Copy)]
enum RefFunKnown {
    /// Index into the function pool.
    Fun(usize),
    /// Index into the native pool.
    Native(usize),
}
154
155//region Resolve
156
/// Lookup by index, similar to the [Index] trait, except that the result may be any type
/// (an owned value or a borrow tied to `self`) instead of always being a reference.
pub trait Resolve<I> {
    /// What a lookup yields; it may borrow from the container for `'a`.
    type Output<'a>
    where
        Self: 'a;

    /// Resolve `index` against this container.
    fn get(&self, index: I) -> Self::Output<'_>;
}
165
166impl Resolve<RefInt> for Bytecode {
167    type Output<'a> = i32;
168
169    fn get(&self, index: RefInt) -> Self::Output<'_> {
170        self.ints[index.0]
171    }
172}
173
174impl Resolve<RefFloat> for Bytecode {
175    type Output<'a> = f64;
176
177    fn get(&self, index: RefFloat) -> Self::Output<'_> {
178        self.floats[index.0]
179    }
180}
181
182impl Resolve<RefString> for Bytecode {
183    type Output<'a> = Str;
184
185    fn get(&self, index: RefString) -> Self::Output<'_> {
186        if index.0 > 0 {
187            self.strings[index.0].clone()
188        } else {
189            Str::from_static("<none>")
190        }
191    }
192}
193
194impl Resolve<RefType> for Bytecode {
195    type Output<'a> = &'a Type;
196
197    fn get(&self, index: RefType) -> Self::Output<'_> {
198        &self.types[index.0]
199    }
200}
201
202impl Resolve<RefFun> for Bytecode {
203    type Output<'a> = FunPtr<'a>;
204
205    fn get(&self, index: RefFun) -> Self::Output<'_> {
206        match self.findexes[index.0] {
207            RefFunKnown::Fun(fun) => FunPtr::Fun(&self.functions[fun]),
208            RefFunKnown::Native(n) => FunPtr::Native(&self.natives[n]),
209        }
210    }
211}
212
213//endregion
214
215// region Index impl
216
217impl Index<RefInt> for Bytecode {
218    type Output = i32;
219
220    fn index(&self, index: RefInt) -> &Self::Output {
221        self.ints.index(index.0)
222    }
223}
224
225impl Index<RefFloat> for Bytecode {
226    type Output = f64;
227
228    fn index(&self, index: RefFloat) -> &Self::Output {
229        self.floats.index(index.0)
230    }
231}
232
233impl Index<RefString> for Bytecode {
234    type Output = Str;
235
236    fn index(&self, index: RefString) -> &Self::Output {
237        self.strings.index(index.0)
238    }
239}
240
241impl Index<RefType> for Bytecode {
242    type Output = Type;
243
244    fn index(&self, index: RefType) -> &Self::Output {
245        self.types.index(index.0)
246    }
247}
248
249//endregion