hlbc/
read.rs

1use std::collections::{HashMap, VecDeque};
2use std::fs;
3use std::io::{BufReader, Read};
4use std::path::Path;
5use std::str::from_utf8;
6
7use byteorder::{LittleEndian, ReadBytesExt};
8
9use crate::types::{
10    EnumConstruct, Function, Native, ObjField, ObjProto, RefField, RefFloat, RefInt, RefString,
11    RefType, Type, TypeFun, TypeObj,
12};
13use crate::{Bytecode, ConstantDef, Opcode, RefFun, RefFunKnown, RefGlobal, Str};
14use crate::{Error, Result};
15
16impl Bytecode {
17    /// Load the bytecode from any source.
18    /// Must be a valid hashlink bytecode binary.
19    pub fn deserialize(r: &mut impl Read) -> Result<Bytecode> {
20        let mut header = [0u8; 3];
21        r.read_exact(&mut header)?;
22        if header != [b'H', b'L', b'B'] {
23            return Err(Error::MalformedBytecode(format!(
24                "Invalid magic header (expected: {:?}, got: {header:?})",
25                b"HLB"
26            )));
27        }
28        let version = r.read_u8()?;
29        if version < 4 && version > 5 {
30            return Err(Error::UnsupportedVersion {
31                version,
32                min: 4,
33                max: 5,
34            });
35        }
36        let flags = read_varu(r)?;
37        let has_debug = flags & 1 == 1;
38        let nints = read_varu(r)? as usize;
39        let nfloats = read_varu(r)? as usize;
40        let nstrings = read_varu(r)? as usize;
41        let nbytes = if version >= 5 {
42            Some(read_varu(r)? as usize)
43        } else {
44            None
45        };
46        let ntypes = read_varu(r)? as usize;
47        let nglobals = read_varu(r)? as usize;
48        let nnatives = read_varu(r)? as usize;
49        let nfunctions = read_varu(r)? as usize;
50        let nconstants = if version >= 4 {
51            Some(read_varu(r)? as usize)
52        } else {
53            None
54        };
55        let entrypoint = RefFun::read(r)?;
56
57        let mut ints = vec![0i32; nints];
58        for i in ints.iter_mut() {
59            *i = r.read_i32::<LittleEndian>()?;
60        }
61
62        let mut floats = vec![0f64; nfloats];
63        for i in floats.iter_mut() {
64            *i = r.read_f64::<LittleEndian>()?;
65        }
66
67        let strings = read_strings(r, nstrings)?;
68
69        let bytes = if let Some(nbytes) = nbytes {
70            let size = r.read_i32::<LittleEndian>()? as usize;
71            let mut bytes = vec![0; size];
72            r.read_exact(&mut bytes)?;
73            let mut pos = Vec::with_capacity(nbytes);
74            for _ in 0..nbytes {
75                pos.push(read_varu(r)? as usize);
76            }
77            Some((bytes, pos))
78        } else {
79            None
80        };
81
82        let debug_files = if has_debug {
83            let n = read_varu(r)? as usize;
84            Some(read_strings(r, n)?)
85        } else {
86            None
87        };
88
89        let mut types = Vec::with_capacity(ntypes);
90        for _ in 0..ntypes {
91            types.push(Type::read(r)?);
92        }
93
94        let mut globals = Vec::with_capacity(nglobals);
95        for _ in 0..nglobals {
96            globals.push(RefType::read(r)?);
97        }
98
99        let mut natives = Vec::with_capacity(nnatives);
100        for _ in 0..nnatives {
101            natives.push(Native::read(r)?);
102        }
103
104        let mut functions = Vec::with_capacity(nfunctions);
105        for _ in 0..nfunctions {
106            functions.push(Function::read(r, has_debug, version)?);
107        }
108
109        let constants = if let Some(n) = nconstants {
110            let mut constants = Vec::with_capacity(n);
111            for _ in 0..n {
112                constants.push(ConstantDef::read(r)?)
113            }
114            Some(constants)
115        } else {
116            None
117        };
118
119        // Parsing is finished, we now build links between everything
120
121        // Global function indexes
122        let mut findexes = vec![RefFunKnown::Fun(0); nfunctions + nnatives];
123        for (i, f) in functions.iter().enumerate() {
124            findexes[f.findex.0] = RefFunKnown::Fun(i);
125        }
126        for (i, n) in natives.iter().enumerate() {
127            findexes[n.findex.0] = RefFunKnown::Native(i);
128        }
129
130        // Flatten types fields
131        // Start by collecting every fields in the hierarchy
132        // The order is important because we refer to fields by index
133        let mut new_fields: Vec<Option<Vec<ObjField>>> = Vec::with_capacity(types.len());
134        for t in &types {
135            if let Some(obj) = t.get_type_obj() {
136                let mut parent = obj.super_.as_ref().map(|s| &types[s.0]);
137                let mut acc = VecDeque::with_capacity(obj.own_fields.len());
138                acc.extend(obj.own_fields.clone());
139                while let Some(p) = parent.and_then(|t| t.get_type_obj()) {
140                    for f in p.own_fields.iter().rev() {
141                        acc.push_front(f.clone());
142                    }
143                    parent = p.super_.as_ref().map(|s| &types[s.0]);
144                }
145                new_fields.push(Some(acc.into()));
146            } else {
147                new_fields.push(None);
148            }
149        }
150        // Apply new fields
151        for (t, new) in types.iter_mut().zip(new_fields.into_iter()) {
152            if let Some(fields) = new {
153                t.get_type_obj_mut().unwrap().fields = fields;
154            }
155        }
156
157        // Give functions name based on object fields bindings and methods
158        for (i, t) in types.iter().enumerate() {
159            if let Some(TypeObj {
160                protos, bindings, ..
161            }) = t.get_type_obj()
162            {
163                for p in protos {
164                    if let RefFunKnown::Fun(x) = findexes[p.findex.0] {
165                        functions[x].name = p.name;
166                        functions[x].parent = Some(RefType(i));
167                    }
168                }
169                for (fid, findex) in bindings {
170                    if let Some(field) = t.get_type_obj().map(|o| &o.fields[fid.0]) {
171                        if let RefFunKnown::Fun(x) = findexes[findex.0] {
172                            functions[x].name = field.name;
173                            functions[x].parent = Some(RefType(i));
174                        }
175                    }
176                }
177            }
178        }
179
180        // Function names
181        let mut fnames = HashMap::with_capacity(functions.len());
182        for (i, f) in functions.iter().enumerate() {
183            // FIXME duplicates ?
184            fnames.insert(strings[f.name.0].clone(), i);
185        }
186        fnames.insert(
187            Str::from("init"),
188            match findexes[entrypoint.0] {
189                RefFunKnown::Fun(x) => x,
190                _ => 0,
191            },
192        );
193
194        let globals_initializers = if let Some(constants) = &constants {
195            let mut tmp = HashMap::with_capacity(constants.len());
196            for (i, c) in constants.iter().enumerate() {
197                tmp.insert(c.global, i);
198            }
199            tmp
200        } else {
201            HashMap::new()
202        };
203
204        Ok(Bytecode {
205            version,
206            entrypoint,
207            ints,
208            floats,
209            strings,
210            bytes,
211            debug_files,
212            types,
213            globals,
214            natives,
215            functions,
216            constants,
217            findexes,
218            fnames,
219            globals_initializers,
220        })
221    }
222
223    pub fn from_file(path: impl AsRef<Path>) -> Result<Self> {
224        Self::deserialize(&mut BufReader::new(fs::File::open(path)?))
225    }
226}
227
228impl RefInt {
229    pub(crate) fn read(r: &mut impl Read) -> Result<Self> {
230        Ok(Self(read_vari(r)? as usize))
231    }
232}
233
234impl RefFloat {
235    pub(crate) fn read(r: &mut impl Read) -> Result<Self> {
236        Ok(Self(read_vari(r)? as usize))
237    }
238}
239
240impl RefString {
241    pub(crate) fn read(r: &mut impl Read) -> Result<Self> {
242        Ok(Self(read_vari(r)? as usize))
243    }
244}
245
246impl RefGlobal {
247    pub(crate) fn read(r: &mut impl Read) -> Result<Self> {
248        Ok(Self(read_vari(r)? as usize))
249    }
250}
251
252impl RefFun {
253    pub(crate) fn read(r: &mut impl Read) -> Result<Self> {
254        Ok(Self(read_vari(r)? as usize))
255    }
256}
257
258impl RefType {
259    pub(crate) fn read(r: &mut impl Read) -> Result<Self> {
260        Ok(Self(read_vari(r)? as usize))
261    }
262}
263
264impl RefField {
265    pub(crate) fn read(r: &mut impl Read) -> Result<Self> {
266        Ok(Self(read_vari(r)? as usize))
267    }
268}
269
270impl ObjField {
271    pub(crate) fn read(r: &mut impl Read) -> Result<Self> {
272        Ok(ObjField {
273            name: RefString::read(r)?,
274            t: RefType::read(r)?,
275        })
276    }
277}
278
279impl TypeFun {
280    pub(crate) fn read(r: &mut impl Read) -> Result<Self> {
281        let nargs = r.read_u8()?;
282        let mut args = Vec::with_capacity(nargs as usize);
283        for _ in 0..nargs {
284            args.push(RefType::read(r)?);
285        }
286        Ok(TypeFun {
287            args,
288            ret: RefType::read(r)?,
289        })
290    }
291}
292
293impl TypeObj {
294    pub(crate) fn read(r: &mut impl Read) -> Result<Self> {
295        let name = RefString::read(r)?;
296        let super_ = read_vari(r)?;
297        let global = RefGlobal::read(r)?;
298        let nfields = read_varu(r)? as usize;
299        let nprotos = read_varu(r)? as usize;
300        let nbindings = read_varu(r)? as usize;
301        let mut own_fields = Vec::with_capacity(nfields);
302        for _ in 0..nfields {
303            own_fields.push(ObjField::read(r)?);
304        }
305        let mut protos = Vec::with_capacity(nprotos);
306        for _ in 0..nprotos {
307            protos.push(ObjProto {
308                name: RefString::read(r)?,
309                findex: RefFun::read(r)?,
310                pindex: read_vari(r)?,
311            });
312        }
313        let mut bindings = HashMap::with_capacity(nbindings);
314        for _ in 0..nbindings {
315            bindings.insert(RefField::read(r)?, RefFun::read(r)?);
316        }
317        Ok(TypeObj {
318            name,
319            super_: if super_ < 0 {
320                None
321            } else {
322                Some(RefType(super_ as usize))
323            },
324            global,
325            own_fields,
326            fields: Vec::new(),
327            protos,
328            bindings,
329        })
330    }
331}
332
333impl Type {
334    pub(crate) fn read(r: &mut impl Read) -> Result<Self> {
335        use crate::Type::*;
336        match r.read_u8()? {
337            0 => Ok(Void),
338            1 => Ok(UI8),
339            2 => Ok(UI16),
340            3 => Ok(I32),
341            4 => Ok(I64),
342            5 => Ok(F32),
343            6 => Ok(F64),
344            7 => Ok(Bool),
345            8 => Ok(Bytes),
346            9 => Ok(Dyn),
347            10 => Ok(Fun(TypeFun::read(r)?)),
348            11 => Ok(Obj(TypeObj::read(r)?)),
349            12 => Ok(Array),
350            13 => Ok(Type),
351            14 => Ok(Ref(RefType::read(r)?)),
352            15 => {
353                let nfields = read_varu(r)? as usize;
354                let mut fields = Vec::with_capacity(nfields);
355                for _ in 0..nfields {
356                    fields.push(ObjField::read(r)?);
357                }
358                Ok(Virtual { fields })
359            }
360            16 => Ok(DynObj),
361            17 => Ok(Abstract {
362                name: RefString::read(r)?,
363            }),
364            18 => {
365                let name = RefString::read(r)?;
366                let global = RefGlobal::read(r)?;
367                let nconstructs = read_varu(r)? as usize;
368                let mut constructs = Vec::with_capacity(nconstructs);
369                for _ in 0..nconstructs {
370                    let name = RefString::read(r)?;
371                    let nparams = read_varu(r)? as usize;
372                    let mut params = Vec::with_capacity(nparams);
373                    for _ in 0..nparams {
374                        params.push(RefType::read(r)?);
375                    }
376                    constructs.push(EnumConstruct { name, params })
377                }
378                Ok(Enum {
379                    name,
380                    global,
381                    constructs,
382                })
383            }
384            19 => Ok(Null(RefType::read(r)?)),
385            20 => Ok(Method(TypeFun::read(r)?)),
386            21 => Ok(Struct(TypeObj::read(r)?)),
387            22 => Ok(Packed(RefType::read(r)?)),
388            other => Err(Error::MalformedBytecode(format!(
389                "Invalid type kind '{other}'"
390            ))),
391        }
392    }
393}
394
395impl Native {
396    pub(crate) fn read(r: &mut impl Read) -> Result<Self> {
397        Ok(Native {
398            lib: RefString::read(r)?,
399            name: RefString::read(r)?,
400            t: RefType::read(r)?,
401            findex: RefFun::read(r)?,
402        })
403    }
404}
405
406impl Function {
407    pub(crate) fn read(r: &mut impl Read, has_debug: bool, version: u8) -> Result<Self> {
408        let t = RefType::read(r)?;
409        let findex = RefFun::read(r)?;
410        let nregs = read_varu(r)? as usize;
411        let nops = read_varu(r)? as usize;
412        let mut regs = Vec::with_capacity(nregs);
413        for _ in 0..nregs {
414            regs.push(RefType::read(r)?);
415        }
416        let mut ops = Vec::with_capacity(nops);
417        for _ in 0..nops {
418            ops.push(Opcode::read(r)?);
419        }
420
421        // This is extracted from the hashlink source code, do not count on me to explain what it does
422        let debug_info = if has_debug {
423            let mut tmp = Vec::with_capacity(nops);
424            let mut currfile: i32 = -1;
425            let mut currline: i32 = 0;
426            let mut i = 0;
427            while i < nops {
428                let mut c = r.read_u8()? as i32;
429                if c & 1 != 0 {
430                    c >>= 1;
431                    currfile = (c << 8) | (r.read_u8()? as i32);
432                } else if c & 2 != 0 {
433                    let delta = c >> 6;
434                    let mut count = (c >> 2) & 15;
435                    while count > 0 {
436                        count -= 1;
437                        tmp.push((currfile as usize, currline as usize));
438                        i += 1;
439                    }
440                    currline += delta;
441                } else if c & 4 != 0 {
442                    currline += c >> 3;
443                    tmp.push((currfile as usize, currline as usize));
444                    i += 1;
445                } else {
446                    let b2 = r.read_u8()? as i32;
447                    let b3 = r.read_u8()? as i32;
448                    currline = (c >> 3) | (b2 << 5) | (b3 << 13);
449                    tmp.push((currfile as usize, currline as usize));
450                    i += 1;
451                }
452            }
453            Some(tmp)
454        } else {
455            None
456        };
457
458        let assigns = if has_debug && version >= 3 {
459            let len = read_varu(r)? as usize;
460            let mut assigns = Vec::with_capacity(len);
461            for _ in 0..len {
462                assigns.push((RefString::read(r)?, read_vari(r)? as usize));
463            }
464            Some(assigns)
465        } else {
466            None
467        };
468        Ok(Function {
469            name: RefString(0),
470            t,
471            findex,
472            regs,
473            ops,
474            debug_info,
475            assigns,
476            parent: None,
477        })
478    }
479}
480
481impl ConstantDef {
482    pub(crate) fn read(r: &mut impl Read) -> Result<Self> {
483        let global = RefGlobal::read(r)?;
484        let nfields = read_varu(r)? as usize;
485        let mut fields = Vec::with_capacity(nfields);
486        for _ in 0..nfields {
487            fields.push(read_varu(r)? as usize);
488        }
489        Ok(ConstantDef { global, fields })
490    }
491}
492
493pub(crate) fn read_vari(r: &mut impl Read) -> Result<i32> {
494    let b = r.read_u8()? as i32;
495    if b & 0x80 == 0 {
496        Ok(b & 0x7F)
497    } else if b & 0x40 == 0 {
498        let v = r.read_u8()? as i32 | ((b & 31) << 8);
499        Ok(if b & 0x20 == 0 { v } else { -v })
500    } else {
501        let c = r.read_u8()? as i32;
502        let d = r.read_u8()? as i32;
503        let e = r.read_u8()? as i32;
504        let v = ((b & 31) << 24) | (c << 16) | (d << 8) | e;
505        Ok(if b & 0x20 == 0 { v } else { -v })
506    }
507}
508
509pub(crate) fn read_varu(r: &mut impl Read) -> Result<u32> {
510    let i = read_vari(r)?;
511    if i < 0 {
512        Err(Error::MalformedBytecode(format!(
513            "Got negative index '{i}' (expected >= 0)"
514        )))
515    } else {
516        Ok(i as u32)
517    }
518}
519
520fn read_strings(r: &mut impl Read, nstrings: usize) -> Result<Vec<Str>> {
521    let mut strings = Vec::with_capacity(nstrings);
522    let mut string_data = vec![0u8; r.read_i32::<LittleEndian>()? as usize];
523    r.read_exact(&mut string_data)?;
524    let mut acc = 0;
525    for _ in 0..nstrings {
526        let ssize = read_varu(r)? as usize + 1;
527        //println!("size: {ssize} {:?}", &string_data[acc..(acc + ssize)]);
528        //let cstr = unsafe { CStr::from_bytes_with_nul_unchecked(&string_data[acc..(acc + ssize)]) };
529        strings.push(Str::from_ref(from_utf8(
530            &string_data[acc..(acc + ssize - 1)],
531        )?));
532        acc += ssize;
533    }
534    Ok(strings)
535}
536
#[cfg(test)]
mod tests {
    use std::collections::HashSet;
    use std::fs;
    use std::io::{BufWriter, Write};

    use crate::Bytecode;

    /// Every `.hl` file of the shared data directory must deserialize without error.
    #[test]
    fn test_deserialize_all() {
        for entry in fs::read_dir("../../data").unwrap() {
            let path = entry.unwrap().path();
            if path.extension().map_or(false, |ext| ext == "hl") {
                let code = Bytecode::from_file(&path);
                assert!(
                    code.is_ok(),
                    "failed to deserialize {}: {:?}",
                    path.display(),
                    code.err()
                );
            }
        }
    }

    /// Needs a local game install, run explicitly with `cargo test -- --ignored`.
    #[test]
    #[ignore = "requires a local Wartales install"]
    fn test_deserialize_wartales() {
        let path = "E:\\Games\\Wartales\\hlboot.dat";
        let code = Bytecode::from_file(path);
        assert!(code.is_ok());
    }

    /// Needs a local game install, run explicitly with `cargo test -- --ignored`.
    #[test]
    #[ignore = "requires a local Northgard install"]
    fn test_deserialize_northgard() {
        let path = "E:\\Games\\Northgard\\hlboot.dat";
        let code = Bytecode::from_file(path);
        assert!(code.is_ok());
    }

    /// Utility more than a test: dumps the deduplicated strings of both games to
    /// `strings.txt`, one per line.
    #[test]
    #[ignore = "requires local game installs and writes strings.txt"]
    fn list_strings() {
        let code = Bytecode::from_file("E:\\Games\\Northgard\\hlboot.dat").unwrap();
        let code2 = Bytecode::from_file("E:\\Games\\Wartales\\hlboot.dat").unwrap();
        let mut file = BufWriter::new(
            fs::OpenOptions::new()
                .write(true)
                .create(true)
                .truncate(true)
                .open("strings.txt")
                .unwrap(),
        );
        let mut set = HashSet::with_capacity(code.strings.len() + code2.strings.len());
        set.extend(code.strings);
        set.extend(code2.strings);
        for s in &set {
            // write_all: a plain write may stop before the whole buffer is written
            file.write_all(s.as_bytes()).unwrap();
            file.write_all(b"\n").unwrap();
        }
        // Flush explicitly so that a late write error is not swallowed on drop
        file.flush().unwrap();
    }
}