beamcode/
term.rs

1//! Terms.
2//!
3//! # References
4//!
5//! - [The BEAM Book - Compact Term Encoding](https://blog.stenmans.org/theBeamBook/#SEC-BeamModulesCTE)
6//! - [erlang/otp/lib/compiler/src/beam_asm.erl](https://github.com/erlang/otp/blob/master/lib/compiler/src/beam_asm.erl)
7//! - [erlang/otp/lib/compiler/src/beam_disasm.erl](https://github.com/erlang/otp/blob/master/lib/compiler/src/beam_disasm.erl)
8use crate::{Decode, DecodeError, Encode, EncodeError};
9use byteorder::{ReadBytesExt, WriteBytesExt};
10use num::BigInt;
11use std::io::{Read, Write};
12
/// The kind of a term, as announced by its tag byte.
///
/// Kinds with tags `0`–`6` are selected by the low three bits of the tag. The remaining kinds use
/// `0b111` in the low three bits and are told apart by the high four bits.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum TermKind {
    /// Tag `0`.
    Usize,
    /// Tag `1`.
    Integer,
    /// Tag `2`.
    Atom,
    /// Tag `3`.
    XRegister,
    /// Tag `4`.
    YRegister,
    /// Tag `5`.
    Label,
    /// Tag `6`.
    Character,
    /// Extended tag `0b0001_0111`.
    List,
    /// Extended tag `0b0010_0111`.
    FloatingPointRegister,
    /// Extended tag `0b0011_0111`.
    AllocationList,
    /// Extended tag `0b0100_0111`.
    Literal,
    /// Extended tag `0b0101_0111`.
    TypedRegister,
    /// A tag byte that matches none of the kinds above; the raw byte is kept.
    Unknown(u8),
}
29
30impl TermKind {
31    fn from_tag(tag: u8) -> Self {
32        match tag & 0b111 {
33            0 => Self::Usize,
34            1 => Self::Integer,
35            2 => Self::Atom,
36            3 => Self::XRegister,
37            4 => Self::YRegister,
38            5 => Self::Label,
39            6 => Self::Character,
40            7 => match tag >> 4 {
41                0b0001 => Self::List,
42                0b0010 => Self::FloatingPointRegister,
43                0b0011 => Self::AllocationList,
44                0b0100 => Self::Literal,
45                0b0101 => Self::TypedRegister,
46                _ => Self::Unknown(tag),
47            },
48            _ => unreachable!(),
49        }
50    }
51
52    fn expect(self, expected: &[Self]) -> Result<(), DecodeError> {
53        if expected.iter().any(|&x| x == self) {
54            Ok(())
55        } else {
56            Err(DecodeError::UnexpectedTerm {
57                expected: expected.to_owned(),
58                actual: self,
59            })
60        }
61    }
62
63    const fn tag(self) -> u8 {
64        match self {
65            Self::Usize => 0,
66            Self::Integer => 1,
67            Self::Atom => 2,
68            Self::XRegister => 3,
69            Self::YRegister => 4,
70            Self::Label => 5,
71            Self::Character => 6,
72            Self::List => 0b0001_0111,
73            Self::FloatingPointRegister => 0b0010_0111,
74            Self::AllocationList => 0b0011_0111,
75            Self::Literal => 0b0100_0111,
76            Self::TypedRegister => 0b0101_0111,
77            Self::Unknown(tag) => tag,
78        }
79    }
80}
81
82#[derive(Debug, Clone, PartialEq, Eq, Hash, Encode)]
83pub enum Term {
84    Usize(usize),
85    Integer(BigInt),
86    Atom(Atom),
87    XRegister(XRegister),
88    YRegister(YRegister),
89    Label(Label),
90    Character(char),
91    List(List),
92    FloatingPointRegister(FloatingPointRegister),
93    AllocationList(AllocationList),
94    Literal(Literal),
95    TypedRegister(TypedRegister),
96}
97
98impl Decode for Term {
99    fn decode_with_tag<R: Read>(reader: &mut R, tag: u8) -> Result<Self, DecodeError> {
100        match TermKind::from_tag(tag) {
101            TermKind::Usize => Decode::decode_with_tag(reader, tag).map(Self::Usize),
102            TermKind::Integer => Decode::decode_with_tag(reader, tag).map(Self::Integer),
103            TermKind::Atom => Decode::decode_with_tag(reader, tag).map(Self::Atom),
104            TermKind::XRegister => Decode::decode_with_tag(reader, tag).map(Self::XRegister),
105            TermKind::YRegister => Decode::decode_with_tag(reader, tag).map(Self::YRegister),
106            TermKind::Label => Decode::decode_with_tag(reader, tag).map(Self::Label),
107            TermKind::Character => Decode::decode_with_tag(reader, tag).map(Self::Character),
108            TermKind::List => Decode::decode_with_tag(reader, tag).map(Self::List),
109            TermKind::FloatingPointRegister => {
110                Decode::decode_with_tag(reader, tag).map(Self::FloatingPointRegister)
111            }
112            TermKind::AllocationList => {
113                Decode::decode_with_tag(reader, tag).map(Self::AllocationList)
114            }
115            TermKind::Literal => Decode::decode_with_tag(reader, tag).map(Self::Literal),
116            TermKind::TypedRegister => {
117                Decode::decode_with_tag(reader, tag).map(Self::TypedRegister)
118            }
119            TermKind::Unknown(_) => Err(DecodeError::UnknownTermTag { tag }),
120        }
121    }
122}
123
124#[derive(Debug, Clone, PartialEq, Eq, Hash, Encode)]
125pub enum Allocation {
126    Words(usize),
127    List(AllocationList),
128}
129
130impl Decode for Allocation {
131    fn decode_with_tag<R: Read>(reader: &mut R, tag: u8) -> Result<Self, DecodeError> {
132        let kind = TermKind::from_tag(tag);
133        kind.expect(&[TermKind::Usize, TermKind::AllocationList])?;
134        if kind == TermKind::Usize {
135            Decode::decode_with_tag(reader, tag).map(Self::Words)
136        } else {
137            Decode::decode_with_tag(reader, tag).map(Self::List)
138        }
139    }
140}
141
142#[derive(Debug, Clone, PartialEq, Eq, Hash)]
143pub struct AllocationList {
144    pub items: Vec<AllocationListItem>,
145}
146
147impl Decode for AllocationList {
148    fn decode_with_tag<R: Read>(reader: &mut R, tag: u8) -> Result<Self, DecodeError> {
149        TermKind::from_tag(tag).expect(&[TermKind::AllocationList])?;
150        let size = usize::decode(reader)?;
151        let items = (0..size)
152            .map(|_| Decode::decode(reader))
153            .collect::<Result<_, _>>()?;
154        Ok(Self { items })
155    }
156}
157
158impl Encode for AllocationList {
159    fn encode<W: Write>(&self, writer: &mut W) -> Result<(), EncodeError> {
160        writer.write_u8(TermKind::AllocationList.tag())?;
161        self.items.len().encode(writer)?;
162        for item in &self.items {
163            item.encode(writer)?;
164        }
165        Ok(())
166    }
167}
168
/// One entry of an [`AllocationList`].
///
/// Each entry is encoded as two `usize` terms: a discriminant (`0`, `1` or `2`) followed by the
/// amount carried by the variant.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum AllocationListItem {
    /// Discriminant `0`.
    Words(usize),
    /// Discriminant `1`.
    Floats(usize),
    /// Discriminant `2`.
    Funs(usize),
}
175
176impl Decode for AllocationListItem {
177    fn decode_with_tag<R: Read>(reader: &mut R, tag: u8) -> Result<Self, DecodeError> {
178        match usize::decode_with_tag(reader, tag)? {
179            0 => usize::decode(reader).map(Self::Words),
180            1 => usize::decode(reader).map(Self::Floats),
181            2 => usize::decode(reader).map(Self::Funs),
182            tag => Err(DecodeError::UnknownAllocationListItemTag { tag }),
183        }
184    }
185}
186
187impl Encode for AllocationListItem {
188    fn encode<W: Write>(&self, writer: &mut W) -> Result<(), EncodeError> {
189        match self {
190            Self::Words(v) => {
191                0.encode(writer)?;
192                v.encode(writer)?;
193            }
194            Self::Floats(v) => {
195                1.encode(writer)?;
196                v.encode(writer)?;
197            }
198            Self::Funs(v) => {
199                2.encode(writer)?;
200                v.encode(writer)?;
201            }
202        }
203        Ok(())
204    }
205}
206
/// A term of kind [`TermKind::FloatingPointRegister`].
///
/// Encoded as the extended tag byte followed by one `usize` term.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub struct FloatingPointRegister {
    /// The `usize` term that follows the tag byte.
    pub value: usize,
}
211
212impl Decode for FloatingPointRegister {
213    fn decode_with_tag<R: Read>(reader: &mut R, tag: u8) -> Result<Self, DecodeError> {
214        TermKind::from_tag(tag).expect(&[TermKind::FloatingPointRegister])?;
215        Ok(Self {
216            value: usize::decode(reader)?,
217        })
218    }
219}
220
221impl Encode for FloatingPointRegister {
222    fn encode<W: Write>(&self, writer: &mut W) -> Result<(), EncodeError> {
223        writer.write_u8(TermKind::FloatingPointRegister.tag())?;
224        self.value.encode(writer)
225    }
226}
227
228#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
229pub enum TypedRegister {
230    X { register: XRegister, ty: usize },
231    Y { register: YRegister, ty: usize },
232}
233
234impl Decode for TypedRegister {
235    fn decode_with_tag<R: Read>(reader: &mut R, tag: u8) -> Result<Self, DecodeError> {
236        TermKind::from_tag(tag).expect(&[TermKind::TypedRegister])?;
237
238        let tag = reader.read_u8()?;
239        let kind = TermKind::from_tag(tag);
240        kind.expect(&[TermKind::XRegister, TermKind::YRegister])?;
241        if kind == TermKind::XRegister {
242            let register = XRegister::decode_with_tag(reader, tag)?;
243            let ty = usize::decode(reader)?;
244            Ok(Self::X { register, ty })
245        } else {
246            let register = YRegister::decode_with_tag(reader, tag)?;
247            let ty = usize::decode(reader)?;
248            Ok(Self::Y { register, ty })
249        }
250    }
251}
252
253impl Encode for TypedRegister {
254    fn encode<W: Write>(&self, writer: &mut W) -> Result<(), EncodeError> {
255        writer.write_u8(TermKind::TypedRegister.tag())?;
256        match self {
257            Self::X { register, ty } => {
258                register.encode(writer)?;
259                ty.encode(writer)?;
260            }
261            Self::Y { register, ty } => {
262                register.encode(writer)?;
263                ty.encode(writer)?;
264            }
265        }
266        Ok(())
267    }
268}
269
270#[derive(Debug, Clone, PartialEq, Eq, Hash, Encode)]
271pub enum Register {
272    X(XRegister),
273    Y(YRegister),
274    Typed(TypedRegister),
275}
276
277impl Decode for Register {
278    fn decode_with_tag<R: Read>(reader: &mut R, tag: u8) -> Result<Self, DecodeError> {
279        match TermKind::from_tag(tag) {
280            TermKind::XRegister => Decode::decode_with_tag(reader, tag).map(Self::X),
281            TermKind::YRegister => Decode::decode_with_tag(reader, tag).map(Self::Y),
282            TermKind::TypedRegister => Decode::decode_with_tag(reader, tag).map(Self::Typed),
283            actual => Err(DecodeError::UnexpectedTerm {
284                actual,
285                expected: vec![
286                    TermKind::XRegister,
287                    TermKind::YRegister,
288                    TermKind::TypedRegister,
289                ],
290            }),
291        }
292    }
293}
294
295impl Decode for usize {
296    fn decode_with_tag<R: Read>(reader: &mut R, tag: u8) -> Result<Self, DecodeError> {
297        TermKind::from_tag(tag).expect(&[TermKind::Usize])?;
298        let value = usize::try_from(decode_integer(tag, reader)?)?;
299        Ok(value)
300    }
301}
302
303impl Encode for usize {
304    fn encode<W: Write>(&self, writer: &mut W) -> Result<(), EncodeError> {
305        encode_integer(TermKind::Usize.tag(), &BigInt::from(*self), writer)
306    }
307}
308
309impl Decode for char {
310    fn decode_with_tag<R: Read>(reader: &mut R, tag: u8) -> Result<Self, DecodeError> {
311        TermKind::from_tag(tag).expect(&[TermKind::Character])?;
312        let value = u32::try_from(decode_integer(tag, reader)?)?;
313        char::from_u32(value).ok_or(DecodeError::InvalidUnicodeCodepoint { value })
314    }
315}
316
317impl Encode for char {
318    fn encode<W: Write>(&self, writer: &mut W) -> Result<(), EncodeError> {
319        encode_integer(
320            TermKind::Character.tag(),
321            &BigInt::from(u32::from(*self)),
322            writer,
323        )
324    }
325}
326
/// A term of kind [`TermKind::Literal`].
///
/// Encoded as the extended tag byte followed by one `usize` term.
/// NOTE(review): `value` is presumably an index into the module's literal table — confirm.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct Literal {
    /// The `usize` term that follows the tag byte.
    pub value: usize,
}
331
332impl Decode for Literal {
333    fn decode_with_tag<R: Read>(reader: &mut R, tag: u8) -> Result<Self, DecodeError> {
334        TermKind::from_tag(tag).expect(&[TermKind::Literal])?;
335        Ok(Self {
336            value: usize::decode(reader)?,
337        })
338    }
339}
340
341impl Encode for Literal {
342    fn encode<W: Write>(&self, writer: &mut W) -> Result<(), EncodeError> {
343        writer.write_u8(TermKind::Literal.tag())?;
344        self.value.encode(writer)
345    }
346}
347
348impl Decode for BigInt {
349    fn decode_with_tag<R: Read>(reader: &mut R, tag: u8) -> Result<Self, DecodeError> {
350        TermKind::from_tag(tag).expect(&[TermKind::Integer])?;
351        let value = decode_integer(tag, reader)?;
352        Ok(value)
353    }
354}
355
356impl Encode for BigInt {
357    fn encode<W: Write>(&self, writer: &mut W) -> Result<(), EncodeError> {
358        encode_integer(TermKind::Integer.tag(), self, writer)
359    }
360}
361
/// A term of kind [`TermKind::Atom`].
///
/// NOTE(review): `value` is presumably an index into the module's atom table — confirm.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct Atom {
    /// The integer payload of the term.
    pub value: usize,
}
366
367impl Decode for Atom {
368    fn decode_with_tag<R: Read>(reader: &mut R, tag: u8) -> Result<Self, DecodeError> {
369        TermKind::from_tag(tag).expect(&[TermKind::Atom])?;
370        let value = usize::try_from(decode_integer(tag, reader)?)?;
371        Ok(Self { value })
372    }
373}
374
375impl Encode for Atom {
376    fn encode<W: Write>(&self, writer: &mut W) -> Result<(), EncodeError> {
377        encode_integer(TermKind::Atom.tag(), &BigInt::from(self.value), writer)
378    }
379}
380
/// A term of kind [`TermKind::XRegister`].
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct XRegister {
    /// The integer payload of the term.
    pub value: usize,
    /// Not part of the encoded form in this module: decoding always yields `None` and encoding
    /// ignores the field.
    pub ty: Option<usize>,
}
386
387impl Decode for XRegister {
388    fn decode_with_tag<R: Read>(reader: &mut R, tag: u8) -> Result<Self, DecodeError> {
389        TermKind::from_tag(tag).expect(&[TermKind::XRegister])?;
390        let value = usize::try_from(decode_integer(tag, reader)?)?;
391        Ok(Self { value, ty: None })
392    }
393}
394
395impl Encode for XRegister {
396    fn encode<W: Write>(&self, writer: &mut W) -> Result<(), EncodeError> {
397        encode_integer(TermKind::XRegister.tag(), &BigInt::from(self.value), writer)
398    }
399}
400
/// A term of kind [`TermKind::YRegister`].
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct YRegister {
    /// The integer payload of the term.
    pub value: usize,
    /// Not part of the encoded form in this module: decoding always yields `None` and encoding
    /// ignores the field.
    pub ty: Option<usize>,
}
406
407impl Decode for YRegister {
408    fn decode_with_tag<R: Read>(reader: &mut R, tag: u8) -> Result<Self, DecodeError> {
409        TermKind::from_tag(tag).expect(&[TermKind::YRegister])?;
410        let value = usize::try_from(decode_integer(tag, reader)?)?;
411        Ok(Self { value, ty: None })
412    }
413}
414
415impl Encode for YRegister {
416    fn encode<W: Write>(&self, writer: &mut W) -> Result<(), EncodeError> {
417        encode_integer(TermKind::YRegister.tag(), &BigInt::from(self.value), writer)
418    }
419}
420
421impl Decode for Vec<YRegister> {
422    fn decode_with_tag<R: Read>(reader: &mut R, tag: u8) -> Result<Self, DecodeError> {
423        let list = List::decode_with_tag(reader, tag)?;
424        Ok(list.items)
425    }
426}
427
428impl Encode for Vec<YRegister> {
429    fn encode<W: Write>(&self, writer: &mut W) -> Result<(), EncodeError> {
430        let list = List {
431            items: self.iter().copied().map(Term::YRegister).collect(),
432        };
433        list.encode(writer)
434    }
435}
436
/// A term of kind [`TermKind::Label`].
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct Label {
    /// The integer payload of the term.
    pub value: usize,
}
441
442impl Decode for Label {
443    fn decode_with_tag<R: Read>(reader: &mut R, tag: u8) -> Result<Self, DecodeError> {
444        TermKind::from_tag(tag).expect(&[TermKind::Label])?;
445        let value = usize::try_from(decode_integer(tag, reader)?)?;
446        Ok(Self { value })
447    }
448}
449
450impl Encode for Label {
451    fn encode<W: Write>(&self, writer: &mut W) -> Result<(), EncodeError> {
452        encode_integer(TermKind::Label.tag(), &BigInt::from(self.value), writer)
453    }
454}
455
456#[derive(Debug, Clone, PartialEq, Eq, Hash)]
457pub struct List<T = Term> {
458    pub items: Vec<T>,
459}
460
461impl<T: Decode> Decode for List<T> {
462    fn decode_with_tag<R: Read>(reader: &mut R, tag: u8) -> Result<Self, DecodeError> {
463        TermKind::from_tag(tag).expect(&[TermKind::List])?;
464
465        let size = usize::decode(reader)?;
466        let items = (0..size)
467            .map(|_| T::decode(reader))
468            .collect::<Result<_, _>>()?;
469        Ok(Self { items })
470    }
471}
472
473impl<T: Encode> Encode for List<T> {
474    fn encode<W: Write>(&self, writer: &mut W) -> Result<(), EncodeError> {
475        writer.write_u8(TermKind::List.tag())?;
476        self.items.len().encode(writer)?;
477        for x in &self.items {
478            x.encode(writer)?;
479        }
480        Ok(())
481    }
482}
483
484fn decode_integer<R: Read>(tag: u8, reader: &mut R) -> Result<BigInt, DecodeError> {
485    if (tag & 0b1000) == 0 {
486        Ok(BigInt::from(tag >> 4))
487    } else if (tag & 0b1_0000) == 0 {
488        let v = u64::from(reader.read_u8()?);
489        Ok(BigInt::from((u64::from(tag) & 0b1110_0000) << 3 | v))
490    } else if (tag >> 5) != 0b111 {
491        let byte_size = usize::from(tag >> 5) + 2;
492        let mut buf = vec![0; byte_size];
493        reader.read_exact(&mut buf)?;
494        Ok(BigInt::from_signed_bytes_be(&buf))
495    } else {
496        let byte_size = usize::decode(reader)?;
497        let mut buf = vec![0; byte_size + 9];
498        reader.read_exact(&mut buf)?;
499        Ok(BigInt::from_signed_bytes_be(&buf))
500    }
501}
502
503fn encode_integer<W: Write>(tag: u8, value: &BigInt, writer: &mut W) -> Result<(), EncodeError> {
504    if let Ok(v) = i16::try_from(value.clone()) {
505        if v < 0 {
506            let bytes = v.to_be_bytes();
507            return encode_integer_bytes(tag, &bytes, writer);
508        } else if v < 16 {
509            writer.write_u8((v << 4) as u8 | tag)?;
510            return Ok(());
511        } else if v < 0x800 {
512            writer.write_u8(((v >> 3) as u8 & 0b1110_0000) | tag | 0b000_1000)?;
513            writer.write_u8((v & 0xFF) as u8)?;
514            return Ok(());
515        }
516    }
517
518    let bytes = value.to_signed_bytes_be();
519    encode_integer_bytes(tag, &bytes, writer)
520}
521
522fn encode_integer_bytes<W: Write>(
523    tag: u8,
524    bytes: &[u8],
525    writer: &mut W,
526) -> Result<(), EncodeError> {
527    assert!(bytes.len() >= 2, "bug");
528
529    if bytes.len() <= 8 {
530        writer.write_u8(((bytes.len() - 2) << 5) as u8 | 0b0001_1000 | tag)?;
531        writer.write_all(bytes)?;
532    } else {
533        writer.write_u8(tag | 0b1111_1000)?;
534        (bytes.len() - 9).encode(writer)?;
535        writer.write_all(bytes)?;
536    }
537    Ok(())
538}
539
#[cfg(test)]
mod tests {
    use super::*;

    /// Decodes `input` (tag byte first), compares the result with `expected`, then encodes the
    /// decoded value again and expects the original bytes back.
    fn assert_round_trip(input: &[u8], expected: BigInt) {
        let tag = input[0];
        let mut payload = &input[1..];
        let decoded = decode_integer(tag, &mut payload).expect("decode failure");
        assert_eq!(decoded, expected);

        let mut encoded = Vec::new();
        encode_integer(tag & 0b111, &decoded, &mut encoded).expect("encode failure");
        assert_eq!(encoded, input);
    }

    #[test]
    fn decode_encode_integer_works() {
        // Values that fit into `i64`, covering every layout up to four payload bytes.
        let cases: &[(&[u8], i64)] = &[
            (&[0], 0),
            (&[16], 1),
            (&[8, 20], 20),
            (&[40, 144], 400),
            (&[24, 87, 28], 22300),
            (&[56, 15, 18, 6], 987654),
            (&[24, 255, 255], -1),
            (&[24, 254, 189], -323),
            (&[88, 248, 164, 147, 83], -123432109),
        ];
        for &(input, expected) in cases {
            assert_round_trip(input, BigInt::from(expected));
        }

        // A value beyond `i64::MAX`, which needs the layout with a separate length term.
        let big_cases: &[(&[u8], u64)] =
            &[(&[248, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0], 9223372036854775808)];
        for &(input, expected) in big_cases {
            assert_round_trip(input, BigInt::from(expected));
        }
    }
}