// beamr 0.4.9
// A Rust runtime with the BEAM's execution model, targeting Gleam.
//! Term representation — what all data is made of.
//!
//! A term is a single 64-bit machine word with low-bit tagging.
//! Immediates (small integers, atoms, pids, nil) fit entirely in
//! the word. Boxed values (tuples, lists, binaries, floats, big
//! integers, closures, maps, references) are tagged pointers into
//! the process-local heap.
pub mod bigint_convert;
pub mod bigint_math;
pub mod binary;
pub mod binary_ref;
pub mod boxed;
pub mod compare;
pub mod format;
pub mod hash;
#[cfg(feature = "json")]
pub mod json;
pub mod pid_ref;
pub mod reference_ref;
pub mod shared_binary;
pub mod sub_binary;

use crate::atom::Atom;

/// Width, in bits, of the primary tag kept in the low end of every term word.
const TAG_BITS: u32 = 3;
/// Mask that isolates the primary tag bits of a term word.
const TAG_MASK: u64 = !(u64::MAX << TAG_BITS);
/// Number of bits left for the payload once the tag bits are set aside.
const PAYLOAD_BITS: u32 = u64::BITS - TAG_BITS;

// Primary tag bit patterns. `0b110` and `0b111` have no constant in this file.
/// Tag of an immediate small integer.
const SMALL_INT_TAG: u64 = 0b000;
/// Tag of an immediate atom.
const ATOM_TAG: u64 = 0b001;
/// Tag of an immediate local pid.
const PID_TAG: u64 = 0b010;
/// Tag of the nil word.
const NIL_TAG: u64 = 0b011;
/// Tag of a boxed heap pointer.
const BOXED_TAG: u64 = 0b100;
/// Tag of a list (cons cell) heap pointer.
const LIST_TAG: u64 = 0b101;

/// Largest unsigned value that fits in the payload bits.
const UNSIGNED_PAYLOAD_MAX: u64 = u64::MAX >> (u64::BITS - PAYLOAD_BITS);
/// Largest signed value that fits in the payload bits (one bit goes to the sign).
const SMALL_INT_MAX: i64 = (UNSIGNED_PAYLOAD_MAX >> 1) as i64;
/// Smallest signed value that fits in the payload bits: the two's-complement
/// counterpart of [`SMALL_INT_MAX`].
const SMALL_INT_MIN: i64 = !SMALL_INT_MAX;

/// Primary classification of a [`Term`], as reported by [`Term::tag`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Tag {
    /// Immediate signed integer held in the payload bits.
    SmallInt,
    /// Immediate atom; the payload is the atom's index.
    Atom,
    /// Immediate local process identifier held in the payload bits.
    Pid,
    /// The canonical empty list / nil word.
    Nil,
    /// Tagged pointer to a boxed heap value. [`Term::tag`] also reports this
    /// variant for any word whose tag bits match no other variant.
    Boxed,
    /// Tagged pointer to a cons cell on the heap.
    List,
}

/// One BEAM term, encoded as a single tagged 64-bit word.
///
/// The primary tag lives in the three least-significant bits. The bits above
/// it hold either an immediate payload or, for boxed and list terms, the
/// address of a heap word whose (zero) low bits have been replaced by the tag.
#[derive(Clone, Copy, Debug)]
#[repr(transparent)]
pub struct Term(u64);

/// Equality is delegated to [`compare::partial_eq`] instead of comparing the
/// raw words directly.
impl PartialEq for Term {
    fn eq(&self, rhs: &Self) -> bool {
        compare::partial_eq(self, rhs)
    }
}

// `Eq` adds no methods; it only asserts that the `PartialEq` impl for `Term`
// is a full equivalence relation.
// NOTE(review): this relies on `compare::partial_eq` being reflexive — confirm.
impl Eq for Term {}

/// Every pair of terms is comparable: the partial order simply wraps the total
/// order supplied by the `Ord` impl, so it never yields `None`.
impl PartialOrd for Term {
    fn partial_cmp(&self, rhs: &Self) -> Option<std::cmp::Ordering> {
        Some(Ord::cmp(self, rhs))
    }
}

/// Total ordering over terms, delegated to [`compare::raw_cmp`].
impl Ord for Term {
    fn cmp(&self, rhs: &Self) -> std::cmp::Ordering {
        // `Term` is `Copy`, so both operands are handed over by value.
        let (lhs, rhs) = (*self, *rhs);
        compare::raw_cmp(lhs, rhs)
    }
}

impl Term {
    /// The canonical empty list / nil term: a word holding only the nil tag.
    pub const NIL: Self = Self(NIL_TAG);

    /// Smallest integer that fits in an immediate small-integer term.
    pub const SMALL_INT_MIN: i64 = SMALL_INT_MIN;

    /// Largest integer that fits in an immediate small-integer term.
    pub const SMALL_INT_MAX: i64 = SMALL_INT_MAX;

    /// Largest local pid payload that fits in an immediate pid term.
    pub const PID_MAX: u64 = UNSIGNED_PAYLOAD_MAX;

    /// Builds a small-integer term from a value known to be in range.
    ///
    /// Intended for in-range constants; arithmetic results that may overflow
    /// the immediate range should go through [`Term::try_small_int`] instead.
    /// An out-of-range `value` is a programming error. Debug and test builds
    /// catch it with `debug_assert!`; release builds skip the check and let the
    /// tag shift discard the high bits rather than panicking.
    #[must_use]
    pub fn small_int(value: i64) -> Self {
        debug_assert!(
            Self::try_small_int(value).is_some(),
            "small integer value is outside the immediate range"
        );
        let shifted = (value as u64) << TAG_BITS;
        Self(shifted | SMALL_INT_TAG)
    }

    /// Builds a small-integer term, or returns `None` when `value` lies outside
    /// [`Term::SMALL_INT_MIN`]`..=`[`Term::SMALL_INT_MAX`]. Never truncates.
    pub const fn try_small_int(value: i64) -> Option<Self> {
        match value {
            SMALL_INT_MIN..=SMALL_INT_MAX => {
                Some(Self(((value as u64) << TAG_BITS) | SMALL_INT_TAG))
            }
            _ => None,
        }
    }

    /// Builds an atom term whose payload is the atom's index.
    pub const fn atom(atom: Atom) -> Self {
        let index = atom.index() as u64;
        Self((index << TAG_BITS) | ATOM_TAG)
    }

    /// Builds an immediate local pid term from a value known to be in range.
    ///
    /// Intended for in-range pid literals; arbitrary `u64` input should go
    /// through [`Term::try_pid`] instead. An out-of-range `pid` is a
    /// programming error. Debug and test builds catch it with `debug_assert!`;
    /// release builds skip the check and let the tag shift discard the high
    /// bits rather than panicking.
    #[must_use]
    pub fn pid(pid: u64) -> Self {
        debug_assert!(Self::try_pid(pid).is_some(), "pid value is outside the immediate range");
        Self((pid << TAG_BITS) | PID_TAG)
    }

    /// Builds an immediate local pid term, or returns `None` when `pid` exceeds
    /// [`Term::PID_MAX`]. Never truncates.
    pub const fn try_pid(pid: u64) -> Option<Self> {
        if pid <= UNSIGNED_PAYLOAD_MAX {
            Some(Self((pid << TAG_BITS) | PID_TAG))
        } else {
            None
        }
    }

    /// Classifies this term by its primary tag.
    ///
    /// Only the exact [`Term::NIL`] word is reported as [`Tag::Nil`]. Any other
    /// word carrying the nil tag bits, and any bit pattern not claimed by
    /// another variant, is reported as [`Tag::Boxed`].
    pub const fn tag(self) -> Tag {
        // Nil is recognised by the whole word, not just by its tag bits.
        if self.is_nil() {
            return Tag::Nil;
        }

        match self.0 & TAG_MASK {
            SMALL_INT_TAG => Tag::SmallInt,
            ATOM_TAG => Tag::Atom,
            PID_TAG => Tag::Pid,
            LIST_TAG => Tag::List,
            // `BOXED_TAG`, a non-canonical nil-tagged word, and the two
            // unassigned bit patterns all land here.
            _ => Tag::Boxed,
        }
    }

    /// Returns `true` when the tag bits mark an immediate small integer.
    pub const fn is_small_int(self) -> bool {
        (self.0 & TAG_MASK) == SMALL_INT_TAG
    }

    /// Returns `true` when the tag bits mark an immediate atom.
    pub const fn is_atom(self) -> bool {
        (self.0 & TAG_MASK) == ATOM_TAG
    }

    /// Returns `true` for any pid term, local or remote.
    pub fn is_pid(self) -> bool {
        // The cheap immediate check runs first; the boxed lookup only happens
        // when the term is not a local pid.
        self.is_local_pid() || self.is_remote_pid()
    }

    /// Returns `true` when the tag bits mark an immediate local pid.
    pub const fn is_local_pid(self) -> bool {
        (self.0 & TAG_MASK) == PID_TAG
    }

    /// Returns `true` when [`boxed::ExternalPid::new`] accepts this term, i.e.
    /// it is a boxed remote pid.
    pub fn is_remote_pid(self) -> bool {
        boxed::ExternalPid::new(self).is_some()
    }

    /// Returns `true` only for the exact [`Term::NIL`] word.
    pub const fn is_nil(self) -> bool {
        self.0 == Self::NIL.0
    }

    /// Returns `true` when [`Term::tag`] classifies this term as [`Tag::Boxed`].
    pub const fn is_boxed(self) -> bool {
        matches!(self.tag(), Tag::Boxed)
    }

    /// Returns `true` when the tag bits mark a list (cons cell) pointer.
    pub const fn is_list(self) -> bool {
        (self.0 & TAG_MASK) == LIST_TAG
    }

    /// Wraps a heap address as a boxed term.
    ///
    /// `ptr` must be aligned so that its low tag bits are zero; pointers to
    /// heap words (`u64`) meet that requirement. Kept crate-visible so the
    /// boxed term modules can build terms without doing the bit manipulation
    /// themselves.
    pub(crate) fn boxed_ptr(ptr: *const u64) -> Self {
        Self::tagged_ptr(ptr, BOXED_TAG)
    }

    /// Wraps a pointer to a cons cell head as a list term.
    ///
    /// The same alignment requirement as [`Term::boxed_ptr`] applies.
    pub(crate) fn list_ptr(ptr: *const u64) -> Self {
        Self::tagged_ptr(ptr, LIST_TAG)
    }

    /// Strips the tag and returns the heap address of a boxed or list term;
    /// every other kind of term yields `None`.
    pub(crate) fn heap_ptr(self) -> Option<*const u64> {
        match self.tag() {
            Tag::Boxed | Tag::List => Some((self.0 & !TAG_MASK) as *const u64),
            Tag::SmallInt | Tag::Atom | Tag::Pid | Tag::Nil => None,
        }
    }

    /// Exposes the encoded word so it can be stored in heap layouts.
    pub(crate) const fn raw(self) -> u64 {
        let Self(word) = self;
        word
    }

    /// Rebuilds a term from an encoded word. The word is taken as-is; no
    /// validation is performed.
    pub(crate) const fn from_raw(raw: u64) -> Self {
        Self(raw)
    }

    /// Returns the integer value when this term is a small integer.
    pub const fn as_small_int(self) -> Option<i64> {
        if !self.is_small_int() {
            return None;
        }
        // Shifting the signed view is an arithmetic shift, so the sign of the
        // original value is preserved.
        Some((self.0 as i64) >> TAG_BITS)
    }

    /// Returns the atom when this term is an atom.
    pub const fn as_atom(self) -> Option<Atom> {
        if !self.is_atom() {
            return None;
        }
        let index = (self.0 >> TAG_BITS) as u32;
        Some(Atom::new(index))
    }

    /// Returns the pid payload when this term is an immediate local pid.
    pub const fn as_pid(self) -> Option<u64> {
        if !self.is_local_pid() {
            return None;
        }
        Some(self.0 >> TAG_BITS)
    }

    /// Combines an aligned heap address with a pointer tag.
    ///
    /// Alignment is only verified in debug and test builds.
    fn tagged_ptr(ptr: *const u64, tag: u64) -> Self {
        let addr = ptr as u64;
        debug_assert_eq!(addr & TAG_MASK, 0, "heap term pointers must be aligned");

        Self(addr | tag)
    }
}

/// Unit tests for the immediate term encodings (small integers, atoms, local
/// pids and nil). Boxed and list terms need a heap and are not exercised here.
#[cfg(test)]
mod tests {
    use super::{Tag, Term};
    use crate::atom::Atom;

    #[test]
    fn term_is_one_machine_word_with_private_tagged_value() {
        assert_eq!(std::mem::size_of::<Term>(), 8);
        assert_eq!(Term::small_int(1).tag(), Tag::SmallInt);
    }

    #[test]
    fn small_int_round_trips_and_preserves_sign() {
        for value in [0, 42, -1, Term::SMALL_INT_MAX, Term::SMALL_INT_MIN] {
            let term = Term::small_int(value);

            assert_eq!(term.as_small_int(), Some(value));
            assert!(term.is_small_int());
            assert_eq!(term.tag(), Tag::SmallInt);
        }
    }

    #[test]
    fn small_int_checked_constructor_rejects_out_of_range_values() {
        assert_eq!(Term::try_small_int(Term::SMALL_INT_MAX + 1), None);
        assert_eq!(Term::try_small_int(Term::SMALL_INT_MIN - 1), None);
    }

    /// The rejection test alone would still pass if `try_small_int` returned
    /// `None` for everything, so the accept path is pinned here: every in-range
    /// value, boundaries included, must produce a term that decodes back to
    /// the input.
    #[test]
    fn small_int_checked_constructor_accepts_in_range_values() {
        for value in [0, 42, -1, Term::SMALL_INT_MAX, Term::SMALL_INT_MIN] {
            let term = Term::try_small_int(value);

            assert_eq!(term.and_then(Term::as_small_int), Some(value));
        }
    }

    /// PR-7: the infallible `small_int` constructor previously `panic!`ed on
    /// out-of-range input. It now relies on `debug_assert!` and otherwise never
    /// panics; the boundary in-range values must round-trip, and out-of-range
    /// runtime values must flow through the explicit-error `try_small_int` path
    /// rather than aborting the VM.
    #[test]
    fn small_int_boundary_constructs_without_panicking_and_overflow_uses_error_path() {
        assert_eq!(
            Term::small_int(Term::SMALL_INT_MAX).as_small_int(),
            Some(Term::SMALL_INT_MAX)
        );
        assert_eq!(
            Term::small_int(Term::SMALL_INT_MIN).as_small_int(),
            Some(Term::SMALL_INT_MIN)
        );
        // Overflowing arithmetic must use the fallible path, which reports the
        // error instead of panicking.
        assert!(Term::try_small_int(Term::SMALL_INT_MAX + 1).is_none());
    }

    #[test]
    fn atom_round_trips_without_becoming_nil() {
        for atom in [Atom::OK, Atom::ERROR, Atom::NIL] {
            let term = Term::atom(atom);

            assert_eq!(term.as_atom(), Some(atom));
            assert!(term.is_atom());
            assert_eq!(term.tag(), Tag::Atom);
            assert!(!term.is_small_int());
            assert!(!term.is_pid());
            assert!(!term.is_nil());
        }
    }

    #[test]
    fn pid_round_trips() {
        for pid in [0, 12_345, Term::PID_MAX] {
            let term = Term::pid(pid);

            assert_eq!(term.as_pid(), Some(pid));
            assert!(term.is_pid());
            assert!(term.is_local_pid());
            assert!(!term.is_remote_pid());
            assert_eq!(term.tag(), Tag::Pid);
            assert!(!term.is_small_int());
            assert!(!term.is_atom());
        }
    }

    #[test]
    fn pid_checked_constructor_rejects_out_of_range_values() {
        assert_eq!(Term::try_pid(Term::PID_MAX + 1), None);
    }

    /// Counterpart of the rejection test: every in-range pid, boundaries
    /// included, must be accepted by `try_pid` and decode back to the input.
    #[test]
    fn pid_checked_constructor_accepts_in_range_values() {
        for pid in [0, 12_345, Term::PID_MAX] {
            assert_eq!(Term::try_pid(pid).and_then(Term::as_pid), Some(pid));
        }
    }

    /// PR-7: the infallible `pid` constructor previously `panic!`ed on
    /// out-of-range input. It now relies on `debug_assert!` and otherwise never
    /// panics; the boundary in-range pid must round-trip, and an out-of-range
    /// pid must flow through the explicit-error `try_pid` path.
    #[test]
    fn pid_boundary_constructs_without_panicking_and_overflow_uses_error_path() {
        assert_eq!(Term::pid(Term::PID_MAX).as_pid(), Some(Term::PID_MAX));
        assert!(Term::try_pid(Term::PID_MAX + 1).is_none());
    }

    #[test]
    fn nil_is_distinguished_from_integer_atom_and_pid_values() {
        assert!(Term::NIL.is_nil());
        assert_eq!(Term::NIL.tag(), Tag::Nil);
        assert!(!Term::small_int(0).is_nil());
        assert!(!Term::atom(Atom::NIL).is_nil());
        assert!(!Term::pid(0).is_nil());
        assert_ne!(Term::NIL, Term::small_int(0));
    }

    #[test]
    fn tag_dispatch_and_predicates_agree_for_immediates() {
        let terms = [
            (Term::small_int(1), Tag::SmallInt),
            (Term::atom(Atom::OK), Tag::Atom),
            (Term::pid(1), Tag::Pid),
            (Term::NIL, Tag::Nil),
        ];

        // Each predicate must be true for exactly the term whose tag it names.
        for (term, tag) in terms {
            assert_eq!(term.tag(), tag);
            assert_eq!(term.is_small_int(), tag == Tag::SmallInt);
            assert_eq!(term.is_atom(), tag == Tag::Atom);
            assert_eq!(term.is_pid(), tag == Tag::Pid);
            assert_eq!(term.is_nil(), tag == Tag::Nil);
            assert_eq!(term.is_boxed(), tag == Tag::Boxed);
            assert_eq!(term.is_list(), tag == Tag::List);
        }
    }

    #[test]
    fn cross_type_extractors_return_none() {
        let integer = Term::small_int(42);
        let atom = Term::atom(Atom::OK);
        let pid = Term::pid(12_345);
        let nil = Term::NIL;

        assert_eq!(integer.as_atom(), None);
        assert_eq!(integer.as_pid(), None);
        assert_eq!(atom.as_small_int(), None);
        assert_eq!(atom.as_pid(), None);
        assert_eq!(pid.as_small_int(), None);
        assert_eq!(pid.as_atom(), None);
        assert_eq!(nil.as_small_int(), None);
        assert_eq!(nil.as_atom(), None);
        assert_eq!(nil.as_pid(), None);
    }
}