// aver/nan_value.rs
1//! NaN-boxed compact Value representation (8 bytes per value).
2//!
3//! Layout: every value is a `u64` interpreted as an IEEE 754 `f64`.
4//!
5//! - **Float**: any f64 that is NOT a quiet NaN with our marker -> stored directly.
6//! - **Everything else**: encoded as a quiet NaN with tag + payload in the mantissa.
7//!
8//! IEEE 754 quiet NaN: exponent=0x7FF (all 1s), quiet bit=1, plus our marker bit.
9//! We use `0x7FFC` as 14-bit prefix (bits 63-50), leaving bits 49-0 free.
10//!
11//! ```text
12//! 63      50 49  46 45                    0
13//! ┌────────┬──────┬────────────────────────┐
14//! │ 0x7FFC │ tag  │       payload          │
15//! │ 14 bits│ 4 bit│       46 bits          │
16//! └────────┴──────┴────────────────────────┘
17//! ```
18//!
19//! Tag map:
20//!   0  = Immediate       payload 0-2: false/true/unit
21//!   1  = Symbol          payload bits 0-1: fn/builtin/namespace/nullary-variant; rest=symbol index
22//!   2  = Int             payload bit45: 0=inline(45-bit signed), 1=arena index
23//!   3  = String          payload bit45: 0=inline small string (len + 5 bytes), 1=arena index
24//!   4  = Some            payload bit45: 0=inline inner, 1=arena index
25//!   5  = None            singleton
26//!   6  = Ok              payload bit45: 0=inline inner, 1=arena index
27//!   7  = Err             payload bit45: 0=inline inner, 1=arena index
28//!   8  = List            payload bit45: 0=empty list, 1=arena index
29//!   9  = Vector          payload bit45: 0=empty vector, 1=arena index
30//!   10 = Map             payload bit45: 0=empty map, 1=arena index
31//!   11 = Record          payload bit45: 1=arena index
32//!   12 = Variant         payload bit45: 1=arena index
33//!   13 = Tuple           payload bit45: 1=arena index
34//!   14 = InlineVariant  [45:30]=ctor_id, [29]=kind(0=int,1=imm), [28:0]=value
35//!   15 = (reserved)
36
37use std::cmp::Ordering;
38use std::hash::{Hash, Hasher};
39use std::ops::Deref;
40use std::rc::Rc;
41
42use crate::value::FunctionValue;
43
/// Copy-on-write map used for Map values: O(1) amortized insert while the
/// map has a single owner, O(n) clone the first time a shared map is written.
/// Keyed by a `u64` hash; the entry keeps the original (key, value) pair so
/// hash collisions can be disambiguated by the caller.
pub type PersistentMap = aver_rt::AverMap<u64, (NanValue, NanValue)>;
46
// ---------------------------------------------------------------------------
// Bit layout constants
// ---------------------------------------------------------------------------

// Box marker: sign clear, exponent all ones (bits 62..52), quiet bit (51)
// and our marker bit (50) set. QNAN_MASK also covers the sign bit so
// negative quiet NaNs are never mistaken for boxed values.
const QNAN: u64 = 0x7FFC_0000_0000_0000;
const QNAN_MASK: u64 = 0xFFFC_0000_0000_0000;
// Tag lives in bits 49..46; the payload is the low 46 bits.
const TAG_SHIFT: u32 = 46;
const TAG_MASK: u64 = 0xF;
const PAYLOAD_MASK: u64 = (1u64 << 46) - 1;

const TAG_IMMEDIATE: u64 = 0;
const TAG_SYMBOL: u64 = 1;
const TAG_INT: u64 = 2;
const TAG_STRING: u64 = 3;
const TAG_SOME: u64 = 4;
const TAG_NONE: u64 = 5;
const TAG_OK: u64 = 6;
const TAG_ERR: u64 = 7;
const TAG_LIST: u64 = 8;
const TAG_VECTOR: u64 = 9; // sequential: List(8) + Vector(9) → tag & 0b1110 == 0b1000
const TAG_MAP: u64 = 10;
const TAG_RECORD: u64 = 11;
const TAG_VARIANT: u64 = 12;
const TAG_TUPLE: u64 = 13;
const TAG_INLINE_VARIANT: u64 = 14;

// Symbol sub-kind, stored in the low two payload bits (the index sits above).
const SYMBOL_FN: u64 = 0;
const SYMBOL_BUILTIN: u64 = 1;
const SYMBOL_NAMESPACE: u64 = 2;
const SYMBOL_NULLARY_VARIANT: u64 = 3;
const SYMBOL_KIND_MASK: u64 = 0b11;

// Immediate payloads (tag 0).
const IMM_FALSE: u64 = 0;
const IMM_TRUE: u64 = 1;
const IMM_UNIT: u64 = 2;

// Wrapper (Some/Ok/Err) inline encoding. When the arena bit (45) is clear,
// bits 44..43 say how the inner value is inlined and bits 42..0 carry it:
// a plain immediate payload, a 43-bit signed int, or nothing (inner = None).
const WRAP_SOME: u64 = 0;
const WRAP_OK: u64 = 1;
const WRAP_ERR: u64 = 2;
const WRAPPER_INLINE_KIND_SHIFT: u32 = 43;
const WRAPPER_INLINE_KIND_MASK: u64 = 0b11 << WRAPPER_INLINE_KIND_SHIFT;
const WRAPPER_INLINE_PAYLOAD_MASK: u64 = (1u64 << WRAPPER_INLINE_KIND_SHIFT) - 1;
const WRAPPER_INLINE_IMMEDIATE: u64 = 0;
const WRAPPER_INLINE_INT: u64 = 1;
const WRAPPER_INLINE_NONE: u64 = 2;
const WRAPPER_INT_INLINE_MASK: u64 = WRAPPER_INLINE_PAYLOAD_MASK;
const WRAPPER_INT_INLINE_MAX: i64 = (1i64 << 42) - 1;
const WRAPPER_INT_INLINE_MIN: i64 = -(1i64 << 42);

// Payload bit 45 distinguishes inline data (0) from an arena index (1).
const ARENA_REF_BIT: u64 = 1u64 << 45;
const INT_BIG_BIT: u64 = ARENA_REF_BIT;
// Inline ints are 45-bit two's complement in bits 44..0 (bit 44 = sign).
const INT_INLINE_MASK: u64 = (1u64 << 45) - 1;
const INT_INLINE_MAX: i64 = (1i64 << 44) - 1;
const INT_INLINE_MIN: i64 = -(1i64 << 44);

// Inline small strings: 3-bit length in bits 42..40, up to 5 raw UTF-8
// bytes in bits 39..0 (byte 0 occupies the lowest 8 bits).
const STRING_ARENA_BIT: u64 = ARENA_REF_BIT;
const STRING_INLINE_LEN_SHIFT: u32 = 40;
const STRING_INLINE_LEN_MASK: u64 = 0b111 << STRING_INLINE_LEN_SHIFT;
const STRING_INLINE_MAX_BYTES: usize = 5;

// -- Inline variant layout --------------------------------------------------
// [45:30] ctor_id (16 bits)
// [29]    kind    (0=int, 1=immediate)
// [28:0]  value   (int: 29-bit signed | imm: bits[28:27] = false/true/unit/none)
const IV_CTOR_SHIFT: u32 = 30;
const IV_CTOR_MASK: u64 = 0xFFFF;
const IV_KIND_BIT: u64 = 1 << 29;
const IV_INT_MASK: u64 = (1u64 << 29) - 1;
const IV_INT_SIGN_BIT: u64 = 1u64 << 28;
const IV_INT_MAX: i64 = (1i64 << 28) - 1;
const IV_INT_MIN: i64 = -(1i64 << 28);
const IV_IMM_SHIFT: u32 = 27;
const IV_IMM_FALSE: u64 = 0;
const IV_IMM_TRUE: u64 = 1;
const IV_IMM_UNIT: u64 = 2;
const IV_IMM_NONE: u64 = 3;
123
124// ---------------------------------------------------------------------------
125// NanValue - the 8-byte compact value
126// ---------------------------------------------------------------------------
127
/// NaN-boxed value: a single `u64` holding either a raw `f64` or a tagged
/// payload (see the module docs for the layout). `Copy` on purpose — values
/// are 8 bytes and freely duplicated; heap-backed data lives in a side
/// `Arena` and is referenced by index.
#[derive(Clone, Copy)]
pub struct NanValue(u64);
130
/// View of a string value: either a slice borrowed from the arena, or the
/// up-to-5 bytes unpacked from an inline small-string payload.
#[derive(Clone, Copy, Debug)]
pub enum NanString<'a> {
    // Arena-backed string, borrowed for 'a.
    Borrowed(&'a str),
    // Inline small string: the first `len` bytes of `bytes` are valid UTF-8.
    Inline {
        len: u8,
        bytes: [u8; STRING_INLINE_MAX_BYTES],
    },
}
139
140impl<'a> NanString<'a> {
141    #[inline]
142    pub fn as_str(&self) -> &str {
143        match self {
144            NanString::Borrowed(s) => s,
145            NanString::Inline { len, bytes } => std::str::from_utf8(&bytes[..*len as usize])
146                .expect("NanString inline payload must be valid UTF-8"),
147        }
148    }
149}
150
151impl Deref for NanString<'_> {
152    type Target = str;
153
154    #[inline]
155    fn deref(&self) -> &Self::Target {
156        self.as_str()
157    }
158}
159
160impl PartialEq for NanString<'_> {
161    #[inline]
162    fn eq(&self, other: &Self) -> bool {
163        self.as_str() == other.as_str()
164    }
165}
166
167impl Eq for NanString<'_> {}
168
169impl PartialEq<&str> for NanString<'_> {
170    #[inline]
171    fn eq(&self, other: &&str) -> bool {
172        self.as_str() == *other
173    }
174}
175
176impl PartialEq<NanString<'_>> for &str {
177    #[inline]
178    fn eq(&self, other: &NanString<'_>) -> bool {
179        *self == other.as_str()
180    }
181}
182
183impl PartialOrd for NanString<'_> {
184    #[inline]
185    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
186        Some(self.cmp(other))
187    }
188}
189
190impl Ord for NanString<'_> {
191    #[inline]
192    fn cmp(&self, other: &Self) -> Ordering {
193        self.as_str().cmp(other.as_str())
194    }
195}
196
197impl Hash for NanString<'_> {
198    #[inline]
199    fn hash<H: Hasher>(&self, state: &mut H) {
200        self.as_str().hash(state);
201    }
202}
203
204impl std::fmt::Display for NanString<'_> {
205    #[inline]
206    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
207        f.write_str(self.as_str())
208    }
209}
210
// -- Encoding / decoding ---------------------------------------------------

impl NanValue {
    /// Sign-extend an inline (non-arena) int payload to `i64`.
    ///
    /// Inline ints are 45-bit two's complement in bits 44..0; bit 44 is the
    /// sign bit, so when it is set we OR in the high bits to complete the
    /// sign extension.
    #[inline]
    fn decode_inline_int_payload(payload: u64) -> i64 {
        debug_assert!(payload & INT_BIG_BIT == 0);
        let raw = payload & INT_INLINE_MASK;
        if raw & (1u64 << 44) != 0 {
            (raw | !INT_INLINE_MASK) as i64
        } else {
            raw as i64
        }
    }
224
    /// Assemble a boxed value from `tag` (bits 49..46) and `payload`
    /// (bits 45..0) under the quiet-NaN marker prefix.
    #[inline]
    fn encode(tag: u64, payload: u64) -> Self {
        debug_assert!(tag <= TAG_MASK);
        debug_assert!(payload <= PAYLOAD_MASK);
        NanValue(QNAN | (tag << TAG_SHIFT) | payload)
    }

    /// True when the bits carry our marker prefix, i.e. the value is a
    /// tagged box rather than a plain float.
    #[inline]
    fn is_nan_boxed(self) -> bool {
        (self.0 & QNAN_MASK) == QNAN
    }

    /// 4-bit tag field. Only meaningful when `is_nan_boxed()` is true.
    #[inline]
    fn tag(self) -> u64 {
        (self.0 >> TAG_SHIFT) & TAG_MASK
    }

    /// Low 46 payload bits. Only meaningful when `is_nan_boxed()` is true.
    #[inline]
    fn payload(self) -> u64 {
        self.0 & PAYLOAD_MASK
    }
246
247    // -- Constructors ------------------------------------------------------
248
249    #[inline]
250    pub fn new_float(f: f64) -> Self {
251        let bits = f.to_bits();
252        if (bits & QNAN_MASK) == QNAN {
253            NanValue(bits ^ 1)
254        } else {
255            NanValue(bits)
256        }
257    }
258
259    #[inline]
260    pub fn as_float(self) -> f64 {
261        f64::from_bits(self.0)
262    }
263
    /// Inline int: the value fits 45-bit two's complement (bits 44..0).
    #[inline]
    pub fn new_int_inline(i: i64) -> Self {
        debug_assert!((INT_INLINE_MIN..=INT_INLINE_MAX).contains(&i));
        let payload = (i as u64) & INT_INLINE_MASK;
        Self::encode(TAG_INT, payload)
    }

    /// Spilled int: arena bit set, payload is the arena slot.
    #[inline]
    pub fn new_int_arena(arena_index: u32) -> Self {
        Self::encode(TAG_INT, INT_BIG_BIT | (arena_index as u64))
    }

    /// Build an int, spilling to the arena only when it exceeds 45 bits.
    #[inline]
    pub fn new_int(i: i64, arena: &mut Arena) -> Self {
        if (INT_INLINE_MIN..=INT_INLINE_MAX).contains(&i) {
            Self::new_int_inline(i)
        } else {
            let idx = arena.push_i64(i);
            Self::new_int_arena(idx)
        }
    }

    /// Read an int back, consulting the arena for the spilled form.
    #[inline]
    pub fn as_int(self, arena: &Arena) -> i64 {
        let p = self.payload();
        if p & INT_BIG_BIT != 0 {
            let idx = (p & !INT_BIG_BIT) as u32;
            arena.get_i64(idx)
        } else {
            Self::decode_inline_int_payload(p)
        }
    }

    /// Raw payload of an inline int; `None` for floats, non-ints, and
    /// arena-backed ints.
    #[inline]
    fn inline_int_payload(self) -> Option<u64> {
        (self.is_nan_boxed() && self.tag() == TAG_INT && self.payload() & INT_BIG_BIT == 0)
            .then_some(self.payload())
    }

    /// Decoded value of an inline int; `None` for floats, non-ints, and
    /// arena-backed ints.
    #[inline]
    pub fn inline_int_value(self) -> Option<i64> {
        self.inline_int_payload()
            .map(Self::decode_inline_int_payload)
    }
308
    // -- Immediates --------------------------------------------------------

    // Canonical singletons: each is a fixed bit pattern, so equality on
    // these reduces to plain u64 comparison. The EMPTY_* containers have the
    // arena bit clear — no allocation behind them.
    pub const FALSE: NanValue = NanValue(QNAN | (TAG_IMMEDIATE << TAG_SHIFT) | IMM_FALSE);
    pub const TRUE: NanValue = NanValue(QNAN | (TAG_IMMEDIATE << TAG_SHIFT) | IMM_TRUE);
    pub const UNIT: NanValue = NanValue(QNAN | (TAG_IMMEDIATE << TAG_SHIFT) | IMM_UNIT);
    pub const NONE: NanValue = NanValue(QNAN | (TAG_NONE << TAG_SHIFT));
    pub const EMPTY_LIST: NanValue = NanValue(QNAN | (TAG_LIST << TAG_SHIFT));
    pub const EMPTY_MAP: NanValue = NanValue(QNAN | (TAG_MAP << TAG_SHIFT));
    pub const EMPTY_VECTOR: NanValue = NanValue(QNAN | (TAG_VECTOR << TAG_SHIFT));
    pub const EMPTY_STRING: NanValue = NanValue(QNAN | (TAG_STRING << TAG_SHIFT));

    /// Map a Rust bool to the TRUE/FALSE singleton.
    #[inline]
    pub fn new_bool(b: bool) -> Self {
        if b { Self::TRUE } else { Self::FALSE }
    }

    /// Bitwise comparison against `TRUE`; note that every non-`TRUE` value —
    /// including non-booleans — reads as `false`.
    #[inline]
    pub fn as_bool(self) -> bool {
        self.0 == Self::TRUE.0
    }

    /// Payload of a plain immediate (false/true/unit); `None` otherwise.
    #[inline]
    fn plain_immediate_payload(self) -> Option<u64> {
        (self.is_nan_boxed() && self.tag() == TAG_IMMEDIATE && self.payload() <= IMM_UNIT)
            .then_some(self.payload())
    }
335
    /// Map a wrapper tag back to its `WRAP_*` kind.
    ///
    /// # Panics
    /// If the value's tag is not Some/Ok/Err.
    #[inline]
    fn wrapper_kind(self) -> u64 {
        match self.tag() {
            TAG_SOME => WRAP_SOME,
            TAG_OK => WRAP_OK,
            TAG_ERR => WRAP_ERR,
            _ => panic!("wrapper_kind() called on non-wrapper"),
        }
    }

    /// For an inline (non-arena) wrapper, return the `WRAPPER_INLINE_*`
    /// kind from bits 44..43; `None` for everything else.
    #[inline]
    fn wrapper_inline_kind(self) -> Option<u64> {
        if !self.is_nan_boxed() {
            return None;
        }
        match self.tag() {
            TAG_SOME | TAG_OK | TAG_ERR if self.payload() & ARENA_REF_BIT == 0 => {
                Some((self.payload() & WRAPPER_INLINE_KIND_MASK) >> WRAPPER_INLINE_KIND_SHIFT)
            }
            _ => None,
        }
    }

    /// Sign-extend the 43-bit inline wrapper int (bit 42 is the sign bit).
    #[inline]
    fn decode_wrapper_inline_int_payload(payload: u64) -> i64 {
        let raw = payload & WRAPPER_INT_INLINE_MASK;
        if raw & (1u64 << 42) != 0 {
            (raw | !WRAPPER_INT_INLINE_MASK) as i64
        } else {
            raw as i64
        }
    }

    /// Truncate an in-range int to the 43-bit inline wrapper payload.
    #[inline]
    fn encode_wrapper_inline_int(i: i64) -> u64 {
        debug_assert!((WRAPPER_INT_INLINE_MIN..=WRAPPER_INT_INLINE_MAX).contains(&i));
        (i as u64) & WRAPPER_INT_INLINE_MASK
    }

    /// Reconstruct the inner value of an inline wrapper; `None` when the
    /// value is not an inline wrapper (or carries an unknown inline kind).
    #[inline]
    fn wrapper_inline_inner(self) -> Option<NanValue> {
        let kind = self.wrapper_inline_kind()?;
        let payload = self.payload() & WRAPPER_INLINE_PAYLOAD_MASK;
        match kind {
            WRAPPER_INLINE_IMMEDIATE => Some(Self::encode(TAG_IMMEDIATE, payload)),
            WRAPPER_INLINE_INT => Some(Self::new_int_inline(
                Self::decode_wrapper_inline_int_payload(payload),
            )),
            WRAPPER_INLINE_NONE => Some(Self::NONE),
            _ => None,
        }
    }

    /// Build a Some/Ok/Err box whose inner value is packed inline
    /// (arena bit clear, inline kind in bits 44..43).
    #[inline]
    fn new_inline_wrapper(tag: u64, inline_kind: u64, payload: u64) -> Self {
        debug_assert!(matches!(tag, TAG_SOME | TAG_OK | TAG_ERR));
        debug_assert!(inline_kind <= WRAPPER_INLINE_NONE);
        debug_assert!(payload <= WRAPPER_INLINE_PAYLOAD_MASK);
        Self::encode(tag, (inline_kind << WRAPPER_INLINE_KIND_SHIFT) | payload)
    }

    /// Split a wrapper into `(WRAP_* kind, inner value)`, fetching the inner
    /// value from the arena when the wrapper is arena-backed.
    /// Returns `None` for non-wrappers.
    #[inline]
    fn wrapper_parts(self, arena: &Arena) -> Option<(u64, NanValue)> {
        if !self.is_nan_boxed() {
            return None;
        }
        match self.tag() {
            TAG_SOME | TAG_OK | TAG_ERR if self.payload() & ARENA_REF_BIT != 0 => {
                Some((self.wrapper_kind(), arena.get_boxed(self.arena_index())))
            }
            TAG_SOME | TAG_OK | TAG_ERR => self
                .wrapper_inline_inner()
                .map(|inner| (self.wrapper_kind(), inner)),
            _ => None,
        }
    }
412
413    // -- Wrappers (Some/Ok/Err) -------------------------------------------
414
415    #[inline]
416    pub fn new_some(inner_index: u32) -> Self {
417        Self::encode(TAG_SOME, ARENA_REF_BIT | (inner_index as u64))
418    }
419
420    #[inline]
421    pub fn new_ok(inner_index: u32) -> Self {
422        Self::encode(TAG_OK, ARENA_REF_BIT | (inner_index as u64))
423    }
424
425    #[inline]
426    pub fn new_err(inner_index: u32) -> Self {
427        Self::encode(TAG_ERR, ARENA_REF_BIT | (inner_index as u64))
428    }
429
430    #[inline]
431    pub fn wrapper_index(self) -> u32 {
432        debug_assert!(
433            self.is_nan_boxed()
434                && matches!(self.tag(), TAG_SOME | TAG_OK | TAG_ERR)
435                && self.payload() & ARENA_REF_BIT != 0
436        );
437        self.arena_index()
438    }
439
440    #[inline]
441    pub fn wrapper_inner(self, arena: &Arena) -> NanValue {
442        self.wrapper_parts(arena)
443            .map(|(_, inner)| inner)
444            .expect("wrapper_inner() called on non-wrapper")
445    }
446
447    #[inline]
448    fn wrap_value(kind: u64, inner: NanValue, arena: &mut Arena) -> Self {
449        if let Some(payload) = inner.plain_immediate_payload() {
450            let tag = match kind {
451                WRAP_SOME => TAG_SOME,
452                WRAP_OK => TAG_OK,
453                WRAP_ERR => TAG_ERR,
454                _ => unreachable!("invalid wrapper kind"),
455            };
456            Self::new_inline_wrapper(tag, WRAPPER_INLINE_IMMEDIATE, payload)
457        } else if inner.is_none() {
458            let tag = match kind {
459                WRAP_SOME => TAG_SOME,
460                WRAP_OK => TAG_OK,
461                WRAP_ERR => TAG_ERR,
462                _ => unreachable!("invalid wrapper kind"),
463            };
464            Self::new_inline_wrapper(tag, WRAPPER_INLINE_NONE, 0)
465        } else if let Some(value) = inner.inline_int_value() {
466            if (WRAPPER_INT_INLINE_MIN..=WRAPPER_INT_INLINE_MAX).contains(&value) {
467                let tag = match kind {
468                    WRAP_SOME => TAG_SOME,
469                    WRAP_OK => TAG_OK,
470                    WRAP_ERR => TAG_ERR,
471                    _ => unreachable!("invalid wrapper kind"),
472                };
473                return Self::new_inline_wrapper(
474                    tag,
475                    WRAPPER_INLINE_INT,
476                    Self::encode_wrapper_inline_int(value),
477                );
478            }
479            let idx = arena.push_boxed(inner);
480            match kind {
481                WRAP_SOME => Self::new_some(idx),
482                WRAP_OK => Self::new_ok(idx),
483                WRAP_ERR => Self::new_err(idx),
484                _ => unreachable!("invalid wrapper kind"),
485            }
486        } else {
487            let idx = arena.push_boxed(inner);
488            match kind {
489                WRAP_SOME => Self::new_some(idx),
490                WRAP_OK => Self::new_ok(idx),
491                WRAP_ERR => Self::new_err(idx),
492                _ => unreachable!("invalid wrapper kind"),
493            }
494        }
495    }
496
497    #[inline]
498    pub fn new_some_value(inner: NanValue, arena: &mut Arena) -> Self {
499        Self::wrap_value(WRAP_SOME, inner, arena)
500    }
501
502    #[inline]
503    pub fn new_ok_value(inner: NanValue, arena: &mut Arena) -> Self {
504        Self::wrap_value(WRAP_OK, inner, arena)
505    }
506
507    #[inline]
508    pub fn new_err_value(inner: NanValue, arena: &mut Arena) -> Self {
509        Self::wrap_value(WRAP_ERR, inner, arena)
510    }
511
    // -- Arena-backed constructors -----------------------------------------

    /// String stored in the arena at `arena_index`.
    #[inline]
    pub fn new_string(arena_index: u32) -> Self {
        Self::encode(TAG_STRING, STRING_ARENA_BIT | (arena_index as u64))
    }

    /// Pack up to 5 bytes of UTF-8 directly into the payload:
    /// length in bits 42..40, byte `i` in bits `8*i+7 .. 8*i`.
    #[inline]
    fn new_small_string_bytes(bytes: &[u8]) -> Self {
        debug_assert!(bytes.len() <= STRING_INLINE_MAX_BYTES);
        let mut payload = (bytes.len() as u64) << STRING_INLINE_LEN_SHIFT;
        for (idx, byte) in bytes.iter().enumerate() {
            payload |= (*byte as u64) << (idx * 8);
        }
        Self::encode(TAG_STRING, payload)
    }

    /// Unpack an inline small string; `None` when the value is not a string,
    /// is arena-backed, or carries a corrupt length field.
    #[inline]
    pub(crate) fn small_string(self) -> Option<NanString<'static>> {
        if !self.is_nan_boxed()
            || self.tag() != TAG_STRING
            || self.payload() & STRING_ARENA_BIT != 0
        {
            return None;
        }
        let payload = self.payload();
        let len = ((payload & STRING_INLINE_LEN_MASK) >> STRING_INLINE_LEN_SHIFT) as u8;
        // The 3-bit length field can encode 6 or 7; treat those as corrupt.
        if len as usize > STRING_INLINE_MAX_BYTES {
            return None;
        }
        let mut bytes = [0u8; STRING_INLINE_MAX_BYTES];
        for (idx, slot) in bytes.iter_mut().take(len as usize).enumerate() {
            *slot = ((payload >> (idx * 8)) & 0xFF) as u8;
        }
        Some(NanString::Inline { len, bytes })
    }

    /// Build a string value, preferring the inline form for short strings.
    #[inline]
    pub fn new_string_value(s: &str, arena: &mut Arena) -> Self {
        if s.len() <= STRING_INLINE_MAX_BYTES {
            Self::new_small_string_bytes(s.as_bytes())
        } else {
            Self::new_string(arena.push_string(s))
        }
    }
557
558    #[inline]
559    pub fn new_list(arena_index: u32) -> Self {
560        Self::encode(TAG_LIST, ARENA_REF_BIT | (arena_index as u64))
561    }
562
563    #[inline]
564    pub fn new_tuple(arena_index: u32) -> Self {
565        Self::encode(TAG_TUPLE, ARENA_REF_BIT | (arena_index as u64))
566    }
567
568    #[inline]
569    pub fn new_map(arena_index: u32) -> Self {
570        Self::encode(TAG_MAP, ARENA_REF_BIT | (arena_index as u64))
571    }
572
573    #[inline]
574    pub fn new_vector(arena_index: u32) -> Self {
575        Self::encode(TAG_VECTOR, ARENA_REF_BIT | (arena_index as u64))
576    }
577
578    #[inline]
579    pub fn new_record(arena_index: u32) -> Self {
580        Self::encode(TAG_RECORD, ARENA_REF_BIT | (arena_index as u64))
581    }
582
583    #[inline]
584    pub fn new_variant(arena_index: u32) -> Self {
585        Self::encode(TAG_VARIANT, ARENA_REF_BIT | (arena_index as u64))
586    }
587
    /// Symbol: 2-bit sub-kind in the low payload bits, index shifted above it.
    #[inline]
    fn new_symbol(symbol_kind: u64, symbol_index: u32) -> Self {
        Self::encode(TAG_SYMBOL, symbol_kind | ((symbol_index as u64) << 2))
    }

    /// The `SYMBOL_*` sub-kind of a symbol value.
    #[inline]
    pub(crate) fn symbol_kind(self) -> u64 {
        debug_assert!(self.is_nan_boxed() && self.tag() == TAG_SYMBOL);
        self.payload() & SYMBOL_KIND_MASK
    }

    /// The index carried by a symbol value (which table it indexes depends
    /// on `symbol_kind` — fn, builtin, namespace, or nullary-variant ctor).
    #[inline]
    pub(crate) fn symbol_index(self) -> u32 {
        debug_assert!(self.is_nan_boxed() && self.tag() == TAG_SYMBOL);
        (self.payload() >> 2) as u32
    }

    /// Field-less variant, identified by `symbol_index`.
    #[inline]
    pub fn new_nullary_variant(symbol_index: u32) -> Self {
        Self::new_symbol(SYMBOL_NULLARY_VARIANT, symbol_index)
    }
609
    /// Try to create an inline variant (TAG 14) for a single-field variant
    /// whose inner value is a small int, bool, unit, or none.
    /// Returns None if the value can't be inlined.
    #[inline]
    pub fn try_new_inline_variant(ctor_id: u32, inner: NanValue) -> Option<Self> {
        // The ctor id must fit the 16-bit field at bits 45..30.
        if ctor_id > IV_CTOR_MASK as u32 {
            return None;
        }
        let ctor_bits = (ctor_id as u64) << IV_CTOR_SHIFT;

        if inner.is_nan_boxed() {
            match inner.tag() {
                // Inline int that also fits the narrower 29-bit field.
                TAG_INT if inner.payload() & INT_BIG_BIT == 0 => {
                    let i = Self::decode_inline_int_payload(inner.payload());
                    if (IV_INT_MIN..=IV_INT_MAX).contains(&i) {
                        let int_bits = (i as u64) & IV_INT_MASK;
                        return Some(Self::encode(TAG_INLINE_VARIANT, ctor_bits | int_bits));
                    }
                }
                // false/true/unit → 2-bit code at bits 28..27, kind bit set.
                TAG_IMMEDIATE => {
                    let imm = match inner.payload() {
                        IMM_FALSE => IV_IMM_FALSE,
                        IMM_TRUE => IV_IMM_TRUE,
                        IMM_UNIT => IV_IMM_UNIT,
                        _ => return None,
                    };
                    return Some(Self::encode(
                        TAG_INLINE_VARIANT,
                        ctor_bits | IV_KIND_BIT | (imm << IV_IMM_SHIFT),
                    ));
                }
                // The None singleton gets its own immediate code.
                TAG_NONE => {
                    return Some(Self::encode(
                        TAG_INLINE_VARIANT,
                        ctor_bits | IV_KIND_BIT | (IV_IMM_NONE << IV_IMM_SHIFT),
                    ));
                }
                _ => {}
            }
        }
        // Floats and every other boxed shape cannot be inlined.
        None
    }
652
    /// Constructor id of an inline variant (bits 45..30).
    #[inline]
    pub fn inline_variant_ctor_id(self) -> u32 {
        debug_assert!(self.is_nan_boxed() && self.tag() == TAG_INLINE_VARIANT);
        ((self.payload() >> IV_CTOR_SHIFT) & IV_CTOR_MASK) as u32
    }

    /// Decode the single field packed into an inline variant.
    #[inline]
    pub fn inline_variant_inner(self) -> NanValue {
        debug_assert!(self.is_nan_boxed() && self.tag() == TAG_INLINE_VARIANT);
        let payload = self.payload();
        if payload & IV_KIND_BIT == 0 {
            // Inline int — sign-extend 29 bits to i64
            let raw = payload & IV_INT_MASK;
            let i = if raw & IV_INT_SIGN_BIT != 0 {
                (raw | !IV_INT_MASK) as i64
            } else {
                raw as i64
            };
            Self::new_int_inline(i)
        } else {
            // Immediate kind: 2-bit code at bits 28..27.
            let imm = (payload >> IV_IMM_SHIFT) & 0b11;
            match imm {
                IV_IMM_FALSE => Self::FALSE,
                IV_IMM_TRUE => Self::TRUE,
                IV_IMM_UNIT => Self::UNIT,
                IV_IMM_NONE => Self::NONE,
                _ => unreachable!(),
            }
        }
    }
683
    /// Symbol of kind `SYMBOL_FN` carrying `arena_index`.
    #[inline]
    pub fn new_fn(arena_index: u32) -> Self {
        Self::new_symbol(SYMBOL_FN, arena_index)
    }

    /// Symbol of kind `SYMBOL_BUILTIN` carrying `arena_index`.
    #[inline]
    pub fn new_builtin(arena_index: u32) -> Self {
        Self::new_symbol(SYMBOL_BUILTIN, arena_index)
    }

    /// Symbol of kind `SYMBOL_NAMESPACE` carrying `arena_index`.
    #[inline]
    pub fn new_namespace(arena_index: u32) -> Self {
        Self::new_symbol(SYMBOL_NAMESPACE, arena_index)
    }
698
    /// Arena slot encoded in the payload (arena bit stripped).
    /// NOTE(review): the `as u32` cast truncates payload bits 44..32 — sound
    /// only because indices are always created from `u32` by the `new_*`
    /// constructors in this file; confirm no wider index is ever encoded.
    #[inline]
    pub fn arena_index(self) -> u32 {
        (self.payload() & !ARENA_REF_BIT) as u32
    }

    /// Arena index of any heap-backed value; `None` for floats, inline
    /// representations, and tags that never reference the arena.
    #[inline]
    pub fn heap_index(self) -> Option<u32> {
        if !self.is_nan_boxed() {
            return None;
        }
        match self.tag() {
            TAG_INT => {
                let p = self.payload();
                if p & INT_BIG_BIT != 0 {
                    Some((p & !INT_BIG_BIT) as u32)
                } else {
                    None
                }
            }
            TAG_STRING | TAG_SOME | TAG_OK | TAG_ERR | TAG_LIST | TAG_TUPLE | TAG_MAP
            | TAG_RECORD | TAG_VARIANT | TAG_VECTOR => {
                (self.payload() & ARENA_REF_BIT != 0).then_some(self.arena_index())
            }
            _ => None,
        }
    }

    /// Re-point a heap-backed value at a new arena slot, keeping its tag.
    /// Values with no heap index (floats, inline forms, symbols) are
    /// returned unchanged.
    #[inline]
    pub fn with_heap_index(self, index: u32) -> Self {
        if !self.is_nan_boxed() {
            return self;
        }
        match self.tag() {
            TAG_INT => {
                // Inline ints carry their value in the payload; only
                // arena-backed ints may be re-pointed.
                debug_assert!(self.payload() & INT_BIG_BIT != 0);
                Self::new_int_arena(index)
            }
            TAG_SOME => Self::new_some(index),
            TAG_OK => Self::new_ok(index),
            TAG_ERR => Self::new_err(index),
            TAG_STRING => Self::new_string(index),
            TAG_LIST => Self::new_list(index),
            TAG_TUPLE => Self::new_tuple(index),
            TAG_MAP => Self::new_map(index),
            TAG_VECTOR => Self::new_vector(index),
            TAG_RECORD => Self::new_record(index),
            TAG_VARIANT => Self::new_variant(index),
            _ => self,
        }
    }
749
750    // -- Type checks -------------------------------------------------------
751
752    #[inline]
753    pub fn is_float(self) -> bool {
754        !self.is_nan_boxed()
755    }
756
757    #[inline]
758    pub fn is_int(self) -> bool {
759        self.is_nan_boxed() && self.tag() == TAG_INT
760    }
761
762    #[inline]
763    pub fn is_bool(self) -> bool {
764        self.is_nan_boxed()
765            && self.tag() == TAG_IMMEDIATE
766            && (self.payload() == IMM_TRUE || self.payload() == IMM_FALSE)
767    }
768
769    #[inline]
770    pub fn is_unit(self) -> bool {
771        self.0 == Self::UNIT.0
772    }
773
774    #[inline]
775    pub fn is_none(self) -> bool {
776        self.0 == Self::NONE.0
777    }
778
779    #[inline]
780    pub fn is_some(self) -> bool {
781        self.is_nan_boxed() && self.tag() == TAG_SOME
782    }
783
784    #[inline]
785    pub fn is_ok(self) -> bool {
786        self.is_nan_boxed() && self.tag() == TAG_OK
787    }
788
789    #[inline]
790    pub fn is_err(self) -> bool {
791        self.is_nan_boxed() && self.tag() == TAG_ERR
792    }
793
794    #[inline]
795    pub fn is_string(self) -> bool {
796        self.is_nan_boxed() && self.tag() == TAG_STRING
797    }
798
799    /// Deep string equality: compare actual string content, not NanValue bits.
800    /// Handles both inline short strings and arena-allocated strings.
801    pub fn string_eq(self, other: NanValue, arena: &Arena) -> bool {
802        if self.bits() == other.bits() {
803            return true; // fast path: same bits (inline or same arena entry)
804        }
805        if !self.is_string() || !other.is_string() {
806            return false;
807        }
808        arena.get_string_value(self).as_str() == arena.get_string_value(other).as_str()
809    }
810
811    #[inline]
812    pub fn is_list(self) -> bool {
813        self.is_nan_boxed() && self.tag() == TAG_LIST
814    }
815
816    #[inline]
817    pub fn is_record(self) -> bool {
818        self.is_nan_boxed() && self.tag() == TAG_RECORD
819    }
820
821    #[inline]
822    pub fn is_fn(self) -> bool {
823        self.is_nan_boxed() && self.tag() == TAG_SYMBOL && self.symbol_kind() == SYMBOL_FN
824    }
825
826    #[inline]
827    pub fn is_variant(self) -> bool {
828        self.is_nan_boxed()
829            && (self.tag() == TAG_VARIANT
830                || self.tag() == TAG_INLINE_VARIANT
831                || (self.tag() == TAG_SYMBOL && self.symbol_kind() == SYMBOL_NULLARY_VARIANT))
832    }
833
834    #[inline]
835    pub fn is_inline_variant(self) -> bool {
836        self.is_nan_boxed() && self.tag() == TAG_INLINE_VARIANT
837    }
838
839    #[inline]
840    pub fn is_map(self) -> bool {
841        self.is_nan_boxed() && self.tag() == TAG_MAP
842    }
843
844    #[inline]
845    pub fn is_vector(self) -> bool {
846        self.is_nan_boxed() && self.tag() == TAG_VECTOR
847    }
848
849    #[inline]
850    pub fn is_empty_vector_immediate(self) -> bool {
851        self.is_nan_boxed() && self.tag() == TAG_VECTOR && self.payload() & ARENA_REF_BIT == 0
852    }
853
854    #[inline]
855    pub fn is_tuple(self) -> bool {
856        self.is_nan_boxed() && self.tag() == TAG_TUPLE
857    }
858
859    #[inline]
860    pub fn is_builtin(self) -> bool {
861        self.is_nan_boxed() && self.tag() == TAG_SYMBOL && self.symbol_kind() == SYMBOL_BUILTIN
862    }
863
864    #[inline]
865    pub fn is_namespace(self) -> bool {
866        self.is_nan_boxed() && self.tag() == TAG_SYMBOL && self.symbol_kind() == SYMBOL_NAMESPACE
867    }
868
869    #[inline]
870    pub fn is_empty_list_immediate(self) -> bool {
871        self.is_nan_boxed() && self.tag() == TAG_LIST && self.payload() & ARENA_REF_BIT == 0
872    }
873
874    #[inline]
875    pub fn is_empty_map_immediate(self) -> bool {
876        self.is_nan_boxed() && self.tag() == TAG_MAP && self.payload() & ARENA_REF_BIT == 0
877    }
878
    /// Human-readable name of the value's runtime type (for diagnostics).
    pub fn type_name(self) -> &'static str {
        if self.is_float() {
            return "Float";
        }
        match self.tag() {
            TAG_INT => "Int",
            TAG_IMMEDIATE => match self.payload() {
                IMM_FALSE | IMM_TRUE => "Bool",
                IMM_UNIT => "Unit",
                _ => "Unknown",
            },
            TAG_SOME => "Option.Some",
            TAG_NONE => "Option.None",
            TAG_OK => "Result.Ok",
            TAG_ERR => "Result.Err",
            TAG_STRING => "String",
            TAG_LIST => "List",
            TAG_TUPLE => "Tuple",
            TAG_MAP => "Map",
            TAG_VECTOR => "Vector",
            TAG_RECORD => "Record",
            TAG_VARIANT | TAG_INLINE_VARIANT => "Variant",
            // All symbol kinds share tag 1; the sub-kind decides the name.
            TAG_SYMBOL => match self.symbol_kind() {
                SYMBOL_FN => "Fn",
                SYMBOL_BUILTIN => "Builtin",
                SYMBOL_NAMESPACE => "Namespace",
                SYMBOL_NULLARY_VARIANT => "Variant",
                _ => "Unknown",
            },
            _ => "Unknown",
        }
    }
911
    /// Constructor id of any variant representation; `None` for non-variants.
    #[inline]
    pub fn variant_ctor_id(self, arena: &Arena) -> Option<u32> {
        if !self.is_nan_boxed() {
            return None;
        }
        match self.tag() {
            TAG_INLINE_VARIANT => Some(self.inline_variant_ctor_id()),
            TAG_VARIANT => {
                // Arena variants store (type_id, variant_id); map back to a ctor.
                let (type_id, variant_id, _) = arena.get_variant(self.arena_index());
                arena.find_ctor_id(type_id, variant_id)
            }
            TAG_SYMBOL if self.symbol_kind() == SYMBOL_NULLARY_VARIANT => {
                Some(arena.get_nullary_variant_ctor(self.symbol_index()))
            }
            _ => None,
        }
    }
929
930    /// Returns (type_id, variant_id, fields) for arena-backed and nullary variants.
931    /// Does NOT handle TAG_INLINE_VARIANT — use `inline_variant_info` for those.
932    #[inline]
933    pub fn variant_parts(self, arena: &Arena) -> Option<(u32, u16, &[NanValue])> {
934        if !self.is_nan_boxed() {
935            return None;
936        }
937        match self.tag() {
938            TAG_VARIANT => {
939                let (type_id, variant_id, fields) = arena.get_variant(self.arena_index());
940                Some((type_id, variant_id, fields))
941            }
942            TAG_SYMBOL if self.symbol_kind() == SYMBOL_NULLARY_VARIANT => {
943                let (type_id, variant_id) =
944                    arena.get_ctor_parts(arena.get_nullary_variant_ctor(self.symbol_index()));
945                Some((type_id, variant_id, &[]))
946            }
947            _ => None,
948        }
949    }
950
951    /// Extract the single field from any 1-field variant representation.
952    /// For inline variants: decode from bits. For arena variants: arena lookup.
953    /// Panics if the variant has != 1 field.
954    #[inline]
955    pub fn variant_single_field(self, arena: &Arena) -> NanValue {
956        if self.tag() == TAG_INLINE_VARIANT {
957            self.inline_variant_inner()
958        } else {
959            let (_, _, fields) = arena.get_variant(self.arena_index());
960            debug_assert_eq!(fields.len(), 1);
961            fields[0]
962        }
963    }
964
965    /// Returns (type_id, variant_id, inner_value) for inline variants.
966    #[inline]
967    pub fn inline_variant_info(self, arena: &Arena) -> Option<(u32, u16, NanValue)> {
968        if !self.is_nan_boxed() || self.tag() != TAG_INLINE_VARIANT {
969            return None;
970        }
971        let ctor_id = self.inline_variant_ctor_id();
972        let (type_id, variant_id) = arena.get_ctor_parts(ctor_id);
973        Some((type_id, variant_id, self.inline_variant_inner()))
974    }
975
    /// Raw bits - useful for using as HashMap key (inline values only).
    /// For arena-backed values the bits encode an arena index, so two equal
    /// values may have different bits; use `map_key_hash` in that case.
    #[inline]
    pub fn bits(self) -> u64 {
        self.0
    }
981
    /// Reconstruct a `NanValue` from raw bits previously obtained via `bits()`.
    /// The caller is responsible for passing a valid encoding.
    #[inline]
    pub fn from_bits(bits: u64) -> Self {
        NanValue(bits)
    }
986
987    /// Content-based hash for use as map key. For inline values (int, float, bool),
988    /// uses bits(). For arena-backed strings, hashes the string content so that
989    /// two NanValues for the same string content produce the same key regardless
990    /// of arena index.
991    pub fn map_key_hash(self, arena: &Arena) -> u64 {
992        if self.is_string() {
993            use std::hash::{Hash, Hasher};
994            let mut hasher = std::collections::hash_map::DefaultHasher::new();
995            3u8.hash(&mut hasher);
996            arena.get_string_value(self).hash(&mut hasher);
997            hasher.finish()
998        } else {
999            self.bits()
1000        }
1001    }
1002}
1003
1004// -- Debug -----------------------------------------------------------------
1005
impl std::fmt::Debug for NanValue {
    /// Decodes the NaN-box without dereferencing the arena: inline values
    /// print their decoded contents, arena-backed values print only the
    /// arena index (the arena is not available to `Debug`).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Any non-NaN-boxed bit pattern is a plain f64.
        if self.is_float() {
            return write!(f, "Float({})", self.as_float());
        }
        match self.tag() {
            TAG_INT => {
                // INT_BIG_BIT set => arena-backed big int; clear => inline int
                // (bit 45 per the tag map in the module header).
                if self.payload() & INT_BIG_BIT != 0 {
                    write!(f, "Int(arena:{})", (self.payload() & !INT_BIG_BIT) as u32)
                } else {
                    write!(
                        f,
                        "Int({})",
                        Self::decode_inline_int_payload(self.payload())
                    )
                }
            }
            TAG_IMMEDIATE => match self.payload() {
                IMM_FALSE => write!(f, "False"),
                IMM_TRUE => write!(f, "True"),
                IMM_UNIT => write!(f, "Unit"),
                // Unknown immediate payloads are printed raw for debugging.
                _ => write!(f, "Immediate({})", self.payload()),
            },
            TAG_NONE => write!(f, "None"),
            // The three single-value wrappers share one formatting path.
            TAG_SOME | TAG_OK | TAG_ERR => {
                let kind = match self.tag() {
                    TAG_SOME => "Some",
                    TAG_OK => "Ok",
                    TAG_ERR => "Err",
                    _ => "?",
                };
                if self.payload() & ARENA_REF_BIT != 0 {
                    // Inner value lives in the arena; only the index is known here.
                    write!(f, "{}(arena:{})", kind, self.arena_index())
                } else if let Some(inner) = self.wrapper_inline_inner() {
                    write!(f, "{}({:?})", kind, inner)
                } else {
                    // Inline wrapper whose inner value could not be decoded.
                    write!(f, "{}(?)", kind)
                }
            }
            TAG_SYMBOL => match self.symbol_kind() {
                SYMBOL_FN => write!(f, "Fn(symbol:{})", self.symbol_index()),
                SYMBOL_BUILTIN => write!(f, "Builtin(symbol:{})", self.symbol_index()),
                SYMBOL_NAMESPACE => write!(f, "Namespace(symbol:{})", self.symbol_index()),
                SYMBOL_NULLARY_VARIANT => {
                    write!(f, "NullaryVariant(symbol:{})", self.symbol_index())
                }
                _ => write!(f, "Symbol({})", self.payload()),
            },
            TAG_STRING => {
                // Small strings are packed inline and can be shown verbatim.
                if let Some(s) = self.small_string() {
                    write!(f, "String({:?})", s.as_str())
                } else {
                    write!(f, "String(arena:{})", self.arena_index())
                }
            }
            TAG_INLINE_VARIANT => {
                let ctor = self.inline_variant_ctor_id();
                let inner = self.inline_variant_inner();
                write!(f, "InlineVariant(ctor:{}, {:?})", ctor, inner)
            }
            // Empty-collection singletons have no arena entry to reference.
            TAG_LIST if self.is_empty_list_immediate() => write!(f, "EmptyList"),
            TAG_MAP if self.is_empty_map_immediate() => write!(f, "EmptyMap"),
            TAG_VECTOR if self.is_empty_vector_immediate() => write!(f, "EmptyVector"),
            // Everything else is arena-backed: print type name and index.
            _ => write!(f, "{}(arena:{})", self.type_name(), self.arena_index()),
        }
    }
}
1073
1074// ---------------------------------------------------------------------------
1075// Arena
1076// ---------------------------------------------------------------------------
1077
/// Backing store for all heap-allocated `NanValue` payloads, plus the type
/// and symbol metadata needed to decode variants and symbols.
///
/// Entries are split across four spaces (young/yard/handoff/stable — see
/// `HeapSpace`). NOTE(review): the spaces and `scratch_*` vectors look like
/// generational GC machinery — confirm semantics in the `memory` submodule.
#[derive(Debug, Clone)]
pub struct Arena {
    // One entry vector per heap space; an arena index selects space + slot
    // (see HEAP_SPACE_SHIFT below).
    young_entries: Vec<ArenaEntry>,
    yard_entries: Vec<ArenaEntry>,
    handoff_entries: Vec<ArenaEntry>,
    stable_entries: Vec<ArenaEntry>,
    // Per-space scratch index buffers — presumably reused during collection
    // to avoid reallocating; verify in memory.rs.
    scratch_young: Vec<u32>,
    scratch_yard: Vec<u32>,
    scratch_handoff: Vec<u32>,
    scratch_stable: Vec<u32>,
    // High-water mark of entries per space.
    peak_usage: ArenaUsage,
    // Which space new allocations currently go to.
    alloc_space: AllocSpace,
    // Type metadata, indexed by type_id.
    pub(crate) type_names: Vec<String>,
    pub(crate) type_field_names: Vec<Vec<String>>,
    pub(crate) type_variant_names: Vec<Vec<String>>,
    pub(crate) type_variant_ctor_ids: Vec<Vec<u32>>,
    // ctor_id -> (type_id, variant_id) reverse mapping.
    pub(crate) ctor_to_type_variant: Vec<(u32, u16)>,
    // Symbol table referenced by TAG_SYMBOL values.
    pub(crate) symbol_entries: Vec<ArenaSymbol>,
}
1097
/// A single heap-allocated payload referenced by a NaN-boxed `NanValue`
/// carrying an arena index. Each variant corresponds to a tag in the
/// module-header tag map whose value did not fit inline.
#[derive(Debug, Clone)]
pub enum ArenaEntry {
    // Integer too wide for the 45-bit inline encoding.
    Int(i64),
    // String too long for the inline small-string encoding.
    String(Rc<str>),
    // Lists use a structured representation (see `ArenaList`).
    List(ArenaList),
    Tuple(Vec<NanValue>),
    Map(PersistentMap),
    Vector(Vec<NanValue>),
    Record {
        type_id: u32,
        fields: Vec<NanValue>,
    },
    Variant {
        type_id: u32,
        variant_id: u16,
        fields: Vec<NanValue>,
    },
    Fn(Rc<FunctionValue>),
    Builtin(Rc<str>),
    Namespace {
        name: Rc<str>,
        members: Vec<(Rc<str>, NanValue)>,
    },
    // Indirection to another value — NOTE(review): presumably used when a
    // wrapper's inner value can't be stored inline; confirm in arena.rs.
    Boxed(NanValue),
}
1123
/// Payload for TAG_SYMBOL values, stored in `Arena::symbol_entries` and
/// addressed by symbol index. The variant mirrors the 2-bit symbol kind
/// (fn/builtin/namespace/nullary-variant) in the symbol payload.
#[derive(Debug, Clone)]
pub enum ArenaSymbol {
    Fn(Rc<FunctionValue>),
    Builtin(Rc<str>),
    Namespace {
        name: Rc<str>,
        members: Vec<(Rc<str>, NanValue)>,
    },
    // A field-less variant value; only its constructor id is needed.
    NullaryVariant {
        ctor_id: u32,
    },
}
1136
/// Structured list representation. NOTE(review): the shapes suggest a
/// rope-like structure (shared flat storage plus cons/concat nodes) so that
/// prepend and concat avoid copying — confirm against the `lists` submodule.
#[derive(Debug, Clone)]
pub enum ArenaList {
    // Contiguous storage shared via Rc; `start` offsets into `items`,
    // allowing cheap tail-slices without copying.
    Flat {
        items: Rc<Vec<NanValue>>,
        start: usize,
    },
    // Cons cell: one element in front of an existing list value.
    Prepend {
        head: NanValue,
        tail: NanValue,
        len: usize,
    },
    // Lazy concatenation of two list values; `len` caches the total.
    Concat {
        left: NanValue,
        right: NanValue,
        len: usize,
    },
    // A current segment plus remaining segments — presumably produced by
    // repeated appends (see LIST_APPEND_CHUNK_LIMIT below).
    Segments {
        current: NanValue,
        rest: Rc<Vec<NanValue>>,
        start: usize,
        len: usize,
    },
}
1160
// NOTE(review): presumably caps the size of list chunks built during append
// before a new segment is started — confirm usage in lists.rs.
const LIST_APPEND_CHUNK_LIMIT: usize = 128;
1162
/// Identifies which of the four arena spaces an index points into.
/// Discriminants are explicit because the 2-bit space id is packed into the
/// top bits of a u32 arena index (see HEAP_SPACE_SHIFT below).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum HeapSpace {
    Young = 0,
    Yard = 1,
    Handoff = 2,
    Stable = 3,
}
1170
/// Space that new allocations are directed to. Unlike `HeapSpace` it has no
/// `Stable` variant — NOTE(review): apparently values only reach the stable
/// space by promotion, never by direct allocation; confirm in memory.rs.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AllocSpace {
    Young,
    Yard,
    Handoff,
}
1177
/// Per-space entry counts; used by `Arena::peak_usage` to record the
/// high-water mark of arena occupancy.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct ArenaUsage {
    pub young: usize,
    pub yard: usize,
    pub handoff: usize,
    pub stable: usize,
}

impl ArenaUsage {
    /// Total entry count summed over all four heap spaces.
    pub fn total(self) -> usize {
        let ArenaUsage {
            young,
            yard,
            handoff,
            stable,
        } = self;
        young + yard + handoff + stable
    }
}
1191
// A u32 arena index packs a 2-bit space id (matching `HeapSpace`'s
// discriminants) in its top two bits and a 30-bit slot index below it.
const HEAP_SPACE_SHIFT: u32 = 30;
const HEAP_SPACE_MASK_U32: u32 = 0b11 << HEAP_SPACE_SHIFT;
const HEAP_INDEX_MASK_U32: u32 = (1 << HEAP_SPACE_SHIFT) - 1;
1195
// Implementation submodules; the types above are shared across them.
mod arena;
mod compare;
mod convert;
mod lists;
mod memory;

#[cfg(test)]
#[allow(clippy::approx_constant)]
mod tests;