axess-strings 0.2.0

Short, hot-path string primitive for the axess workspace. ShortString is optimized for the workload of short identifiers that are hashed, compared, and cloned at high volume: event taxonomy tags, factor names, routing discriminators. The current internal representation is a placeholder (heap Box<str> + &'static str) suitable for getting the API contract in place; it will be swapped to an Umbra-style 16-byte representation in a follow-up commit. The public API is stable across the swap.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
//! Internal Umbra-style 16-byte representation for [`ShortString`](crate::ShortString).
//!
//! # Layout
//!
//! ```text
//!         offset 0   offset 4   offset 8         offset 16
//!         |          |          |                |
//!  Inline | len:u32  | data: [u8; 12]            |   len ≤ 12, no tag bit
//!  Static | len|TAG  | prefix:[u8;4] | ptr:*u8   |   high bit of len-field set
//!  Heap   | len:u32  | prefix:[u8;4] | ptr:Heap  |   len > 12, no tag bit
//! ```
//!
//! All three variants share the first eight bytes:
//!  - bytes `0..4`: length (low 31 bits) plus the static-tag high bit
//!  - bytes `4..8`: prefix, first four bytes of the string, zero-padded
//!
//! The shared layout is what makes [`Repr::prefix`] and the prefix
//! fast-path of equality work without first deciding the variant.
//!
//! # Discriminator
//!
//! - high bit of the length field set ⇒ **Static** (immutable `&'static [u8]`)
//! - high bit clear, length ≤ 12 ⇒ **Inline** (bytes live in `data`)
//! - high bit clear, length > 12 ⇒ **Heap** (refcounted heap allocation)
//!
//! # Heap allocation layout
//!
//! Heap form stores a single allocation with an atomic refcount header
//! followed by the string bytes:
//!
//! ```text
//!  [ refcount: AtomicU32 ][ bytes: [u8; len] ]
//!  ^                       ^
//!  ptr                     ptr + DATA_OFFSET
//! ```
//!
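//! `DATA_OFFSET` in the diagram is not a named constant; it is the data
//! offset returned by `heap_layout`, i.e.
//! `align_up(size_of::<AtomicU32>(), 1)`. For `AtomicU32` with 4-byte
//! alignment and `u8` with 1-byte alignment that is just `4`. It is
//! computed via `Layout::extend` rather than hard-coded, to stay
//! forward-compatible with any future alignment changes.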
//!
//! # Soundness invariants
//!
//! 1. After construction, the active variant is determined entirely by
//!    the length field at offset 0..4 of the union. Every read site
//!    consults the discriminator before accessing variant-specific
//!    fields.
//! 2. `Heap` form's pointer is always non-null and always points to a
//!    valid allocation made via [`alloc_heap`] (and not yet freed).
//! 3. `Heap` form's refcount is decremented on drop; the allocation is
//!    freed on the transition from 1 → 0.
//! 4. `Static` form's pointer is `&'static`, never freed.
//! 5. `Inline` form has `len ≤ INLINE_CAP` and `data[len..]` is zero-
//!    padded.
//! 6. The 4-byte prefix at offset 4..8 always matches the first 4 bytes
//!    of the string content (zero-padded if the string is shorter).
//!
//! All `unsafe` blocks below cite the invariant they rely on.

#![allow(unsafe_code)]

use std::alloc::{Layout, alloc, dealloc, handle_alloc_error};
use std::ptr::NonNull;
use std::sync::atomic::{AtomicU32, Ordering};

/// Maximum number of UTF-8 bytes that fit in the inline variant.
///
/// Doubles as the discriminator threshold: an untagged length field
/// `<= INLINE_CAP` means Inline, anything larger means Heap.
pub(crate) const INLINE_CAP: usize = 12;

/// High-bit tag on the length field marking the **Static** variant.
///
/// Because this bit is reserved, only the low 31 bits of the length
/// field carry the actual string length.
const STATIC_TAG: u32 = 1 << 31;

/// Mask to extract the true length, regardless of variant.
///
/// This is every bit of the length field except [`STATIC_TAG`].
const LEN_MASK: u32 = !STATIC_TAG;

/// Inline variant: the string bytes are stored directly in the value.
///
/// `repr(C)` pins `len` at offset 0 and `data` at offset 4, so the
/// first four bytes of `data` occupy the same offsets as
/// `PointerRepr::prefix`.
#[repr(C)]
#[derive(Clone, Copy)]
struct InlineRepr {
    /// Byte length of the string; also read as the shared length/tag
    /// field when discriminating between variants.
    len: u32,
    /// String bytes; constructors zero-fill the tail past `len`.
    data: [u8; INLINE_CAP],
}

/// Pointer-carrying variant shared by the Static and Heap forms.
///
/// `repr(C)` keeps the length field at offset 0 and the prefix at
/// offset 4, matching the field offsets of `InlineRepr`.
#[repr(C)]
#[derive(Clone, Copy)]
struct PointerRepr {
    /// Length, with the [`STATIC_TAG`] high bit set when this is a
    /// Static-form pointer. For Heap form the high bit is clear.
    len_with_tag: u32,
    /// First four bytes of the string, zero-padded when shorter.
    prefix: [u8; 4],
    /// Static form: address of the first string byte.
    /// Heap form: address of the allocation's `HeapHeader`; the string
    /// bytes follow the header.
    ptr: *const u8,
}

/// Untagged storage for the three string forms (Inline, Static, Heap).
///
/// The active form is not tracked by the type system: it is decided
/// from the length field at offset 0, which both union fields expose
/// at the same position. Expected to be 16 bytes (asserted by the
/// `struct_size_is_16_bytes` test).
#[repr(C)]
pub(crate) union Repr {
    /// Active when the tag bit is clear and the length is at most
    /// `INLINE_CAP`.
    inline: InlineRepr,
    /// Active for the Static form (tag bit set) and the Heap form (tag
    /// bit clear, length above `INLINE_CAP`).
    pointer: PointerRepr,
}

/// Header at the start of a heap allocation. Followed by the string
/// bytes at `DATA_OFFSET` (computed via [`heap_layout`]).
///
/// `repr(C)` keeps the refcount at offset 0, so a pointer to the
/// allocation is also a pointer to the header.
#[repr(C)]
struct HeapHeader {
    /// Number of live `Repr` values sharing this allocation; set to 1
    /// by `alloc_heap`, incremented by `retain_heap`, decremented by
    /// `drop_heap`.
    refcount: AtomicU32,
}

/// Compute the allocation layout for a heap string of `len` bytes.
///
/// Returns the padded layout of "header followed by `len` bytes" and
/// the byte offset from the start of the allocation to the first
/// string byte.
///
/// # Panics
///
/// Panics if the byte array or the combined layout overflows what
/// `Layout` can represent.
fn heap_layout(len: usize) -> (Layout, usize) {
    let payload = Layout::array::<u8>(len).expect("string length overflows Layout");
    let (joined, data_offset) = Layout::new::<HeapHeader>()
        .extend(payload)
        .expect("combined heap layout overflows");
    // `extend` leaves trailing padding off; add it so the same layout
    // is used for both allocation and deallocation.
    (joined.pad_to_align(), data_offset)
}

/// Allocate a refcounted heap buffer holding a copy of `s`.
///
/// The allocation is laid out as described by [`heap_layout`]: a
/// `HeapHeader` with its refcount initialised to 1, followed by the
/// bytes of `s`.
///
/// # Guarantees
///
/// The returned pointer is non-null, properly aligned, points to an
/// initialised `HeapHeader` followed by `s.len()` bytes copied from `s`,
/// and is owned by the caller (refcount = 1, must be released via
/// [`drop_heap`]).
///
/// Allocation failure is routed to `handle_alloc_error`.
fn alloc_heap(s: &str) -> NonNull<HeapHeader> {
    let source = s.as_bytes();
    let (layout, data_offset) = heap_layout(source.len());

    // SAFETY: `layout` has non-zero size because the header alone
    // occupies 4 bytes.
    let base = match NonNull::new(unsafe { alloc(layout) }) {
        Some(base) => base,
        None => handle_alloc_error(layout),
    };
    let header = base.cast::<HeapHeader>();

    // SAFETY: `base` is a fresh allocation of `layout`, which has room
    // for a `HeapHeader` at offset 0 and `source.len()` bytes starting
    // at `data_offset`. The source slice is valid for `source.len()`
    // bytes and cannot overlap memory that was just allocated.
    unsafe {
        header.as_ptr().write(HeapHeader {
            refcount: AtomicU32::new(1),
        });
        let dst = base.as_ptr().add(data_offset);
        std::ptr::copy_nonoverlapping(source.as_ptr(), dst, source.len());
    }

    header
}

/// Increment the refcount on a heap pointer.
///
/// Aborts the process if the refcount grows past `i32::MAX`. Without
/// this guard, roughly 2^32 clones passed to `mem::forget` would wrap
/// the `AtomicU32` back to a small value, and a later drop would free
/// the buffer while other references are still live. Aborting (rather
/// than panicking) mirrors `Arc::clone`: the counter has already been
/// bumped, so unwinding could not restore a consistent state.
///
/// # Safety
///
/// `ptr` must be a live heap allocation produced by [`alloc_heap`]
/// (refcount > 0).
unsafe fn retain_heap(ptr: NonNull<HeapHeader>) {
    // Threshold at half the counter range: even if many threads race
    // past the check at once, the counter cannot wrap before one of
    // them aborts.
    const MAX_REFCOUNT: u32 = i32::MAX as u32;

    // SAFETY: invariant 2: heap pointers are valid until drop.
    let header = unsafe { ptr.as_ref() };
    // Relaxed is sound for refcount inc; we already hold a strong
    // reference, so there's no synchronisation required for the
    // increment itself. (Matches `Arc::clone`.)
    let previous = header.refcount.fetch_add(1, Ordering::Relaxed);
    if previous > MAX_REFCOUNT {
        std::process::abort();
    }
}

/// Release one reference to a heap allocation, freeing it when the
/// refcount transitions 1 → 0.
///
/// `len` is the string length the allocation was created for; it is
/// needed to rebuild the layout passed to `dealloc`.
///
/// # Safety
///
/// `ptr` must be a live heap allocation produced by [`alloc_heap`]
/// with the caller holding one reference. After this call the caller
/// must not use `ptr` again.
unsafe fn drop_heap(ptr: NonNull<HeapHeader>, len: usize) {
    // AcqRel on the decrement: the Release half publishes this owner's
    // prior accesses, and the Acquire half lets the final dropper
    // observe every other owner's accesses before it frees the buffer.
    // (`Arc` gets the same effect with a Release decrement plus an
    // Acquire fence.)
    //
    // SAFETY: invariant 2: the header is valid while we hold a reference.
    let previous = unsafe { ptr.as_ref() }
        .refcount
        .fetch_sub(1, Ordering::AcqRel);
    if previous != 1 {
        // Other owners remain; they are responsible for the buffer.
        return;
    }

    let layout = heap_layout(len).0;
    // SAFETY: refcount transition 1 → 0 means we're the last owner;
    // the allocation was made with this same layout in `alloc_heap`.
    unsafe {
        dealloc(ptr.cast::<u8>().as_ptr(), layout);
    }
}

/// Borrow the string bytes of a heap allocation as a byte slice.
///
/// The returned lifetime `'a` is chosen by the caller and is not tied
/// to anything; the caller must not let the slice outlive its
/// reference to the allocation.
///
/// # Safety
///
/// `ptr` must be a live heap allocation produced by [`alloc_heap`] with
/// `len` bytes of valid UTF-8 string content.
unsafe fn heap_bytes<'a>(ptr: NonNull<HeapHeader>, len: usize) -> &'a [u8] {
    let data_offset = heap_layout(len).1;
    let base = ptr.cast::<u8>().as_ptr();
    // SAFETY: invariants 2 + 3; the bytes after the header are valid
    // for `len` for the lifetime of the allocation.
    unsafe {
        let first = base.add(data_offset);
        std::slice::from_raw_parts(first, len)
    }
}

/// Return the first four bytes of `bytes`, zero-padded on the right
/// when the input is shorter than four bytes.
fn compute_prefix(bytes: &[u8]) -> [u8; 4] {
    let mut prefix = [0u8; 4];
    // `zip` stops at the shorter side, so at most four bytes are copied
    // and any remaining slots keep their zero fill.
    for (slot, &byte) in prefix.iter_mut().zip(bytes) {
        *slot = byte;
    }
    prefix
}

impl Repr {
    /// Largest content length the 31-bit length field can hold; the
    /// top bit of the field is reserved for [`STATIC_TAG`].
    const MAX_LEN: usize = LEN_MASK as usize;

    /// Construct an empty (Inline, len=0) repr.
    pub(crate) const fn empty() -> Self {
        Self {
            inline: InlineRepr {
                len: 0,
                data: [0; INLINE_CAP],
            },
        }
    }

    /// Construct from a `&'static str` without allocating. `const fn`,
    /// preserves the const-evaluable API for module-level constants.
    ///
    /// The result is always the Static form, even for strings that
    /// would fit inline.
    ///
    /// # Panics
    ///
    /// Panics (at compile time when const-evaluated) if `s` is longer
    /// than the 31-bit length field can represent.
    pub(crate) const fn from_static(s: &'static str) -> Self {
        let bytes = s.as_bytes();
        let len = bytes.len();
        // The `as u32` cast below would otherwise silently truncate
        // the length, and `as_str` would then build a slice whose
        // length disagrees with the real string.
        assert!(
            len <= Self::MAX_LEN,
            "ShortString length exceeds the 31-bit length field"
        );
        // Hand-rolled prefix: const fn slice indexing is supported but
        // `copy_from_slice` is not yet const-stable.
        let prefix = [
            if len > 0 { bytes[0] } else { 0 },
            if len > 1 { bytes[1] } else { 0 },
            if len > 2 { bytes[2] } else { 0 },
            if len > 3 { bytes[3] } else { 0 },
        ];
        Self {
            pointer: PointerRepr {
                // Lossless: `len` was checked against `MAX_LEN` above.
                len_with_tag: (len as u32) | STATIC_TAG,
                prefix,
                ptr: bytes.as_ptr(),
            },
        }
    }

    /// Construct from an arbitrary `&str`, allocating on the heap if
    /// the string does not fit inline.
    ///
    /// # Panics
    ///
    /// Panics if `s` is longer than the 31-bit length field can
    /// represent. Without this check a length of 2^31 bytes or more
    /// would either set the static-tag bit (so the heap buffer would be
    /// misread as a static pointer and never freed) or wrap to a small
    /// value (so pointer bytes would be misread as inline data).
    pub(crate) fn from_str(s: &str) -> Self {
        let bytes = s.as_bytes();
        let len = bytes.len();
        // Checked before allocating so an oversized input never
        // produces a repr whose discriminator disagrees with its data.
        assert!(
            len <= Self::MAX_LEN,
            "ShortString length exceeds the 31-bit length field"
        );
        if len <= INLINE_CAP {
            // Zero-initialised so the tail past `len` stays zero
            // (invariant 5) and `data[0..4]` is a valid prefix
            // (invariant 6).
            let mut data = [0u8; INLINE_CAP];
            data[..len].copy_from_slice(bytes);
            Self {
                inline: InlineRepr {
                    len: len as u32,
                    data,
                },
            }
        } else {
            let prefix = compute_prefix(bytes);
            let ptr = alloc_heap(s);
            Self {
                pointer: PointerRepr {
                    // Lossless and high bit clear (⇒ Heap): `len` was
                    // checked against `MAX_LEN` above.
                    len_with_tag: len as u32,
                    prefix,
                    ptr: ptr.as_ptr().cast::<u8>(),
                },
            }
        }
    }

    /// Length of the string content (independent of variant).
    #[inline]
    pub(crate) fn len(&self) -> usize {
        // SAFETY: invariant 1; the length field is at offset 0..4 of
        // every variant; reading it via `inline.len` and masking off
        // the static tag is sound regardless of which variant is
        // active.
        let raw = unsafe { self.inline.len };
        (raw & LEN_MASK) as usize
    }

    /// Four-byte prefix of the string, zero-padded if shorter.
    #[inline]
    pub(crate) fn prefix(&self) -> [u8; 4] {
        // SAFETY: invariant 6; the bytes at offset 4..8 hold the first
        // four bytes of the string in every variant. For the pointer
        // forms that window is the `prefix` field; for Inline it is
        // `data[0..4]`, which equals the prefix by construction.
        // Reading through `inline.data` therefore works without first
        // deciding the variant.
        unsafe {
            [
                self.inline.data[0],
                self.inline.data[1],
                self.inline.data[2],
                self.inline.data[3],
            ]
        }
    }

    /// Borrow the contents as `&str`. Lifetime tied to `&self`.
    #[inline]
    pub(crate) fn as_str(&self) -> &str {
        // SAFETY: invariant 1 + invariants 2/4/5 per variant. The
        // length field is consulted first and only the fields of the
        // variant it selects are read.
        let bytes = unsafe {
            let raw_len = self.inline.len;
            if raw_len & STATIC_TAG != 0 {
                // Static: pointer is &'static, length is masked.
                let len = (raw_len & LEN_MASK) as usize;
                std::slice::from_raw_parts(self.pointer.ptr, len)
            } else if (raw_len as usize) <= INLINE_CAP {
                // Inline: bytes live in `data`.
                let len = raw_len as usize;
                &self.inline.data[..len]
            } else {
                // Heap: deref the pointer; bytes follow the header.
                let len = raw_len as usize;
                let header =
                    NonNull::new_unchecked(self.pointer.ptr.cast::<HeapHeader>().cast_mut());
                heap_bytes(header, len)
            }
        };
        // SAFETY: the string content was validated as UTF-8 at
        // construction (`from_str` / `from_static` both take `&str`).
        unsafe { std::str::from_utf8_unchecked(bytes) }
    }
}

impl Clone for Repr {
    /// Produce another handle to the same string.
    ///
    /// Inline and Static forms are plain bitwise copies. The Heap form
    /// copies the pointer and bumps the shared refcount so the new
    /// value owns its own strong reference.
    fn clone(&self) -> Self {
        // SAFETY: invariant 1; the length field sits at offset 0 in
        // every variant, so it may be read through `inline`.
        let raw_len = unsafe { self.inline.len };
        let is_static = raw_len & STATIC_TAG != 0;

        if !is_static && (raw_len as usize) <= INLINE_CAP {
            return Self {
                // SAFETY: the discriminator selected the Inline form.
                inline: unsafe { self.inline },
            };
        }

        if !is_static {
            // SAFETY: the discriminator selected the Heap form, so
            // `pointer.ptr` is a live, non-null `alloc_heap` allocation
            // (invariant 2) on which we hold a reference.
            unsafe {
                let header =
                    NonNull::new_unchecked(self.pointer.ptr.cast::<HeapHeader>().cast_mut());
                retain_heap(header);
            }
        }

        Self {
            // SAFETY: the discriminator selected a pointer form
            // (Static or Heap).
            pointer: unsafe { self.pointer },
        }
    }
}

impl Drop for Repr {
    /// Release the heap reference, if any. Inline and Static forms own
    /// nothing and need no cleanup.
    fn drop(&mut self) {
        // SAFETY: invariant 1; the length field sits at offset 0 in
        // every variant, so it may be read through `inline`.
        let raw_len = unsafe { self.inline.len };
        let len = raw_len as usize;
        let is_static = raw_len & STATIC_TAG != 0;
        if is_static || len <= INLINE_CAP {
            return;
        }

        // SAFETY: the discriminator selected the Heap form, so
        // `pointer.ptr` is a live, non-null `alloc_heap` allocation for
        // `len` bytes (invariant 2) and this value holds exactly one
        // reference to it, which is given up here.
        unsafe {
            let header = NonNull::new_unchecked(self.pointer.ptr.cast::<HeapHeader>().cast_mut());
            drop_heap(header, len);
        }
    }
}

// SAFETY: `Repr` contains a raw pointer, so `Send`/`Sync` are not
// derived automatically and must be asserted by hand. The underlying
// data is thread-safe in every variant:
//   - Inline: stack bytes (Send + Sync trivially).
//   - Static: `&'static [u8]` (Send + Sync).
//   - Heap: refcount is `AtomicU32`; bytes are immutable after
//     construction. Matches `Arc<[u8]>` Send/Sync rules.
unsafe impl Send for Repr {}
unsafe impl Sync for Repr {}

// Unit tests for the three storage forms (Inline, Static, Heap): each
// constructor is checked for length, content and prefix, and cloning
// is checked per form.
#[cfg(test)]
mod repr_tests {
    use super::*;

    // `Repr::empty` yields a zero-length string with an all-zero prefix.
    #[test]
    fn empty_is_inline() {
        let r = Repr::empty();
        assert_eq!(r.len(), 0);
        assert_eq!(r.as_str(), "");
        assert_eq!(r.prefix(), [0; 4]);
    }

    // A string shorter than four bytes gets a zero-padded prefix.
    #[test]
    fn short_string_uses_inline() {
        let r = Repr::from_str("hi");
        assert_eq!(r.len(), 2);
        assert_eq!(r.as_str(), "hi");
        assert_eq!(r.prefix(), *b"hi\0\0");
    }

    // Exactly `INLINE_CAP` bytes: the largest input that stays inline.
    #[test]
    fn boundary_string_inline_at_12_bytes() {
        let r = Repr::from_str("abcdefghijkl"); // exactly 12 bytes
        assert_eq!(r.len(), 12);
        assert_eq!(r.as_str(), "abcdefghijkl");
        assert_eq!(r.prefix(), *b"abcd");
    }

    // Anything longer than `INLINE_CAP` goes through the heap path.
    #[test]
    fn heap_form_for_strings_over_12_bytes() {
        let r = Repr::from_str("auth.login_attempt.v2"); // 21 bytes
        assert_eq!(r.len(), 21);
        assert_eq!(r.as_str(), "auth.login_attempt.v2");
        assert_eq!(r.prefix(), *b"auth");
    }

    // `from_static` must mask the tag bit out of the reported length,
    // even for strings short enough to fit inline.
    #[test]
    fn static_form_short_string() {
        let r = Repr::from_static("hi");
        assert_eq!(r.len(), 2);
        assert_eq!(r.as_str(), "hi");
        assert_eq!(r.prefix(), *b"hi\0\0");
    }

    // Same checks for a static string longer than `INLINE_CAP`.
    #[test]
    fn static_form_long_string() {
        let r = Repr::from_static("auth.login_attempt.v2");
        assert_eq!(r.len(), 21);
        assert_eq!(r.as_str(), "auth.login_attempt.v2");
        assert_eq!(r.prefix(), *b"auth");
    }

    // An inline clone stays readable after the original is dropped.
    #[test]
    fn clone_inline_is_independent() {
        let r1 = Repr::from_str("hi");
        let r2 = r1.clone();
        assert_eq!(r1.as_str(), r2.as_str());
        drop(r1);
        assert_eq!(r2.as_str(), "hi");
    }

    // Heap clones must each hold their own reference: the content has
    // to stay readable until the last handle is dropped.
    #[test]
    fn clone_heap_shares_buffer() {
        let r1 = Repr::from_str("auth.login_attempt.v2");
        let r2 = r1.clone();
        let r3 = r1.clone();
        // All three see the same content.
        assert_eq!(r1.as_str(), "auth.login_attempt.v2");
        assert_eq!(r2.as_str(), "auth.login_attempt.v2");
        assert_eq!(r3.as_str(), "auth.login_attempt.v2");
        // Drop in arbitrary order; invariant 3 must hold.
        drop(r2);
        assert_eq!(r1.as_str(), "auth.login_attempt.v2");
        drop(r1);
        assert_eq!(r3.as_str(), "auth.login_attempt.v2");
        drop(r3);
        // No panic on the final drop ⇒ refcount math is consistent.
    }

    // Cloning a static repr yields a value with the same content.
    #[test]
    fn clone_static_is_pointer_copy() {
        let r1 = Repr::from_static("auth.login_attempt.v2");
        let r2 = r1.clone();
        assert_eq!(r1.as_str(), r2.as_str());
    }

    #[test]
    fn struct_size_is_16_bytes() {
        // Sanity: the whole point of the Umbra layout is a 16-byte
        // stack representation.
        assert_eq!(std::mem::size_of::<Repr>(), 16);
    }

    // A large payload exercises the heap path well beyond the prefix.
    #[test]
    fn long_heap_string_round_trips() {
        let s = "a".repeat(10_000);
        let r = Repr::from_str(&s);
        assert_eq!(r.len(), 10_000);
        assert_eq!(r.as_str(), s.as_str());
    }
}