Skip to main content

azul_css/
corety.rs

1//! Core FFI-safe types used across crate boundaries.
2//!
3//! This module defines the fundamental types for FFI interop: [`AzString`] (an FFI-safe
4//! string backed by [`U8Vec`] with destructor-based memory management), [`EmptyStruct`] (a
5//! non-zero-size unit type), and various `Vec`/`Option` wrappers generated by the
6//! `impl_vec!` and `impl_option!` macros.
7
8use alloc::{
9    string::{String, ToString},
10    vec::Vec,
11};
12
13use crate::props::basic::ColorU;
14
15// ============================================================================
16// EmptyStruct type - FFI-safe replacement for ()
17// ============================================================================
18
/// One-byte, FFI-safe stand-in for the unit type `()`.
///
/// The unit type is zero-sized and therefore has no usable representation
/// across the C ABI. `EmptyStruct` carries a single reserved byte so that it
/// can appear in `#[repr(C)]` result types.
///
/// # Usage
/// Write `Result<EmptyStruct, Error>` wherever `Result<(), Error>` would
/// otherwise be used.
///
/// # Example
/// ```ignore
/// fn do_something() -> Result<EmptyStruct, MyError> {
///     // ... do work ...
///     Ok(EmptyStruct::default())
/// }
/// ```
#[repr(C)]
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct EmptyStruct {
    /// Padding byte that gives the struct a non-zero size.
    /// `Default` initializes it to 0.
    pub _reserved: u8,
}
43
44
45impl EmptyStruct {
46    /// Create a new EmptyStruct value (equivalent to `()`)
47    #[must_use]
48    pub const fn new() -> Self {
49        Self { _reserved: 0 }
50    }
51}
52
53impl From<()> for EmptyStruct {
54    fn from(_: ()) -> Self {
55        Self::default()
56    }
57}
58
59impl From<EmptyStruct> for () {
60    fn from(_: EmptyStruct) -> Self {
61        
62    }
63}
64
65// ============================================================================
66// Debug message types
67// ============================================================================
68
/// Severity or category tag attached to a layout debug message.
///
/// The first three variants are severities; the remaining ones are
/// layout-specific categories intended for filtering.
#[repr(C)]
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum LayoutDebugMessageType {
    /// Informational message; this is the `Default` variant.
    #[default]
    Info,
    Warning,
    Error,
    // Layout-specific categories for filtering
    BoxProps,
    CssGetter,
    /// Block Formatting Context layout
    BfcLayout,
    /// Inline Formatting Context layout
    IfcLayout,
    TableLayout,
    DisplayType,
    PositionCalculation,
}
89
90
91/// A debug message emitted during layout, with severity, text, and source location.
92#[derive(Debug, Default, Clone, PartialEq, PartialOrd)]
93#[repr(C)]
94pub struct LayoutDebugMessage {
95    pub message_type: LayoutDebugMessageType,
96    pub message: AzString,
97    pub location: AzString,
98}
99
100impl LayoutDebugMessage {
101    /// Create a new debug message with automatic caller location tracking
102    #[track_caller]
103    pub fn new(message_type: LayoutDebugMessageType, message: impl Into<String>) -> Self {
104        let location = core::panic::Location::caller();
105        Self {
106            message_type,
107            message: AzString::from_string(message.into()),
108            location: AzString::from_string(format!(
109                "{}:{}:{}",
110                location.file(),
111                location.line(),
112                location.column()
113            )),
114        }
115    }
116
117    /// Helper for Info messages
118    #[track_caller]
119    pub fn info(message: impl Into<String>) -> Self {
120        Self::new(LayoutDebugMessageType::Info, message)
121    }
122
123    /// Helper for Warning messages
124    #[track_caller]
125    pub fn warning(message: impl Into<String>) -> Self {
126        Self::new(LayoutDebugMessageType::Warning, message)
127    }
128
129    /// Helper for Error messages
130    #[track_caller]
131    pub fn error(message: impl Into<String>) -> Self {
132        Self::new(LayoutDebugMessageType::Error, message)
133    }
134
135    /// Helper for BoxProps debug messages
136    #[track_caller]
137    pub fn box_props(message: impl Into<String>) -> Self {
138        Self::new(LayoutDebugMessageType::BoxProps, message)
139    }
140
141    /// Helper for CSS Getter debug messages
142    #[track_caller]
143    pub fn css_getter(message: impl Into<String>) -> Self {
144        Self::new(LayoutDebugMessageType::CssGetter, message)
145    }
146
147    /// Helper for BFC Layout debug messages
148    #[track_caller]
149    pub fn bfc_layout(message: impl Into<String>) -> Self {
150        Self::new(LayoutDebugMessageType::BfcLayout, message)
151    }
152
153    /// Helper for IFC Layout debug messages
154    #[track_caller]
155    pub fn ifc_layout(message: impl Into<String>) -> Self {
156        Self::new(LayoutDebugMessageType::IfcLayout, message)
157    }
158
159    /// Helper for Table Layout debug messages
160    #[track_caller]
161    pub fn table_layout(message: impl Into<String>) -> Self {
162        Self::new(LayoutDebugMessageType::TableLayout, message)
163    }
164
165    /// Helper for Display Type debug messages
166    #[track_caller]
167    pub fn display_type(message: impl Into<String>) -> Self {
168        Self::new(LayoutDebugMessageType::DisplayType, message)
169    }
170}
171
/// FFI-safe string type backed by [`U8Vec`] with destructor-based memory management.
///
/// Contents are guaranteed to be valid UTF-8 by all safe constructors.
/// Memory ownership is tracked via the inner `U8Vec`'s destructor field.
#[repr(C)]
pub struct AzString {
    /// Raw byte storage. `AzString::as_str` reinterprets these bytes as UTF-8
    /// without re-validating them, so anything written here must be valid UTF-8.
    pub vec: U8Vec,
}
180
// Generates `OptionString`, the FFI-safe optional wrapper around `AzString`.
// NOTE(review): `copy = false` is presumably required because `AzString` is not
// `Copy` — confirm against the `impl_option!` definition.
impl_option!(
    AzString,
    OptionString,
    copy = false,
    [Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash]
);
187
188static DEFAULT_STR: &str = "";
189
190impl Default for AzString {
191    fn default() -> Self {
192        DEFAULT_STR.into()
193    }
194}
195
196impl<'a> From<&'a str> for AzString {
197    fn from(s: &'a str) -> Self {
198        s.to_string().into()
199    }
200}
201
202impl AsRef<str> for AzString {
203    fn as_ref(&self) -> &str {
204        self.as_str()
205    }
206}
207
208impl core::fmt::Debug for AzString {
209    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
210        self.as_str().fmt(f)
211    }
212}
213
214impl core::fmt::Display for AzString {
215    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
216        self.as_str().fmt(f)
217    }
218}
219
220impl AzString {
221    #[inline]
222    pub const fn from_const_str(s: &'static str) -> Self {
223        Self {
224            vec: U8Vec::from_const_slice(s.as_bytes()),
225        }
226    }
227
228    /// Creates a new AzString from a null-terminated C string (const char*).
229    /// This copies the string data into a new allocation.
230    ///
231    /// # Safety
232    /// - `ptr` must be a valid pointer to a null-terminated UTF-8 string
233    /// - The string must remain valid for the duration of this call
234    ///
235    /// Note: `ptr` is `*const i8` rather than `*const core::ffi::c_char`
236    /// so the auto-generated FFI signature in `dll_api_internal.rs`
237    /// (which uses a literal `i8`) matches on every target —
238    /// `c_char` is `i8` on x86/ARM Apple/Windows/Linux but `u8` on
239    /// Android, which would otherwise produce a `*const u8 vs *const i8`
240    /// mismatch at codegen-call sites. We cast internally before
241    /// handing the pointer to `CStr::from_ptr`.
242    #[inline]
243    pub unsafe fn from_c_str(ptr: *const i8) -> Self {
244        if ptr.is_null() {
245            return Self::default();
246        }
247        let c_str = core::ffi::CStr::from_ptr(ptr as *const core::ffi::c_char);
248        let bytes = c_str.to_bytes();
249        Self::copy_from_bytes(bytes.as_ptr(), 0, bytes.len())
250    }
251
252    /// Copies bytes from a pointer into a new AzString.
253    /// This is useful for C FFI where you have a char* buffer.
254    ///
255    /// Invalid UTF-8 sequences are replaced with U+FFFD to maintain
256    /// the UTF-8 invariant required by [`as_str()`](Self::as_str).
257    #[inline]
258    pub fn copy_from_bytes(ptr: *const u8, start: usize, len: usize) -> Self {
259        let raw = U8Vec::copy_from_bytes(ptr, start, len);
260        let s = String::from_utf8_lossy(raw.as_ref()).into_owned();
261        Self::from_string(s)
262    }
263
264    #[inline]
265    pub fn from_string(s: String) -> Self {
266        Self {
267            vec: U8Vec::from_vec(s.into_bytes()),
268        }
269    }
270
271    #[inline]
272    pub fn as_str(&self) -> &str {
273        unsafe { core::str::from_utf8_unchecked(self.vec.as_ref()) }
274    }
275
276    /// NOTE: CLONES the memory if the memory is external or &'static
277    /// Moves the memory out if the memory is library-allocated
278    #[inline]
279    pub fn clone_self(&self) -> Self {
280        Self {
281            vec: self.vec.clone_self(),
282        }
283    }
284
285    #[inline]
286    pub fn into_library_owned_string(self) -> String {
287        match self.vec.destructor {
288            U8VecDestructor::NoDestructor | U8VecDestructor::External(_) | U8VecDestructor::AlreadyDestroyed => {
289                self.as_str().to_string()
290            }
291            U8VecDestructor::DefaultRust => {
292                let m = core::mem::ManuallyDrop::new(self);
293                unsafe { String::from_raw_parts(m.vec.ptr as *mut u8, m.vec.len, m.vec.cap) }
294            }
295        }
296    }
297
298    #[inline]
299    pub fn as_bytes(&self) -> &[u8] {
300        self.vec.as_ref()
301    }
302
303    #[inline]
304    pub fn into_bytes(self) -> U8Vec {
305        let m = core::mem::ManuallyDrop::new(self);
306        U8Vec {
307            ptr: m.vec.ptr,
308            len: m.vec.len,
309            cap: m.vec.cap,
310            destructor: m.vec.destructor,
311        }
312    }
313
314    /// Returns the length of the string in bytes (not including null terminator)
315    #[inline]
316    pub fn len(&self) -> usize {
317        self.vec.len
318    }
319
320    /// Returns true if the string is empty
321    #[inline]
322    pub fn is_empty(&self) -> bool {
323        self.vec.len == 0
324    }
325
326    /// Creates a null-terminated copy of the string for C FFI usage.
327    /// Returns a new U8Vec that contains the string data followed by a null byte.
328    /// The caller is responsible for freeing this memory.
329    ///
330    /// Use this when you need to pass a string to C code that expects `const char*`.
331    #[inline]
332    pub fn to_c_str(&self) -> U8Vec {
333        let bytes = self.as_bytes();
334        let mut result = Vec::with_capacity(bytes.len() + 1);
335        result.extend_from_slice(bytes);
336        result.push(0); // null terminator
337        U8Vec::from_vec(result)
338    }
339
340    /// Shared implementation for UTF-16 decoding with a caller-supplied byte-order function.
341    ///
342    /// # Safety
343    /// - `ptr` must be valid for reading `len` bytes
344    /// - `len` must be even (UTF-16 uses 2 bytes per code unit)
345    unsafe fn from_utf16_with_byte_order(
346        ptr: *const u8,
347        len: usize,
348        from_bytes: fn([u8; 2]) -> u16,
349    ) -> Self {
350        if ptr.is_null() || len == 0 {
351            return Self::default();
352        }
353
354        // UTF-16 requires pairs of bytes
355        if !len.is_multiple_of(2) {
356            return Self::default();
357        }
358
359        let byte_slice = core::slice::from_raw_parts(ptr, len);
360        let code_units: Vec<u16> = byte_slice
361            .chunks_exact(2)
362            .map(|chunk| from_bytes([chunk[0], chunk[1]]))
363            .collect();
364
365        match String::from_utf16(&code_units) {
366            Ok(s) => Self::from_string(s),
367            Err(_) => Self::default(),
368        }
369    }
370
371    /// Creates a new AzString from UTF-16 encoded bytes (little-endian).
372    /// Returns an empty string if the input is invalid UTF-16 or has odd length.
373    ///
374    /// # Arguments
375    /// * `ptr` - Pointer to UTF-16 encoded bytes
376    /// * `len` - Length in bytes (not code units) - must be even
377    ///
378    /// # Safety
379    /// - `ptr` must be valid for reading `len` bytes
380    /// - `len` must be even (UTF-16 uses 2 bytes per code unit)
381    #[inline]
382    pub unsafe fn from_utf16_le(ptr: *const u8, len: usize) -> Self {
383        Self::from_utf16_with_byte_order(ptr, len, u16::from_le_bytes)
384    }
385
386    /// Creates a new AzString from UTF-16 encoded bytes (big-endian).
387    /// Returns an empty string if the input is invalid UTF-16 or has odd length.
388    ///
389    /// # Arguments
390    /// * `ptr` - Pointer to UTF-16 encoded bytes
391    /// * `len` - Length in bytes (not code units) - must be even
392    ///
393    /// # Safety
394    /// - `ptr` must be valid for reading `len` bytes
395    /// - `len` must be even (UTF-16 uses 2 bytes per code unit)
396    #[inline]
397    pub unsafe fn from_utf16_be(ptr: *const u8, len: usize) -> Self {
398        Self::from_utf16_with_byte_order(ptr, len, u16::from_be_bytes)
399    }
400
401    /// Creates a new AzString from UTF-8 bytes with lossy conversion.
402    /// Invalid UTF-8 sequences are replaced with the Unicode replacement character (U+FFFD).
403    ///
404    /// # Safety
405    /// - `ptr` must be valid for reading `len` bytes
406    #[inline]
407    pub unsafe fn from_utf8_lossy(ptr: *const u8, len: usize) -> Self {
408        if ptr.is_null() || len == 0 {
409            return Self::default();
410        }
411        
412        let byte_slice = core::slice::from_raw_parts(ptr, len);
413        let s = String::from_utf8_lossy(byte_slice).into_owned();
414        Self::from_string(s)
415    }
416
417    /// Creates a new AzString from UTF-8 bytes.
418    /// Returns an empty string if the input is not valid UTF-8.
419    ///
420    /// # Safety
421    /// - `ptr` must be valid for reading `len` bytes
422    #[inline]
423    pub unsafe fn from_utf8(ptr: *const u8, len: usize) -> Self {
424        if ptr.is_null() || len == 0 {
425            return Self::default();
426        }
427        
428        let byte_slice = core::slice::from_raw_parts(ptr, len);
429        match core::str::from_utf8(byte_slice) {
430            Ok(s) => Self::from_string(s.to_string()),
431            Err(_) => Self::default(),
432        }
433    }
434}
435
436impl From<String> for AzString {
437    fn from(input: String) -> AzString {
438        AzString::from_string(input)
439    }
440}
441
442impl PartialOrd for AzString {
443    fn partial_cmp(&self, rhs: &Self) -> Option<core::cmp::Ordering> {
444        self.as_str().partial_cmp(rhs.as_str())
445    }
446}
447
448impl Ord for AzString {
449    fn cmp(&self, rhs: &Self) -> core::cmp::Ordering {
450        self.as_str().cmp(rhs.as_str())
451    }
452}
453
454impl Clone for AzString {
455    fn clone(&self) -> Self {
456        self.clone_self()
457    }
458}
459
460impl PartialEq for AzString {
461    fn eq(&self, rhs: &Self) -> bool {
462        self.as_str().eq(rhs.as_str())
463    }
464}
465
// Equality is plain `str` equality (see the `PartialEq` impl), which is reflexive,
// so the `Eq` marker is sound.
impl Eq for AzString {}
467
468impl core::hash::Hash for AzString {
469    fn hash<H>(&self, state: &mut H)
470    where
471        H: core::hash::Hasher,
472    {
473        self.as_str().hash(state)
474    }
475}
476
477impl core::ops::Deref for AzString {
478    type Target = str;
479
480    fn deref(&self) -> &str {
481        self.as_str()
482    }
483}
484
// `OptionU8` is the optional-element type passed to `impl_vec!` for `U8Vec` below.
impl_option!(
    u8,
    OptionU8,
    [Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash]
);

// FFI-safe byte vector. `impl_vec!` receives the names of the vector type and its
// destructor / slice companions; the remaining macros add the trait impls named in
// their identifiers. NOTE(review): exact generated items are defined by the macros,
// which are not part of this file.
impl_vec!(u8, U8Vec, U8VecDestructor, U8VecDestructorType, U8VecSlice, OptionU8);
impl_vec_mut!(u8, U8Vec);
impl_vec_debug!(u8, U8Vec);
impl_vec_partialord!(u8, U8Vec);
impl_vec_ord!(u8, U8Vec);
impl_vec_clone!(u8, U8Vec, U8VecDestructor);
impl_vec_partialeq!(u8, U8Vec);
impl_vec_eq!(u8, U8Vec);
impl_vec_hash!(u8, U8Vec);
500
501impl U8Vec {
502    /// Copies bytes from a pointer into a new Vec.
503    /// This is useful for C FFI where you have a uint8_t* buffer.
504    ///
505    /// # Safety contract (caller must ensure)
506    /// - `ptr` must be valid for reading `start + len` bytes
507    /// - `start + len` must not overflow
508    #[inline]
509    pub fn copy_from_bytes(ptr: *const u8, start: usize, len: usize) -> Self {
510        if ptr.is_null() || len == 0 {
511            return Self::new();
512        }
513        debug_assert!(
514            start.checked_add(len).is_some(),
515            "U8Vec::copy_from_bytes: start + len overflows"
516        );
517        let slice = unsafe { core::slice::from_raw_parts(ptr.add(start), len) };
518        Self::from_vec(slice.to_vec())
519    }
520}
521
// Optional wrapper around a whole byte vector.
impl_option!(
    U8Vec,
    OptionU8Vec,
    copy = false,
    [Debug, Clone, PartialEq, Ord, PartialOrd, Eq, Hash]
);

// FFI-safe vector of `u16`.
impl_vec!(u16, U16Vec, U16VecDestructor, U16VecDestructorType, U16VecSlice, OptionU16);
impl_vec_debug!(u16, U16Vec);
impl_vec_partialord!(u16, U16Vec);
impl_vec_ord!(u16, U16Vec);
impl_vec_clone!(u16, U16Vec, U16VecDestructor);
impl_vec_partialeq!(u16, U16Vec);
impl_vec_eq!(u16, U16Vec);
impl_vec_hash!(u16, U16Vec);

// FFI-safe vector of `f32`. No `Ord` / `Eq` / `Hash` impls are requested here,
// in line with `f32` not implementing those traits.
impl_vec!(f32, F32Vec, F32VecDestructor, F32VecDestructorType, F32VecSlice, OptionF32);
impl_vec_debug!(f32, F32Vec);
impl_vec_partialord!(f32, F32Vec);
impl_vec_clone!(f32, F32Vec, F32VecDestructor);
impl_vec_partialeq!(f32, F32Vec);

// Vec<char>
// NOTE(review): presumably `char` values stored as `u32` for FFI — confirm with users.
impl_vec!(u32, U32Vec, U32VecDestructor, U32VecDestructorType, U32VecSlice, OptionU32);
impl_vec_mut!(u32, U32Vec);
impl_vec_debug!(u32, U32Vec);
impl_vec_partialord!(u32, U32Vec);
impl_vec_ord!(u32, U32Vec);
impl_vec_clone!(u32, U32Vec, U32VecDestructor);
impl_vec_partialeq!(u32, U32Vec);
impl_vec_eq!(u32, U32Vec);
impl_vec_hash!(u32, U32Vec);

// FFI-safe vector of `AzString`.
impl_vec!(AzString, StringVec, StringVecDestructor, StringVecDestructorType, StringVecSlice, OptionString);
impl_vec_debug!(AzString, StringVec);
impl_vec_partialord!(AzString, StringVec);
impl_vec_ord!(AzString, StringVec);
impl_vec_clone!(AzString, StringVec, StringVecDestructor);
impl_vec_partialeq!(AzString, StringVec);
impl_vec_eq!(AzString, StringVec);
impl_vec_hash!(AzString, StringVec);
563
564impl From<Vec<String>> for StringVec {
565    fn from(v: Vec<String>) -> StringVec {
566        let new_v: Vec<AzString> = v.into_iter().map(|s| s.into()).collect();
567        new_v.into()
568    }
569}
570
// Optional wrapper around a whole string vector.
impl_option!(
    StringVec,
    OptionStringVec,
    copy = false,
    [Debug, Clone, PartialOrd, PartialEq, Ord, Eq, Hash]
);

// FFI-safe `Option` wrappers for primitive types. Each invocation lists the traits
// requested for the generated type.
impl_option!(
    u16,
    OptionU16,
    [Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash]
);
impl_option!(
    u32,
    OptionU32,
    [Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash]
);
impl_option!(
    u64,
    OptionU64,
    [Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash]
);
impl_option!(
    usize,
    OptionUsize,
    [Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash]
);
impl_option!(
    i16,
    OptionI16,
    [Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash]
);
impl_option!(
    i32,
    OptionI32,
    [Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash]
);
impl_option!(bool, OptionBool, [Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash]);
// The float wrappers only request `PartialEq` / `PartialOrd`; `OptionF32` gets
// hand-written `Hash`, `Eq` and `Ord` impls further down in this file.
impl_option!(f32, OptionF32, [Debug, Copy, Clone, PartialEq, PartialOrd]);
impl_option!(f64, OptionF64, [Debug, Copy, Clone, PartialEq, PartialOrd]);
611
612// Manual implementations for Hash and Ord on OptionF32 (since f32 doesn't implement these traits)
613impl core::hash::Hash for OptionF32 {
614    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
615        match self {
616            OptionF32::None => 0u8.hash(state),
617            OptionF32::Some(v) => {
618                1u8.hash(state);
619                v.to_bits().hash(state);
620            }
621        }
622    }
623}
624
// Marker impl needed so `OptionF32` can implement `Ord`.
// NOTE(review): `Eq` promises reflexivity, but the `PartialEq` requested from
// `impl_option!` presumably compares the inner `f32`, under which
// `Some(NaN) != Some(NaN)` — callers should avoid NaN payloads in keyed collections.
impl Eq for OptionF32 {}
626
627impl Ord for OptionF32 {
628    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
629        match (self, other) {
630            (OptionF32::None, OptionF32::None) => core::cmp::Ordering::Equal,
631            (OptionF32::None, OptionF32::Some(_)) => core::cmp::Ordering::Less,
632            (OptionF32::Some(_), OptionF32::None) => core::cmp::Ordering::Greater,
633            (OptionF32::Some(a), OptionF32::Some(b)) => {
634                a.partial_cmp(b).unwrap_or(core::cmp::Ordering::Equal)
635            }
636        }
637    }
638}
639
640// ============================================================================
641// StringArena — bump allocator for AzString bytes
642// ============================================================================
643//
644// Consolidates thousands of small AzString allocations (tag names,
645// attribute values, text content) into a handful of 64 KiB chunks.
646// Each arena-backed AzString uses `U8VecDestructor::External` and stashes
647// a cloned `Arc<StringArenaInner>` pointer in the `cap` field — dropping
648// the AzString decrements the refcount, and the backing bytes are freed
649// only when the last reference goes away. This works across FFI without
650// changing any public struct layout.
651
652use alloc::sync::Arc;
653use core::cell::UnsafeCell;
654
/// Shared interior of a [`StringArena`]. Refcounted via `Arc<Self>`;
/// never accessed through its `Arc` for mutation — only the owning
/// `StringArena` (with `&mut self`) mutates the chunks.
struct StringArenaInner {
    /// Pre-allocated byte chunks. Pointers into a chunk are meant to stay valid
    /// because a chunk is never grown past its `Vec::capacity()` — no reallocation.
    /// Growing the *outer* `Vec` only moves the inner `Vec` headers, not the heap
    /// buffers the handed-out pointers refer to.
    chunks: UnsafeCell<Vec<Vec<u8>>>,
    /// Remaining unused bytes in the last chunk; `0` when a fresh
    /// chunk is needed.
    current_remaining: UnsafeCell<usize>,
}
666
// SAFETY:
// - Mutation through `UnsafeCell` only happens via `&mut StringArena`,
//   which owns the sole external reference to `Arc<StringArenaInner>`
//   held in a `StringArena`. Other `Arc` references live inside AzString
//   destructors and never touch chunks — they only drop the Arc.
// - `Arc<T>` itself needs `T: Send + Sync` to cross threads; since the
//   destructor can run on any thread, we claim Send+Sync and rely on the
//   single-writer invariant for mutation safety.
// - The only read through a shared reference is `StringArena::metrics`, which
//   takes `&self` and therefore cannot overlap with `intern` (`&mut self`).
unsafe impl Send for StringArenaInner {}
unsafe impl Sync for StringArenaInner {}
677
/// Bump allocator backing arena-owned `AzString` instances.
///
/// Every AzString returned by [`StringArena::intern`] holds a cloned
/// `Arc` to this arena; the backing bytes stay alive until the last
/// such AzString (and the arena handle itself) is dropped.
///
/// Intended use: create one arena per XML/HTML parse pass, intern all
/// tag names / attribute values / text content through it, then drop the
/// handle. The AzStrings embedded in the resulting `StyledDom` keep the
/// arena alive for as long as they need the bytes.
pub struct StringArena {
    /// Shared chunk storage. The handle holds one strong reference; each interned
    /// `AzString` holds one more (created in `intern`).
    inner: Arc<StringArenaInner>,
}
691
692impl StringArena {
693    /// Size of a freshly allocated chunk. Large enough that a typical
694    /// DOM parse fits in 1-2 chunks, small enough to not over-allocate
695    /// for small documents.
696    pub const CHUNK_SIZE: usize = 64 * 1024;
697
698    pub fn new() -> Self {
699        Self {
700            inner: Arc::new(StringArenaInner {
701                chunks: UnsafeCell::new(Vec::new()),
702                current_remaining: UnsafeCell::new(0),
703            }),
704        }
705    }
706
707    /// Returns `(chunk_count, total_bytes_used)` for metrics.
708    pub fn metrics(&self) -> (usize, usize) {
709        // Safety: metrics is read-only; the caller holds &self so no
710        // concurrent mutation via &mut self is possible.
711        unsafe {
712            let chunks = &*self.inner.chunks.get();
713            let total: usize = chunks.iter().map(|c| c.len()).sum();
714            (chunks.len(), total)
715        }
716    }
717
718    /// Intern `s` into the arena and return an AzString whose backing
719    /// bytes live inside the arena. The returned AzString owns a cloned
720    /// `Arc` reference; dropping it decrements the refcount, and the
721    /// arena frees its chunks when the final reference is released.
722    pub fn intern(&mut self, s: &str) -> AzString {
723        let bytes = s.as_bytes();
724        let len = bytes.len();
725
726        let ptr: *const u8 = if len == 0 {
727            // Empty strings don't need arena storage; a non-null dangling
728            // pointer is fine because `len == 0` means nobody will deref.
729            core::ptr::NonNull::<u8>::dangling().as_ptr()
730        } else {
731            // Safety: `&mut self` ⇒ exclusive access to inner chunks.
732            unsafe {
733                let chunks: &mut Vec<Vec<u8>> = &mut *self.inner.chunks.get();
734                let remaining: &mut usize = &mut *self.inner.current_remaining.get();
735
736                // Oversized strings get their own dedicated chunk so we
737                // don't waste the tail of the current chunk.
738                if len > Self::CHUNK_SIZE / 2 {
739                    let mut v = Vec::with_capacity(len);
740                    v.extend_from_slice(bytes);
741                    let p = v.as_ptr();
742                    chunks.push(v);
743                    p
744                } else {
745                    if *remaining < len {
746                        chunks.push(Vec::with_capacity(Self::CHUNK_SIZE));
747                        *remaining = Self::CHUNK_SIZE;
748                    }
749                    // Safety: chunk was allocated with capacity ≥ len and
750                    // `remaining` tracks unused capacity — no realloc.
751                    let chunk = chunks.last_mut().unwrap();
752                    let offset = chunk.len();
753                    chunk.extend_from_slice(bytes);
754                    *remaining -= len;
755                    chunk.as_ptr().add(offset)
756                }
757            }
758        };
759
760        // Each AzString carries its own Arc reference count. Stash the
761        // raw Arc pointer in `cap` so the External destructor can decrement.
762        let arc_raw = Arc::into_raw(Arc::clone(&self.inner));
763
764        AzString {
765            vec: U8Vec {
766                ptr,
767                len,
768                // NOTE: `cap` stores an Arc pointer, not a capacity. This
769                // works because the `External` destructor path never calls
770                // `Vec::from_raw_parts(ptr, len, cap)` — only `DefaultRust`
771                // does that.
772                cap: arc_raw as usize,
773                destructor: U8VecDestructor::External(arena_string_destructor),
774            },
775        }
776    }
777}
778
779impl Default for StringArena {
780    fn default() -> Self {
781        Self::new()
782    }
783}
784
785/// Destructor installed on every arena-backed AzString. Reads the Arc
786/// pointer out of `cap` and drops one Arc reference; when the count
787/// reaches zero the `StringArenaInner` is freed.
788extern "C" fn arena_string_destructor(vec: *mut U8Vec) {
789    // Safety: called at most once per AzString drop. `cap` was set by
790    // `StringArena::intern` to `Arc::into_raw(Arc<StringArenaInner>)`.
791    unsafe {
792        let v = &mut *vec;
793        let arc_raw = v.cap as *const StringArenaInner;
794        if !arc_raw.is_null() {
795            let _ = Arc::from_raw(arc_raw);
796            // Prevent a hypothetical double-drop from dereferencing
797            // freed memory.
798            v.cap = 0;
799        }
800    }
801}
802
#[cfg(test)]
mod string_arena_tests {
    use super::*;

    /// Interned strings, including the empty one, read back unchanged.
    #[test]
    fn intern_round_trip() {
        let mut arena = StringArena::new();
        let inputs = ["hello", "world", ""];
        let interned: Vec<AzString> = inputs.iter().map(|text| arena.intern(text)).collect();
        for (expected, actual) in inputs.iter().zip(interned.iter()) {
            assert_eq!(actual.as_str(), *expected);
        }
    }

    /// An interned string stays readable after the arena handle is gone.
    #[test]
    fn strings_outlive_arena_handle() {
        let mut arena = StringArena::new();
        let survivor = arena.intern("survives drop of arena handle");
        drop(arena);
        assert_eq!(survivor.as_str(), "survives drop of arena handle");
    }

    /// A string of a full chunk's size round-trips intact.
    #[test]
    fn oversized_string_gets_dedicated_chunk() {
        let big = "x".repeat(StringArena::CHUNK_SIZE);
        let mut arena = StringArena::new();
        let interned = arena.intern(&big);
        assert_eq!(interned.len(), big.len());
        assert_eq!(interned.as_str(), big.as_str());
    }

    /// A hundred short strings fit into at most two chunks and read back intact.
    #[test]
    fn many_small_strings_share_chunk() {
        let mut arena = StringArena::new();
        let strings: Vec<AzString> = (0..100)
            .map(|i| arena.intern(&format!("s{i}")))
            .collect();
        let (chunks, _bytes) = arena.metrics();
        assert!(chunks <= 2, "expected ≤2 chunks for 100 small strings, got {chunks}");
        for (i, s) in strings.iter().enumerate() {
            assert_eq!(s.as_str(), format!("s{i}"));
        }
    }

    /// Cloning an External AzString deep-copies into DefaultRust, so the clone
    /// doesn't depend on the arena at all.
    #[test]
    fn clone_deep_copies_and_is_independent() {
        let mut arena = StringArena::new();
        let original = arena.intern("deep-copy test");
        let clone = original.clone();
        drop(original);
        drop(arena);
        assert_eq!(clone.as_str(), "deep-copy test");
    }
}
861}