asmkit/core/jit_allocator.rs

use alloc::vec::Vec;
use core::cell::{Cell, UnsafeCell};
use core::mem::size_of;
use core::ops::Range;
use core::ptr::null_mut;

use crate::util::virtual_memory::{
    self, alloc, alloc_dual_mapping, flush_instruction_cache, protect_jit_memory, release,
    release_dual_mapping, DualMapping, MemoryFlags, ProtectJitAccess,
};
use crate::util::{
    align_down, align_up, bit_vector_clear, bit_vector_fill, bit_vector_get_bit,
    bit_vector_index_of, bit_vector_set_bit,
};
use crate::AsmError;

#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u32)]
/// A policy that can be used with `reset()` functions.
pub enum ResetPolicy {
    /// Soft reset, does not deallocate memory (default).
    Soft = 0,

    /// Hard reset, releases all memory used, if any.
    Hard = 1,
}
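
/// Options that configure a [JitAllocator] instance.
///
/// A minimal sketch of overriding the defaults (everything not listed keeps its default value):
///
/// ```ignore
/// let options = JitAllocatorOptions {
///     use_multiple_pools: false,
///     ..JitAllocatorOptions::default()
/// };
/// let allocator = JitAllocator::new(options);
/// ```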
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct JitAllocatorOptions {
    /// Enables the use of anonymous memory-mapped memory that is mapped into two buffers with different pointers.
    /// The first buffer has read and execute permissions and the second buffer has read and write permissions.
    ///
    /// See [alloc_dual_mapping] for more details about this feature.
    ///
    /// ## Remarks
    ///
    /// Dual mapping is turned on automatically by [JitAllocator] when a hardened runtime enforces a `W^X` policy,
    /// so specifying this flag essentially forces the use of dual-mapped pages even when RWX pages can be allocated
    /// and dual mapping is not necessary.
    pub use_dual_mapping: bool,
    /// Enables the use of multiple pools with increasing granularity instead of a single pool. This flag enables
    /// 3 internal pools in total, having 64, 128, and 256 byte granularity.
    ///
    /// This feature is only recommended for users that generate a lot of code and would like to minimize the overhead
    /// of `JitAllocator` itself by having blocks of different allocation granularities. Using this feature for only a
    /// few allocations won't pay off, as the allocator may need to create more blocks initially before it can take
    /// advantage of variable block granularity.
    pub use_multiple_pools: bool,
    /// Always fill reserved memory with a fill pattern.
    ///
    /// Causes a new block to be cleared with the fill pattern and freshly released memory to be cleared before it is
    /// made ready for another use.
    pub fill_unused_memory: bool,
    /// When this flag is set the allocator immediately releases unused blocks during `release()` or `reset()`.
    /// When this flag is not set the allocator keeps one empty block in each pool to prevent excessive virtual
    /// memory allocations and deallocations in border cases, which involve constantly allocating and deallocating a
    /// single block caused by repetitively calling `alloc()` and `release()` when the allocator has either no blocks
    /// or has all blocks fully occupied.
    pub immediate_release: bool,
    pub custom_fill_pattern: Option<u32>,

    pub block_size: u32,
    pub granularity: u32,
}

impl Default for JitAllocatorOptions {
    fn default() -> Self {
        Self {
            use_dual_mapping: true,
            use_multiple_pools: true,
            fill_unused_memory: true,
            immediate_release: false,
            custom_fill_pattern: None,
            block_size: 0,
            granularity: 0,
        }
    }
}

#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
const DEFAULT_FILL_PATTERN: u32 = 0xCCCCCCCC; // int3
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
const DEFAULT_FILL_PATTERN: u32 = 0x0; // no architecture-specific breakpoint pattern

/// Number of pools to use when `JitAllocatorOptions::use_multiple_pools` is set.
///
/// Each pool doubles the granularity of the previous one to make memory management
/// more efficient. The ideal number of pools appears to be 3 to 4, as it distributes
/// small and large functions properly.
const MULTI_POOL_COUNT: usize = 3;

/// Minimum granularity (and the default granularity for pool #0).
const MIN_GRANULARITY: usize = 64;

/// Maximum block size (32MB).
const MAX_BLOCK_SIZE: usize = 32 * 1024 * 1024;

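/// Iterates over contiguous ranges of bits that have the value `B` (either 0 or 1) in a bit vector.
///
/// With `B == 0` the iterator yields the unused (free) ranges of a block's 'used' bit-vector, which is how
/// `alloc()` searches for a free area of the requested size.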
struct BitVectorRangeIterator<'a, const B: u32> {
    slice: &'a [u32],
    idx: usize,
    end: usize,
    bit_word: u32,
}

const BIT_WORD_SIZE: usize = core::mem::size_of::<u32>() * 8;

impl<'a, const B: u32> BitVectorRangeIterator<'a, B> {
    const XOR_MASK: u32 = if B == 0 { u32::MAX } else { 0 };

    fn from_slice_and_nbitwords(data: &'a [u32], num_bit_words: usize) -> Self {
        Self::new(data, num_bit_words, 0, num_bit_words * BIT_WORD_SIZE)
    }

    fn new(data: &'a [u32], _num_bit_words: usize, start: usize, end: usize) -> Self {
        let idx = align_down(start, BIT_WORD_SIZE);
        let slice = &data[idx / BIT_WORD_SIZE..];

        let mut bit_word = 0;

        if idx < end {
            bit_word =
                (slice[0] ^ Self::XOR_MASK) & (u32::MAX << (start as u32 % BIT_WORD_SIZE as u32));
        }

        Self {
            slice,
            idx,
            end,
            bit_word,
        }
    }

    fn next_range(&mut self, range_hint: u32) -> Option<Range<u32>> {
        while self.bit_word == 0 {
            self.idx += BIT_WORD_SIZE;

            if self.idx >= self.end {
                return None;
            }

            self.slice = &self.slice[1..];
            self.bit_word = self.slice[0] ^ Self::XOR_MASK;
        }

        let i = self.bit_word.trailing_zeros();
        let start = self.idx as u32 + i;
        self.bit_word = !(self.bit_word ^ !(u32::MAX << i));
        let mut end;
        if self.bit_word == 0 {
            end = (self.idx as u32 + BIT_WORD_SIZE as u32).min(self.end as _);

            while end.wrapping_sub(start) < range_hint {
                self.idx += BIT_WORD_SIZE;

                if self.idx >= self.end {
                    break;
                }

                self.slice = &self.slice[1..];
                self.bit_word = self.slice[0] ^ Self::XOR_MASK;

                if self.bit_word != u32::MAX {
                    let j = self.bit_word.trailing_zeros();
                    end = (self.idx as u32 + j).min(self.end as _);
                    self.bit_word = !(self.bit_word ^ !(u32::MAX << j));
                    break;
                }

                end = (self.idx as u32 + BIT_WORD_SIZE as u32).min(self.end as _);
                self.bit_word = 0;
                continue;
            }

            Some(start..end)
        } else {
            let j = self.bit_word.trailing_zeros();
            end = (self.idx as u32 + j).min(self.end as _);

            self.bit_word = !(self.bit_word ^ !(u32::MAX << j));

            Some(start..end)
        }
    }
}

impl<'a> Iterator for BitVectorRangeIterator<'a, 0> {
    type Item = Range<u32>;

    fn next(&mut self) -> Option<Self::Item> {
        self.next_range(u32::MAX)
    }
}

use intrusive_collections::{intrusive_adapter, rbtree::*};
use intrusive_collections::{KeyAdapter, UnsafeRef};

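/// A single block of virtual memory managed by a [JitAllocatorPool], together with the bookkeeping needed to
/// sub-allocate areas from it at the pool's granularity.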
struct JitAllocatorBlock {
    node: Link,
    list_node: intrusive_collections::LinkedListLink,

    /// Link to the pool that owns this block.
    pool: *mut JitAllocatorPool,
    /// Virtual memory mapping - either single mapping (both pointers equal) or
    /// dual mapping, where one pointer is Read+Execute and the second Read+Write.
    mapping: DualMapping,
    /// Virtual memory size (block size) [bytes].
    block_size: usize,

    flags: Cell<u32>,
    area_size: Cell<u32>,
    area_used: Cell<u32>,
    largest_unused_area: Cell<u32>,
    search_start: Cell<u32>,
    search_end: Cell<u32>,

    used_bitvector: UnsafeCell<alloc::vec::Vec<u32>>,
    stop_bitvector: UnsafeCell<alloc::vec::Vec<u32>>,
}

impl JitAllocatorBlock {
    const FLAG_EMPTY: u32 = 0x00000001;
    const FLAG_DIRTY: u32 = 0x00000002;
    const FLAG_DUAL_MAPPED: u32 = 0x00000004;

    fn pool(&self) -> *mut JitAllocatorPool {
        self.pool
    }

    fn rx_ptr(&self) -> *const u8 {
        self.mapping.rx
    }

    fn rw_ptr(&self) -> *mut u8 {
        self.mapping.rw
    }

    fn flags(&self) -> u32 {
        self.flags.get()
    }

    fn add_flags(&self, flags: u32) {
        self.flags.set(self.flags() | flags);
    }

    fn clear_flags(&self, flags: u32) {
        self.flags.set(self.flags() & !flags);
    }

    fn is_dirty(&self) -> bool {
        (self.flags() & Self::FLAG_DIRTY) != 0
    }

    fn block_size(&self) -> usize {
        self.block_size
    }

    fn area_used(&self) -> u32 {
        self.area_used.get()
    }

    fn area_size(&self) -> u32 {
        self.area_size.get()
    }

    fn largest_unused_area(&self) -> u32 {
        self.largest_unused_area.get()
    }

    fn search_start(&self) -> u32 {
        self.search_start.get()
    }

    fn search_end(&self) -> u32 {
        self.search_end.get()
    }

    fn used_bitvector(&self) -> &alloc::vec::Vec<u32> {
        unsafe { &*self.used_bitvector.get() }
    }

    fn stop_bitvector(&self) -> &alloc::vec::Vec<u32> {
        unsafe { &*self.stop_bitvector.get() }
    }

    fn used_bitvector_mut(&self) -> &mut alloc::vec::Vec<u32> {
        unsafe { &mut *self.used_bitvector.get() }
    }

    fn stop_bitvector_mut(&self) -> &mut alloc::vec::Vec<u32> {
        unsafe { &mut *self.stop_bitvector.get() }
    }

    fn area_available(&self) -> u32 {
        self.area_size() - self.area_used()
    }

    fn mark_allocated_area(&self, allocated_area_start: u32, allocated_area_end: u32) {
        let allocated_area_size = allocated_area_end - allocated_area_start;

        bit_vector_fill(
            self.used_bitvector_mut(),
            allocated_area_start as _,
            allocated_area_size as _,
        );
        bit_vector_set_bit(
            self.stop_bitvector_mut(),
            allocated_area_end as usize - 1,
            true,
        );

        // SAFETY: Done inside JitAllocator behind mutex and pool is valid.
        unsafe {
            (*self.pool).total_area_used += allocated_area_size as usize;
        }

        self.area_used
            .set(self.area_used() + allocated_area_size as u32);

        if self.area_available() == 0 {
            self.search_start.set(self.area_size());
            self.search_end.set(0);
            self.largest_unused_area.set(0);
            self.clear_flags(Self::FLAG_DIRTY);
        } else {
            if self.search_start.get() == allocated_area_start {
                self.search_start.set(allocated_area_end as _);
            }

            if self.search_end.get() == allocated_area_end {
                self.search_end.set(allocated_area_start as _);
            }

            self.add_flags(Self::FLAG_DIRTY);
        }
    }

    fn mark_released_area(&self, released_area_start: u32, released_area_end: u32) {
        let released_area_size = released_area_end - released_area_start;

        // SAFETY: Done behind mutex and pool is valid.
        unsafe {
            (*self.pool).total_area_used -= released_area_size as usize;
        }

        self.area_used
            .set(self.area_used() - released_area_size as u32);
        self.search_start
            .set(self.search_start.get().min(released_area_start));
        self.search_end
            .set(self.search_end.get().max(released_area_end));

        bit_vector_clear(
            self.used_bitvector_mut(),
            released_area_start as _,
            released_area_size as _,
        );
        bit_vector_set_bit(
            self.stop_bitvector_mut(),
            released_area_end as usize - 1,
            false,
        );

        if self.area_used() == 0 {
            self.search_start.set(0);
            self.search_end.set(self.area_size());
            self.largest_unused_area.set(self.area_size());
            self.add_flags(Self::FLAG_EMPTY);
            self.clear_flags(Self::FLAG_DIRTY);
        } else {
            self.add_flags(Self::FLAG_DIRTY);
        }
    }

    fn mark_shrunk_area(&self, shrunk_area_start: u32, shrunk_area_end: u32) {
        let shrunk_area_size = shrunk_area_end - shrunk_area_start;

        // Shrunk area cannot start at zero as it would mean that we have shrunk the first
        // block to zero bytes, which is not allowed as such block must be released instead.
        assert!(shrunk_area_start != 0);
        assert!(shrunk_area_end != self.area_size());

        // SAFETY: Done behind mutex and pool is valid.
        unsafe {
            (*self.pool).total_area_used -= shrunk_area_size as usize;
        }

        self.area_used.set(self.area_used() - shrunk_area_size);
        self.search_start
            .set(self.search_start.get().min(shrunk_area_start));
        self.search_end
            .set(self.search_end.get().max(shrunk_area_end));

        bit_vector_clear(
            self.used_bitvector_mut(),
            shrunk_area_start as _,
            shrunk_area_size as _,
        );
        bit_vector_set_bit(
            self.stop_bitvector_mut(),
            shrunk_area_end as usize - 1,
            false,
        );
        bit_vector_set_bit(
            self.stop_bitvector_mut(),
            shrunk_area_start as usize - 1,
            true,
        );

        self.add_flags(Self::FLAG_DIRTY);
    }
}

impl PartialOrd for JitAllocatorBlock {
    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
        self.rx_ptr().partial_cmp(&other.rx_ptr())
    }
}

impl Ord for JitAllocatorBlock {
    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
        self.rx_ptr().cmp(&other.rx_ptr())
    }
}

impl PartialEq for JitAllocatorBlock {
    fn eq(&self, other: &Self) -> bool {
        self.rx_ptr() == other.rx_ptr()
    }
}

impl Eq for JitAllocatorBlock {}

use intrusive_collections::linked_list::LinkedList;

intrusive_adapter!(JitAllocatorBlockAdapter = UnsafeRef<JitAllocatorBlock> : JitAllocatorBlock { node: Link });
intrusive_adapter!(BlockListAdapter = UnsafeRef<JitAllocatorBlock> : JitAllocatorBlock { list_node: intrusive_collections::LinkedListLink });

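/// Key used to look up the block that owns a given RX pointer in the RB tree.
///
/// The `Ord` implementation below treats a key as equal to a block's key whenever its pointer falls inside the
/// block's `[rxptr, rxptr + block_size)` range, which is what lets `RBTree::find` locate the owning block from any
/// pointer returned by `alloc()`.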
struct BlockKey {
    rxptr: *const u8,
    block_size: u32,
}

impl PartialEq for BlockKey {
    fn eq(&self, other: &Self) -> bool {
        self.rxptr == other.rxptr
    }
}

impl Eq for BlockKey {}

impl PartialOrd for BlockKey {
    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for BlockKey {
    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
        let addr_off = other.rxptr as usize + other.block_size as usize;

        if addr_off <= self.rxptr as usize {
            return core::cmp::Ordering::Less;
        } else if other.rxptr > self.rxptr {
            return core::cmp::Ordering::Greater;
        } else {
            return core::cmp::Ordering::Equal;
        }
    }
}

impl<'a> KeyAdapter<'a> for JitAllocatorBlockAdapter {
    type Key = BlockKey;

    fn get_key(
        &self,
        value: &'a <Self::PointerOps as intrusive_collections::PointerOps>::Value,
    ) -> Self::Key {
        BlockKey {
            rxptr: value.rx_ptr(),
            block_size: value.block_size as _,
        }
    }
}

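/// A pool of blocks that share the same allocation granularity.
///
/// With `use_multiple_pools` enabled the allocator keeps several pools whose granularity doubles from one pool to
/// the next, so small and large allocations end up in different pools.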
struct JitAllocatorPool {
    blocks: LinkedList<BlockListAdapter>,
    cursor: *mut JitAllocatorBlock,

    block_count: u32,
    granularity: u16,
    granularity_log2: u8,
    empty_block_count: u8,
    total_area_size: usize,
    total_area_used: usize,
    total_overhead_bytes: usize,
}

impl JitAllocatorPool {
    fn new(granularity: u32) -> Self {
        let granularity_log2 = granularity.trailing_zeros() as u8;
        let granularity = granularity as u16;

        Self {
            blocks: LinkedList::new(BlockListAdapter::new()),
            cursor: core::ptr::null_mut(),
            block_count: 0,
            granularity,
            granularity_log2,
            empty_block_count: 0,
            total_area_size: 0,
            total_area_used: 0,
            total_overhead_bytes: 0,
        }
    }

    fn reset(&mut self) {
        self.blocks.clear();
        self.cursor = core::ptr::null_mut();
        self.block_count = 0;
        self.empty_block_count = 0;
        self.total_area_size = 0;
        self.total_area_used = 0;
        self.total_overhead_bytes = 0;
    }

    fn byte_size_from_area_size(&self, area_size: u32) -> usize {
        area_size as usize * self.granularity as usize
    }

    fn area_size_from_byte_size(&self, byte_size: usize) -> u32 {
        ((byte_size + self.granularity as usize - 1) >> self.granularity_log2) as u32
    }

    fn bit_word_count_from_area_size(&self, area_size: u32) -> usize {
        align_up(area_size as _, 32) / 32
    }
}

use alloc::boxed::Box;

/// A simple implementation of a memory manager that uses [virtual_memory]
/// functions to manage virtual memory for JIT-compiled code.
///
/// Implementation notes:
///
/// - Granularity of allocated blocks is different from the granularity of a typical C malloc. In addition, the
///   allocator can use several memory pools having different granularities to minimize the maintenance overhead.
///   The multiple-pools feature requires the `use_multiple_pools` flag to be set.
///
/// - The allocator doesn't store any information in executable memory. Instead, the implementation uses two
///   bit-vectors to manage allocated memory of each allocator block. The first bit-vector, called 'used', is used to
///   track used memory (where each bit represents a memory size defined by the granularity) and the second bit-vector,
///   called 'stop', is used as a sentinel to mark where an allocated area ends.
///
/// - Internally, the allocator also uses an RB tree to keep track of all blocks across all pools. Each inserted block
///   is added to the tree so it can be matched quickly during `release()` and `shrink()`.
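///
/// ## Example
///
/// A minimal usage sketch (error handling elided; `code` stands for the machine-code bytes you want to run):
///
/// ```ignore
/// let mut allocator = JitAllocator::new(JitAllocatorOptions::default());
///
/// // Reserve executable memory and copy the code into it through the RW mapping.
/// let mut span = allocator.alloc(code.len())?;
/// unsafe { allocator.copy_from_slice(&mut span, 0, code)? };
///
/// // Execute through the RX mapping.
/// let entry: extern "C" fn() -> u64 = unsafe { core::mem::transmute(span.rx()) };
/// let result = entry();
/// ```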
pub struct JitAllocator {
    options: JitAllocatorOptions,
    block_size: usize,
    granularity: usize,
    fill_pattern: u32,

    allocation_count: usize,
    tree: RBTree<JitAllocatorBlockAdapter>,
    pools: Box<[*mut JitAllocatorPool]>,
}

impl JitAllocator {
    /// Creates a new JitAllocator instance.
    pub fn new(params: JitAllocatorOptions) -> Box<Self> {
        let vm_info = virtual_memory::info();

        let mut block_size = params.block_size;
        let mut granularity = params.granularity;

        let mut pool_count = 1;

        if params.use_multiple_pools {
            pool_count = MULTI_POOL_COUNT;
        }

        if block_size < 64 * 1024 || block_size > 256 * 1024 * 1024 || !block_size.is_power_of_two()
        {
            block_size = vm_info.page_granularity as _;
        }

        if granularity < 64 || granularity > 256 || !granularity.is_power_of_two() {
            granularity = MIN_GRANULARITY as _;
        }

        let fill_pattern = params.custom_fill_pattern.unwrap_or(DEFAULT_FILL_PATTERN);

        let mut pools = Vec::with_capacity(pool_count);

        // Each pool doubles the granularity of the previous one (e.g. 64, 128, 256).
        for pool_id in 0..pool_count {
            pools.push(Box::into_raw(Box::new(JitAllocatorPool::new(
                granularity << pool_id,
            ))));
        }

        let allocator = Box::new(Self {
            options: params,
            block_size: block_size as _,
            granularity: granularity as _,
            fill_pattern,
            allocation_count: 0,
            tree: RBTree::new(JitAllocatorBlockAdapter::new()),
            pools: pools.into_boxed_slice(),
        });

        allocator
    }

    fn size_to_pool_id(&self, size: usize) -> usize {
        let mut pool_id = self.pools.len() - 1;
        let mut granularity = self.granularity << pool_id;

        while pool_id != 0 {
            if align_up(size, granularity) == size {
                break;
            }

            pool_id -= 1;
            granularity >>= 1;
        }

        pool_id
    }

    fn bitvector_size_to_byte_size(area_size: u32) -> usize {
        ((area_size as usize + 32 - 1) / 32) * size_of::<u32>()
    }

    fn calculate_ideal_block_size(
        &self,
        pool: *mut JitAllocatorPool,
        allocation_size: usize,
    ) -> usize {
        unsafe {
            let last = (*pool).blocks.back();

            let mut block_size = if !last.is_null() {
                last.get().unwrap().block_size()
            } else {
                self.block_size
            };

            if block_size < MAX_BLOCK_SIZE {
                block_size *= 2;
            }

            if allocation_size > block_size {
                block_size = align_up(allocation_size, block_size);

                // overflow
                if block_size < allocation_size {
                    return 0;
                }
            }

            block_size
        }
    }

    unsafe fn new_block(
        &mut self,
        pool: *mut JitAllocatorPool,
        block_size: usize,
    ) -> Result<Box<JitAllocatorBlock>, AsmError> {
        let area_size = (block_size + (*pool).granularity as usize - 1) >> (*pool).granularity_log2;
        let num_bit_words = (area_size + 32 - 1) / 32;

        let mut block = Box::new(JitAllocatorBlock {
            node: Link::new(),
            list_node: intrusive_collections::LinkedListLink::new(),
            pool,
            mapping: DualMapping {
                rx: null_mut(),
                rw: null_mut(),
            },
            block_size: block_size as _,
            flags: Cell::new(0),
            area_size: Cell::new(0),
            area_used: Cell::new(0),
            largest_unused_area: Cell::new(area_size as _),
            search_end: Cell::new(area_size as _),
            search_start: Cell::new(0),
            used_bitvector: UnsafeCell::new({
                // One bit per granularity unit, packed into `num_bit_words` 32-bit words.
                let mut v = Vec::with_capacity(num_bit_words);
                v.resize(num_bit_words, 0);
                v
            }),
            stop_bitvector: UnsafeCell::new({
                let mut v = Vec::with_capacity(num_bit_words);
                v.resize(num_bit_words, 0);
                v
            }),
        });
        let mut block_flags = 0;
        let virt_mem = if self.options.use_dual_mapping {
            block_flags |= JitAllocatorBlock::FLAG_DUAL_MAPPED;
            alloc_dual_mapping(block_size, MemoryFlags::ACCESS_RWX.into())?
        } else {
            let rx = alloc(block_size, MemoryFlags::ACCESS_RWX.into())?;
            DualMapping { rx, rw: rx }
        };

        if self.options.fill_unused_memory {
            protect_jit_memory(ProtectJitAccess::ReadWrite);
            fill_pattern(virt_mem.rw, self.fill_pattern, block_size);
            protect_jit_memory(ProtectJitAccess::ReadExecute);
            flush_instruction_cache(virt_mem.rx, block_size);
        }

        block.area_size.set(area_size as _);
        block.mapping = virt_mem;
        block.flags.set(block_flags);
        Ok(block)
    }

    unsafe fn delete_block(&mut self, block: *mut JitAllocatorBlock) {
        let mut block = Box::from_raw(block);
        if (block.flags() & JitAllocatorBlock::FLAG_DUAL_MAPPED) != 0 {
            let _ = release_dual_mapping(&mut block.mapping, block.block_size);
        } else {
            let _ = release(block.mapping.rx as _, block.block_size);
        }

        drop(block);
    }

    unsafe fn insert_block(&mut self, block: *mut JitAllocatorBlock) {
        let b = &mut *block;
        let pool = &mut *b.pool();

        if pool.cursor.is_null() {
            pool.cursor = block;
        }

        self.tree.insert(UnsafeRef::from_raw(block));
        pool.blocks.push_front(UnsafeRef::from_raw(block));

        pool.block_count += 1;
        pool.total_area_size += b.area_size() as usize;

        pool.total_overhead_bytes +=
            size_of::<JitAllocatorBlock>() + Self::bitvector_size_to_byte_size(b.area_size()) * 2;
    }

    unsafe fn remove_block(
        &mut self,
        block: &mut intrusive_collections::linked_list::CursorMut<'_, BlockListAdapter>,
    ) -> *mut JitAllocatorBlock {
        let b = block.get().unwrap();
        let pool = &mut *b.pool();

        if pool.cursor == b as *const JitAllocatorBlock as *mut _ {
            pool.cursor = if let Some(block) = block.peek_prev().get() {
                block as *const _ as *mut _
            } else if let Some(block) = block.peek_next().get() {
                block as *const _ as *mut _
            } else {
                null_mut()
            };
        }

        match self.tree.entry(&BlockKey {
            rxptr: b.rx_ptr(),
            block_size: b.block_size as _,
        }) {
            Entry::Occupied(mut c) => {
                assert_eq!(
                    UnsafeRef::into_raw(c.remove().unwrap()),
                    b as *const _ as *mut JitAllocatorBlock,
                    "blocks are not the same"
                );
            }

            _ => (),
        }
        let area_size = b.area_size();

        pool.block_count -= 1;
        pool.total_area_size -= area_size as usize;

        pool.total_overhead_bytes -=
            size_of::<JitAllocatorBlock>() + Self::bitvector_size_to_byte_size(area_size) * 2;

        UnsafeRef::into_raw(block.remove().unwrap())
    }

    unsafe fn wipe_out_block(
        &mut self,
        block: &mut intrusive_collections::linked_list::CursorMut<'_, BlockListAdapter>,
    ) {
        let b = block.get().unwrap();
        if (b.flags() & JitAllocatorBlock::FLAG_EMPTY) != 0 {
            return;
        }

        let pool = &mut *b.pool();

        let area_size = b.area_size();
        let granularity = pool.granularity;

        virtual_memory::protect_jit_memory(ProtectJitAccess::ReadWrite);

        if self.options.fill_unused_memory {
            let rw_ptr = b.rw_ptr();

            let it = BitVectorRangeIterator::from_slice_and_nbitwords(
                b.stop_bitvector(),
                pool.bit_word_count_from_area_size(b.area_size()),
            );

            for range in it {
                let span_ptr = rw_ptr.add(range.start as usize * granularity as usize);
                let span_size = (range.end as usize - range.start as usize) * granularity as usize;

                let mut n = 0;
                while n < span_size {
                    *span_ptr.add(n).cast::<u32>() = self.fill_pattern;
                    n += size_of::<u32>();
                }

                virtual_memory::flush_instruction_cache(span_ptr, span_size);
            }
        }

        virtual_memory::protect_jit_memory(ProtectJitAccess::ReadExecute);

        // The block stays linked in its pool's block list; only its bookkeeping is reset so
        // it can be reused as an empty block.
        pool.total_area_used -= b.area_used() as usize;

        b.used_bitvector_mut().fill(0);
        b.stop_bitvector_mut().fill(0);

        b.area_used.set(0);
        b.largest_unused_area.set(area_size);
        b.search_start.set(0);
        b.search_end.set(area_size);
        b.add_flags(JitAllocatorBlock::FLAG_EMPTY);
        b.clear_flags(JitAllocatorBlock::FLAG_DIRTY);
    }

    /// Resets the current allocator by emptying all pools and blocks.
    ///
    /// Frees all memory if `ResetPolicy::Hard` is specified or `immediate_release` in [JitAllocatorOptions] is set.
    pub unsafe fn reset(&mut self, reset_policy: ResetPolicy) {
        self.tree.clear();

        let pool_count = self.pools.len();

        for pool_id in 0..pool_count {
            let pool = unsafe { &mut *self.pools[pool_id] };

            let mut cursor = pool.blocks.front_mut();

            // Unless a hard reset (or `immediate_release`) was requested, keep the first block
            // of each pool as an empty block so the next `alloc()` does not have to map fresh
            // virtual memory.
            let keep_first_block = reset_policy != ResetPolicy::Hard
                && !self.options.immediate_release
                && !cursor.is_null();

            unsafe {
                let block_to_keep = if keep_first_block {
                    // Wipe the block and detach it; it is re-inserted below once the pool
                    // statistics have been reset.
                    self.wipe_out_block(&mut cursor);
                    Some(UnsafeRef::into_raw(cursor.remove().unwrap()))
                } else {
                    None
                };

                // `CursorMut::remove` already advances the cursor to the next element.
                while !cursor.is_null() {
                    let block = UnsafeRef::into_raw(cursor.remove().unwrap());
                    self.delete_block(block);
                }

                pool.reset();

                if let Some(block) = block_to_keep {
                    self.insert_block(block);
                    pool.empty_block_count = 1;
                }
            }
        }
    }

    /// Allocates `size` bytes in the executable memory region.
    /// Returns two pointers: one points to the Read-Execute mapping and the other to the Read-Write mapping.
    /// All code writes *must* go through the Read-Write mapping.
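    ///
    /// Prefer [`JitAllocator::write`] or [`JitAllocator::copy_from_slice`] for copying code into the returned
    /// [Span]: both wrap the write with [protect_jit_memory] calls and flush the instruction cache afterwards.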
    pub fn alloc(&mut self, size: usize) -> Result<Span, AsmError> {
        const NO_INDEX: u32 = u32::MAX;

        let size = align_up(size, self.granularity);

        if size == 0 {
            return Err(AsmError::InvalidArgument);
        }

        if size > u32::MAX as usize / 2 {
            return Err(AsmError::TooLarge);
        }

        unsafe {
            let pool_id = self.size_to_pool_id(size);
            let pool = &mut *self.pools[pool_id];

            let mut area_index = NO_INDEX;
            let area_size = pool.area_size_from_byte_size(size);

            let mut block = pool.blocks.cursor();
            block.move_next();
            if let Some(initial) = block.get().map(|x| x as *const JitAllocatorBlock) {
                loop {
                    let b = block.get().unwrap();

                    if b.area_available() >= area_size {
                        if b.is_dirty() || b.largest_unused_area() >= area_size {
                            let mut it = BitVectorRangeIterator::<0>::new(
                                b.used_bitvector(),
                                pool.bit_word_count_from_area_size(b.area_size()),
                                b.search_start() as _,
                                b.search_end() as _,
                            );

                            let mut range_start;
                            let mut range_end = b.area_size() as usize;

                            let mut search_start = usize::MAX;
                            let mut largest_area = 0;

                            while let Some(range) = it.next_range(area_size as _) {
                                range_start = range.start as _;
                                range_end = range.end as _;

                                let range_size = range_end - range_start;

                                if range_size >= area_size as usize {
                                    area_index = range_start as _;
                                    break;
                                }

                                search_start = search_start.min(range_start);
                                largest_area = largest_area.max(range_size);
                            }

                            if area_index != NO_INDEX {
                                break;
                            }

                            if search_start != usize::MAX {
                                let search_end = range_end;

                                b.search_start.set(search_start as _);

                                b.search_end.set(search_end as _);
                                b.largest_unused_area.set(largest_area as _);
                                b.clear_flags(JitAllocatorBlock::FLAG_DIRTY);
                            }
                        }
                    }

                    block.move_next();

                    if block.get().map(|x| x as *const _) == Some(initial) {
                        break;
                    }

                    if block.is_null() {
                        break;
                    }
                }
            }

            let mut block = block.get();

            if area_index == NO_INDEX {
                let block_size = self.calculate_ideal_block_size(pool, size);

                {
                    let nblock = self.new_block(pool, block_size)?;

                    area_index = 0;

                    nblock.search_start.set(area_size as _);

                    nblock
                        .largest_unused_area
                        .set(nblock.area_size() - area_size);

                    let nblock = Box::into_raw(nblock);

                    self.insert_block(nblock);

                    block = Some(&*nblock);
                }
            } else if (block.unwrap().flags() & JitAllocatorBlock::FLAG_EMPTY) != 0 {
                pool.empty_block_count -= 1;
                block.unwrap().clear_flags(JitAllocatorBlock::FLAG_EMPTY);
            }

            self.allocation_count += 1;

            let block = block.unwrap();

            block.mark_allocated_area(area_index, area_index + area_size);

            let offset = pool.byte_size_from_area_size(area_index);

            let rx = block.rx_ptr().add(offset);
            let rw = block.rw_ptr().add(offset);

            Ok(Span {
                block: block as *const JitAllocatorBlock as _,
                rx,
                rw,
                size,
                icache_clean: true,
            })
        }
    }

    /// Releases the memory allocated by `alloc`.
    ///
    /// # SAFETY
    /// - `rx_ptr` must have been returned from `alloc`
    /// - `rx_ptr` must have been allocated from this allocator
    /// - `rx_ptr` must not have been passed to `release` before
    /// - `rx_ptr` must point to the read-execute part of the memory returned from `alloc`.
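    ///
    /// A correct pairing looks like this (sketch, error handling elided):
    ///
    /// ```ignore
    /// let span = allocator.alloc(64)?;
    /// // ... copy code into the span via `JitAllocator::write` or `copy_from_slice` ...
    /// unsafe { allocator.release(span.rx())? };
    /// ```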
    pub unsafe fn release(&mut self, rx_ptr: *const u8) -> Result<(), AsmError> {
        if rx_ptr.is_null() {
            return Err(AsmError::InvalidArgument);
        }

        let block = self.tree.find(&BlockKey {
            rxptr: rx_ptr,
            block_size: 0,
        });

        let Some(block) = block.get() else {
            return Err(AsmError::InvalidState);
        };

        unsafe {
            let pool = &mut *block.pool;

            let offset = rx_ptr as usize - block.rx_ptr() as usize;

            let area_index = (offset >> pool.granularity_log2 as usize) as u32;
            let area_end =
                bit_vector_index_of(block.stop_bitvector(), area_index as _, true) as u32 + 1;
            let area_size = area_end - area_index;

            self.allocation_count -= 1;

            block.mark_released_area(area_index, area_end);

            if self.options.fill_unused_memory {
                let span_ptr = block
                    .rw_ptr()
                    .add(area_index as usize * pool.granularity as usize);
                let span_size = area_size as usize * pool.granularity as usize;

                protect_jit_memory(ProtectJitAccess::ReadWrite);
                fill_pattern(span_ptr, self.fill_pattern, span_size);
                protect_jit_memory(ProtectJitAccess::ReadExecute);
                flush_instruction_cache(span_ptr, span_size);
            }

            if block.area_used() == 0 {
                if pool.empty_block_count != 0 || self.options.immediate_release {
                    let mut cursor = pool.blocks.cursor_mut_from_ptr(block);
                    let block = self.remove_block(&mut cursor);

                    self.delete_block(block);
                } else {
                    pool.empty_block_count += 1;
                }
            }
        }

        Ok(())
    }

    /// Shrinks the memory allocated by `alloc`.
    ///
    /// # SAFETY
    ///
    /// `rx_ptr` must be a pointer returned by `alloc`.
    pub unsafe fn shrink(&mut self, rx_ptr: *const u8, new_size: usize) -> Result<(), AsmError> {
        if rx_ptr.is_null() {
            return Err(AsmError::InvalidArgument);
        }

        if new_size == 0 {
            return self.release(rx_ptr);
        }

        let Some(block) = self
            .tree
            .find(&BlockKey {
                rxptr: rx_ptr,
                block_size: 0,
            })
            .get()
        else {
            return Err(AsmError::InvalidArgument);
        };

        unsafe {
            let pool = &mut *block.pool;
            let offset = rx_ptr as usize - block.rx_ptr() as usize;
            let area_start = (offset >> pool.granularity_log2 as usize) as u32;

            let is_used = bit_vector_get_bit(block.used_bitvector(), area_start as _);

            if !is_used {
                return Err(AsmError::InvalidArgument);
            }

            let area_end =
                bit_vector_index_of(block.stop_bitvector(), area_start as _, true) as u32 + 1;

            let area_prev_size = area_end - area_start;
            let area_shrunk_size = pool.area_size_from_byte_size(new_size);

            if area_shrunk_size > area_prev_size {
                return Err(AsmError::InvalidState);
            }

            let area_diff = area_prev_size - area_shrunk_size;

            if area_diff != 0 {
                block.mark_shrunk_area(area_start + area_shrunk_size, area_end);

                if self.options.fill_unused_memory {
                    // Only the freed tail is filled; it starts after the part that is kept.
                    let span_ptr = block
                        .rw_ptr()
                        .add((area_start + area_shrunk_size) as usize * pool.granularity as usize);
                    let span_size = area_diff as usize * pool.granularity as usize;

                    protect_jit_memory(ProtectJitAccess::ReadWrite);
                    fill_pattern(span_ptr, self.fill_pattern, span_size);
                    protect_jit_memory(ProtectJitAccess::ReadExecute);
                    flush_instruction_cache(span_ptr, span_size);
                }
            }
        }

        Ok(())
    }

    /// Takes a pointer into the JIT memory and tries to query
    /// the RX and RW mappings and the size of the allocation.
    pub fn query(&self, rx_ptr: *const u8) -> Result<Span, AsmError> {
        let Some(block) = self
            .tree
            .find(&BlockKey {
                rxptr: rx_ptr,
                block_size: 0,
            })
            .get()
        else {
            return Err(AsmError::InvalidArgument);
        };

        unsafe {
            let pool = &*block.pool;
            let offset = rx_ptr as usize - block.rx_ptr() as usize;

            let area_start = (offset >> pool.granularity_log2 as usize) as u32;

            let is_used = bit_vector_get_bit(block.used_bitvector(), area_start as _);

            if !is_used {
                return Err(AsmError::InvalidArgument);
            }

            let area_end =
                bit_vector_index_of(block.stop_bitvector(), area_start as _, true) as u32 + 1;
            let byte_offset = pool.byte_size_from_area_size(area_start);
            let byte_size = pool.byte_size_from_area_size(area_end - area_start);

            Ok(Span {
                rx: block.rx_ptr().add(byte_offset),
                rw: block.rw_ptr().add(byte_offset),
                size: byte_size,
                icache_clean: false,
                block: block as *const JitAllocatorBlock as _,
            })
        }
    }

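    /// Writes into the [Span] through its RW mapping by invoking `write_func`, then flips the JIT protection back
    /// to read-execute and flushes the instruction cache for the span.
    ///
    /// If `write_func` shrinks the span's size, the allocation is shrunk accordingly.
    ///
    /// # SAFETY
    ///
    /// `span` must have been returned by [`JitAllocator::alloc`] or [`JitAllocator::query`] of this allocator and
    /// must still be live (not released).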
    pub unsafe fn write(
        &mut self,
        span: &mut Span,
        mut write_func: impl FnMut(&mut Span),
    ) -> Result<(), AsmError> {
        let size = span.size();

        if size == 0 {
            return Ok(());
        }

        protect_jit_memory(ProtectJitAccess::ReadWrite);
        write_func(span);
        protect_jit_memory(ProtectJitAccess::ReadExecute);
        flush_instruction_cache(span.rx(), span.size());

        if span.size() != size {
            self.shrink(span.rx(), span.size)?;
        }

        Ok(())
    }

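    /// Copies `slice` into the [Span] at `offset` through its RW mapping, then flips the JIT protection back to
    /// read-execute and flushes the instruction cache for the span.
    ///
    /// # SAFETY
    ///
    /// `span` must have been returned by this allocator and must still be live, and `offset + slice.len()` must not
    /// exceed the span's size.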
    pub unsafe fn copy_from_slice(
        &mut self,
        span: &mut Span,
        offset: usize,
        slice: &[u8],
    ) -> Result<(), AsmError> {
        if slice.is_empty() {
            return Ok(());
        }

        protect_jit_memory(ProtectJitAccess::ReadWrite);
        span.rw()
            .add(offset)
            .copy_from_nonoverlapping(slice.as_ptr(), slice.len());
        protect_jit_memory(ProtectJitAccess::ReadExecute);
        flush_instruction_cache(span.rx(), span.size());
        Ok(())
    }
}

#[inline]
unsafe fn fill_pattern(mem: *mut u8, pattern: u32, size_in_bytes: usize) {
    let n = size_in_bytes / 4;

    let p = mem as *mut u32;

    for i in 0..n {
        p.add(i).write(pattern);
    }
}

unsafe impl Send for JitAllocator {}

/// A memory reference returned by [`JitAllocator::alloc`].
#[allow(dead_code)]
pub struct Span {
    rx: *const u8,
    rw: *mut u8,
    size: usize,
    block: *mut u8,
    icache_clean: bool,
}

impl Span {
    /// Returns a pointer having Read & Execute permissions (references executable memory).
    ///
    /// This pointer is never NULL if the allocation succeeded; it points to executable memory.
    pub const fn rx(&self) -> *const u8 {
        self.rx
    }

    /// Returns a pointer having Read & Write permissions (references writable memory).
    ///
    /// Depending on the allocation strategy this could be:
    ///
    ///   - the same address as returned by `rx()`, if the allocator uses an RWX mapping (pages have all of Read,
    ///     Write, and Execute permissions) or MAP_JIT, which requires calling [protect_jit_memory()] manually.
    ///   - a valid pointer that is not the same as `rx()` - this is the case when dual mapping is used.
    ///   - a NULL pointer, in case the allocation strategy uses neither RWX, MAP_JIT, nor dual mapping. In this
    ///     case only [JitAllocator] can copy new code into the executable memory referenced by [Span].
    pub const fn rw(&self) -> *mut u8 {
        self.rw
    }

    /// Returns the size of the span in bytes.
    pub const fn size(&self) -> usize {
        self.size
    }

    /// Returns whether the instruction cache is considered clean for this span.
    pub fn is_icache_clean(&self) -> bool {
        self.icache_clean
    }

    /// Returns `true` if the span can be written directly through [`Span::rw`].
    pub fn is_directly_writeable(&self) -> bool {
        !self.rw.is_null()
    }
}