// asmkit/core/jit_allocator.rs
1/* Copyright (c) 2008-2024 The AsmJit Authors
2
3   This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software.
4
5   Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions:
6
7   The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
8   Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
9   This notice may not be removed or altered from any source distribution.
10
11*/
12
13use crate::AsmError;
14use crate::util::virtual_memory::{
15    self, DualMapping, MemoryFlags, ProtectJitAccess, alloc, alloc_dual_mapping,
16    flush_instruction_cache, protect_jit_memory, release, release_dual_mapping,
17};
18use crate::util::{
19    align_down, align_up, bit_vector_clear, bit_vector_fill, bit_vector_get_bit,
20    bit_vector_index_of, bit_vector_set_bit,
21};
22use alloc::vec::Vec;
23use core::cell::{Cell, UnsafeCell};
24use core::mem::size_of;
25use core::ops::Range;
26use core::ptr::null_mut;
27
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u32)]
/// A policy that can be used with `reset()` functions.
pub enum ResetPolicy {
    /// Soft reset, does not deallocate memory (default).
    Soft = 0,

    /// Hard reset, releases all memory used, if any.
    Hard = 1,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
/// Options used to configure a [JitAllocator] at construction time.
pub struct JitAllocatorOptions {
    /// Enables the use of an anonymous memory-mapped memory that is mapped into two buffers having a different pointer.
    /// The first buffer has read and execute permissions and the second buffer has read+write permissions.
    ///
    /// See [alloc_dual_mapping] for more details about this feature.
    ///
    /// ## Remarks
    ///
    /// Dual mapping would be automatically turned on by [JitAllocator] in case of hardened runtime that
    /// enforces `W^X` policy, so specifying this flag is essentially forcing to use dual mapped pages even when RWX
    /// pages can be allocated and dual mapping is not necessary.
    pub use_dual_mapping: bool,
    /// Enables the use of multiple pools with increasing granularity instead of a single pool. This flag would enable
    /// 3 internal pools in total having 64, 128, and 256 bytes granularity.
    ///
    /// This feature is only recommended for users that generate a lot of code and would like to minimize the overhead
    /// of `JitAllocator` itself by having blocks of different allocation granularities. Using this feature only for
    /// few allocations won't pay off as the allocator may need to create more blocks initially before it can take the
    /// advantage of variable block granularity.
    pub use_multiple_pools: bool,
    /// Always fill reserved memory by a fill-pattern.
    ///
    /// Causes a new block to be cleared by the fill pattern and freshly released memory to be cleared before making
    /// it ready for another use.
    pub fill_unused_memory: bool,
    /// When this flag is set the allocator would immediately release unused blocks during `release()` or `reset()`.
    /// When this flag is not set the allocator would keep one empty block in each pool to prevent excessive virtual
    /// memory allocations and deallocations in border cases, which involve constantly allocating and deallocating a
    /// single block caused by repetitive calling `alloc()` and `release()` when the allocator has either no blocks
    /// or have all blocks fully occupied.
    pub immediate_release: bool,
    /// Custom fill pattern used instead of the architecture default
    /// (`0xCCCCCCCC`, i.e. `int3`, on x86/x86_64; zero elsewhere).
    pub custom_fill_pattern: Option<u32>,

    /// Base block size in bytes. Must be a power of two in `[64 KB, 256 MB]`;
    /// any other value (including the default 0) selects a platform default.
    pub block_size: u32,
    /// Allocation granularity in bytes. Must be a power of two in `[64, 256]`;
    /// any other value (including the default 0) selects `MIN_GRANULARITY`.
    pub granularity: u32,
}
75
76impl Default for JitAllocatorOptions {
77    fn default() -> Self {
78        Self {
79            use_dual_mapping: true,
80            use_multiple_pools: true,
81            fill_unused_memory: true,
82            immediate_release: false,
83            custom_fill_pattern: None,
84            block_size: 0,
85            granularity: 0,
86        }
87    }
88}
89
/// Pattern written over unused/freed executable memory when
/// `fill_unused_memory` is enabled: `int3` (breakpoint) on x86/x86_64.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
const DEFAULT_FILL_PATTERN: u32 = 0xCCCCCCCC; // int3
/// Fallback fill pattern for other architectures: plain zeroes, since there
/// is no universal single-byte trap instruction outside of x86.
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
const DEFAULT_FILL_PATTERN: u32 = 0x0;

/// Number of pools to use when `JitAllocatorOptions::use_multiple_pools` is set.
///
/// Each pool increases granularity twice to make memory management more
/// efficient. Ideal number of pools appears to be 3 to 4 as it distributes
/// small and large functions properly.
const MULTI_POOL_COUNT: usize = 3;

/// Minimum granularity (and the default granularity for pool #0).
const MIN_GRANULARITY: usize = 64;

/// Maximum block size (32MB).
const MAX_BLOCK_SIZE: usize = 32 * 1024 * 1024;
107
/// Iterator over contiguous ranges of bits with value `B` (0 = clear bits,
/// 1 = set bits) inside a `u32` bit-vector slice.
struct BitVectorRangeIterator<'a, const B: u32> {
    /// Remaining words; `slice[0]` is the word containing bit `idx`.
    slice: &'a [u32],
    /// Bit index of the start of `slice[0]` (always word-aligned).
    idx: usize,
    /// Exclusive end bit index of the scan.
    end: usize,
    /// Current word, XOR-masked so searched bits read as `1`, with already
    /// consumed bits cleared.
    bit_word: u32,
}

/// Number of bits in a single bit-vector word.
const BIT_WORD_SIZE: usize = core::mem::size_of::<u32>() * 8;
116
impl<'a, const B: u32> BitVectorRangeIterator<'a, B> {
    /// Mask XOR-ed into every loaded word so that the searched bit value `B`
    /// always appears as `1`: B == 0 inverts the word, B != 0 keeps it as-is.
    const XOR_MASK: u32 = if B == 0 { u32::MAX } else { 0 };

    /// Creates an iterator scanning all `num_bit_words * 32` bits of `data`.
    fn from_slice_and_nbitwords(data: &'a [u32], num_bit_words: usize) -> Self {
        Self::new(data, num_bit_words, 0, num_bit_words * BIT_WORD_SIZE)
    }

    /// Creates an iterator scanning bits `start..end` of `data`.
    fn new(data: &'a [u32], _num_bit_words: usize, start: usize, end: usize) -> Self {
        // Align to the word containing `start` and position the slice there.
        let idx = align_down(start, BIT_WORD_SIZE);
        let slice = &data[idx / BIT_WORD_SIZE..];

        let mut bit_word = 0;

        if idx < end {
            // Load the first word and mask off the bits below `start`.
            bit_word =
                (slice[0] ^ Self::XOR_MASK) & (u32::MAX << (start as u32 % BIT_WORD_SIZE as u32));
        }

        Self {
            slice,
            idx,
            end,
            bit_word,
        }
    }

    /// Returns the next contiguous range of bits with value `B`, or `None`
    /// when the scan reaches `end`.
    ///
    /// `range_hint` caps how far the scan extends a range across word
    /// boundaries; pass `u32::MAX` to always return maximal ranges.
    fn next_range(&mut self, range_hint: u32) -> Option<Range<u32>> {
        // Skip whole words that contain no searched bits at all.
        while self.bit_word == 0 {
            self.idx += BIT_WORD_SIZE;

            if self.idx >= self.end {
                return None;
            }

            self.slice = &self.slice[1..];
            self.bit_word = self.slice[0] ^ Self::XOR_MASK;
        }

        // First searched bit in the current word opens the range.
        let i = self.bit_word.trailing_zeros();
        let start = self.idx as u32 + i;
        // Consume the run of consecutive searched bits starting at `i`; the
        // remaining set bits (if any) mark where the range terminates.
        self.bit_word = !(self.bit_word ^ !(u32::MAX << i));
        let mut end;
        if self.bit_word == 0 {
            // Range reaches the end of this word — keep extending it across
            // subsequent words until a terminating bit or `range_hint`.
            end = (self.idx as u32 + BIT_WORD_SIZE as u32).min(self.end as _);

            while end.wrapping_sub(start) < range_hint {
                self.idx += BIT_WORD_SIZE;

                if self.idx >= self.end {
                    break;
                }

                self.slice = &self.slice[1..];
                self.bit_word = self.slice[0] ^ Self::XOR_MASK;

                // A word that is not entirely in-range terminates the range
                // at its first non-searched bit.
                if self.bit_word != u32::MAX {
                    let j = self.bit_word.trailing_zeros();
                    end = (self.idx as u32 + j).min(self.end as _);
                    self.bit_word = !(self.bit_word ^ !(u32::MAX << j));
                    break;
                }

                // Whole word belongs to the range; continue scanning.
                end = (self.idx as u32 + BIT_WORD_SIZE as u32).min(self.end as _);
                self.bit_word = 0;
                continue;
            }

            Some(start..end)
        } else {
            // Range terminates within the current word.
            let j = self.bit_word.trailing_zeros();
            end = (self.idx as u32 + j).min(self.end as _);

            self.bit_word = !(self.bit_word ^ !(u32::MAX << j));

            Some(start..end)
        }
    }
}
195
/// Iterating a `B == 0` iterator yields maximal ranges of clear (zero) bits.
impl<'a> Iterator for BitVectorRangeIterator<'a, 0> {
    type Item = Range<u32>;

    fn next(&mut self) -> Option<Self::Item> {
        self.next_range(u32::MAX)
    }
}
203
204use intrusive_collections::{KeyAdapter, UnsafeRef};
205use intrusive_collections::{intrusive_adapter, rbtree::*};
206
/// A single contiguous chunk of executable memory managed by a pool.
///
/// All mutable state uses interior mutability (`Cell`/`UnsafeCell`) because
/// blocks are shared through intrusive list/tree links; mutation is expected
/// to happen only behind the allocator's exclusive access.
struct JitAllocatorBlock {
    /// RB-tree link (tree of all blocks across pools, keyed by RX address).
    node: Link,
    /// Per-pool linked-list link.
    list_node: intrusive_collections::LinkedListLink,

    /// Link to the pool that owns this block.
    pool: *mut JitAllocatorPool,
    /// Virtual memory mapping - either single mapping (both pointers equal) or
    /// dual mapping, where one pointer is Read+Execute and the second Read+Write.
    mapping: DualMapping,
    /// Virtual memory size (block size) [bytes].
    block_size: usize,

    /// `FLAG_*` bits (see the associated constants on the impl).
    flags: Cell<u32>,
    /// Total size of the block in granularity units.
    area_size: Cell<u32>,
    /// Number of granularity units currently allocated.
    area_used: Cell<u32>,
    /// Size (in units) of the largest known unused area; stale when dirty.
    largest_unused_area: Cell<u32>,
    /// First unit index worth searching for free space.
    search_start: Cell<u32>,
    /// One past the last unit index worth searching.
    search_end: Cell<u32>,

    /// Bit per unit: 1 = unit is allocated.
    used_bitvector: UnsafeCell<alloc::vec::Vec<u32>>,
    /// Bit per unit: 1 = unit is the *last* unit of an allocation (sentinel
    /// used to recover an allocation's size from its start pointer).
    stop_bitvector: UnsafeCell<alloc::vec::Vec<u32>>,
}
229
impl JitAllocatorBlock {
    /// Block contains no allocations at all.
    const FLAG_EMPTY: u32 = 0x00000001;
    /// Block layout changed; `largest_unused_area` may be stale.
    const FLAG_DIRTY: u32 = 0x00000002;
    /// Block memory was allocated via `alloc_dual_mapping` (separate RX/RW views).
    const FLAG_DUAL_MAPPED: u32 = 0x00000004;

    /// Pool that owns this block.
    fn pool(&self) -> *mut JitAllocatorPool {
        self.pool
    }

    /// Read+Execute view of the block memory.
    fn rx_ptr(&self) -> *const u8 {
        self.mapping.rx
    }

    /// Read+Write view of the block memory (equals `rx_ptr` unless dual-mapped).
    fn rw_ptr(&self) -> *mut u8 {
        self.mapping.rw
    }

    fn flags(&self) -> u32 {
        self.flags.get()
    }

    fn add_flags(&self, flags: u32) {
        self.flags.set(self.flags() | flags);
    }

    fn clear_flags(&self, flags: u32) {
        self.flags.set(self.flags() & !flags);
    }

    /// True when the search hints may be out of date.
    fn is_dirty(&self) -> bool {
        (self.flags() & Self::FLAG_DIRTY) != 0
    }

    /// Virtual memory size of this block in bytes.
    fn block_size(&self) -> usize {
        self.block_size
    }

    /// Number of granularity units currently allocated.
    fn area_used(&self) -> u32 {
        self.area_used.get()
    }

    /// Total number of granularity units in this block.
    fn area_size(&self) -> u32 {
        self.area_size.get()
    }

    /// Size (in units) of the largest known unused area.
    fn largest_unused_area(&self) -> u32 {
        self.largest_unused_area.get()
    }

    /// First unit index worth searching for free space.
    fn search_start(&self) -> u32 {
        self.search_start.get()
    }

    /// One past the last unit index worth searching.
    fn search_end(&self) -> u32 {
        self.search_end.get()
    }

    /// Shared view of the used bit-vector (1 = unit allocated).
    fn used_bitvector(&self) -> &alloc::vec::Vec<u32> {
        unsafe { &*self.used_bitvector.get() }
    }

    /// Shared view of the stop bit-vector (1 = last unit of an allocation).
    fn stop_bitvector(&self) -> &alloc::vec::Vec<u32> {
        unsafe { &*self.stop_bitvector.get() }
    }

    // The two `_mut` accessors hand out `&mut` from `&self`; this is sound
    // only because all mutation happens behind the allocator's exclusive
    // access (`&mut JitAllocator`), never concurrently.
    #[allow(clippy::mut_from_ref)]
    fn used_bitvector_mut(&self) -> &mut alloc::vec::Vec<u32> {
        unsafe { &mut *self.used_bitvector.get() }
    }

    #[allow(clippy::mut_from_ref)]
    fn stop_bitvector_mut(&self) -> &mut alloc::vec::Vec<u32> {
        unsafe { &mut *self.stop_bitvector.get() }
    }

    /// Free units remaining in this block.
    fn area_available(&self) -> u32 {
        self.area_size() - self.area_used()
    }

    /// Records an allocation occupying units
    /// `[allocated_area_start, allocated_area_end)`: sets the used bits and
    /// the stop sentinel, and updates search hints plus pool statistics.
    fn mark_allocated_area(&self, allocated_area_start: u32, allocated_area_end: u32) {
        let allocated_area_size = allocated_area_end - allocated_area_start;

        bit_vector_fill(
            self.used_bitvector_mut(),
            allocated_area_start as _,
            allocated_area_size as _,
        );
        // Stop bit marks the allocation's last unit.
        bit_vector_set_bit(
            self.stop_bitvector_mut(),
            allocated_area_end as usize - 1,
            true,
        );

        // SAFETY: Done inside JitAllocator behind mutex and pool is valid.
        unsafe {
            (*self.pool).total_area_used += allocated_area_size as usize;
        }

        self.area_used.set(self.area_used() + allocated_area_size);

        if self.area_available() == 0 {
            // Block is now full - disable searching in it entirely.
            self.search_start.set(self.area_size());
            self.search_end.set(0);
            self.largest_unused_area.set(0);
            self.clear_flags(Self::FLAG_DIRTY);
        } else {
            // Tighten search hints when the allocation touched a boundary;
            // otherwise mark the hints stale.
            if self.search_start.get() == allocated_area_start {
                self.search_start.set(allocated_area_end as _);
            }

            if self.search_end.get() == allocated_area_end {
                self.search_end.set(allocated_area_start as _);
            }

            self.add_flags(Self::FLAG_DIRTY);
        }
    }
    /// Releases an allocation occupying units
    /// `[released_area_start, released_area_end)`: clears the used bits and
    /// the stop sentinel, and updates search hints plus pool statistics.
    fn mark_released_area(&self, released_area_start: u32, released_area_end: u32) {
        let released_area_size = released_area_end - released_area_start;

        // SAFETY: Done behind mutex and pool is valid.
        unsafe {
            (*self.pool).total_area_used -= released_area_size as usize;
        }

        self.area_used.set(self.area_used() - released_area_size);
        // Widen the search window to cover the freed span.
        self.search_start
            .set(self.search_start.get().min(released_area_start));
        self.search_end
            .set(self.search_end.get().max(released_area_end));

        bit_vector_clear(
            self.used_bitvector_mut(),
            released_area_start as _,
            released_area_size as _,
        );
        bit_vector_set_bit(
            self.stop_bitvector_mut(),
            released_area_end as usize - 1,
            false,
        );

        if self.area_used() == 0 {
            // Block became completely free.
            self.search_start.set(0);
            self.search_end.set(self.area_size());
            self.largest_unused_area.set(self.area_size());
            self.add_flags(Self::FLAG_EMPTY);
            self.clear_flags(Self::FLAG_DIRTY);
        } else {
            self.add_flags(Self::FLAG_DIRTY);
        }
    }

    /// Releases the tail `[shrunk_area_start, shrunk_area_end)` of an existing
    /// allocation, moving its stop sentinel to the new (smaller) end.
    ///
    /// NOTE(review): the second assert forbids `shrunk_area_end` from equaling
    /// `area_size()`, i.e. shrinking an allocation that ends exactly at the
    /// block boundary - confirm this restriction against the `shrink()` caller.
    fn mark_shrunk_area(&self, shrunk_area_start: u32, shrunk_area_end: u32) {
        let shrunk_area_size = shrunk_area_end - shrunk_area_start;

        // Shrunk area cannot start at zero as it would mean that we have shrunk the first
        // block to zero bytes, which is not allowed as such block must be released instead.
        assert!(shrunk_area_start != 0);
        assert!(shrunk_area_end != self.area_size());

        // SAFETY: Done behind mutex and pool is valid.
        unsafe {
            (*self.pool).total_area_used -= shrunk_area_size as usize;
        }

        self.area_used.set(self.area_used() - shrunk_area_size);
        self.search_start
            .set(self.search_start.get().min(shrunk_area_start));
        self.search_end
            .set(self.search_end.get().max(shrunk_area_end));

        bit_vector_clear(
            self.used_bitvector_mut(),
            shrunk_area_start as _,
            shrunk_area_size as _,
        );
        // Move the stop sentinel from the old end to the new end.
        bit_vector_set_bit(
            self.stop_bitvector_mut(),
            shrunk_area_end as usize - 1,
            false,
        );
        bit_vector_set_bit(
            self.stop_bitvector_mut(),
            shrunk_area_start as usize - 1,
            true,
        );

        self.add_flags(Self::FLAG_DIRTY);
    }
}
421
// Blocks are identified and ordered by the address of their Read+Execute
// mapping, which is unique per block.

impl PartialOrd for JitAllocatorBlock {
    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for JitAllocatorBlock {
    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
        self.rx_ptr().cmp(&other.rx_ptr())
    }
}

impl PartialEq for JitAllocatorBlock {
    fn eq(&self, other: &Self) -> bool {
        self.rx_ptr() == other.rx_ptr()
    }
}

impl Eq for JitAllocatorBlock {}
441use intrusive_collections::linked_list::LinkedList;
442intrusive_adapter!(JitAllocatorBlockAdapter = UnsafeRef<JitAllocatorBlock> : JitAllocatorBlock { node: Link });
443intrusive_adapter!(BlockListAdapter = UnsafeRef<JitAllocatorBlock> : JitAllocatorBlock { list_node: intrusive_collections::LinkedListLink });
444
/// Search key used to look up a [JitAllocatorBlock] in the RB-tree by any
/// pointer falling inside the block's `[rxptr, rxptr + block_size)` range.
struct BlockKey {
    rxptr: *const u8,
    block_size: u32,
}
449
impl PartialEq for BlockKey {
    fn eq(&self, other: &Self) -> bool {
        self.rxptr == other.rxptr
    }
}

impl Eq for BlockKey {}

impl PartialOrd for BlockKey {
    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

/// Range-based comparison: a key compares `Equal` to `other` when
/// `self.rxptr` lies inside `other`'s `[rxptr, rxptr + block_size)` range,
/// which lets the RB-tree locate the block containing an arbitrary pointer.
///
/// NOTE(review): `Less`/`Greater` look inverted relative to plain address
/// order, but insertion and lookup both go through this same comparator, so
/// the tree stays internally consistent - verify before changing.
impl Ord for BlockKey {
    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
        // One-past-the-end address of `other`'s block.
        let addr_off = other.rxptr as usize + other.block_size as usize;

        if addr_off <= self.rxptr as usize {
            core::cmp::Ordering::Less
        } else if other.rxptr > self.rxptr {
            core::cmp::Ordering::Greater
        } else {
            core::cmp::Ordering::Equal
        }
    }
}
477
impl<'a> KeyAdapter<'a> for JitAllocatorBlockAdapter {
    type Key = BlockKey;

    /// Tree key of a block: its RX base address and size, so [BlockKey]'s
    /// range-aware `Ord` can match any pointer inside the block.
    fn get_key(
        &self,
        value: &'a <Self::PointerOps as intrusive_collections::PointerOps>::Value,
    ) -> Self::Key {
        BlockKey {
            rxptr: value.rx_ptr(),
            block_size: value.block_size as _,
        }
    }
}
491
/// A pool of blocks that all share the same allocation granularity.
struct JitAllocatorPool {
    /// All blocks belonging to this pool.
    blocks: LinkedList<BlockListAdapter>,
    /// Block to try first on the next allocation (may be null).
    cursor: *mut JitAllocatorBlock,

    /// Number of blocks in `blocks`.
    block_count: u32,
    /// Allocation granularity in bytes (power of two).
    granularity: u16,
    /// `log2(granularity)`, cached for shifts.
    granularity_log2: u8,
    /// Number of fully empty blocks currently kept around for reuse.
    empty_block_count: u8,
    /// Sum of all blocks' area sizes (granularity units).
    total_area_size: usize,
    /// Sum of all allocated areas (granularity units).
    total_area_used: usize,
    /// Bookkeeping memory consumed by block headers and bit-vectors (bytes).
    total_overhead_bytes: usize,
}
504
impl JitAllocatorPool {
    /// Creates an empty pool with the given granularity
    /// (must be a power of two; `granularity_log2` is derived from it).
    fn new(granularity: u32) -> Self {
        let granularity_log2 = granularity.trailing_zeros() as u8;
        let granularity = granularity as u16;

        Self {
            blocks: LinkedList::new(BlockListAdapter::new()),
            cursor: core::ptr::null_mut(),
            block_count: 0,
            granularity,
            granularity_log2,
            empty_block_count: 0,
            total_area_size: 0,
            total_area_used: 0,
            total_overhead_bytes: 0,
        }
    }

    /// Clears the block list and all statistics (granularity is kept).
    ///
    /// NOTE(review): `empty_block_count` is not reset here - callers are
    /// expected to set it explicitly afterwards; confirm this is intentional.
    fn reset(&mut self) {
        self.blocks.clear();
        self.cursor = core::ptr::null_mut();
        self.block_count = 0;
        self.total_area_size = 0;
        self.total_area_used = 0;
        self.total_overhead_bytes = 0;
    }

    /// Converts a size in granularity units to bytes.
    fn byte_size_from_area_size(&self, area_size: u32) -> usize {
        area_size as usize * self.granularity as usize
    }

    /// Converts a size in bytes to granularity units, rounding up.
    fn area_size_from_byte_size(&self, byte_size: usize) -> u32 {
        ((byte_size + self.granularity as usize - 1) >> self.granularity_log2) as u32
    }

    /// Number of 32-bit words needed to hold `area_size` bits.
    fn bit_word_count_from_area_size(&self, area_size: u32) -> usize {
        align_up(area_size as _, 32) / 32
    }
}
544use alloc::boxed::Box;
545
/// A simple implementation of memory manager that uses [virtual_memory]
/// functions to manage virtual memory for JIT compiled code.
548///
549/// Implementation notes:
550///
551/// - Granularity of allocated blocks is different than granularity for a typical C malloc. In addition, the allocator
552///   can use several memory pools having a different granularity to minimize the maintenance overhead. Multiple pools
553///   feature requires `use_multiple_pools` flag to be set.
554///
555/// - The allocator doesn't store any information in executable memory, instead, the implementation uses two
556///   bit-vectors to manage allocated memory of each allocator-block. The first bit-vector called 'used' is used to
557///   track used memory (where each bit represents memory size defined by granularity) and the second bit vector called
558///   'stop' is used as a sentinel to mark where the allocated area ends.
559///
560/// - Internally, the allocator also uses RB tree to keep track of all blocks across all pools. Each inserted block is
561///   added to the tree so it can be matched fast during `release()` and `shrink()`.
pub struct JitAllocator {
    /// Options this allocator was created with (validated copies of the size
    /// fields live in `block_size` / `granulariy` below).
    options: JitAllocatorOptions,
    /// Base block size in bytes used when growing a pool.
    block_size: usize,
    /// Base allocation granularity in bytes (pool #0).
    /// Field name is a historical misspelling of "granularity", kept as-is.
    granulariy: usize,
    /// Pattern used to fill unused/freed memory.
    fill_pattern: u32,

    /// Count of live allocations - presumably maintained by
    /// `alloc()`/`release()`; verify against those methods.
    allocation_count: usize,
    /// All blocks across all pools, keyed by RX address range, used to find
    /// the owning block quickly in `release()`/`shrink()`.
    tree: RBTree<JitAllocatorBlockAdapter>,
    /// Pools, each with twice the granularity of the previous one when
    /// `use_multiple_pools` is enabled.
    pools: Box<[*mut JitAllocatorPool]>,
}
572
573impl JitAllocator {
574    /// Creates a new JitAllocator instance.
575    pub fn new(params: JitAllocatorOptions) -> Box<Self> {
576        let vm_info = virtual_memory::info();
577
578        let mut block_size = params.block_size;
579        let mut granularity = params.granularity;
580
581        let mut pool_count = 1;
582
583        if params.use_multiple_pools {
584            pool_count = MULTI_POOL_COUNT;
585        }
586
587        if !(64 * 1024..=256 * 1024 * 1024).contains(&block_size) || !block_size.is_power_of_two() {
588            block_size = vm_info.page_granularity as _;
589        }
590
591        if !(64..=256).contains(&granularity) || !granularity.is_power_of_two() {
592            granularity = MIN_GRANULARITY as _;
593        }
594
595        let fill_pattern = params.custom_fill_pattern.unwrap_or(DEFAULT_FILL_PATTERN);
596
597        let mut pools = Vec::with_capacity(pool_count);
598
599        for _ in 0..pool_count {
600            pools.push(Box::into_raw(Box::new(JitAllocatorPool::new(granularity))));
601        }
602
603        Box::new(Self {
604            options: params,
605            block_size: block_size as _,
606            granulariy: granularity as _,
607            fill_pattern,
608            allocation_count: 0,
609            tree: RBTree::new(JitAllocatorBlockAdapter::new()),
610            pools: pools.into_boxed_slice(),
611        })
612    }
613
614    fn size_to_pool_id(&self, size: usize) -> usize {
615        let mut pool_id = self.pools.len() - 1;
616        let mut granularity = self.granulariy << pool_id;
617
618        while pool_id != 0 {
619            if align_up(size, granularity) == size {
620                break;
621            }
622
623            pool_id -= 1;
624            granularity >>= 1;
625        }
626
627        pool_id
628    }
629
630    fn bitvector_size_to_byte_size(area_size: u32) -> usize {
631        (area_size as usize).div_ceil(32) * size_of::<u32>()
632    }
633
    /// Computes the size of the next block to allocate for `pool`.
    ///
    /// Doubles the size of the pool's most recent block (geometric growth,
    /// stopped once [MAX_BLOCK_SIZE] is reached) and rounds up for large
    /// requests so `allocation_size` always fits. Returns 0 on overflow.
    fn calculate_ideal_block_size(
        &self,
        pool: *mut JitAllocatorPool,
        allocation_size: usize,
    ) -> usize {
        unsafe {
            let last = (*pool).blocks.back();

            // Start from the newest block's size, or the configured default
            // when the pool has no blocks yet.
            let mut block_size = if !last.is_null() {
                last.get().unwrap().block_size()
            } else {
                self.block_size
            };

            if block_size < MAX_BLOCK_SIZE {
                block_size *= 2;
            }

            // Requests larger than the block: round the block size up to a
            // multiple that covers the allocation.
            if allocation_size > block_size {
                block_size = align_up(allocation_size, block_size);

                // overflow
                if block_size < allocation_size {
                    return 0;
                }
            }

            block_size
        }
    }
664
665    unsafe fn new_block(
666        &mut self,
667        pool: *mut JitAllocatorPool,
668        block_size: usize,
669    ) -> Result<Box<JitAllocatorBlock>, AsmError> {
670        unsafe {
671            let area_size =
672                (block_size + (*pool).granularity as usize - 1) >> (*pool).granularity_log2;
673            let num_bit_words = area_size.div_ceil(32);
674
675            let mut block = Box::new(JitAllocatorBlock {
676                node: Link::new(),
677                list_node: intrusive_collections::LinkedListLink::new(),
678                pool,
679                mapping: DualMapping {
680                    rx: null_mut(),
681                    rw: null_mut(),
682                },
683                block_size: block_size as _,
684                flags: Cell::new(0),
685                area_size: Cell::new(0),
686                area_used: Cell::new(0),
687                largest_unused_area: Cell::new(area_size as _),
688                search_end: Cell::new(area_size as _),
689                search_start: Cell::new(0),
690                used_bitvector: UnsafeCell::new({
691                    let mut v = Vec::with_capacity(num_bit_words);
692                    v.resize(num_bit_words * size_of::<u32>(), 0);
693                    v
694                }),
695                stop_bitvector: UnsafeCell::new({
696                    let mut v = Vec::with_capacity(num_bit_words);
697                    v.resize(num_bit_words * size_of::<u32>(), 0);
698                    v
699                }),
700            });
701            let mut block_flags = 0;
702            let virt_mem = if self.options.use_dual_mapping {
703                block_flags |= JitAllocatorBlock::FLAG_DUAL_MAPPED;
704                alloc_dual_mapping(block_size, MemoryFlags::ACCESS_RWX.into())?
705            } else {
706                let rx = alloc(block_size, MemoryFlags::ACCESS_RWX.into())?;
707                DualMapping { rx, rw: rx }
708            };
709
710            if self.options.fill_unused_memory {
711                protect_jit_memory(ProtectJitAccess::ReadWrite);
712                fill_pattern(virt_mem.rw, self.fill_pattern, block_size);
713                protect_jit_memory(ProtectJitAccess::ReadExecute);
714                flush_instruction_cache(virt_mem.rx, block_size);
715            }
716
717            block.area_size.set(area_size as _);
718            block.mapping = virt_mem;
719            block.flags.set(block_flags);
720            Ok(block)
721        }
722    }
723
724    unsafe fn delete_block(&mut self, block: *mut JitAllocatorBlock) {
725        unsafe {
726            let mut block = Box::from_raw(block);
727            if (block.flags() & JitAllocatorBlock::FLAG_DUAL_MAPPED) != 0 {
728                let _ = release_dual_mapping(&mut block.mapping, block.block_size);
729            } else {
730                let _ = release(block.mapping.rx as _, block.block_size);
731            }
732
733            drop(block);
734        }
735    }
736
    /// Links a block into its pool's list and the global RB-tree and updates
    /// the pool statistics.
    ///
    /// # Safety
    ///
    /// `block` must be a valid, heap-allocated block that is not yet linked
    /// anywhere; logical ownership transfers to the pool/tree.
    unsafe fn insert_block(&mut self, block: *mut JitAllocatorBlock) {
        unsafe {
            let b = &mut *block;
            let pool = &mut *b.pool();

            // First block of the pool becomes the allocation cursor.
            if pool.cursor.is_null() {
                pool.cursor = block;
            }

            self.tree.insert(UnsafeRef::from_raw(block));
            pool.blocks.push_front(UnsafeRef::from_raw(block));

            pool.block_count += 1;
            pool.total_area_size += b.area_size() as usize;

            // Bookkeeping overhead: block header plus both bit-vectors.
            pool.total_overhead_bytes += size_of::<JitAllocatorBlock>()
                + Self::bitvector_size_to_byte_size(b.area_size()) * 2;
        }
    }
756
    /// Unlinks the block under `block` from its pool's list and from the
    /// global RB-tree, updates pool statistics, and returns the raw pointer
    /// so the caller can free or reuse the block.
    ///
    /// # Safety
    ///
    /// The cursor must point at a valid block owned by this allocator.
    unsafe fn remove_block(
        &mut self,
        block: &mut intrusive_collections::linked_list::CursorMut<'_, BlockListAdapter>,
    ) -> *mut JitAllocatorBlock {
        unsafe {
            let b = block.get().unwrap();
            let pool = &mut *b.pool();

            // If this block is the pool's allocation cursor, retarget the
            // cursor to a neighbouring block (or clear it if none remain).
            if core::ptr::eq(pool.cursor, b) {
                pool.cursor = if let Some(block) = block.peek_prev().get() {
                    block as *const _ as *mut _
                } else if let Some(block) = block.peek_next().get() {
                    block as *const _ as *mut _
                } else {
                    null_mut()
                };
            }

            // Drop the matching tree entry; the assert guards against the
            // tree and the list ever referring to different block instances.
            if let Entry::Occupied(mut c) = self.tree.entry(&BlockKey {
                rxptr: b.rx_ptr(),
                block_size: b.block_size as _,
            }) {
                assert_eq!(
                    UnsafeRef::into_raw(c.remove().unwrap()),
                    b as *const _ as *mut JitAllocatorBlock,
                    "blocks are not the same"
                );
            }
            let area_size = b.area_size();

            pool.block_count -= 1;
            pool.total_area_size -= area_size as usize;

            pool.total_overhead_bytes -=
                size_of::<JitAllocatorBlock>() + Self::bitvector_size_to_byte_size(area_size) * 2;

            UnsafeRef::into_raw(block.remove().unwrap())
        }
    }
796
797    unsafe fn wipe_out_block(
798        &mut self,
799        block: &mut intrusive_collections::linked_list::CursorMut<'_, BlockListAdapter>,
800    ) {
801        unsafe {
802            let b = block.get().unwrap();
803            if (b.flags() & JitAllocatorBlock::FLAG_EMPTY) != 0 {
804                return;
805            }
806
807            let pool = &mut *b.pool();
808
809            let area_size = b.area_size();
810            let granularity = pool.granularity;
811
812            virtual_memory::protect_jit_memory(ProtectJitAccess::ReadWrite);
813
814            if self.options.fill_unused_memory {
815                let rw_ptr = b.rw_ptr();
816
817                let it = BitVectorRangeIterator::from_slice_and_nbitwords(
818                    b.stop_bitvector(),
819                    pool.bit_word_count_from_area_size(b.area_size()),
820                );
821
822                for range in it {
823                    let span_ptr = rw_ptr.add(range.start as usize * granularity as usize);
824                    let span_size =
825                        (range.end as usize - range.start as usize) * granularity as usize;
826
827                    let mut n = 0;
828                    while n < span_size {
829                        *span_ptr.add(n).cast::<u32>() = self.fill_pattern;
830                        n += size_of::<u32>();
831                    }
832
833                    virtual_memory::flush_instruction_cache(span_ptr, span_size);
834                }
835            }
836
837            virtual_memory::protect_jit_memory(ProtectJitAccess::ReadExecute);
838
839            let b = &mut *UnsafeRef::into_raw(block.remove().unwrap());
840            b.used_bitvector_mut().fill(0);
841            b.stop_bitvector_mut().fill(0);
842
843            b.area_used.set(0);
844            b.largest_unused_area.set(area_size);
845            b.search_start.set(0);
846            b.search_end.set(area_size);
847            b.add_flags(JitAllocatorBlock::FLAG_EMPTY);
848            b.clear_flags(JitAllocatorBlock::FLAG_DIRTY);
849        }
850    }
851
852    /// Resets current allocator by emptying all pools and blocks.
853    ///
854    /// Frees all memory is `ResetPolicy::Hard` is specified or `immediate_release` in [JitAllocatorOptions] is specific.
855    ///
856    /// # Safety
857    ///
858    /// - The caller must ensure that no code is currently executing from the memory managed by this allocator.
859    /// - The caller must ensure that no references to the memory managed by this allocator are used after calling this function.
860    /// - The caller must ensure that no other thread is currently accessing the memory managed by this allocator.
861    pub unsafe fn reset(&mut self, reset_policy: ResetPolicy) {
862        self.tree.clear();
863
864        let pool_count = self.pools.len();
865
866        for pool_id in 0..pool_count {
867            let pool = unsafe { &mut *self.pools[pool_id] };
868
869            let mut cursor = pool.blocks.cursor_mut();
870            cursor.move_next();
871            let mut block_to_keep = false;
872            if reset_policy != ResetPolicy::Hard && !self.options.immediate_release {
873                block_to_keep = true;
874                cursor.move_next();
875            }
876            unsafe {
877                while !cursor.is_null() {
878                    let block = UnsafeRef::into_raw(cursor.remove().unwrap());
879                    self.delete_block(block);
880                    cursor.move_next();
881                }
882
883                pool.reset();
884
885                if block_to_keep {
886                    let mut front = pool.blocks.cursor_mut();
887                    front.move_next();
888                    self.wipe_out_block(&mut front);
889                    pool.empty_block_count = 1;
890                }
891            }
892        }
893    }
894
    /// Allocates `size` bytes in the executable memory region.
    /// Returns two pointers. One points to Read-Execute mapping and another to Read-Write mapping.
    /// All code writes *must* go to the Read-Write mapping.
    pub fn alloc(&mut self, size: usize) -> Result<Span, AsmError> {
        // Sentinel meaning "no suitable free area found yet".
        const NO_INDEX: u32 = u32::MAX;

        // Round the request up to the allocator granularity.
        // NOTE(review): the field name `granulariy` is misspelled ("granularity");
        // rename at the declaration site when convenient.
        let size = align_up(size, self.granulariy);

        if size == 0 {
            return Err(AsmError::InvalidArgument);
        }

        if size > u32::MAX as usize / 2 {
            return Err(AsmError::TooLarge);
        }

        unsafe {
            // Pick the pool whose granularity class fits this request.
            let pool_id = self.size_to_pool_id(size);
            let pool = &mut *self.pools[pool_id];

            let mut area_index = NO_INDEX;
            // Request size expressed in allocation granules ("areas").
            let area_size = pool.area_size_from_byte_size(size);

            // First-fit scan over the pool's blocks, starting at the front.
            let mut block = pool.blocks.cursor();
            block.move_next();
            if let Some(initial) = block.get().map(|x| x as *const JitAllocatorBlock) {
                loop {
                    let b = block.get().unwrap();

                    // Only scan blocks that could possibly satisfy the request:
                    // enough total free area, and either dirty statistics (so the
                    // cached `largest_unused_area` cannot be trusted) or a cached
                    // largest hole that is big enough.
                    if b.area_available() >= area_size
                        && (b.is_dirty() || b.largest_unused_area() >= area_size)
                    {
                        // Iterate runs of zero bits (free areas) in the used
                        // bitvector, restricted to the cached search window.
                        let mut it = BitVectorRangeIterator::<0>::new(
                            b.used_bitvector(),
                            pool.bit_word_count_from_area_size(b.area_size()),
                            b.search_start() as _,
                            b.search_end() as _,
                        );

                        let mut range_start;
                        let mut range_end = b.area_size() as usize;

                        // Statistics rebuilt during the scan (used to refresh the
                        // block's cached search window if the scan fails).
                        let mut search_start = usize::MAX;
                        let mut largest_area = 0;

                        while let Some(range) = it.next_range(area_size as _) {
                            range_start = range.start as _;
                            range_end = range.end as _;

                            let range_size = range_end - range_start;

                            // First fit: take the first hole large enough.
                            if range_size >= area_size as usize {
                                area_index = range_start as _;
                                break;
                            }

                            search_start = search_start.min(range_start);
                            largest_area = largest_area.max(range_size);
                        }

                        if area_index != NO_INDEX {
                            break;
                        }

                        // Scan failed: refresh the cached window/statistics so the
                        // next scan can skip this block quickly.
                        if search_start != usize::MAX {
                            let search_end = range_end;

                            b.search_start.set(search_start as _);

                            b.search_end.set(search_end as _);
                            b.largest_unused_area.set(largest_area as _);
                            b.clear_flags(JitAllocatorBlock::FLAG_DIRTY);
                        }
                    }

                    block.move_next();

                    // Defensive: stop if the cursor wrapped back to the start.
                    if block.get().map(|x| x as *const _) == Some(initial) {
                        break;
                    }

                    if block.is_null() {
                        break;
                    }
                }
            }

            let mut block = block.get();

            if area_index == NO_INDEX {
                // No existing block can hold the request — map a new one.
                let block_size = self.calculate_ideal_block_size(pool, size);

                {
                    let nblock = self.new_block(pool, block_size)?;

                    // The allocation occupies the front of the new block.
                    area_index = 0;

                    nblock.search_start.set(area_size as _);

                    nblock
                        .largest_unused_area
                        .set(nblock.area_size() - area_size);

                    let nblock = Box::into_raw(nblock);

                    self.insert_block(nblock);

                    block = Some(&*nblock);
                }
            } else if (block.unwrap().flags() & JitAllocatorBlock::FLAG_EMPTY) != 0 {
                // Reusing a cached empty block — it is no longer empty.
                pool.empty_block_count -= 1;
                block.unwrap().clear_flags(JitAllocatorBlock::FLAG_EMPTY);
            }

            self.allocation_count += 1;

            let block = block.unwrap();

            // Mark [area_index, area_index + area_size) as used + stop bit.
            block.mark_allocated_area(area_index, area_index + area_size);

            // Translate the granule index back to a byte offset in the block.
            let offset = pool.byte_size_from_area_size(area_index);

            let rx = block.rx_ptr().add(offset);
            let rw = block.rw_ptr().add(offset);

            // `icache_clean: true` — the returned memory has not been written yet.
            Ok(Span {
                block: block as *const JitAllocatorBlock as _,
                rx,
                rw,
                size,
                icache_clean: true,
            })
        }
    }
1031
    /// Releases the memory allocated by `alloc`.
    ///
    /// # SAFETY
    /// - `rx_ptr` must have been returned from `alloc`
    /// - `rx_ptr` must have been allocated from this allocator
    /// - `rx_ptr` must not have been passed to `release` before
    /// - `rx_ptr` must point to read-execute part of memory returned from `alloc`.
    pub unsafe fn release(&mut self, rx_ptr: *const u8) -> Result<(), AsmError> {
        if rx_ptr.is_null() {
            return Err(AsmError::InvalidArgument);
        }

        // Locate the owning block via the tree keyed by RX address range.
        let block = self.tree.find(&BlockKey {
            rxptr: rx_ptr,
            block_size: 0,
        });

        let Some(block) = block.get() else {
            return Err(AsmError::InvalidState);
        };

        unsafe {
            let pool = &mut *block.pool;

            // Byte offset of the allocation within the block.
            let offset = rx_ptr as usize - block.rx_ptr() as usize;

            // Convert to granule ("area") indices; the allocation ends right
            // after the first set bit in the stop bitvector.
            let area_index = (offset >> pool.granularity_log2 as usize) as u32;
            let area_end =
                bit_vector_index_of(block.stop_bitvector(), area_index as _, true) as u32 + 1;
            let area_size = area_end - area_index;

            self.allocation_count -= 1;

            block.mark_released_area(area_index, area_end);

            // Optionally overwrite the released bytes with the configured fill
            // pattern (writes go through the RW mapping).
            if self.options.fill_unused_memory {
                let span_ptr = block
                    .rw_ptr()
                    .add(area_index as usize * pool.granularity as usize);
                let span_size = area_size as usize * pool.granularity as usize;

                protect_jit_memory(ProtectJitAccess::ReadWrite);
                fill_pattern(span_ptr, self.fill_pattern, span_size);
                protect_jit_memory(ProtectJitAccess::ReadExecute);
                flush_instruction_cache(span_ptr, span_size);
            }

            // If the block became empty, either destroy it immediately or keep
            // it cached as the pool's single empty block.
            if block.area_used() == 0 {
                if pool.empty_block_count != 0 || self.options.immediate_release {
                    let mut cursor = pool.blocks.cursor_mut_from_ptr(block);
                    let block = self.remove_block(&mut cursor);

                    self.delete_block(block);
                } else {
                    pool.empty_block_count += 1;
                }
            }
        }

        Ok(())
    }
1093    /// Shrinks the memory allocated by `alloc`.
1094    ///
1095    /// # SAFETY
1096    ///
1097    /// `rx_ptr` must be a pointer returned by `alloc`.
1098    pub unsafe fn shrink(&mut self, rx_ptr: *const u8, new_size: usize) -> Result<(), AsmError> {
1099        if rx_ptr.is_null() {
1100            return Err(AsmError::InvalidArgument);
1101        }
1102
1103        if new_size == 0 {
1104            return unsafe { self.release(rx_ptr) };
1105        }
1106
1107        let Some(block) = self
1108            .tree
1109            .find(&BlockKey {
1110                rxptr: rx_ptr,
1111                block_size: 0,
1112            })
1113            .get()
1114        else {
1115            return Err(AsmError::InvalidArgument);
1116        };
1117
1118        unsafe {
1119            let pool = &mut *block.pool;
1120            let offset = rx_ptr as usize - block.rx_ptr() as usize;
1121            let area_start = (offset >> pool.granularity_log2 as usize) as u32;
1122
1123            let is_used = bit_vector_get_bit(block.used_bitvector(), area_start as _);
1124
1125            if !is_used {
1126                return Err(AsmError::InvalidArgument);
1127            }
1128
1129            let area_end =
1130                bit_vector_index_of(block.stop_bitvector(), area_start as _, true) as u32 + 1;
1131
1132            let area_prev_size = area_end - area_start;
1133            let area_shrunk_size = pool.area_size_from_byte_size(new_size);
1134
1135            if area_shrunk_size > area_prev_size {
1136                return Err(AsmError::InvalidState);
1137            }
1138
1139            let area_diff = area_prev_size - area_shrunk_size;
1140
1141            if area_diff != 0 {
1142                block.mark_shrunk_area(area_start + area_shrunk_size, area_end);
1143
1144                if self.options.fill_unused_memory {
1145                    let span_ptr = block
1146                        .rw_ptr()
1147                        .add(area_start as usize * pool.granularity as usize);
1148                    let span_size = area_diff as usize * pool.granularity as usize;
1149
1150                    protect_jit_memory(ProtectJitAccess::ReadWrite);
1151                    fill_pattern(span_ptr, self.fill_pattern, span_size);
1152                    protect_jit_memory(ProtectJitAccess::ReadExecute);
1153                    flush_instruction_cache(span_ptr, span_size);
1154                }
1155            }
1156        }
1157
1158        Ok(())
1159    }
1160
    /// Takes a pointer into the JIT memory and tries to query
    /// RX, RW mappings and size of the allocation.
    pub fn query(&self, rx_ptr: *const u8) -> Result<Span, AsmError> {
        // Locate the owning block via the tree keyed by RX address range.
        let Some(block) = self
            .tree
            .find(&BlockKey {
                rxptr: rx_ptr,
                block_size: 0,
            })
            .get()
        else {
            return Err(AsmError::InvalidArgument);
        };

        unsafe {
            // NOTE(review): only reads happen below — a shared `&*block.pool`
            // would avoid creating a `&mut` from a raw pointer inside a `&self`
            // method; confirm the pool helpers used here do not require `&mut`.
            let pool = &mut *block.pool;
            let offset = rx_ptr as usize - block.rx_ptr() as usize;

            // Convert the byte offset to a granule ("area") index.
            let area_start = (offset >> pool.granularity_log2 as usize) as u32;

            // The pointer must reference memory that is currently in use.
            let is_used = bit_vector_get_bit(block.used_bitvector(), area_start as _);

            if !is_used {
                return Err(AsmError::InvalidArgument);
            }

            // The allocation ends right after the first set stop bit.
            let area_end =
                bit_vector_index_of(block.stop_bitvector(), area_start as _, true) as u32 + 1;
            let byte_offset = pool.byte_size_from_area_size(area_start);
            let byte_size = pool.byte_size_from_area_size(area_end - area_start);

            // `icache_clean: false`: this path cannot know whether writes to the
            // range have been flushed.
            Ok(Span {
                rx: block.rx_ptr().add(byte_offset),
                rw: block.rw_ptr().add(byte_offset),
                size: byte_size,
                icache_clean: false,
                block: block as *const JitAllocatorBlock as _,
            })
        }
    }
1201
1202    /// Writes to the memory referenced by `span` using `write_func` and then flushes the instruction cache for that memory range.
1203    ///
1204    /// # SAFETY
1205    ///
1206    /// - `span` must reference a valid memory range allocated by this allocator.
1207    /// - `write_func` must only write to the memory range referenced by `span`.
1208    /// - After `write_func` is called, the memory range referenced by `span` must contain valid executable code.
1209    /// - The caller must ensure that no other threads are executing code in the memory range referenced by `span` while `write_func` is modifying it.
1210    ///
1211    pub unsafe fn write(
1212        &mut self,
1213        span: &mut Span,
1214        mut write_func: impl FnMut(&mut Span),
1215    ) -> Result<(), AsmError> {
1216        let size = span.size();
1217
1218        if size == 0 {
1219            return Ok(());
1220        }
1221
1222        protect_jit_memory(ProtectJitAccess::ReadWrite);
1223        write_func(span);
1224        protect_jit_memory(ProtectJitAccess::ReadExecute);
1225        flush_instruction_cache(span.rx(), span.size());
1226
1227        if span.size() != size {
1228            unsafe {
1229                self.shrink(span.rx(), span.size)?;
1230            }
1231        }
1232
1233        Ok(())
1234    }
1235
1236    /// Writes the provided `slice` to the memory referenced by `span` starting at `offset`, and then flushes the instruction cache for that memory range.
1237    ///
1238    /// # Safety
1239    ///
1240    /// - `span` must reference a valid memory range allocated by this allocator.
1241    /// - `offset + slice.len()` must not exceed the size of the memory range referenced by `span`.
1242    /// - After this function is called, the memory range from `span.rx() + offset` to `span.rx() + offset + slice.len()` must contain valid executable code.
1243    /// - The caller must ensure that no other threads are executing code in the memory range being modified by this function while it is running.
1244    pub unsafe fn copy_from_slice(
1245        &mut self,
1246        span: &mut Span,
1247        offset: usize,
1248        slice: &[u8],
1249    ) -> Result<(), AsmError> {
1250        if slice.is_empty() {
1251            return Ok(());
1252        }
1253
1254        protect_jit_memory(ProtectJitAccess::ReadWrite);
1255        unsafe {
1256            span.rw()
1257                .add(offset)
1258                .copy_from_nonoverlapping(slice.as_ptr(), slice.len());
1259        }
1260        protect_jit_memory(ProtectJitAccess::ReadExecute);
1261        flush_instruction_cache(span.rx(), span.size());
1262        Ok(())
1263    }
1264}
1265
/// Fills `size_in_bytes` bytes at `mem` with the repeating 32-bit `pattern`.
///
/// The pattern is written word-by-word using unaligned stores (`mem` carries no
/// alignment guarantee), and any trailing `size_in_bytes % 4` bytes are filled
/// with the leading bytes of the pattern so the whole span is overwritten
/// (previously a 1-3 byte tail was left untouched).
///
/// # Safety
///
/// `mem` must be valid for writes of `size_in_bytes` bytes.
#[inline]
unsafe fn fill_pattern(mem: *mut u8, pattern: u32, size_in_bytes: usize) {
    let word_count = size_in_bytes / 4;
    let tail_len = size_in_bytes % 4;
    let pattern_bytes = pattern.to_ne_bytes();

    unsafe {
        let mut p = mem;

        for _ in 0..word_count {
            // `write_unaligned`: a plain `write` through `*mut u32` would be UB
            // if `mem` is not 4-byte aligned.
            (p as *mut u32).write_unaligned(pattern);
            p = p.add(4);
        }

        // Fill the remaining tail bytes, if any.
        for i in 0..tail_len {
            p.add(i).write(pattern_bytes[i]);
        }
    }
}
1278
// SAFETY: `JitAllocator` mutates its pools/blocks only through `&mut self`
// methods, so moving it to another thread cannot race with this file's code.
// NOTE(review): confirm no thread-affine state is involved — e.g. on platforms
// where `protect_jit_memory` toggles a per-thread write protection, callers on
// the new thread must still go through the allocator's write APIs.
unsafe impl Send for JitAllocator {}
1280
/// A memory reference returned by [`JitAllocator::alloc`]
#[allow(dead_code)]
#[derive(Debug, Clone, Copy)]
pub struct Span {
    // Pointer into the read+execute mapping; never null on successful allocation.
    rx: *const u8,
    // Pointer into the read+write mapping; may equal `rx` or be null depending
    // on the allocation strategy (see `Span::rw`).
    rw: *mut u8,
    // Size of the span in bytes.
    size: usize,
    // Owning `JitAllocatorBlock`, stored type-erased as `*mut u8`.
    block: *mut u8,
    // `true` when the instruction cache is known to be consistent with this
    // range (set by `alloc`; cleared by `query`).
    icache_clean: bool,
}

impl Span {
    /// Returns a pointer having Read & Execute permissions (references executable memory).
    ///
    /// This pointer is never NULL if the allocation succeeded, it points to an executable memory.
    pub const fn rx(&self) -> *const u8 {
        self.rx
    }

    /// Returns a pointer having Read & Write permissions (references writable memory).
    ///
    /// Depending on the type of the allocation strategy this could either be:
    ///
    ///   - the same address as returned by `rx()` if the allocator uses RWX mapping (pages have all of Read, Write,
    ///     and Execute permissions) or MAP_JIT, which requires to call [protect_jit_memory()] manually.
    ///   - a valid pointer, but not the same as `rx` - this would be valid if dual mapping is used.
    ///   - NULL pointer, in case that the allocation strategy doesn't use RWX, MAP_JIT, or dual mapping. In this
    ///     case only [JitAllocator] can copy new code into the executable memory referenced by [Span].
    pub const fn rw(&self) -> *mut u8 {
        self.rw
    }

    /// Returns the size of this span in bytes.
    pub const fn size(&self) -> usize {
        self.size
    }

    /// Returns `true` when the instruction cache is known to be consistent with
    /// the contents of this span (no flush is pending).
    ///
    /// Made `const fn` for consistency with the sibling getters.
    pub const fn is_icache_clean(&self) -> bool {
        self.icache_clean
    }

    /// Returns `true` when the span can be written to directly, i.e. the
    /// Read-Write pointer returned by [`Span::rw`] is non-null.
    pub fn is_directly_writeable(&self) -> bool {
        !self.rw.is_null()
    }
}