feanor_mempool/lib.rs

#![feature(allocator_api)]
#![feature(ptr_alignment_type)]
#![feature(btree_cursors)]
#![feature(slice_ptr_get)]
#![feature(layout_for_ptr)]
#![feature(btreemap_alloc)]
#![feature(const_alloc_layout)]
#![feature(mapped_lock_guards)]
#![feature(never_type)]
#![feature(test)]

extern crate test;

use std::alloc::{AllocError, Allocator, Global, Layout};
use std::ptr::NonNull;
use std::rc::Rc;
use std::sync::Arc;

///
/// Implementation of a memory pool supporting only allocations of a fixed layout.
/// 
pub mod fixedsize;
///
/// Implementation of a memory pool supporting arbitrary allocations.
/// 
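/// # Example
/// A minimal usage sketch, mirroring how the benchmarks in this file set up the pool;
/// the element type and sizes are just for illustration:
/// ```
/// #![feature(allocator_api)]
/// #![feature(ptr_alignment_type)]
/// use std::ptr::Alignment;
/// use feanor_mempool::dynsize::DynLayoutMempool;
/// 
/// let allocator: DynLayoutMempool = DynLayoutMempool::new_global(Alignment::of::<u64>());
/// // allocations of different sizes can be served by the same pool
/// let mut small: Vec<u64, _> = Vec::with_capacity_in(64, &allocator);
/// let mut large: Vec<u64, _> = Vec::with_capacity_in(64 * 64, &allocator);
/// small.push(1);
/// large.push(2);
/// assert_eq!(1, small[0]);
/// ```
/// 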
pub mod dynsize;

///
/// An [`Rc`] pointing to an [`Allocator`]. As opposed to `Rc<A>`, the type `AllocRc<A>`
/// itself implements [`Allocator`].
/// 
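/// # Example
/// A minimal usage sketch; the wrapped allocator is just [`Global`] here for illustration:
/// ```
/// #![feature(allocator_api)]
/// use std::alloc::Global;
/// use std::rc::Rc;
/// use feanor_mempool::AllocRc;
/// 
/// let alloc = AllocRc(Rc::new(Global));
/// // since `AllocRc` implements `Allocator`, it can back standard collections
/// let mut data: Vec<i64, _> = Vec::with_capacity_in(16, alloc.clone());
/// data.push(42);
/// assert_eq!(42, data[0]);
/// ```
/// 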
pub struct AllocRc<A: Allocator, PtrAlloc: Allocator + Clone = Global>(pub Rc<A, PtrAlloc>);

impl<A: Allocator, PtrAlloc: Allocator + Clone> Clone for AllocRc<A, PtrAlloc> {

    fn clone(&self) -> Self {
        Self(self.0.clone())
    }
}

unsafe impl<A: Allocator, PtrAlloc: Allocator + Clone> Allocator for AllocRc<A, PtrAlloc> {

    fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
        (*self.0).allocate(layout)
    }

    unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
        (*self.0).deallocate(ptr, layout)
    }
}

///
/// An [`Arc`] pointing to an [`Allocator`]. As opposed to `Arc<A>`, the type `AllocArc<A>`
/// itself implements [`Allocator`].
/// 
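/// # Example
/// A minimal usage sketch; the wrapped allocator is just [`Global`] here for illustration:
/// ```
/// #![feature(allocator_api)]
/// use std::alloc::Global;
/// use std::sync::Arc;
/// use feanor_mempool::AllocArc;
/// 
/// let alloc = AllocArc(Arc::new(Global));
/// // clones of the `AllocArc` can be moved into threads and used as the allocator there
/// std::thread::scope(|scope| {
///     for _ in 0..4 {
///         let alloc = alloc.clone();
///         scope.spawn(move || {
///             let mut data: Vec<i64, _> = Vec::with_capacity_in(16, alloc);
///             data.push(42);
///             assert_eq!(42, data[0]);
///         });
///     }
/// });
/// ```
/// 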
pub struct AllocArc<A: Allocator, PtrAlloc: Allocator + Clone = Global>(pub Arc<A, PtrAlloc>);

unsafe impl<A: Allocator, PtrAlloc: Allocator + Clone> Allocator for AllocArc<A, PtrAlloc> {

    fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
        (*self.0).allocate(layout)
    }

    unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
        (*self.0).deallocate(ptr, layout)
    }
}

impl<A: Allocator, PtrAlloc: Allocator + Clone> Clone for AllocArc<A, PtrAlloc> {
    
    fn clone(&self) -> Self {
        Self(self.0.clone())
    }
}

///
/// Wrapper around a [`NonNull`] that unconditionally implements [`Send`], so that
/// raw pointers can be moved to another thread.
/// 
struct SendableNonNull<T: ?Sized> {
    data: NonNull<T>
}

impl<T: ?Sized> SendableNonNull<T> {

    ///
    /// Callers must ensure that it is actually sound to access the pointee from
    /// another thread.
    /// 
    unsafe fn new(data: NonNull<T>) -> Self {
        Self { data }
    }

    fn extract(self) -> NonNull<T> {
        self.data
    }
}

unsafe impl<T: ?Sized> Send for SendableNonNull<T> {}

#[cfg(test)]
use dynsize::DynLayoutMempool;
#[cfg(test)]
use std::ptr::Alignment;

///
/// I try to mimic the allocation behavior when performing operations in
/// a ring extension. In particular, at the time of writing, the feanor-math
/// implementation required three differently sized allocations:
///  - arrays of size `n` for ring elements
///  - arrays of size `2n` temporarily during multiplication
///  - arrays of size `n^2` for the matrix during complex operations, in particular division
/// 
#[cfg(test)]
fn mock_use_case_algebra_computations<A: Allocator>(allocator: &A) {

    #[inline(never)]
    fn mimic_convolution<A: Allocator>(lhs: &[i64], rhs: &[i64], dst: &mut Vec<i64, A>) {
        dst.clear();
        for i in 0..lhs.len() {
            std::hint::black_box(lhs[i]);
        }
        for i in 0..rhs.len() {
            std::hint::black_box(rhs[i]);
        }
        for _ in 0..(lhs.len() + rhs.len() - 1) {
            dst.push(std::hint::black_box(0));
        }
        // pad by one element so that `dst` ends up with length `lhs.len() + rhs.len()`,
        // which lets the caller split it into two equally sized halves
        dst.push(0);
    }

    #[inline(never)]
    fn mimic_addition<A: Allocator>(lhs: &[i64], rhs: &[i64], dst: &mut Vec<i64, A>) {
        dst.clear();
        assert_eq!(lhs.len(), rhs.len());
        for i in 0..lhs.len() {
            std::hint::black_box(lhs[i]);
            std::hint::black_box(rhs[i]);
            dst.push(std::hint::black_box(0));
        }
    }

    #[inline(never)]
    fn mimic_matrix_construction<A: Allocator>(x: &[i64], y: &[i64], dst: &mut Vec<i64, A>) {
        for i in 0..x.len() {
            for j in 0..y.len() {
                std::hint::black_box(x[i]);
                std::hint::black_box(y[j]);
                dst.push(std::hint::black_box(0));
            }
        }
    }

    const SIZE: usize = 64;
    const SIZE2: usize = 2 * SIZE;
    const SIZE_SQR: usize = SIZE * SIZE;

    let mut x = Vec::with_capacity_in(SIZE, allocator);
    x.extend((0..SIZE).map(|n| n as i64));
    let mut y = Vec::with_capacity_in(SIZE, allocator);
    y.extend((0..SIZE).map(|n| 2 * n as i64 + 1));
    // mimic some additions and multiplications, as they would appear e.g. during polynomial
    // evaluation via Horner's scheme
    for _ in 0..8 {
        let mut w = Vec::with_capacity_in(SIZE2, allocator);
        mimic_convolution(&x, &y, &mut w);
        let mut z = Vec::with_capacity_in(SIZE, allocator);
        mimic_addition(&w[SIZE..], &w[0..SIZE], &mut z);
        mimic_addition(&z, &x, &mut y);
        std::hint::black_box(w);
        std::hint::black_box(z);
    }
    let mut matrix = Vec::with_capacity_in(SIZE_SQR, allocator);
    mimic_matrix_construction(&x, &y, &mut matrix);
}

#[cfg(test)]
fn benchmark_dynsize_multithreaded<A: Allocator + Sync>(allocator: &A) {

    const THREADS: usize = 16;
    const LOOPS: usize = 16;

    std::thread::scope(|scope| {
        for _ in 0..THREADS {
            scope.spawn(|| {
                for _ in 0..LOOPS {
                    mock_use_case_algebra_computations(&allocator)
                }
            });
        }
    });
}

#[bench]
fn bench_dynsize_multithreaded_mempool(bencher: &mut test::Bencher) {
    let allocator: DynLayoutMempool = DynLayoutMempool::new_global(Alignment::of::<u64>());
    bencher.iter(|| {
        benchmark_dynsize_multithreaded(&allocator);
    });
}

#[bench]
fn bench_dynsize_multithreaded_global(bencher: &mut test::Bencher) {
    let allocator = Global;
    bencher.iter(|| {
        benchmark_dynsize_multithreaded(&allocator);
    });
}

#[cfg(test)]
fn benchmark_dynsize_singlethreaded<A: Allocator>(allocator: &A) {

    const LOOPS: usize = 256;

    for _ in 0..LOOPS {
        mock_use_case_algebra_computations(&allocator)
    }
}

#[bench]
fn bench_dynsize_singlethreaded_mempool(bencher: &mut test::Bencher) {
    let allocator: DynLayoutMempool = DynLayoutMempool::new_global(Alignment::of::<u64>());
    bencher.iter(|| {
        benchmark_dynsize_singlethreaded(&allocator);
    });
}

#[bench]
fn bench_dynsize_singlethreaded_global(bencher: &mut test::Bencher) {
    let allocator = Global;
    bencher.iter(|| {
        benchmark_dynsize_singlethreaded(&allocator);
    });
}