hpt_allocator/
lib.rs

//! This crate provides a memory allocator for tensors.

#![deny(missing_docs)]

mod allocators;
mod backend;
mod ptr;
mod storage;
pub(crate) mod utils {
    pub(crate) mod allocate;
    pub(crate) mod cache_resize;
    pub(crate) mod deallocate;
}
/// Traits for the allocator
pub mod traits;

use std::marker::PhantomData;

use crate::allocators::cpu::CACHE;
#[cfg(feature = "cuda")]
use crate::allocators::cuda::CUDA_CACHE;
pub use crate::storage::clone_storage;
pub use allocators::cpu::resize_cpu_lru_cache;
#[cfg(feature = "cuda")]
pub use allocators::cuda::resize_cuda_lru_cache;
pub use backend::*;
pub use storage::cpu::CPU_STORAGE;
#[cfg(feature = "cuda")]
pub use storage::cuda::CUDA_STORAGE;
use traits::Allocator;
/// Frees all cached memory pools before the program exits.
///
/// Registered via `ctor::dtor`, so it runs automatically as a destructor at
/// program shutdown.
#[allow(non_snake_case)]
#[ctor::dtor]
fn free_pools() {
    CACHE.lock().unwrap().clear();
    #[cfg(feature = "cuda")]
    CUDA_CACHE.lock().unwrap().clear();
}

/// Built-in allocator for Hpt.
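///
/// A minimal usage sketch (marked `ignore`, so it is not compiled as a doc
/// test). It assumes the crate is named `hpt_allocator` and that the `Cpu`
/// backend marker is re-exported via `pub use backend::*`:
///
/// ```ignore
/// use hpt_allocator::{Cpu, HptAllocator};
/// use hpt_allocator::traits::Allocator;
///
/// // Create a CPU allocator; allocations are served from the global LRU cache.
/// let mut allocator = HptAllocator::<Cpu>::new();
///
/// // Request 1 KiB aligned to 64 bytes on device 0.
/// let layout = std::alloc::Layout::from_size_align(1024, 64).unwrap();
/// let ptr = allocator.allocate(layout, 0).unwrap();
///
/// // Hand the block back to the cache when it is no longer needed.
/// allocator.deallocate(ptr, &layout, true, 0);
/// ```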
pub struct HptAllocator<B: BackendTy> {
    phantom: PhantomData<B>,
}

// Manual `Clone` impl: cloning only copies the `PhantomData` marker, so no
// `B: Clone` bound is needed (a derive would add one).
impl<B: BackendTy> Clone for HptAllocator<B> {
    fn clone(&self) -> Self {
        HptAllocator {
            phantom: PhantomData,
        }
    }
}

// CPU allocations are routed through the mutex-guarded global LRU cache (`CACHE`).
impl Allocator for HptAllocator<Cpu> {
    type Output = *mut u8;
    type CpuAllocator = HptAllocator<Cpu>;
    #[cfg(feature = "cuda")]
    type CudaAllocator = HptAllocator<Cuda>;
    fn allocate(
        &mut self,
        layout: std::alloc::Layout,
        device_id: usize,
    ) -> Result<Self::Output, hpt_common::error::base::TensorError> {
        CACHE.lock().unwrap().allocate(layout, device_id)
    }
    fn allocate_zeroed(
        &mut self,
        layout: std::alloc::Layout,
        device_id: usize,
    ) -> Result<Self::Output, hpt_common::error::base::TensorError> {
        CACHE.lock().unwrap().allocate_zeroed(layout, device_id)
    }
    fn deallocate(
        &mut self,
        ptr: *mut u8,
        layout: &std::alloc::Layout,
        should_drop: bool,
        device_id: usize,
    ) {
        CACHE
            .lock()
            .unwrap()
            .deallocate(ptr, layout, should_drop, device_id);
    }

    fn insert_ptr(&mut self, ptr: *mut u8, device_id: usize) {
        CACHE.lock().unwrap().insert_ptr(ptr, device_id);
    }

    fn clear(&mut self) {
        CACHE.lock().unwrap().clear();
    }

    fn new() -> Self {
        HptAllocator {
            phantom: PhantomData,
        }
    }

    fn forget(&mut self, ptr: *mut u8, device_id: usize) {
        CACHE.lock().unwrap().forget(ptr, device_id);
    }
}

// CUDA allocations go through the mutex-guarded global `CUDA_CACHE`; each
// allocation is returned together with its `CudaDevice` handle.
#[cfg(feature = "cuda")]
impl Allocator for HptAllocator<Cuda> {
    type Output = (*mut u8, std::sync::Arc<cudarc::driver::CudaDevice>);
    type CpuAllocator = HptAllocator<Cpu>;
    type CudaAllocator = HptAllocator<Cuda>;

    fn allocate(
        &mut self,
        layout: std::alloc::Layout,
        device_id: usize,
    ) -> Result<Self::Output, hpt_common::error::base::TensorError> {
        CUDA_CACHE.lock().unwrap().allocate(layout, device_id)
    }

    fn allocate_zeroed(
        &mut self,
        layout: std::alloc::Layout,
        device_id: usize,
    ) -> Result<Self::Output, hpt_common::error::base::TensorError> {
        CUDA_CACHE
            .lock()
            .unwrap()
            .allocate_zeroed(layout, device_id)
    }

    fn deallocate(
        &mut self,
        ptr: *mut u8,
        layout: &std::alloc::Layout,
        should_drop: bool,
        device_id: usize,
    ) {
        CUDA_CACHE
            .lock()
            .unwrap()
            .deallocate(ptr, layout, should_drop, device_id);
    }

    fn insert_ptr(&mut self, ptr: *mut u8, device_id: usize) {
        CUDA_CACHE.lock().unwrap().insert_ptr(ptr, device_id);
    }

    fn clear(&mut self) {
        CUDA_CACHE.lock().unwrap().clear();
    }

    fn new() -> Self {
        HptAllocator {
            phantom: PhantomData,
        }
    }

    fn forget(&mut self, ptr: *mut u8, device_id: usize) {
        CUDA_CACHE.lock().unwrap().forget(ptr, device_id);
    }
}

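// SAFETY: `HptAllocator` carries no state besides `PhantomData<B>`; all shared
// state lives in the globally synchronized caches (`CACHE` / `CUDA_CACHE`), so
// sending or sharing the allocator across threads is sound regardless of `B`.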
unsafe impl<B: BackendTy> Send for HptAllocator<B> {}
unsafe impl<B: BackendTy> Sync for HptAllocator<B> {}