1#![deny(missing_docs)]
4
5mod allocators;
6mod backend;
7mod ptr;
8mod storage;
/// Crate-internal helper routines backing the allocator implementations
/// (allocation, deallocation, and cache-resizing logic).
pub(crate) mod utils {
    pub(crate) mod allocate;
    pub(crate) mod cache_resize;
    pub(crate) mod deallocate;
}
14pub mod traits;
16
17use std::marker::PhantomData;
18
19use crate::allocators::cpu::CACHE;
20#[cfg(feature = "cuda")]
21use crate::allocators::cuda::CUDA_CACHE;
22pub use crate::storage::clone_storage;
23pub use allocators::cpu::resize_cpu_lru_cache;
24#[cfg(feature = "cuda")]
25pub use allocators::cuda::resize_cuda_lru_cache;
26pub use backend::*;
27pub use storage::cpu::CPU_STORAGE;
28#[cfg(feature = "cuda")]
29pub use storage::cuda::CUDA_STORAGE;
30use traits::Allocator;
31#[allow(non_snake_case)]
33#[ctor::dtor]
34fn free_pools() {
35 CACHE.lock().unwrap().clear();
36 #[cfg(feature = "cuda")]
37 CUDA_CACHE.lock().unwrap().clear();
38}
39
/// Memory allocator for tensors, parameterized over the backend type `B`
/// (e.g. CPU or CUDA). The struct itself holds no state — all bookkeeping
/// lives in backend-specific global caches (`CACHE` / `CUDA_CACHE`).
pub struct HptAllocator<B: BackendTy> {
    // Zero-sized marker tying this allocator to backend `B`.
    phantom: PhantomData<B>,
}
44
45impl<B: BackendTy> Clone for HptAllocator<B> {
46 fn clone(&self) -> Self {
47 HptAllocator {
48 phantom: PhantomData,
49 }
50 }
51}
52
53impl Allocator for HptAllocator<Cpu> {
54 type Output = *mut u8;
55 type CpuAllocator = HptAllocator<Cpu>;
56 #[cfg(feature = "cuda")]
57 type CudaAllocator = HptAllocator<Cuda>;
58 fn allocate(
59 &mut self,
60 layout: std::alloc::Layout,
61 device_id: usize,
62 ) -> Result<Self::Output, hpt_common::error::base::TensorError> {
63 CACHE.lock().unwrap().allocate(layout, device_id)
64 }
65 fn allocate_zeroed(
66 &mut self,
67 layout: std::alloc::Layout,
68 device_id: usize,
69 ) -> Result<Self::Output, hpt_common::error::base::TensorError> {
70 CACHE.lock().unwrap().allocate_zeroed(layout, device_id)
71 }
72 fn deallocate(
73 &mut self,
74 ptr: *mut u8,
75 layout: &std::alloc::Layout,
76 should_drop: bool,
77 device_id: usize,
78 ) {
79 CACHE
80 .lock()
81 .unwrap()
82 .deallocate(ptr, layout, should_drop, device_id);
83 }
84
85 fn insert_ptr(&mut self, ptr: *mut u8, device_id: usize) {
86 CACHE.lock().unwrap().insert_ptr(ptr, device_id);
87 }
88
89 fn clear(&mut self) {
90 CACHE.lock().unwrap().clear();
91 }
92
93 fn new() -> Self {
94 HptAllocator {
95 phantom: PhantomData,
96 }
97 }
98
99 fn forget(&mut self, ptr: *mut u8, device_id: usize) {
100 CACHE.lock().unwrap().forget(ptr, device_id);
101 }
102}
103
/// CUDA implementation of [`Allocator`]: every operation delegates to the
/// process-wide `CUDA_CACHE`, guarded by a mutex. Compiled only with the
/// `cuda` feature.
///
/// All methods panic if the cache mutex has been poisoned.
#[cfg(feature = "cuda")]
impl Allocator for HptAllocator<Cuda> {
    type Output = (*mut u8, std::sync::Arc<cudarc::driver::CudaDevice>);
    type CpuAllocator = HptAllocator<Cpu>;
    type CudaAllocator = HptAllocator<Cuda>;

    /// Requests device memory matching `layout` on `device_id` from the
    /// CUDA cache; also returns a handle to the owning device.
    fn allocate(
        &mut self,
        layout: std::alloc::Layout,
        device_id: usize,
    ) -> Result<Self::Output, hpt_common::error::base::TensorError> {
        let mut cache = CUDA_CACHE.lock().unwrap();
        cache.allocate(layout, device_id)
    }

    /// Like [`Self::allocate`], but the returned memory is zero-filled.
    fn allocate_zeroed(
        &mut self,
        layout: std::alloc::Layout,
        device_id: usize,
    ) -> Result<Self::Output, hpt_common::error::base::TensorError> {
        let mut cache = CUDA_CACHE.lock().unwrap();
        cache.allocate_zeroed(layout, device_id)
    }

    /// Returns `ptr` (described by `layout`) to the CUDA cache;
    /// `should_drop` is forwarded to the cache's deallocation logic.
    fn deallocate(
        &mut self,
        ptr: *mut u8,
        layout: &std::alloc::Layout,
        should_drop: bool,
        device_id: usize,
    ) {
        let mut cache = CUDA_CACHE.lock().unwrap();
        cache.deallocate(ptr, layout, should_drop, device_id);
    }

    /// Registers an externally obtained `ptr` with the CUDA cache.
    fn insert_ptr(&mut self, ptr: *mut u8, device_id: usize) {
        let mut cache = CUDA_CACHE.lock().unwrap();
        cache.insert_ptr(ptr, device_id);
    }

    /// Drains the entire CUDA cache.
    fn clear(&mut self) {
        let mut cache = CUDA_CACHE.lock().unwrap();
        cache.clear();
    }

    /// Creates a new (stateless) CUDA allocator handle.
    fn new() -> Self {
        Self {
            phantom: PhantomData,
        }
    }

    /// Tells the CUDA cache to stop tracking `ptr` without freeing it.
    fn forget(&mut self, ptr: *mut u8, device_id: usize) {
        let mut cache = CUDA_CACHE.lock().unwrap();
        cache.forget(ptr, device_id);
    }
}
160
// SAFETY: `HptAllocator<B>` contains only a zero-sized `PhantomData<B>`
// marker and no runtime state of its own; all mutable state lives behind the
// mutex-guarded global caches. Moving the handle across threads therefore
// cannot violate any invariant, regardless of whether `B` itself is `Send`.
unsafe impl<B: BackendTy> Send for HptAllocator<B> {}
// SAFETY: as above — the type has no interior state to share, so concurrent
// `&HptAllocator<B>` access is trivially sound.
unsafe impl<B: BackendTy> Sync for HptAllocator<B> {}