1use super::*;
4use std::sync::Mutex;
5use bytemuck::Zeroable;
6use std::hash::{ Hash, Hasher };
7use std::ops::{ Deref, DerefMut, Index, IndexMut };
8use std::fmt;
9
10#[cfg(feature = "cuda")]
11use cust::memory::{ DeviceBuffer, DeviceSlice, CopyDestination };
12#[cfg(feature = "cuda")]
13use cust::context::Context;
14
/// A "universal" vector whose contents may live on the CPU and/or on
/// one or more CUDA devices, with lazy migration between devices.
pub struct UVec<T: UniversalCopy> {
    /// Host-side buffer; `None` until first allocated on the CPU.
    data_cpu: Option<Box<[T]>>,
    /// Per-CUDA-device buffers; each entry is `None` until allocated.
    #[cfg(feature = "cuda")]
    data_cuda: [Option<DeviceBuffer<T>>; MAX_NUM_CUDA_DEVICES],
    /// `valid_flag[d]` is true iff device `d` holds an up-to-date copy.
    valid_flag: [bool; MAX_DEVICES],
    /// One lock per device, serializing the read-triggered migration
    /// done through a shared reference in `schedule_device_read_ro`.
    read_locks: [Mutex<()>; MAX_DEVICES],
    /// Logical number of live elements.
    size: usize,
    /// Number of elements each allocated buffer can hold (>= size).
    capacity: usize
}
50
51impl<T: UniversalCopy + fmt::Debug> fmt::Debug for UVec<T> {
52 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
53 let slice = self.as_ref();
54 write!(f, "uvec[{}] = [", slice.len())?;
55 for (i, e) in slice.iter().enumerate() {
56 if i != 0 {
57 write!(f, ", ")?;
58 }
59 if f.alternate() {
60 write!(f, "{:#?}", e)?;
61 }
62 else {
63 write!(f, "{:?}", e)?;
64 }
65 }
66 write!(f, "]")
67 }
68}
69
70impl<T: UniversalCopy> Default for UVec<T> {
71 #[inline]
72 fn default() -> Self {
73 Self {
74 data_cpu: None,
75 #[cfg(feature = "cuda")]
76 data_cuda: Default::default(),
77 valid_flag: [false; MAX_DEVICES],
78 read_locks: Default::default(),
79 size: 0,
80 capacity: 0
81 }
82 }
83}
84
85impl<T: UniversalCopy> From<Box<[T]>> for UVec<T> {
86 #[inline]
87 fn from(b: Box<[T]>) -> UVec<T> {
88 let len = b.len();
89 let mut valid_flag = [false; MAX_DEVICES];
90 valid_flag[Device::CPU.to_id()] = true;
91 Self {
92 data_cpu: Some(b),
93 #[cfg(feature = "cuda")]
94 data_cuda: Default::default(),
95 valid_flag,
96 read_locks: Default::default(),
97 size: len,
98 capacity: len
99 }
100 }
101}
102
103impl<T: UniversalCopy> From<Vec<T>> for UVec<T> {
104 #[inline]
105 fn from(v: Vec<T>) -> UVec<T> {
106 v.into_boxed_slice().into()
107 }
108}
109
110impl<T: UniversalCopy> FromIterator<T> for UVec<T> {
111 #[inline]
112 fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
113 Vec::from_iter(iter).into()
114 }
115}
116
117impl<T: UniversalCopy + Zeroable> UVec<T> {
118 #[inline]
125 fn alloc_zeroed(&mut self, device: Device) {
126 use Device::*;
127 match device {
128 CPU => {
129 use std::alloc;
130 self.data_cpu = Some(unsafe {
131 let ptr = alloc::alloc_zeroed(
132 alloc::Layout::array::<T>(
133 self.capacity
134 ).unwrap()) as *mut T;
135 Box::from_raw(
136 core::ptr::slice_from_raw_parts_mut(
137 ptr, self.size))
138 });
140 },
141 #[cfg(feature = "cuda")]
142 CUDA(c) => {
143 let _context = Context::new(
144 CUDA_DEVICES[c as usize].0).unwrap();
145 self.data_cuda[c as usize] =
146 Some(DeviceBuffer::zeroed(self.capacity)
147 .unwrap());
148 }
149 }
150 }
151}
152
153#[inline]
154unsafe fn alloc_cpu_uninit<T: UniversalCopy>(
155 sz: usize
156) -> Box<[T]> {
157 use std::alloc;
158 let ptr = alloc::alloc(alloc::Layout::array::<T>(sz).unwrap())
159 as *mut T;
160 Box::from_raw(core::ptr::slice_from_raw_parts_mut(ptr, sz))
161}
162
/// Allocate an uninitialized device buffer of `sz` elements on CUDA
/// device `dev`.
///
/// # Safety
/// The returned buffer's contents are uninitialized; the caller must
/// write elements before copying them back.
///
/// # Panics
/// Panics if context creation or the device allocation fails.
#[cfg(feature = "cuda")]
#[inline]
unsafe fn alloc_cuda_uninit<T: UniversalCopy>(
    sz: usize, dev: u8
) -> DeviceBuffer<T> {
    // Make a context for the target device current for the allocation.
    let _context = Context::new(CUDA_DEVICES[dev as usize].0)
        .unwrap();
    DeviceBuffer::uninitialized(sz).unwrap()
}
172
impl<T: UniversalCopy> UVec<T> {
    /// Allocate an uninitialized buffer of `self.capacity` elements on
    /// `device`, replacing any previous buffer on that device.
    ///
    /// # Safety
    /// The new buffer is uninitialized; the caller must write elements
    /// before reading and is responsible for updating `valid_flag`.
    #[inline]
    unsafe fn alloc_uninitialized(&mut self, device: Device) {
        use Device::*;
        match device {
            CPU => {
                self.data_cpu = Some(alloc_cpu_uninit(
                    self.capacity));
            },
            #[cfg(feature = "cuda")]
            CUDA(c) => {
                self.data_cuda[c as usize] = Some(
                    alloc_cuda_uninit(self.capacity, c));
            }
        }
    }

    /// Return some device that currently holds a valid copy of the
    /// data (the one with the lowest device id), or `None` if no
    /// device is valid (e.g. a fresh `Default` vector).
    #[inline]
    fn device_valid(&self) -> Option<Device> {
        self.valid_flag.iter().enumerate().find(|(_i, v)| **v)
            .map(|(i, _v)| Device::from_id(i))
    }

    /// Make `device` hold a valid copy of the first `size` elements:
    /// allocate its buffer if absent, then copy from some currently
    /// valid device. No-op when `device` is already valid.
    ///
    /// # Panics
    /// Panics ("no valid dev") if no device holds valid data.
    #[inline]
    fn schedule_device_read(&mut self, device: Device) {
        if self.valid_flag[device.to_id()] {
            return
        }
        use Device::*;
        // Check buffer presence first: the source-device lookup below
        // must not observe a partially set-up target.
        let is_none = match device {
            CPU => self.data_cpu.is_none(),
            #[cfg(feature = "cuda")]
            CUDA(c) => self.data_cuda[c as usize].is_none()
        };
        if is_none {
            // SAFETY: the uninitialized contents are fully overwritten
            // by the copy below before `device` is marked valid.
            unsafe { self.alloc_uninitialized(device); }
        }
        let device_valid = self.device_valid().expect("no valid dev");
        // Only the first `size` elements are migrated; the tail up to
        // `capacity` stays unspecified on the destination.
        match (device_valid, device) {
            (CPU, CPU) => {},
            #[cfg(feature = "cuda")]
            (CPU, CUDA(c)) => {
                // host -> device upload
                let c = c as usize;
                self.data_cuda[c].as_mut().unwrap().index(..self.size)
                    .copy_from(
                        &self.data_cpu.as_ref().unwrap()[..self.size]
                    ).unwrap();
            },
            #[cfg(feature = "cuda")]
            (CUDA(c), CPU) => {
                // device -> host download
                let c = c as usize;
                self.data_cuda[c].as_ref().unwrap().index(..self.size)
                    .copy_to(
                        &mut self.data_cpu.as_mut().unwrap()[..self.size]
                    ).unwrap();
            },
            #[cfg(feature = "cuda")]
            (CUDA(c1), CUDA(c2)) => {
                // device -> device copy between two distinct devices.
                let (c1, c2) = (c1 as usize, c2 as usize);
                assert_ne!(c1, c2);
                // NOTE(review): const->mut cast sidesteps the borrow
                // checker so c1 can be read while c2 is written. Sound
                // only because c1 != c2 (asserted above), so the two
                // references never alias.
                let c2_mut = unsafe {
                    &mut *(self.data_cuda[c2].as_ref().unwrap()
                           as *const DeviceBuffer<T>
                           as *mut DeviceBuffer<T>)
                };
                self.data_cuda[c1].as_ref().unwrap().index(..self.size)
                    .copy_to(
                        &mut c2_mut.index(..self.size)
                    ).unwrap();
            }
        }
        self.valid_flag[device.to_id()] = true;
    }

    /// Shared-reference variant of [`schedule_device_read`]: migrates
    /// data to `device` through interior mutability.
    ///
    /// The per-device mutex serializes concurrent callers for the same
    /// target device. NOTE(review): the `&self -> &mut self` cast is
    /// still UB by strict aliasing rules if other shared readers run
    /// concurrently; presumably callers uphold an external contract —
    /// confirm.
    #[inline]
    fn schedule_device_read_ro(&self, device: Device) {
        // Fast path: already valid, no locking needed.
        if self.valid_flag[device.to_id()] {
            return
        }
        let locked = self.read_locks[device.to_id()]
            .lock().unwrap();
        unsafe {
            (&mut *(self as *const UVec<T> as *mut UVec<T>))
                .schedule_device_read(device);
        }
        drop(locked);
    }

    /// Prepare `device` for writing: ensure it holds valid data, then
    /// invalidate every other device's copy (single-writer semantics).
    #[inline]
    fn schedule_device_write(&mut self, device: Device) {
        if !self.valid_flag[device.to_id()] {
            self.schedule_device_read(device);
        }
        self.valid_flag[..].fill(false);
        self.valid_flag[device.to_id()] = true;
    }

    /// Free all buffers on all devices (validity flags untouched —
    /// callers reset them right after).
    #[inline]
    fn drop_all_buf(&mut self) {
        self.data_cpu = None;
        #[cfg(feature = "cuda")]
        for d in &mut self.data_cuda {
            *d = None;
        }
    }

    /// Drop every buffer and allocate a fresh uninitialized one of
    /// `self.capacity` elements on `device`, discarding old contents.
    /// Marks `device` as the only valid device.
    ///
    /// # Safety
    /// The new buffer is uninitialized; elements must be written
    /// before being read.
    #[inline]
    unsafe fn realloc_uninit_nopreserve(&mut self, device: Device) {
        self.drop_all_buf();
        if self.capacity > 10000000 {
            clilog::debug!("large realloc: capacity {}",
                           self.capacity);
        }
        self.alloc_uninitialized(device);
        self.valid_flag.fill(false);
        self.valid_flag[device.to_id()] = true;
    }

    /// Like [`realloc_uninit_nopreserve`], but copies the first
    /// `self.size` old elements into the new buffer on `device`.
    ///
    /// # Safety
    /// `device`'s buffer must exist and hold valid data (the `take()`
    /// panics otherwise); elements beyond `self.size` are
    /// uninitialized in the new buffer.
    #[inline]
    unsafe fn realloc_uninit_preserve(&mut self, device: Device) {
        use Device::*;
        match device {
            CPU => {
                // Keep the old CPU buffer alive past drop_all_buf so
                // its prefix can be copied into the new allocation.
                let old = self.data_cpu.take().unwrap();
                self.drop_all_buf();
                self.alloc_uninitialized(device);
                self.data_cpu.as_mut().unwrap()[..self.size]
                    .copy_from_slice(&old[..self.size]);
            },
            #[cfg(feature = "cuda")]
            CUDA(c) => {
                let c = c as usize;
                let old = self.data_cuda[c].take().unwrap();
                self.drop_all_buf();
                self.alloc_uninitialized(device);
                self.data_cuda[c].as_mut().unwrap().index(..self.size)
                    .copy_from(&old.index(..self.size))
                    .unwrap();
            }
        }
        self.valid_flag.fill(false);
        self.valid_flag[device.to_id()] = true;
    }

    /// Read a single element by index from whichever device currently
    /// holds valid data (no migration).
    ///
    /// # Panics
    /// Panics if no device is valid, or (CPU path) if `idx` is out of
    /// bounds. NOTE(review): the CUDA path does not bounds-check `idx`
    /// against `self.size` here — confirm callers do.
    #[inline]
    pub fn get(&self, idx: usize) -> T {
        use Device::*;
        match self.device_valid().unwrap() {
            CPU => self.data_cpu.as_ref().unwrap()[idx],
            #[cfg(feature = "cuda")]
            CUDA(c) => {
                // NOTE(review): `assume_init` on an uninitialized [T; 1]
                // is formally UB even for POD types; presumably accepted
                // because UniversalCopy types are plain old data and the
                // value is overwritten by copy_to before use — consider
                // MaybeUninit<[T; 1]> plus a checked read instead.
                let mut ret: [T; 1] = unsafe {
                    std::mem::MaybeUninit::uninit().assume_init()
                };
                self.data_cuda[c as usize].as_ref().unwrap()
                    .index(idx)
                    .copy_to(&mut ret)
                    .unwrap();
                ret[0]
            }
        }
    }
}
362
363impl<T: UniversalCopy + Zeroable> UVec<T> {
364 #[inline]
366 pub fn new_zeroed(size: usize, device: Device) -> UVec<T> {
367 let mut v: UVec<T> = Default::default();
368 v.size = size;
369 v.capacity = size;
370 v.alloc_zeroed(device);
371 v.valid_flag[device.to_id()] = true;
372 v
373 }
374}
375
impl<T: UniversalCopy> UVec<T> {
    /// Logical number of elements.
    #[inline]
    pub fn len(&self) -> usize {
        self.size
    }

    /// Number of elements the allocated buffers can hold (>= `len()`).
    #[inline]
    pub fn capacity(&self) -> usize {
        self.capacity
    }

    /// Create an empty vector (zero-length, CPU-resident).
    #[inline]
    pub fn new() -> UVec<T> {
        // Safe despite new_uninitialized: there are no elements to read.
        unsafe { Self::new_uninitialized(0, Device::CPU) }
    }

    /// Create a vector of `size` uninitialized elements on `device`.
    ///
    /// # Safety
    /// Contents are uninitialized; every element must be written
    /// before it is read.
    #[inline]
    pub unsafe fn new_uninitialized(
        size: usize, device: Device
    ) -> UVec<T> {
        let mut v: UVec<T> = Default::default();
        v.size = size;
        v.capacity = size;
        v.alloc_uninitialized(device);
        v.valid_flag[device.to_id()] = true;
        v
    }

    /// Resize to `size` elements without preserving contents.
    /// Reallocates (with 1.5x growth) only when capacity is exceeded.
    ///
    /// # Safety
    /// After a reallocation the contents are uninitialized.
    /// NOTE(review): when no reallocation happens, validity flags are
    /// left untouched, so `device` may not be the valid one — callers
    /// appear responsible for the follow-up write; confirm.
    #[inline]
    pub unsafe fn resize_uninit_nopreserve(&mut self, size: usize, device: Device) {
        if self.capacity < size {
            // grow by 1.5x to amortize repeated resizes
            self.capacity = (size as f64 * 1.5).round() as usize;
            self.realloc_uninit_nopreserve(device);
        }
        self.size = size;
    }

    /// Resize to `size` elements, preserving the old prefix, and make
    /// `device` the single valid device.
    ///
    /// # Safety
    /// When growing, elements beyond the old length are uninitialized.
    #[inline]
    pub unsafe fn resize_uninit_preserve(&mut self, size: usize, device: Device) {
        // Pull the current contents onto `device` first, so the
        // preserve-realloc below copies from a local, valid buffer.
        if self.size != 0 {
            self.schedule_device_read(device);
        }
        if self.capacity < size {
            // grow by 1.5x to amortize repeated resizes
            self.capacity = (size as f64 * 1.5).round() as usize;
            self.realloc_uninit_preserve(device);
        }
        self.size = size;
        self.valid_flag.fill(false);
        self.valid_flag[device.to_id()] = true;
    }
}
447
448impl<T: UniversalCopy> AsRef<[T]> for UVec<T> {
449 #[inline]
457 fn as_ref(&self) -> &[T] {
458 self.schedule_device_read_ro(Device::CPU);
459 &self.data_cpu.as_ref().unwrap()[..self.size]
460 }
461}
462
463impl<T: UniversalCopy> AsMut<[T]> for UVec<T> {
464 #[inline]
472 fn as_mut(&mut self) -> &mut [T] {
473 self.schedule_device_write(Device::CPU);
474 &mut self.data_cpu.as_mut().unwrap()[..self.size]
475 }
476}
477
478impl<T: UniversalCopy> Deref for UVec<T> {
479 type Target = [T];
480 #[inline]
487 fn deref(&self) -> &[T] {
488 self.as_ref()
489 }
490}
491
492impl<T: UniversalCopy> DerefMut for UVec<T> {
493 #[inline]
500 fn deref_mut(&mut self) -> &mut [T] {
501 self.as_mut()
502 }
503}
504
505impl<T: UniversalCopy, I> Index<I> for UVec<T> where [T]: Index<I> {
506 type Output = <[T] as Index<I>>::Output;
507 #[inline]
508 fn index(&self, i: I) -> &Self::Output {
509 self.as_ref().index(i)
510 }
511}
512
513impl<T: UniversalCopy, I> IndexMut<I> for UVec<T> where [T]: IndexMut<I> {
514 #[inline]
515 fn index_mut(&mut self, i: I) -> &mut Self::Output {
516 self.as_mut().index_mut(i)
517 }
518}
519
#[cfg(feature = "cuda")]
impl<T: UniversalCopy> AsCUDASlice<T> for UVec<T> {
    /// Borrow the contents as a device slice on `cuda_device`,
    /// migrating the data there first if that device's copy is stale.
    ///
    /// # Panics
    /// Panics if `cuda_device` is not a CUDA device.
    #[inline]
    fn as_cuda_slice(&self, cuda_device: Device) -> DeviceSlice<T> {
        use Device::*;
        let c = match cuda_device {
            CUDA(c) => c as usize,
            _ => panic!("AsCUDASlice does not accept \
                non-CUDA device {:?}", cuda_device)
        };
        self.schedule_device_read_ro(cuda_device);
        let ptr = self.data_cuda[c].as_ref().unwrap().as_device_ptr();
        // NOTE(review): the returned slice is rebuilt from a raw device
        // pointer and is not lifetime-bound to `self`; callers must not
        // let it outlive this vector or its next reallocation — confirm.
        unsafe { DeviceSlice::from_raw_parts(ptr, self.size) }
    }
}
536
#[cfg(feature = "cuda")]
impl<T: UniversalCopy> AsCUDASliceMut<T> for UVec<T> {
    /// Borrow the contents as a writable device slice on `cuda_device`,
    /// which becomes the only valid device (others are invalidated).
    ///
    /// # Panics
    /// Panics if `cuda_device` is not a CUDA device.
    #[inline]
    fn as_cuda_slice_mut(&mut self, cuda_device: Device) -> DeviceSlice<T> {
        use Device::*;
        let c = match cuda_device {
            CUDA(c) => c as usize,
            _ => panic!("AsCUDASlice does not accept \
                non-CUDA device {:?}", cuda_device)
        };
        self.schedule_device_write(cuda_device);
        let ptr = self.data_cuda[c].as_ref().unwrap().as_device_ptr();
        // NOTE(review): not lifetime-bound to `self`; must not outlive
        // this vector or its next reallocation — confirm.
        unsafe { DeviceSlice::from_raw_parts(ptr, self.size) }
    }
}
553
554impl<T: UniversalCopy> AsUPtr<T> for UVec<T> {
555 #[inline]
556 fn as_uptr(&self, device: Device) -> *const T {
557 self.schedule_device_read_ro(device);
558 use Device::*;
559 match device {
560 CPU => self.data_cpu.as_ref().unwrap().as_ptr(),
561 #[cfg(feature = "cuda")]
562 CUDA(c) => self.data_cuda[c as usize].as_ref().unwrap()
563 .as_device_ptr().as_ptr()
564 }
565 }
566}
567
568impl<T: UniversalCopy> AsUPtrMut<T> for UVec<T> {
569 #[inline]
570 fn as_mut_uptr(&mut self, device: Device) -> *mut T {
571 self.schedule_device_write(device);
572 use Device::*;
573 match device {
574 CPU => self.data_cpu.as_mut().unwrap().as_mut_ptr(),
575 #[cfg(feature = "cuda")]
576 CUDA(c) => self.data_cuda[c as usize].as_mut().unwrap()
577 .as_device_ptr().as_mut_ptr()
578 }
579 }
580}
581
582impl<T, U: UniversalCopy> AsUPtr<U> for &T where T: AsUPtr<U> {
601 #[inline]
602 fn as_uptr(&self, device: Device) -> *const U {
603 (*self).as_uptr(device)
604 }
605}
606
607impl<T, U: UniversalCopy> AsUPtrMut<U> for &mut T where T: AsUPtrMut<U> {
608 #[inline]
609 fn as_mut_uptr(&mut self, device: Device) -> *mut U {
610 (*self).as_mut_uptr(device)
611 }
612}
613
614impl<T: UniversalCopy + Hash> Hash for UVec<T> {
615 #[inline]
616 fn hash<H: Hasher>(&self, state: &mut H) {
617 self.as_ref().hash(state)
618 }
619}
620
621impl<T: UniversalCopy, U: UniversalCopy> PartialEq<UVec<U>> for UVec<T>
622 where T: PartialEq<U>
623{
624 #[inline]
625 fn eq(&self, other: &UVec<U>) -> bool {
626 self.as_ref() == other.as_ref()
627 }
628}
629
// Full equivalence: follows from the element-wise slice comparison in
// the `PartialEq` implementation when `T: Eq`.
impl<T: UniversalCopy + Eq> Eq for UVec<T> { }
631
impl<T: UniversalCopy> Clone for UVec<T> {
    /// Deep-copy the vector: buffers are duplicated only on devices
    /// that currently hold valid data; invalid devices stay
    /// unallocated in the clone. The clone gets fresh read locks.
    fn clone(&self) -> Self {
        let valid_flag = self.valid_flag.clone();
        // Clone the host buffer only if the CPU copy is valid.
        let data_cpu = match valid_flag[Device::CPU.to_id()] {
            true => self.data_cpu.clone(),
            false => None
        };
        #[cfg(feature = "cuda")]
        let data_cuda = unsafe {
            let mut data_cuda: [Option<DeviceBuffer<T>>; MAX_NUM_CUDA_DEVICES] = Default::default();
            for i in 0..MAX_NUM_CUDA_DEVICES {
                if valid_flag[Device::CUDA(i as u8).to_id()] {
                    // Allocate a full-capacity buffer, then copy only
                    // the first `size` (live) elements device-to-device.
                    let dbuf = alloc_cuda_uninit(self.capacity, i as u8);
                    self.data_cuda[i].as_ref().unwrap().index(..self.size)
                        .copy_to(&mut dbuf.index(..self.size))
                        .unwrap();
                    data_cuda[i] = Some(dbuf);
                }
            }
            data_cuda
        };
        UVec {
            data_cpu,
            #[cfg(feature = "cuda")] data_cuda,
            valid_flag,
            // Locks are not Clone and carry no data; start fresh.
            read_locks: Default::default(),
            size: self.size,
            capacity: self.capacity
        }
    }
}