1use std::fmt;
4use std::fmt::{Debug, Formatter};
5use std::marker::PhantomData;
6use std::sync::{Arc, Mutex};
7use cuda_runtime_sys::{cudaHostAllocDefault, dim3};
8use libc::{c_void};
9use rcudnn::Error;
10use rcudnn::utils::DataType;
11use rcudnn_sys::{cudaMemcpyKind, cudaStream_t, cudnnDataType_t};
12use crate::arr::{Arr, IntoConverter, MakeView, MakeViewMut, SerializedVec, SliceSize};
13use mem::{MemoryPool};
14use crate::device::{DeviceGpu};
15use crate::error::{CudaError, CudaRuntimeError, SizeMismatchError, TypeConvertError};
16use crate::layer::{BatchDataType, BatchSize};
17use crate::mem::AsRawSlice;
18use crate::ope::UnitValue;
19
/// Thin wrappers around raw CUDA runtime FFI calls (malloc/memcpy/free etc.).
pub mod ffi;
/// Device memory pool (provides `MemoryPool` used by `CudaMemoryPoolPtr`).
pub mod mem;
/// Kernel launch support. // NOTE(review): contents not visible here — confirm.
pub mod kernel;
/// cuDNN integration. // NOTE(review): contents not visible here — confirm.
pub mod cudnn;
24
/// cuDNN type metadata for a primitive numeric element type.
pub trait DataTypeInfo {
    /// High-level cuDNN data type (rcudnn `DataType`).
    fn cudnn_data_type() -> DataType;
    /// Raw cuDNN enum value for this type.
    fn cudnn_raw_data_type() -> cudnnDataType_t;
    /// Size of one element in bytes.
    fn size() -> usize;
}
34impl DataTypeInfo for f32 {
35 fn cudnn_data_type() -> DataType {
36 DataType::Float
37 }
38 fn cudnn_raw_data_type() -> cudnnDataType_t {
39 cudnnDataType_t::CUDNN_DATA_FLOAT
40 }
41 fn size() -> usize {
42 4_usize
43 }
44}
45impl DataTypeInfo for f64 {
46 fn cudnn_data_type() -> DataType {
47 DataType::Double
48 }
49 fn cudnn_raw_data_type() -> cudnnDataType_t {
50 cudnnDataType_t::CUDNN_DATA_DOUBLE
51 }
52 fn size() -> usize {
53 8_usize
54 }
55}
/// Sealed base traits: outside this crate no one can implement the public
/// kernel-pointer traits directly, because their supertraits live here.
pub(crate) mod private {
    /// Produces the kernel-launch argument slot for read-only device data.
    pub trait AsConstKernelPtrBase {
        fn as_const_kernel_ptr(&self) -> *mut libc::c_void;
    }

    /// Produces the kernel-launch argument slot for writable device data.
    pub trait AsMutKernelPtrBase {
        fn as_mut_kernel_ptr(&mut self) -> *mut libc::c_void;
    }

    /// Unified kernel-argument hook used when building launch argument lists.
    pub trait AsKernelPtrBase {
        fn as_kernel_ptr(&mut self) -> *mut libc::c_void;
    }
}
/// Public (sealed) marker for types usable as const kernel arguments.
pub trait AsConstKernelPtr: private::AsConstKernelPtrBase {
}
/// Public (sealed) marker for types usable as mutable kernel arguments.
pub trait AsMutKernelPtr: private::AsMutKernelPtrBase {
}
/// Public (sealed) marker for types usable as generic kernel arguments.
pub trait AsKernelPtr: private::AsKernelPtrBase {
}
// Blanket impls: implementing a sealed base trait grants the public marker.
impl<T> AsConstKernelPtr for T where T: private::AsConstKernelPtrBase {}
impl<T> AsMutKernelPtr for T where T: private::AsMutKernelPtrBase {}
// Any mutable kernel pointer also works as a generic kernel pointer.
impl<T> private::AsKernelPtrBase for T where T: AsMutKernelPtr {
    fn as_kernel_ptr(&mut self) -> *mut c_void {
        self.as_mut_kernel_ptr()
    }
}
impl<T> AsKernelPtr for T where T: private::AsKernelPtrBase {

}
/// Const `void*` view of a value, for passing into C FFI calls.
pub trait AsVoidPtr {
    fn as_void_ptr(&self) -> *const libc::c_void;
}
/// Mutable `void*` view of a value.
pub trait AsMutVoidPtr {
    fn as_mut_void_ptr(&mut self) -> *mut libc::c_void;
}
/// Typed const raw-pointer view.
pub trait AsPtr<T> {
    fn as_ptr(&self) -> *const T;
}
/// Typed mutable raw-pointer view.
pub trait AsMutPtr<T> {
    fn as_mut_ptr(&mut self) -> *mut T;
}
/// Fallible clone — cloning device-backed types may allocate and copy.
pub trait TryClone: Sized {
    /// Error type produced when the clone fails.
    type Error;

    fn try_clone(&self) -> Result<Self,Self::Error>;
}
109impl AsVoidPtr for i32 {
110 fn as_void_ptr(&self) -> *const libc::c_void {
111 self as *const i32 as *const libc::c_void
112 }
113}
114impl AsMutVoidPtr for i32 {
115 fn as_mut_void_ptr(&mut self) -> *mut libc::c_void {
116 self as *mut i32 as *mut libc::c_void
117 }
118}
119impl AsVoidPtr for u32 {
120 fn as_void_ptr(&self) -> *const libc::c_void {
121 self as *const u32 as *const libc::c_void
122 }
123}
124impl AsMutVoidPtr for u32 {
125 fn as_mut_void_ptr(&mut self) -> *mut libc::c_void {
126 self as *mut u32 as *mut libc::c_void
127 }
128}
129impl AsVoidPtr for i64 {
130 fn as_void_ptr(&self) -> *const libc::c_void {
131 self as *const i64 as *const libc::c_void
132 }
133}
134impl AsMutVoidPtr for i64 {
135 fn as_mut_void_ptr(&mut self) -> *mut libc::c_void {
136 self as *mut i64 as *mut libc::c_void
137 }
138}
139impl AsVoidPtr for u64 {
140 fn as_void_ptr(&self) -> *const libc::c_void {
141 self as *const u64 as *const libc::c_void
142 }
143}
144impl AsMutVoidPtr for u64 {
145 fn as_mut_void_ptr(&mut self) -> *mut libc::c_void {
146 self as *mut u64 as *mut libc::c_void
147 }
148}
149impl AsVoidPtr for usize {
150 fn as_void_ptr(&self) -> *const libc::c_void {
151 self as *const usize as *const libc::c_void
152 }
153}
154impl AsMutVoidPtr for usize {
155 fn as_mut_void_ptr(&mut self) -> *mut libc::c_void {
156 self as *mut usize as *mut libc::c_void
157 }
158}
159impl AsVoidPtr for f32 {
160 fn as_void_ptr(&self) -> *const libc::c_void {
161 self as *const f32 as *const libc::c_void
162 }
163}
164impl AsMutVoidPtr for f32 {
165 fn as_mut_void_ptr(&mut self) -> *mut libc::c_void {
166 self as *mut f32 as *mut libc::c_void
167 }
168}
169impl AsVoidPtr for f64 {
170 fn as_void_ptr(&self) -> *const libc::c_void {
171 self as *const f64 as *const libc::c_void
172 }
173}
174impl AsMutVoidPtr for f64 {
175 fn as_mut_void_ptr(&mut self) -> *mut libc::c_void {
176 self as *mut f64 as *mut libc::c_void
177 }
178}
179impl private::AsMutKernelPtrBase for i32 {
180 fn as_mut_kernel_ptr(&mut self) -> *mut libc::c_void {
181 self as *mut i32 as *mut libc::c_void
182 }
183}
184impl private::AsMutKernelPtrBase for u32 {
185 fn as_mut_kernel_ptr(&mut self) -> *mut libc::c_void {
186 self as *mut u32 as *mut libc::c_void
187 }
188}
189impl private::AsMutKernelPtrBase for i64 {
190 fn as_mut_kernel_ptr(&mut self) -> *mut libc::c_void {
191 self as *mut i64 as *mut libc::c_void
192 }
193}
194impl private::AsMutKernelPtrBase for u64 {
195 fn as_mut_kernel_ptr(&mut self) -> *mut libc::c_void {
196 self as *mut u64 as *mut libc::c_void
197 }
198}
199impl private::AsMutKernelPtrBase for usize {
200 fn as_mut_kernel_ptr(&mut self) -> *mut libc::c_void {
201 self as *mut usize as *mut libc::c_void
202 }
203}
204impl private::AsMutKernelPtrBase for f32 {
205 fn as_mut_kernel_ptr(&mut self) -> *mut libc::c_void {
206 self as *mut f32 as *mut libc::c_void
207 }
208}
209impl private::AsMutKernelPtrBase for f64 {
210 fn as_mut_kernel_ptr(&mut self) -> *mut libc::c_void {
211 self as *mut f64 as *mut libc::c_void
212 }
213}
/// Associates a pointer wrapper with the element type it points at.
pub trait PointerElement {
    type Element: Default + Debug;
}
/// Synchronous reads of the pointed-at memory into host `Vec`s.
pub trait ReadMemory<T: Default + Debug>: PointerElement {
    /// Reads the entire allocation.
    fn read_to_vec(&self) -> Result<Vec<T>,rcudnn::Error>;
    /// Reads the first `size` elements.
    fn read_to_vec_with_size(&self,size:usize) -> Result<Vec<T>,rcudnn::Error>;
}
/// Synchronous writes from a host pointer into the allocation.
pub trait WriteMemory<T: Default + Debug>: AsMutVoidPtr {
    /// Copies `len` elements from `p`; returns the element count written.
    fn memcpy(&mut self, p:*const T,len:usize) -> Result<usize,rcudnn::Error>;
    /// Copies the same `len`-element block `count` times, back to back.
    fn memcpy_repeat(&mut self, p:*const T,len:usize,count:usize) -> Result<usize,rcudnn::Error>;
}
/// Stream-ordered (asynchronous) reads.
pub trait ReadMemoryAsync<T: Default + Debug> {
    /// Async read of the entire allocation on `stream`.
    fn read_to_vec_async(&self,stream:cudaStream_t) -> Result<Vec<T>,rcudnn::Error>;
    /// Async read of the first `size` elements on `stream`.
    fn read_to_vec_with_size_async(&self,stream: cudaStream_t,size:usize) -> Result<Vec<T>,rcudnn::Error>;
}
/// Stream-ordered (asynchronous) writes.
pub trait WriteMemoryAsync<T: Default + Debug>: AsMutVoidPtr {
    /// Async copy of `len` elements from `p` on `stream`.
    fn memcpy_async(&mut self, p:*const T,len:usize,stream:cudaStream_t) -> Result<usize,rcudnn::Error>;
    /// Async repeated copy (`count` adjacent copies of the same block).
    fn memcpy_async_repeat(&mut self, p:*const T,len:usize,count:usize,stream:cudaStream_t) -> Result<usize,rcudnn::Error>;
}
/// Synchronous copy of `len` elements from `self` into `dst`.
pub trait MemoryMoveTo<T: Default + Debug,D: AsMutPtr<T>>: AsPtr<T> {
    fn memcpy_to(&self, dst:&mut D,len:usize) -> Result<usize,rcudnn::Error>;
}
/// Stream-ordered copy of `len` elements from `self` into `dst`.
pub trait MemoryMoveToAsync<T: Default + Debug,D: AsMutPtr<T>>: AsPtr<T> {
    fn memcpy_to_async(&self, dst:&mut D,len:usize,stream:cudaStream_t) -> Result<usize,rcudnn::Error>;
}
341#[derive(Debug)]
343pub struct CudaPtr<T> {
344 ptr:*mut T,
345 size:usize,
346}
347impl<T> CudaPtr<T> {
348 pub fn new(size: usize) -> Result<CudaPtr<T>, CudaError> {
357 let ptr: *mut T = ffi::malloc(size)?;
358
359 Ok(CudaPtr {
360 ptr: ptr,
361 size: size,
362 })
363 }
364}
365impl<T> PointerElement for CudaPtr<T> where T: Default + Debug {
366 type Element = T;
367}
368impl<T: Default + Debug> ReadMemory<T> for CudaPtr<T> {
369 fn read_to_vec(&self) -> Result<Vec<T>,rcudnn::Error> {
370 let mut r = Vec::with_capacity(self.size);
371 r.resize_with(self.size,Default::default);
372
373 ffi::memcpy(r.as_mut_ptr(),
374 self.ptr,
375 self.size,
376 cudaMemcpyKind::cudaMemcpyDeviceToHost)?;
377 Ok(r)
378 }
379
380 fn read_to_vec_with_size(&self,size:usize) -> Result<Vec<T>,rcudnn::Error> {
381 let mut r = Vec::with_capacity(size);
382 r.resize_with(size,Default::default);
383
384 ffi::memcpy(r.as_mut_ptr(),
385 self.ptr,
386 size,
387 cudaMemcpyKind::cudaMemcpyDeviceToHost)?;
388 Ok(r)
389 }
390}
/// Synchronous host-to-device writes for `CudaPtr`.
impl<T: Default + Debug> WriteMemory<T> for CudaPtr<T> {
    /// Copies `len` elements from host pointer `p` into the device buffer.
    fn memcpy(&mut self, p:*const T,len:usize) -> Result<usize,rcudnn::Error> {
        ffi::memcpy(self.ptr,
                    p,
                    len,
                    cudaMemcpyKind::cudaMemcpyHostToDevice)?;
        Ok(len)
    }

    /// Copies the same `len`-element host block `count` times into
    /// consecutive device slots; returns total elements written.
    /// The allocation must hold at least `len * count` elements.
    fn memcpy_repeat(&mut self, p: *const T, len: usize, count: usize) -> Result<usize, Error> {
        for i in 0..count {
            unsafe {
                // Repetition i lands at element offset i * len.
                ffi::memcpy(self.ptr.add(i * len),
                            p,
                            len,
                            cudaMemcpyKind::cudaMemcpyHostToDevice)?;
            }
        }
        Ok(len * count)
    }
}
/// Device-to-host move into pinned host memory.
impl<T: Default + Debug> MemoryMoveTo<T,CudaHostPtr<T>> for CudaPtr<T> {
    fn memcpy_to(&self, dst: &mut CudaHostPtr<T>, len: usize) -> Result<usize, Error> {
        ffi::memcpy(dst.as_mut_ptr(),
                    self.ptr,
                    len,
                    cudaMemcpyKind::cudaMemcpyDeviceToHost)?;
        Ok(len)
    }
}
/// Device-to-device move into another raw device allocation.
impl<T: Default + Debug> MemoryMoveTo<T,CudaPtr<T>> for CudaPtr<T> {
    fn memcpy_to(&self, dst: &mut CudaPtr<T>, len: usize) -> Result<usize, Error> {
        ffi::memcpy(dst.as_mut_ptr(),
                    self.ptr,
                    len,
                    cudaMemcpyKind::cudaMemcpyDeviceToDevice)?;
        Ok(len)
    }
}
/// Device-to-device move into a pool-managed allocation.
impl<T: Default + Debug> MemoryMoveTo<T,CudaMemoryPoolPtr<T>> for CudaPtr<T> {
    fn memcpy_to(&self, dst: &mut CudaMemoryPoolPtr<T>, len: usize) -> Result<usize, Error> {
        ffi::memcpy(dst.as_mut_ptr(),
                    self.ptr,
                    len,
                    cudaMemcpyKind::cudaMemcpyDeviceToDevice)?;
        Ok(len)
    }
}
impl<T> Drop for CudaPtr<T> {
    fn drop(&mut self) {
        // NOTE(review): unwrap panics if cudaFree fails, and a panic inside
        // drop during unwinding aborts — confirm this policy is intended.
        ffi::free(self.ptr).unwrap();
    }
}
/// Kernel argument slot: address of the device pointer field (read-only use).
impl<T> private::AsConstKernelPtrBase for CudaPtr<T> {
    fn as_const_kernel_ptr(&self) -> *mut libc::c_void {
        &self.ptr as *const *mut T as *mut libc::c_void
    }
}
/// Kernel argument slot: address of the device pointer field (writable use).
impl<T> private::AsMutKernelPtrBase for CudaPtr<T> {
    fn as_mut_kernel_ptr(&mut self) -> *mut libc::c_void {
        &mut self.ptr as *mut *mut T as *mut libc::c_void
    }
}
impl<T> AsVoidPtr for CudaPtr<T> {
    fn as_void_ptr(&self) -> *const libc::c_void {
        self.ptr as *const libc::c_void
    }
}
impl<T> AsMutVoidPtr for CudaPtr<T> {
    fn as_mut_void_ptr(&mut self) -> *mut libc::c_void {
        self.ptr as *mut libc::c_void
    }
}
impl<T> AsPtr<T> for CudaPtr<T> {
    fn as_ptr(&self) -> *const T {
        self.ptr as *const T
    }
}
impl<T> AsMutPtr<T> for CudaPtr<T> {
    fn as_mut_ptr(&mut self) -> *mut T {
        self.ptr
    }
}
474#[derive(Debug)]
476pub struct CudaHostPtr<T> {
477 ptr:*mut T,
478 size:usize,
479}
480impl<T> CudaHostPtr<T> {
481 pub fn new(size: usize, flags:libc::c_uint) -> Result<CudaHostPtr<T>, CudaError> {
491 let ptr: *mut T = ffi::malloc_host(size,flags)?;
492
493 Ok(CudaHostPtr {
494 ptr: ptr,
495 size: size,
496 })
497 }
498}
impl<T: Default + Debug> PointerElement for CudaHostPtr<T> {
    type Element = T;
}
/// Synchronous reads of the pinned buffer into plain host `Vec`s.
impl<T: Default + Debug> ReadMemory<T> for CudaHostPtr<T> {
    /// Copies the whole allocation (`self.size` elements).
    fn read_to_vec(&self) -> Result<Vec<T>,rcudnn::Error> {
        let mut r = Vec::with_capacity(self.size);
        r.resize_with(self.size,Default::default);

        // NOTE(review): source here is pinned *host* memory but the copy
        // kind is DeviceToHost; works under unified addressing — confirm
        // this kind is intentional rather than HostToHost.
        ffi::memcpy(r.as_mut_ptr(),
                    self.ptr,
                    self.size,
                    cudaMemcpyKind::cudaMemcpyDeviceToHost)?;
        Ok(r)
    }

    /// Copies the first `size` elements.
    fn read_to_vec_with_size(&self,size:usize) -> Result<Vec<T>,rcudnn::Error> {
        let mut r = Vec::with_capacity(size);
        r.resize_with(size,Default::default);

        ffi::memcpy(r.as_mut_ptr(),
                    self.ptr,
                    size,
                    cudaMemcpyKind::cudaMemcpyDeviceToHost)?;
        Ok(r)
    }
}
/// Synchronous writes from a host pointer into the pinned buffer.
impl<T: Default + Debug> WriteMemory<T> for CudaHostPtr<T> {
    /// Copies `len` elements from `p` into the pinned buffer.
    fn memcpy(&mut self, p:*const T,len:usize) -> Result<usize,rcudnn::Error> {
        // NOTE(review): destination is pinned host memory but the copy kind
        // is HostToDevice — confirm the kind is intentional.
        ffi::memcpy(self.ptr,
                    p,
                    len,
                    cudaMemcpyKind::cudaMemcpyHostToDevice)?;
        Ok(len)
    }

    /// Copies the same `len`-element block `count` times into consecutive
    /// slots; the allocation must hold at least `len * count` elements.
    fn memcpy_repeat(&mut self, p: *const T, len: usize, count: usize) -> Result<usize, Error> {
        for i in 0..count {
            unsafe {
                // Repetition i lands at element offset i * len.
                ffi::memcpy(self.ptr.add(i * len),
                            p,
                            len,
                            cudaMemcpyKind::cudaMemcpyHostToDevice)?;
            }
        }
        Ok(len * count)
    }
}
546impl<T: Default + Debug> ReadMemoryAsync<T> for CudaHostPtr<T> {
547 fn read_to_vec_async(&self, stream: cudaStream_t) -> Result<Vec<T>,rcudnn::Error> {
548 let mut r = Vec::with_capacity(self.size);
549 r.resize_with(self.size,Default::default);
550
551 ffi::memcpy_async(r.as_mut_ptr(),
552 self.ptr,
553 self.size,
554 cudaMemcpyKind::cudaMemcpyDeviceToHost,
555 stream)?;
556 Ok(r)
557 }
558
559 fn read_to_vec_with_size_async(&self, stream: cudaStream_t, size:usize) -> Result<Vec<T>,rcudnn::Error> {
560 let mut r = Vec::with_capacity(size);
561 r.resize_with(size,Default::default);
562
563 ffi::memcpy_async(r.as_mut_ptr(),
564 self.ptr,
565 size,
566 cudaMemcpyKind::cudaMemcpyDeviceToHost,
567 stream)?;
568 Ok(r)
569 }
570}
/// Stream-ordered writes from a host pointer into the pinned buffer.
impl<T: Default + Debug> WriteMemoryAsync<T> for CudaHostPtr<T> {
    /// Async copy of `len` elements from `p` on `stream`.
    fn memcpy_async(&mut self, p:*const T,len:usize,stream:cudaStream_t) -> Result<usize,rcudnn::Error> {
        ffi::memcpy_async(self.ptr,
                          p,
                          len,
                          cudaMemcpyKind::cudaMemcpyHostToDevice,stream)?;
        Ok(len)
    }

    /// Async copy of the same `len`-element block `count` times into
    /// consecutive slots; returns total elements enqueued.
    fn memcpy_async_repeat(&mut self, p: *const T, len: usize, count: usize, stream: cudaStream_t) -> Result<usize, Error> {
        for i in 0..count {
            unsafe {
                // Repetition i lands at element offset i * len.
                ffi::memcpy_async(self.ptr.add(i * len),
                                  p,
                                  len,
                                  cudaMemcpyKind::cudaMemcpyHostToDevice,stream)?;
            }
        }
        Ok(len * count)

    }
}
/// Host-to-host move between pinned buffers.
impl<T: Default + Debug> MemoryMoveTo<T,CudaHostPtr<T>> for CudaHostPtr<T> {
    fn memcpy_to(&self, dst: &mut CudaHostPtr<T>, len: usize) -> Result<usize, Error> {
        ffi::memcpy(dst.as_mut_ptr(),
                    self.ptr,
                    len,
                    cudaMemcpyKind::cudaMemcpyHostToHost)?;
        Ok(len)
    }
}
/// Host-to-device move into a raw device allocation.
impl<T: Default + Debug> MemoryMoveTo<T,CudaPtr<T>> for CudaHostPtr<T> {
    fn memcpy_to(&self, dst: &mut CudaPtr<T>, len: usize) -> Result<usize, Error> {
        ffi::memcpy(dst.as_mut_ptr(),
                    self.ptr,
                    len,
                    cudaMemcpyKind::cudaMemcpyHostToDevice)?;
        Ok(len)
    }
}
/// Host-to-device move into a pool-managed allocation.
impl<T: Default + Debug> MemoryMoveTo<T,CudaMemoryPoolPtr<T>> for CudaHostPtr<T> {
    fn memcpy_to(&self, dst: &mut CudaMemoryPoolPtr<T>, len: usize) -> Result<usize, Error> {
        ffi::memcpy(dst.as_mut_ptr(),
                    self.ptr,
                    len,
                    cudaMemcpyKind::cudaMemcpyHostToDevice)?;
        Ok(len)
    }
}
/// Async host-to-host move between pinned buffers.
impl<T: Default + Debug> MemoryMoveToAsync<T,CudaHostPtr<T>> for CudaHostPtr<T> {
    fn memcpy_to_async(&self, dst: &mut CudaHostPtr<T>, len: usize,stream:cudaStream_t) -> Result<usize, Error> {
        ffi::memcpy_async(dst.as_mut_ptr(),
                          self.ptr,
                          len,
                          cudaMemcpyKind::cudaMemcpyHostToHost,stream)?;
        Ok(len)
    }
}
/// Async host-to-device move into a raw device allocation.
impl<T: Default + Debug> MemoryMoveToAsync<T,CudaPtr<T>> for CudaHostPtr<T> {
    fn memcpy_to_async(&self, dst: &mut CudaPtr<T>, len: usize,stream:cudaStream_t) -> Result<usize, Error> {
        ffi::memcpy_async(dst.as_mut_ptr(),
                          self.ptr,
                          len,
                          cudaMemcpyKind::cudaMemcpyHostToDevice,stream)?;
        Ok(len)
    }
}
/// Async host-to-device move into a pool-managed allocation.
impl<T: Default + Debug> MemoryMoveToAsync<T,CudaMemoryPoolPtr<T>> for CudaHostPtr<T> {
    fn memcpy_to_async(&self, dst: &mut CudaMemoryPoolPtr<T>, len: usize,stream:cudaStream_t) -> Result<usize, Error> {
        ffi::memcpy_async(dst.as_mut_ptr(),
                          self.ptr,
                          len,
                          cudaMemcpyKind::cudaMemcpyHostToDevice,stream)?;
        Ok(len)
    }
}
impl<T> Drop for CudaHostPtr<T> {
    fn drop(&mut self) {
        // NOTE(review): unwrap panics if cudaFreeHost fails; panicking in
        // drop during unwinding aborts — confirm intended.
        ffi::free_host(self.ptr).unwrap()
    }
}
impl<T> AsPtr<T> for CudaHostPtr<T> {
    fn as_ptr(&self) -> *const T {
        self.ptr as *const T
    }
}
impl<T> AsMutPtr<T> for CudaHostPtr<T> {
    fn as_mut_ptr(&mut self) -> *mut T {
        self.ptr
    }
}
/// Kernel argument slot: address of the pointer field (read-only use).
impl<T> private::AsConstKernelPtrBase for CudaHostPtr<T> {
    fn as_const_kernel_ptr(&self) -> *mut libc::c_void {
        &self.ptr as *const *mut T as *mut libc::c_void
    }
}
/// Kernel argument slot: address of the pointer field (writable use).
impl<T> private::AsMutKernelPtrBase for CudaHostPtr<T> {
    fn as_mut_kernel_ptr(&mut self) -> *mut libc::c_void {
        &mut self.ptr as *mut *mut T as *mut libc::c_void
    }
}
impl<T> AsVoidPtr for CudaHostPtr<T> {
    fn as_void_ptr(&self) -> *const libc::c_void {
        self.ptr as *const libc::c_void
    }
}
impl<T> AsMutVoidPtr for CudaHostPtr<T> {
    fn as_mut_void_ptr(&mut self) -> *mut libc::c_void {
        self.ptr as *mut libc::c_void
    }
}
/// Device allocation obtained from (and, on drop, returned to) a shared
/// `MemoryPool`.
pub struct CudaMemoryPoolPtr<T> {
    // Raw device pointer handed out by the pool.
    ptr:*mut T,
    // Element count requested at allocation time.
    size:usize,
    // Keeps the pool alive so `Drop` can deallocate into it.
    memory_pool:Arc<Mutex<MemoryPool>>
}
impl<T> CudaMemoryPoolPtr<T> {
    /// Allocates `size` elements of `T` from the pool.
    ///
    /// # Errors
    /// `CudaError` when pool allocation fails, or `InvalidState` when the
    /// pool mutex is poisoned.
    pub fn new(size: usize,memory_pool:&Arc<Mutex<MemoryPool>>) -> Result<CudaMemoryPoolPtr<T>, CudaError> {
        let ptr:*mut T = match memory_pool.lock() {
            Ok(mut memory_pool) => {
                memory_pool.alloc_device(size)?
            },
            Err(_) => {
                // Poisoned lock is surfaced as an error rather than a panic.
                return Err(CudaError::InvalidState(String::from(
                    "Failed to secure exclusive lock on memory pool."
                )));
            }
        };

        Ok(CudaMemoryPoolPtr {
            ptr: ptr,
            size: size,
            memory_pool:Arc::clone(memory_pool),
        })
    }
}
717impl<T> CudaMemoryPoolPtr<T> where T: Default + Debug {
718 pub fn with_initializer<I: FnMut() -> T>(size: usize, memory_pool:&Arc<Mutex<MemoryPool>>, initializer: I) -> Result<CudaMemoryPoolPtr<T>, CudaError> {
729 let mut ptr = Self::new(size,memory_pool)?;
730
731 let mut src = Vec::with_capacity(size);
732
733 src.resize_with(size,initializer);
734
735 ptr.memcpy(src.into_boxed_slice().as_ptr(),size)?;
736
737 Ok(ptr)
738 }
739}
740impl<T: Default + Debug> PointerElement for CudaMemoryPoolPtr<T> {
741 type Element = T;
742}
743impl<T: Default + Debug> ReadMemory<T> for CudaMemoryPoolPtr<T> {
744 fn read_to_vec(&self) -> Result<Vec<T>,rcudnn::Error> {
745 let mut r = Vec::with_capacity(self.size);
746 r.resize_with(self.size,Default::default);
747
748 ffi::memcpy(r.as_mut_ptr(),
749 self.ptr,
750 self.size,
751 cudaMemcpyKind::cudaMemcpyDeviceToHost)?;
752 Ok(r)
753 }
754
755 fn read_to_vec_with_size(&self,size:usize) -> Result<Vec<T>,rcudnn::Error> {
756 let mut r = Vec::with_capacity(size);
757 r.resize_with(size,Default::default);
758
759 ffi::memcpy(r.as_mut_ptr(),
760 self.ptr,
761 size,
762 cudaMemcpyKind::cudaMemcpyDeviceToHost)?;
763 Ok(r)
764 }
765}
/// Synchronous host-to-device writes for pool-managed allocations.
impl<T: Default + Debug> WriteMemory<T> for CudaMemoryPoolPtr<T> {
    /// Copies `len` elements from host pointer `p` into the device buffer.
    fn memcpy(&mut self, p:*const T,len:usize) -> Result<usize,rcudnn::Error> {
        ffi::memcpy(self.ptr,
                    p,
                    len,
                    cudaMemcpyKind::cudaMemcpyHostToDevice)?;
        Ok(len)
    }

    /// Copies the same `len`-element host block `count` times into
    /// consecutive device slots; the allocation must hold `len * count`.
    fn memcpy_repeat(&mut self, p: *const T, len: usize, count: usize) -> Result<usize, Error> {
        for i in 0..count {
            unsafe {
                // Repetition i lands at element offset i * len.
                ffi::memcpy(self.ptr.add(i * len),
                            p,
                            len,
                            cudaMemcpyKind::cudaMemcpyHostToDevice)?;
            }
        }
        Ok(len * count)
    }
}
/// Device-to-host move into pinned host memory.
impl<T: Default + Debug> MemoryMoveTo<T,CudaHostPtr<T>> for CudaMemoryPoolPtr<T> {
    fn memcpy_to(&self, dst: &mut CudaHostPtr<T>, len: usize) -> Result<usize, Error> {
        ffi::memcpy(dst.as_mut_ptr(),
                    self.ptr,
                    len,
                    cudaMemcpyKind::cudaMemcpyDeviceToHost)?;
        Ok(len)
    }
}
/// Device-to-device move into a raw device allocation.
impl<T: Default + Debug> MemoryMoveTo<T,CudaPtr<T>> for CudaMemoryPoolPtr<T> {
    fn memcpy_to(&self, dst: &mut CudaPtr<T>, len: usize) -> Result<usize, Error> {
        ffi::memcpy(dst.as_mut_ptr(),
                    self.ptr,
                    len,
                    cudaMemcpyKind::cudaMemcpyDeviceToDevice)?;
        Ok(len)
    }
}
/// Device-to-device move into another pool-managed allocation.
impl<T: Default + Debug> MemoryMoveTo<T,CudaMemoryPoolPtr<T>> for CudaMemoryPoolPtr<T> {
    fn memcpy_to(&self, dst: &mut CudaMemoryPoolPtr<T>, len: usize) -> Result<usize, Error> {
        ffi::memcpy(dst.as_mut_ptr(),
                    self.ptr,
                    len,
                    cudaMemcpyKind::cudaMemcpyDeviceToDevice)?;
        Ok(len)
    }
}
impl<T> Drop for CudaMemoryPoolPtr<T> {
    /// Returns the block to the owning pool.
    fn drop(&mut self) {
        match self.memory_pool.lock() {
            Ok(mut memory_pool) => {
                // NOTE(review): unwrap/panic inside drop aborts if already
                // unwinding — confirm this policy is intended.
                memory_pool.deallocate(self.ptr).unwrap();
            },
            Err(_) => {
                panic!("Failed to secure exclusive lock on memory pool.");
            }
        }
    }
}
/// Kernel argument slot: address of the pointer field (read-only use).
impl<T> private::AsConstKernelPtrBase for CudaMemoryPoolPtr<T> {
    fn as_const_kernel_ptr(&self) -> *mut libc::c_void {
        &self.ptr as *const *mut T as *mut libc::c_void
    }
}
/// Kernel argument slot: address of the pointer field (writable use).
impl<T> private::AsMutKernelPtrBase for CudaMemoryPoolPtr<T> {
    fn as_mut_kernel_ptr(&mut self) -> *mut libc::c_void {
        &mut self.ptr as *mut *mut T as *mut libc::c_void
    }
}
impl<T> AsVoidPtr for CudaMemoryPoolPtr<T> {
    fn as_void_ptr(&self) -> *const libc::c_void {
        self.ptr as *const libc::c_void
    }
}
impl<T> AsMutVoidPtr for CudaMemoryPoolPtr<T> {
    fn as_mut_void_ptr(&mut self) -> *mut libc::c_void {
        self.ptr as *mut libc::c_void
    }
}
impl<T> AsPtr<T> for CudaMemoryPoolPtr<T> {
    fn as_ptr(&self) -> *const T {
        self.ptr as *const T
    }
}
impl<T> AsMutPtr<T> for CudaMemoryPoolPtr<T> {
    fn as_mut_ptr(&mut self) -> *mut T {
        self.ptr
    }
}
856impl<T> Debug for CudaMemoryPoolPtr<T> {
857 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
858 write!(f,"CudaMemoryPoolPtr {{ ptr: {:?}, size {:?} }}",self.ptr,self.size)
859 }
860}
/// Wrapper marking a borrowed pointer object as a read-only kernel argument.
#[derive(Debug)]
pub struct CudaConstPtr<'a,T> where T: AsConstKernelPtr {
    ptr:&'a T
}
impl<'a,T> CudaConstPtr<'a,T> where T: AsConstKernelPtr {
    /// Wraps `ptr` for const kernel-argument use.
    pub fn new(ptr: &'a T) -> CudaConstPtr<'a,T> {
        CudaConstPtr {
            ptr
        }
    }
}
/// Routes the generic kernel-argument hook to the const variant.
impl<'a,T> private::AsKernelPtrBase for CudaConstPtr<'a,T> where T: AsConstKernelPtr {
    fn as_kernel_ptr(&mut self) -> *mut c_void {
        self.ptr.as_const_kernel_ptr()
    }
}
/// Device-resident 1-D tensor with compile-time length `N`, backed by the
/// memory pool.
#[derive(Debug)]
pub struct CudaTensor1dPtr<T,const N:usize> where T: Default + Debug {
    ptr:CudaMemoryPoolPtr<T>
}
impl<T,const N:usize> CudaTensor1dPtr<T,N> where T: Default + Debug {
    /// Allocates an uninitialized `N`-element tensor from the pool.
    pub fn new(memory_pool:&Arc<Mutex<MemoryPool>>) -> Result<CudaTensor1dPtr<T,N>, CudaError> {
        Ok(CudaTensor1dPtr {
            ptr:CudaMemoryPoolPtr::new(N,memory_pool)?
        })
    }

    /// Allocates an `N`-element tensor filled with values produced by
    /// `initializer` (staged on the host, then copied to the device).
    pub fn with_initializer<I: FnMut() -> T>(memory_pool:&Arc<Mutex<MemoryPool>>, initializer: I) -> Result<CudaTensor1dPtr<T,N>, CudaError> {
        let mut ptr = CudaMemoryPoolPtr::new(N,memory_pool)?;

        let mut src = Vec::with_capacity(N);

        src.resize_with(N,initializer);

        ptr.memcpy(src.into_boxed_slice().as_ptr(),N)?;

        Ok(CudaTensor1dPtr {
            ptr: ptr
        })
    }
}
921impl<T,const N:usize> BatchDataType for CudaTensor1dPtr<T,N> where T: Default + Debug + UnitValue<T> {
922 type Type = CudaVec<T,CudaTensor1dPtr<T,N>>;
923}
924impl<T,const N:usize> AsCudaPtrRef for CudaTensor1dPtr<T,N> where T: Default + Debug {
925 type Pointer = CudaMemoryPoolPtr<T>;
926
927 #[inline]
928 fn as_cuda_ptr_ref(&self) -> &Self::Pointer {
929 &self.ptr
930 }
931}
932impl<T,const N:usize> AsCudaMutPtr for CudaTensor1dPtr<T,N> where T: Default + Debug {
933 type Pointer = CudaMemoryPoolPtr<T>;
934
935 #[inline]
936 fn as_cuda_mut_ptr<'a>(&'a mut self) -> CudaMutPtr<'a,Self::Pointer> {
937 CudaMutPtr::new(&mut self.ptr)
938 }
939}
/// Deep copy: allocates a fresh pool block and copies all `N` elements
/// device-to-device.
impl<T,const N:usize> TryClone for CudaTensor1dPtr<T,N> where T: Default + Debug {
    type Error = CudaError;
    fn try_clone(&self) -> Result<Self,CudaError> {
        let mut dst = CudaMemoryPoolPtr::new(N,&self.ptr.memory_pool)?;

        self.memcpy_to(&mut dst,N)?;

        Ok(CudaTensor1dPtr {
            ptr: dst
        })
    }
}
/// Borrow the inner pool pointer from a tensor reference.
impl<'a,T,const N:usize> From<&'a CudaTensor1dPtr<T,N>> for &'a CudaMemoryPoolPtr<T> where T: Default + Debug {
    fn from(value: &'a CudaTensor1dPtr<T,N>) -> Self {
        &value.ptr
    }
}
/// Mutably borrow the inner pool pointer from a tensor reference.
impl<'a,T,const N:usize> From<&'a mut CudaTensor1dPtr<T,N>> for &'a mut CudaMemoryPoolPtr<T> where T: Default + Debug {
    fn from(value: &'a mut CudaTensor1dPtr<T,N>) -> Self {
        &mut value.ptr
    }
}
/// Moves the tensor contents to the host as an `Arr<T,N>`.
impl<'a,T,const N:usize> ToHost<T> for CudaTensor1dPtr<T,N>
    where T: Default + Debug + Clone + Send + Sync + 'static {
    type Output = Arr<T,N>;
    fn to_host(self) -> Result<Self::Output,TypeConvertError> {
        // Device read followed by fixed-size conversion (length must match N).
        Ok(self.ptr.read_to_vec()?.try_into()?)
    }
}
/// Compile-time element count: `N`.
impl<T,const N:usize> MemorySize for CudaTensor1dPtr<T,N>
    where T: Default + Debug {
    #[inline]
    fn size() -> usize {
        N
    }
}
impl<T,const N:usize> PointerElement for CudaTensor1dPtr<T,N> where T: Default + Debug {
    type Element = T;
}
/// Non-owning, read-only view over a 1-D tensor's device memory.
#[derive(Debug)]
pub struct CudaTensor1dPtrView<'a,T,const N:usize>
    where T: Default + Debug {
    ptr:&'a CudaMemoryPoolPtr<T>
}
/// A view borrows the owning tensor's pool pointer.
impl<'a,T,const N:usize> From<&'a CudaTensor1dPtr<T,N>> for CudaTensor1dPtrView<'a,T,N>
    where T: Default + Debug {
    fn from(value: &'a CudaTensor1dPtr<T, N>) -> Self {
        CudaTensor1dPtrView {
            ptr:&value.ptr
        }
    }
}
/// Re-borrow an existing view.
impl<'a,T,const N:usize> From<&'a CudaTensor1dPtrView<'a,T,N>> for CudaTensor1dPtrView<'a,T,N>
    where T: Default + Debug {
    fn from(value: &'a CudaTensor1dPtrView<'a,T,N>) -> Self {
        CudaTensor1dPtrView {
            ptr:&value.ptr
        }
    }
}
/// Materializes an owned tensor from a view via a device-to-device copy.
impl<'a,T,const N:usize> TryFrom<&'a CudaTensor1dPtrView<'a,T,N>> for CudaTensor1dPtr<T,N> where T: Default + Debug {
    type Error = CudaError;
    fn try_from(value: &'a CudaTensor1dPtrView<'a,T,N>) -> Result<CudaTensor1dPtr<T,N>,CudaError> {
        let mut dst = CudaMemoryPoolPtr::new(N,&value.ptr.memory_pool)?;

        value.memcpy_to(&mut dst,N)?;

        Ok(CudaTensor1dPtr {
            ptr: dst
        })
    }
}
impl<'a,T,const N:usize> PointerElement for CudaTensor1dPtrView<'a,T,N> where T: Default + Debug {
    type Element = T;
}
impl<'a,T,const N:usize> AsCudaPtrRef for CudaTensor1dPtrView<'a,T,N> where T: Default + Debug {
    type Pointer = CudaMemoryPoolPtr<T>;

    #[inline]
    fn as_cuda_ptr_ref(&self) -> &Self::Pointer {
        &self.ptr
    }
}
/// Device-resident 2-D tensor with compile-time shape `N1 x N2` (stored as
/// `N1*N2` contiguous elements), backed by the memory pool.
#[derive(Debug)]
pub struct CudaTensor2dPtr<T,const N1:usize,const N2:usize> where T: Default + Debug {
    ptr:CudaMemoryPoolPtr<T>
}
impl<T,const N1:usize,const N2:usize> CudaTensor2dPtr<T,N1,N2> where T: Default + Debug {
    /// Allocates an uninitialized `N1*N2`-element tensor from the pool.
    pub fn new(memory_pool:&Arc<Mutex<MemoryPool>>) -> Result<CudaTensor2dPtr<T,N1,N2>, CudaError> {
        Ok(CudaTensor2dPtr {
            ptr:CudaMemoryPoolPtr::new(N1*N2,memory_pool)?
        })
    }

    /// Allocates an `N1*N2`-element tensor filled with values produced by
    /// `initializer` (staged on the host, then copied to the device).
    pub fn with_initializer<I: FnMut() -> T>(memory_pool:&Arc<Mutex<MemoryPool>>, initializer: I) -> Result<CudaTensor2dPtr<T,N1,N2>, CudaError> {
        let mut ptr = CudaMemoryPoolPtr::new(N1*N2,memory_pool)?;

        let mut src = Vec::with_capacity(N1*N2);

        src.resize_with(N1*N2,initializer);

        ptr.memcpy(src.into_boxed_slice().as_ptr(),N1*N2)?;

        Ok(CudaTensor2dPtr {
            ptr: ptr
        })
    }
}
/// A batch of 2-D tensors is represented as a `CudaVec` of them.
impl<T,const N1:usize,const N2:usize> BatchDataType for CudaTensor2dPtr<T,N1,N2>
    where T: Default + Debug + UnitValue<T> {
    type Type = CudaVec<T,CudaTensor2dPtr<T,N1,N2>>;
}
impl<T,const N1:usize,const N2:usize> PointerElement for CudaTensor2dPtr<T,N1,N2> where T: Default + Debug {
    type Element = T;
}
/// Shared borrow of the underlying pool pointer.
impl<T,const N1:usize,const N2:usize> AsCudaPtrRef for CudaTensor2dPtr<T,N1,N2> where T: Default + Debug {
    type Pointer = CudaMemoryPoolPtr<T>;
    #[inline]
    fn as_cuda_ptr_ref(&self) -> &Self::Pointer {
        &self.ptr
    }
}
/// Mutable borrow of the underlying pool pointer.
impl<T,const N1:usize,const N2:usize> AsCudaMutPtr for CudaTensor2dPtr<T,N1,N2> where T: Default + Debug {
    type Pointer = CudaMemoryPoolPtr<T>;

    #[inline]
    fn as_cuda_mut_ptr<'a>(&'a mut self) -> CudaMutPtr<'a,Self::Pointer> {
        CudaMutPtr::new(&mut self.ptr)
    }
}
/// Deep copy via a device-to-device transfer of all `N1*N2` elements.
impl<T,const N1:usize,const N2:usize> TryClone for CudaTensor2dPtr<T,N1,N2> where T: Default + Debug {
    type Error = CudaError;
    fn try_clone(&self) -> Result<Self,CudaError> {
        let mut dst = CudaMemoryPoolPtr::new(N1*N2,&self.ptr.memory_pool)?;

        self.memcpy_to(&mut dst,N1*N2)?;

        Ok(CudaTensor2dPtr {
            ptr: dst
        })
    }
}
/// Borrow the inner pool pointer from a tensor reference.
impl<'a,T,const N1:usize,const N2:usize> From<&'a CudaTensor2dPtr<T,N1,N2>> for &'a CudaMemoryPoolPtr<T> where T: Default + Debug {
    fn from(value: &'a CudaTensor2dPtr<T,N1,N2>) -> Self {
        &value.ptr
    }
}
/// Mutably borrow the inner pool pointer from a tensor reference.
impl<'a,T,const N1:usize,const N2:usize> From<&'a mut CudaTensor2dPtr<T,N1,N2>> for &'a mut CudaMemoryPoolPtr<T> where T: Default + Debug {
    fn from(value: &'a mut CudaTensor2dPtr<T,N1,N2>) -> Self {
        &mut value.ptr
    }
}
/// Compile-time element count: `N1 * N2`.
impl<T,const N1:usize,const N2:usize> MemorySize for CudaTensor2dPtr<T,N1,N2>
    where T: Default + Debug {
    #[inline]
    fn size() -> usize {
        N1 * N2
    }
}
/// Non-owning, read-only view over a 2-D tensor's device memory.
#[derive(Debug)]
pub struct CudaTensor2dPtrView<'a,T,const N1:usize,const N2:usize>
    where T: Default + Debug {
    ptr:&'a CudaMemoryPoolPtr<T>
}
impl<'a,T,const N1:usize,const N2:usize> PointerElement for CudaTensor2dPtrView<'a,T,N1,N2> where T: Default + Debug {
    type Element = T;
}
impl<'a,T,const N1:usize,const N2:usize> AsCudaPtrRef for CudaTensor2dPtrView<'a,T,N1,N2> where T: Default + Debug {
    type Pointer = CudaMemoryPoolPtr<T>;

    #[inline]
    fn as_cuda_ptr_ref(&self) -> &Self::Pointer {
        &self.ptr
    }
}
/// A view borrows the owning tensor's pool pointer.
impl<'a,T,const N1:usize,const N2:usize> From<&'a CudaTensor2dPtr<T,N1,N2>> for CudaTensor2dPtrView<'a,T,N1,N2>
    where T: Default + Debug {
    fn from(value: &'a CudaTensor2dPtr<T,N1,N2>) -> Self {
        CudaTensor2dPtrView {
            ptr:&value.ptr
        }
    }
}
/// Re-borrow an existing view.
impl<'a,T,const N1:usize,const N2:usize> From<&'a CudaTensor2dPtrView<'a,T,N1,N2>> for CudaTensor2dPtrView<'a,T,N1,N2>
    where T: Default + Debug {
    fn from(value: &'a CudaTensor2dPtrView<'a,T,N1,N2>) -> Self {
        CudaTensor2dPtrView {
            ptr:&value.ptr
        }
    }
}
/// Materializes an owned tensor from a view via a device-to-device copy.
impl<'a,T,const N1:usize,const N2:usize> TryFrom<&'a CudaTensor2dPtrView<'a,T,N1,N2>> for CudaTensor2dPtr<T,N1,N2>
    where T: Default + Debug {
    type Error = CudaError;
    fn try_from(value: &'a CudaTensor2dPtrView<'a,T,N1,N2>) -> Result<CudaTensor2dPtr<T,N1,N2>,CudaError> {
        let mut dst = CudaMemoryPoolPtr::new(N1*N2,&value.ptr.memory_pool)?;

        value.memcpy_to(&mut dst,N1*N2)?;

        Ok(CudaTensor2dPtr {
            ptr: dst
        })
    }
}
/// Device-resident 3-D tensor with compile-time shape `N1 x N2 x N3`
/// (stored as `N1*N2*N3` contiguous elements), backed by the memory pool.
#[derive(Debug)]
pub struct CudaTensor3dPtr<T,const N1:usize,const N2:usize,const N3:usize> where T: Default + Debug {
    ptr:CudaMemoryPoolPtr<T>
}
impl<T,const N1:usize,const N2:usize,const N3:usize> CudaTensor3dPtr<T,N1,N2,N3> where T: Default + Debug {
    /// Allocates an uninitialized `N1*N2*N3`-element tensor from the pool.
    pub fn new(memory_pool:&Arc<Mutex<MemoryPool>>) -> Result<CudaTensor3dPtr<T,N1,N2,N3>, CudaError> {
        Ok(CudaTensor3dPtr {
            ptr:CudaMemoryPoolPtr::new(N1*N2*N3,memory_pool)?
        })
    }

    /// Allocates an `N1*N2*N3`-element tensor filled with values produced by
    /// `initializer` (staged on the host, then copied to the device).
    pub fn with_initializer<I: FnMut() -> T>(memory_pool:&Arc<Mutex<MemoryPool>>, initializer: I) -> Result<CudaTensor3dPtr<T,N1,N2,N3>, CudaError> {
        let mut ptr = CudaMemoryPoolPtr::new(N1*N2*N3,memory_pool)?;

        let mut src = Vec::with_capacity(N1*N2*N3);

        src.resize_with(N1*N2*N3,initializer);

        ptr.memcpy(src.into_boxed_slice().as_ptr(),N1*N2*N3)?;

        Ok(CudaTensor3dPtr {
            ptr: ptr
        })
    }
}
1207impl<T,const N1:usize,const N2:usize,const N3:usize> BatchDataType for CudaTensor3dPtr<T,N1,N2,N3>
1208 where T: Default + Debug + UnitValue<T> {
1209 type Type = CudaVec<T,CudaTensor3dPtr<T,N1,N2,N3>>;
1210}
1211impl<T,const N1:usize,const N2:usize,const N3:usize> PointerElement for CudaTensor3dPtr<T,N1,N2,N3> where T: Default + Debug {
1212 type Element = T;
1213}
1214impl<T,const N1:usize,const N2:usize,const N3:usize> AsCudaPtrRef for CudaTensor3dPtr<T,N1,N2,N3> where T: Default + Debug {
1215 type Pointer = CudaMemoryPoolPtr<T>;
1216
1217 #[inline]
1218 fn as_cuda_ptr_ref(&self) -> &Self::Pointer {
1219 &self.ptr
1220 }
1221}
1222impl<T,const N1:usize,const N2:usize,const N3:usize> AsCudaMutPtr for CudaTensor3dPtr<T,N1,N2,N3> where T: Default + Debug {
1223 type Pointer = CudaMemoryPoolPtr<T>;
1224
1225 #[inline]
1226 fn as_cuda_mut_ptr<'a>(&'a mut self) -> CudaMutPtr<'a,Self::Pointer> {
1227 CudaMutPtr::new(&mut self.ptr)
1228 }
1229}
1230impl<T,const N1:usize,const N2:usize,const N3:usize> TryClone for CudaTensor3dPtr<T,N1,N2,N3> where T: Default + Debug {
1231 type Error = CudaError;
1232 fn try_clone(&self) -> Result<Self,CudaError> {
1233 let mut dst = CudaMemoryPoolPtr::new(N1*N2*N3,&self.ptr.memory_pool)?;
1234
1235 self.memcpy_to(&mut dst,N1*N2*N3)?;
1236
1237 Ok(CudaTensor3dPtr {
1238 ptr: dst
1239 })
1240 }
1241}
/// Borrows the inner pool pointer from a 3d tensor handle.
impl<'a,T,const N1:usize,const N2:usize,const N3:usize> From<&'a CudaTensor3dPtr<T,N1,N2,N3>> for &'a CudaMemoryPoolPtr<T> where T: Default + Debug {
    fn from(value: &'a CudaTensor3dPtr<T,N1,N2,N3>) -> Self {
        &value.ptr
    }
}
/// Mutably borrows the inner pool pointer from a 3d tensor handle.
impl<'a,T,const N1:usize,const N2:usize,const N3:usize> From<&'a mut CudaTensor3dPtr<T,N1,N2,N3>> for &'a mut CudaMemoryPoolPtr<T> where T: Default + Debug {
    fn from(value: &'a mut CudaTensor3dPtr<T,N1,N2,N3>) -> Self {
        &mut value.ptr
    }
}
/// Element count is the product of the three const dimensions.
impl<T,const N1:usize,const N2:usize,const N3:usize> MemorySize for CudaTensor3dPtr<T,N1,N2,N3>
    where T: Default + Debug {
    #[inline]
    fn size() -> usize {
        N1 * N2 * N3
    }
}
#[derive(Debug)]
/// Borrowing view over a [`CudaTensor3dPtr`]'s pooled memory; shape is carried
/// in the const generic parameters.
pub struct CudaTensor3dPtrView<'a,T,const N1:usize,const N2:usize,const N3:usize>
    where T: Default + Debug {
    ptr:&'a CudaMemoryPoolPtr<T>
}
/// Element type carried by the view is `T` itself.
impl<'a,T,const N1:usize,const N2:usize,const N3:usize> PointerElement for CudaTensor3dPtrView<'a,T,N1,N2,N3>
    where T: Default + Debug{
    type Element = T;
}
1269impl<'a,T,const N1:usize,const N2:usize,const N3:usize> AsCudaPtrRef for CudaTensor3dPtrView<'a,T,N1,N2,N3> where T: Default + Debug {
1270 type Pointer = CudaMemoryPoolPtr<T>;
1271
1272 #[inline]
1273 fn as_cuda_ptr_ref(&self) -> &Self::Pointer {
1274 &self.ptr
1275 }
1276}
1277impl<'a,T,const N1:usize,const N2:usize,const N3:usize> From<&'a CudaTensor3dPtr<T,N1,N2,N3>> for CudaTensor3dPtrView<'a,T,N1,N2,N3>
1278 where T: Default + Debug {
1279 fn from(value: &'a CudaTensor3dPtr<T,N1,N2,N3>) -> Self {
1280 CudaTensor3dPtrView {
1281 ptr:&value.ptr
1282 }
1283 }
1284}
1285impl<'a,T,const N1:usize,const N2:usize,const N3:usize> From<&'a CudaTensor3dPtrView<'a,T,N1,N2,N3>> for CudaTensor3dPtrView<'a,T,N1,N2,N3>
1286 where T: Default + Debug {
1287 fn from(value: &'a CudaTensor3dPtrView<'a,T,N1,N2,N3>) -> Self {
1288 CudaTensor3dPtrView {
1289 ptr:&value.ptr
1290 }
1291 }
1292}
1293impl<'a,T,const N1:usize,const N2:usize,const N3:usize> TryFrom<&'a CudaTensor3dPtrView<'a,T,N1,N2,N3>> for CudaTensor3dPtr<T,N1,N2,N3>
1294 where T: Default + Debug {
1295 type Error = CudaError;
1296 fn try_from(value: &'a CudaTensor3dPtrView<'a,T,N1,N2,N3>) -> Result<CudaTensor3dPtr<T,N1,N2,N3>,CudaError> {
1297 let mut dst = CudaMemoryPoolPtr::new(N1*N2*N3,&value.ptr.memory_pool)?;
1298
1299 value.memcpy_to(&mut dst,N1*N2*N3)?;
1300
1301 Ok(CudaTensor3dPtr {
1302 ptr: dst
1303 })
1304 }
1305}
#[derive(Debug)]
/// Handle to `N1 * N2 * N3 * N4` elements of `T` allocated from a shared
/// [`MemoryPool`]; the shape is fixed at the type level.
pub struct CudaTensor4dPtr<T,const N1:usize,const N2:usize,const N3:usize,const N4:usize> where T: Default + Debug {
    // Backing pooled allocation holding N1*N2*N3*N4 elements.
    ptr:CudaMemoryPoolPtr<T>
}
1311impl<T,const N1:usize,const N2:usize,const N3:usize,const N4:usize> CudaTensor4dPtr<T,N1,N2,N3,N4> where T: Default + Debug {
1312 pub fn new(memory_pool:&Arc<Mutex<MemoryPool>>) -> Result<CudaTensor4dPtr<T,N1,N2,N3,N4>, CudaError> {
1321 Ok(CudaTensor4dPtr {
1322 ptr:CudaMemoryPoolPtr::new(N1*N2*N3*N4,memory_pool)?
1323 })
1324 }
1325
1326 pub fn with_initializer<I: FnMut() -> T>(memory_pool:&Arc<Mutex<MemoryPool>>, initializer: I) -> Result<CudaTensor4dPtr<T,N1,N2,N3,N4>, CudaError> {
1336 let mut ptr = CudaMemoryPoolPtr::new(N1*N2*N3*N4,memory_pool)?;
1337
1338 let mut src = Vec::with_capacity(N1*N2*N3*N4);
1339
1340 src.resize_with(N1*N2*N3*N4,initializer);
1341
1342 ptr.memcpy(src.into_boxed_slice().as_ptr(),N1*N2*N3*N4)?;
1343
1344 Ok(CudaTensor4dPtr {
1345 ptr: ptr
1346 })
1347 }
1348}
/// A batch of 4d tensors is represented as a [`CudaVec`] keyed by this pointer type.
impl<T,const N1:usize,const N2:usize,const N3:usize,const N4:usize> BatchDataType for CudaTensor4dPtr<T,N1,N2,N3,N4>
    where T: Default + Debug + UnitValue<T> {
    type Type = CudaVec<T,CudaTensor4dPtr<T,N1,N2,N3,N4>>;
}
/// Element count is the product of the four const dimensions.
impl<T,const N1:usize,const N2:usize,const N3:usize,const N4:usize> MemorySize for CudaTensor4dPtr<T,N1,N2,N3,N4>
    where T: Default + Debug {
    #[inline]
    fn size() -> usize {
        N1 * N2 * N3 * N4
    }
}
/// Element type carried by the pointer is `T` itself.
impl<T,const N1:usize,const N2:usize,const N3:usize,const N4:usize> PointerElement for CudaTensor4dPtr<T,N1,N2,N3,N4>
    where T: Default + Debug{
    type Element = T;
}
/// Read access to the wrapped pool pointer.
impl<T,const N1:usize,const N2:usize,const N3:usize,const N4:usize> AsCudaPtrRef for CudaTensor4dPtr<T,N1,N2,N3,N4>
    where T: Default + Debug {
    type Pointer = CudaMemoryPoolPtr<T>;

    #[inline]
    fn as_cuda_ptr_ref(&self) -> &Self::Pointer {
        &self.ptr
    }
}
/// Mutable access to the wrapped pool pointer via a short-lived [`CudaMutPtr`].
impl<T,const N1:usize,const N2:usize,const N3:usize,const N4:usize> AsCudaMutPtr for CudaTensor4dPtr<T,N1,N2,N3,N4>
    where T: Default + Debug {
    type Pointer = CudaMemoryPoolPtr<T>;

    #[inline]
    fn as_cuda_mut_ptr<'a>(&'a mut self) -> CudaMutPtr<'a,Self::Pointer> {
        CudaMutPtr::new(&mut self.ptr)
    }
}
1382impl<T,const N1:usize,const N2:usize,const N3:usize,const N4:usize> TryClone for CudaTensor4dPtr<T,N1,N2,N3,N4>
1383 where T: Default + Debug {
1384 type Error = CudaError;
1385 fn try_clone(&self) -> Result<Self,CudaError> {
1386 let mut dst = CudaMemoryPoolPtr::new(N1*N2*N3*N4,&self.ptr.memory_pool)?;
1387
1388 self.memcpy_to(&mut dst,N1*N2*N3*N4)?;
1389
1390 Ok(CudaTensor4dPtr {
1391 ptr: dst
1392 })
1393 }
1394}
/// Borrows the inner pool pointer from a 4d tensor handle.
impl<'a,T,const N1:usize,const N2:usize,const N3:usize,const N4:usize> From<&'a CudaTensor4dPtr<T,N1,N2,N3,N4>> for &'a CudaMemoryPoolPtr<T> where T: Default + Debug {
    fn from(value: &'a CudaTensor4dPtr<T,N1,N2,N3,N4>) -> Self {
        &value.ptr
    }
}
/// Mutably borrows the inner pool pointer from a 4d tensor handle.
impl<'a,T,const N1:usize,const N2:usize,const N3:usize,const N4:usize> From<&'a mut CudaTensor4dPtr<T,N1,N2,N3,N4>> for &'a mut CudaMemoryPoolPtr<T> where T: Default + Debug {
    fn from(value: &'a mut CudaTensor4dPtr<T,N1,N2,N3,N4>) -> Self {
        &mut value.ptr
    }
}
#[derive(Debug)]
/// Borrowing view over a [`CudaTensor4dPtr`]'s pooled memory; shape is carried
/// in the const generic parameters.
pub struct CudaTensor4dPtrView<'a,T,const N1:usize,const N2:usize,const N3:usize,const N4:usize>
    where T: Default + Debug {
    ptr:&'a CudaMemoryPoolPtr<T>
}
/// Element type carried by the view is `T` itself.
impl<'a,T,const N1:usize,const N2:usize,const N3:usize,const N4:usize> PointerElement for CudaTensor4dPtrView<'a,T,N1,N2,N3,N4>
    where T: Default + Debug{
    type Element = T;
}
1415impl<'a,T,const N1:usize,const N2:usize,const N3:usize,const N4:usize> AsCudaPtrRef for CudaTensor4dPtrView<'a,T,N1,N2,N3,N4>
1416 where T: Default + Debug {
1417 type Pointer = CudaMemoryPoolPtr<T>;
1418
1419 #[inline]
1420 fn as_cuda_ptr_ref(&self) -> &Self::Pointer {
1421 &self.ptr
1422 }
1423}
1424impl<'a,T,const N1:usize,const N2:usize,const N3:usize,const N4:usize> From<&'a CudaTensor4dPtr<T,N1,N2,N3,N4>>
1425 for CudaTensor4dPtrView<'a,T,N1,N2,N3,N4>
1426 where T: Default + Debug {
1427 fn from(value: &'a CudaTensor4dPtr<T,N1,N2,N3,N4>) -> Self {
1428 CudaTensor4dPtrView {
1429 ptr:&value.ptr
1430 }
1431 }
1432}
1433impl<'a,T,const N1:usize,const N2:usize,const N3:usize,const N4:usize> From<&'a CudaTensor4dPtrView<'a,T,N1,N2,N3,N4>>
1434 for CudaTensor4dPtrView<'a,T,N1,N2,N3,N4>
1435 where T: Default + Debug {
1436 fn from(value: &'a CudaTensor4dPtrView<'a,T,N1,N2,N3,N4>) -> Self {
1437 CudaTensor4dPtrView {
1438 ptr:&value.ptr
1439 }
1440 }
1441}
1442impl<'a,T,const N1:usize,const N2:usize,const N3:usize,const N4:usize> TryFrom<&'a CudaTensor4dPtrView<'a,T,N1,N2,N3,N4>>
1443 for CudaTensor4dPtr<T,N1,N2,N3,N4>
1444 where T: Default + Debug {
1445 type Error = CudaError;
1446
1447 fn try_from(value: &'a CudaTensor4dPtrView<'a, T, N1, N2, N3, N4>) -> Result<Self, CudaError> {
1448 let mut dst = CudaMemoryPoolPtr::new(N1*N2*N3*N4,&value.ptr.memory_pool)?;
1449
1450 value.memcpy_to(&mut dst,N1*N2*N3*N4)?;
1451
1452 Ok(CudaTensor4dPtr {
1453 ptr: dst
1454 })
1455 }
1456}
/// Number of elements a fixed-shape pointer type occupies.
pub trait MemorySize {
    /// Total element count (product of the type's const dimensions).
    fn size() -> usize;
}
#[derive(Debug)]
/// Batched device buffer of `len` items, each `T::size()` elements of `U`;
/// `T` is the per-item pointer type and is tracked only at the type level.
pub struct CudaVec<U,T>
    where U: UnitValue<U>,
          T: AsConstKernelPtr + AsKernelPtr {
    // Number of batched items (not elements).
    len: usize,
    // Backing pooled allocation of len * T::size() elements.
    ptr:CudaMemoryPoolPtr<U>,
    // Marker tying the buffer to its per-item pointer type.
    t:PhantomData<T>
}
1469impl<U,T> CudaVec<U,T>
1470 where U: UnitValue<U>,
1471 T: AsConstKernelPtr + AsKernelPtr + MemorySize {
1472 pub fn new(size: usize, memory_pool:&Arc<Mutex<MemoryPool>>) -> Result<CudaVec<U,T>, CudaError> {
1482 let ptr = CudaMemoryPoolPtr::new(size * T::size(), memory_pool)?;
1483
1484 Ok(CudaVec {
1485 len:size,
1486 ptr,
1487 t:PhantomData::<T>
1488 })
1489 }
1490 pub fn with_initializer<I: FnMut() -> U>(size: usize, memory_pool:&Arc<Mutex<MemoryPool>>, initializer: I) -> Result<CudaVec<U,T>, CudaError> {
1501 let mut ptr = CudaMemoryPoolPtr::new(size * T::size(),memory_pool)?;
1502
1503 let mut src = Vec::with_capacity(size * T::size());
1504
1505 src.resize_with(size * T::size(),initializer);
1506
1507 ptr.memcpy(src.into_boxed_slice().as_ptr(),size * T::size())?;
1508
1509 Ok(CudaVec {
1510 len:size,
1511 ptr,
1512 t:PhantomData::<T>
1513 })
1514 }
1515}
/// Batch size is the number of items, not the element count.
impl<U,T> BatchSize for CudaVec<U,T>
    where U: UnitValue<U>,
          T: AsConstKernelPtr + AsKernelPtr + MemorySize {
    fn size(&self) -> usize {
        self.len
    }
}
/// Element type carried by the buffer is `U`.
impl<U,T> PointerElement for CudaVec<U,T>
    where U: UnitValue<U>,
          T: AsConstKernelPtr + AsKernelPtr {
    type Element = U;
}
/// Read access to the wrapped pool pointer.
impl<U,T> AsCudaPtrRef for CudaVec<U,T>
    where U: UnitValue<U>,
          T: AsConstKernelPtr + AsKernelPtr + MemorySize {
    type Pointer = CudaMemoryPoolPtr<U>;

    #[inline]
    fn as_cuda_ptr_ref(&self) -> &Self::Pointer {
        &self.ptr
    }
}
/// Mutable access to the wrapped pool pointer via a short-lived [`CudaMutPtr`].
impl<U,T> AsCudaMutPtr for CudaVec<U,T>
    where U: UnitValue<U>,
          T: AsConstKernelPtr + AsKernelPtr {
    type Pointer = CudaMemoryPoolPtr<U>;

    #[inline]
    fn as_cuda_mut_ptr<'a>(&'a mut self) -> CudaMutPtr<'a,Self::Pointer> {
        CudaMutPtr::new(&mut self.ptr)
    }
}
1548impl<U,T> TryClone for CudaVec<U,T>
1549 where U: UnitValue<U>,
1550 T: AsConstKernelPtr + AsKernelPtr + MemorySize {
1551 type Error = CudaError;
1552 fn try_clone(&self) -> Result<Self,CudaError> {
1553 let mut dst = CudaMemoryPoolPtr::new(self.len * T::size(),&self.ptr.memory_pool)?;
1554
1555 self.memcpy_to(&mut dst,self.len * T::size())?;
1556
1557 Ok(CudaVec {
1558 len: self.len,
1559 ptr: dst,
1560 t:PhantomData::<T>
1561 })
1562 }
1563}
1564impl<'a,U,T> ToCuda<U> for &'a CudaVec<U,T>
1565 where U: UnitValue<U>,
1566 T: AsConstKernelPtr + AsKernelPtr + MemorySize {
1567 type Output = CudaVecView<'a,U,T>;
1568
1569 fn to_cuda(self, _: &DeviceGpu<U>) -> Result<Self::Output, TypeConvertError> {
1570 Ok(self.try_into()?)
1571 }
1572}
/// An owned [`CudaVec`] is already in its GPU form, so the conversion is the identity.
impl<U,T> ToCuda<U> for CudaVec<U,T>
    where U: UnitValue<U>,
          T: AsConstKernelPtr + AsKernelPtr + MemorySize {
    type Output = CudaVec<U,T>;

    fn to_cuda(self, _: &DeviceGpu<U>) -> Result<Self::Output, TypeConvertError> {
        Ok(self)
    }
}
#[derive(Debug)]
/// Borrowing view over a [`CudaVec`]'s pooled memory, keeping the batch length.
pub struct CudaVecView<'a,U,T>
    where U: UnitValue<U>,
          T: AsConstKernelPtr {
    // Number of batched items (not elements).
    len: usize,
    // Borrowed backing allocation.
    ptr:&'a CudaMemoryPoolPtr<U>,
    // Marker tying the view to its per-item pointer type.
    t:PhantomData<T>
}
/// Batch size is the number of items, not the element count.
impl<'a,U,T> BatchSize for CudaVecView<'a,U,T>
    where U: UnitValue<U>,
          T: AsConstKernelPtr + MemorySize {
    fn size(&self) -> usize {
        self.len
    }
}
/// Element type carried by the view is `U`.
impl<'a,U,T> PointerElement for CudaVecView<'a,U,T>
    where U: UnitValue<U>,
          T: AsConstKernelPtr + AsKernelPtr {
    type Element = U;
}
/// Read access to the borrowed pool pointer.
impl<'a,U,T> AsCudaPtrRef for CudaVecView<'a,U,T>
    where U: UnitValue<U>,
          T: AsConstKernelPtr + MemorySize {
    type Pointer = CudaMemoryPoolPtr<U>;

    #[inline]
    fn as_cuda_ptr_ref(&self) -> &Self::Pointer {
        self.ptr
    }
}
1612impl<'a,U,T,R> TryFrom<&'a CudaVec<U,T>> for CudaVecView<'a,U,R>
1613 where U: UnitValue<U> + Default + Clone + Send,
1614 T: MemorySize + AsKernelPtr + AsConstKernelPtr,
1615 R: MemorySize + AsKernelPtr + AsConstKernelPtr + TryFrom<T> {
1616 type Error = TypeConvertError;
1617
1618 fn try_from(value: &'a CudaVec<U,T>) -> Result<Self, Self::Error> {
1619 if T::size() != R::size() {
1620 Err(TypeConvertError::SizeMismatchError(SizeMismatchError(T::size(),R::size())))
1621 } else {
1622 Ok(CudaVecView {
1623 len:value.size(),
1624 ptr: &value.ptr,
1625 t:PhantomData::<R>
1626 })
1627 }
1628 }
1629}
/// Intermediate form of a [`CudaVecView`] used to convert between per-item
/// pointer types via [`IntoConverter`]/[`TryFrom`].
pub struct CudaVecViewConverter<'a,U,T>
    where U: UnitValue<U> + Default + Clone + Send,
          T: MemorySize + AsConstKernelPtr {
    // Number of batched items (not elements).
    len:usize,
    // Borrowed backing allocation.
    ptr:&'a CudaMemoryPoolPtr<U>,
    // Marker for the source per-item pointer type.
    t:PhantomData<T>
}
1637impl<'a,U,T> IntoConverter for CudaVecView<'a,U,T>
1638 where U: UnitValue<U> + Default + Clone + Send,
1639 T: MemorySize + AsConstKernelPtr {
1640 type Converter = CudaVecViewConverter<'a,U,T>;
1641
1642 fn into_converter(self) -> Self::Converter {
1643 CudaVecViewConverter {
1644 len:self.len,
1645 ptr:self.ptr,
1646 t:PhantomData::<T>
1647 }
1648 }
1649}
1650impl<'a,U,T,R> TryFrom<CudaVecViewConverter<'a,U,T>> for CudaVecView<'a,U,R>
1651 where U: UnitValue<U> + Default + Clone + Send,
1652 T: MemorySize + AsConstKernelPtr,
1653 R: MemorySize + AsConstKernelPtr + From<T> {
1654 type Error = TypeConvertError;
1655
1656 #[inline]
1657 fn try_from(value: CudaVecViewConverter<'a,U,T>) -> Result<Self, Self::Error> {
1658 if T::size() != R::size() {
1659 Err(TypeConvertError::SizeMismatchError(SizeMismatchError(T::size(),R::size())))
1660 } else {
1661 let len = value.len;
1662
1663 Ok(CudaVecView {
1664 len:len,
1665 ptr: value.ptr,
1666 t:PhantomData::<R>
1667 })
1668 }
1669 }
1670}
1671impl<'a,U,T> TryFrom<&'a CudaVecView<'a,U,T>> for CudaVec<U,T>
1672 where U: UnitValue<U> + Default + Clone + Send,
1673 T: MemorySize + AsConstKernelPtr + AsKernelPtr {
1674 type Error = CudaError;
1675
1676 fn try_from(value: &'a CudaVecView<'a, U, T>) -> Result<Self, Self::Error> {
1677 let mut dst = CudaMemoryPoolPtr::new(value.size() * T::size(),&value.ptr.memory_pool)?;
1678
1679 value.memcpy_to(&mut dst,value.size() * T::size())?;
1680
1681 Ok(CudaVec {
1682 len: value.size(),
1683 ptr: dst,
1684 t:PhantomData::<T>
1685 })
1686 }
1687}
/// Intermediate owned form of a [`CudaVec`] used to convert between per-item
/// pointer types or back to a host [`SerializedVec`].
pub struct CudaVecConverter<U,T>
    where U: UnitValue<U> + Default + Clone + Send,
          T: MemorySize + AsKernelPtr + AsConstKernelPtr {
    // Number of batched items (not elements).
    len:usize,
    // Owned backing allocation.
    ptr:CudaMemoryPoolPtr<U>,
    // NOTE(review): `u` is redundant — `ptr` already mentions `U`; kept because
    // the constructor in `IntoConverter` initializes it.
    u:PhantomData<U>,
    // Marker for the source per-item pointer type.
    t:PhantomData<T>
}
/// Moves an owned batch into its converter form, transferring the allocation.
impl<U,T> IntoConverter for CudaVec<U,T>
    where U: UnitValue<U> + Default + Clone + Send,
          T: MemorySize + AsKernelPtr + AsConstKernelPtr {
    type Converter = CudaVecConverter<U,T>;

    fn into_converter(self) -> Self::Converter {
        CudaVecConverter {
            len:self.len,
            ptr:self.ptr,
            u:PhantomData::<U>,
            t:PhantomData::<T>
        }
    }
}
/// Batch size is the number of items, not the element count.
impl<U,T> BatchSize for CudaVecConverter<U,T>
    where U: UnitValue<U> + Default + Clone + Send,
          T: MemorySize + AsKernelPtr + AsConstKernelPtr {
    fn size(&self) -> usize {
        self.len
    }
}
/// Extracts the owned backing allocation, dropping the batch metadata.
impl<U,T> From<CudaVecConverter<U,T>> for CudaMemoryPoolPtr<U>
    where U: UnitValue<U> + Default + Clone + Send,
          T: MemorySize + AsKernelPtr + AsConstKernelPtr {
    fn from(value: CudaVecConverter<U, T>) -> Self {
        value.ptr
    }
}
1724impl<U,T,R> TryFrom<CudaVecConverter<U,T>> for CudaVec<U,R>
1725 where U: UnitValue<U> + Default + Clone + Send,
1726 T: MemorySize + AsKernelPtr + AsConstKernelPtr,
1727 R: MemorySize + AsKernelPtr + AsConstKernelPtr + From<T> {
1728 type Error = TypeConvertError;
1729
1730 #[inline]
1731 fn try_from(value: CudaVecConverter<U,T>) -> Result<Self, Self::Error> {
1732 if T::size() != R::size() {
1733 Err(TypeConvertError::SizeMismatchError(SizeMismatchError(T::size(),R::size())))
1734 } else {
1735 let len = value.size();
1736
1737 Ok(CudaVec {
1738 len:len,
1739 ptr: value.into(),
1740 t:PhantomData::<R>
1741 })
1742 }
1743 }
1744}
/// Converts a batched device buffer back into a host-side [`SerializedVec`],
/// reading the whole allocation and failing when per-item sizes disagree.
impl<U,T,R> TryFrom<CudaVecConverter<U,T>> for SerializedVec<U,R>
    where U: Debug + Default + Clone + Copy + Send + UnitValue<U>,
          for<'a> T: MemorySize + AsKernelPtr + AsConstKernelPtr,
          for<'b> R: SliceSize + AsRawSlice<U> + MakeView<'b,U> + MakeViewMut<'b,U> {
    type Error = TypeConvertError;
    #[inline]
    fn try_from(value: CudaVecConverter<U,T>) -> Result<Self, Self::Error> {
        // Both sizes are in elements of U per item: the device-side T::size()
        // must match the host-side R::slice_size().
        if T::size() != R::slice_size() {
            Err(TypeConvertError::SizeMismatchError(SizeMismatchError(T::size(),R::slice_size())))
        } else {
            Ok(value.ptr.read_to_vec()?.into_boxed_slice().try_into()?)
        }
    }
}
/// Transfers a batched device buffer to the host as a [`SerializedVec`] of the
/// per-item host type, failing when per-item sizes disagree.
impl<U,T> ToHost<U> for CudaVec<U,T>
    where U: Debug + Default + Clone + Copy + Send + UnitValue<U>,
          SerializedVec<U,<T as ToHost<U>>::Output>: TryFrom<Box<[U]>,Error=TypeConvertError>,
          for<'a> <T as ToHost<U>>::Output: SliceSize + MakeView<'a,U>,
          for<'a> T: MemorySize + AsKernelPtr + AsConstKernelPtr + ToHost<U> {
    type Output = SerializedVec<U,<T as ToHost<U>>::Output>;
    #[inline]
    fn to_host(self) -> Result<Self::Output,TypeConvertError> {
        // Device-side element count per item must match the host-side slice size.
        if T::size() != <T as ToHost<U>>::Output::slice_size() {
            Err(TypeConvertError::SizeMismatchError(SizeMismatchError(T::size(),<T as ToHost<U>>::Output::slice_size())))
        } else {
            Ok(self.ptr.read_to_vec()?.into_boxed_slice().try_into()?)
        }
    }
}
1774impl TryFrom<f32> for CudaPtr<f32> {
1775 type Error = CudaError;
1776
1777 fn try_from(value: f32) -> Result<Self, Self::Error> {
1778 let mut ptr:CudaPtr<f32> = CudaPtr::new(1)?;
1779 ptr.memcpy(&value as *const f32,1)?;
1780 Ok(ptr)
1781 }
1782}
1783impl TryFrom<f64> for CudaPtr<f64> {
1784 type Error = CudaError;
1785
1786 fn try_from(value: f64) -> Result<Self, Self::Error> {
1787 let mut ptr:CudaPtr<f64> = CudaPtr::new(1)?;
1788 ptr.memcpy(&value as *const f64,1)?;
1789 Ok(ptr)
1790 }
1791}
1792impl TryFrom<i32> for CudaPtr<i32> {
1793 type Error = CudaError;
1794
1795 fn try_from(value: i32) -> Result<Self, Self::Error> {
1796 let mut ptr:CudaPtr<i32> = CudaPtr::new(1)?;
1797 ptr.memcpy(&value as *const i32,1)?;
1798 Ok(ptr)
1799 }
1800}
1801impl TryFrom<i64> for CudaPtr<i64> {
1802 type Error = CudaError;
1803
1804 fn try_from(value: i64) -> Result<Self, Self::Error> {
1805 let mut ptr:CudaPtr<i64> = CudaPtr::new(1)?;
1806 ptr.memcpy(&value as *const i64,1)?;
1807 Ok(ptr)
1808 }
1809}
1810impl TryFrom<f32> for CudaHostPtr<f32> {
1811 type Error = CudaError;
1812
1813 fn try_from(value: f32) -> Result<Self, Self::Error> {
1814 let mut ptr:CudaHostPtr<f32> = CudaHostPtr::new(1,cudaHostAllocDefault)?;
1815 ptr.memcpy(&value as *const f32,1)?;
1816 Ok(ptr)
1817 }
1818}
1819impl TryFrom<f64> for CudaHostPtr<f64> {
1820 type Error = CudaError;
1821
1822 fn try_from(value: f64) -> Result<Self, Self::Error> {
1823 let mut ptr:CudaHostPtr<f64> = CudaHostPtr::new(1,cudaHostAllocDefault)?;
1824 ptr.memcpy(&value as *const f64,1)?;
1825 Ok(ptr)
1826 }
1827}
1828impl TryFrom<i32> for CudaHostPtr<i32> {
1829 type Error = CudaError;
1830
1831 fn try_from(value: i32) -> Result<Self, Self::Error> {
1832 let mut ptr:CudaHostPtr<i32> = CudaHostPtr::new(1,cudaHostAllocDefault)?;
1833 ptr.memcpy(&value as *const i32,1)?;
1834 Ok(ptr)
1835 }
1836}
1837impl TryFrom<i64> for CudaHostPtr<i64> {
1838 type Error = CudaError;
1839
1840 fn try_from(value: i64) -> Result<Self, Self::Error> {
1841 let mut ptr:CudaHostPtr<i64> = CudaHostPtr::new(1,cudaHostAllocDefault)?;
1842 ptr.memcpy(&value as *const i64,1)?;
1843 Ok(ptr)
1844 }
1845}
/// Conversion of a value into the form used on the given GPU device.
pub trait ToCuda<T> where T: UnitValue<T> {
    /// Device-side type produced by the conversion.
    type Output;

    /// Converts `self` for use with `device`, failing with a
    /// [`TypeConvertError`] when sizes or types do not line up.
    fn to_cuda(self,device:&DeviceGpu<T>) -> Result<Self::Output,TypeConvertError>;
}
/// Conversion of a device-side value back into its host-side representation.
pub trait ToHost<T> where T: Default + Clone + Send {
    /// Host-side type produced by the conversion.
    type Output;

    /// Converts `self` to the host form, failing with a [`TypeConvertError`]
    /// when sizes or types do not line up.
    fn to_host(self) -> Result<Self::Output,TypeConvertError>;
}
1871impl<'a,T,const N:usize> ToCuda<T> for &'a CudaTensor1dPtr<T,N>
1872 where T :UnitValue<T> {
1873 type Output = CudaTensor1dPtrView<'a,T,N>;
1874
1875 fn to_cuda(self, _: &DeviceGpu<T>) -> Result<Self::Output,TypeConvertError> {
1876 Ok(self.into())
1877 }
1878}
1879impl<T,const N:usize> ToCuda<T> for CudaTensor1dPtr<T,N>
1880 where T :UnitValue<T> {
1881 type Output = CudaTensor1dPtr<T,N>;
1882
1883 fn to_cuda(self, _: &DeviceGpu<T>) -> Result<Self::Output,TypeConvertError> {
1884 Ok(self)
1885 }
1886}
1887impl<'a,T,const N1:usize,const N2:usize> ToCuda<T> for &'a CudaTensor2dPtr<T,N1,N2>
1888 where T :UnitValue<T> {
1889 type Output = CudaTensor2dPtrView<'a,T,N1,N2>;
1890
1891 fn to_cuda(self, _: &DeviceGpu<T>) -> Result<Self::Output,TypeConvertError> {
1892 Ok(self.into())
1893 }
1894}
1895impl<T,const N1:usize,const N2:usize> ToCuda<T> for CudaTensor2dPtr<T,N1,N2>
1896 where T :UnitValue<T> {
1897 type Output = CudaTensor2dPtr<T,N1,N2>;
1898
1899 fn to_cuda(self, _: &DeviceGpu<T>) -> Result<Self::Output,TypeConvertError> {
1900 Ok(self)
1901 }
1902}
1903impl<'a,T,const N1:usize,const N2:usize,const N3:usize> ToCuda<T> for &'a CudaTensor3dPtr<T,N1,N2,N3>
1904 where T :UnitValue<T> {
1905 type Output = CudaTensor3dPtrView<'a,T,N1,N2,N3>;
1906
1907 fn to_cuda(self, _: &DeviceGpu<T>) -> Result<Self::Output,TypeConvertError> {
1908 Ok(self.into())
1909 }
1910}
1911impl<T,const N1:usize,const N2:usize,const N3:usize> ToCuda<T> for CudaTensor3dPtr<T,N1,N2,N3>
1912 where T :UnitValue<T> {
1913 type Output = CudaTensor3dPtr<T,N1,N2,N3>;
1914
1915 fn to_cuda(self, _: &DeviceGpu<T>) -> Result<Self::Output,TypeConvertError> {
1916 Ok(self)
1917 }
1918}
1919impl<'a,T,const N1:usize,const N2:usize,const N3:usize,const N4:usize> ToCuda<T> for &'a CudaTensor4dPtr<T,N1,N2,N3,N4>
1920 where T :UnitValue<T> {
1921 type Output = CudaTensor4dPtrView<'a,T,N1,N2,N3,N4>;
1922
1923 fn to_cuda(self, _: &DeviceGpu<T>) -> Result<Self::Output,TypeConvertError> {
1924 Ok(self.into())
1925 }
1926}
1927impl<T,const N1:usize,const N2:usize,const N3:usize,const N4:usize> ToCuda<T> for CudaTensor4dPtr<T,N1,N2,N3,N4>
1928 where T :UnitValue<T> {
1929 type Output = CudaTensor4dPtr<T,N1,N2,N3,N4>;
1930
1931 fn to_cuda(self, _: &DeviceGpu<T>) -> Result<Self::Output,TypeConvertError> {
1932 Ok(self)
1933 }
1934}
/// Immutable access to the underlying CUDA pointer of a wrapper type.
pub trait AsCudaPtrRef {
    /// Concrete pointer type being wrapped.
    type Pointer: AsConstKernelPtr;

    /// Borrows the wrapped pointer.
    fn as_cuda_ptr_ref(&self) -> &Self::Pointer;
}
1942impl<'a,T,const N:usize> AsCudaPtrRef for &'a CudaTensor1dPtr<T,N>
1943 where T: Default + Debug {
1944 type Pointer = CudaMemoryPoolPtr<T>;
1945
1946 #[inline]
1947 fn as_cuda_ptr_ref(&self) -> &Self::Pointer {
1948 &self.ptr
1949 }
1950}
1951impl<'a,T,const N1:usize,const N2:usize> AsCudaPtrRef for &'a CudaTensor2dPtr<T,N1,N2>
1952 where T: Default + Debug {
1953 type Pointer = CudaMemoryPoolPtr<T>;
1954
1955 #[inline]
1956 fn as_cuda_ptr_ref(&self) -> &Self::Pointer {
1957 &self.ptr
1958 }
1959}
1960impl<'a,T,const N1:usize,const N2:usize,const N3:usize> AsCudaPtrRef for &'a CudaTensor3dPtr<T,N1,N2,N3>
1961 where T: Default + Debug {
1962 type Pointer = CudaMemoryPoolPtr<T>;
1963
1964 #[inline]
1965 fn as_cuda_ptr_ref(&self) -> &Self::Pointer {
1966 &self.ptr
1967 }
1968}
1969impl<'a,T,const N1:usize,const N2:usize,const N3:usize,const N4:usize> AsCudaPtrRef for &'a CudaTensor4dPtr<T,N1,N2,N3,N4>
1970 where T: Default + Debug {
1971 type Pointer = CudaMemoryPoolPtr<T>;
1972
1973 #[inline]
1974 fn as_cuda_ptr_ref(&self) -> &Self::Pointer {
1975 &self.ptr
1976 }
1977}
/// Any wrapper exposing a cuda pointer usable as a const kernel argument
/// forwards the raw-pointer lookup to that pointer.
impl<CP> private::AsConstKernelPtrBase for CP
    where CP: AsCudaPtrRef,
          <CP as AsCudaPtrRef>::Pointer: private::AsConstKernelPtrBase {
    #[inline]
    fn as_const_kernel_ptr(&self) -> *mut c_void {
        self.as_cuda_ptr_ref().as_const_kernel_ptr()
    }
}
/// Forwards `as_void_ptr` to the wrapped cuda pointer.
impl<CP> AsVoidPtr for CP
    where CP: AsCudaPtrRef,
          <CP as AsCudaPtrRef>::Pointer: AsVoidPtr {
    #[inline]
    fn as_void_ptr(&self) -> *const c_void {
        self.as_cuda_ptr_ref().as_void_ptr()
    }
}
/// Forwards `as_ptr` to the wrapped cuda pointer.
impl<CP,T> AsPtr<T> for CP
    where CP: AsCudaPtrRef,
          <CP as AsCudaPtrRef>::Pointer: AsPtr<T> {
    #[inline]
    fn as_ptr(&self) -> *const T {
        self.as_cuda_ptr_ref().as_ptr()
    }
}
/// Forwards memory reads to the wrapped cuda pointer.
impl<CP> ReadMemory<<CP as PointerElement>::Element> for CP
    where CP: AsCudaPtrRef + PointerElement,
          <CP as AsCudaPtrRef>::Pointer: ReadMemory<<CP as PointerElement>::Element> {
    #[inline]
    fn read_to_vec(&self) -> Result<Vec<<CP as PointerElement>::Element>, Error> {
        self.as_cuda_ptr_ref().read_to_vec()
    }

    #[inline]
    fn read_to_vec_with_size(&self, size: usize) -> Result<Vec<<CP as PointerElement>::Element>, Error> {
        self.as_cuda_ptr_ref().read_to_vec_with_size(size)
    }
}
/// Forwards stream-ordered (asynchronous) memory reads to the wrapped cuda pointer.
impl<CP> ReadMemoryAsync<<CP as PointerElement>::Element> for CP
    where CP: AsCudaPtrRef + PointerElement,
          <CP as AsCudaPtrRef>::Pointer: ReadMemoryAsync<<CP as PointerElement>::Element> {
    #[inline]
    fn read_to_vec_async(&self, stream: cudaStream_t) -> Result<Vec<<CP as PointerElement>::Element>, Error> {
        self.as_cuda_ptr_ref().read_to_vec_async(stream)
    }
    #[inline]
    fn read_to_vec_with_size_async(&self, stream: cudaStream_t, size: usize) -> Result<Vec<<CP as PointerElement>::Element>, Error> {
        self.as_cuda_ptr_ref().read_to_vec_with_size_async(stream,size)
    }
}
/// Copies from any cuda-pointer wrapper into any mutable cuda-pointer wrapper,
/// delegating to the underlying pointers' `memcpy_to`.
impl<CP,D> MemoryMoveTo<<CP as PointerElement>::Element,D> for CP
    where CP: AsCudaPtrRef + PointerElement,
          D: AsCudaMutPtr,
          <CP as AsCudaPtrRef>::Pointer: MemoryMoveTo<<CP as PointerElement>::Element,<D as AsCudaMutPtr>::Pointer>,
          <D as AsCudaMutPtr>::Pointer: AsMutPtr<<CP as PointerElement>::Element> {
    #[inline]
    fn memcpy_to(&self, dst: &mut D, len: usize) -> Result<usize, Error> {
        self.as_cuda_ptr_ref().memcpy_to(dst.as_cuda_mut_ptr().ptr,len)
    }
}
/// Copies from any cuda-pointer wrapper directly into a raw [`CudaPtr`].
impl<CP> MemoryMoveTo<<CP as PointerElement>::Element,CudaPtr<<CP as PointerElement>::Element>> for CP
    where CP: AsCudaPtrRef + PointerElement,
          <CP as AsCudaPtrRef>::Pointer: MemoryMoveTo<<CP as PointerElement>::Element,CudaPtr<<CP as PointerElement>::Element>> {
    #[inline]
    fn memcpy_to(&self, dst: &mut CudaPtr<<CP as PointerElement>::Element>, len: usize) -> Result<usize, Error> {
        self.as_cuda_ptr_ref().memcpy_to(dst,len)
    }
}
/// Copies from any cuda-pointer wrapper directly into a [`CudaHostPtr`].
impl<CP> MemoryMoveTo<<CP as PointerElement>::Element,CudaHostPtr<<CP as PointerElement>::Element>> for CP
    where CP: AsCudaPtrRef + PointerElement,
          <CP as AsCudaPtrRef>::Pointer: MemoryMoveTo<<CP as PointerElement>::Element,CudaHostPtr<<CP as PointerElement>::Element>> {
    #[inline]
    fn memcpy_to(&self, dst: &mut CudaHostPtr<<CP as PointerElement>::Element>, len: usize) -> Result<usize, Error> {
        self.as_cuda_ptr_ref().memcpy_to(dst,len)
    }
}
/// Copies from any cuda-pointer wrapper directly into a [`CudaMemoryPoolPtr`].
impl<CP> MemoryMoveTo<<CP as PointerElement>::Element,CudaMemoryPoolPtr<<CP as PointerElement>::Element>> for CP
    where CP: AsCudaPtrRef + PointerElement,
          <CP as AsCudaPtrRef>::Pointer: MemoryMoveTo<<CP as PointerElement>::Element,CudaMemoryPoolPtr<<CP as PointerElement>::Element>> {
    #[inline]
    fn memcpy_to(&self, dst: &mut CudaMemoryPoolPtr<<CP as PointerElement>::Element>, len: usize) -> Result<usize, Error> {
        self.as_cuda_ptr_ref().memcpy_to(dst,len)
    }
}
/// Stream-ordered copy from any cuda-pointer wrapper into any mutable
/// cuda-pointer wrapper, delegating to the underlying pointers.
impl<CP,D> MemoryMoveToAsync<<CP as PointerElement>::Element,D> for CP
    where CP: AsCudaPtrRef + PointerElement,
          D: AsCudaMutPtr,
          <CP as AsCudaPtrRef>::Pointer: MemoryMoveToAsync<<CP as PointerElement>::Element,<D as AsCudaMutPtr>::Pointer>,
          <D as AsCudaMutPtr>::Pointer: AsMutPtr<<CP as PointerElement>::Element> {
    #[inline]
    fn memcpy_to_async(&self, dst: &mut D, len: usize,stream:cudaStream_t) -> Result<usize, Error> {
        self.as_cuda_ptr_ref().memcpy_to_async(&mut dst.as_cuda_mut_ptr().ptr,len,stream)
    }
}
/// Stream-ordered copy from any cuda-pointer wrapper into a raw [`CudaPtr`].
impl<CP> MemoryMoveToAsync<<CP as PointerElement>::Element,CudaPtr<<CP as PointerElement>::Element>> for CP
    where CP: AsCudaPtrRef + PointerElement,
          <CP as AsCudaPtrRef>::Pointer: MemoryMoveToAsync<<CP as PointerElement>::Element,CudaPtr<<CP as PointerElement>::Element>> {
    #[inline]
    fn memcpy_to_async(&self, dst: &mut CudaPtr<<CP as PointerElement>::Element>, len: usize,stream:cudaStream_t) -> Result<usize, Error> {
        self.as_cuda_ptr_ref().memcpy_to_async(dst,len,stream)
    }
}
/// Stream-ordered copy from any cuda-pointer wrapper into a [`CudaHostPtr`].
impl<CP> MemoryMoveToAsync<<CP as PointerElement>::Element,CudaHostPtr<<CP as PointerElement>::Element>> for CP
    where CP: AsCudaPtrRef + PointerElement,
          <CP as AsCudaPtrRef>::Pointer: MemoryMoveToAsync<<CP as PointerElement>::Element,CudaHostPtr<<CP as PointerElement>::Element>> {
    #[inline]
    fn memcpy_to_async(&self, dst: &mut CudaHostPtr<<CP as PointerElement>::Element>, len: usize,stream:cudaStream_t) -> Result<usize, Error> {
        self.as_cuda_ptr_ref().memcpy_to_async(dst,len,stream)
    }
}
/// Stream-ordered copy from any cuda-pointer wrapper into a [`CudaMemoryPoolPtr`].
impl<CP> MemoryMoveToAsync<<CP as PointerElement>::Element,CudaMemoryPoolPtr<<CP as PointerElement>::Element>> for CP
    where CP: AsCudaPtrRef + PointerElement,
          <CP as AsCudaPtrRef>::Pointer: MemoryMoveToAsync<<CP as PointerElement>::Element,CudaMemoryPoolPtr<<CP as PointerElement>::Element>> {
    #[inline]
    fn memcpy_to_async(&self, dst: &mut CudaMemoryPoolPtr<<CP as PointerElement>::Element>, len: usize,stream:cudaStream_t) -> Result<usize, Error> {
        self.as_cuda_ptr_ref().memcpy_to_async(dst,len,stream)
    }
}
/// Short-lived mutable borrow of a pointer, handed out by [`AsCudaMutPtr`]
/// so write operations can be forwarded without exposing the field directly.
pub struct CudaMutPtr<'a,P> {
    ptr:&'a mut P
}
2099impl<'a,P> CudaMutPtr<'a,P> {
2100 pub fn new(ptr:&'a mut P) -> CudaMutPtr<'a,P> {
2101 CudaMutPtr {
2102 ptr:ptr
2103 }
2104 }
2105}
/// Forwards the mutable kernel-pointer lookup to the borrowed pointer.
impl<'a,P> private::AsMutKernelPtrBase for CudaMutPtr<'a,P> where P: private::AsMutKernelPtrBase {
    #[inline]
    fn as_mut_kernel_ptr(&mut self) -> *mut c_void {
        self.ptr.as_mut_kernel_ptr()
    }
}
/// Forwards `as_mut_void_ptr` to the borrowed pointer.
impl<'a,P> AsMutVoidPtr for CudaMutPtr<'a,P> where P: AsMutVoidPtr {
    #[inline]
    fn as_mut_void_ptr(&mut self) -> *mut c_void {
        self.ptr.as_mut_void_ptr()
    }
}
/// Forwards `as_mut_ptr` to the borrowed pointer.
impl<'a,P,T> AsMutPtr<T> for CudaMutPtr<'a,P> where P: AsMutPtr<T> {
    #[inline]
    fn as_mut_ptr(&mut self) -> *mut T {
        self.ptr.as_mut_ptr()
    }
}
/// The element type is inherited from the borrowed pointer.
impl<'a,P> PointerElement for CudaMutPtr<'a,P>
    where P: PointerElement {
    type Element = P::Element;
}
/// Forwards synchronous writes to the borrowed pointer.
impl<'a,P> WriteMemory<<Self as PointerElement>::Element> for CudaMutPtr<'a,P>
    where P: WriteMemory<<Self as PointerElement>::Element> + PointerElement {
    #[inline]
    fn memcpy(&mut self, p: *const <Self as PointerElement>::Element, len: usize) -> Result<usize, Error> {
        self.ptr.memcpy(p,len)
    }

    #[inline]
    fn memcpy_repeat(&mut self, p: *const <Self as PointerElement>::Element, len: usize, count: usize) -> Result<usize, Error> {
        self.ptr.memcpy_repeat(p,len,count)
    }
}
/// Forwards stream-ordered writes to the borrowed pointer.
impl<'a,P> WriteMemoryAsync<<Self as PointerElement>::Element> for CudaMutPtr<'a,P>
    where P: WriteMemoryAsync<<Self as PointerElement>::Element> + PointerElement {
    #[inline]
    fn memcpy_async(&mut self, p: *const <Self as PointerElement>::Element, len: usize, stream: cudaStream_t) -> Result<usize, Error> {
        self.ptr.memcpy_async(p,len,stream)
    }

    #[inline]
    fn memcpy_async_repeat(&mut self, p: *const <Self as PointerElement>::Element, len: usize, count: usize, stream: cudaStream_t) -> Result<usize, Error> {
        self.ptr.memcpy_async_repeat(p,len,count,stream)
    }
}
/// Mutable access to the underlying CUDA pointer of a wrapper type.
pub trait AsCudaMutPtr {
    /// Concrete pointer type being wrapped.
    type Pointer;

    /// Hands out a short-lived mutable wrapper around the pointer.
    fn as_cuda_mut_ptr<'a>(&'a mut self) -> CudaMutPtr<'a,Self::Pointer>;
}
/// Any wrapper exposing a mutable cuda pointer usable as a kernel argument
/// forwards the raw-pointer lookup to that pointer.
impl<CP> private::AsMutKernelPtrBase for CP
    where CP: AsCudaMutPtr,
          <CP as AsCudaMutPtr>::Pointer: private::AsMutKernelPtrBase {
    #[inline]
    fn as_mut_kernel_ptr(&mut self) -> *mut c_void {
        self.as_cuda_mut_ptr().as_mut_kernel_ptr()
    }
}
/// Forwards `as_mut_void_ptr` through the mutable cuda-pointer wrapper.
impl<CP> AsMutVoidPtr for CP
    where CP: AsCudaMutPtr,
          <CP as AsCudaMutPtr>::Pointer: AsMutVoidPtr {
    #[inline]
    fn as_mut_void_ptr(&mut self) -> *mut c_void {
        self.as_cuda_mut_ptr().as_mut_void_ptr()
    }
}
/// Forwards `as_mut_ptr` through the mutable cuda-pointer wrapper.
impl<CP,T> AsMutPtr<T> for CP
    where CP: AsCudaMutPtr,
          <CP as AsCudaMutPtr>::Pointer: AsMutPtr<T> {
    #[inline]
    fn as_mut_ptr(&mut self) -> *mut T {
        self.as_cuda_mut_ptr().as_mut_ptr()
    }
}
/// Forwards synchronous writes through the mutable cuda-pointer wrapper.
impl<CP> WriteMemory<<CP as PointerElement>::Element> for CP
    where CP: AsCudaMutPtr + PointerElement,
          <CP as AsCudaMutPtr>::Pointer: WriteMemory<<CP as PointerElement>::Element> {
    #[inline]
    fn memcpy(&mut self, p: *const <CP as PointerElement>::Element, len: usize) -> Result<usize, Error> {
        self.as_cuda_mut_ptr().ptr.memcpy(p,len)
    }

    #[inline]
    fn memcpy_repeat(&mut self, p: *const <CP as PointerElement>::Element, len: usize, count: usize) -> Result<usize, Error> {
        self.as_cuda_mut_ptr().ptr.memcpy_repeat(p,len,count)
    }
}
2196impl<CP> WriteMemoryAsync<<CP as PointerElement>::Element> for CP
2197 where CP: AsCudaMutPtr + PointerElement,
2198 <CP as AsCudaMutPtr>::Pointer: WriteMemoryAsync<<CP as PointerElement>::Element> {
2199 #[inline]
2200 fn memcpy_async(&mut self, p: *const <CP as PointerElement>::Element, len: usize, stream: cudaStream_t) -> Result<usize, Error> {
2201 self.as_cuda_mut_ptr().ptr.memcpy_async(p,len,stream)
2202 }
2203
2204 #[inline]
2205 fn memcpy_async_repeat(&mut self, p: *const <CP as PointerElement>::Element, len: usize, count: usize, stream: cudaStream_t) -> Result<usize, Error> {
2206 self.as_cuda_mut_ptr().ptr.memcpy_async_repeat(p,len,count,stream)
2207 }
2208}
2209impl<'a,T,const N:usize> From<&'a mut CudaTensor1dPtr<T,N>> for CudaMutPtr<'a,CudaMemoryPoolPtr<T>> where T: Default + Debug {
2210 fn from(value: &'a mut CudaTensor1dPtr<T,N>) -> Self {
2211 value.as_cuda_mut_ptr()
2212 }
2213}
2214impl<'a,T,const N1:usize,const N2:usize> From<&'a mut CudaTensor2dPtr<T,N1,N2>> for CudaMutPtr<'a,CudaMemoryPoolPtr<T>> where T: Default + Debug {
2215 fn from(value: &'a mut CudaTensor2dPtr<T,N1,N2>) -> Self {
2216 value.as_cuda_mut_ptr()
2217 }
2218}
/// A set of arguments that can be handed to a CUDA kernel launch.
///
/// Implementors flatten their fields into a uniform list of
/// [`AsKernelPtr`] trait objects, which the launch code turns into the
/// raw parameter-pointer array the driver expects.
pub trait KernelArgs {
    /// Returns mutable references to each argument, in the order the
    /// kernel expects to receive them.
    fn as_vec(&mut self) -> Vec<&mut dyn AsKernelPtr>;
}
2225pub trait Kernel {
2227 type Args: KernelArgs;
2229
2230 const FUNC_PTR: *const c_void;
2232
2233 fn launch(&mut self,grid_dim:dim3,block_dim:dim3,args:&mut Self::Args,shared_mem:usize) -> Result<(),CudaRuntimeError> {
2245 ffi::launch(Self::FUNC_PTR,
2246 grid_dim,
2247 block_dim,
2248 &mut args.as_vec().into_iter()
2249 .map(|p| p.as_kernel_ptr())
2250 .collect::<Vec<*mut c_void>>().as_mut_slice(),
2251 shared_mem
2252 )
2253 }
2254
2255 fn device_synchronize(&self) -> Result<(),CudaRuntimeError> {
2262 ffi::device_synchronize()
2263 }
2264}
2265pub trait CooperativeKernel {
2267 type Args: KernelArgs;
2269
2270 const FUNC_PTR: *const c_void;
2272
2273 fn launch(&mut self,grid_dim:dim3,block_dim:dim3,args:&mut Self::Args,shared_mem:usize) -> Result<(),CudaRuntimeError> {
2286 ffi::launch_cooperative(Self::FUNC_PTR,
2287 grid_dim,
2288 block_dim,
2289 &mut args.as_vec().into_iter()
2290 .map(|p| p.as_kernel_ptr())
2291 .collect::<Vec<*mut c_void>>().as_mut_slice(),
2292 shared_mem
2293 )
2294 }
2295
2296 fn device_synchronize(&self) -> Result<(),CudaRuntimeError> {
2303 ffi::device_synchronize()
2304 }
2305}