#ifdef UCL_COPY_ALLOW
/// Debug helper: check that the access kinds of dst and src permit a copy.
/** - Same MEM_TYPE on both operands: dst must not be UCL_READ_ONLY and src
  *   must not be UCL_WRITE_ONLY.
  * - Different MEM_TYPE (the messages call this a host-device copy): dst must
  *   not be UCL_WRITE_ONLY and src must not be UCL_READ_ONLY.
  *
  * Only the first violated rule is reported: its message is written to
  * std::cerr and a failing assert follows. Nothing happens otherwise. **/
template <class mat1, class mat2>
inline void _check_ucl_copy_perm(mat1 &dst, mat2 &src) {
  const bool same_mem_type = ((int)mat1::MEM_TYPE==(int)mat2::MEM_TYPE);
  // Stays null as long as no rule is violated
  const char *complaint = 0;

  if (same_mem_type) {
    if (dst.kind()==UCL_READ_ONLY)
      complaint = "Attempt to copy where destination is UCL_READ_ONLY\n";
    else if (src.kind()==UCL_WRITE_ONLY)
      complaint = "Attempt to copy where source is UCL_WRITE_ONLY\n";
  } else {
    if (dst.kind()==UCL_WRITE_ONLY)
      complaint = "Destination in host-device copy cannot be UCL_WRITE_ONLY\n";
    else if (src.kind()==UCL_READ_ONLY)
      complaint = "Source in host-device copy cannot be UCL_READ_ONLY\n";
  }

  if (complaint != 0) {
    std::cerr << complaint;
    assert(0==1);
  }
}
/// Copy helper selected on the MEM_TYPE of destination and source; the <1,1>
/// specialization below is the one that actually copies.
template <int host_t1, int host_t2> struct _host_host_copy;

/// Copy between two containers that both have MEM_TYPE==1.
/** When both containers share the same non-zero DATA_TYPE the data is moved
  * with memcpy; otherwise every element is converted with static_cast. In
  * _OCL_MAT builds the memcpy path is skipped entirely when dst.begin() and
  * src.begin() compare equal. **/
template <> struct _host_host_copy<1,1> {
  /// Copy the first numel elements of src into dst.
  template <class mat1, class mat2>
  static inline void hhc(mat1 &dst, const mat2 &src, const size_t numel) {
    #ifdef UCL_DEBUG
    assert(mat1::PADDED==0 && mat2::PADDED==0);
    assert(mat1::ROW_MAJOR==1 && mat2::ROW_MAJOR==1);
    #endif
    const bool convert = ((int)mat1::DATA_TYPE!=(int)mat2::DATA_TYPE ||
                          mat1::DATA_TYPE==0);
    if (convert) {
      // Element types differ (or are untagged): convert one at a time
      for (size_t k=0; k<numel; ++k)
        dst[k]=static_cast<typename mat1::data_type>(src[k]);
      return;
    }

    #ifdef _OCL_MAT
    if (dst.begin()==src.begin()) {
      #ifdef UCL_DBG_MEM_TRACE
      std::cerr << "UCL_COPY 7S\n";
      #endif
      return;
    }
    #endif
    memcpy(dst.begin(),src.begin(),numel*sizeof(typename mat1::data_type));
    #ifdef UCL_DBG_MEM_TRACE
    std::cerr << "UCL_COPY 7NS\n";
    #endif
  }

  /// Copy a rows x cols region of src into dst.
  /** A VECTOR container is treated as tightly packed (cols elements between
    * row starts); any other container uses its row_size() as the distance
    * between row starts. **/
  template <class mat1, class mat2>
  static inline void hhc(mat1 &dst, const mat2 &src, const size_t rows,
                         const size_t cols) {
    #ifdef UCL_DEBUG
    assert(mat1::ROW_MAJOR==1 && mat2::ROW_MAJOR==1);
    #endif
    // Distance, in elements, between the starts of consecutive rows
    const size_t dst_pitch = mat1::VECTOR ? cols : dst.row_size();
    const size_t src_pitch = mat2::VECTOR ? cols : src.row_size();

    if ((int)mat1::DATA_TYPE==(int)mat2::DATA_TYPE && mat1::DATA_TYPE!=0) {
      #ifdef _OCL_MAT
      if (dst.begin()==src.begin()) {
        #ifdef UCL_DBG_MEM_TRACE
        std::cerr << "UCL_COPY 8S\n";
        #endif
        return;
      }
      #endif
      #ifdef UCL_DBG_MEM_TRACE
      std::cerr << "UCL_COPY 8NS\n";
      #endif
      const size_t row_bytes = cols*sizeof(typename mat1::data_type);
      for (size_t r=0; r<rows; ++r)
        memcpy(dst.begin()+r*dst_pitch,src.begin()+r*src_pitch,row_bytes);
    } else {
      for (size_t r=0; r<rows; ++r) {
        const size_t dst_base = r*dst_pitch;
        const size_t src_base = r*src_pitch;
        for (size_t c=0; c<cols; ++c)
          dst[dst_base+c]=
            static_cast<typename mat1::data_type>(src[src_base+c]);
      }
    }
  }
};
/// Generic _host_host_copy: used for every <host_t1,host_t2> pair that has no
/// dedicated specialization. Both overloads only fail an assertion, so
/// reaching them at run time indicates a dispatch error in the caller.
template <int host_t1, int host_t2> struct _host_host_copy {
  /// 1-D overload (dst, src, numel): not supported for this pair.
  template <class mat1, class mat2>
  static inline void hhc(mat1 & /*dst*/, const mat2 & /*src*/,
                         const size_t /*numel*/) {
    assert(0==1);
  }

  /// 2-D overload (dst, src, rows, cols): not supported for this pair.
  template <class mat1, class mat2>
  static inline void hhc(mat1 & /*dst*/, const mat2 & /*src*/,
                         const size_t /*rows*/, const size_t /*cols*/) {
    assert(0==1);
  }
};
/// Element-type-converting copy, selected on the MEM_TYPE of the destination
/// (first argument) and of the source (second argument).
template <int host_type1, int host_type2> struct _ucl_cast_copy;

/// Casting copy into a destination whose MEM_TYPE is 1.
/** Every overload first moves the raw source elements into cast_buffer with
  * ucl_mv_cpy (byte counts are derived from mat2::data_type) and afterwards
  * converts them one by one into dst with static_cast. The overloads taking a
  * command_queue pass it to ucl_mv_cpy and call cast_buffer.sync() before the
  * buffer is read. **/
template <int host_type2> struct _ucl_cast_copy<1,host_type2> {
  /// Convert the first count elements of buffer into dst, index for index.
  template <class mat1, class mat3>
  static inline void _cast_packed(mat1 &dst, mat3 &buffer,
                                  const size_t count) {
    for (size_t k=0; k<count; ++k)
      dst[k]=static_cast<typename mat1::data_type>(buffer[k]);
  }

  /// Convert rows x cols tightly packed elements of buffer into dst; after
  /// each row the destination index skips dst.cols()-cols further elements.
  template <class mat1, class mat3>
  static inline void _cast_rows(mat1 &dst, mat3 &buffer, const size_t rows,
                                const size_t cols) {
    const size_t dst_skip=dst.cols()-cols;
    size_t out=0, in=0;
    for (size_t r=0; r<rows; ++r) {
      for (size_t c=0; c<cols; ++c) {
        dst[out]=static_cast<typename mat1::data_type>(buffer[in]);
        ++out;
        ++in;
      }
      out+=dst_skip;
    }
  }

  /// Copy and convert numel elements (no command queue).
  template <class mat1, class mat2, class mat3>
  static inline void cc(mat1 &dst, const mat2 &src, const size_t numel,
                        mat3 &cast_buffer) {
    const size_t nbytes=numel*sizeof(typename mat2::data_type);
    ucl_mv_cpy(cast_buffer,src,nbytes);
    _cast_packed(dst,cast_buffer,numel);
  }

  /// Copy and convert numel elements; the transfer is issued on cq.
  template <class mat1, class mat2, class mat3>
  static inline void cc(mat1 &dst, const mat2 &src, const size_t numel,
                        mat3 &cast_buffer,command_queue &cq) {
    const size_t nbytes=numel*sizeof(typename mat2::data_type);
    ucl_mv_cpy(cast_buffer,src,nbytes,cq);
    // The buffer is read right below, so wait on it first
    cast_buffer.sync();
    _cast_packed(dst,cast_buffer,numel);
  }

  /// Copy and convert a rows x cols region (no command queue).
  /** The region is always packed into cast_buffer with cols elements per
    * row. A VECTOR destination receives it contiguously; any other
    * destination receives it row by row. **/
  template <class mat1, class mat2, class mat3>
  static inline void cc(mat1 &dst, const mat2 &src, const size_t rows,
                        const size_t cols, mat3 &cast_buffer) {
    #ifdef UCL_DEBUG
    assert(mat1::ROW_MAJOR==1 && mat2::ROW_MAJOR==1);
    assert(dst.numel()>=rows*cols && cast_buffer.numel()>=rows*cols);
    if (mat1::VECTOR==0) assert(dst.rows()>=rows && dst.cols()>=cols);
    if (mat2::VECTOR==0) assert(src.rows()>=rows && src.cols()>=cols);
    #endif
    // Bytes in one packed row of source elements
    const size_t row_bytes=cols*sizeof(typename mat2::data_type);
    if (mat1::VECTOR) {
      ucl_mv_cpy(cast_buffer,row_bytes,src,src.row_bytes(),row_bytes,rows);
      _cast_packed(dst,cast_buffer,rows*cols);
      return;
    }
    // A VECTOR source is read with a packed pitch, anything else with its own
    const size_t src_pitch = mat2::VECTOR ? row_bytes : src.row_bytes();
    ucl_mv_cpy(cast_buffer,row_bytes,src,src_pitch,row_bytes,rows);
    _cast_rows(dst,cast_buffer,rows,cols);
  }

  /// Copy and convert a rows x cols region; the transfer is issued on cq.
  /** Same layout rules as the overload without a command queue;
    * cast_buffer.sync() is called before the buffer is read. **/
  template <class mat1, class mat2, class mat3>
  static inline void cc(mat1 &dst, const mat2 &src, const size_t rows,
                        const size_t cols, mat3 &cast_buffer,
                        command_queue &cq) {
    #ifdef UCL_DEBUG
    assert(mat1::ROW_MAJOR==1 && mat2::ROW_MAJOR==1);
    assert(dst.numel()>=rows*cols && cast_buffer.numel()>=rows*cols);
    if (mat1::VECTOR==0) assert(dst.rows()>=rows && dst.cols()>=cols);
    if (mat2::VECTOR==0) assert(src.rows()>=rows && src.cols()>=cols);
    #endif
    const size_t row_bytes=cols*sizeof(typename mat2::data_type);
    if (mat1::VECTOR) {
      ucl_mv_cpy(cast_buffer,row_bytes,src,src.row_bytes(),row_bytes,rows,cq);
      cast_buffer.sync();
      _cast_packed(dst,cast_buffer,rows*cols);
      return;
    }
    const size_t src_pitch = mat2::VECTOR ? row_bytes : src.row_bytes();
    ucl_mv_cpy(cast_buffer,row_bytes,src,src_pitch,row_bytes,rows,cq);
    cast_buffer.sync();
    _cast_rows(dst,cast_buffer,rows,cols);
  }
};
/// Casting copy from a source whose MEM_TYPE is 1.
/** Every overload first converts the source elements into cast_buffer with
  * static_cast (to mat3::data_type) and then moves the buffer into dst with
  * ucl_mv_cpy, using byte counts derived from mat1::data_type. The overloads
  * taking a command_queue pass it to every ucl_mv_cpy call; they do not wait
  * for the transfer themselves. **/
template <int host_type1> struct _ucl_cast_copy<host_type1,1> {
  /// Convert rows x cols elements of src into cast_buffer.
  /** After each row the read index advances by src_skip extra elements and
    * the write index by buf_skip extra elements. **/
  template <class mat2, class mat3>
  static inline void _pack_rows(mat3 &cast_buffer, const mat2 &src,
                                const size_t rows, const size_t cols,
                                const size_t src_skip,
                                const size_t buf_skip) {
    size_t in=0, out=0;
    for (size_t r=0; r<rows; r++) {
      for (size_t c=0; c<cols; c++) {
        cast_buffer[out]=static_cast<typename mat3::data_type>(src[in]);
        out++;
        in++;
      }
      in+=src_skip;
      out+=buf_skip;
    }
  }

  /// Convert and copy numel elements (no command queue).
  template <class mat1, class mat2, class mat3>
  static inline void cc(mat1 &dst, const mat2 &src, const size_t numel,
                        mat3 &cast_buffer) {
    for (size_t i=0; i<numel; i++)
      cast_buffer[i]=static_cast<typename mat3::data_type>(src[i]);
    ucl_mv_cpy(dst,cast_buffer,numel*sizeof(typename mat1::data_type));
  }

  /// Convert and copy numel elements; the transfer is issued on cq.
  template <class mat1, class mat2, class mat3>
  static inline void cc(mat1 &dst, const mat2 &src, const size_t numel,
                        mat3 &cast_buffer, command_queue &cq) {
    for (size_t i=0; i<numel; i++)
      cast_buffer[i]=static_cast<typename mat3::data_type>(src[i]);
    ucl_mv_cpy(dst,cast_buffer,numel*sizeof(typename mat1::data_type),cq);
  }

  /// Convert and copy a rows x cols region (no command queue).
  /** Layout handling:
    *  - VECTOR source: read contiguously (rows*cols consecutive elements).
    *  - other source: row starts are src.cols() elements apart.
    *  - VECTOR cast_buffer: filled contiguously; otherwise row starts are
    *    cast_buffer.cols() elements apart.
    *  - VECTOR destination with a non-VECTOR source: one flat transfer of
    *    rows*cols elements; otherwise a row-wise transfer using
    *    dst.row_bytes() as destination pitch. **/
  template <class mat1, class mat2, class mat3>
  static inline void cc(mat1 &dst, const mat2 &src, const size_t rows,
                        const size_t cols, mat3 &cast_buffer) {
    #ifdef UCL_DEBUG
    assert(mat1::ROW_MAJOR==1 && mat2::ROW_MAJOR==1);
    assert(src.numel()>=rows*cols && cast_buffer.numel()>=rows*cols);
    if (mat1::VECTOR==0) assert(dst.rows()>=rows && dst.cols()>=cols);
    if (mat2::VECTOR==0) assert(src.rows()>=rows && src.cols()>=cols);
    if (mat3::VECTOR==0) {
      assert(cast_buffer.rows()>=rows && cast_buffer.cols()>=cols);
      assert(dst.rows()>=rows && dst.cols()>=cols);
    }
    #endif
    // Bytes in one packed row of destination elements
    const size_t row_bytes=cols*sizeof(typename mat1::data_type);
    if (mat2::VECTOR) {
      if (mat3::VECTOR==0) {
        // A VECTOR source holds its rows*cols elements back to back, so no
        // source elements are skipped between rows. Skipping
        // src.cols()-cols here would run past the data whenever rows>1.
        _pack_rows(cast_buffer,src,rows,cols,0,cast_buffer.cols()-cols);
        ucl_mv_cpy(dst,dst.row_bytes(),cast_buffer,cast_buffer.row_bytes(),
                   row_bytes,rows);
      } else {
        _pack_rows(cast_buffer,src,rows,cols,0,0);
        ucl_mv_cpy(dst,dst.row_bytes(),cast_buffer,row_bytes,row_bytes,rows);
      }
    } else if (mat1::VECTOR) {
      _pack_rows(cast_buffer,src,rows,cols,src.cols()-cols,0);
      ucl_mv_cpy(dst,cast_buffer,row_bytes*rows);
    } else {
      size_t buf_skip, buf_pitch;
      if (mat3::VECTOR==0) {
        buf_skip=cast_buffer.cols()-cols;
        buf_pitch=cast_buffer.row_bytes();
      } else {
        buf_skip=0;
        buf_pitch=row_bytes;
      }
      _pack_rows(cast_buffer,src,rows,cols,src.cols()-cols,buf_skip);
      ucl_mv_cpy(dst,dst.row_bytes(),cast_buffer,buf_pitch,row_bytes,rows);
    }
  }

  /// Convert and copy a rows x cols region; every transfer is issued on cq.
  /** Same layout handling as the overload without a command queue. **/
  template <class mat1, class mat2, class mat3>
  static inline void cc(mat1 &dst, const mat2 &src, const size_t rows,
                        const size_t cols, mat3 &cast_buffer,
                        command_queue &cq) {
    #ifdef UCL_DEBUG
    assert(mat1::ROW_MAJOR==1 && mat2::ROW_MAJOR==1);
    assert(src.numel()>=rows*cols && cast_buffer.numel()>=rows*cols);
    if (mat1::VECTOR==0) assert(dst.rows()>=rows && dst.cols()>=cols);
    if (mat2::VECTOR==0) assert(src.rows()>=rows && src.cols()>=cols);
    if (mat3::VECTOR==0) {
      assert(cast_buffer.rows()>=rows && cast_buffer.cols()>=cols);
      assert(dst.rows()>=rows && dst.cols()>=cols);
    }
    #endif
    const size_t row_bytes=cols*sizeof(typename mat1::data_type);
    if (mat2::VECTOR) {
      if (mat3::VECTOR==0) {
        // VECTOR source is contiguous: nothing to skip between rows
        _pack_rows(cast_buffer,src,rows,cols,0,cast_buffer.cols()-cols);
        // cq must be forwarded here like in every other branch, otherwise
        // this transfer would not run on the caller's queue
        ucl_mv_cpy(dst,dst.row_bytes(),cast_buffer,cast_buffer.row_bytes(),
                   row_bytes,rows,cq);
      } else {
        _pack_rows(cast_buffer,src,rows,cols,0,0);
        ucl_mv_cpy(dst,dst.row_bytes(),cast_buffer,row_bytes,row_bytes,rows,
                   cq);
      }
    } else if (mat1::VECTOR) {
      _pack_rows(cast_buffer,src,rows,cols,src.cols()-cols,0);
      ucl_mv_cpy(dst,cast_buffer,row_bytes*rows,cq);
    } else {
      size_t buf_skip, buf_pitch;
      if (mat3::VECTOR==0) {
        buf_skip=cast_buffer.cols()-cols;
        buf_pitch=cast_buffer.row_bytes();
      } else {
        buf_skip=0;
        buf_pitch=row_bytes;
      }
      _pack_rows(cast_buffer,src,rows,cols,src.cols()-cols,buf_skip);
      ucl_mv_cpy(dst,dst.row_bytes(),cast_buffer,buf_pitch,row_bytes,rows,cq);
    }
  }
};
/// _ucl_cast_copy for the case where both MEM_TYPE arguments are 1.
/** No casting copy is implemented for this pair: each overload only fails an
  * assertion, so reaching one at run time indicates a dispatch error. **/
template <> struct _ucl_cast_copy<1,1> {
  /// (dst, src, numel, cast_buffer, cq): not supported.
  template <class mat1, class mat2, class mat3>
  static inline void cc(mat1 & /*dst*/, const mat2 & /*src*/,
                        const size_t /*numel*/, mat3 & /*cast_buffer*/,
                        command_queue & /*cq*/) {
    assert(0==1);
  }

  /// (dst, src, numel, cast_buffer): not supported.
  template <class mat1, class mat2, class mat3>
  static inline void cc(mat1 & /*dst*/, const mat2 & /*src*/,
                        const size_t /*numel*/, mat3 & /*cast_buffer*/) {
    assert(0==1);
  }

  /// (dst, src, rows, cols, cast_buffer): not supported.
  template <class mat1, class mat2, class mat3>
  static inline void cc(mat1 & /*dst*/, const mat2 & /*src*/,
                        const size_t /*rows*/, const size_t /*cols*/,
                        mat3 & /*cast_buffer*/) {
    assert(0==1);
  }

  /// (dst, src, rows, cols, cast_buffer, cq): not supported.
  template <class mat1, class mat2, class mat3>
  static inline void cc(mat1 & /*dst*/, const mat2 & /*src*/,
                        const size_t /*rows*/, const size_t /*cols*/,
                        mat3 & /*cast_buffer*/, command_queue & /*cq*/) {
    assert(0==1);
  }
};
/// _ucl_cast_copy for the case where both MEM_TYPE arguments are 0.
/** No casting copy is implemented for this pair: each overload only fails an
  * assertion, so reaching one at run time indicates a dispatch error. **/
template <> struct _ucl_cast_copy<0,0> {
  /// (dst, src, numel, cast_buffer, cq): not supported.
  template <class mat1, class mat2, class mat3>
  static inline void cc(mat1 & /*dst*/, const mat2 & /*src*/,
                        const size_t /*numel*/, mat3 & /*cast_buffer*/,
                        command_queue & /*cq*/) {
    assert(0==1);
  }

  /// (dst, src, numel, cast_buffer): not supported.
  template <class mat1, class mat2, class mat3>
  static inline void cc(mat1 & /*dst*/, const mat2 & /*src*/,
                        const size_t /*numel*/, mat3 & /*cast_buffer*/) {
    assert(0==1);
  }

  /// (dst, src, rows, cols, cast_buffer): not supported.
  template <class mat1, class mat2, class mat3>
  static inline void cc(mat1 & /*dst*/, const mat2 & /*src*/,
                        const size_t /*rows*/, const size_t /*cols*/,
                        mat3 & /*cast_buffer*/) {
    assert(0==1);
  }

  /// (dst, src, rows, cols, cast_buffer, cq): not supported.
  template <class mat1, class mat2, class mat3>
  static inline void cc(mat1 & /*dst*/, const mat2 & /*src*/,
                        const size_t /*rows*/, const size_t /*cols*/,
                        mat3 & /*cast_buffer*/, command_queue & /*cq*/) {
    assert(0==1);
  }
};
template <class mat1, class mat2, class mat3>
inline void ucl_cast_copy(mat1 &dst, const mat2 &src, const size_t numel,
mat3 &cast_buffer, command_queue &cq) {
#ifdef UCL_DEBUG
assert(dst.numel()>=numel && src.numel()>=numel);
assert(cast_buffer.numel()>=numel);
assert(mat1::ROW_MAJOR==1 && mat2::ROW_MAJOR==1);
#endif
if ((int)mat1::DATA_TYPE==(int)mat2::DATA_TYPE)
ucl_copy(dst,src,numel,cq);
else {
#ifdef UCL_DEBUG
_check_ucl_copy_perm(dst,src);
#endif
_ucl_cast_copy<mat1::MEM_TYPE,mat2::MEM_TYPE>::cc(dst,src,numel,
cast_buffer,cq);
}
}
template <class mat1, class mat2, class mat3>
inline void ucl_cast_copy(mat1 &dst, const mat2 &src, const size_t numel,
mat3 &cast_buffer, const bool async) {
#ifdef UCL_DEBUG
assert(dst.numel()>=numel && src.numel()>=numel);
assert(cast_buffer.numel()>=numel);
assert(mat1::ROW_MAJOR==1 && mat2::ROW_MAJOR==1);
_check_ucl_copy_perm(dst,src);
#endif
if ((int)mat1::DATA_TYPE==(int)mat2::DATA_TYPE)
ucl_copy(dst,src,numel,async);
else if (async)
_ucl_cast_copy<mat1::MEM_TYPE,mat2::MEM_TYPE>::cc(dst,src,numel,
cast_buffer,dst.cq());
else
_ucl_cast_copy<mat1::MEM_TYPE,mat2::MEM_TYPE>::cc(dst,src,numel,
cast_buffer);
}
template <class mat1, class mat2>
inline void ucl_copy(mat1 &dst, const mat2 &src, const size_t numel,
command_queue &cq) {
#ifdef UCL_DEBUG
assert(dst.row_size()*dst.rows()>=numel && src.row_size()*src.rows()>=numel);
assert(mat1::ROW_MAJOR==1 && mat2::ROW_MAJOR==1);
assert(mat1::ROW_MAJOR==1 && mat2::ROW_MAJOR==1);
_check_ucl_copy_perm(dst,src);
#endif
if (mat1::MEM_TYPE==1 && mat2::MEM_TYPE==1)
_host_host_copy<mat1::MEM_TYPE,mat2::MEM_TYPE>::hhc(dst,src,numel);
else if ((int)mat1::DATA_TYPE!=(int)mat2::DATA_TYPE &&
(mat1::MEM_TYPE==1 || mat2::MEM_TYPE==1)) {
if (mat1::MEM_TYPE==1) {
UCL_H_Vec<typename mat2::data_type> cast_buffer;
cast_buffer.alloc(numel,dst,UCL_READ_ONLY);
_ucl_cast_copy<mat1::MEM_TYPE,mat2::MEM_TYPE>::cc(dst,src,numel,
cast_buffer,cq);
} else {
UCL_H_Vec<typename mat1::data_type> cast_buffer;
cast_buffer.alloc(numel,dst,UCL_WRITE_ONLY);
_ucl_cast_copy<mat1::MEM_TYPE,mat2::MEM_TYPE>::cc(dst,src,numel,
cast_buffer,cq);
}
} else
ucl_mv_cpy(dst,src,numel*sizeof(typename mat2::data_type),cq);
}
template <class mat1, class mat2>
inline void ucl_copy(mat1 &dst, const mat2 &src, const size_t numel,
const bool async) {
#ifdef UCL_DEBUG
assert(dst.row_size()*dst.rows()>=numel && src.row_size()*src.rows()>=numel);
assert(mat1::ROW_MAJOR==1 && mat2::ROW_MAJOR==1);
_check_ucl_copy_perm(dst,src);
#endif
if (mat1::MEM_TYPE==1 && mat2::MEM_TYPE==1)
_host_host_copy<mat1::MEM_TYPE,mat2::MEM_TYPE>::hhc(dst,src,numel);
else if (async)
ucl_copy(dst,src,numel,dst.cq());
else if ((int)mat1::DATA_TYPE!=(int)mat2::DATA_TYPE &&
(mat1::MEM_TYPE==1 || mat2::MEM_TYPE==1)) {
if (mat1::MEM_TYPE==1) {
UCL_H_Vec<typename mat2::data_type> cast_buffer;
cast_buffer.alloc(numel,dst,UCL_READ_ONLY);
_ucl_cast_copy<mat1::MEM_TYPE,mat2::MEM_TYPE>::cc(dst,src,numel,
cast_buffer);
} else {
UCL_H_Vec<typename mat1::data_type> cast_buffer;
cast_buffer.alloc(numel,dst,UCL_WRITE_ONLY);
_ucl_cast_copy<mat1::MEM_TYPE,mat2::MEM_TYPE>::cc(dst,src,numel,
cast_buffer);
}
} else
ucl_mv_cpy(dst,src,numel*sizeof(typename mat2::data_type));
}
/// Copy a rows x cols region from src to dst, converting the element type
/// through cast_buffer when the DATA_TYPEs differ.
/** - Identical DATA_TYPEs: forwards to ucl_copy(dst,src,rows,cols,async);
  *   cast_buffer is not touched.
  * - Exactly one operand has MEM_TYPE==1: _ucl_cast_copy converts through the
  *   caller's cast_buffer, on dst.cq() when async is true and without a
  *   command queue otherwise. The asynchronous case uses the supplied buffer
  *   too, matching the numel overload and the command_queue overload, instead
  *   of staging through a buffer local to ucl_copy.
  * - Any other MEM_TYPE pair: when async is true the request is forwarded to
  *   ucl_copy(dst,src,rows,cols,dst.cq()) (kept for backward compatibility);
  *   when async is false it goes to _ucl_cast_copy.
  * \param cast_buffer staging container used for the conversion **/
template <class mat1, class mat2, class mat3>
inline void ucl_cast_copy(mat1 &dst, const mat2 &src, const size_t rows,
                          const size_t cols, mat3 &cast_buffer,
                          const bool async) {
  if ((int)mat1::DATA_TYPE==(int)mat2::DATA_TYPE) {
    ucl_copy(dst,src,rows,cols,async);
    return;
  }

  // True when exactly one of the two operands has MEM_TYPE==1
  const bool one_side_host = ((mat1::MEM_TYPE==1) != (mat2::MEM_TYPE==1));
  if (async && !one_side_host) {
    ucl_copy(dst,src,rows,cols,dst.cq());
    return;
  }

  #ifdef UCL_DEBUG
  _check_ucl_copy_perm(dst,src);
  #endif
  if (async)
    _ucl_cast_copy<mat1::MEM_TYPE,mat2::MEM_TYPE>::cc(dst,src,rows,cols,
                                                      cast_buffer,dst.cq());
  else
    _ucl_cast_copy<mat1::MEM_TYPE,mat2::MEM_TYPE>::cc(dst,src,rows,cols,
                                                      cast_buffer);
}
template <class mat1, class mat2, class mat3>
inline void ucl_cast_copy(mat1 &dst, const mat2 &src, const size_t rows,
const size_t cols, mat3 &cast_buffer,
command_queue &cq) {
if ((int)mat1::DATA_TYPE==(int)mat2::DATA_TYPE)
ucl_copy(dst,src,rows,cols,cq);
else {
#ifdef UCL_DEBUG
_check_ucl_copy_perm(dst,src);
#endif
_ucl_cast_copy<mat1::MEM_TYPE,mat2::MEM_TYPE>::cc(dst,src,rows,cols,
cast_buffer,cq);
}
}
template <class mat1, class mat2>
inline void ucl_copy(mat1 &dst, const mat2 &src, const size_t rows,
const size_t cols, command_queue &cq) {
#ifdef UCL_DEBUG
_check_ucl_copy_perm(dst,src);
#endif
if (mat1::MEM_TYPE==1 && mat2::MEM_TYPE==1)
_host_host_copy<mat1::MEM_TYPE,mat2::MEM_TYPE>::hhc(dst,src,rows,cols);
else if ((int)mat1::DATA_TYPE!=(int)mat2::DATA_TYPE &&
(mat1::MEM_TYPE==1 || mat2::MEM_TYPE==1)) {
if (mat1::MEM_TYPE==1) {
UCL_H_Vec<typename mat2::data_type> cast_buffer;
cast_buffer.alloc(rows*cols,dst,UCL_READ_ONLY);
_ucl_cast_copy<mat1::MEM_TYPE,mat2::MEM_TYPE>::cc(dst,src,rows,cols,
cast_buffer,cq);
} else {
UCL_H_Vec<typename mat1::data_type> cast_buffer;
cast_buffer.alloc(rows*cols,dst,UCL_WRITE_ONLY);
_ucl_cast_copy<mat1::MEM_TYPE,mat2::MEM_TYPE>::cc(dst,src,rows,cols,
cast_buffer,cq);
}
} else if (mat1::VECTOR) {
#ifdef UCL_DEBUG
assert(dst.numel()>=rows*cols && src.rows()>=rows && src.cols()>=cols);
assert(mat1::ROW_MAJOR==1 && mat2::ROW_MAJOR==1);
#endif
ucl_mv_cpy(dst,cols*sizeof(typename mat1::data_type),src,src.row_bytes(),
cols*sizeof(typename mat1::data_type),rows,
cq);
} else if (mat2::VECTOR) {
#ifdef UCL_DEBUG
assert(src.numel()>=rows*cols && dst.rows()>=rows && dst.cols()>=cols);
assert(mat1::ROW_MAJOR==1 && mat2::ROW_MAJOR==1);
#endif
ucl_mv_cpy(dst,dst.row_bytes(),src,cols*sizeof(typename mat1::data_type),
cols*sizeof(typename mat1::data_type),rows,cq);
} else {
#ifdef UCL_DEBUG
assert(src.rows()>=rows && src.cols()>=cols);
assert(dst.rows()>=rows && dst.cols()>=cols);
assert(mat1::ROW_MAJOR==1 && mat2::ROW_MAJOR==1);
#endif
ucl_mv_cpy(dst,dst.row_bytes(),src,src.row_bytes(),
cols*sizeof(typename mat1::data_type),rows,cq);
}
}
template <class mat1, class mat2>
inline void ucl_copy(mat1 &dst, const mat2 &src, const size_t rows,
const size_t cols, const bool async) {
#ifdef UCL_DEBUG
_check_ucl_copy_perm(dst,src);
#endif
if (async)
ucl_copy(dst,src,rows,cols,dst.cq());
else if (mat1::MEM_TYPE==1 && mat2::MEM_TYPE==1)
_host_host_copy<mat1::MEM_TYPE,mat2::MEM_TYPE>::hhc(dst,src,rows,cols);
else if ((int)mat1::DATA_TYPE!=(int)mat2::DATA_TYPE &&
(mat1::MEM_TYPE==1 || mat2::MEM_TYPE==1)) {
if (mat1::MEM_TYPE==1) {
UCL_H_Vec<typename mat2::data_type> cast_buffer;
cast_buffer.alloc(rows*cols,dst,UCL_READ_ONLY);
_ucl_cast_copy<mat1::MEM_TYPE,mat2::MEM_TYPE>::cc(dst,src,rows,cols,
cast_buffer);
} else {
UCL_H_Vec<typename mat1::data_type> cast_buffer;
cast_buffer.alloc(rows*cols,dst,UCL_WRITE_ONLY);
_ucl_cast_copy<mat1::MEM_TYPE,mat2::MEM_TYPE>::cc(dst,src,rows,cols,
cast_buffer);
}
} else if (mat1::VECTOR) {
#ifdef UCL_DEBUG
assert(dst.numel()>=rows*cols && src.rows()>=rows && src.cols()>=cols);
assert(mat1::ROW_MAJOR==1 && mat2::ROW_MAJOR==1);
assert(mat2::VECTOR==0);
#endif
ucl_mv_cpy(dst,cols*sizeof(typename mat1::data_type),src,src.row_bytes(),
cols*sizeof(typename mat1::data_type),rows);
} else if (mat2::VECTOR) {
#ifdef UCL_DEBUG
assert(src.numel()>=rows*cols && dst.rows()>=rows && dst.cols()>=cols);
assert(mat1::ROW_MAJOR==1 && mat2::ROW_MAJOR==1);
assert(mat1::VECTOR==0);
#endif
ucl_mv_cpy(dst,dst.row_bytes(),src,cols*sizeof(typename mat1::data_type),
cols*sizeof(typename mat1::data_type),rows);
} else {
#ifdef UCL_DEBUG
assert(src.rows()>=rows && src.cols()>=cols);
assert(dst.rows()>=rows && dst.cols()>=cols);
assert(mat1::ROW_MAJOR==1 && mat2::ROW_MAJOR==1);
#endif
ucl_mv_cpy(dst,dst.row_bytes(),src,src.row_bytes(),
cols*sizeof(typename mat1::data_type),rows);
}
}
/// Copy all of src into dst, converting the element type through cast_buffer
/// when the DATA_TYPEs differ.
/** The extent of the copy is chosen from the container traits:
  *  - identical DATA_TYPEs: forward to ucl_copy(dst,src,async)
  *  - src is PADDED, or dst is PADDED and src is not a VECTOR: 2-D copy of
  *    src.rows() x src.cols()
  *  - otherwise, dst is PADDED: 2-D copy of dst.rows() x dst.cols()
  *  - otherwise: 1-D copy of src.numel() elements **/
template <class mat1, class mat2, class mat3>
inline void ucl_cast_copy(mat1 &dst, const mat2 &src,
                          mat3 &cast_buffer, const bool async) {
  if ((int)mat1::DATA_TYPE==(int)mat2::DATA_TYPE) {
    ucl_copy(dst,src,async);
    return;
  }

  const bool use_src_shape =
    (mat2::PADDED==1 || (mat1::PADDED==1 && mat2::VECTOR==0));
  if (use_src_shape) {
    ucl_cast_copy(dst,src,src.rows(),src.cols(),cast_buffer,async);
    return;
  }
  if (mat1::PADDED==1) {
    ucl_cast_copy(dst,src,dst.rows(),dst.cols(),cast_buffer,async);
    return;
  }
  ucl_cast_copy(dst,src,src.numel(),cast_buffer,async);
}
/// Copy all of src into dst on command queue cq, converting the element type
/// through cast_buffer when the DATA_TYPEs differ.
/** The extent of the copy is chosen from the container traits:
  *  - identical DATA_TYPEs: forward to ucl_copy(dst,src,cq)
  *  - src is PADDED, or dst is PADDED and src is not a VECTOR: 2-D casting
  *    copy of src.rows() x src.cols()
  *  - otherwise, dst is PADDED: 2-D casting copy of dst.rows() x dst.cols()
  *  - otherwise: 1-D casting copy of src.numel() elements
  *
  * The casting branches must go through ucl_cast_copy: ucl_copy has no
  * overload that accepts a cast buffer, so forwarding to it with cast_buffer
  * in the argument list cannot compile once this template is instantiated. **/
template <class mat1, class mat2, class mat3>
inline void ucl_cast_copy(mat1 &dst, const mat2 &src,
                          mat3 &cast_buffer, command_queue &cq) {
  if ((int)mat1::DATA_TYPE==(int)mat2::DATA_TYPE)
    ucl_copy(dst,src,cq);
  else if (mat2::PADDED==1 || (mat1::PADDED==1 && mat2::VECTOR==0) )
    ucl_cast_copy(dst,src,src.rows(),src.cols(),cast_buffer,cq);
  else if (mat1::PADDED==1)
    ucl_cast_copy(dst,src,dst.rows(),dst.cols(),cast_buffer,cq);
  else
    ucl_cast_copy(dst,src,src.numel(),cast_buffer,cq);
}
/// Copy all of src into dst using command queue cq.
/** The extent of the copy is chosen as follows:
  *  - both operands report the same row_bytes(), neither is a UCL_VIEW and
  *    the DATA_TYPEs match: 1-D copy of src.row_size()*src.rows() elements
  *  - src is PADDED, or dst is PADDED and src is not a VECTOR: 2-D copy of
  *    src.rows() x src.cols()
  *  - otherwise, dst is PADDED: 2-D copy of dst.rows() x dst.cols()
  *  - otherwise: 1-D copy of src.numel() elements **/
template <class mat1, class mat2>
inline void ucl_copy(mat1 &dst, const mat2 &src, command_queue &cq) {
  const bool same_layout =
    (dst.row_bytes()==src.row_bytes() &&
     src.kind()!=UCL_VIEW && dst.kind()!=UCL_VIEW &&
     (int)mat1::DATA_TYPE==(int)mat2::DATA_TYPE);
  if (same_layout) {
    // Rows line up byte for byte, so everything moves as one flat range
    ucl_copy(dst,src,src.row_size()*src.rows(),cq);
    return;
  }

  const bool use_src_shape =
    (mat2::PADDED==1 || (mat1::PADDED==1 && mat2::VECTOR==0));
  if (use_src_shape) {
    ucl_copy(dst,src,src.rows(),src.cols(),cq);
    return;
  }
  if (mat1::PADDED==1) {
    ucl_copy(dst,src,dst.rows(),dst.cols(),cq);
    return;
  }
  ucl_copy(dst,src,src.numel(),cq);
}
/// Copy all of src into dst.
/** When async is true the request is forwarded to ucl_copy(dst,src,dst.cq()).
  * Otherwise the extent of the copy is chosen as follows:
  *  - both operands report the same row_bytes(), neither is a UCL_VIEW and
  *    the DATA_TYPEs match: 1-D copy of src.row_size()*src.rows() elements
  *  - src is PADDED, or dst is PADDED and src is not a VECTOR: 2-D copy of
  *    src.rows() x src.cols()
  *  - otherwise, dst is PADDED: 2-D copy of dst.rows() x dst.cols()
  *  - otherwise: 1-D copy of src.numel() elements **/
template <class mat1, class mat2>
inline void ucl_copy(mat1 &dst, const mat2 &src, const bool async) {
  if (async) {
    ucl_copy(dst,src,dst.cq());
    return;
  }

  const bool same_layout =
    (dst.row_bytes()==src.row_bytes() &&
     src.kind()!=UCL_VIEW && dst.kind()!=UCL_VIEW &&
     (int)mat1::DATA_TYPE==(int)mat2::DATA_TYPE);
  if (same_layout) {
    // Rows line up byte for byte, so everything moves as one flat range
    ucl_copy(dst,src,src.row_size()*src.rows(),async);
    return;
  }

  const bool use_src_shape =
    (mat2::PADDED==1 || (mat1::PADDED==1 && mat2::VECTOR==0));
  if (use_src_shape) {
    ucl_copy(dst,src,src.rows(),src.cols(),async);
    return;
  }
  if (mat1::PADDED==1) {
    ucl_copy(dst,src,dst.rows(),dst.cols(),async);
    return;
  }
  ucl_copy(dst,src,src.numel(),async);
}
#endif