megenginelite-sys 1.8.2

/**
 * \file dnn/src/cuda/local/cuda-convnet2/nvmatrix.cuh
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
/**
 * Copyright 2014 Google Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * --------------------------------------------------------------------------
 * * This file has been modified by Megvii ("Megvii Modifications").
 * * All Megvii Modifications are Copyright (C) 2014-2021 Megvii Inc. All rights
 * reserved.
 * --------------------------------------------------------------------------
 */

#pragma once
#include <cublas_v2.h>
#include "src/cuda/utils.cuh"

namespace megdnn {
namespace cuda {

const int TEXTURE_SIZE_MAX = 1 << 29;

struct MemorySegment {
    float* data;
    MemorySegment(float* data) : data(data) {}
};

struct NVMatrix {
    NVMatrix(MemorySegment* seg, int row, int col)
            : seg(seg), row(row), col(col), stride(col), _texObj(0) {}
    NVMatrix(MemorySegment* seg, int row, int col, int stride)
            : seg(seg), row(row), col(col), stride(stride), _texObj(0) {}
    float* getDevData() { return seg->data; }
    MemorySegment* seg;
    int row, col, stride;
    cudaTextureObject_t _texObj;
    // target must be initialized before transpose.
    void transpose(
            const NVMatrix& target, cublasHandle_t handle, float* one, float* zero) {
        cublas_check(cublasSgeam(
                handle, CUBLAS_OP_T, CUBLAS_OP_T, row, col, one, seg->data,
                this->stride, zero, seg->data, this->stride, target.seg->data,
                target.stride));
    }
    cudaTextureObject_t getTextureObject() {
        if (_texObj == 0) {
            struct cudaResourceDesc resDesc;
            memset(&resDesc, 0, sizeof(resDesc));
            resDesc.resType = cudaResourceTypeLinear;
            resDesc.res.linear.devPtr = getDevData();
            resDesc.res.linear.sizeInBytes = getNumDataBytes();
            resDesc.res.linear.desc =
                    cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
            struct cudaTextureDesc texDesc;
            memset(&texDesc, 0, sizeof(texDesc));
            cuda_check(cudaCreateTextureObject(&_texObj, &resDesc, &texDesc, NULL));
        }
        megdnn_assert_internal(_texObj != 0);
        return _texObj;
    }
    ~NVMatrix() {
        if (_texObj) {
            cuda_check(cudaDestroyTextureObject(_texObj));
        }
    }
    int getNumDataBytes() { return row * col * sizeof(float); }
    int getNumRows() { return row; }
    int getNumCols() { return col; }
    int getStride() { return stride; }
    bool isTrans() { return false; }
    bool isContiguous() { return true; }
    void resize(int row, int col) {
        megdnn_assert_internal(row * col == this->row * this->col);
        this->row = row;
        this->col = col;
    }
};

}  // namespace cuda
}  // namespace megdnn