#include "megbrain_build_config.h"
#if MGB_CUDA
#ifdef _WIN32
#include <Windows.h>
#else
#include <dlfcn.h>
#endif
#include "NvOF.h"
// Stores the requested mode and performance preset and fills in the input
// buffer descriptor. The output grid size is set to
// NV_OF_OUTPUT_VECTOR_GRID_SIZE_MAX here; Init() overwrites it later.
NvOF::NvOF(
        uint32_t nWidth, uint32_t nHeight, NV_OF_BUFFER_FORMAT eInBufFmt,
        NV_OF_MODE eMode, NV_OF_PERF_LEVEL preset)
        : m_nOutGridSize(NV_OF_OUTPUT_VECTOR_GRID_SIZE_MAX),
          m_ePreset(preset),
          m_ofMode(eMode) {
    // ABGR8 input uses an element size of 4; every other format uses 1.
    m_inputElementSize = (eInBufFmt == NV_OF_BUFFER_FORMAT_ABGR8) ? 4 : 1;

    // Clear the whole descriptor before setting the fields we care about.
    memset(&m_inputBufferDesc, 0, sizeof(m_inputBufferDesc));
    m_inputBufferDesc.bufferUsage = NV_OF_BUFFER_USAGE_INPUT;
    m_inputBufferDesc.bufferFormat = eInBufFmt;
    m_inputBufferDesc.width = nWidth;
    m_inputBufferDesc.height = nHeight;
}
// Returns true when nOutGridSize equals one of the grid sizes reported by
// DoGetOutputGridSizes(), false otherwise.
bool NvOF::CheckGridSize(uint32_t nOutGridSize) {
    // Zero-initialised so that an indeterminate value can never be used as
    // an allocation size if the query leaves the count untouched.
    uint32_t count = 0;
    // Called with a null array first; the count written back sizes the array
    // handed to the second call.
    DoGetOutputGridSizes(nullptr, &count);

    // Value-initialised storage: entries the query does not fill read as 0
    // rather than as garbage.
    std::vector<uint32_t> supported(count);
    DoGetOutputGridSizes(supported.data(), &count);

    // Never read past what was allocated, even if the second call reports a
    // different count than the first.
    for (uint32_t i = 0; i < count && i < supported.size(); ++i) {
        if (supported[i] == nOutGridSize) {
            return true;
        }
    }
    return false;
}
// Looks for a usable grid size that is not smaller than nOutGridSize among
// the sizes reported by DoGetOutputGridSizes().
//
// nextMinOutGridSize receives:
//   - nOutGridSize itself when it is reported as-is, or
//   - the smallest reported size that is greater than nOutGridSize and less
//     than NV_OF_OUTPUT_VECTOR_GRID_SIZE_MAX, or
//   - NV_OF_OUTPUT_VECTOR_GRID_SIZE_MAX when neither exists.
// Returns true in the first two cases and false in the last.
bool NvOF::GetNextMinGridSize(uint32_t nOutGridSize, uint32_t& nextMinOutGridSize) {
    // Zero-initialised so that an indeterminate value can never be used as
    // an allocation size if the query leaves the count untouched.
    uint32_t count = 0;
    // Called with a null array first; the count written back sizes the array
    // handed to the second call.
    DoGetOutputGridSizes(nullptr, &count);

    // Value-initialised storage: entries the query does not fill read as 0
    // rather than as garbage.
    std::vector<uint32_t> supported(count);
    DoGetOutputGridSizes(supported.data(), &count);

    nextMinOutGridSize = NV_OF_OUTPUT_VECTOR_GRID_SIZE_MAX;
    // Never read past what was allocated, even if the second call reports a
    // different count than the first.
    for (uint32_t i = 0; i < count && i < supported.size(); ++i) {
        const uint32_t candidate = supported[i];
        if (candidate == nOutGridSize) {
            // Exact match: nothing better can be found.
            nextMinOutGridSize = nOutGridSize;
            return true;
        }
        if (nOutGridSize < candidate && candidate < nextMinOutGridSize) {
            nextMinOutGridSize = candidate;
        }
    }
    // Still at the sentinel means no larger size was found.
    return nextMinOutGridSize < NV_OF_OUTPUT_VECTOR_GRID_SIZE_MAX;
}
// Derives the output, cost and hint buffer descriptors from the input
// dimensions and nOutGridSize, fills m_initParams and passes it to DoInit().
//
// Throws MegBrainError when nOutGridSize is 0 or when the mode given at
// construction is neither NV_OF_MODE_OPTICALFLOW nor
// NV_OF_MODE_STEREODISPARITY.
void NvOF::Init(uint32_t nOutGridSize) {
    // A zero grid size would divide by zero in the rounding-up division
    // below, so reject it up front.
    if (nOutGridSize == 0) {
        mgb_throw(
                MegBrainError,
                "NVOF: Output grid size must be non-zero err type: "
                "NV_OF_ERR_INVALID_PARAM");
    }
    m_nOutGridSize = nOutGridSize;

    // Output dimensions are the input dimensions divided by the grid size,
    // rounded up.
    auto nOutWidth = (m_inputBufferDesc.width + m_nOutGridSize - 1) / m_nOutGridSize;
    auto nOutHeight = (m_inputBufferDesc.height + m_nOutGridSize - 1) / m_nOutGridSize;

    // The mode selects both the output buffer format and the output element
    // size.
    auto outBufFmt = NV_OF_BUFFER_FORMAT_SHORT2;
    if (m_ofMode == NV_OF_MODE_OPTICALFLOW) {
        m_outputElementSize = sizeof(NV_OF_FLOW_VECTOR);
    } else if (m_ofMode == NV_OF_MODE_STEREODISPARITY) {
        outBufFmt = NV_OF_BUFFER_FORMAT_SHORT;
        m_outputElementSize = sizeof(NV_OF_STEREO_DISPARITY);
    } else {
        mgb_throw(
                MegBrainError,
                "NVOF: Unsupported OF mode err type: NV_OF_ERR_INVALID_PARAM");
    }

    memset(&m_outputBufferDesc, 0, sizeof(m_outputBufferDesc));
    m_outputBufferDesc.width = nOutWidth;
    m_outputBufferDesc.height = nOutHeight;
    m_outputBufferDesc.bufferFormat = outBufFmt;
    m_outputBufferDesc.bufferUsage = NV_OF_BUFFER_USAGE_OUTPUT;

    memset(&m_costBufferDesc, 0, sizeof(m_costBufferDesc));
    m_costBufferDesc.width = nOutWidth;
    m_costBufferDesc.height = nOutHeight;
    m_costBufferDesc.bufferFormat = NV_OF_BUFFER_FORMAT_UINT;
    m_costBufferDesc.bufferUsage = NV_OF_BUFFER_USAGE_COST;
    m_costBufElementSize = sizeof(uint32_t);

    // The hint buffer uses the same format and element size as the output.
    memset(&m_hintBufferDesc, 0, sizeof(m_hintBufferDesc));
    m_hintBufferDesc.width = nOutWidth;
    m_hintBufferDesc.height = nOutHeight;
    m_hintBufferDesc.bufferFormat = outBufFmt;
    m_hintBufferDesc.bufferUsage = NV_OF_BUFFER_USAGE_HINT;
    m_hintBufElementSize = m_outputElementSize;

    // External hints and cost output are both switched off here; Execute()
    // reads these two flags to decide whether to pass the hint/cost buffers.
    memset(&m_initParams, 0, sizeof(m_initParams));
    m_initParams.width = m_inputBufferDesc.width;
    m_initParams.height = m_inputBufferDesc.height;
    m_initParams.enableExternalHints = NV_OF_FALSE;
    m_initParams.enableOutputCost = NV_OF_FALSE;
    m_initParams.hintGridSize = NV_OF_HINT_VECTOR_GRID_SIZE_UNDEFINED;
    m_initParams.outGridSize = (NV_OF_OUTPUT_VECTOR_GRID_SIZE)m_nOutGridSize;
    m_initParams.mode = m_ofMode;
    m_initParams.perfLevel = m_ePreset;
    DoInit(m_initParams);
}
// Builds the execute input/output parameter structs from the given buffers
// and forwards them to DoExecute().
//
// hintBuffer is only dereferenced when m_initParams.enableExternalHints is
// NV_OF_TRUE, and costBuffer only when m_initParams.enableOutputCost is
// NV_OF_TRUE; otherwise the corresponding handle is passed as nullptr.
// image1, image2 and outputBuffer are always dereferenced.
void NvOF::Execute(
        NvOFBuffer* image1, NvOFBuffer* image2, NvOFBuffer* outputBuffer,
        NvOFBuffer* hintBuffer, NvOFBuffer* costBuffer) {
    NV_OF_EXECUTE_INPUT_PARAMS inParams;
    NV_OF_EXECUTE_OUTPUT_PARAMS outParams;

    memset(&inParams, 0, sizeof(inParams));
    inParams.inputFrame = image1->getOFBufferHandle();
    inParams.referenceFrame = image2->getOFBufferHandle();
    inParams.disableTemporalHints = NV_OF_FALSE;
    if (m_initParams.enableExternalHints == NV_OF_TRUE) {
        inParams.externalHints = hintBuffer->getOFBufferHandle();
    } else {
        inParams.externalHints = nullptr;
    }

    memset(&outParams, 0, sizeof(outParams));
    outParams.outputBuffer = outputBuffer->getOFBufferHandle();
    if (m_initParams.enableOutputCost == NV_OF_TRUE) {
        outParams.outputCostBuffer = costBuffer->getOFBufferHandle();
    } else {
        outParams.outputCostBuffer = nullptr;
    }

    DoExecute(inParams, outParams);
}
// Allocates numBuffers buffers through DoAllocBuffers(), using the stored
// descriptor and element size that belong to the requested usage (input,
// output, cost or hint). Any other usage value throws MegBrainError.
std::vector<std::unique_ptr<NvOFBuffer>> NvOF::CreateBuffers(
        NV_OF_BUFFER_USAGE usage, uint32_t numBuffers) {
    std::vector<std::unique_ptr<NvOFBuffer>> result;
    switch (usage) {
        case NV_OF_BUFFER_USAGE_INPUT:
            result = DoAllocBuffers(m_inputBufferDesc, m_inputElementSize, numBuffers);
            break;
        case NV_OF_BUFFER_USAGE_OUTPUT:
            result = DoAllocBuffers(
                    m_outputBufferDesc, m_outputElementSize, numBuffers);
            break;
        case NV_OF_BUFFER_USAGE_COST:
            result = DoAllocBuffers(m_costBufferDesc, m_costBufElementSize, numBuffers);
            break;
        case NV_OF_BUFFER_USAGE_HINT:
            result = DoAllocBuffers(m_hintBufferDesc, m_hintBufElementSize, numBuffers);
            break;
        default:
            mgb_throw(
                    MegBrainError,
                    "NVOF: Invalid parameter err type: NV_OF_ERR_GENERIC");
    }
    return result;
}
// Allocates numBuffers output buffers of an explicit nWidth x nHeight size
// through DoAllocBuffers(), reusing the output buffer format and output
// element size already stored on this object. Only
// NV_OF_BUFFER_USAGE_OUTPUT is accepted; any other usage throws
// MegBrainError.
std::vector<std::unique_ptr<NvOFBuffer>> NvOF::CreateBuffers(
        uint32_t nWidth, uint32_t nHeight, NV_OF_BUFFER_USAGE usage,
        uint32_t numBuffers) {
    if (usage != NV_OF_BUFFER_USAGE_OUTPUT) {
        mgb_throw(MegBrainError, "NVOF: Invalid parameter err type: NV_OF_ERR_GENERIC");
    }

    // Zero the descriptor first, as every other descriptor in this file is,
    // so no field is handed to the allocator with an indeterminate value.
    NV_OF_BUFFER_DESCRIPTOR bufferDesc;
    memset(&bufferDesc, 0, sizeof(bufferDesc));
    bufferDesc.width = nWidth;
    bufferDesc.height = nHeight;
    bufferDesc.bufferFormat = m_outputBufferDesc.bufferFormat;
    bufferDesc.bufferUsage = NV_OF_BUFFER_USAGE_OUTPUT;

    std::vector<std::unique_ptr<NvOFBuffer>> ofBuffers =
            DoAllocBuffers(bufferDesc, m_outputElementSize, numBuffers);
    return ofBuffers;
}
void NvOFAPI::LoadNvOFAPI() {
#if defined(_WIN32)
#if defined(_WIN64)
HMODULE hModule = LoadLibrary(TEXT("nvofapi64.dll"));
#else
HMODULE hModule = LoadLibrary(TEXT("nvofapi.dll"));
#endif
#else
void* hModule = dlopen("libnvidia-opticalflow.so.1", RTLD_LAZY);
#endif
if (hModule == NULL) {
mgb_throw(
MegBrainError,
"NVOF: NVOF library file not found. Please ensure that the "
"NVIDIA driver is installed type: NV_OF_ERR_OF_NOT_AVAILABLE");
}
m_hModule = hModule;
}
#endif