#include "megbrain/tensorrt/tensorrt_engine_cache.h"
#if MGB_ENABLE_TENSOR_RT
#if defined(_WIN32)
#include <io.h>
#define F_OK 0
#define access(a, b) _access(a, b)
#elif __linux__ || __unix__ || __APPLE__
#include <unistd.h>
#endif
using namespace mgb;
//! Static on/off flag for the engine cache. Starts out false; set to true by
//! enable_engine_cache(true) and reset to false by disable_engine_cache().
bool TensorRTEngineCache::sm_enable_engine_cache = false;
/*!
 * \brief build the cache key that identifies the engine of a TensorRT operator
 *
 * The key is "dev=<device name>;cap=<major>.<minor>;trt=<TensorRT version>;"
 * followed by the operator's cname(). The device properties are those of the
 * comp node of the operator's first output.
 *
 * \param opr operator to build the key for; its first output must live on a
 *      CUDA comp node, otherwise the assertion below fails
 * \return the assembled key
 */
std::string TensorRTEngineCache::make_key_from_trt_opr(const opr::TensorRTOpr* opr) {
    auto&& env = CompNodeEnv::from_comp_node(opr->output(0)->comp_node());
    mgb_assert(
            env.property().type == CompNode::DeviceType::CUDA,
            "tensorrt opr only support CompNode with DeviceType::CUDA");

    auto&& device_prop = env.cuda_env().device_prop;
    const int trt_version = getInferLibVersion();

    // device/toolkit prefix first, operator name last
    std::string result = ssprintf(
            "dev=%s;cap=%d.%d;trt=%d;", device_prop.name, device_prop.major,
            device_prop.minor, trt_version);
    result += opr->cname();
    return result;
}
/*!
 * \brief switch the engine cache on and report its current state
 *
 * Passing false never switches the cache off; it only queries the flag. Use
 * disable_engine_cache() to turn the cache off.
 *
 * \param enable_engine_cache true to enable the cache, false to leave the
 *      flag untouched
 * \return whether the cache is enabled after this call
 */
bool TensorRTEngineCache::enable_engine_cache(bool enable_engine_cache) {
    if (enable_engine_cache) {
        sm_enable_engine_cache = true;
    }
    return sm_enable_engine_cache;
}
//! Switch the engine cache off by clearing the static enable flag.
void TensorRTEngineCache::disable_engine_cache() {
sm_enable_engine_cache = false;
}
/*!
 * \brief install a new cache implementation
 * \param impl implementation to install; must not be null
 * \return the implementation that was installed before this call
 */
std::shared_ptr<TensorRTEngineCache> TensorRTEngineCache::set_impl(
        std::shared_ptr<TensorRTEngineCache> impl) {
    mgb_assert(impl);
    // take the old implementation out first, then store the new one
    std::shared_ptr<TensorRTEngineCache> previous = std::move(sm_impl);
    sm_impl = std::move(impl);
    return previous;
}
/*!
 * \brief TensorRT engine cache that keeps every serialized engine in host
 *      memory
 *
 * The cache either starts out empty or is populated from a serialized blob.
 * The blob layout, as consumed by read_cache() and EngineStorage::init_from(),
 * is:
 *
 *      uint32_t nr_engines
 *      nr_engines x { uint32_t key_size;  char    key[key_size];
 *                     uint32_t data_size; uint8_t data[data_size]; }
 *
 * Integers are copied out with memcpy, i.e. they are read in host byte order.
 * Every length is checked against the bytes remaining in the blob; a violation
 * trips mgb_assert. dump_cache() is a no-op: nothing is ever written back.
 */
class TensorRTEngineCacheMemory final : public TensorRTEngineCache {
    //! read cursor inside the blob passed to the constructor
    const uint8_t* m_ptr = nullptr;
    //! total size of that blob in bytes
    size_t m_size = 0;
    //! number of bytes consumed so far; never exceeds m_size
    size_t m_offset = 0;

    //! copy one trivially copyable value out of the blob and advance the cursor
    template <typename T>
    void read(T& val) {
        static_assert(
                std::is_trivially_copyable<T>::value,
                "only support trivially copyable type");
        // compare against the remaining byte count: unlike
        // m_offset + sizeof(T), this form cannot wrap around
        mgb_assert(sizeof(T) <= m_size - m_offset);
        memcpy(&val, m_ptr, sizeof(T));
        m_offset += sizeof(T);
        m_ptr += sizeof(T);
    }

    //! copy \p size raw bytes out of the blob into \p buf and advance the
    //! cursor
    template <typename T>
    void read(T* buf, size_t size) {
        static_assert(
                std::is_trivially_copyable<T>::value && sizeof(T) == 1,
                "only support read bytes");
        mgb_assert(size <= m_size - m_offset);
        memcpy(buf, m_ptr, size);
        m_offset += size;
        m_ptr += size;
    }

    //! parse the whole blob into m_cache
    void read_cache() {
        uint32_t nr_engines;
        read(nr_engines);
        for (uint32_t i = 0; i < nr_engines; ++i) {
            uint32_t key_size;
            read(key_size);
            // reject a corrupt length before allocating storage for it
            mgb_assert(key_size <= m_size - m_offset);
            std::string key(key_size, '\0');
            read(&key[0], key.size());
            mgb_log_debug("read key: %s", key.c_str());
            m_cache[std::move(key)].init_from(*this);
        }
    }

    //! an Engine whose bytes are owned by this object
    struct EngineStorage : public Engine {
        std::unique_ptr<uint8_t[]> data_refhold;

        //! read "uint32_t data_size; uint8_t data[data_size]" from \p io
        EngineStorage& init_from(TensorRTEngineCacheMemory& io) {
            uint32_t data_size;
            io.read(data_size);
            // reject a corrupt length before allocating storage for it
            mgb_assert(data_size <= io.m_size - io.m_offset);
            auto fresh = std::make_unique<uint8_t[]>(data_size);
            io.read(fresh.get(), data_size);
            // commit only after the read succeeded
            data_refhold = std::move(fresh);
            size = data_size;
            ptr = data_refhold.get();
            return *this;
        }

        //! take a private copy of \p buf_size bytes starting at \p buf
        EngineStorage& init_from_buf(const void* buf, size_t buf_size) {
            // Fill the new buffer before releasing the old one: buf may point
            // into the buffer currently held here (e.g. an Engine obtained
            // from get() being put back under the same key).
            auto fresh = std::make_unique<uint8_t[]>(buf_size);
            if (buf_size) {
                memcpy(fresh.get(), buf, buf_size);
            }
            data_refhold = std::move(fresh);
            size = buf_size;
            ptr = data_refhold.get();
            return *this;
        }
    };

    std::unordered_map<std::string, EngineStorage> m_cache;
    //! taken by get() and put() around every access to m_cache
    std::mutex m_mtx;

public:
    //! create an empty cache
    TensorRTEngineCacheMemory() = default;

    /*!
     * \brief create a cache populated from a serialized blob
     * \param bin start of the blob; it is only read during construction,
     *      every engine is copied into storage owned by the cache
     * \param size size of the blob in bytes
     */
    TensorRTEngineCacheMemory(const uint8_t* bin, size_t size)
            : m_ptr{bin}, m_size{size} {
        read_cache();
    }

    /*!
     * \brief look up an engine by key
     * \return None if the key is unknown; otherwise an Engine whose pointer
     *      refers to storage owned by the cache. That storage is replaced by
     *      a later put() on the same key.
     */
    Maybe<Engine> get(const std::string& key) override {
        MGB_LOCK_GUARD(m_mtx);
        auto find = m_cache.find(key);
        if (find == m_cache.end())
            return None;
        return find->second;
    }

    //! store a private copy of \p value under \p key, replacing any previous
    //! entry
    void put(const std::string& key, const Engine& value) override {
        MGB_LOCK_GUARD(m_mtx);
        m_cache[key].init_from_buf(value.ptr, value.size);
    }

    //! nothing to persist for the in-memory cache
    void dump_cache() override {}
};
/*!
 * \brief load every cached engine through the read() helpers
 *
 * Reads a uint32_t engine count, then for each engine a uint32_t key length,
 * the key bytes, and finally the engine payload via EngineStorage::init_from().
 */
void TensorRTEngineCacheIO::read_cache() {
    uint32_t engine_count;
    read(engine_count);

    for (uint32_t remaining = engine_count; remaining > 0; --remaining) {
        uint32_t key_len;
        read(key_len);

        // zero-filled string of the announced length, overwritten in place
        std::string engine_key(key_len, '\0');
        read(&engine_key[0], engine_key.size());
        mgb_log_debug("read key: %s", engine_key.c_str());

        auto& storage = m_cache[std::move(engine_key)];
        storage.init_from(*this);
    }
}
/*!
 * \brief create a file-backed engine cache
 *
 * If \p filename already exists, it is opened, its content is loaded with
 * read_cache(), the file is closed again and m_update_cache is set. If it
 * does not exist, the cache starts out empty and no file is touched here.
 *
 * \param filename path of the cache file
 * \throw SystemError if the file exists but cannot be opened
 */
TensorRTEngineCacheIO::TensorRTEngineCacheIO(std::string filename)
        : m_filename{std::move(filename)} {
    mgb_log_debug("create tensorrt engine cache: %s", m_filename.c_str());

    const bool file_exists = access(m_filename.c_str(), F_OK) == 0;
    if (!file_exists) {
        mgb_log_debug(
                "tensorrt engine cache %s not exists, will create tensorrt "
                "engine cache file",
                m_filename.c_str());
        return;
    }

    mgb_log("tensorrt engine cache %s already exists, read from binary "
            "file.",
            m_filename.c_str());
    m_ptr = fopen(m_filename.c_str(), "r+b");
    mgb_throw_if(
            m_ptr == nullptr, SystemError,
            "failed to open tensorrt engine file %s %s", m_filename.c_str(),
            strerror(errno));

    // closes the file when the constructor returns or read_cache() throws
    std::unique_ptr<FILE, int (*)(FILE*)> file_guard{m_ptr, ::fclose};
    read_cache();
    m_update_cache = true;
}
void TensorRTEngineCacheIO::dump_cache() {
if (m_update_cache)
mgb_log_debug(
"tensorrt engine cache %s already exists, and will be "
"rewritten during dumping cache to this file.",
m_filename.c_str());
m_ptr = fopen(m_filename.c_str(), "wb");
mgb_throw_if(
m_ptr == nullptr, SystemError, "failed to open tensorrt engine file %s %s",
m_filename.c_str(), strerror(errno));
std::unique_ptr<FILE, int (*)(FILE*)> fptr_close{m_ptr, ::fclose};
uint32_t nr_engines = m_cache.size();
write(nr_engines);
for (auto&& cached_engine : m_cache) {
uint32_t key_size = cached_engine.first.size();
write(key_size);
write(cached_engine.first.data(), key_size);
cached_engine.second.write_to(*this);
}
}
/*!
 * \brief look up an engine by key while holding m_mtx
 * \return the stored entry converted to Engine, or None if the key is not in
 *      the cache
 */
Maybe<TensorRTEngineCache::Engine> TensorRTEngineCacheIO::get(const std::string& key) {
    MGB_LOCK_GUARD(m_mtx);
    auto entry = m_cache.find(key);
    if (entry != m_cache.end()) {
        return entry->second;
    }
    return None;
}
/*!
 * \brief store an engine under \p key while holding m_mtx
 *
 * The entry is created if it does not exist yet and is then (re)initialized
 * from the bytes described by \p value via EngineStorage::init_from_buf().
 */
void TensorRTEngineCacheIO::put(const std::string& key, const Engine& value) {
    MGB_LOCK_GUARD(m_mtx);
    auto& storage = m_cache[key];
    storage.init_from_buf(value.ptr, value.size);
}
//! Currently installed cache implementation. Initialized to an empty
//! TensorRTEngineCacheMemory and replaced through set_impl(). The definition
//! sits after the TensorRTEngineCacheMemory class because make_shared needs
//! the complete type.
MGE_WIN_DECLSPEC_DATA std::shared_ptr<TensorRTEngineCache>
TensorRTEngineCache::sm_impl = std::make_shared<TensorRTEngineCacheMemory>();
#endif