#include <atomic>
#include <limits>
#include <thread>
#include "example.h"
#if LITE_BUILD_WITH_MGE
#include <cstdio>
#include "misc.h"
using namespace lite;
using namespace example;
namespace {
//! Print the name and every shape dimension of the first \p output_size
//! outputs of \p network.
//!
//! \param network      network whose outputs are described
//! \param output_size  number of outputs to print; callers pass
//!                     get_all_output_name().size()
void output_info(std::shared_ptr<Network> network, size_t output_size) {
    //! fetch the name list once instead of once per output
    const auto output_names = network->get_all_output_name();
    for (size_t index = 0; index < output_size; index++) {
        printf("output[%zu] names %s \n", index, output_names[index].c_str());
        std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(index);
        //! likewise query the layout once per tensor, not once per dimension
        auto layout = output_tensor->get_layout();
        for (size_t i = 0; i < layout.ndim; i++) {
            printf("output[%zu] tensor.shape[%zu] %zu \n", index, i,
                   layout.shapes[i]);
        }
    }
}
//! Fold \p count elements of type \p T stored at \p data into the running
//! statistics. Every element is converted to float before it is compared or
//! summed, and the index of the first element reaching each extreme is kept.
template <typename T>
void accumulate_output_stats(
        const void* data, size_t count, float& max, size_t& max_idx, float& min,
        size_t& min_idx, float& sum) {
    const T* values = static_cast<const T*>(data);
    for (size_t i = 0; i < count; i++) {
        const float value = static_cast<float>(values[i]);
        sum += value;
        if (max < value) {
            max = value;
            max_idx = i;
        }
        if (min > value) {
            min = value;
            min_idx = i;
        }
    }
}

//! Print element count, max/min (with their indices) and sum for the first
//! \p output_size outputs of \p network.
//!
//! Only LITE_FLOAT, LITE_INT, LITE_INT8 and LITE_UINT8 outputs are analysed;
//! any other data type is reported and skipped.
//!
//! \param network      network whose output tensors are read
//! \param output_size  number of outputs to analyse
void output_data_info(std::shared_ptr<Network> network, size_t output_size) {
    for (size_t index = 0; index < output_size; index++) {
        auto output_tensor = network->get_output_tensor(index);
        void* out_data = output_tensor->get_memory_ptr();
        size_t out_length = output_tensor->get_tensor_total_size_in_byte() /
                            output_tensor->get_layout().get_elem_size();
        LiteDataType dtype = output_tensor->get_layout().data_type;

        //! start from the float extremes so that data lying entirely outside
        //! an arbitrary window (e.g. all values above 1000) is still reported
        //! correctly
        float max = std::numeric_limits<float>::lowest();
        float min = std::numeric_limits<float>::max();
        size_t max_idx = 0;
        size_t min_idx = 0;
        float sum = 0.0f;

        bool known_dtype = true;
        switch (dtype) {
            case LiteDataType::LITE_FLOAT:
                accumulate_output_stats<float>(
                        out_data, out_length, max, max_idx, min, min_idx, sum);
                break;
            case LiteDataType::LITE_INT:
                accumulate_output_stats<int>(
                        out_data, out_length, max, max_idx, min, min_idx, sum);
                break;
            case LiteDataType::LITE_INT8:
                accumulate_output_stats<int8_t>(
                        out_data, out_length, max, max_idx, min, min_idx, sum);
                break;
            case LiteDataType::LITE_UINT8:
                accumulate_output_stats<uint8_t>(
                        out_data, out_length, max, max_idx, min, min_idx, sum);
                break;
            default:
                known_dtype = false;
        }
        if (!known_dtype) {
            //! nothing was accumulated, so do not print statistics
            printf("unknown datatype\n");
            continue;
        }
        printf("output_length %zu index %zu max=%e , max idx=%zu, min=%e , "
               "min_idx=%zu, "
               "sum=%e\n",
               out_length, index, max, max_idx, min, min_idx, sum);
    }
}
}
namespace {
//! Load a model from args.model_path, copy the .npy data at args.input_path
//! into input 0, run timed forward passes and print output information.
//!
//! \return true on success, false when the input data holds fewer bytes than
//!         the model input requires
bool basic_load_from_path(const Args& args) {
    set_log_level(LiteLogLevel::DEBUG);
    std::string network_path = args.model_path;
    std::string input_path = args.input_path;

    //! create and load the network
    std::shared_ptr<Network> network = std::make_shared<Network>();
    network->load_model(network_path);

    //! query input 0 and print its shape
    std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);
    auto layout = input_tensor->get_layout();
    for (size_t i = 0; i < layout.ndim; i++) {
        printf("model input shape[%zu]=%zu \n", i, layout.shapes[i]);
    }
    size_t length = input_tensor->get_tensor_total_size_in_byte();
    void* dst_ptr = input_tensor->get_memory_ptr();

    //! load the input data and print its shape
    auto src_tensor = parse_npy(input_path);
    auto layout0 = src_tensor->get_layout();
    for (size_t i = 0; i < layout0.ndim; i++) {
        printf("src shape[%zu]=%zu \n", i, layout0.shapes[i]);
    }
    //! the copy below reads `length` bytes from the source buffer, so refuse
    //! a source that is too small instead of reading past its end
    size_t src_length = src_tensor->get_tensor_total_size_in_byte();
    if (src_length < length) {
        printf("input data has %zu bytes, but the model input needs %zu bytes.\n",
               src_length, length);
        return false;
    }
    void* src = src_tensor->get_memory_ptr();
    memcpy(dst_ptr, src, length);

    //! first pass: one warmup run followed by ten runs
    {
        lite::Timer ltimer("warmup");
        network->forward();
        network->wait();
        ltimer.print_used_time(0);
    }
    lite::Timer ltimer("forward_iter");
    //! NOTE(review): the timer is not reset inside this loop, so presumably
    //! each print covers every iteration so far - confirm against lite::Timer
    for (int i = 0; i < 10; i++) {
        network->forward();
        network->wait();
        ltimer.print_used_time(i);
    }

    //! second pass: warmup again, then ten runs with the timer reset before
    //! each one
    {
        lite::Timer ltimer("warmup");
        network->forward();
        network->wait();
        ltimer.print_used_time(0);
    }
    for (int i = 0; i < 10; i++) {
        ltimer.reset_start();
        network->forward();
        network->wait();
        ltimer.print_used_time(i);
    }

    //! report the outputs
    size_t output_size = network->get_all_output_name().size();
    output_info(network, output_size);
    output_data_info(network, output_size);
    return true;
}
bool basic_load_from_path_with_loader(const Args& args) {
set_log_level(LiteLogLevel::DEBUG);
lite::set_loader_lib_path(args.loader_path);
std::string network_path = args.model_path;
std::string input_path = args.input_path;
std::shared_ptr<Network> network = std::make_shared<Network>();
network->load_model(network_path);
std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);
auto input_layout = input_tensor->get_layout();
auto src_tensor = parse_npy(input_path);
auto src_layout = src_tensor->get_layout();
if (src_layout.ndim != input_layout.ndim) {
printf("src dim is not equal model input dim\n");
}
for (size_t i = 0; i < input_layout.ndim; i++) {
if (input_layout.shapes[i] != src_layout.shapes[i]) {
printf("src shape not equal input shape");
}
}
input_tensor->set_layout(src_tensor->get_layout());
input_tensor->reset(src_tensor->get_memory_ptr(), src_tensor->get_layout());
network->forward();
network->wait();
{
lite::Timer ltimer("warmup");
network->forward();
network->wait();
ltimer.print_used_time(0);
}
lite::Timer ltimer("forward_iter");
for (int i = 0; i < 10; i++) {
ltimer.reset_start();
network->forward();
network->wait();
ltimer.print_used_time(i);
}
size_t output_size = network->get_all_output_name().size();
output_info(network, output_size);
output_data_info(network, output_size);
return true;
}
//! Read the model file at args.model_path into a heap buffer, load the
//! network from that buffer, run one forward pass with the .npy data at
//! args.input_path and print the max and sum of output 0.
//!
//! \return true on success, false when the model file cannot be read
//!         completely or the input data is smaller than the model input
bool basic_load_from_memory(const Args& args) {
    std::string network_path = args.model_path;
    std::string input_path = args.input_path;
    std::shared_ptr<Network> network = std::make_shared<Network>();

    //! read the whole model file into memory
    FILE* fin = fopen(network_path.c_str(), "rb");
    if (!fin) {
        //! continuing would pass a null FILE* to fseek/fread
        printf("failed to open %s.\n", network_path.c_str());
        return false;
    }
    long file_size = -1;
    if (fseek(fin, 0, SEEK_END) == 0) {
        file_size = ftell(fin);
    }
    if (file_size <= 0 || fseek(fin, 0, SEEK_SET) != 0) {
        printf("read file failed.\n");
        fclose(fin);
        return false;
    }
    size_t size = static_cast<size_t>(file_size);
    void* ptr = malloc(size);
    if (!ptr) {
        printf("failed to allocate %zu bytes for the model.\n", size);
        fclose(fin);
        return false;
    }
    std::shared_ptr<void> buf{ptr, ::free};
    auto len = fread(buf.get(), 1, size, fin);
    fclose(fin);
    //! a short read would hand a partially filled buffer to load_model
    if (len != size) {
        printf("read file failed.\n");
        return false;
    }

    //! load the model from the memory buffer
    network->load_model(buf.get(), size);

    //! copy the input data into input 0
    std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);
    size_t length = input_tensor->get_tensor_total_size_in_byte();
    void* dst_ptr = input_tensor->get_memory_ptr();
    auto src_tensor = parse_npy(input_path);
    //! the copy below reads `length` bytes from the source buffer, so refuse
    //! a source that is too small instead of reading past its end
    size_t src_length = src_tensor->get_tensor_total_size_in_byte();
    if (src_length < length) {
        printf("input data has %zu bytes, but the model input needs %zu bytes.\n",
               src_length, length);
        return false;
    }
    void* src = src_tensor->get_memory_ptr();
    memcpy(dst_ptr, src, length);

    network->forward();
    network->wait();

    //! summarise output 0; the data is read as float
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    void* out_data = output_tensor->get_memory_ptr();
    size_t out_length = output_tensor->get_tensor_total_size_in_byte() /
                        output_tensor->get_layout().get_elem_size();
    printf("length=%zu\n", length);
    float max = -1.0f;
    float sum = 0.0f;
    for (size_t i = 0; i < out_length; i++) {
        float data = static_cast<float*>(out_data)[i];
        sum += data;
        if (max < data)
            max = data;
    }
    printf("max=%e, sum=%e\n", max, sum);
    return true;
}
//! Run one forward pass with an async callback installed: the calling thread
//! spins on a flag that the callback sets, then prints the max and sum of
//! output 0.
//!
//! \return true on success, false when the input data is smaller than the
//!         model input
bool async_forward(const Args& args) {
    std::string network_path = args.model_path;
    std::string input_path = args.input_path;
    Config config;
    config.options.var_sanity_check_first_run = false;

    //! create and load the network
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(network_path);

    //! copy the input data into input 0
    std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);
    size_t length = input_tensor->get_tensor_total_size_in_byte();
    void* dst_ptr = input_tensor->get_memory_ptr();
    auto src_tensor = parse_npy(input_path);
    //! the copy below reads `length` bytes from the source buffer, so refuse
    //! a source that is too small instead of reading past its end
    size_t src_length = src_tensor->get_tensor_total_size_in_byte();
    if (src_length < length) {
        printf("input data has %zu bytes, but the model input needs %zu bytes.\n",
               src_length, length);
        return false;
    }
    void* src = src_tensor->get_memory_ptr();
    memcpy(dst_ptr, src, length);

    //! the flag is written by the callback and polled here; volatile gives no
    //! inter-thread ordering, so an atomic is used to avoid a data race
    std::atomic<bool> finished{false};
    network->set_async_callback([&finished]() {
#if !__DEPLOY_ON_XP_SP2__
        std::cout << "worker thread_id:" << std::this_thread::get_id() << std::endl;
#endif
        finished = true;
    });
#if !__DEPLOY_ON_XP_SP2__
    std::cout << "out thread_id:" << std::this_thread::get_id() << std::endl;
#endif

    //! forward, then busy-wait until the callback reports completion
    network->forward();
    size_t count = 0;
    while (!finished.load()) {
        count++;
    }
    printf("Forward finish, count is %zu\n", count);

    //! summarise output 0; the data is read as float
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    void* out_data = output_tensor->get_memory_ptr();
    size_t out_length = output_tensor->get_tensor_total_size_in_byte() /
                        output_tensor->get_layout().get_elem_size();
    printf("length=%zu\n", length);
    float max = -1.0f;
    float sum = 0.0f;
    for (size_t i = 0; i < out_length; i++) {
        float data = static_cast<float*>(out_data)[i];
        sum += data;
        if (max < data)
            max = data;
    }
    printf("max=%e, sum=%e\n", max, sum);
    return true;
}
//! Run one forward pass with a start callback installed that prints the name
//! and rank of every input, then print the max and sum of output 0.
//!
//! \return true on success, false when the input data is smaller than the
//!         model input
bool set_input_callback(const Args& args) {
    std::string network_path = args.model_path;
    std::string input_path = args.input_path;
    Config config;
    config.options.var_sanity_check_first_run = false;

    //! create and load the network
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(network_path);

    //! copy the input data into input 0
    std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);
    size_t length = input_tensor->get_tensor_total_size_in_byte();
    void* dst_ptr = input_tensor->get_memory_ptr();
    auto src_tensor = parse_npy(input_path);
    //! the copy below reads `length` bytes from the source buffer, so refuse
    //! a source that is too small instead of reading past its end
    size_t src_length = src_tensor->get_tensor_total_size_in_byte();
    if (src_length < length) {
        printf("input data has %zu bytes, but the model input needs %zu bytes.\n",
               src_length, length);
        return false;
    }
    void* src = src_tensor->get_memory_ptr();
    memcpy(dst_ptr, src, length);

    //! the flag is written by the callback and polled below; an atomic is
    //! used because volatile gives no inter-thread ordering
    std::atomic<bool> finished{false};
    network->set_start_callback(
            [&finished](const std::unordered_map<
                        std::string, std::pair<IO, std::shared_ptr<Tensor>>>& inputs) {
#if !__DEPLOY_ON_XP_SP2__
                std::cout << "worker thread_id:" << std::this_thread::get_id()
                          << std::endl;
#endif
                for (auto&& item : inputs) {
                    std::cout << "input name: " << item.first
                              << "input dim: " << item.second.second->get_layout().ndim
                              << std::endl;
                }
                finished = true;
            });
#if !__DEPLOY_ON_XP_SP2__
    std::cout << "out thread_id:" << std::this_thread::get_id() << std::endl;
#endif

    //! forward and wait for completion: the start callback only tells us the
    //! run has begun, so the output must not be read before wait() returns
    network->forward();
    network->wait();
    size_t count = 0;
    while (!finished.load()) {
        count++;
    }
    printf("Forward finish, count is %zu\n", count);

    //! summarise output 0; the data is read as float
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    void* out_data = output_tensor->get_memory_ptr();
    size_t out_length = output_tensor->get_tensor_total_size_in_byte() /
                        output_tensor->get_layout().get_elem_size();
    printf("length=%zu\n", length);
    float max = -1.0f;
    float sum = 0.0f;
    for (size_t i = 0; i < out_length; i++) {
        float data = static_cast<float*>(out_data)[i];
        sum += data;
        if (max < data)
            max = data;
    }
    printf("max=%e, sum=%e\n", max, sum);
    return true;
}
//! Run one forward pass with a finish callback installed that prints the name
//! and rank of every output, then print the max and sum of output 0.
//!
//! \return true on success, false when the input data is smaller than the
//!         model input
bool set_output_callback(const Args& args) {
    std::string network_path = args.model_path;
    std::string input_path = args.input_path;
    Config config;
    config.options.var_sanity_check_first_run = false;

    //! create and load the network
    std::shared_ptr<Network> network = std::make_shared<Network>(config);
    network->load_model(network_path);

    //! copy the input data into input 0 (the input tensor, not the output
    //! tensor, is the destination of the .npy data)
    std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);
    size_t length = input_tensor->get_tensor_total_size_in_byte();
    void* dst_ptr = input_tensor->get_memory_ptr();
    auto src_tensor = parse_npy(input_path);
    //! the copy below reads `length` bytes from the source buffer, so refuse
    //! a source that is too small instead of reading past its end
    size_t src_length = src_tensor->get_tensor_total_size_in_byte();
    if (src_length < length) {
        printf("input data has %zu bytes, but the model input needs %zu bytes.\n",
               src_length, length);
        return false;
    }
    void* src = src_tensor->get_memory_ptr();
    memcpy(dst_ptr, src, length);

    //! the flag is written by the callback and polled below; an atomic is
    //! used because volatile gives no inter-thread ordering
    std::atomic<bool> finished{false};
    network->set_finish_callback(
            [&finished](const std::unordered_map<
                        std::string, std::pair<IO, std::shared_ptr<Tensor>>>& outputs) {
#if !__DEPLOY_ON_XP_SP2__
                std::cout << "worker thread_id:" << std::this_thread::get_id()
                          << std::endl;
#endif
                for (auto&& item : outputs) {
                    std::cout << "output name: " << item.first
                              << "output dim: " << item.second.second->get_layout().ndim
                              << std::endl;
                }
                finished = true;
            });
#if !__DEPLOY_ON_XP_SP2__
    std::cout << "out thread_id:" << std::this_thread::get_id() << std::endl;
#endif

    //! forward, wait, then spin until the finish callback has run
    network->forward();
    network->wait();
    size_t count = 0;
    while (!finished.load()) {
        count++;
    }
    printf("Forward finish, count is %zu\n", count);

    //! summarise output 0; the data is read as float
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    void* out_data = output_tensor->get_memory_ptr();
    size_t out_length = output_tensor->get_tensor_total_size_in_byte() /
                        output_tensor->get_layout().get_elem_size();
    printf("length=%zu\n", length);
    float max = -1.0f;
    float sum = 0.0f;
    for (size_t i = 0; i < out_length; i++) {
        float data = static_cast<float*>(out_data)[i];
        sum += data;
        if (max < data)
            max = data;
    }
    printf("max=%e, sum=%e\n", max, sum);
    return true;
}
}
//! Pair each example function above with a string name.
//! NOTE(review): REGIST_EXAMPLE is defined outside this file; presumably it
//! makes the function selectable by that name from the example runner - confirm
//! in example.h.
REGIST_EXAMPLE("basic_load_from_path", basic_load_from_path);
REGIST_EXAMPLE("basic_load_from_path_with_loader", basic_load_from_path_with_loader);
REGIST_EXAMPLE("basic_load_from_memory", basic_load_from_memory);
REGIST_EXAMPLE("async_forward", async_forward);
REGIST_EXAMPLE("set_input_callback", set_input_callback);
REGIST_EXAMPLE("set_output_callback", set_output_callback);
#if LITE_WITH_CUDA
namespace {
bool load_from_path_run_cuda(const Args& args) {
std::string network_path = args.model_path;
std::string input_path = args.input_path;
set_log_level(LiteLogLevel::DEBUG);
lite::Config config{false, -1, LiteDeviceType::LITE_CUDA};
NetworkIO network_io;
std::string input_name = "img0_comp_fullface";
bool is_host = false;
IO device_input{input_name, is_host};
network_io.inputs.push_back(device_input);
std::shared_ptr<Network> network = std::make_shared<Network>(config, network_io);
network->load_model(network_path);
std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);
Layout input_layout = input_tensor->get_layout();
auto src_tensor = parse_npy(input_path);
auto tensor_device = Tensor(LiteDeviceType::LITE_CUDA, input_layout);
tensor_device.copy_from(*src_tensor);
input_tensor->reset(tensor_device.get_memory_ptr(), input_layout);
{
lite::Timer ltimer("warmup");
network->forward();
network->wait();
ltimer.print_used_time(0);
}
lite::Timer ltimer("forward_iter");
for (int i = 0; i < 10; i++) {
ltimer.reset_start();
network->forward();
network->wait();
ltimer.print_used_time(i);
}
size_t output_size = network->get_all_output_name().size();
output_info(network, output_size);
output_data_info(network, output_size);
return true;
}
}
//! Pair the CUDA example with its string name; this line is only compiled
//! when LITE_WITH_CUDA is set.
REGIST_EXAMPLE("load_from_path_run_cuda", load_from_path_run_cuda);
#endif
#endif