#include "model_config_utils.h"
#include <google/protobuf/util/json_util.h>
#include <google/protobuf/util/message_differencer.h>
#include <deque>
#include <mutex>
#include <set>
#include "constants.h"
#include "cuda_utils.h"
#include "filesystem/api.h"
#include "triton/common/logging.h"
#define TRITONJSON_STATUSTYPE triton::core::Status
#define TRITONJSON_STATUSRETURN(M) \
return triton::core::Status(triton::core::Status::Code::INTERNAL, (M))
#define TRITONJSON_STATUSSUCCESS triton::core::Status::Success
#include "triton/common/triton_json.h"
#ifdef TRITON_ENABLE_GPU
#include <cuda_runtime_api.h>
#endif
namespace triton { namespace core {
namespace {
#ifdef TRITON_ENABLE_ENSEMBLE
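// Node in the ensemble tensor dependency graph. 'prev_nodes' holds the
// tensors this tensor is computed from and 'next_nodes' the tensors
// computed from it; 'isOutput' records that some step writes the tensor
// and 'ready' marks tensors reachable from the ensemble inputs.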
struct EnsembleTensor {
EnsembleTensor(const std::string& name, bool isOutput)
: name{name}, isOutput(isOutput)
{
}
const std::string name;
bool isOutput{false};
bool ready{false};
std::vector<EnsembleTensor*> prev_nodes;
std::vector<EnsembleTensor*> next_nodes;
};
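// Build a graph of ensemble tensors keyed by tensor name from the
// 'ensemble_scheduling' steps. Verifies that every step specifies a
// model name and non-empty input/output maps, that each ensemble tensor
// is written by at most one step, and that a step does not map the same
// model input to multiple ensemble tensors.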
Status
BuildEnsembleGraph(
const inference::ModelConfig& config,
std::unordered_map<std::string, EnsembleTensor>& keyed_ensemble_graph)
{
keyed_ensemble_graph.clear();
size_t step_idx = 0;
for (const auto& element : config.ensemble_scheduling().step()) {
if (element.model_name().empty()) {
return Status(
Status::Code::INVALID_ARG,
"must specify 'model_name' in step " + std::to_string(step_idx) +
" of ensemble '" + config.name() + "'");
}
if (element.input_map().size() == 0) {
return Status(
Status::Code::INVALID_ARG,
"must specify 'input_map' in step " + std::to_string(step_idx) +
" of ensemble '" + config.name() + "'");
}
if (element.output_map().size() == 0) {
return Status(
Status::Code::INVALID_ARG,
"must specify 'output_map' in step " + std::to_string(step_idx) +
" of ensemble '" + config.name() + "'");
}
std::vector<EnsembleTensor*> tensor_as_output;
for (const auto& output_map : element.output_map()) {
auto it = keyed_ensemble_graph.find(output_map.second);
if (it != keyed_ensemble_graph.end()) {
if (it->second.isOutput) {
return Status(
Status::Code::INVALID_ARG,
"ensemble tensor '" + it->first +
"' can appear in an output map only once for ensemble '" +
config.name() + "' step " + std::to_string(step_idx));
} else {
it->second.isOutput = true;
}
} else {
it =
keyed_ensemble_graph
.emplace(std::make_pair(
output_map.second, EnsembleTensor(output_map.second, true)))
.first;
}
tensor_as_output.push_back(&(it->second));
}
std::set<std::string> model_inputs;
for (const auto& input_map : element.input_map()) {
if (model_inputs.find(input_map.first) != model_inputs.end()) {
return Status(
Status::Code::INVALID_ARG,
"input '" + input_map.first + "' in model '" +
element.model_name() +
"' is mapped to multiple ensemble tensors for ensemble '" +
config.name() + "' step " + std::to_string(step_idx));
} else {
model_inputs.emplace(input_map.first);
}
auto it = keyed_ensemble_graph.find(input_map.second);
if (it == keyed_ensemble_graph.end()) {
it = keyed_ensemble_graph
.emplace(std::make_pair(
input_map.second, EnsembleTensor(input_map.second, false)))
.first;
}
for (auto output : tensor_as_output) {
output->prev_nodes.push_back(&(it->second));
it->second.next_nodes.push_back(output);
}
}
step_idx++;
}
return Status::Success;
}
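// Validate the ensemble scheduling settings: the platform must be the
// ensemble platform; instance groups, optimization and model warmup
// must not be set; and at least one step is required. The tensor graph
// is then traversed from the ensemble inputs to check that every
// ensemble output can be produced and that no ensemble tensor is
// unused.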
Status
ValidateEnsembleSchedulingConfig(const inference::ModelConfig& config)
{
if (config.platform() != kEnsemblePlatform) {
return Status(
Status::Code::INVALID_ARG,
"ensemble scheduling cannot be set for model '" + config.name() +
"' whose platform is not " + kEnsemblePlatform);
}
if (config.instance_group().size() != 0) {
return Status(
Status::Code::INVALID_ARG,
"instance group should not be specified for ensemble '" +
config.name() + "'");
}
if (config.has_optimization()) {
return Status(
Status::Code::INVALID_ARG,
"optimization should not be specified for ensemble '" + config.name() +
"'");
}
if (config.model_warmup_size() != 0) {
return Status(
Status::Code::INVALID_ARG,
"model_warmup can not be specified for ensemble '" + config.name() +
"'");
}
if (config.ensemble_scheduling().step_size() == 0) {
return Status(
Status::Code::INVALID_ARG,
"must specify 'step' for ensemble '" + config.name() + "'");
}
std::unordered_map<std::string, EnsembleTensor> tensors;
RETURN_IF_ERROR(BuildEnsembleGraph(config, tensors));
std::deque<EnsembleTensor*> ready_queue;
for (const auto& input : config.input()) {
auto it = tensors.find(input.name());
if (it == tensors.end()) {
return Status(
Status::Code::INVALID_ARG, "ensemble input '" + input.name() +
"' for ensemble " + config.name() +
"' is not used");
}
it->second.ready = true;
ready_queue.push_back(&(it->second));
}
while (!ready_queue.empty()) {
auto& ready_node = ready_queue.front();
for (auto& next_node : ready_node->next_nodes) {
if (next_node->ready) {
continue;
}
bool next_node_ready = true;
for (auto& prev_node : next_node->prev_nodes) {
if (!prev_node->ready) {
next_node_ready = false;
break;
}
}
next_node->ready = next_node_ready;
if (next_node_ready) {
ready_queue.push_back(next_node);
}
}
ready_queue.pop_front();
}
std::set<std::string> outputs;
for (const auto& output : config.output()) {
auto it = tensors.find(output.name());
if (it == tensors.end()) {
return Status(
Status::Code::INVALID_ARG, "ensemble output '" + output.name() +
"' for ensemble " + config.name() +
"' is not used");
}
if (!it->second.ready) {
std::string error_message = "output '" + output.name() +
"' for ensemble '" + config.name() +
"' is not written";
std::vector<EnsembleTensor*>* prev_nodes = &it->second.prev_nodes;
auto last_not_ready_node = &it->second;
std::set<std::string> seen_names;
while ((prev_nodes != nullptr) && (!prev_nodes->empty())) {
const auto& nodes = *prev_nodes;
prev_nodes = nullptr;
for (const auto& node : nodes) {
if ((!node->ready) &&
(seen_names.find(node->name) == seen_names.end())) {
seen_names.emplace(node->name);
last_not_ready_node = node;
prev_nodes = &node->prev_nodes;
break;
}
}
}
if (last_not_ready_node->name != it->second.name) {
error_message += ": at least one of its depending tensors, '" +
last_not_ready_node->name + "', is not connected";
}
return Status(Status::Code::INVALID_ARG, error_message);
} else {
outputs.insert(it->first);
}
}
for (const auto& tensor : tensors) {
if (outputs.find(tensor.first) != outputs.end()) {
continue;
}
if (!tensor.second.ready || (tensor.second.next_nodes.size() == 0)) {
return Status(
Status::Code::INVALID_ARG, "ensemble tensor '" + tensor.first +
"' is unused in ensemble '" +
config.name() + "'");
}
}
return Status::Success;
}
#endif
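// Validate the name, data type, dims and optional reshape of a model
// input or output. 'message_prefix' is prepended to error messages so
// the same check can be shared by inputs and outputs. When a reshape is
// given, its element count (including per variable-size segment) must
// match that of 'dims'.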
template <class ModelIO>
Status
ValidateIOShape(
const ModelIO& io, int32_t max_batch_size,
const std::string& message_prefix = "")
{
if (io.name().empty()) {
return Status(
Status::Code::INVALID_ARG, message_prefix + "must specify 'name'");
}
std::string message_prefix_with_name =
message_prefix + std::string("'" + io.name() + "' ");
if (io.data_type() == inference::DataType::TYPE_INVALID) {
return Status(
Status::Code::INVALID_ARG,
message_prefix_with_name + "must specify 'data_type'");
}
if (io.dims_size() == 0) {
return Status(
Status::Code::INVALID_ARG,
message_prefix_with_name + "must specify 'dims'");
}
if (io.has_reshape() && (io.reshape().shape_size() == 0) &&
(max_batch_size == 0)) {
return Status(
Status::Code::INVALID_ARG,
message_prefix_with_name +
"cannot have empty reshape for non-batching model as scalar "
"tensors are not supported");
}
for (auto dim : io.dims()) {
if ((dim < 1) && (dim != triton::common::WILDCARD_DIM)) {
return Status(
Status::Code::INVALID_ARG,
message_prefix_with_name + "dimension must be integer >= 1, or " +
std::to_string(triton::common::WILDCARD_DIM) +
" to indicate a variable-size dimension");
}
}
if (io.has_reshape()) {
for (auto dim : io.reshape().shape()) {
if ((dim < 1) && (dim != triton::common::WILDCARD_DIM)) {
return Status(
Status::Code::INVALID_ARG,
message_prefix_with_name +
"reshape dimensions must be integer >= 1, or " +
std::to_string(triton::common::WILDCARD_DIM) +
" to indicate a variable-size dimension");
}
}
const int64_t dims_size = triton::common::GetElementCount(io.dims());
const int64_t reshape_size =
triton::common::GetElementCount(io.reshape().shape());
if ((dims_size != reshape_size) &&
((reshape_size != 0) || (dims_size != 1))) {
return Status(
Status::Code::INVALID_ARG,
message_prefix_with_name + "has different size for dims and reshape");
}
if (dims_size == -1) {
std::vector<int64_t> dim_element_cnts;
std::vector<int64_t> reshape_element_cnts;
int64_t current_cnt = 1;
for (const auto& dim : io.dims()) {
if (dim != -1) {
current_cnt *= dim;
} else {
dim_element_cnts.push_back(current_cnt);
current_cnt = 1;
}
}
dim_element_cnts.push_back(current_cnt);
current_cnt = 1;
for (const auto& dim : io.reshape().shape()) {
if (dim != -1) {
current_cnt *= dim;
} else {
reshape_element_cnts.push_back(current_cnt);
current_cnt = 1;
}
}
reshape_element_cnts.push_back(current_cnt);
if (dim_element_cnts.size() != reshape_element_cnts.size()) {
return Status(
Status::Code::INVALID_ARG,
message_prefix_with_name +
"has different number of variable-size dimensions for dims "
"and reshape");
}
for (size_t idx = 0; idx < dim_element_cnts.size(); idx++) {
if (dim_element_cnts[idx] != reshape_element_cnts[idx]) {
return Status(
Status::Code::INVALID_ARG,
message_prefix_with_name +
"has different size for dims and reshape");
}
}
}
}
return Status::Success;
}
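// A non-linear IO format is only allowed for TensorRT models and the
// tensor must have exactly 3 dims.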
template <class ModelIO>
Status
ValidateNonLinearFormatIO(
const ModelIO& io, const std::string& platform, bool is_input)
{
if (!io.is_non_linear_format_io()) {
return Status::Success;
}
if (platform != kTensorRTPlanPlatform) {
return Status(
Status::Code::INVALID_ARG,
"Non-linear IO format is only supported for the TensorRT platform");
}
if (io.dims_size() != 3) {
std::string io_type = is_input ? "input" : "output";
return Status(
Status::Code::INVALID_ARG,
"Non-linear IO format " + io_type + " requires 3 dims");
}
return Status::Success;
}
}  // namespace
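// Parse the model version from the final path component, e.g. a version
// directory ".../<model>/3" yields version 3.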
Status
GetModelVersionFromPath(const std::string& path, int64_t* version)
{
auto version_dir = BaseName(path);
try {
*version = std::atoll(version_dir.c_str());
}
catch (...) {
return Status(
Status::Code::INTERNAL,
"unable to determine model version from " + path);
}
return Status::Success;
}
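// Find the boolean control tensor of kind 'control_kind' in the
// sequence batcher settings and return its name, data type and
// false/true values. Exactly one of 'int32_false_true',
// 'fp32_false_true' or 'bool_false_true' must be given, with exactly
// two entries. If no such control is configured, an error is returned
// when 'required' is true, otherwise 'tensor_name' is cleared.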
Status
GetBooleanSequenceControlProperties(
const inference::ModelSequenceBatching& batcher,
const std::string& model_name,
const inference::ModelSequenceBatching::Control::Kind control_kind,
const bool required, std::string* tensor_name,
inference::DataType* tensor_datatype, float* fp32_false_value,
float* fp32_true_value, int32_t* int32_false_value,
int32_t* int32_true_value, bool* bool_false_value, bool* bool_true_value)
{
std::set<std::string> seen_tensors;
bool seen_control = false;
for (const auto& control_input : batcher.control_input()) {
if (control_input.name().empty()) {
return Status(
Status::Code::INVALID_ARG,
"sequence batching control tensor must have a name for " +
model_name);
}
if (seen_tensors.find(control_input.name()) != seen_tensors.end()) {
return Status(
Status::Code::INVALID_ARG,
"sequence batching control tensor '" + control_input.name() +
"' is specified for multiple control kinds for " + model_name);
}
seen_tensors.insert(control_input.name());
for (const auto& c : control_input.control()) {
if (c.kind() == control_kind) {
if (seen_control) {
return Status(
Status::Code::INVALID_ARG,
"sequence batching specifies multiple " +
inference::ModelSequenceBatching_Control_Kind_Name(
control_kind) +
" tensors for " + model_name);
}
*tensor_name = control_input.name();
seen_control = true;
if (!((c.int32_false_true_size() != 0) ||
(c.fp32_false_true_size() != 0) ||
(c.bool_false_true_size() != 0))) {
return Status(
Status::Code::INVALID_ARG,
"sequence batching must specify either 'int32_false_true', "
"'fp32_false_true' or 'bool_false_true' for " +
inference::ModelSequenceBatching_Control_Kind_Name(
control_kind) +
" for " + model_name);
} else if (
((c.int32_false_true_size() != 0) &&
(c.fp32_false_true_size() != 0)) ||
((c.int32_false_true_size() != 0) &&
(c.bool_false_true_size() != 0)) ||
((c.fp32_false_true_size() != 0) &&
(c.bool_false_true_size() != 0))) {
return Status(
Status::Code::INVALID_ARG,
"sequence batching specifies more than one from "
"'int32_false_true', 'fp32_false_true' and 'bool_false_true' "
"for " +
inference::ModelSequenceBatching_Control_Kind_Name(
control_kind) +
" for " + model_name);
}
if (c.int32_false_true_size() > 0) {
if (c.int32_false_true_size() != 2) {
return Status(
Status::Code::INVALID_ARG,
"sequence batching control 'int32_false_true' must have "
"exactly 2 entries for " +
inference::ModelSequenceBatching_Control_Kind_Name(
control_kind) +
" for " + model_name);
}
if (tensor_datatype != nullptr) {
*tensor_datatype = inference::DataType::TYPE_INT32;
}
if (int32_false_value != nullptr) {
*int32_false_value = c.int32_false_true(0);
}
if (int32_true_value != nullptr) {
*int32_true_value = c.int32_false_true(1);
}
} else if (c.fp32_false_true_size() > 0) {
if (c.fp32_false_true_size() != 2) {
return Status(
Status::Code::INVALID_ARG,
"sequence batching control 'fp32_false_true' must have exactly "
"2 entries for " +
inference::ModelSequenceBatching_Control_Kind_Name(
control_kind) +
" for " + model_name);
}
if (tensor_datatype != nullptr) {
*tensor_datatype = inference::DataType::TYPE_FP32;
}
if (fp32_false_value != nullptr) {
*fp32_false_value = c.fp32_false_true(0);
}
if (fp32_true_value != nullptr) {
*fp32_true_value = c.fp32_false_true(1);
}
} else {
if (c.bool_false_true_size() != 2) {
return Status(
Status::Code::INVALID_ARG,
"sequence batching control 'bool_false_true' must have exactly "
"2 entries for " +
inference::ModelSequenceBatching_Control_Kind_Name(
control_kind) +
" for " + model_name);
}
if (tensor_datatype != nullptr) {
*tensor_datatype = inference::DataType::TYPE_BOOL;
}
if (bool_false_value != nullptr) {
*bool_false_value = c.bool_false_true(0);
}
if (bool_true_value != nullptr) {
*bool_true_value = c.bool_false_true(1);
}
}
}
}
}
if (!seen_control) {
if (required) {
return Status(
Status::Code::INVALID_ARG,
"sequence batching control tensor must specify a " +
inference::ModelSequenceBatching_Control_Kind_Name(control_kind) +
" value for " + model_name);
}
tensor_name->clear();
}
return Status::Success;
}
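// Find the typed control tensor (e.g. CONTROL_SEQUENCE_CORRID) of kind
// 'control_kind' and return its name and data type. Typed controls must
// not specify false/true values.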
Status
GetTypedSequenceControlProperties(
const inference::ModelSequenceBatching& batcher,
const std::string& model_name,
const inference::ModelSequenceBatching::Control::Kind control_kind,
const bool required, std::string* tensor_name,
inference::DataType* tensor_datatype)
{
std::set<std::string> seen_tensors;
bool seen_control = false;
for (const auto& control_input : batcher.control_input()) {
if (control_input.name().empty()) {
return Status(
Status::Code::INVALID_ARG,
"sequence batching control tensor must have a name for " +
model_name);
}
if (seen_tensors.find(control_input.name()) != seen_tensors.end()) {
return Status(
Status::Code::INVALID_ARG,
"sequence batching control tensor '" + control_input.name() +
"' is specified for multiple control kinds for " + model_name);
}
seen_tensors.insert(control_input.name());
for (const auto& c : control_input.control()) {
if (c.kind() == control_kind) {
if (seen_control) {
return Status(
Status::Code::INVALID_ARG,
"sequence batching specifies multiple " +
inference::ModelSequenceBatching_Control_Kind_Name(
control_kind) +
" tensors for " + model_name);
}
*tensor_name = control_input.name();
if (tensor_datatype != nullptr) {
*tensor_datatype = c.data_type();
}
seen_control = true;
if ((c.int32_false_true_size() > 0) || (c.fp32_false_true_size() > 0) ||
(c.bool_false_true_size() > 0)) {
return Status(
Status::Code::INVALID_ARG,
"sequence batching must not specify either 'int32_false_true', "
"'fp32_false_true' or 'bool_false_true' for " +
inference::ModelSequenceBatching_Control_Kind_Name(
control_kind) +
" for " + model_name);
}
}
}
}
if (!seen_control) {
if (required) {
return Status(
Status::Code::INVALID_ARG,
"sequence batching control tensor must specify a " +
inference::ModelSequenceBatching_Control_Kind_Name(control_kind) +
" value for " + model_name);
}
tensor_name->clear();
}
return Status::Success;
}
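// Auto-complete backend-related fields from the model repository
// contents, then normalize the configuration by filling in defaults.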
Status
GetNormalizedModelConfig(
const std::string& model_name, const std::string& path,
const double min_compute_capability, inference::ModelConfig* config)
{
RETURN_IF_ERROR(
AutoCompleteBackendFields(model_name, std::string(path), config));
LOG_PROTOBUF_VERBOSE(1, "Server side auto-completed config: ", (*config));
RETURN_IF_ERROR(NormalizeModelConfig(min_compute_capability, config));
return Status::Success;
}
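// Fill in backend-independent defaults: a "latest 1" version policy,
// preferred batch sizes for dynamic and oldest-first sequence batching,
// the default sequence idle timeout, and pinned input/output memory for
// non-ensemble models.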
Status
NormalizeModelConfig(
const double min_compute_capability, inference::ModelConfig* config)
{
if (!config->has_version_policy()) {
inference::ModelVersionPolicy::Latest latest;
latest.set_num_versions(1);
config->mutable_version_policy()->mutable_latest()->CopyFrom(latest);
}
if (config->has_dynamic_batching()) {
if (config->dynamic_batching().preferred_batch_size().size() == 0) {
auto mutable_preferred_batch_size =
config->mutable_dynamic_batching()->mutable_preferred_batch_size();
if (config->max_batch_size() > 0) {
mutable_preferred_batch_size->Add(config->max_batch_size());
}
}
}
if (config->has_sequence_batching()) {
if (config->sequence_batching().max_sequence_idle_microseconds() == 0) {
config->mutable_sequence_batching()->set_max_sequence_idle_microseconds(
SEQUENCE_IDLE_DEFAULT_MICROSECONDS);
}
if (config->sequence_batching().has_oldest()) {
if (config->sequence_batching().oldest().preferred_batch_size().size() ==
0) {
auto mutable_preferred_batch_size =
config->mutable_sequence_batching()
->mutable_oldest()
->mutable_preferred_batch_size();
if (config->max_batch_size() > 0) {
mutable_preferred_batch_size->Add(config->max_batch_size());
}
}
}
}
if (!config->has_ensemble_scheduling()) {
auto optimization = config->mutable_optimization();
if (!optimization->has_input_pinned_memory()) {
optimization->mutable_input_pinned_memory()->set_enable(true);
}
if (!optimization->has_output_pinned_memory()) {
optimization->mutable_output_pinned_memory()->set_enable(true);
}
}
return Status::Success;
}
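// Fill in instance group defaults for non-ensemble models. If no group
// is given, one is created from the first applicable entry of
// 'preferred_groups'. KIND_AUTO groups become KIND_CPU when no
// supported GPU is available or a requested GPU is unsupported, and
// KIND_GPU otherwise; missing names, counts and GPU lists are also
// filled in.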
Status
NormalizeInstanceGroup(
const double min_compute_capability,
const std::vector<inference::ModelInstanceGroup>& preferred_groups,
inference::ModelConfig* config)
{
if (config->has_ensemble_scheduling()) {
return Status::Success;
}
std::set<int> supported_gpus;
#ifdef TRITON_ENABLE_GPU
Status status = GetSupportedGPUs(&supported_gpus, min_compute_capability);
if (!status.IsOk()) {
return status;
}
#endif
if (config->instance_group().empty()) {
inference::ModelInstanceGroup* group = config->add_instance_group();
group->set_name(config->name());
for (const auto& pg : preferred_groups) {
if (pg.kind() == inference::ModelInstanceGroup::KIND_GPU) {
if (supported_gpus.empty()) {
continue;
}
if (!pg.gpus().empty()) {
for (const int32_t gid : pg.gpus()) {
if (supported_gpus.find(gid) != supported_gpus.end()) {
group->add_gpus(gid);
}
}
}
} else if (pg.kind() == inference::ModelInstanceGroup::KIND_AUTO) {
for (const int32_t gid : pg.gpus()) {
group->add_gpus(gid);
}
}
group->set_kind(pg.kind());
group->set_count(pg.count());
break;
}
}
size_t cnt = 0;
for (auto& group : *config->mutable_instance_group()) {
if (group.name().empty()) {
group.set_name(config->name() + "_" + std::to_string(cnt));
}
cnt++;
if (group.kind() == inference::ModelInstanceGroup::KIND_AUTO) {
if (supported_gpus.empty()) {
group.set_kind(inference::ModelInstanceGroup::KIND_CPU);
} else {
for (const int32_t gid : group.gpus()) {
if (supported_gpus.find(gid) == supported_gpus.end()) {
group.set_kind(inference::ModelInstanceGroup::KIND_CPU);
break;
}
}
}
if (group.kind() == inference::ModelInstanceGroup::KIND_AUTO) {
group.set_kind(inference::ModelInstanceGroup::KIND_GPU);
}
}
for (const auto& pg : preferred_groups) {
if (group.kind() != pg.kind()) {
continue;
}
if ((group.kind() == inference::ModelInstanceGroup::KIND_GPU) &&
group.gpus().empty() && !pg.gpus().empty()) {
for (const int32_t gid : pg.gpus()) {
if (supported_gpus.find(gid) != supported_gpus.end()) {
group.add_gpus(gid);
}
}
if (group.gpus().empty()) {
continue;
}
}
if ((group.count() < 1) && (pg.count() > 0)) {
group.set_count(pg.count());
}
}
if (group.count() < 1) {
RETURN_IF_ERROR(SetDefaultInstanceCount(&group, config->backend()));
}
if ((group.kind() == inference::ModelInstanceGroup::KIND_GPU) &&
(group.gpus().size() == 0)) {
for (auto d : supported_gpus) {
group.add_gpus(d);
}
}
}
return Status::Success;
}
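// For Python models with an EXECUTION_ENV_PATH parameter, expand
// $$TRITON_MODEL_DIRECTORY, collapse any '..' components, and if the
// resulting path lies outside the model directory localize it and point
// the parameter at the localized copy.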
Status
LocalizePythonBackendExecutionEnvironmentPath(
const std::string& model_path, inference::ModelConfig* config,
std::shared_ptr<LocalizedPath>* localized_model_dir)
{
if (config->backend() == kPythonBackend) {
if (config->parameters().contains("EXECUTION_ENV_PATH")) {
std::string exec_env_path =
config->parameters().at("EXECUTION_ENV_PATH").string_value();
std::string model_dir_var = "$$TRITON_MODEL_DIRECTORY";
if (exec_env_path.substr(0, model_dir_var.size()) == model_dir_var) {
exec_env_path.replace(0, model_dir_var.size(), model_path);
}
std::string abs_exec_env_path;
std::size_t prev_pos = exec_env_path.size();
std::size_t pos = exec_env_path.find_last_of('/', prev_pos - 1);
int skip = 0;
while (pos != std::string::npos && prev_pos > 0) {
if (!skip) {
abs_exec_env_path =
exec_env_path.substr(pos, prev_pos - pos) + abs_exec_env_path;
}
skip = skip > 0 ? skip - 1 : skip;
if (pos >= 3 && exec_env_path.substr(pos - 3, 3) == "/..") {
skip += 2;
}
prev_pos = pos;
pos = exec_env_path.find_last_of('/', prev_pos - 1);
}
abs_exec_env_path = exec_env_path.substr(0, prev_pos) + abs_exec_env_path;
std::string model_path_slash =
model_path.back() == '/' ? model_path : model_path + "/";
if (abs_exec_env_path.substr(0, model_path_slash.size()) !=
model_path_slash) {
std::shared_ptr<LocalizedPath> localized_exec_env_path;
RETURN_IF_ERROR(
LocalizePath(abs_exec_env_path, &localized_exec_env_path));
(*localized_model_dir)
->other_localized_path.push_back(localized_exec_env_path);
config->mutable_parameters()
->at("EXECUTION_ENV_PATH")
.set_string_value(localized_exec_env_path->Path());
}
}
}
return Status::Success;
}
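// If EXECUTION_ENV_PATH is not already set, point it at the packaged
// execution environment "pb_exec_env_<runtime>.tar.gz" in the backend
// directory when such a file exists.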
Status
SetPythonBasedBackendExecutionEnvironment(
const std::string& backend_libdir, inference::ModelConfig* model_config)
{
if (!model_config->parameters().contains("EXECUTION_ENV_PATH")) {
std::string env_name = "pb_exec_env_" + model_config->runtime() + ".tar.gz";
std::string env_path = JoinPath({backend_libdir, std::move(env_name)});
bool env_path_exist;
RETURN_IF_ERROR(FileExists(env_path, &env_path_exist));
if (env_path_exist) {
inference::ModelParameter model_param;
model_param.set_string_value(env_path);
(*model_config->mutable_parameters())["EXECUTION_ENV_PATH"] =
std::move(model_param);
}
}
return Status::Success;
}
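// Default to one instance per group, except two instances for CPU
// groups of the TensorFlow and ONNX Runtime backends.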
Status
SetDefaultInstanceCount(
inference::ModelInstanceGroup* group, const std::string& backend)
{
group->set_count(1);
const int default_cpu_instance_count = 2;
bool use_default_cpu_instance_count =
(backend == kTensorFlowBackend) || (backend == kOnnxRuntimeBackend);
if (group->kind() == inference::ModelInstanceGroup::KIND_CPU &&
use_default_cpu_instance_count) {
group->set_count(default_cpu_instance_count);
}
return Status::Success;
}
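// Infer 'platform', 'backend' and 'default_model_filename' from the
// fields already set in the configuration and from the contents of the
// first version directory. As a last resort the backend is derived from
// a model name of the form 'model.<backend_name>'.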
Status
AutoCompleteBackendFields(
const std::string& model_name, const std::string& model_path,
inference::ModelConfig* config)
{
std::set<std::string> version_dirs;
RETURN_IF_ERROR(GetDirectorySubdirs(model_path, &version_dirs));
const bool has_version = (version_dirs.size() != 0);
const auto version_path =
has_version ? JoinPath({model_path, *(version_dirs.begin())}) : "";
std::set<std::string> version_dir_content;
if (has_version) {
RETURN_IF_ERROR(GetDirectoryContents(version_path, &version_dir_content));
}
if (config->name().empty()) {
config->set_name(model_name);
}
if (config->platform().empty()) {
if (config->backend().empty() ||
(config->backend() == kTensorFlowBackend)) {
if (config->default_model_filename() == kTensorFlowSavedModelFilename) {
config->set_platform(kTensorFlowSavedModelPlatform);
} else if (
config->default_model_filename() == kTensorFlowGraphDefFilename) {
config->set_platform(kTensorFlowGraphDefPlatform);
} else if (config->default_model_filename().empty() && has_version) {
bool is_dir = false;
if (version_dir_content.find(kTensorFlowSavedModelFilename) !=
version_dir_content.end()) {
RETURN_IF_ERROR(IsDirectory(
JoinPath({version_path, kTensorFlowSavedModelFilename}),
&is_dir));
if (is_dir) {
config->set_platform(kTensorFlowSavedModelPlatform);
}
}
if (version_dir_content.find(kTensorFlowGraphDefFilename) !=
version_dir_content.end()) {
RETURN_IF_ERROR(IsDirectory(
JoinPath({version_path, kTensorFlowGraphDefFilename}), &is_dir));
if (!is_dir) {
config->set_platform(kTensorFlowGraphDefPlatform);
}
}
}
}
}
if ((config->platform() == kTensorFlowSavedModelPlatform) ||
(config->platform() == kTensorFlowGraphDefPlatform)) {
if (config->backend().empty()) {
config->set_backend(kTensorFlowBackend);
}
if (config->default_model_filename().empty()) {
if (config->platform() == kTensorFlowSavedModelPlatform) {
config->set_default_model_filename(kTensorFlowSavedModelFilename);
} else {
config->set_default_model_filename(kTensorFlowGraphDefFilename);
}
}
return Status::Success;
}
if (config->backend().empty()) {
if ((config->platform() == kTensorRTPlanPlatform) ||
(config->default_model_filename() == kTensorRTPlanFilename)) {
config->set_backend(kTensorRTBackend);
} else if (
config->platform().empty() &&
config->default_model_filename().empty() && has_version) {
bool is_dir = false;
if (version_dir_content.find(kTensorRTPlanFilename) !=
version_dir_content.end()) {
RETURN_IF_ERROR(IsDirectory(
JoinPath({version_path, kTensorRTPlanFilename}), &is_dir));
if (!is_dir) {
config->set_backend(kTensorRTBackend);
}
}
}
}
if (config->backend() == kTensorRTBackend) {
if (config->platform().empty()) {
config->set_platform(kTensorRTPlanPlatform);
}
if (config->default_model_filename().empty()) {
config->set_default_model_filename(kTensorRTPlanFilename);
}
return Status::Success;
}
if (config->backend().empty()) {
if ((config->platform() == kOnnxRuntimeOnnxPlatform) ||
(config->default_model_filename() == kOnnxRuntimeOnnxFilename)) {
config->set_backend(kOnnxRuntimeBackend);
} else if (
config->platform().empty() &&
config->default_model_filename().empty() && has_version) {
if (version_dir_content.find(kOnnxRuntimeOnnxFilename) !=
version_dir_content.end()) {
config->set_backend(kOnnxRuntimeBackend);
}
}
}
if (config->backend() == kOnnxRuntimeBackend) {
if (config->platform().empty()) {
config->set_platform(kOnnxRuntimeOnnxPlatform);
}
if (config->default_model_filename().empty()) {
config->set_default_model_filename(kOnnxRuntimeOnnxFilename);
}
return Status::Success;
}
if (config->backend().empty()) {
if (config->default_model_filename() == kOpenVINORuntimeOpenVINOFilename) {
config->set_backend(kOpenVINORuntimeBackend);
} else if (
config->platform().empty() &&
config->default_model_filename().empty() && has_version) {
if (version_dir_content.find(kOpenVINORuntimeOpenVINOFilename) !=
version_dir_content.end()) {
config->set_backend(kOpenVINORuntimeBackend);
}
}
}
if (config->backend() == kOpenVINORuntimeBackend) {
if (config->default_model_filename().empty()) {
config->set_default_model_filename(kOpenVINORuntimeOpenVINOFilename);
}
return Status::Success;
}
if (config->backend().empty()) {
if ((config->platform() == kPyTorchLibTorchPlatform) ||
(config->default_model_filename() == kPyTorchLibTorchFilename)) {
config->set_backend(kPyTorchBackend);
} else if (
config->platform().empty() &&
config->default_model_filename().empty() && has_version) {
bool is_dir = false;
if (version_dir_content.find(kPyTorchLibTorchFilename) !=
version_dir_content.end()) {
RETURN_IF_ERROR(IsDirectory(
JoinPath({version_path, kPyTorchLibTorchFilename}), &is_dir));
if (!is_dir) {
config->set_backend(kPyTorchBackend);
}
}
}
}
if (config->backend() == kPyTorchBackend) {
if (config->platform().empty()) {
config->set_platform(kPyTorchLibTorchPlatform);
}
if (config->runtime() != kPythonFilename &&
config->default_model_filename().empty()) {
config->set_default_model_filename(kPyTorchLibTorchFilename);
}
return Status::Success;
}
if (config->backend().empty()) {
if (config->default_model_filename() == kPythonFilename) {
config->set_backend(kPythonBackend);
} else if (
config->platform().empty() &&
config->default_model_filename().empty() && has_version) {
if (version_dir_content.find(kPythonFilename) !=
version_dir_content.end()) {
config->set_backend(kPythonBackend);
}
}
}
if (config->backend() == kPythonBackend) {
if (config->default_model_filename().empty()) {
config->set_default_model_filename(kPythonFilename);
}
return Status::Success;
}
if (config->backend().empty() && config->platform().empty() &&
config->default_model_filename().empty()) {
LOG_VERBOSE(1) << "Could not infer supported backend, so attempting "
"autofill of custom backend.";
const std::string delimiter = ".";
size_t pos = model_name.find(delimiter, 0);
if (pos == std::string::npos) {
return Status(
triton::common::Error::Code::INVALID_ARG,
("Invalid model name: Could not determine backend for model '" +
model_name +
"' with no backend in model configuration. Expected model name of "
"the form 'model.<backend_name>'."));
}
const std::string backend_name =
model_name.substr(pos + 1, std::string::npos);
config->set_backend(backend_name);
config->set_default_model_filename(
(std::string("model.") + backend_name).c_str());
return Status::Success;
}
return Status::Success;
}
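// Validate all model inputs, outputs and batch inputs/outputs,
// qualifying any error message with the model name.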
Status
ValidateModelIOConfig(const inference::ModelConfig& config)
{
Status status;
for (const auto& io : config.input()) {
status = ValidateModelInput(io, config.max_batch_size(), config.platform());
if (!status.IsOk()) {
return Status(
status.StatusCode(), status.Message() + " for " + config.name());
}
}
for (const auto& io : config.output()) {
status =
ValidateModelOutput(io, config.max_batch_size(), config.platform());
if (!status.IsOk()) {
return Status(
status.StatusCode(), status.Message() + " for " + config.name());
}
}
status = ValidateBatchIO(config);
if (!status.IsOk()) {
return Status(
status.StatusCode(), status.Message() + " for " + config.name());
}
return Status::Success;
}
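// Validate batch_input and batch_output entries: a known kind, exactly
// one source input where the kind requires it, TYPE_INT32 or TYPE_FP32
// data type for batch inputs, and source/target names that refer to
// declared model inputs/outputs.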
Status
ValidateBatchIO(const inference::ModelConfig& config)
{
std::set<std::string> input_names;
std::set<std::string> output_names;
for (const auto& io : config.input()) {
input_names.emplace(io.name());
}
for (const auto& io : config.output()) {
output_names.emplace(io.name());
}
for (const auto& batch_io : config.batch_input()) {
switch (batch_io.kind()) {
case inference::BatchInput::BATCH_ELEMENT_COUNT:
case inference::BatchInput::BATCH_ACCUMULATED_ELEMENT_COUNT:
case inference::BatchInput::BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO:
case inference::BatchInput::BATCH_MAX_ELEMENT_COUNT_AS_SHAPE:
case inference::BatchInput::BATCH_ITEM_SHAPE:
case inference::BatchInput::BATCH_ITEM_SHAPE_FLATTEN: {
if (batch_io.source_input_size() != 1) {
return Status(
Status::Code::INVALID_ARG,
"batch input kind '" +
inference::BatchInput::Kind_Name(batch_io.kind()) +
"' expects 1 source input, got " +
std::to_string(batch_io.source_input_size()));
}
break;
}
default:
return Status(
Status::Code::INVALID_ARG,
"unknown batch input kind '" +
inference::BatchInput::Kind_Name(batch_io.kind()) + "'");
}
if ((batch_io.data_type() != inference::DataType::TYPE_INT32) &&
(batch_io.data_type() != inference::DataType::TYPE_FP32)) {
return Status(
Status::Code::INVALID_ARG,
"batch input data type must be TYPE_INT32 or TYPE_FP32");
}
for (const auto& source_name : batch_io.source_input()) {
if (input_names.find(source_name) == input_names.end()) {
return Status(
Status::Code::INVALID_ARG,
"unknown source input name '" + source_name + "'");
}
}
}
for (const auto& batch_io : config.batch_output()) {
switch (batch_io.kind()) {
case inference::BatchOutput::BATCH_SCATTER_WITH_INPUT_SHAPE: {
if (batch_io.source_input_size() != 1) {
return Status(
Status::Code::INVALID_ARG,
"batch output kind '" +
inference::BatchOutput::Kind_Name(batch_io.kind()) +
"' expects 1 source input, got " +
std::to_string(batch_io.source_input_size()));
}
break;
}
default:
return Status(
Status::Code::INVALID_ARG,
"unknown batch output kind '" +
inference::BatchOutput::Kind_Name(batch_io.kind()) + "'");
}
for (const auto& source_name : batch_io.source_input()) {
if (input_names.find(source_name) == input_names.end()) {
return Status(
Status::Code::INVALID_ARG,
"unknown source input name '" + source_name + "'");
}
}
std::set<std::string> target_names;
for (const auto& target_name : batch_io.target_name()) {
if (output_names.find(target_name) == output_names.end()) {
return Status(
Status::Code::INVALID_ARG,
"unknown target output name '" + target_name + "'");
}
if (target_names.emplace(target_name).second == false) {
return Status(
Status::Code::INVALID_ARG, "target output name '" + target_name +
"' can only be specified once");
}
}
}
return Status::Success;
}
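// Validate a complete model configuration: name, platform/backend
// consistency, max_batch_size, version policy, dynamic batching
// settings (preferred sizes, priority levels, queue policies), sequence
// batching controls, ensemble scheduling, and response cache
// restrictions.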
Status
ValidateModelConfig(
const inference::ModelConfig& config, const double min_compute_capability)
{
if (config.name().empty()) {
return Status(
Status::Code::INVALID_ARG, "model configuration must specify 'name'");
}
if (config.backend().empty()) {
#ifdef TRITON_ENABLE_ENSEMBLE
if (config.platform() != kEnsemblePlatform)
#endif
return Status(
Status::Code::INVALID_ARG, "unexpected platform type '" +
config.platform() + "' for " +
config.name());
}
#ifdef TRITON_ENABLE_ENSEMBLE
else if (config.platform() == kEnsemblePlatform) {
return Status(
Status::Code::INVALID_ARG,
"Ensemble model '" + config.name() + "' must have platform type '" +
config.platform() + "' and empty backend type");
}
#endif
if (config.platform().empty() && config.backend().empty()) {
return Status(
Status::Code::INVALID_ARG,
"must specify 'platform' or 'backend' for '" + config.name() + "'");
}
auto backend_type = GetBackendType(config.backend());
if ((backend_type != BackendType::BACKEND_TYPE_UNKNOWN) &&
(backend_type != GetBackendTypeFromPlatform(config.platform()))) {
return Status(
Status::Code::INVALID_ARG,
"unexpected 'platform' and 'backend' pair, got:" + config.platform() +
", " + config.backend());
}
if (config.max_batch_size() < 0) {
return Status(
Status::Code::INVALID_ARG,
"'max_batch_size' must be non-negative value for " + config.name());
}
if (!config.has_version_policy()) {
return Status(
Status::Code::INVALID_ARG,
"must specify 'version policy' for " + config.name());
}
if (config.has_dynamic_batching()) {
for (const auto size : config.dynamic_batching().preferred_batch_size()) {
if (size <= 0) {
return Status(
Status::Code::INVALID_ARG,
"dynamic batching preferred size must be positive for " +
config.name());
}
if (size > config.max_batch_size()) {
return Status(
Status::Code::INVALID_ARG,
"dynamic batching preferred size must be <= max batch size for " +
config.name());
}
}
const auto priority_levels = config.dynamic_batching().priority_levels();
if (priority_levels != 0) {
if ((config.dynamic_batching().default_priority_level() == 0) ||
(config.dynamic_batching().default_priority_level() >
priority_levels)) {
return Status(
Status::Code::INVALID_ARG,
"default priority level must be in range [1, " +
std::to_string(priority_levels) + "] for " + config.name());
}
for (const auto& queue_policy :
config.dynamic_batching().priority_queue_policy()) {
if ((queue_policy.first == 0) ||
(queue_policy.first > priority_levels)) {
return Status(
Status::Code::INVALID_ARG,
"priority queue policy must have priority level in range [1, " +
std::to_string(priority_levels) + "] for " + config.name());
}
}
}
if (config.dynamic_batching().preserve_ordering()) {
if (priority_levels > 1) {
return Status(
Status::Code::INVALID_ARG,
"Only one priority level is allowed when 'preserve_ordering' is "
"true for " +
config.name());
}
const auto& default_policy =
config.dynamic_batching().default_queue_policy();
if ((default_policy.default_timeout_microseconds() != 0) &&
(default_policy.timeout_action() ==
inference::ModelQueuePolicy::DELAY)) {
return Status(
Status::Code::INVALID_ARG,
"Queue policy can not have DELAY as timeout action when "
"'preserve_ordering' is true for " +
config.name());
}
for (const auto& policy :
config.dynamic_batching().priority_queue_policy()) {
if ((policy.second.default_timeout_microseconds() != 0) &&
(policy.second.timeout_action() ==
inference::ModelQueuePolicy::DELAY)) {
return Status(
Status::Code::INVALID_ARG,
"Queue policy can not have DELAY as timeout action when "
"'preserve_ordering' is true for " +
config.name());
}
}
}
}
if (config.has_sequence_batching()) {
if (config.response_cache().enable()) {
return Status(
Status::Code::INVALID_ARG,
"Response Cache does not currently support model " + config.name() +
" with sequence batching scheduler. Please disable the response "
"cache.");
}
const auto& batcher = config.sequence_batching();
std::string tensor_name;
RETURN_IF_ERROR(GetBooleanSequenceControlProperties(
batcher, config.name(),
inference::ModelSequenceBatching::Control::CONTROL_SEQUENCE_START,
false /* required */, &tensor_name, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr));
RETURN_IF_ERROR(GetBooleanSequenceControlProperties(
batcher, config.name(),
inference::ModelSequenceBatching::Control::CONTROL_SEQUENCE_END,
false /* required */, &tensor_name, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr));
RETURN_IF_ERROR(GetBooleanSequenceControlProperties(
batcher, config.name(),
inference::ModelSequenceBatching::Control::CONTROL_SEQUENCE_READY,
false /* required */, &tensor_name, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr));
inference::DataType tensor_datatype;
RETURN_IF_ERROR(GetTypedSequenceControlProperties(
batcher, config.name(),
inference::ModelSequenceBatching::Control::CONTROL_SEQUENCE_CORRID,
false /* required */, &tensor_name, &tensor_datatype));
if (!tensor_name.empty()) {
if ((tensor_datatype != inference::DataType::TYPE_UINT64) &&
(tensor_datatype != inference::DataType::TYPE_INT64) &&
(tensor_datatype != inference::DataType::TYPE_UINT32) &&
(tensor_datatype != inference::DataType::TYPE_INT32) &&
(tensor_datatype != inference::DataType::TYPE_STRING)) {
return Status(
Status::Code::INVALID_ARG,
"unexpected data type for control " +
inference::ModelSequenceBatching_Control_Kind_Name(
inference::ModelSequenceBatching::Control::
CONTROL_SEQUENCE_CORRID) +
" for " + config.name() +
". Allowed data types are TYPE_UINT64, TYPE_INT64, "
"TYPE_UINT32, "
"TYPE_INT32 and TYPE_STRING");
}
}
if (config.sequence_batching().has_oldest()) {
for (const auto size :
config.sequence_batching().oldest().preferred_batch_size()) {
if (size <= 0) {
return Status(
Status::Code::INVALID_ARG,
"sequence batching preferred batch size must be positive for " +
config.name());
}
if (size > config.max_batch_size()) {
return Status(
Status::Code::INVALID_ARG,
"sequence batching preferred batch size must be <= max batch "
"size for " +
config.name());
}
}
}
if (config.sequence_batching().has_direct()) {
if ((config.sequence_batching().direct().minimum_slot_utilization() <
0.0) ||
(config.sequence_batching().direct().minimum_slot_utilization() >
1.0)) {
return Status(
Status::Code::INVALID_ARG,
"sequence batching minimum slot utilization must be in range "
"(0.0, 1.0] for " +
config.name());
}
}
}
if (config.has_ensemble_scheduling()) {
#ifdef TRITON_ENABLE_ENSEMBLE
RETURN_IF_ERROR(ValidateEnsembleSchedulingConfig(config));
#else
return Status(
Status::Code::INVALID_ARG, "ensemble scheduling not supported");
#endif
}
#ifdef TRITON_ENABLE_ENSEMBLE
else if (config.platform() == kEnsemblePlatform) {
return Status(
Status::Code::INVALID_ARG,
"ensemble scheduling must be set for ensemble " + config.name() +
" whose platform is " + kEnsemblePlatform);
}
#endif
if (config.model_transaction_policy().decoupled() &&
config.response_cache().enable()) {
return Status(
Status::Code::INVALID_ARG,
"Response Cache does not currently support model " + config.name() +
" with 'decoupled' transaction policy. Please disable the response"
" cache.");
}
return Status::Success;
}
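// Validate instance groups against the GPUs available on this server.
// The 'profile' field is only allowed for TensorRT models and must hold
// the string form of a non-negative integer.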
Status
ValidateInstanceGroup(
const inference::ModelConfig& config, const double min_compute_capability)
{
if (config.has_ensemble_scheduling()) {
return Status::Success;
}
if (config.instance_group().size() == 0) {
return Status(
Status::Code::INVALID_ARG,
"must specify one or more 'instance group's for " + config.name());
}
#ifdef TRITON_ENABLE_GPU
std::set<int> supported_gpus;
Status status = GetSupportedGPUs(&supported_gpus, min_compute_capability);
if (!status.IsOk()) {
return status;
}
#endif
for (const auto& group : config.instance_group()) {
if (group.kind() == inference::ModelInstanceGroup::KIND_MODEL) {
if (group.gpus().size() > 0) {
return Status(
Status::Code::INVALID_ARG,
"instance group " + group.name() + " of model " + config.name() +
" has kind KIND_MODEL but specifies one or more GPUs");
}
} else if (group.kind() == inference::ModelInstanceGroup::KIND_GPU) {
#if !defined(TRITON_ENABLE_GPU) && !defined(TRITON_ENABLE_MALI_GPU)
return Status(
Status::Code::INVALID_ARG,
"instance group " + group.name() + " of model " + config.name() +
" has kind KIND_GPU but server does not support GPUs");
#elif defined(TRITON_ENABLE_GPU)
if (group.gpus().size() == 0) {
if (supported_gpus.size() == 0) {
return Status(
Status::Code::INVALID_ARG,
"instance group " + group.name() + " of model " + config.name() +
" has kind KIND_GPU but no GPUs are available");
} else {
return Status(
Status::Code::INVALID_ARG,
"instance group " + group.name() + " of model " + config.name() +
" has kind KIND_GPU but specifies no GPUs");
}
}
for (const int32_t gid : group.gpus()) {
if (supported_gpus.find(gid) == supported_gpus.end()) {
std::string supported_gpus_str;
for (const auto& cc : supported_gpus) {
if (!supported_gpus_str.empty()) {
supported_gpus_str += ", ";
}
supported_gpus_str += std::to_string(cc);
}
return Status(
Status::Code::INVALID_ARG,
"instance group " + group.name() + " of model " + config.name() +
" specifies invalid or unsupported gpu id " +
std::to_string(gid) +
". GPUs with at least the minimum required CUDA compute "
"compatibility of " +
std::to_string(min_compute_capability) +
" are: " + supported_gpus_str);
}
}
#endif
} else if (group.kind() == inference::ModelInstanceGroup::KIND_CPU) {
if (group.gpus().size() > 0) {
return Status(
Status::Code::INVALID_ARG,
"instance group " + group.name() + " of model " + config.name() +
" has kind KIND_CPU but specifies one or more GPUs");
}
} else {
return Status(
Status::Code::INTERNAL, "instance group " + group.name() +
" of model " + config.name() +
" has unexpected kind KIND_AUTO");
}
if ((config.platform() != kTensorRTPlanPlatform) &&
!group.profile().empty()) {
return Status(
Status::Code::INVALID_ARG,
"instance group " + group.name() + " of model " + config.name() +
" and platform " + config.platform() +
"specifies profile field which is only supported for "
"TensorRT models");
} else if (!group.profile().empty()) {
for (const auto& profile : group.profile()) {
int profile_index;
RETURN_IF_ERROR(GetProfileIndex(profile, &profile_index));
if (profile_index < 0) {
return Status(
Status::Code::INVALID_ARG,
"instance group " + group.name() + " of model " + config.name() +
" and platform " + config.platform() +
" specifies invalid profile " + profile +
". The field should contain the string representation of a "
"non-negative integer.");
}
}
}
}
return Status::Success;
}
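// Validate a model input: shape checks, NHWC/NCHW formats require 3
// dims, and shape tensors and non-linear IO formats are TensorRT-only.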
Status
ValidateModelInput(
const inference::ModelInput& io, int32_t max_batch_size,
const std::string& platform)
{
RETURN_IF_ERROR(ValidateIOShape(io, max_batch_size, "model input "));
if (((io.format() == inference::ModelInput::FORMAT_NHWC) ||
(io.format() == inference::ModelInput::FORMAT_NCHW)) &&
(io.dims_size() != 3)) {
return Status(
Status::Code::INVALID_ARG, "model input NHWC/NCHW require 3 dims");
}
if ((platform != kTensorRTPlanPlatform) && io.is_shape_tensor()) {
return Status(
Status::Code::INVALID_ARG,
"shape tensors are only supported for TensorRT platform");
}
RETURN_IF_ERROR(ValidateNonLinearFormatIO(io, platform, true /* is_input */));
return Status::Success;
}
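// Check that the input name is one of the allowed names, listing the
// allowed names in the error message otherwise.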
Status
CheckAllowedModelInput(
const inference::ModelInput& io, const std::set<std::string>& allowed)
{
if (allowed.find(io.name()) == allowed.end()) {
std::string astr;
for (const auto& a : allowed) {
if (!astr.empty()) {
astr.append(", ");
}
astr.append(a);
}
return Status(
Status::Code::INVALID_ARG, "unexpected inference input '" + io.name() +
"', allowed inputs are: " + astr);
}
return Status::Success;
}
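// Validate a model output: shape checks, and shape tensors and
// non-linear IO formats are TensorRT-only.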
Status
ValidateModelOutput(
const inference::ModelOutput& io, int32_t max_batch_size,
const std::string& platform)
{
RETURN_IF_ERROR(ValidateIOShape(io, max_batch_size, "model output "));
if ((platform != kTensorRTPlanPlatform) && io.is_shape_tensor()) {
return Status(
Status::Code::INVALID_ARG,
"shape tensors are only supported for TensorRT platform");
}
RETURN_IF_ERROR(ValidateNonLinearFormatIO(io, platform, false /* is_input */));
return Status::Success;
}
Status
CheckAllowedModelOutput(
const inference::ModelOutput& io, const std::set<std::string>& allowed)
{
if (allowed.find(io.name()) == allowed.end()) {
std::string astr;
for (const auto& a : allowed) {
if (!astr.empty()) {
astr.append(", ");
}
astr.append(a);
}
return Status(
Status::Code::INVALID_ARG, "unexpected inference output '" + io.name() +
"', allowed outputs are: " + astr);
}
return Status::Success;
}
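// Parse "true"/"1" and "false"/"0" (case-insensitive) into a boolean.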
Status
ParseBoolParameter(
const std::string& key, std::string value, bool* parsed_value)
{
std::transform(
value.begin(), value.end(), value.begin(),
[](unsigned char c) { return std::tolower(c); });
if ((value == "true") || (value == "1")) {
*parsed_value = true;
} else if ((value == "false") || (value == "0")) {
*parsed_value = false;
} else {
return Status(
Status::Code::INVALID_ARG,
"failed to convert " + key + " '" + value + "' to boolean value");
}
return Status::Success;
}
Status
ParseLongLongParameter(
const std::string& key, const std::string& value, int64_t* parsed_value)
{
try {
*parsed_value = std::stoll(value);
}
catch (const std::invalid_argument& ia) {
return Status(
Status::Code::INVALID_ARG,
"failed to convert " + key + " '" + value + "' to integral number");
}
return Status::Success;
}
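// Parse a TensorRT optimization profile name into its integer index.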
Status
GetProfileIndex(const std::string& profile_name, int* profile_index)
{
if (profile_name.empty()) {
return Status(Status::Code::INVALID_ARG, "profile name must not be empty");
}
try {
*profile_index = stoi(profile_name);
}
catch (const std::invalid_argument& ia) {
return Status(
Status::Code::INVALID_ARG,
"unable to parse '" + profile_name + "': " + ia.what());
}
return Status::Success;
}
namespace {
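// Recursively collect the fully qualified names of all 64-bit integer
// fields of a protobuf message, instantiating nested messages so that
// every field is visited.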
Status
CollectInt64Fields(
google::protobuf::Message* message, const std::string& prefix,
std::set<std::string>* int64_fields)
{
const google::protobuf::Descriptor* desc = message->GetDescriptor();
const google::protobuf::Reflection* refl = message->GetReflection();
for (int i = 0; i < desc->field_count(); ++i) {
const google::protobuf::FieldDescriptor* field = desc->field(i);
const std::string fullname = prefix + "::" + field->name();
switch (field->type()) {
case google::protobuf::FieldDescriptor::TYPE_MESSAGE: {
if (field->is_repeated()) {
int rsize = refl->FieldSize(*message, field);
if (rsize == 0) {
refl->AddMessage(message, field);
}
rsize = refl->FieldSize(*message, field);
for (int r = 0; r < rsize; ++r) {
RETURN_IF_ERROR(CollectInt64Fields(
refl->MutableRepeatedMessage(message, field, r), fullname,
int64_fields));
}
} else {
RETURN_IF_ERROR(CollectInt64Fields(
refl->MutableMessage(message, field), fullname, int64_fields));
}
} break;
case google::protobuf::FieldDescriptor::TYPE_INT64:
case google::protobuf::FieldDescriptor::TYPE_UINT64:
case google::protobuf::FieldDescriptor::TYPE_SINT64:
case google::protobuf::FieldDescriptor::TYPE_FIXED64:
case google::protobuf::FieldDescriptor::TYPE_SFIXED64:
int64_fields->insert(fullname);
break;
default:
break;
}
}
return Status::Success;
}
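// Guard against new 64-bit fields being added to ModelConfig without
// updating ModelConfigToJson: the set of 64-bit fields found via
// reflection must match the expected set below.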
Status
ValidateModelConfigInt64()
{
inference::ModelConfig config;
std::set<std::string> int64_fields;
RETURN_IF_ERROR(CollectInt64Fields(&config, "ModelConfig", &int64_fields));
LOG_VERBOSE(1) << "ModelConfig 64-bit fields:";
for (const auto& f : int64_fields) {
LOG_VERBOSE(1) << "\t" << f;
}
std::set<std::string> expected{
"ModelConfig::input::dims",
"ModelConfig::input::reshape::shape",
"ModelConfig::output::dims",
"ModelConfig::output::reshape::shape",
"ModelConfig::version_policy::specific::versions",
"ModelConfig::dynamic_batching::max_queue_delay_microseconds",
"ModelConfig::dynamic_batching::default_queue_policy::default_timeout_"
"microseconds",
"ModelConfig::dynamic_batching::priority_queue_policy::value::default_"
"timeout_microseconds",
"ModelConfig::dynamic_batching::priority_levels",
"ModelConfig::dynamic_batching::priority_queue_policy::key",
"ModelConfig::dynamic_batching::default_priority_level",
"ModelConfig::sequence_batching::direct::max_queue_delay_microseconds",
"ModelConfig::sequence_batching::state::dims",
"ModelConfig::sequence_batching::state::initial_state::dims",
"ModelConfig::sequence_batching::oldest::max_queue_delay_microseconds",
"ModelConfig::sequence_batching::max_sequence_idle_microseconds",
"ModelConfig::ensemble_scheduling::step::model_version",
"ModelConfig::model_warmup::inputs::value::dims",
"ModelConfig::optimization::cuda::graph_spec::input::value::dim",
"ModelConfig::optimization::cuda::graph_spec::graph_lower_bound::input::"
"value::dim",
"ModelConfig::instance_group::secondary_devices::device_id"};
if (int64_fields != expected) {
return Status(
Status::Code::INTERNAL, "ModelConfig 64-bit field needs update");
}
return Status::Success;
}
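// The protobuf JSON printer emits 64-bit integers as JSON strings;
// FixUInt and FixInt convert the named member back to a number in
// place.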
Status
FixUInt(
triton::common::TritonJson::Value& document,
triton::common::TritonJson::Value& io, const std::string& name)
{
triton::common::TritonJson::Value str_value;
if (!io.Find(name.c_str(), &str_value)) {
return Status::Success;
}
std::string str;
RETURN_IF_ERROR(str_value.AsString(&str));
uint64_t d;
try {
d = std::strtoull(str.c_str(), nullptr, 10);
}
catch (...) {
return Status(
Status::Code::INTERNAL,
(std::string("unable to convert '") + str + "' to unsigned integer"));
}
str_value.SetUInt(d);
return Status::Success;
}
Status
FixInt(
triton::common::TritonJson::Value& document,
triton::common::TritonJson::Value& io, const std::string& name)
{
triton::common::TritonJson::Value str_value;
if (!io.Find(name.c_str(), &str_value)) {
return Status::Success;
}
std::string str;
RETURN_IF_ERROR(str_value.AsString(&str));
int64_t d;
try {
d = std::atoll(str.c_str());
}
catch (...) {
return Status(
Status::Code::INTERNAL,
(std::string("unable to convert '") + str + "' to integer"));
}
str_value.SetInt(d);
return Status::Success;
}
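// Convert an array of string-encoded integers (e.g. 'dims') back to an
// integer array in place.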
Status
FixIntArray(
triton::common::TritonJson::Value& document,
triton::common::TritonJson::Value& io, const std::string& name)
{
triton::common::TritonJson::Value fixed_shape_array(
document, triton::common::TritonJson::ValueType::ARRAY);
if (!io.Find(name.c_str())) {
return Status::Success;
}
triton::common::TritonJson::Value shape_array;
RETURN_IF_ERROR(io.MemberAsArray(name.c_str(), &shape_array));
for (size_t i = 0; i < shape_array.ArraySize(); ++i) {
std::string str;
RETURN_IF_ERROR(shape_array.IndexAsString(i, &str));
int64_t d;
try {
d = std::atoll(str.c_str());
}
catch (...) {
return Status(
Status::Code::INTERNAL,
(std::string("unable to convert '") + str + "' to integer"));
}
RETURN_IF_ERROR(fixed_shape_array.AppendInt(d));
}
shape_array.Swap(fixed_shape_array);
fixed_shape_array.Release();
return Status::Success;
}
Status
FixObjectArray(
triton::common::TritonJson::Value& document,
triton::common::TritonJson::Value& arr, const std::string& name)
{
for (size_t i = 0; i < arr.ArraySize(); ++i) {
triton::common::TritonJson::Value obj;
RETURN_IF_ERROR(arr.IndexAsObject(i, &obj));
RETURN_IF_ERROR(FixInt(document, obj, name));
}
return Status::Success;
}
}  // namespace
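// Convert a model configuration to its JSON representation. The known
// 64-bit fields, which protobuf serializes as JSON strings, are then
// rewritten as JSON numbers.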
Status
ModelConfigToJson(
const inference::ModelConfig& config, const uint32_t config_version,
std::string* json_str)
{
if (config_version != 1) {
return Status(
Status::Code::INVALID_ARG,
std::string("model configuration version ") +
std::to_string(config_version) +
" not supported, supported versions are: 1");
}
if (config.ByteSizeLong() == 0) {
json_str->clear();
return Status::Success;
}
std::string config_json_str;
::google::protobuf::util::JsonPrintOptions options;
options.preserve_proto_field_names = true;
options.always_print_primitive_fields = true;
::google::protobuf::util::MessageToJsonString(
config, &config_json_str, options);
{
static std::once_flag fonce;
Status status = Status::Success;
std::call_once(fonce, [&status] { status = ValidateModelConfigInt64(); });
RETURN_IF_ERROR(status);
}
triton::common::TritonJson::Value config_json;
config_json.Parse(config_json_str);
for (std::string name : {"input", "output"}) {
triton::common::TritonJson::Value ios;
RETURN_IF_ERROR(config_json.MemberAsArray(name.c_str(), &ios));
for (size_t i = 0; i < ios.ArraySize(); ++i) {
triton::common::TritonJson::Value io;
RETURN_IF_ERROR(ios.IndexAsObject(i, &io));
RETURN_IF_ERROR(FixIntArray(config_json, io, "dims"));
triton::common::TritonJson::Value reshape;
if (io.Find("reshape", &reshape)) {
RETURN_IF_ERROR(FixIntArray(config_json, reshape, "shape"));
}
}
}
{
triton::common::TritonJson::Value vp;
if (config_json.Find("version_policy", &vp)) {
triton::common::TritonJson::Value specific;
if (vp.Find("specific", &specific)) {
RETURN_IF_ERROR(FixIntArray(config_json, specific, "versions"));
}
}
}
{
triton::common::TritonJson::Value db;
if (config_json.Find("dynamic_batching", &db)) {
RETURN_IF_ERROR(FixUInt(config_json, db, "max_queue_delay_microseconds"));
RETURN_IF_ERROR(FixUInt(config_json, db, "priority_levels"));
RETURN_IF_ERROR(FixUInt(config_json, db, "default_priority_level"));
triton::common::TritonJson::Value dqp;
if (db.Find("default_queue_policy", &dqp)) {
RETURN_IF_ERROR(
FixUInt(config_json, dqp, "default_timeout_microseconds"));
}
triton::common::TritonJson::Value pqp;
if (db.Find("priority_queue_policy", &pqp)) {
std::vector<std::string> members;
RETURN_IF_ERROR(pqp.Members(&members));
for (const auto& m : members) {
triton::common::TritonJson::Value el;
RETURN_IF_ERROR(pqp.MemberAsObject(m.c_str(), &el));
RETURN_IF_ERROR(
FixUInt(config_json, el, "default_timeout_microseconds"));
}
}
}
}
{
triton::common::TritonJson::Value sb;
if (config_json.Find("sequence_batching", &sb)) {
RETURN_IF_ERROR(
FixUInt(config_json, sb, "max_sequence_idle_microseconds"));
triton::common::TritonJson::Value oldest;
if (sb.Find("oldest", &oldest)) {
RETURN_IF_ERROR(
FixUInt(config_json, oldest, "max_queue_delay_microseconds"));
}
triton::common::TritonJson::Value direct;
if (sb.Find("direct", &direct)) {
RETURN_IF_ERROR(
FixUInt(config_json, direct, "max_queue_delay_microseconds"));
}
triton::common::TritonJson::Value states;
if (sb.Find("state", &states)) {
for (size_t i = 0; i < states.ArraySize(); ++i) {
triton::common::TritonJson::Value state;
RETURN_IF_ERROR(states.IndexAsObject(i, &state));
RETURN_IF_ERROR(FixIntArray(config_json, state, "dims"));
triton::common::TritonJson::Value initial_state;
if (sb.Find("initial_state", &initial_state)) {
RETURN_IF_ERROR(FixIntArray(config_json, initial_state, "dims"));
}
}
}
}
}
{
triton::common::TritonJson::Value ens;
if (config_json.Find("ensemble_scheduling", &ens)) {
triton::common::TritonJson::Value step;
if (ens.Find("step", &step)) {
RETURN_IF_ERROR(FixObjectArray(config_json, step, "model_version"));
}
}
}
{
triton::common::TritonJson::Value warmups;
if (config_json.Find("model_warmup", &warmups)) {
for (size_t i = 0; i < warmups.ArraySize(); ++i) {
triton::common::TritonJson::Value warmup;
RETURN_IF_ERROR(warmups.IndexAsObject(i, &warmup));
triton::common::TritonJson::Value inputs;
if (warmup.Find("inputs", &inputs)) {
std::vector<std::string> members;
RETURN_IF_ERROR(inputs.Members(&members));
for (const auto& m : members) {
triton::common::TritonJson::Value input;
RETURN_IF_ERROR(inputs.MemberAsObject(m.c_str(), &input));
RETURN_IF_ERROR(FixIntArray(config_json, input, "dims"));
}
}
}
}
}
triton::common::TritonJson::WriteBuffer buffer;
RETURN_IF_ERROR(config_json.Write(&buffer));
*json_str = std::move(buffer.MutableContents());
return Status::Success;
}
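// Parse a JSON model configuration into its protobuf representation,
// accepting case-insensitive enum values and rejecting unknown fields.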
Status
JsonToModelConfig(
const std::string& json_config, const uint32_t config_version,
inference::ModelConfig* protobuf_config)
{
if (config_version != 1) {
return Status(
Status::Code::INVALID_ARG,
std::string("model configuration version ") +
std::to_string(config_version) +
" not supported, supported versions are: 1");
}
::google::protobuf::util::JsonParseOptions options;
options.case_insensitive_enum_parsing = true;
options.ignore_unknown_fields = false;
auto err = ::google::protobuf::util::JsonStringToMessage(
json_config, protobuf_config, options);
if (!err.ok()) {
return Status(Status::Code::INVALID_ARG, std::string(err.message()));
}
return Status::Success;
}
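// Map a platform name to its backend type; returns
// BACKEND_TYPE_UNKNOWN if the platform has no well-known backend.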
BackendType
GetBackendTypeFromPlatform(const std::string& platform_name)
{
if ((platform_name == kTensorFlowGraphDefPlatform) ||
(platform_name == kTensorFlowSavedModelPlatform)) {
return BackendType::BACKEND_TYPE_TENSORFLOW;
}
if (platform_name == kTensorRTPlanPlatform) {
return BackendType::BACKEND_TYPE_TENSORRT;
}
if (platform_name == kOnnxRuntimeOnnxPlatform) {
return BackendType::BACKEND_TYPE_ONNXRUNTIME;
}
if (platform_name == kPyTorchLibTorchPlatform) {
return BackendType::BACKEND_TYPE_PYTORCH;
}
return BackendType::BACKEND_TYPE_UNKNOWN;
}
BackendType
GetBackendType(const std::string& backend_name)
{
if (backend_name == kTensorFlowBackend) {
return BackendType::BACKEND_TYPE_TENSORFLOW;
}
if (backend_name == kTensorRTBackend) {
return BackendType::BACKEND_TYPE_TENSORRT;
}
if (backend_name == kOnnxRuntimeBackend) {
return BackendType::BACKEND_TYPE_ONNXRUNTIME;
}
if (backend_name == kPyTorchBackend) {
return BackendType::BACKEND_TYPE_PYTORCH;
}
return BackendType::BACKEND_TYPE_UNKNOWN;
}
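// Convert between the model config data type enum and
// TRITONSERVER_DataType; TYPE_STRING maps to TRITONSERVER_TYPE_BYTES.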
TRITONSERVER_DataType
DataTypeToTriton(const inference::DataType dtype)
{
switch (dtype) {
case inference::DataType::TYPE_BOOL:
return TRITONSERVER_TYPE_BOOL;
case inference::DataType::TYPE_UINT8:
return TRITONSERVER_TYPE_UINT8;
case inference::DataType::TYPE_UINT16:
return TRITONSERVER_TYPE_UINT16;
case inference::DataType::TYPE_UINT32:
return TRITONSERVER_TYPE_UINT32;
case inference::DataType::TYPE_UINT64:
return TRITONSERVER_TYPE_UINT64;
case inference::DataType::TYPE_INT8:
return TRITONSERVER_TYPE_INT8;
case inference::DataType::TYPE_INT16:
return TRITONSERVER_TYPE_INT16;
case inference::DataType::TYPE_INT32:
return TRITONSERVER_TYPE_INT32;
case inference::DataType::TYPE_INT64:
return TRITONSERVER_TYPE_INT64;
case inference::DataType::TYPE_FP16:
return TRITONSERVER_TYPE_FP16;
case inference::DataType::TYPE_FP32:
return TRITONSERVER_TYPE_FP32;
case inference::DataType::TYPE_FP64:
return TRITONSERVER_TYPE_FP64;
case inference::DataType::TYPE_STRING:
return TRITONSERVER_TYPE_BYTES;
case inference::DataType::TYPE_BF16:
return TRITONSERVER_TYPE_BF16;
default:
break;
}
return TRITONSERVER_TYPE_INVALID;
}
inference::DataType
TritonToDataType(const TRITONSERVER_DataType dtype)
{
switch (dtype) {
case TRITONSERVER_TYPE_BOOL:
return inference::DataType::TYPE_BOOL;
case TRITONSERVER_TYPE_UINT8:
return inference::DataType::TYPE_UINT8;
case TRITONSERVER_TYPE_UINT16:
return inference::DataType::TYPE_UINT16;
case TRITONSERVER_TYPE_UINT32:
return inference::DataType::TYPE_UINT32;
case TRITONSERVER_TYPE_UINT64:
return inference::DataType::TYPE_UINT64;
case TRITONSERVER_TYPE_INT8:
return inference::DataType::TYPE_INT8;
case TRITONSERVER_TYPE_INT16:
return inference::DataType::TYPE_INT16;
case TRITONSERVER_TYPE_INT32:
return inference::DataType::TYPE_INT32;
case TRITONSERVER_TYPE_INT64:
return inference::DataType::TYPE_INT64;
case TRITONSERVER_TYPE_FP16:
return inference::DataType::TYPE_FP16;
case TRITONSERVER_TYPE_FP32:
return inference::DataType::TYPE_FP32;
case TRITONSERVER_TYPE_FP64:
return inference::DataType::TYPE_FP64;
case TRITONSERVER_TYPE_BYTES:
return inference::DataType::TYPE_STRING;
case TRITONSERVER_TYPE_BF16:
return inference::DataType::TYPE_BF16;
default:
break;
}
return inference::DataType::TYPE_INVALID;
}
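// A configuration change requires a full model reload unless it only
// touches the 'instance_group' or 'version_policy' fields.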
bool
ConfigChangeRequiresReload(
const inference::ModelConfig& old_config,
const inference::ModelConfig& new_config)
{
::google::protobuf::util::MessageDifferencer pb_diff;
pb_diff.IgnoreField(
old_config.descriptor()->FindFieldByLowercaseName("instance_group"));
pb_diff.IgnoreField(
old_config.descriptor()->FindFieldByLowercaseName("version_policy"));
return !pb_diff.Compare(old_config, new_config);
}
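// Two instance groups are equivalent if they differ only in 'name' and
// 'count'.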
bool
EquivalentInInstanceConfig(
const inference::ModelInstanceGroup& instance_config_lhs,
const inference::ModelInstanceGroup& instance_config_rhs)
{
::google::protobuf::util::MessageDifferencer pb_diff;
pb_diff.IgnoreField(
instance_config_lhs.descriptor()->FindFieldByLowercaseName("name"));
pb_diff.IgnoreField(
instance_config_lhs.descriptor()->FindFieldByLowercaseName("count"));
return pb_diff.Compare(instance_config_lhs, instance_config_rhs);
}
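// Serialize an instance group with 'name' and 'count' normalized so
// that equivalent groups produce identical signatures.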
std::string
InstanceConfigSignature(const inference::ModelInstanceGroup& instance_config)
{
inference::ModelInstanceGroup config = instance_config;
*config.mutable_name() = "[Normalized]";
config.set_count(1);
return config.SerializeAsString();
}
}}  // namespace triton::core