use crate::call;
use crate::common::PrecisionType;
use crate::config::SetConfig;
use crate::ctypes::{
PD_Config, PD_ConfigEnableCudnn, PD_ConfigEnableGpuMultiStream, PD_ConfigEnableMkldnnBfloat16,
PD_ConfigEnableONNXRuntime, PD_ConfigEnableORTOptimization, PD_ConfigEnableTensorRtDla,
PD_ConfigEnableTensorRtEngine, PD_ConfigEnableTensorRtOSS, PD_ConfigEnableUseGpu,
PD_ConfigEnableXpu, PD_ConfigSetBfloat16Op, PD_ConfigSetCpuMathLibraryNumThreads,
PD_ConfigSetMkldnnCacheCapacity, PD_ConfigSetMkldnnOp, PD_ConfigSetTrtDynamicShapeInfo,
};
use crate::utils::to_c_str;
use std::ptr::null;
/// CPU-side inference settings, applied to a raw Paddle config handle via
/// [`SetConfig`].
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Default, Clone)]
pub struct Cpu {
    /// Math-library thread count; only forwarded when `Some(n)` with `n > 0`
    /// (see `Cpu::set_to`).
    pub threads: Option<i32>,
    /// Optional MKL-DNN (oneDNN) acceleration settings.
    pub mkldnn: Option<Mkldnn>,
}
/// GPU-side inference settings, applied to a raw Paddle config handle via
/// [`SetConfig`].
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct Gpu {
    /// Initial size of the GPU memory pool, in megabytes.
    pub memory_pool_init_size_mb: u64,
    /// Device ordinal to run inference on.
    pub device_id: i32,
    /// Enables multi-stream GPU execution.
    pub enable_multi_stream: bool,
    /// Enables cuDNN acceleration.
    pub enable_cudnn: bool,
    /// When `Some`, additionally enables the TensorRT engine with these
    /// settings (see `Gpu::set_to`).
    pub enable_tensor_rt: Option<TensorRT>,
}
/// XPU (Baidu Kunlun) inference settings, applied to a raw Paddle config
/// handle via [`SetConfig`].
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct Xpu {
    /// L3 cache workspace size — presumably in bytes; TODO confirm against
    /// the Paddle `EnableXpu` documentation.
    pub l3_workspace_size: i32,
    /// Whether the L3 workspace is locked — semantics defined by the C API;
    /// TODO confirm.
    pub locked: bool,
    /// NOTE(review): almost certainly a typo for `autotune`, but renaming
    /// would break the public API (and, with the `serde` feature, the
    /// serialized field name), so it is kept as-is.
    pub autorune: bool,
    /// Optional path to an autotune file; passed to the C API as a NULL
    /// pointer when `None` (see `Xpu::set_to`).
    pub autotune_file: Option<String>,
    /// Precision mode, forwarded as a C string — accepted values are defined
    /// by the C API; TODO confirm.
    pub precision: String,
    /// Enables adaptive sequence length.
    pub adaptive_seqlen: bool,
}
/// ONNX Runtime backend settings, applied to a raw Paddle config handle via
/// [`SetConfig`].
///
/// NOTE(review): idiomatic Rust would spell this `OnnxRuntime`, but renaming
/// the public type would be a breaking change.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Copy, Clone)]
pub struct ONNXRuntime {
    /// Also enables ORT graph optimization after turning the backend on.
    pub enable_optimization: bool,
}
/// MKL-DNN (oneDNN) settings, nested inside [`Cpu`].
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Default, Clone)]
pub struct Mkldnn {
    /// Cache capacity; only forwarded when `Some(n)` with `n > 0`.
    pub cache_size: Option<i32>,
    /// Names of the ops to run through MKL-DNN.
    pub op: Option<Vec<String>>,
    /// Names of the ops to run in reduced precision. Despite the `f16` name,
    /// setting this drives the *bfloat16* C APIs
    /// (`PD_ConfigEnableMkldnnBfloat16` / `PD_ConfigSetBfloat16Op` — see
    /// `Cpu::set_to`).
    pub op_f16: Option<Vec<String>>,
}
/// TensorRT engine settings, nested inside [`Gpu`]. Fields map onto the
/// arguments of `PD_ConfigEnableTensorRtEngine` and friends (see
/// `Gpu::set_to`).
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct TensorRT {
    /// TensorRT workspace size — presumably in bytes; TODO confirm against
    /// the Paddle docs.
    pub workspace_size: i32,
    /// Maximum batch size the engine is built for.
    pub max_batch_size: i32,
    /// Minimum op count for a subgraph to be offloaded to TensorRT.
    pub min_subgraph_size: i32,
    /// Inference precision for the engine.
    pub precision_type: PrecisionType,
    /// "Static engine" flag — presumably serializes/reuses the built engine;
    /// TODO confirm.
    pub use_static: bool,
    /// Enables TensorRT calibration mode.
    pub use_calib_mode: bool,
    /// Per-tensor dynamic-shape ranges; skipped entirely when empty.
    pub dynamic_shape_info: Vec<DynamicShapeInfo>,
    /// Forwarded to `PD_ConfigSetTrtDynamicShapeInfo`; presumably disables
    /// fp16 in TensorRT plugins — TODO confirm.
    pub disable_plugin_fp16: bool,
    /// Enables the TensorRT OSS (open-source plugin) path.
    pub enable_oss: bool,
    /// When `Some`, runs the engine on the given DLA core.
    pub dla_core: Option<i32>,
}
/// Min/max/optimal shape range for one input tensor when TensorRT dynamic
/// shapes are in use. All three shape vectors must have the same length;
/// this is enforced by [`DynamicShapeInfo::check_and_get_shape_size`].
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct DynamicShapeInfo {
    /// Name of the input tensor this range applies to.
    pub name: String,
    /// Minimum shape, one entry per dimension.
    pub min_shape: Vec<i32>,
    /// Maximum shape, one entry per dimension.
    pub max_shape: Vec<i32>,
    /// Optimal shape, one entry per dimension.
    pub optim_shape: Vec<i32>,
}
impl DynamicShapeInfo {
    /// Validates that `min_shape`, `max_shape` and `optim_shape` all describe
    /// the same number of dimensions, and returns that common length.
    ///
    /// # Panics
    ///
    /// Panics when the three shape vectors differ in length.
    #[inline]
    pub fn check_and_get_shape_size(&self) -> usize {
        let dims = self.min_shape.len();
        assert!(
            dims == self.max_shape.len() && dims == self.optim_shape.len(),
            "DynamicShapeInfo 中所有shape的大小必须相同"
        );
        dims
    }
}
impl SetConfig for Cpu {
    /// Applies these CPU settings to the raw Paddle config handle `config`.
    ///
    /// Forwards the thread count (only when positive) and, when an MKL-DNN
    /// section is present, its cache capacity plus the MKL-DNN and bfloat16
    /// op lists.
    fn set_to(self, config: *mut PD_Config) {
        let Cpu { threads, mkldnn } = self;
        // `None` or a non-positive count leaves Paddle's default untouched.
        if let Some(t) = threads {
            if t > 0 {
                call! { PD_ConfigSetCpuMathLibraryNumThreads(config, t) };
            }
        }
        if let Some(Mkldnn {
            cache_size,
            op,
            op_f16,
        }) = mkldnn
        {
            // Same guard as `threads`: only a positive capacity is applied.
            if let Some(cache) = cache_size {
                if cache > 0 {
                    call! { PD_ConfigSetMkldnnCacheCapacity(config, cache) };
                }
            }
            if let Some(op) = op {
                // `to_c_str` yields an (owned, raw-pointer) pair per string;
                // `_l` retains the owned halves so the pointers in `r` remain
                // valid for the FFI call below (presumably CString-backed —
                // TODO confirm in `utils::to_c_str`).
                let (_l, mut r): (Vec<_>, Vec<_>) = op.iter().map(|s| to_c_str(s)).unzip();
                let size = r.len();
                call! { PD_ConfigSetMkldnnOp(config, size, r.as_mut_ptr()) };
            }
            if let Some(op) = op_f16 {
                // Enable the MKL-DNN bfloat16 path, then register which ops
                // should use it (note: the `op_f16` field feeds the bfloat16
                // APIs, not an fp16 one).
                call! { PD_ConfigEnableMkldnnBfloat16(config) };
                let (_l, mut r): (Vec<_>, Vec<_>) = op.iter().map(|s| to_c_str(s)).unzip();
                let size = r.len();
                call! { PD_ConfigSetBfloat16Op(config, size, r.as_mut_ptr()) };
            }
        }
    }
}
impl SetConfig for Gpu {
    /// Applies these GPU settings to the raw Paddle config handle `config`:
    /// enables GPU execution, then optionally multi-stream, cuDNN, and the
    /// TensorRT engine (with dynamic shapes, OSS plugins and DLA as
    /// configured).
    fn set_to(self, config: *mut PD_Config) {
        let Gpu {
            memory_pool_init_size_mb,
            device_id,
            enable_multi_stream,
            enable_cudnn,
            enable_tensor_rt,
        } = self;
        call! { PD_ConfigEnableUseGpu(config, memory_pool_init_size_mb, device_id) };
        if enable_multi_stream {
            call! { PD_ConfigEnableGpuMultiStream(config) };
        }
        if enable_cudnn {
            call! { PD_ConfigEnableCudnn(config) };
        }
        if let Some(TensorRT {
            workspace_size,
            max_batch_size,
            min_subgraph_size,
            precision_type,
            use_static,
            use_calib_mode,
            dynamic_shape_info,
            disable_plugin_fp16,
            enable_oss,
            dla_core,
        }) = enable_tensor_rt
        {
            call! {
                PD_ConfigEnableTensorRtEngine(
                    config,
                    workspace_size,
                    max_batch_size,
                    min_subgraph_size,
                    precision_type,
                    use_static,
                    use_calib_mode
                )
            };
            if !dynamic_shape_info.is_empty() {
                // Build the parallel arrays the C API expects: one entry per
                // tensor in `dynamic_shape_info`.
                let tensor_num = dynamic_shape_info.len();
                let mut tensor_name = vec![];
                // Holds the owned C strings so the raw pointers pushed into
                // `tensor_name` stay valid until after the FFI call.
                let mut tensor_name_cs = vec![];
                let mut shapes_num = vec![];
                let mut min_shapes = vec![];
                let mut max_shapes = vec![];
                let mut optim_shapes = vec![];
                // `info @ ...` binds the whole struct (for the length check)
                // as well as its individual fields.
                for info @ DynamicShapeInfo {
                    name,
                    min_shape,
                    max_shape,
                    optim_shape,
                } in &dynamic_shape_info
                {
                    // Panics if the three shape vectors disagree in length.
                    shapes_num.push(info.check_and_get_shape_size());
                    let (cn, n) = to_c_str(name);
                    tensor_name.push(n);
                    tensor_name_cs.push(cn);
                    // The shape data is borrowed from `dynamic_shape_info`,
                    // which outlives the call below, so these pointers remain
                    // valid. The `*mut` cast only matches the C signature —
                    // presumably the API does not write through them; TODO
                    // confirm.
                    min_shapes.push(min_shape.as_ptr() as *mut i32);
                    max_shapes.push(max_shape.as_ptr() as *mut i32);
                    optim_shapes.push(optim_shape.as_ptr() as *mut i32);
                }
                call! {
                    PD_ConfigSetTrtDynamicShapeInfo(
                        config,
                        tensor_num,
                        tensor_name.as_mut_ptr(),
                        shapes_num.as_mut_ptr(),
                        min_shapes.as_mut_ptr(),
                        max_shapes.as_mut_ptr(),
                        optim_shapes.as_mut_ptr(),
                        disable_plugin_fp16
                    )
                };
            }
            if enable_oss {
                call! { PD_ConfigEnableTensorRtOSS(config) };
            }
            if let Some(dla_core) = dla_core {
                call! { PD_ConfigEnableTensorRtDla(config, dla_core) };
            }
        }
    }
}
impl SetConfig for Xpu {
    /// Applies these XPU settings to the raw Paddle config handle `config`
    /// with a single `PD_ConfigEnableXpu` call.
    fn set_to(self, config: *mut PD_Config) {
        let Xpu {
            l3_workspace_size,
            locked,
            autorune,
            autotune_file,
            precision,
            adaptive_seqlen,
        } = self;
        // `af` is a NULL pointer when no autotune file is configured; `_a`
        // keeps the owned C string (if any) alive for the duration of the
        // call. This relies on `to_c_str` returning an `Option`-wrapped owned
        // half so it unifies with `(None, null())` — TODO confirm in
        // `utils::to_c_str`.
        let (_a, af) = autotune_file
            .as_ref()
            .map(|s| to_c_str(s))
            .unwrap_or_else(|| (None, null()));
        // Likewise, `_p` keeps the precision C string alive across the call.
        let (_p, p) = to_c_str(&precision);
        call! {
            PD_ConfigEnableXpu(
                config,
                l3_workspace_size,
                locked,
                autorune,
                af,
                p,
                adaptive_seqlen
            )
        };
    }
}
impl SetConfig for ONNXRuntime {
fn set_to(self, config: *mut PD_Config) {
call! { PD_ConfigEnableONNXRuntime(config) };
if self.enable_optimization {
call! { PD_ConfigEnableORTOptimization(config) };
}
}
}