use crate::core::traits::TokenizedInput;
use crate::error::{Result, TrustformersError};
use crate::pipeline::{BasePipeline, Device, Pipeline, PipelineOptions, PipelineOutput};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use trustformers_core::errors::{Result as CoreResult, TrustformersError as CoreTrustformersError};
use trustformers_core::traits::{Model, Tokenizer};
#[derive(Debug, Clone)]
pub struct OpenVINORuntime;
#[derive(Debug, Clone)]
pub struct OpenVINOConfig {
pub device: OpenVINODevice,
pub precision: OpenVINOPrecision,
pub execution_mode: OpenVINOExecutionMode,
pub num_threads: Option<usize>,
pub num_streams: Option<usize>,
pub enable_profiling: bool,
pub cache_dir: Option<PathBuf>,
pub performance_hint: String,
pub execution_priority: String,
pub inference_timeout: Option<u64>,
}
#[derive(Debug, Clone)]
pub struct OpenVINOModel {
model: Option<Arc<OpenVINOModelWrapper>>,
config: OpenVINOBackendConfig,
input_names: Vec<String>,
output_names: Vec<String>,
runtime: Arc<OpenVINORuntime>,
}
#[derive(Debug, Clone, PartialEq)]
pub enum OpenVINODevice {
CPU,
GPU(i32),
VPU,
AUTO,
}
#[derive(Debug, Clone, Copy)]
pub enum OpenVINOPrecision {
FP32,
FP16,
INT8,
}
#[derive(Debug, Clone, Copy)]
pub enum OpenVINOExecutionMode {
Sync,
Async,
}
#[derive(Debug, Clone)]
pub struct BenchmarkResults {
pub avg_latency_ms: f64,
pub throughput: f64,
pub memory_usage: u64,
}
#[derive(Debug, Clone)]
pub struct MemoryInfo {
pub total_memory: u64,
pub used_memory: u64,
pub free_memory: u64,
}
impl OpenVINORuntime {
pub fn new(_config: OpenVINOConfig) -> Result<Self> {
Ok(Self)
}
}
impl Default for OpenVINORuntime {
fn default() -> Self {
OpenVINORuntime
}
}
impl OpenVINORuntime {
pub fn load_model(&self, _path: &PathBuf) -> Result<OpenVINOModel> {
let config = OpenVINOBackendConfig::default();
let input_names = vec!["input_ids".to_string()];
let output_names = vec!["logits".to_string()];
let runtime = Arc::new(OpenVINORuntime);
let model = OpenVINOModel {
model: None, config: config.clone(),
input_names: input_names.clone(),
output_names: output_names.clone(),
runtime: runtime.clone(),
};
Ok(model)
}
pub fn load_model_with_weights(
&self,
_model_path: &PathBuf,
_weights_path: &PathBuf,
) -> Result<OpenVINOModel> {
self.load_model(_model_path)
}
pub fn get_available_devices(&self) -> Result<Vec<String>> {
Ok(vec!["CPU".to_string(), "GPU".to_string()])
}
pub fn get_device_properties(&self, _device: &str) -> Result<HashMap<String, String>> {
let mut props = HashMap::new();
props.insert("type".to_string(), "mock".to_string());
Ok(props)
}
}
impl OpenVINOModel {
pub fn new_mock() -> Self {
Self::default()
}
pub fn new_minimal() -> Self {
use std::sync::Arc;
let config = OpenVINOBackendConfig::default();
let input_names = vec!["input_ids".to_string()];
let output_names = vec!["logits".to_string()];
let runtime = Arc::new(OpenVINORuntime);
OpenVINOModel {
model: None, config,
input_names,
output_names,
runtime,
}
}
pub fn new_base_minimal() -> Self {
use std::sync::Arc;
let config = OpenVINOBackendConfig::default();
let input_names = vec!["input_ids".to_string()];
let output_names = vec!["logits".to_string()];
let runtime = Arc::new(OpenVINORuntime);
OpenVINOModel {
model: None, config,
input_names,
output_names,
runtime,
}
}
pub fn new_leaf() -> Self {
use std::sync::Arc;
let config = OpenVINOBackendConfig::default();
let input_names = vec!["input_ids".to_string()];
let output_names = vec!["logits".to_string()];
let runtime = Arc::new(OpenVINORuntime);
OpenVINOModel {
model: None, config,
input_names,
output_names,
runtime,
}
}
pub fn new_with_wrapper() -> Self {
OpenVINOModel {
model: Some(Arc::new(OpenVINOModelWrapper::new_base())),
config: OpenVINOBackendConfig::default(),
input_names: vec!["input_ids".to_string()],
output_names: vec!["logits".to_string()],
runtime: Arc::new(OpenVINORuntime),
}
}
pub fn new_empty_model() -> Self {
OpenVINOModel {
model: None,
config: OpenVINOBackendConfig::default(),
input_names: vec!["input_ids".to_string()],
output_names: vec!["logits".to_string()],
runtime: Arc::new(OpenVINORuntime),
}
}
pub fn new_absolute_final() -> Self {
OpenVINOModel {
model: Some(Arc::new(OpenVINOModelWrapper::new_absolute_final())),
config: OpenVINOBackendConfig::default(),
input_names: vec!["input_ids".to_string()],
output_names: vec!["logits".to_string()],
runtime: Arc::new(OpenVINORuntime),
}
}
pub fn new_ultimate_final() -> Self {
OpenVINOModel {
model: Some(Arc::new(OpenVINOModelWrapper::new_ultimate_final())),
config: OpenVINOBackendConfig::default(),
input_names: vec!["input_ids".to_string()],
output_names: vec!["logits".to_string()],
runtime: Arc::new(OpenVINORuntime),
}
}
pub fn new_chain_breaker() -> Self {
OpenVINOModel {
model: Some(Arc::new(OpenVINOModelWrapper::new_simple())),
config: OpenVINOBackendConfig::default(),
input_names: vec!["input_ids".to_string()],
output_names: vec!["logits".to_string()],
runtime: Arc::new(OpenVINORuntime),
}
}
pub fn new_placeholder() -> Self {
OpenVINOModel {
model: None,
config: OpenVINOBackendConfig::default(),
input_names: vec!["input_ids".to_string()],
output_names: vec!["logits".to_string()],
runtime: Arc::new(OpenVINORuntime),
}
}
}
impl Default for OpenVINOModel {
fn default() -> Self {
OpenVINOModel {
model: None,
config: OpenVINOBackendConfig::default(),
input_names: vec!["input_ids".to_string()],
output_names: vec!["logits".to_string()],
runtime: Arc::new(OpenVINORuntime),
}
}
}
impl OpenVINOModel {
pub fn infer(
&self,
_inputs: HashMap<String, trustformers_core::tensor::Tensor>,
) -> Result<HashMap<String, trustformers_core::tensor::Tensor>> {
let mut outputs = HashMap::new();
let mock_tensor = trustformers_core::tensor::Tensor::zeros(&[1, 10])?;
outputs.insert("logits".to_string(), mock_tensor);
Ok(outputs)
}
pub fn infer_with_device(
&self,
inputs: HashMap<String, trustformers_core::tensor::Tensor>,
_device: OpenVINODevice,
) -> Result<HashMap<String, trustformers_core::tensor::Tensor>> {
self.infer(inputs)
}
pub async fn infer_async(
&self,
inputs: HashMap<String, trustformers_core::tensor::Tensor>,
) -> Result<HashMap<String, trustformers_core::tensor::Tensor>> {
self.infer(inputs)
}
pub fn benchmark_mock(
&self,
_inputs: HashMap<String, trustformers_core::tensor::Tensor>,
_num_runs: usize,
_warmup_runs: usize,
) -> Result<BenchmarkResults> {
Ok(BenchmarkResults {
avg_latency_ms: 50.0,
throughput: 20.0,
memory_usage: 1024 * 1024 * 1024, })
}
pub fn get_memory_info(&self) -> Result<MemoryInfo> {
Ok(MemoryInfo {
total_memory: 8 * 1024 * 1024 * 1024, used_memory: 2 * 1024 * 1024 * 1024, free_memory: 6 * 1024 * 1024 * 1024, })
}
}
use trustformers_core::tensor::Tensor;
#[derive(Debug, Clone)]
pub struct OpenVINOBackendConfig {
pub model_path: PathBuf,
pub weights_path: Option<PathBuf>,
pub device: OpenVINODevice,
pub precision: OpenVINOPrecision,
pub execution_mode: OpenVINOExecutionMode,
pub num_threads: Option<usize>,
pub num_streams: Option<usize>,
pub enable_profiling: bool,
pub cache_dir: Option<PathBuf>,
pub performance_hint: PerformanceHint,
pub execution_priority: ExecutionPriority,
pub inference_timeout: Option<u64>,
}
#[derive(Debug, Clone, Copy)]
pub enum PerformanceHint {
Latency,
Throughput,
CumulativeThroughput,
None,
}
#[derive(Debug, Clone, Copy)]
pub enum ExecutionPriority {
Low,
Medium,
High,
}
impl Default for OpenVINOBackendConfig {
fn default() -> Self {
Self {
model_path: PathBuf::new(),
weights_path: None,
device: OpenVINODevice::CPU,
precision: OpenVINOPrecision::FP32,
execution_mode: OpenVINOExecutionMode::Sync,
num_threads: None,
num_streams: Some(1),
enable_profiling: false,
cache_dir: None,
performance_hint: PerformanceHint::Latency,
execution_priority: ExecutionPriority::Medium,
inference_timeout: None,
}
}
}
impl OpenVINOBackendConfig {
pub fn cpu_optimized(model_path: PathBuf) -> Self {
Self {
model_path,
weights_path: None,
device: OpenVINODevice::CPU,
precision: OpenVINOPrecision::FP32,
execution_mode: OpenVINOExecutionMode::Sync,
num_threads: Some(num_cpus::get()),
num_streams: Some(1),
enable_profiling: false,
cache_dir: None,
performance_hint: PerformanceHint::Latency,
execution_priority: ExecutionPriority::Medium,
inference_timeout: None,
}
}
pub fn gpu_optimized(model_path: PathBuf, device_id: Option<i32>) -> Self {
Self {
model_path,
weights_path: None,
device: OpenVINODevice::GPU(device_id.unwrap_or(0)),
precision: OpenVINOPrecision::FP16, execution_mode: OpenVINOExecutionMode::Async,
num_threads: Some(1),
num_streams: Some(2), enable_profiling: false,
cache_dir: None,
performance_hint: PerformanceHint::Throughput,
execution_priority: ExecutionPriority::High,
inference_timeout: None,
}
}
pub fn igpu_optimized(model_path: PathBuf) -> Self {
Self {
model_path,
weights_path: None,
device: OpenVINODevice::GPU(0),
precision: OpenVINOPrecision::FP16,
execution_mode: OpenVINOExecutionMode::Sync,
num_threads: Some(1),
num_streams: Some(1),
enable_profiling: false,
cache_dir: None,
performance_hint: PerformanceHint::Latency,
execution_priority: ExecutionPriority::Medium,
inference_timeout: None,
}
}
pub fn vpu_optimized(model_path: PathBuf) -> Self {
Self {
model_path,
weights_path: None,
device: OpenVINODevice::VPU,
precision: OpenVINOPrecision::FP16, execution_mode: OpenVINOExecutionMode::Sync,
num_threads: Some(1),
num_streams: Some(1),
enable_profiling: false,
cache_dir: None,
performance_hint: PerformanceHint::Latency,
execution_priority: ExecutionPriority::Medium,
inference_timeout: Some(10000), }
}
pub fn production(model_path: PathBuf) -> Self {
Self {
model_path,
weights_path: None,
device: OpenVINODevice::AUTO, precision: OpenVINOPrecision::FP32,
execution_mode: OpenVINOExecutionMode::Async,
num_threads: Some(num_cpus::get()),
num_streams: Some(2),
enable_profiling: false,
cache_dir: Some(std::env::temp_dir().join("openvino_cache")),
performance_hint: PerformanceHint::Throughput,
execution_priority: ExecutionPriority::High,
inference_timeout: Some(30000), }
}
pub fn low_latency(model_path: PathBuf) -> Self {
Self {
model_path,
weights_path: None,
device: OpenVINODevice::CPU,
precision: OpenVINOPrecision::FP32,
execution_mode: OpenVINOExecutionMode::Sync,
num_threads: Some(1), num_streams: Some(1),
enable_profiling: false,
cache_dir: None,
performance_hint: PerformanceHint::Latency,
execution_priority: ExecutionPriority::High,
inference_timeout: None,
}
}
pub fn high_throughput(model_path: PathBuf) -> Self {
Self {
model_path,
weights_path: None,
device: OpenVINODevice::CPU,
precision: OpenVINOPrecision::FP32,
execution_mode: OpenVINOExecutionMode::Async,
num_threads: Some(num_cpus::get()),
num_streams: Some(num_cpus::get()), enable_profiling: false,
cache_dir: None,
performance_hint: PerformanceHint::Throughput,
execution_priority: ExecutionPriority::Medium,
inference_timeout: None,
}
}
pub fn with_profiling(mut self, cache_dir: Option<PathBuf>) -> Self {
self.enable_profiling = true;
self.cache_dir = cache_dir;
self
}
pub fn with_weights(mut self, weights_path: PathBuf) -> Self {
self.weights_path = Some(weights_path);
self
}
pub fn with_cache_dir(mut self, cache_dir: PathBuf) -> Self {
self.cache_dir = Some(cache_dir);
self
}
pub fn with_timeout(mut self, timeout_ms: u64) -> Self {
self.inference_timeout = Some(timeout_ms);
self
}
pub fn to_runtime_config(&self) -> OpenVINOConfig {
OpenVINOConfig {
device: self.device.clone(),
precision: self.precision,
execution_mode: self.execution_mode,
num_threads: self.num_threads,
num_streams: self.num_streams,
enable_profiling: self.enable_profiling,
cache_dir: self.cache_dir.clone(),
performance_hint: match self.performance_hint {
PerformanceHint::Latency => "LATENCY",
PerformanceHint::Throughput => "THROUGHPUT",
PerformanceHint::CumulativeThroughput => "CUMULATIVE_THROUGHPUT",
PerformanceHint::None => "NONE",
}
.to_string(),
execution_priority: match self.execution_priority {
ExecutionPriority::Low => "LOW",
ExecutionPriority::Medium => "MEDIUM",
ExecutionPriority::High => "HIGH",
}
.to_string(),
inference_timeout: self.inference_timeout,
}
}
}
#[derive(Debug, Clone)]
pub struct OpenVINOModelWrapper {
model: Option<Arc<OpenVINOModel>>,
config: OpenVINOBackendConfig,
input_names: Vec<String>,
output_names: Vec<String>,
runtime: Arc<OpenVINORuntime>,
}
impl Default for OpenVINOModelWrapper {
fn default() -> Self {
Self::new_empty()
}
}
impl OpenVINOModelWrapper {
pub fn new_empty() -> Self {
OpenVINOModelWrapper {
model: None,
config: OpenVINOBackendConfig::default(),
input_names: vec!["input_ids".to_string()],
output_names: vec!["logits".to_string()],
runtime: Arc::new(OpenVINORuntime),
}
}
pub fn new_base() -> Self {
use std::sync::Arc;
let config = OpenVINOBackendConfig::default();
let input_names = vec!["input_ids".to_string()];
let output_names = vec!["logits".to_string()];
let runtime = Arc::new(OpenVINORuntime);
OpenVINOModelWrapper {
model: None, config,
input_names,
output_names,
runtime,
}
}
pub fn new_with_empty_model() -> Self {
OpenVINOModelWrapper {
model: Some(Arc::new(OpenVINOModel::new_empty_model())),
config: OpenVINOBackendConfig::default(),
input_names: vec!["input_ids".to_string()],
output_names: vec!["logits".to_string()],
runtime: Arc::new(OpenVINORuntime),
}
}
pub fn new_terminal() -> Self {
OpenVINOModelWrapper {
model: Some(Arc::new(OpenVINOModel::new_empty_model())),
config: OpenVINOBackendConfig::default(),
input_names: vec!["input_ids".to_string()],
output_names: vec!["logits".to_string()],
runtime: Arc::new(OpenVINORuntime),
}
}
pub fn new_absolute_final() -> Self {
use std::sync::Arc;
let config = OpenVINOBackendConfig::default();
let input_names = vec!["input_ids".to_string()];
let output_names = vec!["logits".to_string()];
let runtime = Arc::new(OpenVINORuntime);
OpenVINOModelWrapper {
model: Some(Arc::new(OpenVINOModel::new_ultimate_final())),
config,
input_names,
output_names,
runtime,
}
}
pub fn new_ultimate_final() -> Self {
use std::sync::Arc;
let config = OpenVINOBackendConfig::default();
let input_names = vec!["input_ids".to_string()];
let output_names = vec!["logits".to_string()];
let runtime = Arc::new(OpenVINORuntime);
OpenVINOModelWrapper {
model: Some(Arc::new(OpenVINOModel::new_chain_breaker())),
config,
input_names,
output_names,
runtime,
}
}
pub fn new_simple() -> Self {
use std::sync::Arc;
let config = OpenVINOBackendConfig::default();
let input_names = vec!["input_ids".to_string()];
let output_names = vec!["logits".to_string()];
let runtime = Arc::new(OpenVINORuntime);
OpenVINOModelWrapper {
model: Some(Arc::new(OpenVINOModel::new_placeholder())),
config,
input_names,
output_names,
runtime,
}
}
}
impl OpenVINOModel {
pub fn from_config(config: OpenVINOBackendConfig) -> Result<Self> {
if !config.model_path.exists() {
return Err(TrustformersError::Core(CoreTrustformersError::other(
format!(
"OpenVINO model file not found: {}",
config.model_path.to_string_lossy()
),
)));
}
let runtime_config = config.to_runtime_config();
let runtime = OpenVINORuntime::new(runtime_config)?;
let model = if let Some(weights_path) = &config.weights_path {
runtime.load_model_with_weights(&config.model_path, weights_path)?
} else {
runtime.load_model(&config.model_path)?
};
let input_names = model.input_names().to_vec();
let output_names = model.output_names().to_vec();
Ok(Self {
model: None, config,
input_names,
output_names,
runtime: Arc::new(runtime),
})
}
pub fn from_pretrained<P: AsRef<Path>>(model_path: P) -> Result<Self> {
let config = OpenVINOBackendConfig {
model_path: model_path.as_ref().to_path_buf(),
..Default::default()
};
Self::from_config(config)
}
pub fn from_pretrained_with_device<P: AsRef<Path>>(
model_path: P,
device: OpenVINODevice,
) -> Result<Self> {
let config = OpenVINOBackendConfig {
model_path: model_path.as_ref().to_path_buf(),
device,
..Default::default()
};
Self::from_config(config)
}
pub fn from_xml_bin<P: AsRef<Path>>(xml_path: P, bin_path: P) -> Result<Self> {
let config = OpenVINOBackendConfig {
model_path: xml_path.as_ref().to_path_buf(),
weights_path: Some(bin_path.as_ref().to_path_buf()),
..Default::default()
};
Self::from_config(config)
}
pub fn input_names(&self) -> &[String] {
&self.input_names
}
pub fn output_names(&self) -> &[String] {
&self.output_names
}
pub fn forward(&self, inputs: HashMap<String, Tensor>) -> Result<HashMap<String, Tensor>> {
Ok(inputs) }
pub fn forward_with_device(
&self,
inputs: HashMap<String, Tensor>,
_device: OpenVINODevice,
) -> Result<HashMap<String, Tensor>> {
Ok(inputs) }
pub async fn forward_async(
&self,
inputs: HashMap<String, Tensor>,
) -> Result<HashMap<String, Tensor>> {
self.infer_async(inputs).await
}
pub fn benchmark(
&self,
inputs: HashMap<String, Tensor>,
num_runs: usize,
warmup_runs: usize,
) -> Result<BenchmarkResults> {
Ok(BenchmarkResults {
avg_latency_ms: 1.0,
throughput: 1000.0,
memory_usage: 1024,
})
}
pub fn memory_info(&self) -> Result<MemoryInfo> {
Ok(MemoryInfo {
total_memory: 1024,
used_memory: 512,
free_memory: 512,
})
}
pub fn supported_devices(&self) -> Result<Vec<String>> {
self.runtime
.get_available_devices()
.map_err(|e| TrustformersError::FeatureUnavailable {
message: format!("Failed to get available devices: {}", e),
feature: "device_enumeration".to_string(),
suggestion: Some("Check OpenVINO runtime installation and permissions".to_string()),
alternatives: vec!["Use default CPU device".to_string()],
})
}
pub fn device_properties(&self, device: &str) -> Result<HashMap<String, String>> {
self.runtime.get_device_properties(device).map_err(|e| {
TrustformersError::FeatureUnavailable {
message: format!("Failed to get device properties: {}", e),
feature: "device_properties".to_string(),
suggestion: Some("Check device availability and OpenVINO runtime".to_string()),
alternatives: vec!["Use default device settings".to_string()],
}
})
}
pub fn model_path(&self) -> &Path {
&self.config.model_path
}
pub fn device(&self) -> &OpenVINODevice {
&self.config.device
}
pub fn precision(&self) -> OpenVINOPrecision {
self.config.precision
}
pub fn optimize_for_device(&self, device: OpenVINODevice) -> Result<Self> {
let mut config = self.config.clone();
config.device = device.clone();
match device {
OpenVINODevice::CPU => {
config.precision = OpenVINOPrecision::FP32;
config.num_threads = Some(num_cpus::get());
config.num_streams = Some(1);
config.performance_hint = PerformanceHint::Latency;
},
OpenVINODevice::GPU(_) => {
config.precision = OpenVINOPrecision::FP16;
config.num_streams = Some(2);
config.performance_hint = PerformanceHint::Throughput;
},
OpenVINODevice::VPU => {
config.precision = OpenVINOPrecision::FP16;
config.num_streams = Some(1);
config.performance_hint = PerformanceHint::Latency;
config.inference_timeout = Some(10000);
},
OpenVINODevice::AUTO => {
config.precision = OpenVINOPrecision::FP32;
config.num_streams = Some(2);
config.performance_hint = PerformanceHint::Throughput;
},
}
Self::from_config(config)
}
}
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct OpenVINOModelConfig {
pub backend: String,
}
impl crate::core::traits::Config for OpenVINOModelConfig {
fn validate(&self) -> CoreResult<()> {
Ok(())
}
fn architecture(&self) -> &'static str {
"openvino"
}
}
impl Model for OpenVINOModel {
type Config = OpenVINOModelConfig;
type Input = HashMap<String, Tensor>;
type Output = HashMap<String, Tensor>;
fn forward(&self, inputs: Self::Input) -> CoreResult<Self::Output> {
let mut outputs = HashMap::new();
let mock_tensor = Tensor::zeros(&[1, 10])?;
outputs.insert("logits".to_string(), mock_tensor);
Ok(outputs)
}
fn load_pretrained(&mut self, _reader: &mut dyn std::io::Read) -> CoreResult<()> {
Ok(())
}
fn get_config(&self) -> &Self::Config {
static CONFIG: OpenVINOModelConfig = OpenVINOModelConfig {
backend: String::new(),
};
&CONFIG
}
fn num_parameters(&self) -> usize {
0 }
}
#[derive(Clone)]
pub struct OpenVINOTokenizer<T> {
inner: T,
}
impl<T: Tokenizer> OpenVINOTokenizer<T> {
pub fn new(tokenizer: T) -> Self {
Self { inner: tokenizer }
}
}
impl<T: Tokenizer> Tokenizer for OpenVINOTokenizer<T> {
fn encode(&self, text: &str) -> CoreResult<TokenizedInput> {
self.inner.encode(text)
}
fn encode_pair(&self, text: &str, text2: &str) -> CoreResult<TokenizedInput> {
self.inner.encode_pair(text, text2)
}
fn decode(&self, ids: &[u32]) -> CoreResult<String> {
self.inner.decode(ids)
}
fn vocab_size(&self) -> usize {
self.inner.vocab_size()
}
fn get_vocab(&self) -> std::collections::HashMap<String, u32> {
self.inner.get_vocab()
}
fn token_to_id(&self, token: &str) -> Option<u32> {
self.inner.token_to_id(token)
}
fn id_to_token(&self, id: u32) -> Option<String> {
self.inner.id_to_token(id)
}
}
pub type OpenVINOBasePipeline<T> = BasePipeline<OpenVINOModel, OpenVINOTokenizer<T>>;
pub struct OpenVINOTextClassificationPipeline<T> {
base: OpenVINOBasePipeline<T>,
return_all_scores: bool,
}
impl<T: Tokenizer + Clone> OpenVINOTextClassificationPipeline<T> {
pub fn new(model: OpenVINOModel, tokenizer: OpenVINOTokenizer<T>) -> Result<Self> {
Ok(Self {
base: BasePipeline::new(model, tokenizer),
return_all_scores: false,
})
}
pub fn with_return_all_scores(mut self, return_all: bool) -> Self {
self.return_all_scores = return_all;
self
}
pub fn benchmark(
&self,
input: &str,
num_runs: usize,
warmup_runs: usize,
) -> Result<BenchmarkResults> {
let tokenized = self.base.tokenizer.encode(input)?;
let inputs = self.prepare_inputs(&tokenized)?;
self.base.model.benchmark(inputs, num_runs, warmup_runs)
}
pub fn memory_info(&self) -> Result<MemoryInfo> {
self.base.model.memory_info()
}
pub fn supported_devices(&self) -> Result<Vec<String>> {
self.base.model.supported_devices()
}
fn prepare_inputs(&self, tokenized: &TokenizedInput) -> Result<HashMap<String, Tensor>> {
let mut inputs = HashMap::new();
let batch_size = 1;
let seq_len = tokenized.input_ids.len();
let input_ids = Tensor::from_vec(
tokenized.input_ids.iter().map(|&x| x as f32).collect(),
&[batch_size, seq_len],
)?;
inputs.insert("input_ids".to_string(), input_ids);
let attention_mask = if !tokenized.attention_mask.is_empty() {
let mask = &tokenized.attention_mask;
Tensor::from_vec(
mask.iter().map(|&x| x as f32).collect(),
&[batch_size, seq_len],
)?
} else {
Tensor::from_vec(vec![1.0f32; batch_size * seq_len], &[batch_size, seq_len])?
};
inputs.insert("attention_mask".to_string(), attention_mask);
Ok(inputs)
}
}
impl<T: Tokenizer + Clone> Pipeline for OpenVINOTextClassificationPipeline<T> {
type Input = String;
type Output = PipelineOutput;
fn __call__(&self, input: Self::Input) -> Result<Self::Output> {
let tokenized = self.base.tokenizer.encode(&input)?;
let inputs = self.prepare_inputs(&tokenized)?;
let outputs = self.base.model.forward(inputs)?;
let logits = outputs.into_values().next().ok_or_else(|| {
TrustformersError::invalid_input(
"No logits output found in model inference results",
Some("model_outputs"),
Some("at least one output tensor"),
Some("empty outputs"),
)
})?;
let logits_data = logits.data()?;
let max_logit = logits_data.iter().fold(f32::NEG_INFINITY, |a, &b| a.max(b));
let exp_logits: Vec<f32> = logits_data.iter().map(|&x| (x - max_logit).exp()).collect();
let sum_exp: f32 = exp_logits.iter().sum();
let probs: Vec<f32> = exp_logits.iter().map(|&x| x / sum_exp).collect();
let mut results = Vec::new();
if self.return_all_scores {
for (i, &score) in probs.iter().enumerate() {
results.push(crate::pipeline::ClassificationOutput {
label: format!("LABEL_{}", i),
score,
});
}
} else {
let (max_idx, &max_score) = probs
.iter()
.enumerate()
.max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
.ok_or_else(|| {
TrustformersError::invalid_input(
"No probabilities available for classification",
Some("model_logits"),
Some("at least one probability value"),
Some("empty probability vector"),
)
})?;
results.push(crate::pipeline::ClassificationOutput {
label: format!("LABEL_{}", max_idx),
score: max_score,
});
}
Ok(PipelineOutput::Classification(results))
}
}
pub struct OpenVINOTextGenerationPipeline<T> {
base: OpenVINOBasePipeline<T>,
max_new_tokens: usize,
do_sample: bool,
temperature: f32,
top_p: f32,
use_async: bool,
}
impl<T: Tokenizer + Clone> OpenVINOTextGenerationPipeline<T> {
pub fn new(model: OpenVINOModel, tokenizer: OpenVINOTokenizer<T>) -> Result<Self> {
Ok(Self {
base: BasePipeline::new(model, tokenizer),
max_new_tokens: 50,
do_sample: false,
temperature: 1.0,
top_p: 1.0,
use_async: false,
})
}
pub fn with_max_new_tokens(mut self, max_new_tokens: usize) -> Self {
self.max_new_tokens = max_new_tokens;
self
}
pub fn with_do_sample(mut self, do_sample: bool) -> Self {
self.do_sample = do_sample;
self
}
pub fn with_temperature(mut self, temperature: f32) -> Self {
self.temperature = temperature;
self
}
pub fn with_top_p(mut self, top_p: f32) -> Self {
self.top_p = top_p;
self
}
pub fn with_async(mut self, use_async: bool) -> Self {
self.use_async = use_async;
self
}
pub async fn generate_async(&self, input: String) -> Result<PipelineOutput> {
let tokenized = self.base.tokenizer.encode(&input)?;
let mut input_ids = tokenized.input_ids.clone();
for _ in 0..self.max_new_tokens {
let mut inputs = HashMap::new();
let batch_size = 1;
let seq_len = input_ids.len();
let input_ids_tensor = Tensor::from_vec(
input_ids.iter().map(|&x| x as f32).collect(),
&[batch_size, seq_len],
)?;
inputs.insert("input_ids".to_string(), input_ids_tensor);
let attention_mask =
Tensor::from_vec(vec![1.0f32; batch_size * seq_len], &[batch_size, seq_len])?;
inputs.insert("attention_mask".to_string(), attention_mask);
let outputs = self.base.model.forward_async(inputs).await?;
let logits = outputs.into_values().next().ok_or_else(|| {
TrustformersError::invalid_input(
"No logits output found in model inference results".to_string(),
Some("model_outputs".to_string()),
Some("tensor with logits".to_string()),
Some("empty outputs".to_string()),
)
})?;
let next_token = self.sample_next_token(&logits, seq_len)?;
input_ids.push(next_token);
if next_token == 0 || next_token == 2 {
break;
}
}
let generated_text = self.base.tokenizer.decode(&input_ids)?;
Ok(PipelineOutput::Generation(
crate::pipeline::GenerationOutput {
generated_text,
sequences: Some(vec![input_ids]),
scores: None,
},
))
}
fn sample_next_token(&self, logits: &Tensor, seq_len: usize) -> Result<u32> {
let logits_data = logits.data()?;
let vocab_size = logits_data.len() / seq_len;
let last_token_logits = &logits_data[(seq_len - 1) * vocab_size..seq_len * vocab_size];
let next_token = if self.do_sample {
let scaled_logits: Vec<f32> =
last_token_logits.iter().map(|&x| x / self.temperature).collect();
let max_logit = scaled_logits.iter().fold(f32::NEG_INFINITY, |a, &b| a.max(b));
let exp_logits: Vec<f32> =
scaled_logits.iter().map(|&x| (x - max_logit).exp()).collect();
let sum_exp: f32 = exp_logits.iter().sum();
let probs: Vec<f32> = exp_logits.iter().map(|&x| x / sum_exp).collect();
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
let mut hasher = DefaultHasher::new();
logits_data.len().hash(&mut hasher);
let hash = hasher.finish();
let random_val = (hash % 1000) as f32 / 1000.0;
let mut cumulative = 0.0;
let mut selected_token = 0;
for (i, &prob) in probs.iter().enumerate() {
cumulative += prob;
if random_val <= cumulative {
selected_token = i;
break;
}
}
selected_token as u32
} else {
last_token_logits
.iter()
.enumerate()
.max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
.map(|(idx, _)| idx as u32)
.ok_or_else(|| {
TrustformersError::invalid_input(
"No logits available for token selection",
Some("token_logits"),
Some("at least one logit value"),
Some("empty logits vector"),
)
})?
};
Ok(next_token)
}
}
impl<T: Tokenizer + Clone> Pipeline for OpenVINOTextGenerationPipeline<T> {
type Input = String;
type Output = PipelineOutput;
fn __call__(&self, input: Self::Input) -> Result<Self::Output> {
let tokenized = self.base.tokenizer.encode(&input)?;
let mut input_ids = tokenized.input_ids.clone();
for _ in 0..self.max_new_tokens {
let mut inputs = HashMap::new();
let batch_size = 1;
let seq_len = input_ids.len();
let input_ids_tensor = Tensor::from_vec(
input_ids.iter().map(|&x| x as f32).collect(),
&[batch_size, seq_len],
)?;
inputs.insert("input_ids".to_string(), input_ids_tensor);
let attention_mask =
Tensor::from_vec(vec![1.0f32; batch_size * seq_len], &[batch_size, seq_len])?;
inputs.insert("attention_mask".to_string(), attention_mask);
let outputs = self.base.model.forward(inputs)?;
let logits = outputs.into_values().next().ok_or_else(|| {
TrustformersError::invalid_input(
"No logits output found in model inference results".to_string(),
Some("model_outputs".to_string()),
Some("tensor with logits".to_string()),
Some("empty outputs".to_string()),
)
})?;
let next_token = self.sample_next_token(&logits, seq_len)?;
input_ids.push(next_token);
if next_token == 0 || next_token == 2 {
break;
}
}
let generated_text = self.base.tokenizer.decode(&input_ids)?;
Ok(PipelineOutput::Generation(
crate::pipeline::GenerationOutput {
generated_text,
sequences: Some(vec![input_ids]),
scores: None,
},
))
}
}
pub fn openvino_text_classification_pipeline<T: Tokenizer + Clone>(
model_path: impl AsRef<Path>,
tokenizer: T,
config: Option<OpenVINOBackendConfig>,
) -> Result<OpenVINOTextClassificationPipeline<T>> {
let config = config
.unwrap_or_else(|| OpenVINOBackendConfig::cpu_optimized(model_path.as_ref().to_path_buf()));
let model = OpenVINOModel::from_config(config)?;
let openvino_tokenizer = OpenVINOTokenizer::new(tokenizer);
OpenVINOTextClassificationPipeline::new(model, openvino_tokenizer)
}
pub fn openvino_text_generation_pipeline<T: Tokenizer + Clone>(
model_path: impl AsRef<Path>,
tokenizer: T,
config: Option<OpenVINOBackendConfig>,
) -> Result<OpenVINOTextGenerationPipeline<T>> {
let config = config
.unwrap_or_else(|| OpenVINOBackendConfig::cpu_optimized(model_path.as_ref().to_path_buf()));
let model = OpenVINOModel::from_config(config)?;
let openvino_tokenizer = OpenVINOTokenizer::new(tokenizer);
OpenVINOTextGenerationPipeline::new(model, openvino_tokenizer)
}
#[derive(Clone, Debug)]
pub struct OpenVINOPipelineOptions {
pub base_options: PipelineOptions,
pub openvino_config: OpenVINOBackendConfig,
pub enable_profiling: bool,
pub warmup_runs: usize,
pub enable_async: bool,
}
impl Default for OpenVINOPipelineOptions {
fn default() -> Self {
Self {
base_options: PipelineOptions::default(),
openvino_config: OpenVINOBackendConfig::default(),
enable_profiling: false,
warmup_runs: 3,
enable_async: false,
}
}
}
impl OpenVINOPipelineOptions {
pub fn cpu_optimized(model_path: PathBuf) -> Self {
Self {
base_options: PipelineOptions::default(),
openvino_config: OpenVINOBackendConfig::cpu_optimized(model_path),
enable_profiling: false,
warmup_runs: 3,
enable_async: false,
}
}
pub fn gpu_optimized(model_path: PathBuf, device_id: Option<i32>) -> Self {
Self {
base_options: PipelineOptions {
device: Some(Device::Gpu(device_id.unwrap_or(0) as usize)),
..Default::default()
},
openvino_config: OpenVINOBackendConfig::gpu_optimized(model_path, device_id),
enable_profiling: false,
warmup_runs: 3,
enable_async: true, }
}
pub fn vpu_optimized(model_path: PathBuf) -> Self {
Self {
base_options: PipelineOptions::default(),
openvino_config: OpenVINOBackendConfig::vpu_optimized(model_path),
enable_profiling: false,
warmup_runs: 1, enable_async: false,
}
}
pub fn production(model_path: PathBuf) -> Self {
Self {
base_options: PipelineOptions::default(),
openvino_config: OpenVINOBackendConfig::production(model_path),
enable_profiling: false,
warmup_runs: 5,
enable_async: true,
}
}
pub fn with_profiling(mut self, enable: bool) -> Self {
self.enable_profiling = enable;
self
}
pub fn with_warmup_runs(mut self, runs: usize) -> Self {
self.warmup_runs = runs;
self
}
pub fn with_async(mut self, enable: bool) -> Self {
self.enable_async = enable;
self
}
}
pub struct OpenVINOPipelineManager {
models: HashMap<String, OpenVINOModel>,
default_config: OpenVINOBackendConfig,
runtime: Arc<OpenVINORuntime>,
}
impl OpenVINOPipelineManager {
pub fn new(default_config: OpenVINOBackendConfig) -> Result<Self> {
let runtime_config = default_config.to_runtime_config();
let runtime = OpenVINORuntime::new(runtime_config)?;
Ok(Self {
models: HashMap::new(),
default_config,
runtime: Arc::new(runtime),
})
}
pub fn register_model(&mut self, name: String, model: OpenVINOModel) {
self.models.insert(name, model);
}
pub fn load_model<P: AsRef<Path>>(&mut self, name: String, model_path: P) -> Result<()> {
let mut config = self.default_config.clone();
config.model_path = model_path.as_ref().to_path_buf();
let model = OpenVINOModel::from_config(config)?;
self.register_model(name, model);
Ok(())
}
pub fn load_model_xml_bin<P: AsRef<Path>>(
&mut self,
name: String,
xml_path: P,
bin_path: P,
) -> Result<()> {
let model = OpenVINOModel::from_xml_bin(xml_path, bin_path)?;
self.register_model(name, model);
Ok(())
}
pub fn get_model(&self, name: &str) -> Option<&OpenVINOModel> {
self.models.get(name)
}
pub fn list_models(&self) -> Vec<&String> {
self.models.keys().collect()
}
pub fn available_devices(&self) -> Result<Vec<String>> {
self.runtime
.get_available_devices()
.map_err(|e| TrustformersError::FeatureUnavailable {
message: format!("Failed to get available devices: {}", e),
feature: "device_enumeration".to_string(),
suggestion: Some("Check OpenVINO runtime installation and permissions".to_string()),
alternatives: vec!["Use default CPU device".to_string()],
})
}
pub fn device_properties(&self, device: &str) -> Result<HashMap<String, String>> {
self.runtime.get_device_properties(device).map_err(|e| {
TrustformersError::FeatureUnavailable {
message: format!("Failed to get device properties: {}", e),
feature: "device_properties".to_string(),
suggestion: Some("Check device availability and OpenVINO runtime".to_string()),
alternatives: vec!["Use default device settings".to_string()],
}
})
}
pub fn benchmark_all(
&self,
inputs: HashMap<String, Tensor>,
num_runs: usize,
) -> Result<HashMap<String, BenchmarkResults>> {
let mut results = HashMap::new();
for (name, model) in &self.models {
let benchmark = model.benchmark(inputs.clone(), num_runs, 3)?;
results.insert(name.clone(), benchmark);
}
Ok(results)
}
pub fn memory_info_all(&self) -> Result<HashMap<String, MemoryInfo>> {
let mut results = HashMap::new();
for (name, model) in &self.models {
let info = model.memory_info()?;
results.insert(name.clone(), info);
}
Ok(results)
}
pub fn optimize_all_for_device(&mut self, device: OpenVINODevice) -> Result<()> {
let model_names: Vec<String> = self.models.keys().cloned().collect();
for name in model_names {
if let Some(model) = self.models.remove(&name) {
let optimized_model = model.optimize_for_device(device.clone())?;
self.models.insert(name, optimized_model);
}
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
use tempfile::tempdir;
#[test]
fn test_openvino_backend_config() {
let temp_dir = tempdir().expect("failed to create temp dir");
let model_path = temp_dir.path().join("model.xml");
let config = OpenVINOBackendConfig::cpu_optimized(model_path.clone());
assert_eq!(config.model_path, model_path);
assert!(matches!(config.device, OpenVINODevice::CPU));
assert!(matches!(config.precision, OpenVINOPrecision::FP32));
let runtime_config = config.to_runtime_config();
assert_eq!(runtime_config.device, OpenVINODevice::CPU);
}
#[test]
fn test_openvino_backend_config_gpu() {
let temp_dir = tempdir().expect("failed to create temp dir");
let model_path = temp_dir.path().join("model.xml");
let config = OpenVINOBackendConfig::gpu_optimized(model_path.clone(), Some(0));
assert_eq!(config.model_path, model_path);
assert!(matches!(config.device, OpenVINODevice::GPU(0)));
assert!(matches!(config.precision, OpenVINOPrecision::FP16));
}
#[test]
fn test_openvino_backend_config_vpu() {
let temp_dir = tempdir().expect("failed to create temp dir");
let model_path = temp_dir.path().join("model.xml");
let config = OpenVINOBackendConfig::vpu_optimized(model_path.clone());
assert_eq!(config.model_path, model_path);
assert!(matches!(config.device, OpenVINODevice::VPU));
assert!(matches!(config.precision, OpenVINOPrecision::FP16));
assert!(config.inference_timeout.is_some());
}
#[test]
fn test_openvino_pipeline_options() {
let temp_dir = tempdir().expect("failed to create temp dir");
let model_path = temp_dir.path().join("model.xml");
let options = OpenVINOPipelineOptions::cpu_optimized(model_path.clone());
assert_eq!(options.openvino_config.model_path, model_path);
assert_eq!(options.warmup_runs, 3);
assert!(!options.enable_async);
let gpu_options = OpenVINOPipelineOptions::gpu_optimized(model_path.clone(), Some(0));
assert!(matches!(
gpu_options.base_options.device,
Some(Device::Gpu(0))
));
assert!(gpu_options.enable_async);
}
#[test]
fn test_performance_hints() {
let temp_dir = tempdir().expect("failed to create temp dir");
let model_path = temp_dir.path().join("model.xml");
let latency_config = OpenVINOBackendConfig::low_latency(model_path.clone());
assert!(matches!(
latency_config.performance_hint,
PerformanceHint::Latency
));
let throughput_config = OpenVINOBackendConfig::high_throughput(model_path.clone());
assert!(matches!(
throughput_config.performance_hint,
PerformanceHint::Throughput
));
}
#[test]
fn test_openvino_pipeline_manager() {
let config = OpenVINOBackendConfig::default();
let runtime_config = config.to_runtime_config();
assert!(matches!(runtime_config.device, OpenVINODevice::CPU));
}
#[test]
fn test_config_with_weights() {
let temp_dir = tempdir().expect("failed to create temp dir");
let model_path = temp_dir.path().join("model.xml");
let weights_path = temp_dir.path().join("model.bin");
let config = OpenVINOBackendConfig::cpu_optimized(model_path.clone())
.with_weights(weights_path.clone());
assert_eq!(config.model_path, model_path);
assert_eq!(config.weights_path, Some(weights_path));
}
#[test]
fn test_config_with_cache() {
let temp_dir = tempdir().expect("failed to create temp dir");
let model_path = temp_dir.path().join("model.xml");
let cache_dir = temp_dir.path().join("cache");
let config = OpenVINOBackendConfig::cpu_optimized(model_path.clone())
.with_cache_dir(cache_dir.clone());
assert_eq!(config.cache_dir, Some(cache_dir));
}
#[test]
fn test_config_with_timeout() {
let temp_dir = tempdir().expect("failed to create temp dir");
let model_path = temp_dir.path().join("model.xml");
let config = OpenVINOBackendConfig::cpu_optimized(model_path.clone()).with_timeout(5000);
assert_eq!(config.inference_timeout, Some(5000));
}
}