// benchmark_gpu/benchmark_gpu.rs

use gliner::model::pipeline::token::TokenMode;
use gliner::model::{input::text::TextInput, params::Parameters, GLiNER};
use gliner::util::result::Result;
use orp::params::RuntimeParameters;
use ort::execution_providers::{CUDAExecutionProvider, CoreMLExecutionProvider};

7
8/// Sample usage of the public API using GPU for inferencing
9/// 
10/// This example will try two execution providers in that order:
11/// * CUDA
12/// * CoreML
13/// 
14/// To leverage one of them you need to enable the appropriate feature, for exemple:
15/// 
16/// ```console
17/// $ cargo run --example gpu --features=cuda
18/// ```
19/// 
20/// See `Readme.md` and `doc/ORT.md` for more information.
21fn main() -> Result<()> {    
22
23    const MAX_SAMPLES: usize = 1000;
24    const CSV_PATH: &str = "data/nuner-sample-1k.csv";
25
26    let entities = [
27        "person", 
28        "location",
29        "vehicle",
30    ];
31
32    println!("Loading data...");
33    let input = TextInput::new_from_csv(CSV_PATH, 0, MAX_SAMPLES, entities.map(|x| x.to_string()).to_vec())?;
34    let nb_samples = input.texts.len();
35    
36    println!("Loading model...");
37    let model = GLiNER::<TokenMode>::new(
38        Parameters::default(),
39        RuntimeParameters::default().with_execution_providers([
40            CUDAExecutionProvider::default().build(),
41            CoreMLExecutionProvider::default().build(),            
42        ]),
43        "models/gliner-multitask-large-v0.5/tokenizer.json",
44        "models/gliner-multitask-large-v0.5/onnx/model.onnx",
45    )?;
46
47    println!("Inferencing...");
48    let inference_start = std::time::Instant::now();
49    let _output = model.inference(input)?;
50    
51    let inference_time = inference_start.elapsed();
52    println!("Inference took {} seconds on {} samples ({:.2} samples/sec)", inference_time.as_secs(), nb_samples, nb_samples as f32 / inference_time.as_secs() as f32);
53
54    Ok(())
55}
56