use orp::params::RuntimeParameters;
use gliner::util::result::Result;
use gliner::model::{input::text::TextInput, params::Parameters, GLiNER};
use gliner::model::pipeline::token::TokenMode;
use ort::execution_providers::{CUDAExecutionProvider, CoreMLExecutionProvider};

8fn main() -> Result<()> {
22
23 const MAX_SAMPLES: usize = 1000;
24 const CSV_PATH: &str = "data/nuner-sample-1k.csv";
25
26 let entities = [
27 "person",
28 "location",
29 "vehicle",
30 ];
31
32 println!("Loading data...");
33 let input = TextInput::new_from_csv(CSV_PATH, 0, MAX_SAMPLES, entities.map(|x| x.to_string()).to_vec())?;
34 let nb_samples = input.texts.len();
35
36 println!("Loading model...");
37 let model = GLiNER::<TokenMode>::new(
38 Parameters::default(),
39 RuntimeParameters::default().with_execution_providers([
40 CUDAExecutionProvider::default().build(),
41 CoreMLExecutionProvider::default().build(),
42 ]),
43 "models/gliner-multitask-large-v0.5/tokenizer.json",
44 "models/gliner-multitask-large-v0.5/onnx/model.onnx",
45 )?;
46
47 println!("Inferencing...");
48 let inference_start = std::time::Instant::now();
49 let _output = model.inference(input)?;
50
51 let inference_time = inference_start.elapsed();
52 println!("Inference took {} seconds on {} samples ({:.2} samples/sec)", inference_time.as_secs(), nb_samples, nb_samples as f32 / inference_time.as_secs() as f32);
53
54 Ok(())
55}