use orp::params::RuntimeParameters;
2use gliner::model::input;
3use gliner::model::pipeline::token::TokenMode;
4use gliner::util::result::Result;
5use gliner::model::{GLiNER, params::Parameters};
6
7#[cfg(feature = "memprof")]
8use gliner::util::memprof::*;
9
// Number of full inference passes over the sample set (for stable timing).
const REPEAT: usize = 100;
// Cap on how many rows are read from the CSV into the benchmark input.
const MAX_SAMPLES: usize = 100;
// Number of runtime threads handed to the ONNX runtime.
const THREADS: usize = 12;
// Path to the benchmark corpus (read by `TextInput::new_from_csv` below).
const CSV_PATH: &str = "data/nuner-sample-1k.csv";
14
15fn main() -> Result<()> {
16 let entities = [
17 "person",
18 "location",
19 "vehicle",
20 ];
21
22 println!("Loading data...");
23 let input = input::text::TextInput::new_from_csv(CSV_PATH, 0, MAX_SAMPLES, entities.map(|x| x.to_string()).to_vec())?;
24 let nb_samples = input.texts.len();
25
26 println!("Loading model...");
27 let model = GLiNER::<TokenMode>::new(
28 Parameters::default(),
29 RuntimeParameters::default().with_threads(THREADS),
30 std::path::Path::new("models/gliner-multitask-large-v0.5/tokenizer.json"),
31 std::path::Path::new("models/gliner-multitask-large-v0.5/onnx/model.onnx")
32 )?;
33
34 let global_inference_start = std::time::Instant::now();
35
36 for i in 0..REPEAT {
37 println!("Inferencing ({})...", i + 1);
38 let inference_start = std::time::Instant::now();
39 let _output = model.inference(input.clone())?;
40
41 let inference_time = inference_start.elapsed();
42 println!("Took {} seconds on {} samples ({:.2} samples/sec)", inference_time.as_secs(), nb_samples, nb_samples as f32 / inference_time.as_secs() as f32);
43
44 #[cfg(feature = "memprof")]
45 print_memory_usage();
46 }
47
48 let global_inference_time = global_inference_start.elapsed();
49 let global_nb_samples = nb_samples * REPEAT;
50 println!("All {} inferences took {} seconds on {} samples total ({:.2} samples/sec)", REPEAT, global_inference_time.as_secs(), global_nb_samples, global_nb_samples as f32 / global_inference_time.as_secs() as f32);
51
52 Ok(())
53}