embed/
embed.rs

1//! Complete example for text embedding
2//! 
3//! Reproduces the example from <https://huggingface.co/Alibaba-NLP/gte-modernbert-base>
4//! and checks for consistency of the results.
5
6fn main() -> gte::util::result::Result<()> {    
7    const TOKENIZER_PATH: &str = "models/gte-modernbert-base/tokenizer.json";
8    const MODEL_PATH: &str = "models/gte-modernbert-base/onnx/model.onnx";
9
10    let params = gte::params::Parameters::default();
11    let pipeline = gte::embed::pipeline::TextEmbeddingPipeline::new(TOKENIZER_PATH, &params)?;
12    let model = orp::model::Model::new(MODEL_PATH, orp::params::RuntimeParameters::default())?;
13            
14    let inputs = gte::embed::input::TextInput::from_str(&[
15        "what is the capital of China?", 
16        "how to implement quick sort in python?", 
17        "Beijing", 
18        "sorting algorithms",
19    ]);
20
21    const EXPECTED_DISTANCES: [f32; 3] = [0.4289073944091797, 0.7130911254882812, 0.33664554595947266];
22    const EPSILON: f32 = 0.000001;
23
24    let outputs = model.inference(inputs, &pipeline, &params)?;
25    let distances = gte::util::test::distances_to_first(&outputs);
26
27    println!("Distances: {:?}", distances); 
28    assert!(gte::util::test::is_close_to_a(&distances.view(), &EXPECTED_DISTANCES, EPSILON));
29
30    Ok(())
31}