embed_mul_token/
embed-mul-token.rs

1//! Complete example for text embedding using `gte-multilingual` (first token)
2
3fn main() -> gte::util::result::Result<()> {    
4    const TOKENIZER_PATH: &str = "models/gte-multilingual-base/tokenizer.json";
5    const MODEL_PATH: &str = "models/gte-multilingual-base/onnx/model.onnx";
6
7    let params = gte::params::Parameters::default().with_output_id("token_embeddings");
8    let pipeline = gte::embed::pipeline::TextEmbeddingPipeline::new(TOKENIZER_PATH, &params)?;
9    let model = orp::model::Model::new(MODEL_PATH, orp::params::RuntimeParameters::default())?;
10            
11    let inputs = gte::embed::input::TextInput::from_str(&[
12        "What is the capital of France?", 
13        "How to implement quick sort in python?", 
14        "Die Hauptstadt von Frankreich ist Paris.",
15        "La capital de Francia es ParĂ­s.",
16        "London is the capital of the UK",
17    ]);
18
19    let outputs = model.inference(inputs, &pipeline, &params)?;
20    let distances = gte::util::test::distances_to_first(&outputs);
21
22    println!("Distances: {:?}", distances);     
23
24    Ok(())
25}