//! # Seasoning
//!
//! Retrieval-focused embedding and reranking infrastructure with explicit model
//! semantics, rate limiting, retries, and optional local llama.cpp execution.
//!
//! Config-driven local setups accept the `llama.cpp`, `llamacpp`, `llama-cpp`,
//! or `llama_cpp` dialect spellings when converting into [`Dialect::LlamaCpp`].
//!
//! Seasoning separates backend/runtime selection from retrieval formatting:
//! [`Dialect`] selects transport or local execution, [`ModelFamily`] selects
//! retrieval-family formatting, and [`EmbeddingRole`] identifies whether a
//! semantic embedding input is a query or document.
//!
//! Embedding execution consumes pre-tokenized [`PreparedEmbeddingInput`] values.
//! Callers render semantic inputs first, then tokenize the rendered payload with
//! the tokenizer for the target embedding model.
//!
//! ## Embeddings
//!
//! ```rust,no_run
//! use std::time::Duration;
//!
//! use secrecy::SecretString;
//! use seasoning::EmbeddingProvider;
//! use seasoning::embedding::{
//! Client as EmbedClient, Dialect, EmbedderConfig, EmbeddingInput, EmbeddingRole,
//! ModelFamily, PreparedEmbeddingInput,
//! };
//!
//! # async fn example() -> seasoning::Result<()> {
//! let embedder = EmbedClient::new(EmbedderConfig {
//! api_key: Some(SecretString::from("YOUR_API_KEY")),
//! base_url: "https://api.deepinfra.com/v1/openai".to_string(),
//! timeout: Duration::from_secs(10),
//! dialect: Dialect::DeepInfra,
//! model_family: ModelFamily::Qwen3,
//! model: "Qwen/Qwen3-Embedding-0.6B".to_string(),
//! query_instruction: None,
//! embedding_dim: 1024,
//! requests_per_minute: 1000,
//! max_concurrent_requests: 50,
//! tokens_per_minute: 1_000_000,
//! })?;
//!
//! let semantic = EmbeddingInput {
//! role: EmbeddingRole::Query,
//! text: "memory-safe systems programming".to_string(),
//! title: None,
//! };
//! let rendered = embedder.render_input(&semantic);
//! let _ = rendered;
//!
//! // Tokenize `rendered` with the tokenizer for the target embedding model.
//! let prepared = vec![PreparedEmbeddingInput::new(vec![1, 2, 3])?];
//! let _ = embedder.embed(&prepared).await?;
//! # Ok(())
//! # }
//! ```
//!
//! ## Reranking
//!
//! ```rust,no_run
//! use std::time::Duration;
//!
//! use secrecy::SecretString;
//! use seasoning::RerankingProvider;
//! use seasoning::embedding::{Dialect, ModelFamily};
//! use seasoning::reranker::{Client as RerankerClient, RerankerConfig};
//!
//! # async fn example() -> seasoning::Result<()> {
//! let reranker = RerankerClient::new(RerankerConfig {
//! api_key: Some(SecretString::from("YOUR_API_KEY")),
//! base_url: "https://api.deepinfra.com/v1".to_string(),
//! timeout: Duration::from_secs(10),
//! dialect: Dialect::DeepInfra,
//! model_family: ModelFamily::Qwen3,
//! model: "Qwen/Qwen3-Reranker-0.6B".to_string(),
//! instruction: None,
//! requests_per_minute: 1000,
//! max_concurrent_requests: 50,
//! tokens_per_minute: 1_000_000,
//! })?;
//!
//! let query = seasoning::RerankQuery {
//! text: "memory-safe systems programming".to_string(),
//! token_count: 4,
//! };
//! let documents = vec![seasoning::RerankDocument {
//! text: "Rust uses ownership and borrowing".to_string(),
//! token_count: 6,
//! }];
//!
//! let scores = reranker.rerank(&query, &documents).await?;
//! assert_eq!(scores.len(), documents.len());
//! # Ok(())
//! # }
//! ```
pub use ;
pub use ;
pub use ;