alith_interface/requests/res_components.rs

use super::completion::request::CompletionRequest;
use crate::llms::api::{
    anthropic::completion::AnthropicCompletionResponse,
    openai::completion::OpenAICompletionResponse,
};
/// Per-token probability information attached to a completion response.
#[derive(Debug)]
pub struct InferenceProbabilities {
    pub content: Option<String>,
    pub top_probs: Vec<TopProbabilities>,
}

/// A candidate token and the probability the model assigned to it.
#[derive(Debug)]
pub struct TopProbabilities {
    pub token: String,
    pub prob: f32,
}

/// The generation settings that were in effect for a completed request,
/// normalized across the supported backends.
pub struct GenerationSettings {
    pub model: String,
    pub frequency_penalty: Option<f32>,
    pub presence_penalty: f32,
    pub temperature: f32,
    pub top_p: Option<f32>,
    pub n_choices: u8,
    pub n_predict: Option<i32>,
    pub n_ctx: u64,
    pub logit_bias: Option<Vec<Vec<serde_json::Value>>>,
    pub grammar: Option<String>,
    pub stop_sequences: Vec<String>,
}

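// Note on `logit_bias`: the nested-Vec shape suggests llama.cpp-style
// `[[token_id, bias], ...]` pairs rather than OpenAI's map form; both
// constructors below currently set it to `None`. A hypothetical value,
// for illustration only:
//
//     let bias: Option<Vec<Vec<serde_json::Value>>> =
//         Some(vec![vec![serde_json::json!(15043), serde_json::json!(-100.0)]]);
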
impl GenerationSettings {
    /// Builds the settings record from the original request and an OpenAI-compatible response.
    pub fn new_from_openai(req: &CompletionRequest, res: &OpenAICompletionResponse) -> Self {
        Self::from_request(req, res.model.to_owned())
    }

    /// Builds the settings record from the original request and an Anthropic response.
    pub fn new_from_anthropic(req: &CompletionRequest, res: &AnthropicCompletionResponse) -> Self {
        Self::from_request(req, res.model.to_string())
    }

    /// Shared construction path: aside from the model name, both backends
    /// derive every field from the request configuration.
    fn from_request(req: &CompletionRequest, model: String) -> Self {
        Self {
            model,
            frequency_penalty: req.config.frequency_penalty,
            presence_penalty: req.config.presence_penalty,
            temperature: req.config.temperature,
            top_p: req.config.top_p,
            n_choices: 1,
            n_predict: req.config.actual_request_tokens.map(|x| x as i32),
            n_ctx: req.config.inference_ctx_size,
            logit_bias: None,
            grammar: None,
            stop_sequences: req
                .stop_sequences
                .sequences
                .iter()
                .map(|x| x.as_str().to_owned())
                .collect(),
        }
    }
}
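
// Usage sketch (`req` and `res` stand in for values produced elsewhere in
// the crate when a completion round-trips):
//
//     let settings = GenerationSettings::new_from_openai(&req, &res);
//     println!("{settings}");
//
// The `Display` impl below prints one labelled line per field, so the record
// can be logged as a readable block.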

impl std::fmt::Display for GenerationSettings {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        writeln!(f)?;
        writeln!(f, "    model: {:?}", self.model)?;
        writeln!(f, "    frequency_penalty: {:?}", self.frequency_penalty)?;
        writeln!(f, "    presence_penalty: {:?}", self.presence_penalty)?;
        writeln!(f, "    temperature: {:?}", self.temperature)?;
        writeln!(f, "    top_p: {:?}", self.top_p)?;
        writeln!(f, "    n_choices: {:?}", self.n_choices)?;
        writeln!(f, "    n_predict: {:?}", self.n_predict)?;
        writeln!(f, "    n_ctx: {:?}", self.n_ctx)?;
        writeln!(f, "    logit_bias: {:?}", self.logit_bias)?;
        writeln!(f, "    grammar: {:?}", self.grammar)?;
        writeln!(f, "    stop_sequences: {:?}", self.stop_sequences)
    }
}

/// Wall-clock timing for a single request, with optional per-phase
/// breakdowns and throughput figures where a backend reports them.
pub struct TimingUsage {
    pub start_time: std::time::Instant,
    pub end_time: std::time::Instant,
    pub total_time: std::time::Duration,
    pub prompt_processing_t: Option<std::time::Duration>,
    pub generation_t: Option<std::time::Duration>,
    pub prompt_tok_per_ms: Option<f32>,
    pub prompt_tok_per_sec: Option<f32>,
    pub generation_tok_per_ms: Option<f32>,
    pub generation_tok_per_sec: Option<f32>,
}

impl TimingUsage {
    /// Builds a timing record from the request's start `Instant`. `end_time`
    /// is taken now, and `total_time` is derived from the same pair so the
    /// three fields stay consistent with each other.
    pub fn new_from_generic(start_time: std::time::Instant) -> Self {
        let end_time = std::time::Instant::now();
        Self {
            start_time,
            end_time,
            total_time: end_time - start_time,
            prompt_processing_t: None,
            generation_t: None,
            prompt_tok_per_ms: None,
            prompt_tok_per_sec: None,
            generation_tok_per_ms: None,
            generation_tok_per_sec: None,
        }
    }
}
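
// Usage sketch: capture an `Instant` before dispatching the request and
// build the record once the response arrives. Backends that report their
// own phase timings can populate the `Option` fields afterwards.
//
//     let start_time = std::time::Instant::now();
//     // ... send the request and await the response ...
//     let timing = TimingUsage::new_from_generic(start_time);
//     println!("{timing}");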

impl std::fmt::Display for TimingUsage {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        writeln!(f)?;
        writeln!(f, "    total_time: {:?}", self.total_time)?;
        writeln!(f, "    prompt_processing_t: {:?}", self.prompt_processing_t)?;
        writeln!(f, "    generation_t: {:?}", self.generation_t)?;
        writeln!(f, "    prompt_tok_per_ms: {:?}", self.prompt_tok_per_ms)?;
        writeln!(f, "    prompt_tok_per_sec: {:?}", self.prompt_tok_per_sec)?;
        writeln!(
            f,
            "    generation_tok_per_ms: {:?}",
            self.generation_tok_per_ms
        )?;
        writeln!(
            f,
            "    generation_tok_per_sec: {:?}",
            self.generation_tok_per_sec
        )
    }
}

/// Token counts for a single request, with optional cache and cost figures.
pub struct TokenUsage {
    pub tokens_cached: Option<u32>,
    pub prompt_tokens: u32,
    pub completion_tokens: u32,
    pub total_tokens: u32,
    pub dollar_cost: Option<f32>,
    pub cents_cost: Option<f32>,
}

impl TokenUsage {
    /// Builds a usage record from an OpenAI-compatible response, falling
    /// back to zeroed counts when the response carries no `usage` block.
    pub fn new_from_generic(res: &OpenAICompletionResponse) -> Self {
        let (prompt_tokens, completion_tokens, total_tokens) =
            res.usage.as_ref().map_or((0, 0, 0), |usage| {
                (
                    usage.prompt_tokens,
                    usage.completion_tokens,
                    usage.total_tokens,
                )
            });
        Self {
            tokens_cached: None,
            prompt_tokens,
            completion_tokens,
            total_tokens,
            dollar_cost: None,
            cents_cost: None,
        }
    }

    /// Builds a usage record from an Anthropic response, which reports
    /// input and output counts separately.
    pub fn new_from_anthropic(res: &AnthropicCompletionResponse) -> Self {
        Self {
            tokens_cached: None,
            prompt_tokens: res.usage.input_tokens,
            completion_tokens: res.usage.output_tokens,
            total_tokens: res.usage.input_tokens + res.usage.output_tokens,
            dollar_cost: None,
            cents_cost: None,
        }
    }
}
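
// Usage sketch: both constructors leave the cost fields as `None`; pricing
// is expected to be filled in by whoever knows the model's rates. Token
// accounting is internally consistent:
//
//     let usage = TokenUsage::new_from_anthropic(&res);
//     assert_eq!(usage.total_tokens, usage.prompt_tokens + usage.completion_tokens);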

impl std::fmt::Display for TokenUsage {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        writeln!(f)?;
        writeln!(f, "    tokens_cached: {:?}", self.tokens_cached)?;
        writeln!(f, "    prompt_tokens: {:?}", self.prompt_tokens)?;
        writeln!(f, "    completion_tokens: {:?}", self.completion_tokens)?;
        writeln!(f, "    total_tokens: {:?}", self.total_tokens)?;
        writeln!(f, "    dollar_cost: {:?}", self.dollar_cost)?;
        writeln!(f, "    cents_cost: {:?}", self.cents_cost)
    }
}
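
// A minimal sanity check of the `Display` output (a sketch; the field values
// are arbitrary):
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn token_usage_display_lists_fields() {
        let usage = TokenUsage {
            tokens_cached: None,
            prompt_tokens: 10,
            completion_tokens: 5,
            total_tokens: 15,
            dollar_cost: None,
            cents_cost: None,
        };
        let out = format!("{usage}");
        assert!(out.contains("prompt_tokens: 10"));
        assert!(out.contains("total_tokens: 15"));
    }
}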