1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
use crate::requests::*;
use serde::{Deserialize, Serialize};
impl CompletionResponse {
    /// Builds a [`CompletionResponse`] from an OpenAI-style chat completion response.
    ///
    /// Only the first entry of `res.choices` is used; any additional choices are
    /// ignored.
    ///
    /// # Errors
    ///
    /// * [`CompletionError::ReponseContentEmpty`] if `res.choices` is empty or the
    ///   first choice has no message content.
    /// * [`CompletionError::StopReasonUnsupported`] if the finish reason is
    ///   `tool_calls`, `content_filter`, or `function_call`.
    pub fn new_from_openai(
        req: &CompletionRequest,
        res: OpenAiCompletionResponse,
    ) -> Result<Self, CompletionError> {
        // Guard clauses replace the original check-then-unwrap pattern so there
        // is no panic path if the emptiness check and the later content access
        // ever drift apart.
        let Some(choice) = res.choices.first() else {
            return Err(CompletionError::ReponseContentEmpty);
        };
        let Some(content) = choice.message.content.as_deref() else {
            return Err(CompletionError::ReponseContentEmpty);
        };
        let finish_reason = match choice.finish_reason {
            // A missing finish reason is mapped to a natural end-of-sequence,
            // matching the original behavior. NOTE(review): confirm treating
            // `None` as Eos (rather than an error) is intended.
            Some(FinishReason::Stop) | None => CompletionFinishReason::Eos,
            Some(FinishReason::Length) => CompletionFinishReason::StopLimit,
            Some(FinishReason::ToolCalls) => {
                return Err(CompletionError::StopReasonUnsupported(
                    "FinishReason::ToolCalls is not supported".to_owned(),
                ))
            }
            Some(FinishReason::ContentFilter) => {
                return Err(CompletionError::StopReasonUnsupported(
                    "FinishReason::ContentFilter is not supported".to_owned(),
                ))
            }
            Some(FinishReason::FunctionCall) => {
                return Err(CompletionError::StopReasonUnsupported(
                    "FinishReason::FunctionCall is not supported".to_owned(),
                ))
            }
        };
        Ok(Self {
            id: res.id.to_owned(),
            // This response maps only the first choice, so no index is recorded.
            index: None,
            content: content.to_owned(),
            finish_reason,
            completion_probabilities: None,
            truncated: false,
            generation_settings: GenerationSettings::new_from_openai(req, &res),
            timing_usage: TimingUsage::new_from_generic(req.start_time),
            token_usage: TokenUsage::new_from_generic(&res),
        })
    }
}
/// Represents a chat completion response returned by model, based on the provided input.
#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
pub struct OpenAiCompletionResponse {
    /// A unique identifier for the chat completion.
    pub id: String,
    /// A list of chat completion choices. Can be more than one if `n` is greater than 1.
    pub choices: Vec<ChatChoice>,
    /// The Unix timestamp (in seconds) of when the chat completion was created.
    pub created: u32,
    /// The model used for the chat completion.
    pub model: String,
    /// Token usage statistics for the request, when provided in the response.
    pub usage: Option<CompletionUsage>,
}
/// A single completion choice within an [`OpenAiCompletionResponse`].
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct ChatChoice {
    /// The index of the choice in the list of choices.
    pub index: u32,
    /// The chat completion message generated by the model for this choice.
    pub message: ChatCompletionResponseMessage,
    /// The reason the model stopped generating tokens. This will be `stop` if the model hit a natural stop point or a provided stop sequence,
    /// `length` if the maximum number of tokens specified in the request was reached,
    /// `content_filter` if content was omitted due to a flag from our content filters,
    /// `tool_calls` if the model called a tool, or `function_call` (deprecated) if the model called a function.
    pub finish_reason: Option<FinishReason>,
    /// Log probability information for the choice.
    pub logprobs: Option<ChatChoiceLogprobs>,
}
/// Usage statistics for the completion request.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct CompletionUsage {
    /// Number of tokens in the prompt.
    pub prompt_tokens: u32,
    /// Number of tokens in the generated completion.
    pub completion_tokens: u32,
    /// Total number of tokens used in the request (prompt + completion).
    pub total_tokens: u32,
}
/// A chat completion message generated by the model.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct ChatCompletionResponseMessage {
    /// The contents of the message. `None` when the response carries no text content.
    pub content: Option<String>,
    /// The role of the author of this message.
    pub role: Role,
}
/// The reason the model stopped generating tokens.
///
/// Serialized in `snake_case` (e.g. `tool_calls`) to match the OpenAI wire format.
#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum FinishReason {
    /// The model hit a natural stop point or a provided stop sequence.
    Stop,
    /// The maximum number of tokens specified in the request was reached.
    Length,
    /// The model called a tool.
    ToolCalls,
    /// Content was omitted due to a content-filter flag.
    ContentFilter,
    /// The model called a function (deprecated in favor of tool calls).
    FunctionCall,
}
/// Log probability information for a [`ChatChoice`].
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct ChatChoiceLogprobs {
    /// A list of message content tokens with log probability information.
    pub content: Option<Vec<ChatCompletionTokenLogprob>>,
}
/// Log probability information for a single generated token.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct ChatCompletionTokenLogprob {
    /// The token.
    pub token: String,
    /// The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value `-9999.0` is used to signify that the token is very unlikely.
    pub logprob: f32,
    /// A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be `null` if there is no bytes representation for the token.
    pub bytes: Option<Vec<u8>>,
    /// List of the most likely tokens and their log probability, at this token position. In rare cases, there may be fewer than the number of requested `top_logprobs` returned.
    pub top_logprobs: Vec<TopLogprobs>,
}
/// One of the most likely candidate tokens at a given position, with its log probability.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct TopLogprobs {
    /// The token.
    pub token: String,
    /// The log probability of this token.
    pub logprob: f32,
    /// A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be `null` if there is no bytes representation for the token.
    pub bytes: Option<Vec<u8>>,
}
/// The role of the author of a chat message.
///
/// Serialized in lowercase (e.g. `assistant`) to match the OpenAI wire format.
/// Defaults to [`Role::User`].
#[derive(Debug, Serialize, Deserialize, Clone, Copy, Default, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    System,
    #[default]
    User,
    Assistant,
    Tool,
    Function,
}