dynamo_async_openai/types/assistant.rs
1// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2// SPDX-License-Identifier: Apache-2.0
3//
4// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
5// Original Copyright (c) 2022 Himanshu Neema
6// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
7//
8// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
9// Licensed under Apache 2.0
10
11use std::collections::HashMap;
12
13use derive_builder::Builder;
14use serde::{Deserialize, Serialize};
15
16use crate::error::OpenAIError;
17
18use super::{FunctionName, FunctionObject, ResponseFormat};
19
/// Resources made available to the `code_interpreter` tool on an assistant.
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq, Default)]
pub struct AssistantToolCodeInterpreterResources {
    /// A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made available to the `code_interpreter` tool. There can be a maximum of 20 files associated with the tool.
    pub file_ids: Vec<String>, // maxItems: 20
}
25
/// Resources made available to the `file_search` tool on an assistant.
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq, Default)]
pub struct AssistantToolFileSearchResources {
    /// The ID of the [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) attached to this assistant. There can be a maximum of 1 vector store attached to the assistant.
    pub vector_store_ids: Vec<String>,
}
31
/// A set of resources that are used by the assistant's tools. The resources are specific to the
/// type of tool: `code_interpreter` takes a list of file IDs, while `file_search` takes a list of
/// vector store IDs.
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
pub struct AssistantToolResources {
    /// Resources for the `code_interpreter` tool, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub code_interpreter: Option<AssistantToolCodeInterpreterResources>,
    /// Resources for the `file_search` tool, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub file_search: Option<AssistantToolFileSearchResources>,
}
39
/// Tool resources supplied when creating an assistant. Unlike [`AssistantToolResources`], the
/// `file_search` entry uses the create-specific shape that also allows building a vector store
/// inline via `vector_stores`.
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
pub struct CreateAssistantToolResources {
    /// Resources for the `code_interpreter` tool, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub code_interpreter: Option<AssistantToolCodeInterpreterResources>,
    /// Resources for the `file_search` tool, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub file_search: Option<CreateAssistantToolFileSearchResources>,
}
47
/// `file_search` resources for assistant creation: either existing vector store IDs or a helper
/// spec to create a vector store inline.
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq, Default)]
pub struct CreateAssistantToolFileSearchResources {
    /// The [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) attached to this assistant. There can be a maximum of 1 vector store attached to the assistant.
    pub vector_store_ids: Option<Vec<String>>,
    /// A helper to create a [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) with file_ids and attach it to this assistant. There can be a maximum of 1 vector store attached to the assistant.
    pub vector_stores: Option<Vec<AssistantVectorStore>>,
}
55
/// Specification for creating a vector store inline while creating an assistant.
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq, Default)]
pub struct AssistantVectorStore {
    /// A list of [file](https://platform.openai.com/docs/api-reference/files) IDs to add to the vector store. There can be a maximum of 10000 files in a vector store.
    pub file_ids: Vec<String>,

    /// The chunking strategy used to chunk the file(s). If not set, will use the `auto` strategy.
    pub chunking_strategy: Option<AssistantVectorStoreChunkingStrategy>,

    /// Set of 16 key-value pairs that can be attached to a vector store. This can be useful for storing additional information about the vector store in a structured format. Keys can be a maximum of 64 characters long and values can be a maximum of 512 characters long.
    pub metadata: Option<HashMap<String, String>>,
}
67
/// The chunking strategy used to chunk file(s) added to a vector store.
///
/// Serialized as an internally tagged enum: `{"type": "auto"}` or
/// `{"type": "static", "static": {...}}`.
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq, Default)]
#[serde(tag = "type")]
pub enum AssistantVectorStoreChunkingStrategy {
    /// The default strategy. This strategy currently uses a `max_chunk_size_tokens` of `800` and `chunk_overlap_tokens` of `400`.
    #[default]
    #[serde(rename = "auto")]
    Auto,
    /// Fixed chunk size and overlap, supplied in the nested `static` object.
    #[serde(rename = "static")]
    Static { r#static: StaticChunkingStrategy },
}
78
/// Static chunking strategy: explicit chunk size and overlap, in tokens.
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq, Default)]
pub struct StaticChunkingStrategy {
    /// The maximum number of tokens in each chunk. The default value is `800`. The minimum value is `100` and the maximum value is `4096`.
    pub max_chunk_size_tokens: u16,
    /// The number of tokens that overlap between chunks. The default value is `400`.
    ///
    /// Note that the overlap must not exceed half of `max_chunk_size_tokens`.
    pub chunk_overlap_tokens: u16,
}
89
/// Represents an `assistant` that can call the model and use tools.
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
pub struct AssistantObject {
    /// The identifier, which can be referenced in API endpoints.
    pub id: String,
    /// The object type, which is always `assistant`.
    pub object: String,
    /// The Unix timestamp (in seconds) for when the assistant was created.
    pub created_at: i32,
    /// The name of the assistant. The maximum length is 256 characters.
    pub name: Option<String>,
    /// The description of the assistant. The maximum length is 512 characters.
    pub description: Option<String>,
    /// ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](https://platform.openai.com/docs/models) for descriptions of them.
    pub model: String,
    /// The system instructions that the assistant uses. The maximum length is 256,000 characters.
    pub instructions: Option<String>,
    /// A list of tools enabled on the assistant. There can be a maximum of 128 tools per assistant. Tools can be of types `code_interpreter`, `file_search`, or `function`.
    // `#[serde(default)]` lets deserialization succeed when the field is omitted.
    #[serde(default)]
    pub tools: Vec<AssistantTools>,
    /// A set of resources that are used by the assistant's tools. The resources are specific to the type of tool. For example, the `code_interpreter` tool requires a list of file IDs, while the `file_search` tool requires a list of vector store IDs.
    pub tool_resources: Option<AssistantToolResources>,
    /// Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maximum of 512 characters long.
    pub metadata: Option<HashMap<String, String>>,
    /// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
    pub temperature: Option<f32>,
    /// An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
    /// We generally recommend altering this or temperature but not both.
    pub top_p: Option<f32>,

    /// Specifies the format that the model must output; see [`AssistantsApiResponseFormatOption`].
    pub response_format: Option<AssistantsApiResponseFormatOption>,
}
122
/// Specifies the format that the model must output. Compatible with [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
///
/// Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs which guarantees the model will match your supplied JSON schema. Learn more in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
///
/// Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the message the model generates is valid JSON.
///
/// **Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly "stuck" request. Also note that the message content may be partially cut off if `finish_reason="length"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length.
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq, Default)]
pub enum AssistantsApiResponseFormatOption {
    /// The string `"auto"`: let the model decide the output format.
    #[default]
    #[serde(rename = "auto")]
    Auto,
    /// An explicit response-format object (untagged, so it serializes as the inner
    /// [`ResponseFormat`] directly).
    #[serde(untagged)]
    Format(ResponseFormat),
}
138
/// File search tool (formerly known as "retrieval").
#[derive(Clone, Serialize, Debug, Default, Deserialize, PartialEq)]
pub struct AssistantToolsFileSearch {
    /// Overrides for the file search tool.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub file_search: Option<AssistantToolsFileSearchOverrides>,
}
146
/// Per-assistant overrides for the file search tool's behavior.
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
pub struct AssistantToolsFileSearchOverrides {
    /// The maximum number of results the file search tool should output. The default is 20 for gpt-4* models and 5 for gpt-3.5-turbo. This number should be between 1 and 50 inclusive.
    ///
    /// Note that the file search tool may output fewer than `max_num_results` results. See the [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search/customizing-file-search-settings) for more information.
    pub max_num_results: Option<u8>,
    /// Ranking options for file search; see [`FileSearchRankingOptions`].
    pub ranking_options: Option<FileSearchRankingOptions>,
}
155
/// The ranker to use for the file search.
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
pub enum FileSearchRanker {
    /// Let the service pick the ranker.
    #[serde(rename = "auto")]
    Auto,
    /// The `default_2024_08_21` ranker.
    #[serde(rename = "default_2024_08_21")]
    Default2024_08_21,
}
163
/// The ranking options for the file search. If not specified, the file search tool will use the `auto` ranker and a score_threshold of 0.
///
/// See the [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) for more information.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct FileSearchRankingOptions {
    /// The ranker to use for the file search. If not specified will use the `auto` ranker.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ranker: Option<FileSearchRanker>,

    /// The score threshold for the file search. All values must be a floating point number between 0 and 1.
    pub score_threshold: f32,
}
176
/// Function tool: lets the assistant call a user-defined function.
#[derive(Clone, Serialize, Debug, Default, Deserialize, PartialEq)]
pub struct AssistantToolsFunction {
    /// The function definition (name, description, parameter schema).
    pub function: FunctionObject,
}
182
/// A tool enabled on an assistant.
///
/// Internally tagged on `"type"` with snake_case names, e.g.
/// `{"type": "code_interpreter"}` or `{"type": "function", "function": {...}}`.
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
#[serde(tag = "type")]
#[serde(rename_all = "snake_case")]
pub enum AssistantTools {
    /// The code interpreter tool (no extra configuration).
    CodeInterpreter,
    /// The file search tool, with optional overrides.
    FileSearch(AssistantToolsFileSearch),
    /// A user-defined function tool.
    Function(AssistantToolsFunction),
}
191
/// Request body for creating an assistant. Build with [`CreateAssistantRequestArgs`].
#[derive(Clone, Serialize, Default, Debug, Deserialize, Builder, PartialEq)]
#[builder(name = "CreateAssistantRequestArgs")]
#[builder(pattern = "mutable")]
#[builder(setter(into, strip_option), default)]
#[builder(derive(Debug))]
#[builder(build_fn(error = "OpenAIError"))]
pub struct CreateAssistantRequest {
    /// ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](https://platform.openai.com/docs/models/overview) for descriptions of them.
    pub model: String,

    /// The name of the assistant. The maximum length is 256 characters.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub name: Option<String>,

    /// The description of the assistant. The maximum length is 512 characters.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,

    /// The system instructions that the assistant uses. The maximum length is 256,000 characters.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub instructions: Option<String>,

    /// A list of tools enabled on the assistant. There can be a maximum of 128 tools per assistant. Tools can be of types `code_interpreter`, `file_search`, or `function`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tools: Option<Vec<AssistantTools>>,

    /// A set of resources that are used by the assistant's tools. The resources are specific to the type of tool. For example, the `code_interpreter` tool requires a list of file IDs, while the `file_search` tool requires a list of vector store IDs.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_resources: Option<CreateAssistantToolResources>,

    /// Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maximum of 512 characters long.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub metadata: Option<HashMap<String, String>>,

    /// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,

    /// An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
    ///
    /// We generally recommend altering this or temperature but not both.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,

    /// Specifies the format that the model must output; see [`AssistantsApiResponseFormatOption`].
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_format: Option<AssistantsApiResponseFormatOption>,
}
239
/// Request body for modifying an assistant; every field is optional and only provided fields are
/// sent. Build with [`ModifyAssistantRequestArgs`].
#[derive(Clone, Serialize, Default, Debug, Deserialize, Builder, PartialEq)]
#[builder(name = "ModifyAssistantRequestArgs")]
#[builder(pattern = "mutable")]
#[builder(setter(into, strip_option), default)]
#[builder(derive(Debug))]
#[builder(build_fn(error = "OpenAIError"))]
pub struct ModifyAssistantRequest {
    /// ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](https://platform.openai.com/docs/models/overview) for descriptions of them.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub model: Option<String>,

    /// The name of the assistant. The maximum length is 256 characters.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub name: Option<String>,

    /// The description of the assistant. The maximum length is 512 characters.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,

    /// The system instructions that the assistant uses. The maximum length is 256,000 characters.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub instructions: Option<String>,

    /// A list of tools enabled on the assistant. There can be a maximum of 128 tools per assistant. Tools can be of types `code_interpreter`, `file_search`, or `function`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tools: Option<Vec<AssistantTools>>,

    /// A set of resources that are used by the assistant's tools. The resources are specific to the type of tool. For example, the `code_interpreter` tool requires a list of file IDs, while the `file_search` tool requires a list of vector store IDs.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_resources: Option<AssistantToolResources>,
    /// Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maximum of 512 characters long.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub metadata: Option<HashMap<String, String>>,

    /// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,

    /// An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
    ///
    /// We generally recommend altering this or temperature but not both.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,

    /// Specifies the format that the model must output; see [`AssistantsApiResponseFormatOption`].
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_format: Option<AssistantsApiResponseFormatOption>,
}
287
/// Response returned when an assistant is deleted.
#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
pub struct DeleteAssistantResponse {
    /// The ID of the deleted assistant.
    pub id: String,
    /// Whether the deletion succeeded.
    pub deleted: bool,
    /// The object type string.
    pub object: String,
}
294
/// A paginated list of assistants.
#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
pub struct ListAssistantsResponse {
    /// The object type string.
    pub object: String,
    /// The assistants in this page.
    pub data: Vec<AssistantObject>,
    /// ID of the first item in `data`, for cursor pagination.
    pub first_id: Option<String>,
    /// ID of the last item in `data`, for cursor pagination.
    pub last_id: Option<String>,
    /// Whether more items exist beyond this page.
    pub has_more: bool,
}
303
/// Controls which (if any) tool is called by the model.
/// `none` means the model will not call any tools and instead generates a message.
/// `auto` is the default value and means the model can pick between generating a message or calling one or more tools.
/// `required` means the model must call one or more tools before responding to the user.
/// Specifying a particular tool like `{"type": "file_search"}` or `{"type": "function", "function": {"name": "my_function"}}` forces the model to call that tool.
#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum AssistantsApiToolChoiceOption {
    /// The string `"none"`.
    #[default]
    None,
    /// The string `"auto"`.
    Auto,
    /// The string `"required"`.
    Required,
    /// A specific tool; untagged, so it serializes as the inner
    /// [`AssistantsNamedToolChoice`] object directly.
    #[serde(untagged)]
    Named(AssistantsNamedToolChoice),
}
319
/// Specifies a tool the model should use. Use to force the model to call a specific tool.
#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
pub struct AssistantsNamedToolChoice {
    /// The type of the tool. If type is `function`, the function name must be set
    pub r#type: AssistantToolType,

    /// The name of the function to call, when `type` is `function`.
    pub function: Option<FunctionName>,
}
328
/// The kind of tool referenced by a named tool choice; serialized in snake_case
/// (`function`, `code_interpreter`, `file_search`).
#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum AssistantToolType {
    /// A user-defined function tool (default).
    #[default]
    Function,
    /// The code interpreter tool.
    CodeInterpreter,
    /// The file search tool.
    FileSearch,
}