dynamo_async_openai/types/
assistant.rs

// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
// Original Copyright (c) 2022 Himanshu Neema
// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
//
// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
// Licensed under Apache 2.0

11use std::collections::HashMap;
12
13use derive_builder::Builder;
14use serde::{Deserialize, Serialize};
15
16use crate::error::OpenAIError;
17
18use super::{FunctionName, FunctionObject, ResponseFormat};
19
/// Resources made available to the `code_interpreter` tool.
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq, Default)]
pub struct AssistantToolCodeInterpreterResources {
    /// A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made available to the `code_interpreter` tool. There can be a maximum of 20 files associated with the tool.
    pub file_ids: Vec<String>, // maxItems: 20
}
25
/// Resources made available to the `file_search` tool on an assistant.
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq, Default)]
pub struct AssistantToolFileSearchResources {
    /// The ID of the [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) attached to this assistant. There can be a maximum of 1 vector store attached to the assistant.
    pub vector_store_ids: Vec<String>,
}
31
/// A set of resources used by the assistant's tools, keyed by tool type.
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
pub struct AssistantToolResources {
    /// Resources for the `code_interpreter` tool.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub code_interpreter: Option<AssistantToolCodeInterpreterResources>,
    /// Resources for the `file_search` tool.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub file_search: Option<AssistantToolFileSearchResources>,
}
39
/// Tool resources supplied when creating an assistant. The `file_search` part
/// uses the create-time variant, which can also build vector stores inline.
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
pub struct CreateAssistantToolResources {
    /// Resources for the `code_interpreter` tool.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub code_interpreter: Option<AssistantToolCodeInterpreterResources>,
    /// Resources for the `file_search` tool.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub file_search: Option<CreateAssistantToolFileSearchResources>,
}
47
48#[derive(Clone, Serialize, Debug, Deserialize, PartialEq, Default)]
49pub struct CreateAssistantToolFileSearchResources {
50    ///  The [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) attached to this assistant. There can be a maximum of 1 vector store attached to the assistant.
51    pub vector_store_ids: Option<Vec<String>>,
52    /// A helper to create a [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) with file_ids and attach it to this assistant. There can be a maximum of 1 vector store attached to the assistant.
53    pub vector_stores: Option<Vec<AssistantVectorStore>>,
54}
55
56#[derive(Clone, Serialize, Debug, Deserialize, PartialEq, Default)]
57pub struct AssistantVectorStore {
58    /// A list of [file](https://platform.openai.com/docs/api-reference/files) IDs to add to the vector store. There can be a maximum of 10000 files in a vector store.
59    pub file_ids: Vec<String>,
60
61    /// The chunking strategy used to chunk the file(s). If not set, will use the `auto` strategy.
62    pub chunking_strategy: Option<AssistantVectorStoreChunkingStrategy>,
63
64    /// Set of 16 key-value pairs that can be attached to a vector store. This can be useful for storing additional information about the vector store in a structured format. Keys can be a maximum of 64 characters long and values can be a maxium of 512 characters long.
65    pub metadata: Option<HashMap<String, String>>,
66}
67
/// The chunking strategy used to chunk file(s) for a vector store.
/// Serialized with a `"type"` tag of `auto` or `static`.
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq, Default)]
#[serde(tag = "type")]
pub enum AssistantVectorStoreChunkingStrategy {
    /// The default strategy. This strategy currently uses a `max_chunk_size_tokens` of `800` and `chunk_overlap_tokens` of `400`.
    #[default]
    #[serde(rename = "auto")]
    Auto,
    /// Customize chunk size and overlap via [`StaticChunkingStrategy`].
    #[serde(rename = "static")]
    Static { r#static: StaticChunkingStrategy },
}
78
/// Static Chunking Strategy
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq, Default)]
pub struct StaticChunkingStrategy {
    /// The maximum number of tokens in each chunk. The default value is `800`. The minimum value is `100` and the maximum value is `4096`.
    pub max_chunk_size_tokens: u16,
    /// The number of tokens that overlap between chunks. The default value is `400`.
    ///
    /// Note that the overlap must not exceed half of `max_chunk_size_tokens`.
    pub chunk_overlap_tokens: u16,
}
89
/// Represents an `assistant` that can call the model and use tools.
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
pub struct AssistantObject {
    /// The identifier, which can be referenced in API endpoints.
    pub id: String,
    /// The object type, which is always `assistant`.
    pub object: String,
    /// The Unix timestamp (in seconds) for when the assistant was created.
    pub created_at: i32,
    /// The name of the assistant. The maximum length is 256 characters.
    pub name: Option<String>,
    /// The description of the assistant. The maximum length is 512 characters.
    pub description: Option<String>,
    /// ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](https://platform.openai.com/docs/models) for descriptions of them.
    pub model: String,
    /// The system instructions that the assistant uses. The maximum length is 256,000 characters.
    pub instructions: Option<String>,
    /// A list of tools enabled on the assistant. There can be a maximum of 128 tools per assistant. Tools can be of types `code_interpreter`, `file_search`, or `function`.
    // `#[serde(default)]` lets responses that omit `tools` deserialize to an empty list.
    #[serde(default)]
    pub tools: Vec<AssistantTools>,
    /// A set of resources that are used by the assistant's tools. The resources are specific to the type of tool. For example, the `code_interpreter` tool requires a list of file IDs, while the `file_search` tool requires a list of vector store IDs.
    pub tool_resources: Option<AssistantToolResources>,
    /// Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maximum of 512 characters long.
    pub metadata: Option<HashMap<String, String>>,
    /// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
    pub temperature: Option<f32>,
    /// An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
    /// We generally recommend altering this or temperature but not both.
    pub top_p: Option<f32>,

    /// The format the model must output; see [`AssistantsApiResponseFormatOption`].
    pub response_format: Option<AssistantsApiResponseFormatOption>,
}
122
/// Specifies the format that the model must output. Compatible with [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
///
/// Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs which guarantees the model will match your supplied JSON schema. Learn more in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
///
/// Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the message the model generates is valid JSON.
///
/// **Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly "stuck" request. Also note that the message content may be partially cut off if `finish_reason="length"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length.
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq, Default)]
pub enum AssistantsApiResponseFormatOption {
    /// Let the model decide the output format (the default), serialized as the string `"auto"`.
    #[default]
    #[serde(rename = "auto")]
    Auto,
    /// An explicit [`ResponseFormat`] object; serialized untagged (as the format object itself).
    #[serde(untagged)]
    Format(ResponseFormat),
}
138
/// Retrieval (file search) tool.
#[derive(Clone, Serialize, Debug, Default, Deserialize, PartialEq)]
pub struct AssistantToolsFileSearch {
    /// Overrides for the file search tool.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub file_search: Option<AssistantToolsFileSearchOverrides>,
}
146
147#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
148pub struct AssistantToolsFileSearchOverrides {
149    ///  The maximum number of results the file search tool should output. The default is 20 for gpt-4* models and 5 for gpt-3.5-turbo. This number should be between 1 and 50 inclusive.
150    ///
151    //// Note that the file search tool may output fewer than `max_num_results` results. See the [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search/customizing-file-search-settings) for more information.
152    pub max_num_results: Option<u8>,
153    pub ranking_options: Option<FileSearchRankingOptions>,
154}
155
/// The ranker used by the file search tool.
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
pub enum FileSearchRanker {
    /// Let the API choose the ranker automatically.
    #[serde(rename = "auto")]
    Auto,
    /// The `default_2024_08_21` ranker.
    #[serde(rename = "default_2024_08_21")]
    Default2024_08_21,
}
163
/// The ranking options for the file search. If not specified, the file search tool will use the `auto` ranker and a score_threshold of 0.
///
/// See the [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) for more information.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct FileSearchRankingOptions {
    /// The ranker to use for the file search. If not specified will use the `auto` ranker.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ranker: Option<FileSearchRanker>,

    /// The score threshold for the file search. All values must be a floating point number between 0 and 1.
    pub score_threshold: f32,
}
176
/// Function tool
#[derive(Clone, Serialize, Debug, Default, Deserialize, PartialEq)]
pub struct AssistantToolsFunction {
    /// The definition of the function the model may call.
    pub function: FunctionObject,
}
182
/// A tool enabled on an assistant. Serialized with a `"type"` tag of
/// `code_interpreter`, `file_search`, or `function`.
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
#[serde(tag = "type")]
#[serde(rename_all = "snake_case")]
pub enum AssistantTools {
    /// The code interpreter tool; carries no extra configuration.
    CodeInterpreter,
    /// The file search tool, with optional overrides.
    FileSearch(AssistantToolsFileSearch),
    /// A user-defined function tool.
    Function(AssistantToolsFunction),
}
191
/// Request body for creating an assistant. Build via [`CreateAssistantRequestArgs`].
#[derive(Clone, Serialize, Default, Debug, Deserialize, Builder, PartialEq)]
#[builder(name = "CreateAssistantRequestArgs")]
#[builder(pattern = "mutable")]
#[builder(setter(into, strip_option), default)]
#[builder(derive(Debug))]
#[builder(build_fn(error = "OpenAIError"))]
pub struct CreateAssistantRequest {
    /// ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](https://platform.openai.com/docs/models/overview) for descriptions of them.
    pub model: String,

    /// The name of the assistant. The maximum length is 256 characters.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub name: Option<String>,

    /// The description of the assistant. The maximum length is 512 characters.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,

    /// The system instructions that the assistant uses. The maximum length is 256,000 characters.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub instructions: Option<String>,

    /// A list of tools enabled on the assistant. There can be a maximum of 128 tools per assistant. Tools can be of types `code_interpreter`, `file_search`, or `function`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tools: Option<Vec<AssistantTools>>,

    /// A set of resources that are used by the assistant's tools. The resources are specific to the type of tool. For example, the `code_interpreter` tool requires a list of file IDs, while the `file_search` tool requires a list of vector store IDs.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_resources: Option<CreateAssistantToolResources>,

    /// Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maximum of 512 characters long.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub metadata: Option<HashMap<String, String>>,

    /// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,

    /// An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
    ///
    /// We generally recommend altering this or temperature but not both.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,

    /// The format the model must output; see [`AssistantsApiResponseFormatOption`].
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_format: Option<AssistantsApiResponseFormatOption>,
}
239
/// Request body for modifying an assistant; every field is optional so only the
/// supplied fields are updated. Build via [`ModifyAssistantRequestArgs`].
#[derive(Clone, Serialize, Default, Debug, Deserialize, Builder, PartialEq)]
#[builder(name = "ModifyAssistantRequestArgs")]
#[builder(pattern = "mutable")]
#[builder(setter(into, strip_option), default)]
#[builder(derive(Debug))]
#[builder(build_fn(error = "OpenAIError"))]
pub struct ModifyAssistantRequest {
    /// ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](https://platform.openai.com/docs/models/overview) for descriptions of them.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub model: Option<String>,

    /// The name of the assistant. The maximum length is 256 characters.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub name: Option<String>,

    /// The description of the assistant. The maximum length is 512 characters.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,

    /// The system instructions that the assistant uses. The maximum length is 256,000 characters.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub instructions: Option<String>,

    /// A list of tools enabled on the assistant. There can be a maximum of 128 tools per assistant. Tools can be of types `code_interpreter`, `file_search`, or `function`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tools: Option<Vec<AssistantTools>>,

    /// A set of resources that are used by the assistant's tools. The resources are specific to the type of tool. For example, the `code_interpreter` tool requires a list of file IDs, while the `file_search` tool requires a list of vector store IDs.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_resources: Option<AssistantToolResources>,
    /// Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maximum of 512 characters long.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub metadata: Option<HashMap<String, String>>,

    /// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,

    /// An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
    ///
    /// We generally recommend altering this or temperature but not both.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,

    /// The format the model must output; see [`AssistantsApiResponseFormatOption`].
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_format: Option<AssistantsApiResponseFormatOption>,
}
287
/// Response returned when deleting an assistant.
#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
pub struct DeleteAssistantResponse {
    /// The ID of the assistant that was deleted.
    pub id: String,
    /// Whether the deletion succeeded.
    pub deleted: bool,
    /// The object type of the response.
    pub object: String,
}
294
/// A page of assistants returned by the list endpoint.
#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
pub struct ListAssistantsResponse {
    /// The object type of the response.
    pub object: String,
    /// The assistants in this page.
    pub data: Vec<AssistantObject>,
    /// ID of the first assistant in `data` — presumably a pagination cursor; confirm against the API docs.
    pub first_id: Option<String>,
    /// ID of the last assistant in `data` — presumably a pagination cursor; confirm against the API docs.
    pub last_id: Option<String>,
    /// Whether more results are available beyond this page.
    pub has_more: bool,
}
303
/// Controls which (if any) tool is called by the model.
/// `none` means the model will not call any tools and instead generates a message.
/// `auto` is the default value and means the model can pick between generating a message or calling one or more tools.
/// `required` means the model must call one or more tools before responding to the user.
/// Specifying a particular tool like `{"type": "file_search"}` or `{"type": "function", "function": {"name": "my_function"}}` forces the model to call that tool.
#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum AssistantsApiToolChoiceOption {
    #[default]
    None,
    Auto,
    Required,
    /// A specific tool the model must call; serialized untagged as the named-tool-choice object.
    #[serde(untagged)]
    Named(AssistantsNamedToolChoice),
}
319
320/// Specifies a tool the model should use. Use to force the model to call a specific tool.
321#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
322pub struct AssistantsNamedToolChoice {
323    /// The type of the tool. If type is `function`, the function name must be set
324    pub r#type: AssistantToolType,
325
326    pub function: Option<FunctionName>,
327}
328
/// The tool type referenced by a named tool choice.
#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum AssistantToolType {
    #[default]
    Function,
    CodeInterpreter,
    FileSearch,
}