dynamo_async_openai/types/run.rs
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
// Original Copyright (c) 2022 Himanshu Neema
// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
//
// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
// Licensed under Apache 2.0

use std::collections::HashMap;

use derive_builder::Builder;
use serde::{Deserialize, Serialize};

use crate::{error::OpenAIError, types::FunctionCall};

use super::{
    AssistantTools, AssistantsApiResponseFormatOption, AssistantsApiToolChoiceOption,
    CreateMessageRequest,
};

/// Represents an execution run on a [thread](https://platform.openai.com/docs/api-reference/threads).
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
pub struct RunObject {
    /// The identifier, which can be referenced in API endpoints.
    pub id: String,
    /// The object type, which is always `thread.run`.
    pub object: String,
    /// The Unix timestamp (in seconds) for when the run was created.
    pub created_at: i32,
    /// The ID of the [thread](https://platform.openai.com/docs/api-reference/threads) that was executed on as a part of this run.
    pub thread_id: String,

    /// The ID of the [assistant](https://platform.openai.com/docs/api-reference/assistants) used for execution of this run.
    pub assistant_id: Option<String>,

    /// The status of the run, which can be either `queued`, `in_progress`, `requires_action`, `cancelling`, `cancelled`, `failed`, `completed`, `incomplete`, or `expired`.
    pub status: RunStatus,

    /// Details on the action required to continue the run. Will be `null` if no action is required.
    pub required_action: Option<RequiredAction>,

    /// The last error associated with this run. Will be `null` if there are no errors.
    pub last_error: Option<LastError>,

    /// The Unix timestamp (in seconds) for when the run will expire.
    pub expires_at: Option<i32>,
    /// The Unix timestamp (in seconds) for when the run was started.
    pub started_at: Option<i32>,
    /// The Unix timestamp (in seconds) for when the run was cancelled.
    pub cancelled_at: Option<i32>,
    /// The Unix timestamp (in seconds) for when the run failed.
    pub failed_at: Option<i32>,
    /// The Unix timestamp (in seconds) for when the run was completed.
    pub completed_at: Option<i32>,

    /// Details on why the run is incomplete. Will be `null` if the run is not incomplete.
    pub incomplete_details: Option<RunObjectIncompleteDetails>,

    /// The model that the [assistant](https://platform.openai.com/docs/api-reference/assistants) used for this run.
    pub model: String,

    /// The instructions that the [assistant](https://platform.openai.com/docs/api-reference/assistants) used for this run.
    pub instructions: String,

    /// The list of tools that the [assistant](https://platform.openai.com/docs/api-reference/assistants) used for this run.
    pub tools: Vec<AssistantTools>,

    pub metadata: Option<HashMap<String, serde_json::Value>>,

    /// Usage statistics related to the run. This value will be `null` if the run is not in a terminal state (e.g. while it is `in_progress` or `queued`).
    pub usage: Option<RunCompletionUsage>,

    /// The sampling temperature used for this run. If not set, defaults to 1.
    pub temperature: Option<f32>,

    /// The nucleus sampling value used for this run. If not set, defaults to 1.
    pub top_p: Option<f32>,

    /// The maximum number of prompt tokens specified to have been used over the course of the run.
    pub max_prompt_tokens: Option<u32>,

    /// The maximum number of completion tokens specified to have been used over the course of the run.
    pub max_completion_tokens: Option<u32>,

    /// Controls for how a thread will be truncated prior to the run. Use this to control the initial context window of the run.
    pub truncation_strategy: Option<TruncationObject>,

    pub tool_choice: Option<AssistantsApiToolChoiceOption>,

    /// Whether to enable [parallel function calling](https://platform.openai.com/docs/guides/function-calling/parallel-function-calling) during tool use.
    pub parallel_tool_calls: bool,

    pub response_format: Option<AssistantsApiResponseFormatOption>,
}

#[derive(Clone, Serialize, Debug, Deserialize, PartialEq, Default)]
#[serde(rename_all = "snake_case")]
pub enum TruncationObjectType {
    #[default]
    Auto,
    LastMessages,
}

/// Thread Truncation Controls
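///
/// A minimal serialization sketch. The wire form shown in the comment is an
/// assumption derived from the serde derives on this type (field order and
/// `snake_case` renaming), and the import path is assumed from this crate's
/// layout; neither is taken from an API transcript:
///
/// ```no_run
/// use dynamo_async_openai::types::{TruncationObject, TruncationObjectType};
///
/// // Keep only the 5 most recent messages when building the run's context.
/// let strategy = TruncationObject {
///     r#type: TruncationObjectType::LastMessages,
///     last_messages: Some(5),
/// };
///
/// // Expected wire form: {"type":"last_messages","last_messages":5}
/// let json = serde_json::to_string(&strategy).unwrap();
/// println!("{json}");
/// ```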
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
pub struct TruncationObject {
    /// The truncation strategy to use for the thread. The default is `auto`. If set to `last_messages`, the thread will be truncated to the n most recent messages in the thread. When set to `auto`, messages in the middle of the thread will be dropped to fit the context length of the model, `max_prompt_tokens`.
    pub r#type: TruncationObjectType,
    /// The number of most recent messages from the thread when constructing the context for the run.
    pub last_messages: Option<u32>,
}

#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
pub struct RunObjectIncompleteDetails {
    /// The reason why the run is incomplete. This will point to which specific token limit was reached over the course of the run.
    pub reason: RunObjectIncompleteDetailsReason,
}

#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum RunObjectIncompleteDetailsReason {
    MaxCompletionTokens,
    MaxPromptTokens,
}

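/// The status of a run. The doc comments on [`RunObject`] treat `cancelled`,
/// `failed`, `completed`, `incomplete`, and `expired` as terminal states; that
/// grouping is inferred from those field docs rather than stated as a list in
/// this file. A minimal polling predicate under that assumption (import path
/// assumed from this crate's layout):
///
/// ```no_run
/// use dynamo_async_openai::types::RunStatus;
///
/// // Returns true once a run can no longer make progress.
/// fn is_terminal(status: &RunStatus) -> bool {
///     matches!(
///         status,
///         RunStatus::Cancelled
///             | RunStatus::Failed
///             | RunStatus::Completed
///             | RunStatus::Incomplete
///             | RunStatus::Expired
///     )
/// }
/// ```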
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum RunStatus {
    Queued,
    InProgress,
    RequiresAction,
    Cancelling,
    Cancelled,
    Failed,
    Completed,
    Incomplete,
    Expired,
}

#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
pub struct RequiredAction {
    /// For now, this is always `submit_tool_outputs`.
    pub r#type: String,

    pub submit_tool_outputs: SubmitToolOutputs,
}

#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
pub struct SubmitToolOutputs {
    pub tool_calls: Vec<RunToolCallObject>,
}

#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
pub struct RunToolCallObject {
    /// The ID of the tool call. This ID must be referenced when you submit the tool outputs using the [Submit tool outputs to run](https://platform.openai.com/docs/api-reference/runs/submitToolOutputs) endpoint.
    pub id: String,
    /// The type of tool call the output is required for. For now, this is always `function`.
    pub r#type: String,
    /// The function definition.
    pub function: FunctionCall,
}

#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
pub struct LastError {
    /// One of `server_error`, `rate_limit_exceeded`, or `invalid_prompt`.
    pub code: LastErrorCode,
    /// A human-readable description of the error.
    pub message: String,
}

#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum LastErrorCode {
    ServerError,
    RateLimitExceeded,
    InvalidPrompt,
}

#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
pub struct RunCompletionUsage {
    /// Number of completion tokens used over the course of the run.
    pub completion_tokens: u32,
    /// Number of prompt tokens used over the course of the run.
    pub prompt_tokens: u32,
    /// Total number of tokens used (prompt + completion).
    pub total_tokens: u32,
}

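/// Request body for creating a run. Most callers go through the generated
/// `CreateRunRequestArgs` builder (named in the `#[builder]` attributes below)
/// rather than filling in the struct literally. A minimal sketch; the import
/// path is assumed from this crate's layout and the `asst_...` ID is a
/// placeholder:
///
/// ```no_run
/// use dynamo_async_openai::types::CreateRunRequestArgs;
///
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// // With `strip_option`, setters for `Option` fields take the inner value.
/// let request = CreateRunRequestArgs::default()
///     .assistant_id("asst_abc123")
///     .instructions("Answer as tersely as possible.")
///     .max_completion_tokens(256u32)
///     .build()?;
/// # Ok(())
/// # }
/// ```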
#[derive(Clone, Serialize, Default, Debug, Deserialize, Builder, PartialEq)]
#[builder(name = "CreateRunRequestArgs")]
#[builder(pattern = "mutable")]
#[builder(setter(into, strip_option), default)]
#[builder(derive(Debug))]
#[builder(build_fn(error = "OpenAIError"))]
pub struct CreateRunRequest {
    /// The ID of the [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to execute this run.
    pub assistant_id: String,

    /// The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the model associated with the assistant. If not, the model associated with the assistant will be used.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub model: Option<String>,

    /// Overrides the [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant) of the assistant. This is useful for modifying the behavior on a per-run basis.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub instructions: Option<String>,

    /// Appends additional instructions at the end of the instructions for the run. This is useful for modifying the behavior on a per-run basis without overriding other instructions.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub additional_instructions: Option<String>,

    /// Adds additional messages to the thread before creating the run.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub additional_messages: Option<Vec<CreateMessageRequest>>,

    /// Override the tools the assistant can use for this run. This is useful for modifying the behavior on a per-run basis.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tools: Option<Vec<AssistantTools>>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub metadata: Option<HashMap<String, serde_json::Value>>,

    /// The sampling temperature used for this run. If not set, defaults to 1.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,

    /// An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
    ///
    /// We generally recommend altering this or temperature but not both.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,

    /// If `true`, returns a stream of events that happen during the Run as server-sent events, terminating when the Run enters a terminal state with a `data: [DONE]` message.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stream: Option<bool>,

    /// The maximum number of prompt tokens that may be used over the course of the run. The run will make a best effort to use only the number of prompt tokens specified, across multiple turns of the run. If the run exceeds the number of prompt tokens specified, the run will end with status `incomplete`. See `incomplete_details` for more info.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_prompt_tokens: Option<u32>,

    /// The maximum number of completion tokens that may be used over the course of the run. The run will make a best effort to use only the number of completion tokens specified, across multiple turns of the run. If the run exceeds the number of completion tokens specified, the run will end with status `incomplete`. See `incomplete_details` for more info.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_completion_tokens: Option<u32>,

    /// Controls for how a thread will be truncated prior to the run. Use this to control the initial context window of the run.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub truncation_strategy: Option<TruncationObject>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<AssistantsApiToolChoiceOption>,

    /// Whether to enable [parallel function calling](https://platform.openai.com/docs/guides/function-calling/parallel-function-calling) during tool use.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub parallel_tool_calls: Option<bool>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_format: Option<AssistantsApiResponseFormatOption>,
}

#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
pub struct ModifyRunRequest {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub metadata: Option<HashMap<String, serde_json::Value>>,
}

#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
pub struct ListRunsResponse {
    pub object: String,
    pub data: Vec<RunObject>,
    pub first_id: Option<String>,
    pub last_id: Option<String>,
    pub has_more: bool,
}

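/// Request body for the submit-tool-outputs endpoint. A run in the
/// `requires_action` state carries its pending calls in
/// `required_action.submit_tool_outputs.tool_calls`, and each call is answered
/// here by ID. A minimal sketch of wiring the two together, where `run` is a
/// previously fetched [`RunObject`], `execute` stands in for the caller's own
/// tool dispatch (it is not part of this crate), and the import paths are
/// assumed from this crate's layout:
///
/// ```no_run
/// use dynamo_async_openai::types::{
///     RunObject, SubmitToolOutputsRunRequest, ToolsOutputs,
/// };
///
/// // Placeholder for application-specific tool dispatch.
/// fn execute(name: &str, arguments: &str) -> String {
///     format!("ran {name} with {arguments}")
/// }
///
/// // Build one output per pending tool call, or None if no action is required.
/// fn answer_tool_calls(run: &RunObject) -> Option<SubmitToolOutputsRunRequest> {
///     let action = run.required_action.as_ref()?;
///     let tool_outputs = action
///         .submit_tool_outputs
///         .tool_calls
///         .iter()
///         .map(|call| ToolsOutputs {
///             tool_call_id: Some(call.id.clone()),
///             output: Some(execute(&call.function.name, &call.function.arguments)),
///         })
///         .collect();
///     Some(SubmitToolOutputsRunRequest { tool_outputs, stream: None })
/// }
/// ```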
#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
pub struct SubmitToolOutputsRunRequest {
    /// A list of tools for which the outputs are being submitted.
    pub tool_outputs: Vec<ToolsOutputs>,
    /// If `true`, returns a stream of events that happen during the Run as server-sent events, terminating when the Run enters a terminal state with a `data: [DONE]` message.
    pub stream: Option<bool>,
}

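/// One tool-call result. The generated `ToolsOutputsArgs` builder (named in
/// the `#[builder]` attributes below) offers a fluent alternative to the
/// struct literal used above. A minimal sketch with a placeholder call ID and
/// an assumed import path:
///
/// ```no_run
/// use dynamo_async_openai::types::ToolsOutputsArgs;
///
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// let output = ToolsOutputsArgs::default()
///     .tool_call_id("call_abc123")
///     .output("22C and sunny")
///     .build()?;
/// # Ok(())
/// # }
/// ```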
#[derive(Clone, Serialize, Default, Debug, Deserialize, Builder, PartialEq)]
#[builder(name = "ToolsOutputsArgs")]
#[builder(pattern = "mutable")]
#[builder(setter(into, strip_option), default)]
#[builder(derive(Debug))]
#[builder(build_fn(error = "OpenAIError"))]
pub struct ToolsOutputs {
    /// The ID of the tool call in the `required_action` object within the run object the output is being submitted for.
    pub tool_call_id: Option<String>,
    /// The output of the tool call to be submitted to continue the run.
    pub output: Option<String>,
}