1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use crate::protocols;
pub use protocols::{Annotated, TokenIdType};
pub mod openai {
use super::*;
use dynamo_runtime::pipeline::{ServerStreamingEngine, UnaryEngine};
pub mod completions {
use super::*;
pub use protocols::openai::completions::{
NvCreateCompletionRequest, NvCreateCompletionResponse,
};
/// A [`UnaryEngine`] implementation for the OpenAI Completions API
pub type OpenAICompletionsUnaryEngine =
UnaryEngine<NvCreateCompletionRequest, NvCreateCompletionResponse>;
/// A [`ServerStreamingEngine`] implementation for the OpenAI Completions API
pub type OpenAICompletionsStreamingEngine =
ServerStreamingEngine<NvCreateCompletionRequest, Annotated<NvCreateCompletionResponse>>;
}
pub mod chat_completions {
use super::*;
pub use protocols::openai::chat_completions::{
NvCreateChatCompletionRequest, NvCreateChatCompletionResponse,
NvCreateChatCompletionStreamResponse,
};
/// A [`UnaryEngine`] implementation for the OpenAI Chat Completions API
pub type OpenAIChatCompletionsUnaryEngine =
UnaryEngine<NvCreateChatCompletionRequest, NvCreateChatCompletionResponse>;
/// A [`ServerStreamingEngine`] implementation for the OpenAI Chat Completions API
pub type OpenAIChatCompletionsStreamingEngine = ServerStreamingEngine<
NvCreateChatCompletionRequest,
Annotated<NvCreateChatCompletionStreamResponse>,
>;
}
pub mod embeddings {
use super::*;
pub use protocols::openai::embeddings::{
NvCreateEmbeddingRequest, NvCreateEmbeddingResponse,
};
/// A [`UnaryEngine`] implementation for the OpenAI Embeddings API
pub type OpenAIEmbeddingsUnaryEngine =
UnaryEngine<NvCreateEmbeddingRequest, NvCreateEmbeddingResponse>;
/// A [`ServerStreamingEngine`] implementation for the OpenAI Embeddings API
pub type OpenAIEmbeddingsStreamingEngine =
ServerStreamingEngine<NvCreateEmbeddingRequest, Annotated<NvCreateEmbeddingResponse>>;
}
pub mod images {
use super::*;
pub use protocols::openai::images::{NvCreateImageRequest, NvImagesResponse};
/// A [`UnaryEngine`] implementation for the OpenAI Images API
pub type OpenAIImagesUnaryEngine = UnaryEngine<NvCreateImageRequest, NvImagesResponse>;
/// A [`ServerStreamingEngine`] implementation for the OpenAI Images API.
///
/// **Note**: This "streaming" refers to the internal routing/distribution architecture,
/// NOT client-facing Server-Sent Events (SSE) streaming. Image generation does not
/// support progressive streaming to clients - images are generated completely and
/// returned as finished artifacts (URLs or base64).
///
/// The HTTP endpoint folds this stream into a single response before returning to clients,
/// similar to how embeddings work. The streaming infrastructure is used for:
/// - Consistent routing architecture across all model types
/// - Request distribution via PushRouter
/// - Worker fault detection and load balancing
pub type OpenAIImagesStreamingEngine =
ServerStreamingEngine<NvCreateImageRequest, Annotated<NvImagesResponse>>;
}
pub mod videos {
use super::*;
pub use protocols::openai::videos::{NvCreateVideoRequest, NvVideosResponse};
/// A [`UnaryEngine`] implementation for the OpenAI Videos API
pub type OpenAIVideosUnaryEngine = UnaryEngine<NvCreateVideoRequest, NvVideosResponse>;
/// A [`ServerStreamingEngine`] implementation for the OpenAI Videos API
pub type OpenAIVideosStreamingEngine =
ServerStreamingEngine<NvCreateVideoRequest, Annotated<NvVideosResponse>>;
}
pub mod audios {
use super::*;
pub use protocols::openai::audios::{NvAudioSpeechResponse, NvCreateAudioSpeechRequest};
/// A [`UnaryEngine`] implementation for the Audio Speech API
pub type OpenAIAudiosUnaryEngine =
UnaryEngine<NvCreateAudioSpeechRequest, NvAudioSpeechResponse>;
/// A [`ServerStreamingEngine`] implementation for the Audio Speech API
pub type OpenAIAudiosStreamingEngine =
ServerStreamingEngine<NvCreateAudioSpeechRequest, Annotated<NvAudioSpeechResponse>>;
}
}
pub mod generic {
use super::*;
use dynamo_runtime::pipeline::{ServerStreamingEngine, UnaryEngine};
pub mod tensor {
use super::*;
pub use protocols::tensor::{NvCreateTensorRequest, NvCreateTensorResponse};
/// A [`UnaryEngine`] implementation for the generic Tensor API
pub type TensorUnaryEngine = UnaryEngine<NvCreateTensorRequest, NvCreateTensorResponse>;
/// A [`ServerStreamingEngine`] implementation for the generic Tensor API
pub type TensorStreamingEngine =
ServerStreamingEngine<NvCreateTensorRequest, Annotated<NvCreateTensorResponse>>;
}
}