1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use crate::protocols;
pub use protocols::{Annotated, TokenIdType};
pub mod openai {
use super::*;
use dynamo_runtime::pipeline::{ServerStreamingEngine, UnaryEngine};
pub mod completions {
use super::*;
pub use protocols::openai::completions::{
NvCreateCompletionRequest, NvCreateCompletionResponse,
};
/// A [`UnaryEngine`] implementation for the OpenAI Completions API
pub type OpenAICompletionsUnaryEngine =
UnaryEngine<NvCreateCompletionRequest, NvCreateCompletionResponse>;
/// A [`ServerStreamingEngine`] implementation for the OpenAI Completions API
pub type OpenAICompletionsStreamingEngine =
ServerStreamingEngine<NvCreateCompletionRequest, Annotated<NvCreateCompletionResponse>>;
}
pub mod chat_completions {
use super::*;
pub use protocols::openai::chat_completions::{
NvCreateChatCompletionRequest, NvCreateChatCompletionResponse,
NvCreateChatCompletionStreamResponse,
};
/// A [`UnaryEngine`] implementation for the OpenAI Chat Completions API
pub type OpenAIChatCompletionsUnaryEngine =
UnaryEngine<NvCreateChatCompletionRequest, NvCreateChatCompletionResponse>;
/// A [`ServerStreamingEngine`] implementation for the OpenAI Chat Completions API
pub type OpenAIChatCompletionsStreamingEngine = ServerStreamingEngine<
NvCreateChatCompletionRequest,
Annotated<NvCreateChatCompletionStreamResponse>,
>;
}
pub mod embeddings {
use super::*;
pub use protocols::openai::embeddings::{
NvCreateEmbeddingRequest, NvCreateEmbeddingResponse,
};
/// A [`UnaryEngine`] implementation for the OpenAI Embeddings API
pub type OpenAIEmbeddingsUnaryEngine =
UnaryEngine<NvCreateEmbeddingRequest, NvCreateEmbeddingResponse>;
/// A [`ServerStreamingEngine`] implementation for the OpenAI Embeddings API
pub type OpenAIEmbeddingsStreamingEngine =
ServerStreamingEngine<NvCreateEmbeddingRequest, Annotated<NvCreateEmbeddingResponse>>;
}
pub mod images {
use super::*;
pub use protocols::openai::images::{NvCreateImageRequest, NvImagesResponse};
/// A [`UnaryEngine`] implementation for the OpenAI Images API
pub type OpenAIImagesUnaryEngine = UnaryEngine<NvCreateImageRequest, NvImagesResponse>;
/// A [`ServerStreamingEngine`] implementation for the OpenAI Images API.
///
/// **Note**: This "streaming" refers to the internal routing/distribution architecture,
/// NOT client-facing Server-Sent Events (SSE) streaming. Image generation does not
/// support progressive streaming to clients - images are generated completely and
/// returned as finished artifacts (URLs or base64).
///
/// The HTTP endpoint folds this stream into a single response before returning to clients,
/// similar to how embeddings work. The streaming infrastructure is used for:
/// - Consistent routing architecture across all model types
/// - Request distribution via PushRouter
/// - Worker fault detection and load balancing
pub type OpenAIImagesStreamingEngine =
ServerStreamingEngine<NvCreateImageRequest, Annotated<NvImagesResponse>>;
}
pub mod videos {
use super::*;
pub use protocols::openai::videos::{NvCreateVideoRequest, NvVideosResponse};
/// A [`UnaryEngine`] implementation for the OpenAI Videos API
pub type OpenAIVideosUnaryEngine = UnaryEngine<NvCreateVideoRequest, NvVideosResponse>;
/// A [`ServerStreamingEngine`] implementation for the OpenAI Videos API
pub type OpenAIVideosStreamingEngine =
ServerStreamingEngine<NvCreateVideoRequest, Annotated<NvVideosResponse>>;
}
}
pub mod generic {
use super::*;
use dynamo_runtime::pipeline::{ServerStreamingEngine, UnaryEngine};
pub mod tensor {
use super::*;
pub use protocols::tensor::{NvCreateTensorRequest, NvCreateTensorResponse};
/// A [`UnaryEngine`] implementation for the generic Tensor API
pub type TensorUnaryEngine = UnaryEngine<NvCreateTensorRequest, NvCreateTensorResponse>;
/// A [`ServerStreamingEngine`] implementation for the generic Tensor API
pub type TensorStreamingEngine =
ServerStreamingEngine<NvCreateTensorRequest, Annotated<NvCreateTensorResponse>>;
}
}