dynamo_llm/http/service.rs
1// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4//! HTTP Service for Dynamo LLM
5//!
6//! The primary purpose of this crate is to service the dynamo-llm protocols via OpenAI compatible HTTP endpoints. This component
7//! is meant to be a gateway/ingress into the Dynamo LLM Distributed Runtime.
8//!
//! In order to create a common pattern, the HttpService forwards the incoming OAI Chat Request or OAI Completion Request
//! to a model-specific engine. The engines can be attached and detached dynamically using the [`ModelManager`].
11//!
12//! Note: All requests, whether the client requests `stream=true` or `stream=false`, are propagated downstream as `stream=true`.
//! This enables us to handle only one pattern of request-response in the downstream services. Non-streaming user requests are
14//! aggregated by the HttpService and returned as a single response.
15//!
16//! TODO(): Add support for model-specific metadata and status. Status will allow us to return a 503 when the model is supposed
17//! to be ready, but there is a problem with the model.
18//!
19//! The [`service_v2::HttpService`] can be further extended to host any [`axum::Router`] using the [`service_v2::HttpServiceConfigBuilder`].
20
21mod openai;
22
23pub mod disconnect;
24pub mod error;
25pub mod health;
26pub mod metrics;
27pub mod service_v2;
28
29pub use axum;
30pub use metrics::Metrics;
31
32/// Documentation for a route
33#[derive(Debug, Clone)]
34pub struct RouteDoc {
35 method: axum::http::Method,
36 path: String,
37}
38
39impl std::fmt::Display for RouteDoc {
40 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
41 write!(f, "{} {}", self.method, self.path)
42 }
43}
44
45impl RouteDoc {
46 pub fn new<T: Into<String>>(method: axum::http::Method, path: T) -> Self {
47 RouteDoc {
48 method,
49 path: path.into(),
50 }
51 }
52}