use utoipa::openapi::security::{ApiKey, ApiKeyValue, SecurityScheme};
use utoipa::{Modify, OpenApi};
use crate::api::{
ErrorBody, ErrorCode as ApiErrorCode, ErrorResponse, ModelListEntry,
ModelsResponse as ApiModelsResponse,
};
use crate::error_response::{ApiError, ErrorDetail};
use crate::responses::{HealthResponse, ModelInfo, ReadyResponse};
use crate::tokenize::TokenizeResponse;
/// Aggregated OpenAPI 3 document for the Infernum HTTP surface.
///
/// The `#[openapi]` attribute stitches together the documented routes
/// (`paths(...)`), the schemas those routes reference
/// (`components(schemas(...))`), the advertised servers and tags, and the
/// `SecurityAddon` modifier that registers the `bearer_auth` security
/// scheme. Call `ApiDoc::openapi()` to materialize the spec (e.g. for
/// serving behind Swagger UI).
#[derive(OpenApi)]
#[openapi(
    info(
        title = "Infernum API",
        version = "2.0.0",
        description = "Infernum-native local LLM inference server.\n\n\
        Infernum provides high-performance inference with native tool calling, \
        structured outputs, and agentic capabilities.\n\n\
        See INFERNUM-API-SPEC.md for the complete wire format specification.",
        license(name = "MIT", url = "https://opensource.org/licenses/MIT"),
        contact(name = "Infernum", url = "https://github.com/daemoniorum/infernum")
    ),
    servers(
        (url = "http://localhost:8080", description = "Local development server"),
        (url = "http://localhost:8081", description = "Docker container"),
    ),
    paths(
        health,
        ready,
        list_models,
        tokenize,
        generate,
        embed,
    ),
    components(
        schemas(
            HealthResponse,
            ReadyResponse,
            ModelInfo,
            ApiModelsResponse,
            ModelListEntry,
            ErrorResponse,
            ErrorBody,
            ApiErrorCode,
            TokenizeResponse,
            ApiError,
            ErrorDetail,
        )
    ),
    modifiers(&SecurityAddon),
    tags(
        (name = "Health", description = "Health and readiness endpoints"),
        (name = "Models", description = "Model listing and management"),
        (name = "Generate", description = "Text and chat generation endpoint"),
        (name = "Embed", description = "Embedding generation endpoint"),
        (name = "Tokenize", description = "Token counting endpoints"),
    )
)]
pub struct ApiDoc;
/// Spec modifier that registers the `bearer_auth` security scheme
/// referenced by the `security(("bearer_auth" = []))` entries on the
/// authenticated paths.
struct SecurityAddon;

impl Modify for SecurityAddon {
    /// Adds the `bearer_auth` scheme to the spec's components.
    ///
    /// Uses an HTTP `bearer` scheme rather than an API-key header named
    /// `Authorization`: the latter mislabels the auth type in the spec and
    /// forces interactive clients (e.g. Swagger UI) to type the
    /// "Bearer " prefix by hand, while `Http`/bearer prepends it
    /// automatically.
    fn modify(&self, openapi: &mut utoipa::openapi::OpenApi) {
        use utoipa::openapi::security::{Http, HttpAuthScheme};

        // `components` is only `Some` once at least one schema/path is
        // registered; skip silently otherwise.
        if let Some(components) = openapi.components.as_mut() {
            components.add_security_scheme(
                "bearer_auth",
                SecurityScheme::Http(Http::new(HttpAuthScheme::Bearer)),
            );
        }
    }
}
/// `GET /health` — liveness probe (no auth requirement declared).
///
/// NOTE(review): the empty body suggests this fn exists only to anchor the
/// `#[utoipa::path]` spec entry and the live handler is registered
/// elsewhere — confirm against the router setup.
#[utoipa::path(
    get,
    path = "/health",
    tag = "Health",
    responses(
        (status = 200, description = "Server is healthy", body = HealthResponse),
    )
)]
pub async fn health() {}
/// `GET /ready` — readiness probe (no auth requirement declared).
///
/// NOTE(review): empty stub body — presumably a spec anchor for
/// `#[utoipa::path]`, with the real handler wired up elsewhere; confirm
/// against the router setup.
#[utoipa::path(
    get,
    path = "/ready",
    tag = "Health",
    responses(
        (status = 200, description = "Readiness status", body = ReadyResponse),
    )
)]
pub async fn ready() {}
/// `GET /v1/models` — list the models available to this server.
///
/// Declares the `bearer_auth` security requirement (scheme registered by
/// `SecurityAddon`), so 401 is a documented response.
///
/// NOTE(review): empty stub body — presumably a spec anchor only; confirm
/// the live handler registration elsewhere.
#[utoipa::path(
    get,
    path = "/v1/models",
    tag = "Models",
    responses(
        (status = 200, description = "List of available models", body = ApiModelsResponse),
        (status = 401, description = "Unauthorized", body = ApiError),
    ),
    security(
        ("bearer_auth" = [])
    )
)]
pub async fn list_models() {}
/// `POST /v1/tokenize` — token counting; requires `bearer_auth`.
///
/// The request body is documented as an opaque JSON string rather than a
/// typed schema (unlike the response, which uses `TokenizeResponse`).
/// NOTE(review): if a TokenizeRequest type exists, referencing it here —
/// with a description/example like `generate` and `embed` have — would
/// make the spec richer; confirm the request type's availability.
///
/// NOTE(review): empty stub body — presumably a spec anchor only; confirm
/// the live handler registration elsewhere.
#[utoipa::path(
    post,
    path = "/v1/tokenize",
    tag = "Tokenize",
    request_body = String,
    responses(
        (status = 200, description = "Token count result", body = TokenizeResponse),
        (status = 400, description = "Invalid request", body = ApiError),
        (status = 401, description = "Unauthorized", body = ApiError),
    ),
    security(
        ("bearer_auth" = [])
    )
)]
pub async fn tokenize() {}
/// `POST /v1/generate` — text/chat generation; requires `bearer_auth`.
///
/// Request and response bodies are documented as opaque JSON (typed
/// `String` with inline `json!` examples) and defer the full wire format
/// to INFERNUM-API-SPEC.md §3.1. Documents 429 (rate limit) and
/// 503 (model not loaded) alongside the usual 400/401 errors.
///
/// NOTE(review): empty stub body — presumably a spec anchor only; confirm
/// the live handler registration elsewhere.
#[utoipa::path(
    post,
    path = "/v1/generate",
    tag = "Generate",
    request_body(content = String, description = "GenerateRequest JSON. See INFERNUM-API-SPEC.md §3.1.",
        example = json!({
            "model": "llama-3.2-3b",
            "prompt": [{"role": "user", "content": "Hello!"}],
            "sampling": {"temperature": 0.7, "max_tokens": 4096},
            "stream": false
        })
    ),
    responses(
        (status = 200, description = "Successful generation", content_type = "application/json",
            body = String,
            example = json!({
                "request_id": "550e8400-e29b-41d4-a716-446655440000",
                "model": "llama-3.2-3b",
                "choices": [{"index": 0, "text": "Hello! How can I help?", "finish_reason": "stop"}],
                "usage": {"prompt_tokens": 10, "completion_tokens": 8, "total_tokens": 18}
            })
        ),
        (status = 400, description = "Invalid request", body = ApiError),
        (status = 401, description = "Unauthorized", body = ApiError),
        (status = 429, description = "Rate limited", body = ApiError),
        (status = 503, description = "Model not loaded", body = ApiError),
    ),
    security(
        ("bearer_auth" = [])
    )
)]
pub async fn generate() {}
/// `POST /v1/embed` — embedding generation; requires `bearer_auth`.
///
/// Mirrors `generate`'s documentation style: opaque JSON request/response
/// bodies with inline `json!` examples, deferring the wire format to
/// INFERNUM-API-SPEC.md §5.1. Documents 429 and 503 alongside 400/401.
///
/// NOTE(review): empty stub body — presumably a spec anchor only; confirm
/// the live handler registration elsewhere.
#[utoipa::path(
    post,
    path = "/v1/embed",
    tag = "Embed",
    request_body(content = String, description = "EmbedRequest JSON. See INFERNUM-API-SPEC.md §5.1.",
        example = json!({
            "model": "nomic-embed-text",
            "input": "The quick brown fox"
        })
    ),
    responses(
        (status = 200, description = "Successful embedding", content_type = "application/json",
            body = String,
            example = json!({
                "model": "nomic-embed-text",
                "data": [{"index": 0, "embedding": [0.1, 0.2, 0.3]}],
                "usage": {"prompt_tokens": 5, "total_tokens": 5}
            })
        ),
        (status = 400, description = "Invalid request", body = ApiError),
        (status = 401, description = "Unauthorized", body = ApiError),
        (status = 429, description = "Rate limited", body = ApiError),
        (status = 503, description = "Model not loaded", body = ApiError),
    ),
    security(
        ("bearer_auth" = [])
    )
)]
pub async fn embed() {}
#[cfg(test)]
mod tests {
    use super::*;
    use utoipa::OpenApi;

    /// The derived spec carries the advertised title and version.
    #[test]
    fn test_openapi_spec_generates() {
        let spec = ApiDoc::openapi();
        assert_eq!(spec.info.title, "Infernum API");
        assert_eq!(spec.info.version, "2.0.0");
    }

    /// Every Infernum-native route registered in `paths(...)` is present.
    #[test]
    fn test_openapi_spec_has_paths() {
        let spec = ApiDoc::openapi();
        let expected = [
            "/health",
            "/ready",
            "/v1/models",
            "/v1/tokenize",
            "/v1/generate",
            "/v1/embed",
        ];
        for route in expected {
            assert!(spec.paths.paths.contains_key(route));
        }
    }

    /// Legacy OpenAI-compatible routes must not leak into the spec.
    #[test]
    fn test_openapi_spec_no_legacy_paths() {
        let spec = ApiDoc::openapi();
        let legacy = ["/v1/chat/completions", "/v1/completions", "/v1/embeddings"];
        for route in legacy {
            assert!(!spec.paths.paths.contains_key(route));
        }
    }

    /// A representative sample of registered schemas is exposed.
    #[test]
    fn test_openapi_spec_has_components() {
        let schemas = ApiDoc::openapi()
            .components
            .expect("components should exist")
            .schemas;
        for name in ["HealthResponse", "ModelsResponse", "ApiError"] {
            assert!(schemas.contains_key(name));
        }
    }

    /// The spec round-trips to JSON and mentions the key identifiers.
    #[test]
    fn test_openapi_spec_to_json() {
        let json = ApiDoc::openapi().to_json().expect("should serialize to JSON");
        for needle in ["Infernum API", "/v1/generate", "/v1/embed"] {
            assert!(json.contains(needle));
        }
    }
}