Skip to main content

Crate omnillm

Crate omnillm 

Source
Expand description

§OmniLLM

A production-grade Rust library for provider-neutral LLM access with multi-key load balancing, protocol conversion, per-key rate limiting, and lock-free cost tracking.

§Quick Start

use omnillm::{
    GatewayBuilder, GenerationConfig, KeyConfig, LlmRequest, Message, MessageRole,
    ProviderEndpoint, RequestItem,
};
use tokio_util::sync::CancellationToken;

let gateway = GatewayBuilder::new(ProviderEndpoint::openai_responses())
    .add_key(KeyConfig::new("sk-key-1", "prod-1").tpm_limit(90_000).rpm_limit(500))
    .budget_limit_usd(50.0)
    .build()?;

let req = LlmRequest {
    model: "gpt-4.1-mini".into(),
    instructions: Some("Answer concisely".into()),
    input: vec![RequestItem::from(Message::text(MessageRole::User, "Hello!"))],
    messages: Vec::new(),
    capabilities: Default::default(),
    generation: GenerationConfig {
        max_output_tokens: Some(256),
        ..Default::default()
    },
    metadata: Default::default(),
    vendor_extensions: Default::default(),
};

let resp = gateway.call(req, CancellationToken::new()).await?;
println!("{}", resp.content_text);

Re-exports§

pub use api::ApiRequest;
pub use api::ApiResponse;
pub use api::AudioInput;
pub use api::AudioSegment;
pub use api::AudioSpeechRequest;
pub use api::AudioSpeechResponse;
pub use api::AudioTranscriptionRequest;
pub use api::AudioTranscriptionResponse;
pub use api::ConversionReport;
pub use api::EmbeddingInput;
pub use api::EmbeddingRequest;
pub use api::EmbeddingResponse;
pub use api::EmbeddingUsage;
pub use api::EmbeddingVector;
pub use api::EndpointKind;
pub use api::GeneratedImage;
pub use api::HttpMethod;
pub use api::ImageGenerationRequest;
pub use api::ImageGenerationResponse;
pub use api::MultipartField;
pub use api::MultipartValue;
pub use api::ProviderKind;
pub use api::RequestBody;
pub use api::RerankDocument;
pub use api::RerankRequest;
pub use api::RerankResponse;
pub use api::RerankResult;
pub use api::RerankUsage;
pub use api::ResponseBody;
pub use api::TranscribedWord;
pub use api::TransportRequest;
pub use api::TransportResponse;
pub use api::WireFormat;
pub use api_protocol::emit_api_request;
pub use api_protocol::emit_api_response;
pub use api_protocol::emit_transport_request;
pub use api_protocol::parse_api_request;
pub use api_protocol::parse_api_response;
pub use api_protocol::parse_transport_response;
pub use api_protocol::transcode_api_request;
pub use api_protocol::transcode_api_response;
pub use api_protocol::ApiProtocolError;
pub use budget::tracker::BudgetTracker;
pub use config::GatewayConfig;
pub use config::KeyConfig;
pub use config::PoolConfig;
pub use error::GatewayError;
pub use error::ProviderError;
pub use key::lease::KeyLease;
pub use key::pool::KeyStatus;
pub use primitive::embedded_primitive_provider_registry;
pub use primitive::PrimitiveAsyncJobOperation;
pub use primitive::PrimitiveAsyncJobRequest;
pub use primitive::PrimitiveAsyncJobResponse;
pub use primitive::PrimitiveAsyncJobStatus;
pub use primitive::PrimitiveBillableUnit;
pub use primitive::PrimitiveBudgetClass;
pub use primitive::PrimitiveEndpointKind;
pub use primitive::PrimitiveEndpointSupport;
pub use primitive::PrimitiveProviderDescriptor;
pub use primitive::PrimitiveProviderEndpoint;
pub use primitive::PrimitiveProviderError;
pub use primitive::PrimitiveProviderKind;
pub use primitive::PrimitiveProviderRegistry;
pub use primitive::PrimitiveRealtimeSession;
pub use primitive::PrimitiveRequest;
pub use primitive::PrimitiveResponse;
pub use primitive::PrimitiveStreamEvent;
pub use primitive::PrimitiveStreamMode;
pub use primitive::PrimitiveSupportTier;
pub use primitive::PrimitiveUsageTelemetry;
pub use primitive::ProviderPrimitiveWireFormat;
pub use protocol::emit_error;
pub use protocol::emit_request;
pub use protocol::emit_response;
pub use protocol::emit_stream_event;
pub use protocol::parse_error;
pub use protocol::parse_request;
pub use protocol::parse_response;
pub use protocol::parse_stream_event;
pub use protocol::transcode_error;
pub use protocol::transcode_request;
pub use protocol::transcode_response;
pub use protocol::transcode_stream_event;
pub use protocol::AuthScheme;
pub use protocol::EndpointProtocol;
pub use protocol::ProtocolError;
pub use protocol::ProviderEndpoint;
pub use protocol::ProviderProtocol;
pub use protocol::ProviderStreamFrame;
pub use provider_registry::embedded_provider_registry;
pub use provider_registry::EndpointSupport;
pub use provider_registry::ProviderDescriptor;
pub use provider_registry::ProviderRegistry;
pub use provider_registry::SupportLevel;
pub use replay::sanitize_json_value;
pub use replay::sanitize_transport_request;
pub use replay::sanitize_transport_response;
pub use replay::ReplayFixture;
pub use types::BuiltinTool;
pub use types::CacheBreakpoint;
pub use types::CacheSettings;
pub use types::CapabilitySet;
pub use types::FinishReason;
pub use types::GenerationConfig;
pub use types::LlmRequest;
pub use types::LlmResponse;
pub use types::LlmStreamEvent;
pub use types::Message;
pub use types::MessagePart;
pub use types::MessageRole;
pub use types::OutputModality;
pub use types::PromptCacheKey;
pub use types::PromptCachePolicy;
pub use types::PromptCacheRetention;
pub use types::PromptCacheUsage;
pub use types::PromptLayoutBuilder;
pub use types::ReasoningCapability;
pub use types::RequestItem;
pub use types::ResponseItem;
pub use types::SafetySettings;
pub use types::StructuredOutputConfig;
pub use types::TokenUsage;
pub use types::ToolCallPart;
pub use types::ToolDefinition;
pub use types::ToolResultPart;
pub use types::VendorExtensions;

Modules§

api
Multi-endpoint API abstractions built around the existing canonical Responses representation.
api_protocol
Multi-endpoint canonical <-> wire conversion helpers.
budget
Cost tracking and budget enforcement.
config
Configuration types for building a `crate::Gateway`.
error
Error types for the OmniLLM crate.
key
Key management: pool, lease, registry, and per-key state.
primitive
protocol
Provider protocol definitions and canonical/raw conversion helpers.
provider_registry
Embedded provider and endpoint support metadata.
replay
Replay fixture helpers with default sanitization.
types
Public canonical request/response types used by `crate::Gateway`.

Structs§

Gateway
The main LLM API gateway.
GatewayBuilder
A builder for constructing a Gateway.

Type Aliases§

GatewayStream
PrimitiveGatewayStream