atomr_infer/lib.rs
1//! # atomr-infer
2//!
3//! Multi-runtime GPU + remote inference as a supervised actor system
4//! on top of [atomr](https://github.com/rustakka/atomr) and the
5//! backend-agnostic [atomr-accel](https://github.com/rustakka/atomr-accel)
6//! compute substrate. See `docs/architecture.md`
7//! for the design.
8//!
//! This crate is a **rollup**: it re-exports the public surface of the
//! workspace's per-runtime crates behind feature flags so downstream
//! consumers depend on a single crate (`atomr-infer`) and pick the
//! backends they need at compile time.
13//!
14//! ## Pure-remote builds
15//!
16//! ```sh
//! cargo build -p atomr-infer --features remote-only
18//! ```
19//!
20//! produces a binary that compiles no GPU dependencies at all — useful
21//! for pure-remote routers (a deployment that fronts OpenAI /
22//! Anthropic / Gemini / LiteLLM with rate limiting, fallback and
23//! observability but owns no hardware).
24
25#![forbid(unsafe_code)]
26#![deny(rust_2018_idioms)]
27
28pub use atomr_infer_core as core;
29pub use atomr_infer_runtime as runtime;
30
31#[cfg(feature = "candle")]
32pub use atomr_infer_runtime_candle as runtime_candle;
33#[cfg(feature = "cudarc")]
34pub use atomr_infer_runtime_cudarc as runtime_cudarc;
35#[cfg(feature = "mistralrs")]
36pub use atomr_infer_runtime_mistralrs as runtime_mistralrs;
37#[cfg(feature = "ort")]
38pub use atomr_infer_runtime_ort as runtime_ort;
39#[cfg(feature = "tensorrt")]
40pub use atomr_infer_runtime_tensorrt as runtime_tensorrt;
41#[cfg(feature = "vllm")]
42pub use atomr_infer_runtime_vllm as runtime_vllm;
43
44#[cfg(feature = "anthropic")]
45pub use atomr_infer_runtime_anthropic as runtime_anthropic;
46#[cfg(feature = "gemini")]
47pub use atomr_infer_runtime_gemini as runtime_gemini;
48#[cfg(feature = "litellm")]
49pub use atomr_infer_runtime_litellm as runtime_litellm;
50#[cfg(feature = "openai")]
51pub use atomr_infer_runtime_openai as runtime_openai;
52
53#[cfg(feature = "pipeline")]
54pub use atomr_infer_pipeline as pipeline;
55
56#[cfg(feature = "testkit")]
57pub use atomr_infer_testkit as testkit;
58
59/// Re-export of the upstream `atomr-accel` trait surface so callers
60/// can reach `AccelBackend`, `AccelRef<T>`, `AccelError`,
61/// `CompletionStrategy`, `KernelOp`, etc. without taking a separate
62/// dependency.
63#[cfg(feature = "accel")]
64pub use atomr_accel as accel;
65
66/// Re-export of the NVIDIA CUDA backend (`atomr-accel-cuda`, split
67/// out of the umbrella in atomr-accel 0.3) so callers can reach
68/// `DeviceActor`, `ContextActor`, `GpuRef`, `GpuDispatcher`,
69/// `PerActorAllocator`, `PlacementActor`, and the kernel actors at
70/// `atomr_infer::accel_cuda::*`. Doc §4 ("Foundational Mapping" —
71/// `WorkerActor` ≡ `DeviceActor`).
72#[cfg(feature = "accel")]
73pub use atomr_accel_cuda as accel_cuda;
74
75/// Re-export of `atomr-accel-patterns` so callers can compose §9
76/// pipelines (`DynamicBatchingServer`, `InferenceCascade`,
77/// `ModelReplicaPool`, `FairShareScheduler`, `ModelHotSwapServer`,
78/// `SpeculativeDecoder`, `MoeRouter`) without a second dep.
79#[cfg(feature = "accel-patterns")]
80pub use atomr_accel_patterns as accel_patterns;
81
/// Zero-config defaults: auto-provisioning helpers aimed at common
/// developer-experience setups.
///
/// At present the only entry is [`defaults::gemma`], which targets
/// local Gemma 4 through the vLLM runner. The module is off by
/// default and is compiled only when the `gemma-default` feature is
/// enabled.
#[cfg(feature = "gemma-default")]
pub mod defaults {
    /// Gemma 4 auto-provisioner, aliased from
    /// `atomr_infer_runtime_vllm::defaults` (see that module for the
    /// full surface) so rollup consumers don't need a second
    /// dependency.
    pub use ::atomr_infer_runtime_vllm::defaults as gemma;
}
95
96/// Re-export the most commonly used types so callers can `use
97/// atomr_infer::prelude::*;` and have everything they need to declare
98/// `Deployment`s and write actors.
99pub mod prelude {
100 pub use atomr_infer_core::{
101 Deployment, ExecuteBatch, InferenceError, InferenceResult, ModelRunner, ProviderKind, RateLimits,
102 RetryPolicy, RuntimeConfig, RuntimeKind, SecretString, Serving, Timeouts, TokenChunk, Tokens,
103 TransportKind,
104 };
105}