Skip to main content

atomr_infer_remote_core/
lib.rs

1//! # inference-remote-core
2//!
3//! Shared remote-runtime infrastructure (doc §5, §10.3, §12). Provides
4//! the HTTP-shaped analog of the local-GPU `WorkerActor` /
5//! `EngineCoreActor` pair, plus the cross-cutting concerns that the
6//! GPU side doesn't need (rate limiting, circuit breaking, credential
7//! refresh, SSE parsing, retry/backoff, error classification, cost
8//! aggregation).
9//!
10//! Per-provider crates (`inference-runtime-openai`, `-anthropic`,
11//! `-gemini`, `-litellm`) depend on this crate and contribute one
12//! `ModelRunner` impl plus a `RuntimeConfig` shape.
13
14#![forbid(unsafe_code)]
15#![deny(rust_2018_idioms)]
16
17pub mod backoff;
18pub mod circuit_breaker;
19pub mod classify;
20pub mod engine;
21pub mod http;
22pub mod queue;
23pub mod rate_limit;
24pub mod retry;
25pub mod session;
26pub mod sse;
27pub mod worker;
28
29pub use backoff::{compute_backoff, BackoffPolicy};
30pub use circuit_breaker::{CircuitBreakerActor, CircuitBreakerHandle, CircuitState};
31pub use classify::{classify_http_status, parse_retry_after};
32pub use engine::{AddRequest, EngineMetrics, EngineMsg, RemoteEngineConfig, RemoteEngineCoreActor};
33pub use http::{build_client, HttpClient};
34pub use queue::{Priority, PriorityRequest, RequestQueue};
35pub use rate_limit::{AcquirePermit, Permit, RateLimiterActor, RateLimiterHandle, StrictRateLimiterActor};
36pub use retry::{Attempt, RetryDecision, RetryEngine};
37pub use session::{
38    CredentialProvider, RemoteSessionActor, SessionConfig, SessionRebuildRequest, SessionSnapshot,
39    StaticApiKey,
40};
41pub use sse::{decode_sse_stream, SseChunk};
42pub use worker::{RemoteWorkerActor, WorkerMsg, WorkerSlot};