Skip to main content

oxillama_server/
lib.rs

1//! # oxillama-server
2//!
3//! OpenAI-compatible HTTP API server for OxiLLaMa.
4//!
5//! ## Endpoints
6//!
7//! | Method | Path | Description |
8//! |--------|------|-------------|
9//! | POST | `/v1/chat/completions` | Chat completion |
10//! | POST | `/v1/completions` | Text completion |
11//! | POST | `/v1/embeddings` | Text embeddings |
12//! | GET | `/v1/models` | List loaded models |
13//! | GET | `/health` | Health check |
14//! | POST | `/v1/batches` | Create batch job (disk-spooled) |
15//! | GET | `/v1/batches/:id` | Retrieve batch job |
16//! | GET | `/v1/batches/:id/output` | Stream batch output JSONL |
17//! | POST | `/v1/batches/:id/cancel` | Cancel batch job |
18//! | GET | `/v1/batches` | List batch jobs |
19//! | POST | `/v1/threads` | Create Assistants API thread |
20//! | GET | `/v1/threads/:thread_id` | Retrieve thread |
21//! | POST | `/v1/threads/:thread_id/messages` | Append message to thread |
22//! | GET | `/v1/threads/:thread_id/messages` | List thread messages |
23//! | POST | `/v1/threads/:thread_id/runs` | Create and enqueue a run |
24//! | GET | `/v1/threads/:thread_id/runs/:run_id` | Get run status |
25//! | POST | `/v1/threads/:thread_id/runs/:run_id/cancel` | Cancel a run |
26//! | POST | `/admin/models/load` | Background-load model (admin) |
27//! | POST | `/admin/models/unload` | Unload model (admin) |
28//! | GET | `/admin/models` | List model pool (admin) |
29//! | GET | `/admin/stats` | Server stats (admin) |
30//! | GET | `/admin/health` | Extended health (admin) |
31//! | POST | `/admin/loras` | Register a LoRA adapter (admin) |
32//! | DELETE | `/admin/loras/:name` | Unregister a LoRA adapter (admin) |
33//! | GET | `/admin/loras` | List registered LoRA adapters (admin) |
34
35pub mod admin;
36pub mod app;
37pub mod auth;
38pub mod batch;
39pub mod batch_spool;
40pub mod body_limit;
41pub mod config;
42pub mod error;
43pub mod files_store;
44#[cfg(feature = "jwt")]
45pub mod jwt_auth;
46pub mod metrics;
47pub mod queue;
48pub mod rate_limit;
49pub mod responses_store;
50pub mod router;
51pub mod routes;
52pub mod shutdown;
53pub mod sse;
54pub mod state;
55pub mod threads;
56pub mod tracing_layer;
57pub mod worker;
58pub mod ws;
59
60#[cfg(test)]
61pub(crate) mod test_helpers;
62
63pub use app::build_app;
64pub use auth::ApiKeys;
65pub use config::ServerConfig;
66pub use error::{ServerError, ServerResult};
67pub use metrics::Metrics;
68pub use queue::{BatchRequest, LoraSelection, VocabBytes};
69pub use rate_limit::{PerKeyRateLimiter, RateLimiter};
70pub use responses_store::ResponseStore;
71pub use router::{ModelLoader, ModelPool, ModelSpec};
72pub use shutdown::{shutdown_signal, ShutdownSignal, ShutdownTrigger};
73pub use state::AppState;
74pub use threads::{new_run_queue, RunQueueSender, ThreadStore};
75pub use worker::spawn_inference_worker;