Expand description
§Infernum Server
HTTP API server with OpenAI-compatible endpoints.
§Features
- OpenAI API Compatibility: Drop-in replacement for OpenAI’s API
- Chat Completions: `/v1/chat/completions` with streaming support
- Text Completions: `/v1/completions` for raw text generation
- Embeddings: `/v1/embeddings` for vector generation
- Model Management: Load/unload models at runtime
- Health Checks: `/health` and `/ready` endpoints
§Example
ⓘ
use infernum_server::{Server, ServerConfig};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let config = ServerConfig::builder()
        .addr("0.0.0.0:8080".parse()?)
        .model("meta-llama/Llama-3.2-3B-Instruct")
        .build();

    let server = Server::new(config);
    server.run().await?;

    Ok(())
}

Re-exports§
pub use openai::ChatChoice;
pub use openai::ChatCompletionRequest;
pub use openai::ChatCompletionResponse;
pub use openai::ChatMessage;
pub use openai::CompletionChoice;
pub use openai::CompletionRequest;
pub use openai::CompletionResponse;
pub use openai::EmbeddingData;
pub use openai::EmbeddingInput;
pub use openai::EmbeddingRequest;
pub use openai::EmbeddingResponse;
pub use openai::ModelObject;
pub use openai::ModelsResponse;
pub use openai::Usage;
pub use server::AppState;
pub use server::Server;
pub use server::ServerConfig;
pub use server::ServerConfigBuilder;