forge_orchestration/
lib.rs

1//! # Forge Orchestration
2//!
3//! A Rust-native orchestration platform for distributed workloads with
4//! Mixture of Experts (MoE) routing, autoscaling, and Nomad integration.
5//!
6//! ## Features
7//!
8//! - **Job Management**: Define and submit jobs with task groups
9//! - **MoE Routing**: Intelligent request routing to expert workers
10//! - **Autoscaling**: Threshold and predictive scaling policies
11//! - **Nomad Integration**: Schedule via HashiCorp Nomad
12//! - **Metrics**: Prometheus-compatible metrics export
13//! - **SDK**: Embedded SDK for workloads (lifecycle, ports, heartbeats)
14//!
15//! ## Quick Start
16//!
17//! ```rust,no_run
18//! use forge_orchestration::{ForgeBuilder, AutoscalerConfig, Job, Task, Driver};
19//!
20//! #[tokio::main]
21//! async fn main() -> forge_orchestration::Result<()> {
22//!     let forge = ForgeBuilder::new()
23//!         .with_autoscaler(AutoscalerConfig::default())
24//!         .build()?;
25//!
26//!     let job = Job::new("my-service")
27//!         .with_group("api", Task::new("server")
28//!             .driver(Driver::Exec)
29//!             .command("/usr/bin/server"));
30//!
31//!     forge.submit_job(job).await?;
32//!     forge.run().await?;
33//!     Ok(())
34//! }
35//! ```
36//!
37//! ## SDK Usage (for workloads)
38//!
39//! ```rust,no_run
40//! use forge_orchestration::sdk::{ready, allocate_port, graceful_shutdown};
41//!
42//! #[tokio::main]
43//! async fn main() -> forge_orchestration::Result<()> {
44//!     ready()?;
45//!     let port = allocate_port(8000..9000)?;
46//!     graceful_shutdown();
47//!     // ... serve on port ...
48//!     Ok(())
49//! }
50//! ```
51
52#![warn(missing_docs)]
53#![warn(rustdoc::missing_crate_level_docs)]
54
55pub mod autoscaler;
56pub mod builder;
57pub mod error;
58pub mod inference;
59pub mod job;
60pub mod metrics;
61pub mod moe;
62pub mod networking;
63pub mod nomad;
64pub mod resilience;
65pub mod runtime;
66pub mod sdk;
67pub mod storage;
68pub mod types;
69
70// Re-exports for ergonomic API
71pub use autoscaler::{Autoscaler, AutoscalerConfig, ScalingDecision};
72pub use builder::ForgeBuilder;
73pub use error::{ForgeError, Result};
74pub use job::{Driver, Job, Task, TaskGroup};
75pub use metrics::{ForgeMetrics, MetricsExporter, MetricsHook};
76pub use moe::{DefaultMoERouter, LoadAwareMoERouter, MoERouter, RoundRobinMoERouter, RouteResult, GpuAwareMoERouter, VersionAwareMoERouter};
77pub use networking::{HttpServer, HttpServerConfig, QuicTransport};
78pub use nomad::NomadClient;
79pub use runtime::Forge;
80pub use storage::{FileStore, MemoryStore, StateStore};
81pub use types::{Expert, GpuResources, NodeId, Region, Shard, ShardId};
82
83/// Prelude module for convenient imports
84pub mod prelude {
85    pub use crate::autoscaler::{Autoscaler, AutoscalerConfig};
86    pub use crate::builder::ForgeBuilder;
87    pub use crate::error::Result;
88    pub use crate::job::{Driver, Job, Task};
89    pub use crate::moe::MoERouter;
90    pub use crate::runtime::Forge;
91    pub use crate::sdk::{allocate_port, graceful_shutdown, ready, ForgeClient};
92    pub use crate::types::{Expert, Shard};
93}