//! Core abstractions for the Ferrum inference engine.
//!
//! This crate root declares the trait/interface modules and re-exports their
//! primary types, so downstream crates can write `use ferrum_core::InferenceEngine`
//! (or similar) without reaching into individual submodules. Shared value types
//! from `ferrum_types` are also re-exported for the same reason.

// Public traits in this crate use `async fn` directly; this lint warns that
// callers cannot add auto-trait bounds (e.g. `Send`) to the returned futures.
// NOTE(review): suppressed crate-wide — confirm all consumers are fine with
// un-nameable future types before exposing these traits outside the workspace.
#![allow(async_fn_in_trait)]

pub mod backend;
pub mod decode_backend;
pub mod engine;
pub mod kernel_ops;
pub mod kv_cache;
pub mod memory;
pub mod model_builder;
pub mod model_executor;
pub mod sampler;
pub mod scheduler;
pub mod tensor;
pub mod tokenizer;
pub mod transformer;

// One re-export per module, kept in the same alphabetical order as the
// module declarations above.
pub use backend::{BackendCapabilities, ComputeBackend, WeightLoader};
pub use decode_backend::DecodeBackend;
pub use engine::InferenceEngine;
pub use kernel_ops::{
    ActivationOps, AttentionOps, AttentionParams, KernelOps, KernelOpsDispatch, LinearOps, NormOps,
    PositionOps, QuantScheme, RoPEConfig, SamplingOps, SamplingParams as KernelSamplingParams,
};
pub use kv_cache::{
    AllocationRequest, BlockTable, CacheHandleStats, KvCacheHandle, KvCacheManager,
};
pub use memory::{DeviceMemoryManager, MemoryHandle, StreamHandle};
pub use model_builder::{BuildOptions, ModelBuilder};
pub use model_executor::{DecodeInput, DecodeOutput, ModelExecutor, PrefillInput, PrefillOutput};
pub use sampler::{LogitsProcessor, Sampler, SamplingConfig, SamplingContext};
pub use scheduler::{BatchHint, BatchPlan, Scheduler as SchedulerInterface};
pub use tensor::{TensorFactory, TensorLike, TensorOps, TensorRef};
pub use tokenizer::{IncrementalTokenizer, Tokenizer, TokenizerFactory, TokenizerInfo};
pub use transformer::{TransformerConfig, TransformerWeights};

// Shared value types from `ferrum_types`, flattened into this crate's root so
// dependents only need a single `use ferrum_core::...` path.
pub use ferrum_types::{
    config::{BackendConfig, EngineConfig, SchedulerConfig, TokenizerConfig},
    BatchId, BlockId, ClientId, ComponentHealth, ComponentStatus, DataType, Device, EngineMetrics,
    EngineStatus, FerrumError, FinishReason, HealthStatus, InferenceRequest, InferenceResponse,
    MemoryUsage, ModelId, ModelInfo, ModelSource, ModelType, Priority, RequestId, Result,
    SamplingParams, SchedulerStats, SessionId, SpecialTokens, StreamChunk, TaskId, TokenId,
};