1#![allow(async_fn_in_trait)]
11
12pub mod engine;
13pub mod kv_cache;
14pub mod kv_dtype;
15pub mod model_executor;
16pub mod sampler;
17pub mod scheduler;
18pub mod tensor;
19pub mod tokenizer;
20
21pub use engine::InferenceEngine;
23pub use kv_cache::{
24 AllocationRequest, BlockTable, CacheHandleStats, KvCacheHandle, KvCacheManager,
25};
26pub use kv_dtype::{KvBf16, KvDtypeKind, KvFp16, KvFp8, KvInt8};
27pub use model_executor::{DecodeInput, DecodeOutput, ModelExecutor, PrefillInput, PrefillOutput};
28pub use sampler::{LogitsProcessor, Sampler, SamplingConfig, SamplingContext};
29pub use scheduler::{BatchHint, BatchPlan, Scheduler as SchedulerInterface};
30pub use tensor::{TensorFactory, TensorLike, TensorOps, TensorRef};
31pub use tokenizer::{IncrementalTokenizer, Tokenizer, TokenizerFactory, TokenizerInfo};
32
33pub use ferrum_types::{
35 config::BackendConfig,
36 config::EngineConfig,
38 config::SchedulerConfig,
39 config::TokenizerConfig,
40 BatchId,
41 BlockId,
42 ClientId,
43 ComponentHealth,
44 ComponentStatus,
45 DataType,
46 Device,
48 EngineMetrics,
49 EngineStatus,
50 FerrumError,
51 FinishReason,
52 HealthStatus,
53 InferenceRequest,
55 InferenceResponse,
56 MemoryUsage,
58 ModelId,
59 ModelInfo,
61 ModelSource,
62 ModelType,
63 Priority,
64 RequestId,
66 Result,
67 SamplingParams,
69 SchedulerStats,
70 SessionId,
71 SpecialTokens,
72 StreamChunk,
73 TaskId,
74 TokenId,
76};