Skip to main content

ferrum_interfaces/
lib.rs

1//! Core interface definitions for the Ferrum inference framework
2//!
3//! This crate carries the stable, GPU-free trait contracts shared across
4//! the workspace: model execution, scheduling, KV cache management,
5//! tokenization, sampling, and the lifecycle/modality engine traits.
6//! Hardware backends live in `ferrum-kernels` (the `Backend<B>` trait
7//! and its supertraits); only types that compile without GPU features
8//! belong here.
9
10#![allow(async_fn_in_trait)] // crate-wide; NOTE(review): presumably the public traits declare `async fn` — confirm
11
12pub mod engine; // re-exported below: `InferenceEngine`
13pub mod kv_cache; // re-exported below: `KvCacheManager`, `KvCacheHandle`, `BlockTable`, `AllocationRequest`, `CacheHandleStats`
14pub mod kv_dtype; // re-exported below: `KvDtypeKind`, `KvBf16`, `KvFp16`, `KvFp8`, `KvInt8`
15pub mod model_executor; // re-exported below: `ModelExecutor`, `PrefillInput`/`PrefillOutput`, `DecodeInput`/`DecodeOutput`
16pub mod sampler; // re-exported below: `Sampler`, `LogitsProcessor`, `SamplingConfig`, `SamplingContext`
17pub mod scheduler; // re-exported below: `Scheduler` (as `SchedulerInterface`), `BatchHint`, `BatchPlan`
18pub mod tensor; // re-exported below: `TensorLike`, `TensorOps`, `TensorFactory`, `TensorRef`
19pub mod tokenizer; // re-exported below: `Tokenizer`, `IncrementalTokenizer`, `TokenizerFactory`, `TokenizerInfo`
20
21// Re-export core traits and important types at the crate root (`scheduler::Scheduler` is exposed as `SchedulerInterface`)
22pub use engine::InferenceEngine;
23pub use kv_cache::{
24    AllocationRequest, BlockTable, CacheHandleStats, KvCacheHandle, KvCacheManager,
25};
26pub use kv_dtype::{KvBf16, KvDtypeKind, KvFp16, KvFp8, KvInt8};
27pub use model_executor::{DecodeInput, DecodeOutput, ModelExecutor, PrefillInput, PrefillOutput};
28pub use sampler::{LogitsProcessor, Sampler, SamplingConfig, SamplingContext};
29pub use scheduler::{BatchHint, BatchPlan, Scheduler as SchedulerInterface};
30pub use tensor::{TensorFactory, TensorLike, TensorOps, TensorRef};
31pub use tokenizer::{IncrementalTokenizer, Tokenizer, TokenizerFactory, TokenizerInfo};
32
33// Re-export types from ferrum-types, avoiding conflicts. Entries are in alphabetical order, so the category notes inside the list no longer sit above complete groups.
34pub use ferrum_types::{
35    config::BackendConfig,
36    // Config types (every `config::` path here, including `BackendConfig` above) - use fully qualified names to avoid conflicts
37    config::EngineConfig,
38    config::SchedulerConfig,
39    config::TokenizerConfig,
40    BatchId,
41    BlockId,
42    ClientId,
43    ComponentHealth,
44    ComponentStatus,
45    DataType,
46    // Device types
47    Device,
48    EngineMetrics,
49    EngineStatus,
50    FerrumError,
51    FinishReason,
52    HealthStatus,
53    // Requests and responses
54    InferenceRequest,
55    InferenceResponse,
56    // Metrics (`EngineMetrics` is listed further up)
57    MemoryUsage,
58    ModelId,
59    // Model types
60    ModelInfo,
61    ModelSource,
62    ModelType,
63    Priority,
64    // IDs (the other `*Id` re-exports are spread through this list alphabetically)
65    RequestId,
66    Result,
67    // Sampling
68    SamplingParams,
69    SchedulerStats,
70    SessionId,
71    SpecialTokens,
72    StreamChunk,
73    TaskId,
74    // Basic types
75    TokenId,
76};