Skip to main content

ferrum_interfaces/
lib.rs

1//! Core interface definitions for the Ferrum inference framework
2//!
3//! This crate defines all the stable trait interfaces that different components
4//! of Ferrum implement. It provides a clean abstraction layer that allows for
5//! pluggable implementations of tokenizers, model executors, schedulers,
6//! cache managers, and other core components.
7//!
8//! The interfaces are designed following the principles outlined in the
9//! refactoring documentation:
10//! - Single responsibility with stable boundaries
11//! - Zero-copy and handle semantics
12//! - Capability-discovery-driven
13//! - Performance-first API design
14
// Crate-wide allow for the `async_fn_in_trait` lint, which warns about `async fn`
// in public traits because callers cannot add auto-trait bounds (e.g. `Send`) to
// the returned future.
// NOTE(review): presumably the trait modules declared below use `async fn` in
// their public traits — TODO confirm, and confirm that futures without a `Send`
// bound are acceptable for every implementor and caller.
15#![allow(async_fn_in_trait)]
16
// Public submodules of the crate. Each module listed here has items re-exported
// at the crate root by the `pub use` declarations further down in this file.
17pub mod backend;
18pub mod decode_backend;
19pub mod engine;
20pub mod kernel_ops;
21pub mod kv_cache;
22pub mod memory;
23pub mod model_builder;
24pub mod model_executor;
25pub mod sampler;
26pub mod scheduler;
27pub mod tensor;
28pub mod tokenizer;
29pub mod transformer;
30
31// Re-export core traits and important types
32pub use backend::{BackendCapabilities, ComputeBackend, WeightLoader};
33pub use decode_backend::DecodeBackend;
34pub use engine::InferenceEngine;
35pub use kv_cache::{
36    AllocationRequest, BlockTable, CacheHandleStats, KvCacheHandle, KvCacheManager,
37};
38pub use memory::{DeviceMemoryManager, MemoryHandle, StreamHandle};
39pub use model_builder::{BuildOptions, ModelBuilder};
40pub use model_executor::{DecodeInput, DecodeOutput, ModelExecutor, PrefillInput, PrefillOutput};
41pub use sampler::{LogitsProcessor, Sampler, SamplingConfig, SamplingContext};
42pub use scheduler::{BatchHint, BatchPlan, Scheduler as SchedulerInterface};
43pub use tensor::{TensorFactory, TensorLike, TensorOps, TensorRef};
44pub use tokenizer::{IncrementalTokenizer, Tokenizer, TokenizerFactory, TokenizerInfo};
45pub use transformer::{TransformerConfig, TransformerWeights};
46
47// Kernel ops re-exports
48pub use kernel_ops::{
49    ActivationOps, AttentionOps, AttentionParams, KernelOps, KernelOpsDispatch, LinearOps, NormOps,
50    PositionOps, QuantScheme, RoPEConfig, SamplingOps, SamplingParams as KernelSamplingParams,
51};
52
53// Re-export types from ferrum-types, avoiding conflicts
54pub use ferrum_types::{
55    config::BackendConfig,
56    // Config types - imported through the `config` module path to avoid conflicts
56    // (this also applies to `config::BackendConfig` above)
57    config::EngineConfig,
58    config::SchedulerConfig,
59    config::TokenizerConfig,
60    BatchId,
61    BlockId,
62    ClientId,
63    ComponentHealth,
64    ComponentStatus,
65    DataType,
66    // Device types
67    Device,
68    EngineMetrics,
69    EngineStatus,
70    FerrumError,
71    FinishReason,
72    HealthStatus,
73    // Requests and responses
74    InferenceRequest,
75    InferenceResponse,
76    // Metrics
77    MemoryUsage,
78    ModelId,
79    // Model types
80    ModelInfo,
81    ModelSource,
82    ModelType,
83    Priority,
84    // IDs
85    RequestId,
86    Result,
87    // Sampling
88    SamplingParams,
89    SchedulerStats,
90    SessionId,
91    SpecialTokens,
92    StreamChunk,
93    TaskId,
94    // Basic types
95    TokenId,
96};