//! Crate root: public abstraction layer for an inference engine.
//!
//! This file contains no logic of its own. It declares the crate's
//! submodules (backends, scheduling, KV-cache management, tokenization,
//! sampling, model execution, …) and flattens their most commonly used
//! traits and types into the crate root, so downstream code can write
//! `use <crate>::InferenceEngine` instead of reaching into submodules.

// The traits in this crate use `async fn` (see the module docs above);
// this silences the `async_fn_in_trait` lint about the unnameable future
// types such methods expose in a public API. NOTE(review): presumably a
// deliberate ergonomics trade-off — confirm against the trait definitions.
#![allow(async_fn_in_trait)]

// --- Submodule declarations -------------------------------------------------

pub mod backend;
pub mod decode_backend;
pub mod engine;
pub mod kernel_ops;
pub mod kv_cache;
pub mod memory;
pub mod model_builder;
pub mod model_executor;
pub mod sampler;
pub mod scheduler;
pub mod tensor;
pub mod tokenizer;
pub mod transformer;

// --- Re-exports of this crate's own abstractions ----------------------------

pub use backend::{BackendCapabilities, ComputeBackend, WeightLoader};
pub use decode_backend::DecodeBackend;
pub use engine::InferenceEngine;
pub use kv_cache::{
    AllocationRequest, BlockTable, CacheHandleStats, KvCacheHandle, KvCacheManager,
};
pub use memory::{DeviceMemoryManager, MemoryHandle, StreamHandle};
pub use model_builder::{BuildOptions, ModelBuilder};
pub use model_executor::{DecodeInput, DecodeOutput, ModelExecutor, PrefillInput, PrefillOutput};
pub use sampler::{LogitsProcessor, Sampler, SamplingConfig, SamplingContext};
// `Scheduler` is re-exported under an alias — presumably to keep the bare
// name free for other scheduler-related items in consumer scopes; verify
// against callers before renaming.
pub use scheduler::{BatchHint, BatchPlan, Scheduler as SchedulerInterface};
pub use tensor::{TensorFactory, TensorLike, TensorOps, TensorRef};
pub use tokenizer::{IncrementalTokenizer, Tokenizer, TokenizerFactory, TokenizerInfo};
pub use transformer::{TransformerConfig, TransformerWeights};

// Low-level kernel-operation traits. `SamplingParams` is aliased because
// `ferrum_types::SamplingParams` (re-exported below) already claims that
// name at the crate root.
pub use kernel_ops::{
    ActivationOps, AttentionOps, AttentionParams, KernelOps, KernelOpsDispatch, LinearOps, NormOps,
    PositionOps, QuantScheme, RoPEConfig, SamplingOps, SamplingParams as KernelSamplingParams,
};

// --- Re-exports of shared types from the `ferrum_types` crate ---------------
// Flattened here (configs, ids, errors, and the crate-wide `Result` alias)
// so most consumers only need a direct dependency on this crate.
pub use ferrum_types::{
    config::BackendConfig,
    config::EngineConfig,
    config::SchedulerConfig,
    config::TokenizerConfig,
    BatchId,
    BlockId,
    ClientId,
    ComponentHealth,
    ComponentStatus,
    DataType,
    Device,
    EngineMetrics,
    EngineStatus,
    FerrumError,
    FinishReason,
    HealthStatus,
    InferenceRequest,
    InferenceResponse,
    MemoryUsage,
    ModelId,
    ModelInfo,
    ModelSource,
    ModelType,
    Priority,
    RequestId,
    Result,
    SamplingParams,
    SchedulerStats,
    SessionId,
    SpecialTokens,
    StreamChunk,
    TaskId,
    TokenId,
};