// Public submodules of this module. Each `pub mod` line only declares the
// module; the module bodies live outside this file.
1pub mod batched_attention;
9pub mod beam_search;
10pub mod embedding;
11pub mod engine;
12pub mod error;
13pub mod flash_attention;
14pub mod kv_cache;
15pub mod kv_pool;
16pub mod lora_loader;
17pub mod metrics;
18pub mod offload;
19pub mod sampling;
20pub mod scheduler;
21pub mod sequence_pool;
22pub mod snapshot;
23pub mod speculative;
// `speculative_async` is compiled only when the `native-async` feature is
// enabled; the attribute below applies to that single declaration.
24#[cfg(feature = "native-async")]
25pub mod speculative_async;
26pub mod tokenizer_bridge;
27pub mod tool_dispatch;
28
// Re-exports: selected items from the submodules are surfaced at this
// module's root so callers can name them without the submodule path.
29pub use batched_attention::batched_flash_attention;
30pub use beam_search::{
31 beam_generate, BeamForwardPass, BeamHypothesis, BeamSearchConfig, EngineBeamAdapter,
32};
33pub use embedding::PoolingMode;
34pub use engine::{EngineConfig, InferenceEngine, FLASH_ATTN_THRESHOLD};
35pub use error::{RuntimeError, RuntimeResult};
36pub use flash_attention::{
37 flash_attention, flash_attention_forward, flash_attention_gqa, flash_attention_multi_head,
38 FlashAttentionConfig,
39};
// Items from the nested `kv_cache::prefix` module are re-exported alongside
// those taken from `kv_cache` itself.
40pub use kv_cache::prefix::{CachedKvState, PrefixCacheConfig, PrefixKvCache};
41pub use kv_cache::{BatchedKvView, KvCache, KvCacheSnapshot, KvSlot, VecBatchedKvView};
42pub use kv_pool::KvCachePool;
43pub use lora_loader::apply_lora;
44pub use metrics::{EngineMetrics, MetricsSnapshot};
45pub use offload::{
46 FilePagerSource, LayerPager, MemoryPressureProbe, OffloadPolicy, PagerSource, ResidentTensor,
47 TensorEntry, TensorId,
48};
// NOTE(review): these two come from the `oxillama_arch` path, which is not a
// module declared in this file; presumably an external crate dependency;
// confirm against the crate manifest.
49pub use oxillama_arch::lora::LoadedLora;
50pub use oxillama_arch::LoraStack;
// Sampling re-exports, drawn from the `advanced`, `chain` and `grammar`
// submodules of `sampling` as well as from `sampling` itself.
51pub use sampling::advanced::{DryStage, EtaStage, TopAStage, TypicalPStage, XtcStage};
52pub use sampling::chain::{LogitBias, SamplerChain, SamplerStage};
53pub use sampling::grammar::{Grammar, GrammarError, GrammarState, JsonSchemaCompiler};
54pub use sampling::{sample, Sampler, SamplerConfig};
55pub use scheduler::{Scheduler, SchedulerConfig, MAX_DECODE_WAIT_MS, PREFILL_CHUNK};
56pub use sequence_pool::{PoolError, PoolResult, SequencePool, SequenceSlot, SsmStatePool};
57pub use speculative::{SpeculativeConfig, SpeculativeDeltaSync, SpeculativeEngine};
// Gated on the `native-async` feature, the same condition that guards the
// `speculative_async` module declaration, so these names exist only when
// that module is compiled.
58#[cfg(feature = "native-async")]
59pub use speculative_async::{
60 AsyncSpecConfig, RewindError, Rewindable, SpecStats, SpeculativeDecoder,
61};
62pub use tool_dispatch::{
63 no_op_dispatcher, NoOpDispatcher, ToolCall, ToolCallDetector, ToolCallGrammar, ToolDispatcher,
64 ToolResult,
65};
66pub use tokenizer_bridge::TokenizerBridge;