List of all items
Structs
- KvSlot
- LoadedLora
- LoraStack
- beam_search::BeamHypothesis
- beam_search::BeamSearchConfig
- beam_search::EngineBeamAdapter
- engine::EngineConfig
- engine::InferenceEngine
- flash_attention::FlashAttentionConfig
- kv_cache::KvCache
- kv_cache::KvCacheSnapshot
- kv_cache::KvSlot
- kv_cache::VecBatchedKvView
- kv_cache::paged::PagedKvCache
- kv_cache::prefix::CachedKvState
- kv_cache::prefix::PrefixCacheConfig
- kv_cache::prefix::PrefixKvCache
- kv_pool::KvCachePool
- metrics::EngineMetrics
- metrics::MetricsSnapshot
- offload::pager::FilePagerSource
- offload::pager::LayerPager
- offload::pager::MmapPagerSource
- offload::pager::ResidentTensor
- offload::pager::TensorEntry
- offload::pager::TensorId
- offload::pressure::MemoryPressureProbe
- sampling::Sampler
- sampling::SamplerConfig
- sampling::advanced::DryStage
- sampling::advanced::EtaStage
- sampling::advanced::TopAStage
- sampling::advanced::TypicalPStage
- sampling::advanced::XtcStage
- sampling::chain::GreedySelect
- sampling::chain::LogitBias
- sampling::chain::MinP
- sampling::chain::RepetitionPenalty
- sampling::chain::SamplerChain
- sampling::chain::TemperatureScale
- sampling::chain::TopK
- sampling::chain::TopP
- sampling::grammar::json_schema::JsonSchemaCompiler
- sampling::grammar::machine::GrammarState
- sampling::grammar::parser::CharRange
- sampling::grammar::parser::Grammar
- scheduler::ScheduledBatch
- scheduler::Scheduler
- scheduler::SchedulerConfig
- scheduler::Sequence
- sequence_pool::SequenceSlot
- sequence_pool::SsmStatePool
- snapshot::EngineSnapshot
- snapshot::GrammarStatePayload
- snapshot::KvStatePayload
- snapshot::ModelFingerprint
- snapshot::SamplerStatePayload
- snapshot::SpeculativeEngineSnapshot
- snapshot::SsmStatePayload
- speculative::SpeculativeConfig
- speculative::SpeculativeDeltaSync
- speculative::SpeculativeEngine
- speculative_async::AsyncSpecConfig
- speculative_async::SpecStats
- speculative_async::SpeculativeDecoder
- tokenizer_bridge::TokenizerBridge
- tool_dispatch::NoOpDispatcher
- tool_dispatch::ToolCall
- tool_dispatch::ToolCallDetector
Enums
- embedding::PoolingMode
- error::RuntimeError
- offload::policy::OffloadPolicy
- sampling::grammar::error::GrammarError
- sampling::grammar::parser::GrammarNode
- scheduler::SeqState
- sequence_pool::PoolError
- sequence_pool::SequencePool
- snapshot::SequenceStatePayload
- speculative_async::RewindError
- tool_dispatch::ToolCallGrammar
- tool_dispatch::ToolResult
Traits
- BatchedKvView
- beam_search::BeamForwardPass
- kv_cache::BatchedKvView
- offload::pager::PagerSource
- sampling::chain::SamplerStage
- speculative_async::Rewindable
- tool_dispatch::ToolDispatcher
Functions
- batched_attention::batched_flash_attention
- beam_search::beam_generate
- embedding::pool_hidden_states
- flash_attention::flash_attention
- flash_attention::flash_attention_forward
- flash_attention::flash_attention_gqa
- flash_attention::flash_attention_multi_head
- lora_loader::apply_lora
- offload::pressure::host_memory_pressure
- sampling::grammar::machine::apply_grammar_mask
- sampling::sample
- tool_dispatch::no_op_dispatcher
Type Aliases
- error::RuntimeResult
- sampling::grammar::error::GrammarResult
- scheduler::SeqId
- sequence_pool::PoolResult