List of all items
Structs
- adaptive_lookahead::AdaptiveLookahead
- adaptive_lookahead::AdaptiveLookaheadConfig
- adaptive_sampling::AdaptiveSamplerChain
- adaptive_sampling::EntropyCooling
- adaptive_sampling::GenerationState
- adaptive_sampling::RepetitionAdaptation
- adaptive_sampling::ScheduledDecay
- admin::AdminState
- admin::ConfigSnapshot
- admin::ServerStatus
- api_extensions::JsonModeEnforcer
- api_extensions::StopChecker
- api_types::ChoiceLogprobs
- api_types::ExtendedChatRequest
- api_types::ExtendedChatResponse
- api_types::ExtendedChoice
- api_types::FunctionCallResult
- api_types::FunctionDefinition
- api_types::FunctionName
- api_types::JsonSchemaFormat
- api_types::LogprobsContent
- api_types::NamedToolChoice
- api_types::ResponseFormat
- api_types::Tool
- api_types::ToolCall
- api_types::ToolCallResult
- api_types::ToolDefinition
- api_types::ToolFunction
- api_types::ToolFunctionCall
- api_types::TopLogprob
- api_types::UsageInfo
- async_engine::AsyncInferenceEngine
- auto_tuner::AutoTuner
- auto_tuner::CpuFeatures
- auto_tuner::KernelBenchmark
- auto_tuner::MemoryBudget
- auto_tuner::TuningRecommendation
- batch_engine::BatchConfig
- batch_engine::BatchRequest
- batch_engine::BatchResult
- batch_engine::RequestQueue
- beam_search::Beam
- beam_search::BeamSearchConfig
- beam_search::BeamSearchEngine
- beam_search::BeamSearchResult
- builders::ConfigBuilder
- builders::EngineBuilder
- builders::SamplerBuilder
- circuit_breaker::CircuitBreaker
- circuit_breaker::CircuitBreakerConfig
- completions::CompletionChoice
- completions::CompletionLogprobs
- completions::CompletionRequest
- completions::CompletionResponse
- config::ConfigWarning
- config::ModelConfig
- config::ObservabilityConfig
- config::OxiBonsaiConfig
- config::SamplingConfig
- config::ServerConfig
- constrained_decoding::AllowListConstraint
- constrained_decoding::ConstrainedSampler
- constrained_decoding::ConstrainedSamplerBuilder
- constrained_decoding::JsonConstraint
- constrained_decoding::LengthConstraint
- constrained_decoding::NoConstraint
- constrained_decoding::RegexConstraint
- constrained_decoding::SequenceConstraint
- context_manager::ContextError
- context_manager::ContextWindow
- context_manager::ConversationContext
- context_manager::ConversationTurn
- continuous_batch::BatchRequest
- continuous_batch::ContinuousBatchScheduler
- continuous_batch::SchedulerStats
- convenience::GenerationResult
- convenience::MemoryEstimate
- convenience::ModelFileInfo
- convenience::TokenStats
- dedup::CachedResponse
- dedup::DedupCache
- dedup::DedupStats
- dedup::RequestKey
- distributed::ConsistentHashRing
- distributed::CoordinatorConfig
- distributed::DistributedCoordinator
- distributed::NodeInfo
- distributed::NodeRegistry
- distributed::VNode
- embedding_index::EmbeddingIndex
- embedding_index::NswConfig
- embedding_index::NswIndex
- embedding_index::NswSearchResult
- embeddings::EmbedderRegistry
- embeddings::EmbeddingAppState
- embeddings::EmbeddingObject
- embeddings::EmbeddingRequest
- embeddings::EmbeddingResponse
- embeddings::EmbeddingUsage
- engine::EngineStats
- engine::InferenceEngine
- grammar::ast::Grammar
- grammar::ast::Rule
- grammar::cache::AllowedTokensCache
- grammar::constraint::GrammarConstraint
- grammar::earley::EarleyItem
- grammar::earley::EarleyRecognizer
- grammar::earley::FirstSets
- health::HealthCheck
- health::HealthReport
- hot_reload::HotReloadCoordinator
- hot_reload::ModelVersion
- hot_reload::ReloadEvent
- hot_reload::ReloadLog
- json_schema::SchemaState
- kv_cache_policy::KvCachePolicy
- kv_cache_policy::KvCachePolicyConfig
- memory::MemoryProfiler
- memory::MemorySnapshot
- metrics::Counter
- metrics::Gauge
- metrics::Histogram
- metrics::InferenceMetrics
- middleware::CorsConfig
- middleware::IdempotencyCache
- middleware::RequestContext
- middleware::RequestIdGen
- middleware::RequestLogger
- model_cache::ModelCache
- model_cache::ModelCacheConfig
- model_cache::ModelCacheStats
- model_cache::ModelEntry
- model_cache::ModelWarmup
- multi_model::AdapterRef
- multi_model::AdapterStack
- multi_model::ModelEndpoint
- multi_model::ModelId
- multi_model::ModelListEntry
- multi_model::ModelRegistry
- multi_model::ModelRouter
- native_tokenizer::NativeTokenizerBridge
- nbest::Hypothesis
- nbest::NBestDecoder
- nbest::NBestList
- ngram_cache::NgramCache
- pipeline::InferencePipeline
- pipeline::PipelineBuilder
- pipeline::PipelineOutput
- prefix_cache_engine::PrefixCachedEngine
- profiler::AggregateStats
- profiler::ProfileEvent
- profiler::ProfileGuard
- profiler::ProfileTrace
- profiler::Profiler
- quality_metrics::BatchQualityAnalyzer
- quality_metrics::BleuScore
- quality_metrics::DiversityMetrics
- quality_metrics::GenerationQualityReport
- quality_metrics::RepetitionMetrics
- rate_limiter::RateLimitConfig
- rate_limiter::RateLimiter
- request_id::RequestId
- request_metrics::AggregateRateSnapshot
- request_metrics::RequestRateAggregator
- request_metrics::RequestRateSnapshot
- request_metrics::RequestRateTracker
- request_queue::BoundedQueue
- request_queue::InferenceQueue
- request_queue::InferenceWorkItem
- request_queue::QueueStats
- sampling::Sampler
- sampling::SamplingParams
- sampling_advanced::EtaSampler
- sampling_advanced::LcgRng
- sampling_advanced::MinPSampler
- sampling_advanced::MirostatV1Sampler
- sampling_advanced::MirostatV2Sampler
- sampling_advanced::SamplerChain
- sampling_advanced::TypicalSampler
- semantic_cache::CachedInference
- semantic_cache::CachedResponse
- semantic_cache::SemanticCache
- semantic_cache::SemanticCacheConfig
- semantic_cache::SemanticCacheStats
- server::AppState
- server::ChatChoice
- server::ChatCompletionRequest
- server::ChatCompletionResponse
- server::ChatMessage
- server::QueueDepthTracker
- server::ServerConfig
- server::Usage
- speculative::SpeculativeConfig
- speculative::SpeculativeDecoder
- speculative::SpeculativeStep
- stream_metrics::RequestStreamMetrics
- stream_metrics::StreamMetricsSnapshot
- stream_metrics::StreamingMetricsAggregator
- streaming::SseFormatter
- streaming::StreamChoice
- streaming::StreamChunk
- streaming::StreamDelta
- streaming::StreamStats
- streaming::TokenStream
- token_budget::BudgetConfig
- token_budget::GlobalTokenBudget
- token_budget::RequestBudget
- token_budget::TokenCostEstimate
- token_healing::HealingDecoder
- token_healing::HealingResult
- token_healing::TokenHealer
- token_healing::TokenHealingConfig
- tokenizer_bridge::DecodeStreamState
- tokenizer_bridge::TokenizerBridge
- tool_calling::ToolRegistry
- tracing_setup::TracingConfig
Enums
- adaptive_lookahead::AdaptiveLookaheadError
- api_types::StopSequences
- api_types::ToolChoice
- auto_tuner::CpuArch
- auto_tuner::KvCacheType
- auto_tuner::SimdTier
- batch_engine::FinishReason
- circuit_breaker::CircuitBreakerError
- circuit_breaker::CircuitState
- completions::PromptInput
- config::WarningSeverity
- constrained_decoding::ConstraintError
- constrained_decoding::JsonParseState
- context_manager::TruncationStrategy
- continuous_batch::RequestPriority
- continuous_batch::RequestState
- continuous_batch::SchedulerError
- embeddings::EmbeddingData
- embeddings::EmbeddingInput
- error::RuntimeError
- grammar::ast::Symbol
- grammar::bnf_parser::BnfParseError
- grammar::gbnf_parser::GbnfParseError
- grammar::json_schema_compiler::JsonSchemaCompileError
- grammar::regex_compiler::RegexCompileError
- health::HealthStatus
- json_schema::SchemaError
- json_schema::SchemaType
- kv_cache_policy::KvCacheLevel
- kv_cache_policy::KvCachePolicyError
- multi_model::EndpointStatus
- multi_model::RoutingError
- native_tokenizer::NativeTokenizerError
- pipeline::GenerationStrategy
- pipeline::StopReason
- presets::SamplingPreset
- rate_limiter::RateLimitDecision
- recovery::ErrorClass
- recovery::RecoveryStrategy
- sampling_advanced::SamplerStep
- token_budget::BudgetError
- token_budget::BudgetPolicy
- tool_calling::ToolCallError
Traits
Functions
- admin::create_admin_router
- admin::features_enabled
- admin::get_cache_stats
- admin::get_config
- admin::get_status
- admin::get_workload_stats
- admin::reset_metrics
- api_extensions::apply_frequency_penalty
- api_extensions::extended_chat_completions
- api_extensions::generate_n_completions
- api_types::compute_logprobs
- api_types::fingerprint_from_config
- api_types::generate_tool_call_id
- api_types::is_valid_json
- api_types::parse_tool_call
- batch_engine::batch_generate
- batch_engine::batch_generate_with_timeout
- completions::create_completion
- convenience::estimate_memory_requirements
- convenience::format_bytes
- convenience::format_duration
- convenience::format_token_count
- convenience::format_tokens_per_second
- convenience::validate_model_file
- distributed::fnv1a_hash
- embeddings::create_embeddings
- embeddings::create_embeddings_router
- grammar::bnf_parser::parse_bnf
- grammar::examples::arithmetic_grammar
- grammar::examples::csv_row_grammar
- grammar::examples::json_lite_grammar
- grammar::examples::palindrome_grammar
- grammar::examples::simple_ab_grammar
- grammar::gbnf_parser::parse_gbnf
- grammar::json_schema_compiler::compile_json_schema
- grammar::json_schema_compiler::compile_json_schema_str
- grammar::regex_compiler::compile_regex
- health::check_kernel_tier
- health::check_kv_cache
- health::check_memory_pressure
- health::check_model_loaded
- health::run_health_checks
- json_schema::parse_schema
- json_schema::schema_example
- json_schema::schema_template
- json_schema::validate_against_schema
- memory::get_rss_bytes
- metrics::default_latency_buckets
- metrics::default_rate_buckets
- pipeline::chat_pipeline
- pipeline::code_pipeline
- pipeline::greedy_pipeline
- profiler::flop_counter::attention
- profiler::flop_counter::linear
- profiler::flop_counter::matmul
- profiler::flop_counter::rms_norm
- profiler::flop_counter::swiglu_ffn
- quality_metrics::extract_ngrams
- quality_metrics::perplexity_from_logprobs
- quality_metrics::repetition_penalty_rate
- quality_metrics::self_bleu
- quality_metrics::token_entropy
- rate_limiter::extract_client_id
- rate_limiter::rate_limit_middleware
- recovery::classify_error
- recovery::recommended_batch_size
- recovery::recovery_strategy_for
- recovery::retry_with_backoff
- recovery::with_timeout
- sampling_advanced::apply_repetition_penalty
- sampling_advanced::apply_temperature
- sampling_advanced::entropy
- sampling_advanced::log_softmax
- sampling_advanced::perplexity
- sampling_advanced::softmax_inplace
- sampling_advanced::top_k_indices
- server::create_router
- server::create_router_with_metrics
- server::create_server
- server::request_id_header_map
- server::resolve_request_id
- server::serve_with_shutdown
- server::shutdown_signal
- tool_calling::build_tool_constraint
- tool_calling::make_tool_call
- tool_calling::new_tool_call_id
- tool_calling::select_tool
- tool_calling::validate_tool_arguments
- tracing_setup::init_tracing
- wasm_api::generate_json
- web_ui::create_ui_router