List of all items
Structs
- backend::Backend
- backend::Decoder
- backend::SeqResult
- backend::StepResult
- disagg_router::DisaggRouterConf
- disagg_router::DisaggregatedRouter
- engines::MultiNodeConfig
- http::service::DeploymentState
- http::service::ModelManager
- http::service::RouteDoc
- http::service::discovery::ModelEntry
- http::service::discovery::ModelWatchState
- http::service::error::HttpError
- http::service::metrics::InflightGuard
- http::service::metrics::Metrics
- http::service::metrics::Registry
- http::service::service_v2::HttpService
- http::service::service_v2::HttpServiceConfig
- http::service::service_v2::HttpServiceConfigBuilder
- key_value_store::KeyValueStoreManager
- key_value_store::MemoryStorage
- key_value_store::NATSStorage
- kv_router::KvRouter
- kv_router::indexer::KvIndexer
- kv_router::indexer::KvIndexerSharded
- kv_router::indexer::MatchRequest
- kv_router::indexer::OverlapScores
- kv_router::indexer::RadixTree
- kv_router::indexer::RouterEvent
- kv_router::indexer::ShardedMatchRequest
- kv_router::metrics_aggregator::KvMetricsAggregator
- kv_router::protocols::ExternalSequenceBlockHash
- kv_router::protocols::ForwardPassMetrics
- kv_router::protocols::KvCacheEvent
- kv_router::protocols::KvCacheEvents
- kv_router::protocols::KvCacheRemoveData
- kv_router::protocols::KvCacheStoreData
- kv_router::protocols::KvCacheStoredBlockData
- kv_router::protocols::LocalBlockHash
- kv_router::protocols::RouterRequest
- kv_router::protocols::RouterResponse
- kv_router::protocols::WorkerSelectionResult
- kv_router::publisher::KvEventPublisher
- kv_router::publisher::KvMetricsPublisher
- kv_router::scheduler::DefaultWorkerSelector
- kv_router::scheduler::Endpoint
- kv_router::scheduler::KVHitRateEvent
- kv_router::scheduler::KvScheduler
- kv_router::scheduler::SchedulingRequest
- kv_router::scoring::ProcessedEndpoints
- model_card::model::ModelDeploymentCard
- model_card::model::ModelDeploymentCardBuilder
- preprocessor::OpenAIPreprocessor
- preprocessor::prompt::ContextMixins
- preprocessor::tools::CalledFunction
- preprocessor::tools::CalledFunctionArguments
- preprocessor::tools::CalledFunctionParameters
- preprocessor::tools::Function
- preprocessor::tools::Tool
- preprocessor::tools::ToolCallResponse
- preprocessor::tools::ToolCallingMatcher
- protocols::Annotated
- protocols::Usage
- protocols::codec::Message
- protocols::codec::SseLineCodec
- protocols::common::CalibrationResults
- protocols::common::ChatCompletionLogprobs
- protocols::common::ChatCompletionTokenLogprob
- protocols::common::ChatContext
- protocols::common::ChatTurn
- protocols::common::CompletionContext
- protocols::common::CompletionRequest
- protocols::common::CompletionRequestBuilder
- protocols::common::Delta
- protocols::common::Epilogue
- protocols::common::LoadgenResults
- protocols::common::OutputOptions
- protocols::common::PerformanceModel
- protocols::common::Prologue
- protocols::common::SamplingOptions
- protocols::common::ScatterData
- protocols::common::SequencePositionData
- protocols::common::Stats
- protocols::common::StopConditions
- protocols::common::StreamingCompletionResponse
- protocols::common::TopLogprob
- protocols::common::Trace
- protocols::common::Usage
- protocols::common::llm_backend::BackendOutput
- protocols::common::llm_backend::LLMEngineOutput
- protocols::common::postprocessor::PostprocessedResponse
- protocols::common::preprocessor::PreprocessedRequest
- protocols::common::preprocessor::PreprocessedRequestBuilder
- protocols::openai::AnnotatedDelta
- protocols::openai::CompletionTokensDetails
- protocols::openai::CompletionUsage
- protocols::openai::GenericCompletionResponse
- protocols::openai::PromptTokensDetails
- protocols::openai::chat_completions::DeltaAggregator
- protocols::openai::chat_completions::DeltaGenerator
- protocols::openai::chat_completions::NvCreateChatCompletionRequest
- protocols::openai::chat_completions::NvCreateChatCompletionResponse
- protocols::openai::chat_completions::NvCreateChatCompletionStreamResponse
- protocols::openai::completions::CompletionChoice
- protocols::openai::completions::CompletionChoiceBuilder
- protocols::openai::completions::CompletionRequest
- protocols::openai::completions::CompletionResponse
- protocols::openai::completions::DeltaAggregator
- protocols::openai::completions::DeltaGenerator
- protocols::openai::completions::LogprobResult
- protocols::openai::completions::ResponseFactory
- protocols::openai::completions::ResponseFactoryBuilder
- protocols::openai::models::ModelInfo
- protocols::openai::models::ModelMetaData
- protocols::openai::models::Permission
- protocols::openai::nvext::NvExt
- protocols::openai::nvext::NvExtBuilder
- recorder::Recorder
- tokenizers::DecodeStream
- tokenizers::Encoding
- tokenizers::Error
- tokenizers::Sequence
- tokenizers::StopSequenceDecoder
- tokenizers::StopSequenceDecoderBuilder
- tokenizers::Tokenizer
- tokenizers::hf::HuggingFaceTokenizer
- tokens::PartialTokenBlock
- tokens::TokenBlock
- tokens::TokenSequence
- tokens::Tokens
- types::Annotated
Enums
- backend::StopTrigger
- gguf::GGUFArchitecture
- http::service::error::ServiceHttpError
- http::service::metrics::Endpoint
- http::service::metrics::RequestType
- http::service::metrics::Status
- http::service::service_v2::HttpServiceConfigBuilderError
- key_value_store::StorageError
- key_value_store::StorageOutcome
- kv_router::indexer::KvRouterError
- kv_router::protocols::KvCacheEventData
- kv_router::scheduler::KvSchedulerError
- model_card::model::ModelDeploymentCardBuilderError
- model_card::model::ModelInfoType
- model_card::model::PromptContextMixin
- model_card::model::PromptFormatterArtifact
- model_card::model::TokenizerKind
- model_type::ModelType
- preprocessor::prompt::PromptFormatter
- preprocessor::tools::ToolCallType
- preprocessor::tools::ToolChoice
- preprocessor::tools::ToolType
- protocols::codec::SseCodecError
- protocols::common::CompletionRequestBuilderError
- protocols::common::FinishReason
- protocols::common::LogProbs
- protocols::common::Logits
- protocols::common::PromptType
- protocols::common::StreamState
- protocols::common::StreamingResponse
- protocols::common::preprocessor::PreprocessedRequestBuilderError
- protocols::openai::StreamingDelta
- protocols::openai::completions::CompletionChoiceBuilderError
- protocols::openai::completions::ResponseFactoryBuilderError
- protocols::openai::nvext::NvExtBuilderError
- tokenizers::SequenceDecoderOutput
- tokenizers::TokenizerType
Traits
- common::versioned::Versioned
- key_value_store::KeyValueBucket
- key_value_store::KeyValueStore
- key_value_store::Versioned
- kv_router::WorkerSelector
- kv_router::indexer::KvIndexerInterface
- model_card::model::ModelInfo
- preprocessor::prompt::OAIChatLikeRequest
- preprocessor::prompt::OAIPromptFormatter
- protocols::ContentProvider
- protocols::common::SamplingOptionsProvider
- protocols::common::StopConditionsProvider
- protocols::openai::DeltaGeneratorExt
- protocols::openai::nvext::NvExtProvider
- tokenizers::traits::Decoder
- tokenizers::traits::Encoder
- tokenizers::traits::Tokenizer
Attribute Macros
- (no entries listed in this index)
Functions
- engines::make_engine_core
- engines::make_engine_full
- http::service::discovery::model_watcher
- http::service::metrics::router
- kv_router::indexer::compute_block_hash
- kv_router::indexer::compute_block_hash_for_seq
- kv_router::indexer::compute_hash
- kv_router::metrics_aggregator::collect_endpoints
- kv_router::metrics_aggregator::collect_endpoints_task
- kv_router::scheduler::process_worker_selection
- protocols::codec::create_message_stream
- protocols::convert_sse_stream
- protocols::openai::completions::prompt_to_string
- protocols::openai::scale_value
- tokenizers::create_tokenizer_from_file
Type Aliases
- backend::ExecutionContext
- backend::ExecutionOutputStream
- kv_router::indexer::WorkerId
- kv_router::recorder::KvRecorder
- protocols::DataStream
- protocols::TokenIdType
- protocols::common::llm_backend::LogProbs
- protocols::common::llm_backend::TokenType
- tokenizers::Offsets
- tokenizers::Result
- tokens::BlockHash
- tokens::SequenceHash
- tokens::Token
- types::openai::chat_completions::OpenAIChatCompletionsStreamingEngine
- types::openai::chat_completions::OpenAIChatCompletionsUnaryEngine
- types::openai::completions::OpenAICompletionsStreamingEngine
- types::openai::completions::OpenAICompletionsUnaryEngine
Statics
- (no entries listed in this index)
Constants
- gguf::GGUF_MULTI_FILE_DELIMITER
- http::service::metrics::REQUEST_STATUS_ERROR
- http::service::metrics::REQUEST_STATUS_SUCCESS
- http::service::metrics::REQUEST_TYPE_STREAM
- http::service::metrics::REQUEST_TYPE_UNARY
- kv_router::KV_EVENT_SUBJECT
- kv_router::KV_HIT_RATE_SUBJECT
- kv_router::KV_METRICS_ENDPOINT
- kv_router::indexer::XXH3_SEED
- model_card::BUCKET_NAME
- model_card::BUCKET_TTL
- model_card::model::BUCKET_NAME
- model_card::model::BUCKET_TTL
- preprocessor::ANNOTATION_FORMATTED_PROMPT
- preprocessor::ANNOTATION_TOKEN_IDS
- protocols::common::FREQUENCY_PENALTY_RANGE
- protocols::common::TEMPERATURE_RANGE
- protocols::common::TOP_P_RANGE
- protocols::openai::FREQUENCY_PENALTY_RANGE
- protocols::openai::MAX_FREQUENCY_PENALTY
- protocols::openai::MAX_PRESENCE_PENALTY
- protocols::openai::MAX_TEMPERATURE
- protocols::openai::MAX_TOP_P
- protocols::openai::MIN_FREQUENCY_PENALTY
- protocols::openai::MIN_PRESENCE_PENALTY
- protocols::openai::MIN_TEMPERATURE
- protocols::openai::MIN_TOP_P
- protocols::openai::PRESENCE_PENALTY_RANGE
- protocols::openai::TEMPERATURE_RANGE
- protocols::openai::TOP_P_RANGE