1pub mod adaptive_lookahead;
34pub mod adaptive_sampling;
35#[cfg(feature = "server")]
36pub mod admin;
37#[cfg(feature = "server")]
38pub mod api_extensions;
39#[cfg(feature = "server")]
40pub mod api_types;
41#[cfg(not(target_arch = "wasm32"))]
42pub mod async_engine;
43pub mod auto_tuner;
44pub mod batch_engine;
45pub mod beam_search;
46pub mod builders;
47pub mod circuit_breaker;
48#[cfg(feature = "server")]
49pub mod completions;
50pub mod config;
51pub mod constrained_decoding;
52pub mod context_manager;
53pub mod continuous_batch;
54pub mod convenience;
55pub mod dedup;
56#[cfg(feature = "server")]
57pub mod distributed;
58pub mod embedding_index;
59#[cfg(feature = "server")]
60pub mod embeddings;
61pub mod engine;
62pub mod error;
63pub mod grammar;
64pub mod health;
65pub mod hot_reload;
66pub mod json_schema;
67pub mod kv_cache_policy;
68pub mod memory;
69pub mod metrics;
70pub mod middleware;
71pub mod model_cache;
72pub mod multi_model;
73pub mod native_tokenizer;
74pub mod nbest;
75pub mod ngram_cache;
76pub mod pipeline;
77pub mod prefix_cache_engine;
78pub mod presets;
79pub mod profiler;
80pub mod quality_metrics;
81#[cfg(feature = "rag")]
82pub mod rag_server;
83pub mod rate_limiter;
84pub mod recovery;
85pub mod request_id;
86pub mod request_metrics;
87pub mod request_queue;
88pub mod sampling;
89pub mod sampling_advanced;
90pub mod semantic_cache;
91#[cfg(feature = "server")]
92pub mod server;
93pub mod speculative;
94pub mod stream_metrics;
95pub mod streaming;
96pub mod token_budget;
97pub mod token_healing;
98pub mod tokenizer_bridge;
99#[cfg(feature = "server")]
100pub mod tool_calling;
101pub mod tracing_setup;
102pub mod wasm_api;
103#[cfg(feature = "server")]
104pub mod web_ui;
105
106pub use adaptive_lookahead::{AdaptiveLookahead, AdaptiveLookaheadConfig, AdaptiveLookaheadError};
107pub use adaptive_sampling::{
108 AdaptiveSamplerChain, AdaptiveStrategy, EntropyCooling, GenerationState, RepetitionAdaptation,
109 ScheduledDecay,
110};
111pub use auto_tuner::{
112 AutoTuner, CpuArch, CpuFeatures, KernelBenchmark, KvCacheType, MemoryBudget, SimdTier,
113 TuningRecommendation,
114};
115pub use builders::{ConfigBuilder, EngineBuilder, SamplerBuilder};
116pub use circuit_breaker::{CircuitBreaker, CircuitBreakerConfig, CircuitState};
117pub use config::OxiBonsaiConfig;
118pub use constrained_decoding::{
119 AllowListConstraint, ConstrainedSampler, ConstrainedSamplerBuilder, ConstraintError,
120 JsonConstraint, JsonParseState, LengthConstraint, NoConstraint, RegexConstraint,
121 SequenceConstraint, TokenConstraint,
122};
123pub use convenience::{GenerationResult, MemoryEstimate, ModelFileInfo, TokenStats};
124pub use dedup::{DedupCache, DedupStats, RequestKey};
125#[cfg(feature = "server")]
126pub use distributed::{
127 ConsistentHashRing, CoordinatorConfig, DistributedCoordinator, NodeInfo, NodeRegistry,
128};
129pub use engine::InferenceEngine;
130pub use error::{RuntimeError, RuntimeResult};
131pub use grammar::{
132 compile_json_schema, compile_json_schema_str, compile_regex, parse_bnf, parse_gbnf,
133 BnfParseError, EarleyRecognizer, GbnfParseError, Grammar, GrammarConstraint,
134 JsonSchemaCompileError, RegexCompileError, Rule, Symbol,
135};
136pub use health::{HealthReport, HealthStatus};
137pub use hot_reload::{HotReloadCoordinator, ModelVersion, ReloadLog};
138pub use json_schema::{
139 parse_schema, schema_example, schema_template, validate_against_schema, SchemaError,
140 SchemaState, SchemaType,
141};
142pub use kv_cache_policy::{KvCacheLevel, KvCachePolicy, KvCachePolicyConfig, KvCachePolicyError};
143pub use memory::{get_rss_bytes, MemoryProfiler, MemorySnapshot};
144pub use metrics::InferenceMetrics;
145pub use multi_model::{
146 AdapterRef, AdapterStack, EndpointStatus, ModelEndpoint, ModelId, ModelListEntry,
147 ModelRegistry, ModelRouter, RoutingError,
148};
149pub use native_tokenizer::{NativeTokenizerBridge, NativeTokenizerError};
150pub use nbest::{Hypothesis, NBestDecoder, NBestList};
151pub use presets::SamplingPreset;
152pub use profiler::{flop_counter, AggregateStats, ProfileEvent, ProfileTrace, Profiler};
153pub use quality_metrics::{
154 extract_ngrams, perplexity_from_logprobs, repetition_penalty_rate, self_bleu, token_entropy,
155 BatchQualityAnalyzer, BleuScore, DiversityMetrics, GenerationQualityReport, RepetitionMetrics,
156};
157pub use recovery::{ErrorClass, RecoveryStrategy};
158pub use request_id::RequestId;
159pub use request_metrics::{
160 AggregateRateSnapshot, RequestRateAggregator, RequestRateSnapshot, RequestRateTracker,
161};
162pub use sampling::Sampler;
163pub use sampling_advanced::{
164 EtaSampler, LcgRng, MinPSampler, MirostatV1Sampler, MirostatV2Sampler, SamplerChain,
165 SamplerStep, TypicalSampler,
166};
167pub use stream_metrics::{RequestStreamMetrics, StreamMetricsSnapshot, StreamingMetricsAggregator};
168pub use token_budget::{
169 BudgetConfig, BudgetError, BudgetPolicy, GlobalTokenBudget, RequestBudget, TokenCostEstimate,
170};
171pub use tokenizer_bridge::TokenizerBridge;
172#[cfg(feature = "server")]
173pub use tool_calling::{
174 build_tool_constraint, make_tool_call, new_tool_call_id, select_tool, validate_tool_arguments,
175 ToolCallError, ToolRegistry,
176};
177pub use tracing_setup::{init_tracing, TracingConfig};
178#[cfg(feature = "server")]
179pub use web_ui::create_ui_router;