1mod batch;
39mod buffer;
40mod circuit;
41mod connection;
42mod memory;
43mod perf_metrics;
44mod profiling;
45mod rate_limit;
46mod resource_pool;
47mod shutdown;
48
49pub use profiling::{
51 cached_nanos, cached_nanos_or_now, cpu_cycles, get_page_faults, init_time_service,
52 with_page_fault_tracking,
53};
54
55pub use perf_metrics::{InferencePhase, PerfMetrics};
57
58#[cfg(not(target_arch = "wasm32"))]
60pub use memory::AlignedBuffer;
61pub use memory::{
62 is_direct_io_aligned, madvise_region, prefetch_for_inference, prefetch_ptr, prefetch_slice,
63 CacheAligned, MemoryAdvice, PrefetchLocality, CACHE_LINE_SIZE, CACHE_LINE_SIZE_F32,
64 DIRECT_IO_ALIGNMENT,
65};
66
67pub use buffer::{BufferWatermarks, WatermarkedBuffer};
69
70pub use circuit::{CircuitBreaker, CircuitState};
72
73pub use shutdown::{GracefulShutdown, ShutdownGuard, ShutdownResult};
75
76pub use resource_pool::{PooledResource, ResourcePool};
78
79pub use rate_limit::{LimitError, ServeLimits};
81
82pub use connection::{ConnectionState, KeepAliveConfig, ManagedConnection};
84
85pub use batch::{balance211, split_batch, Balance211Iter, BatchSplitStrategy};
87
88mod kv_cache;
90pub use kv_cache::{KvCacheManager, KvCacheSlotInfo, SequentialBatchOrderer};
91
92mod simd_config;
94pub use simd_config::{
95 unroll_tail_process, AmxTileConfig, LazySimdConfig, SimdBackendState, UnrollFactor,
96 UnrollTailIterator,
97};
98
99mod exec_graph;
101pub use exec_graph::{
102 BrickBottleneck, BrickCategory, BrickId, BrickSample, BrickStats, CategoryStats, EdgeType,
103 ExecutionEdge, ExecutionGraph, ExecutionNode, ExecutionNodeId, PtxRegistry, SyncMode,
104 TransferDirection,
105};
106
107mod profiler;
109pub use profiler::{
110 fnv1a_f32_checksum, BrickIdTimer, BrickProfiler, BrickTimer, DivergenceInfo, KernelChecksum,
111 TileLevel, TileStats, TileTimer,
112};
113
114mod tracing;
116pub use tracing::{
117 AttentionTraceConfig, AttentionWeightTrace, KvCacheSessionTrace, KvCacheStateTrace,
118 LayerActivationTrace, LogitEvolutionTrace, ModelActivationTrace, ModelQuantizationError,
119 ModelTracer, ModelTracerConfig, ModelTracerSummary, QuantType, QuantizationErrorTrace,
120 TensorStats, TokenLogitEvolution,
121};
122
123mod patterns;
125pub use patterns::{
126 reserve_capacity, AsyncResult, BoundedQueue, DualWakerState, FlowControlError,
127 GraphReuseCounter, ReserveStrategy, StrategicBuffer, StreamCapacity, WakeDecision,
128 WakeSkipState,
129};
130
131mod ops;
133pub use ops::{AddOp, DotOp, MatmulOp, SoftmaxOp};
134
135mod fused_ops;
137pub use fused_ops::{FusedGateUpOp, FusedGateUpWeights, FusedQKVOp, FusedQKVWeights};
138
139mod attention;
141pub use attention::AttentionOp;
142
143mod quant_ops;
145pub use quant_ops::{BlockQ5K, BlockQ6K, DotQ5KOp, DotQ6KOp};
146
147#[cfg(test)]
149mod tests;
150
151mod async_profiler;
153pub use async_profiler::AsyncTaskProfiler;
154
155mod budget;
157pub use budget::{ByteBudget, TokenBudget, TokenResult};
158
159mod types;
161pub use types::{
162 AssertionResult, Backend, BrickError, BrickVerification, ComputeAssertion, ComputeBackend,
163 ComputeOp,
164};
165
166mod compute_brick;
168pub use compute_brick::{BrickLayer, ComputeBrick};