Skip to main content

trueno/brick/
mod.rs

1//! ComputeBrick: Token-Centric Compute Units
2//!
3//! A **ComputeBrick** is a self-verifying, token-centric compute unit that bundles:
4//! - **Operation**: The compute operation (matmul, dot, softmax, etc.)
5//! - **Assertions**: Falsifiable claims about the output (equivalence, bounds)
6//! - **Budget**: Performance target in µs/token or tokens/sec
7//! - **Backend**: Execution target (Scalar, AVX2, CUDA, etc.)
8//!
9//! # Core Insight
10//!
11//! A **token** is the unit of data; a **ComputeBrick** is the unit of compute.
12//!
13//! ```text
14//! Token ──▶ [ComputeBrick] ──▶ Token
15//!            (matmul, softmax, attention)
16//! ```
17//!
18//! # Example
19//!
20//! ```rust,ignore
21//! use trueno::brick::{ComputeBrick, ComputeBackend, MatmulOp};
22//!
23//! let matmul = ComputeBrick::new(MatmulOp::new(1024, 1024, 1024))
24//!     .assert_equiv(ComputeBackend::Scalar)
25//!     .budget_tok_per_sec(50_000.0)
26//!     .backend(ComputeBackend::Avx2);
27//!
28//! let result = matmul.run((a, b))?;
29//! println!("Throughput: {:.0} tok/s", result.tokens_per_sec);
30//! ```
31//!
32//! # Scientific Basis
33//!
34//! Per Popper (1959), a theory that makes no falsifiable predictions is not scientific.
35//! A ComputeBrick with no assertions makes no testable claims and is therefore invalid.
36
37// Submodules re-exported in the sections below (further submodules are declared next to their own re-exports)
38mod batch;
39mod buffer;
40mod circuit;
41mod connection;
42mod memory;
43mod perf_metrics;
44mod profiling;
45mod rate_limit;
46mod resource_pool;
47mod shutdown;
48
49// Re-export profiling functions
50pub use profiling::{
51    cached_nanos, cached_nanos_or_now, cpu_cycles, get_page_faults, init_time_service,
52    with_page_fault_tracking,
53};
54
55// Re-export perf_metrics types
56pub use perf_metrics::{InferencePhase, PerfMetrics};
57
58// Re-export memory items (`AlignedBuffer` is not re-exported on wasm32 targets)
59#[cfg(not(target_arch = "wasm32"))]
60pub use memory::AlignedBuffer;
61pub use memory::{
62    is_direct_io_aligned, madvise_region, prefetch_for_inference, prefetch_ptr, prefetch_slice,
63    CacheAligned, MemoryAdvice, PrefetchLocality, CACHE_LINE_SIZE, CACHE_LINE_SIZE_F32,
64    DIRECT_IO_ALIGNMENT,
65};
66
67// Re-export buffer types
68pub use buffer::{BufferWatermarks, WatermarkedBuffer};
69
70// Re-export circuit breaker types
71pub use circuit::{CircuitBreaker, CircuitState};
72
73// Re-export shutdown types
74pub use shutdown::{GracefulShutdown, ShutdownGuard, ShutdownResult};
75
76// Re-export resource pool types
77pub use resource_pool::{PooledResource, ResourcePool};
78
79// Re-export rate limiting types
80pub use rate_limit::{LimitError, ServeLimits};
81
82// Re-export connection types
83pub use connection::{ConnectionState, KeepAliveConfig, ManagedConnection};
84
85// Re-export batch types
86pub use batch::{balance211, split_batch, Balance211Iter, BatchSplitStrategy};
87
88// KV cache management
89mod kv_cache;
90pub use kv_cache::{KvCacheManager, KvCacheSlotInfo, SequentialBatchOrderer};
91
92// SIMD configuration
93mod simd_config;
94pub use simd_config::{
95    unroll_tail_process, AmxTileConfig, LazySimdConfig, SimdBackendState, UnrollFactor,
96    UnrollTailIterator,
97};
98
99// Execution graph and brick profiling types (PAR-073, PAR-200, PAR-201)
100mod exec_graph;
101pub use exec_graph::{
102    BrickBottleneck, BrickCategory, BrickId, BrickSample, BrickStats, CategoryStats, EdgeType,
103    ExecutionEdge, ExecutionGraph, ExecutionNode, ExecutionNodeId, PtxRegistry, SyncMode,
104    TransferDirection,
105};
106
107// BrickProfiler and tile profiling (TILING-SPEC-001)
108mod profiler;
109pub use profiler::{
110    fnv1a_f32_checksum, BrickIdTimer, BrickProfiler, BrickTimer, DivergenceInfo, KernelChecksum,
111    TileLevel, TileStats, TileTimer,
112};
113
114// Model-level inference tracing (Phase 13, E.11)
115mod tracing;
116pub use tracing::{
117    AttentionTraceConfig, AttentionWeightTrace, KvCacheSessionTrace, KvCacheStateTrace,
118    LayerActivationTrace, LogitEvolutionTrace, ModelActivationTrace, ModelQuantizationError,
119    ModelTracer, ModelTracerConfig, ModelTracerSummary, QuantType, QuantizationErrorTrace,
120    TensorStats, TokenLogitEvolution,
121};
122
123// Async and buffer patterns (Phase 12, E.10)
124mod patterns;
125pub use patterns::{
126    reserve_capacity, AsyncResult, BoundedQueue, DualWakerState, FlowControlError,
127    GraphReuseCounter, ReserveStrategy, StrategicBuffer, StreamCapacity, WakeDecision,
128    WakeSkipState,
129};
130
131// Built-in compute operations
132mod ops;
133pub use ops::{AddOp, DotOp, MatmulOp, SoftmaxOp};
134
135// Fused operations for transformer inference (PMAT-PERF-009)
136mod fused_ops;
137pub use fused_ops::{FusedGateUpOp, FusedGateUpWeights, FusedQKVOp, FusedQKVWeights};
138
139// SIMD-optimized attention operation (PMAT-017)
140mod attention;
141pub use attention::AttentionOp;
142
143// Q5_K and Q6_K quantization operations (llama.cpp compatible)
144mod quant_ops;
145pub use quant_ops::{BlockQ5K, BlockQ6K, DotQ5KOp, DotQ6KOp};
146
147// Tests (7,400+ lines extracted into a separate module for TDG compliance; compiled only under `cfg(test)`)
148#[cfg(test)]
149mod tests;
150
151// Async task profiler (Phase 11, E.9.4)
152mod async_profiler;
153pub use async_profiler::AsyncTaskProfiler;
154
155// Budget types (TokenBudget, ByteBudget, TokenResult)
156mod budget;
157pub use budget::{ByteBudget, TokenBudget, TokenResult};
158
159// Core brick types (ComputeBackend, BrickError, ComputeAssertion, ComputeOp, etc.)
160mod types;
161pub use types::{
162    AssertionResult, Backend, BrickError, BrickVerification, ComputeAssertion, ComputeBackend,
163    ComputeOp,
164};
165
166// ComputeBrick struct, builder methods, and BrickLayer composition
167mod compute_brick;
168pub use compute_brick::{BrickLayer, ComputeBrick};