1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
//! Named constants for unit conversions and hardware math.
//!
//! Replaces magic numbers throughout the codebase with self-documenting
//! constants. All values are compile-time constants.
// ---------------------------------------------------------------------------
// Byte / bit / frequency conversions
// ---------------------------------------------------------------------------
/// Number of bits in a single byte, as `f64` for floating-point math.
pub const BITS_PER_BYTE: f64 = 8.0;
/// Number of bytes in one gibibyte (binary unit: 2^30 = 1,073,741,824), as `u64`.
pub const BYTES_PER_GIB_U64: u64 = 1 << 30;
/// Number of bytes in one gibibyte, as `f64`.
///
/// Derived from [`BYTES_PER_GIB_U64`]; 2^30 is exactly representable in
/// `f64`, so the cast loses nothing.
pub const BYTES_PER_GIB: f64 = BYTES_PER_GIB_U64 as f64;
/// Number of bytes in one gigabyte (SI decimal unit: 10^9 = 1,000,000,000).
pub const BYTES_PER_GB: f64 = 1e9;
/// Number of megahertz in one gigahertz; dividing a MHz value by this yields GHz.
pub const MHZ_PER_GHZ: f64 = 1e3;
// ---------------------------------------------------------------------------
// DDR memory
// ---------------------------------------------------------------------------
/// Transfers per clock cycle for DDR (Double Data Rate) memory.
///
/// DDR memory moves data twice per clock cycle, hence the factor of two.
pub const DDR_MULTIPLIER: f64 = 2.0;
// ---------------------------------------------------------------------------
// PCIe encoding overhead
// ---------------------------------------------------------------------------
/// Line-encoding efficiency for PCIe Gen1/Gen2 links.
///
/// These generations use 8b/10b encoding, giving 80% efficiency.
pub const PCIE_GEN1_GEN2_ENCODING: f64 = 8.0 / 10.0;
/// Line-encoding efficiency for PCIe Gen3 and later links.
///
/// These generations use 128b/130b encoding, giving roughly 98.5% efficiency.
pub const PCIE_GEN3_PLUS_ENCODING: f64 = 128.0 / 130.0;
/// Link speed, in GT/s, that serves as the threshold for Gen3-and-later links.
pub const PCIE_GEN3_SPEED_GTS: f64 = 8.0;
// ---------------------------------------------------------------------------
// Model parameter conversions
// ---------------------------------------------------------------------------
/// Scale factor from "millions of parameters" to a raw parameter count,
/// for converting `model_params_millions` inputs.
pub const PARAMS_PER_MILLION: f64 = 1e6;
/// Storage size of one FP16/BF16 parameter, in bytes (16 bits = 2 bytes).
pub const FP16_BYTES_PER_PARAM: f64 = 2.0;
/// Width of one FP32 parameter, in bits.
///
/// Serves as the baseline when computing the memory reduction factor.
pub const FP32_BITS: f64 = 32.0;
// ---------------------------------------------------------------------------
// Memory estimation heuristics
// ---------------------------------------------------------------------------
/// Divisor that produces the activation / KV-cache overhead: one fifth
/// (20%) of the raw model size.
///
/// Intended use: `raw + raw / ACTIVATION_OVERHEAD_DIVISOR`.
pub const ACTIVATION_OVERHEAD_DIVISOR: u64 = 5;
/// Rough parameter count of a single transformer layer (~250M).
///
/// The pipeline-parallel planner uses this to estimate how many layers a
/// model has from its total parameter count.
pub const PARAMS_PER_LAYER_ESTIMATE: u64 = 250 * 1_000_000;
/// Numerator of the base tokens/sec estimate: one billion, so that a
/// 1B-parameter model maps to a 1 tok/s baseline.
pub const TOKENS_PER_SEC_BASE: f64 = 1e9;
// ---------------------------------------------------------------------------
// Sharding planner thresholds
// ---------------------------------------------------------------------------
// Tensor-parallel (TP) tuning.

/// Tensor-parallel throughput bonus applied for an NVSwitch interconnect.
pub const NVSWITCH_TP_BONUS: f64 = 1.8;
/// Upper bound on the interconnect bonus when no NVSwitch is present.
pub const MAX_NON_NVSWITCH_TP_BONUS: f64 = 0.8;
/// Value by which interconnect bandwidth is divided to scale it into a
/// bonus factor.
pub const TP_INTERCONNECT_BW_DIVISOR: f64 = 200.0;
/// Lowest interconnect bandwidth, in GB/s, at which tensor-parallel is
/// considered.
pub const TP_MIN_INTERCONNECT_BW: f64 = 100.0;
/// Largest GPU count allowed for tensor-parallel without NVSwitch.
pub const TP_MAX_DEVICES_WITHOUT_NVSWITCH: usize = 8;
/// Bonus multiplier for tensor-parallel over TPU ICI.
pub const TPU_TP_ICI_BONUS: f64 = 2.0;

// Pipeline-parallel (PP) tuning.

/// Pipeline-parallel efficiency on a high-bandwidth interconnect
/// (about 15% overhead).
pub const PP_HIGH_BW_EFFICIENCY: f64 = 0.85;
/// Pipeline-parallel efficiency when only PCIe is available
/// (about 35% overhead).
pub const PP_PCIE_ONLY_EFFICIENCY: f64 = 0.65;

// Unit conversion used for IB rates.

/// Gigabits per gigabyte; divide an IB rate in Gb/s by this to get GB/s.
pub const GBITS_PER_GBYTE: f64 = 8.0;