1use serde::{Deserialize, Serialize};
6
/// `intermediate_size` of the LLaMA-2 7B preset.
const LLAMA2_7B_INTERMEDIATE_SIZE: usize = 11_008;
/// `hidden_size` of the LLaMA-2 13B preset.
const LLAMA2_13B_HIDDEN_SIZE: usize = 5_120;
/// `intermediate_size` of the LLaMA-2 13B preset.
const LLAMA2_13B_INTERMEDIATE_SIZE: usize = 13_824;
/// `vocab_size` shared by the LLaMA-2 and Mistral presets.
const LLAMA_VOCAB_SIZE: usize = 32_000;
/// `intermediate_size` of the Mistral 7B preset.
const MISTRAL_INTERMEDIATE_SIZE: usize = 14_336;
/// `max_position_embeddings` of the Mistral 7B preset.
const MISTRAL_MAX_SEQ_LEN: usize = 32_768;
/// `hidden_size` of the Qwen2 0.5B preset.
const QWEN2_0_5B_HIDDEN_SIZE: usize = 896;
/// `intermediate_size` of the Qwen2 0.5B preset.
const QWEN2_0_5B_INTERMEDIATE_SIZE: usize = 4_864;
/// `vocab_size` used by the Qwen2 0.5B and Qwen3 4B presets.
const QWEN2_VOCAB_SIZE: usize = 151_936;
/// `max_position_embeddings` used by the Qwen2 presets.
const QWEN2_MAX_SEQ_LEN: usize = 32_768;
/// `rope_theta` used by the Qwen2, Qwen3 and Qwen3.5 presets.
const QWEN2_ROPE_THETA: f32 = 1_000_000.0;
/// `hidden_size` of the Qwen3 4B preset.
const QWEN3_4B_HIDDEN_SIZE: usize = 2_560;
/// `intermediate_size` of the Qwen3 4B preset.
const QWEN3_4B_INTERMEDIATE_SIZE: usize = 9_728;
/// `hidden_size` of the Qwen3.5 9B preset.
const QWEN3_5_9B_HIDDEN_SIZE: usize = 4_096;
/// `intermediate_size` of the Qwen3.5 9B preset.
const QWEN3_5_9B_INTERMEDIATE_SIZE: usize = 12_288;
/// `vocab_size` of the Qwen3.5 9B preset.
const QWEN3_5_VOCAB_SIZE: usize = 248_320;
/// `max_position_embeddings` of the Qwen3.5 9B preset.
const QWEN3_5_MAX_SEQ_LEN: usize = 262_144;
/// `rope_theta` used by the LLaMA-2, Mistral and tiny presets, and the fallback
/// applied when APR metadata carries no value.
const DEFAULT_ROPE_THETA: f32 = 10_000.0;

/// `hidden_size` of the CodeBERT preset.
const CODEBERT_HIDDEN_SIZE: usize = 768;
/// `intermediate_size` of the CodeBERT preset.
const CODEBERT_INTERMEDIATE_SIZE: usize = 3_072;
/// `vocab_size` of the CodeBERT preset.
const CODEBERT_VOCAB_SIZE: usize = 50_265;
31const CODEBERT_MAX_POSITION: usize = 514; #[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
37#[serde(rename_all = "snake_case")]
38pub enum ModelArchitecture {
39 #[default]
41 Decoder,
42 Encoder,
44}
45
46#[derive(Debug, Clone, Serialize, Deserialize)]
48pub struct TransformerConfig {
49 pub hidden_size: usize,
51 pub num_attention_heads: usize,
53 pub num_kv_heads: usize,
55 pub intermediate_size: usize,
57 pub num_hidden_layers: usize,
59 pub vocab_size: usize,
61 pub max_position_embeddings: usize,
63 pub rms_norm_eps: f32,
65 pub rope_theta: f32,
67 pub use_bias: bool,
69 #[serde(default)]
72 pub head_dim_override: Option<usize>,
73 #[serde(default)]
76 pub architecture: ModelArchitecture,
77 #[serde(default)]
80 pub hf_architecture: Option<String>,
81 #[serde(default)]
84 pub hf_model_type: Option<String>,
85 #[serde(default)]
88 pub tie_word_embeddings: bool,
89}
90
91impl TransformerConfig {
92 pub fn llama2_7b() -> Self {
94 Self {
95 hidden_size: 4096,
96 num_attention_heads: 32,
97 num_kv_heads: 32,
98 intermediate_size: LLAMA2_7B_INTERMEDIATE_SIZE,
99 num_hidden_layers: 32,
100 vocab_size: LLAMA_VOCAB_SIZE,
101 max_position_embeddings: 4096,
102 rms_norm_eps: 1e-6,
103 rope_theta: DEFAULT_ROPE_THETA,
104 use_bias: false,
105 head_dim_override: None,
106 architecture: ModelArchitecture::Decoder,
107 hf_architecture: None,
108 hf_model_type: None,
109 tie_word_embeddings: false,
110 }
111 }
112
113 pub fn llama2_13b() -> Self {
115 Self {
116 hidden_size: LLAMA2_13B_HIDDEN_SIZE,
117 num_attention_heads: 40,
118 num_kv_heads: 40,
119 intermediate_size: LLAMA2_13B_INTERMEDIATE_SIZE,
120 num_hidden_layers: 40,
121 vocab_size: LLAMA_VOCAB_SIZE,
122 max_position_embeddings: 4096,
123 rms_norm_eps: 1e-6,
124 rope_theta: DEFAULT_ROPE_THETA,
125 use_bias: false,
126 head_dim_override: None,
127 architecture: ModelArchitecture::Decoder,
128 hf_architecture: None,
129 hf_model_type: None,
130 tie_word_embeddings: false,
131 }
132 }
133
134 pub fn mistral_7b() -> Self {
136 Self {
137 hidden_size: 4096,
138 num_attention_heads: 32,
139 num_kv_heads: 8, intermediate_size: MISTRAL_INTERMEDIATE_SIZE,
141 num_hidden_layers: 32,
142 vocab_size: LLAMA_VOCAB_SIZE,
143 max_position_embeddings: MISTRAL_MAX_SEQ_LEN,
144 rms_norm_eps: 1e-5,
145 rope_theta: DEFAULT_ROPE_THETA,
146 use_bias: false,
147 head_dim_override: None,
148 architecture: ModelArchitecture::Decoder,
149 hf_architecture: None,
150 hf_model_type: None,
151 tie_word_embeddings: false,
152 }
153 }
154
155 pub fn qwen2_0_5b() -> Self {
170 Self {
171 hidden_size: QWEN2_0_5B_HIDDEN_SIZE,
172 num_attention_heads: 14,
173 num_kv_heads: 2,
174 intermediate_size: QWEN2_0_5B_INTERMEDIATE_SIZE,
175 num_hidden_layers: 24,
176 vocab_size: QWEN2_VOCAB_SIZE,
177 max_position_embeddings: QWEN2_MAX_SEQ_LEN,
178 rms_norm_eps: 1e-6,
179 rope_theta: QWEN2_ROPE_THETA,
180 use_bias: true,
181 head_dim_override: None,
182 architecture: ModelArchitecture::Decoder,
183 hf_architecture: None,
184 hf_model_type: None,
185 tie_word_embeddings: true,
186 }
187 }
188
189 #[rustfmt::skip]
191 pub fn qwen2_1_5b() -> Self { Self { hidden_size: 1536, num_attention_heads: 12, intermediate_size: 8960, num_hidden_layers: 28, vocab_size: 151936, ..Self::qwen2_0_5b() } }
192
193 pub fn qwen2_7b() -> Self {
198 Self {
199 hidden_size: 3584,
200 num_attention_heads: 28,
201 num_kv_heads: 4,
202 intermediate_size: 18944,
203 num_hidden_layers: 28,
204 vocab_size: 152064,
205 max_position_embeddings: QWEN2_MAX_SEQ_LEN,
206 rms_norm_eps: 1e-6,
207 rope_theta: QWEN2_ROPE_THETA,
208 use_bias: true,
209 head_dim_override: None,
210 architecture: ModelArchitecture::Decoder,
211 hf_architecture: None,
212 hf_model_type: None,
213 tie_word_embeddings: false,
214 }
215 }
216
217 pub fn qwen3_4b() -> Self {
222 Self {
223 hidden_size: QWEN3_4B_HIDDEN_SIZE,
224 num_attention_heads: 32,
225 num_kv_heads: 8,
226 intermediate_size: QWEN3_4B_INTERMEDIATE_SIZE,
227 num_hidden_layers: 36,
228 vocab_size: QWEN2_VOCAB_SIZE, max_position_embeddings: 40960,
230 rms_norm_eps: 1e-6,
231 rope_theta: QWEN2_ROPE_THETA, use_bias: false, head_dim_override: Some(128), architecture: ModelArchitecture::Decoder,
235 hf_architecture: None,
236 hf_model_type: None,
237 tie_word_embeddings: false,
238 }
239 }
240
241 pub fn qwen3_5_9b() -> Self {
247 Self {
248 hidden_size: QWEN3_5_9B_HIDDEN_SIZE,
249 num_attention_heads: 16,
250 num_kv_heads: 4,
251 intermediate_size: QWEN3_5_9B_INTERMEDIATE_SIZE,
252 num_hidden_layers: 32,
253 vocab_size: QWEN3_5_VOCAB_SIZE,
254 max_position_embeddings: QWEN3_5_MAX_SEQ_LEN,
255 rms_norm_eps: 1e-6,
256 rope_theta: QWEN2_ROPE_THETA, use_bias: false, head_dim_override: None, architecture: ModelArchitecture::Decoder,
260 hf_architecture: None,
261 hf_model_type: None,
262 tie_word_embeddings: false,
263 }
264 }
265
266 pub fn from_apr_metadata(
278 hidden_size: Option<usize>,
279 num_heads: Option<usize>,
280 num_kv_heads: Option<usize>,
281 intermediate_size: Option<usize>,
282 num_layers: Option<usize>,
283 vocab_size: Option<usize>,
284 max_position_embeddings: Option<usize>,
285 rms_norm_eps: Option<f32>,
286 rope_theta: Option<f32>,
287 architecture: Option<&str>,
288 ) -> Option<Self> {
289 let hidden = hidden_size?;
290 let heads = num_heads?;
291 let layers = num_layers?;
292 let vocab = vocab_size?;
293 let intermediate = intermediate_size?;
294
295 let (use_bias, head_dim_override) = match architecture {
298 Some(a) if a.starts_with("qwen3") => {
299 let computed = hidden / heads;
301 let override_dim = if computed == 128 { None } else { Some(128) };
302 (false, override_dim)
303 }
304 Some(a) if a.starts_with("qwen2") => (true, None),
305 _ => (false, None),
306 };
307
308 Some(Self {
309 hidden_size: hidden,
310 num_attention_heads: heads,
311 num_kv_heads: num_kv_heads.unwrap_or(heads),
312 intermediate_size: intermediate,
313 num_hidden_layers: layers,
314 vocab_size: vocab,
315 max_position_embeddings: max_position_embeddings.unwrap_or(32768),
316 rms_norm_eps: rms_norm_eps.unwrap_or(1e-6),
317 rope_theta: rope_theta.unwrap_or(DEFAULT_ROPE_THETA),
318 use_bias,
319 head_dim_override,
320 architecture: match architecture {
321 Some(a) if a.contains("bert") || a.contains("roberta") => {
322 ModelArchitecture::Encoder
323 }
324 _ => ModelArchitecture::Decoder,
325 },
326 hf_architecture: None,
327 hf_model_type: None,
328 tie_word_embeddings: false,
329 })
330 }
331
332 pub fn from_size_str(size: &str) -> Result<Self, String> {
338 match size {
339 "codebert" | "codebert-base" | "125M" => Ok(Self::codebert()),
340 "0.5B" | "500M" | "qwen2-0.5b" => Ok(Self::qwen2_0_5b()),
341 "1.5B" | "qwen2.5-1.5b" | "qwen2-1.5b" => Ok(Self::qwen2_1_5b()),
342 "7B" | "qwen2.5-7b" => Ok(Self::qwen2_7b()),
343 "4B" | "qwen3-4b" | "qwen3" => Ok(Self::qwen3_4b()),
344 "9B" | "qwen3.5-9b" | "qwen3_5" | "qwen3.5" => Ok(Self::qwen3_5_9b()),
345 unknown => Err(format!(
346 "Unknown model size '{unknown}'. Known sizes: codebert, 0.5B, 4B, 7B, 9B"
347 )),
348 }
349 }
350
351 pub fn codebert() -> Self {
356 Self {
357 hidden_size: CODEBERT_HIDDEN_SIZE,
358 num_attention_heads: 12,
359 num_kv_heads: 12, intermediate_size: CODEBERT_INTERMEDIATE_SIZE,
361 num_hidden_layers: 12,
362 vocab_size: CODEBERT_VOCAB_SIZE,
363 max_position_embeddings: CODEBERT_MAX_POSITION,
364 rms_norm_eps: 1e-5, rope_theta: 0.0, use_bias: true,
367 head_dim_override: None,
368 architecture: ModelArchitecture::Encoder,
369 hf_architecture: None,
370 hf_model_type: None,
371 tie_word_embeddings: false,
372 }
373 }
374
375 pub fn tiny() -> Self {
377 Self {
378 hidden_size: 64,
379 num_attention_heads: 2,
380 num_kv_heads: 2,
381 intermediate_size: 256,
382 num_hidden_layers: 2,
383 vocab_size: 1000,
384 max_position_embeddings: 512,
385 rms_norm_eps: 1e-6,
386 rope_theta: DEFAULT_ROPE_THETA,
387 use_bias: false,
388 head_dim_override: None,
389 architecture: ModelArchitecture::Decoder,
390 hf_architecture: None,
391 hf_model_type: None,
392 tie_word_embeddings: false,
393 }
394 }
395
396 pub fn is_encoder(&self) -> bool {
398 self.architecture == ModelArchitecture::Encoder
399 }
400
401 pub fn hf_architecture_name(&self) -> &str {
404 if let Some(ref name) = self.hf_architecture {
405 return name;
406 }
407 if self.is_encoder() {
409 "BertModel"
410 } else if self.use_bias && self.vocab_size > 150000 {
411 "Qwen2ForCausalLM"
413 } else {
414 "LlamaForCausalLM"
415 }
416 }
417
418 pub fn hf_model_type_str(&self) -> &str {
420 if let Some(ref mt) = self.hf_model_type {
421 return mt;
422 }
423 if self.is_encoder() {
424 "roberta"
425 } else if self.use_bias && self.vocab_size > 150000 {
426 "qwen2"
427 } else {
428 "llama"
429 }
430 }
431
432 pub fn ties_embeddings(&self) -> bool {
435 if self.tie_word_embeddings {
436 return true;
437 }
438 self.use_bias && self.vocab_size > 150000
440 }
441
442 pub fn head_dim(&self) -> usize {
447 self.head_dim_override.unwrap_or(self.hidden_size / self.num_attention_heads)
448 }
449
450 pub fn q_dim(&self) -> usize {
455 self.num_attention_heads * self.head_dim()
456 }
457
458 fn kv_dim(&self) -> usize {
468 self.num_kv_heads * self.head_dim()
469 }
470
471 pub fn per_layer_weight_elements(&self) -> usize {
475 let h = self.hidden_size;
476 let q = self.q_dim();
477 let kv = self.kv_dim();
478 let i = self.intermediate_size;
479 q * h + kv * h * 2 + h * q + i * h * 3 + h * 2
482 }
483
484 fn per_layer_grad_weight_elements(&self) -> usize {
489 let h = self.hidden_size;
490 let q = self.q_dim();
491 let kv = self.kv_dim();
492 let i = self.intermediate_size;
493 h * 2 + h * i * 3 + q * h + h * q + h * kv * 2
497 }
498
499 fn per_layer_scratch_linear_coeff(&self) -> usize {
503 let h = self.hidden_size;
504 let kv = self.kv_dim();
505 let i = self.intermediate_size;
506 let n = self.num_attention_heads;
507 let hd = self.head_dim();
508 h * 8 + kv * 2 + i * 4 + n * hd * 3
513 }
514
515 fn per_layer_scratch_quadratic_coeff(&self) -> (usize, usize) {
524 let n = self.num_attention_heads;
525 let hd = self.head_dim();
526 (n, n * hd) }
532
533 pub fn total_training_vram_bytes(&self, max_seq_len: usize) -> usize {
537 let l = self.num_hidden_layers;
538 let s = max_seq_len;
539 let hd = self.head_dim();
540
541 let constant_per_layer =
542 self.per_layer_weight_elements() + self.per_layer_grad_weight_elements();
543 let linear_per_layer = self.per_layer_scratch_linear_coeff() * s;
544
545 let (n_quad, n_hd_linear) = self.per_layer_scratch_quadratic_coeff();
546 let quadratic_per_layer =
547 if s >= hd { 2 * n_quad * s * s } else { n_quad * s * s + n_hd_linear * s };
548
549 let elements_per_layer = constant_per_layer + linear_per_layer + quadratic_per_layer;
550 l * elements_per_layer * 4 }
552
553 pub fn total_training_vram_bytes_shared(&self, max_seq_len: usize) -> usize {
560 let l = self.num_hidden_layers;
561 let s = max_seq_len;
562 let hd = self.head_dim();
563
564 let weights_total = l * self.per_layer_weight_elements();
566
567 let grad_weights_shared = self.per_layer_grad_weight_elements();
569
570 let linear_shared = self.per_layer_scratch_linear_coeff() * s;
572 let (n_quad, n_hd_linear) = self.per_layer_scratch_quadratic_coeff();
573 let quadratic_shared =
574 if s >= hd { 2 * n_quad * s * s } else { n_quad * s * s + n_hd_linear * s };
575
576 let total_elements = weights_total + grad_weights_shared + linear_shared + quadratic_shared;
577 total_elements * 4 }
579
580 pub fn max_seq_len_for_vram_shared(&self, vram_bytes: usize) -> Option<usize> {
586 if self.total_training_vram_bytes_shared(1) > vram_bytes {
587 return None;
588 }
589
590 let mut lo: usize = 1;
591 let mut hi: usize = self.max_position_embeddings;
592
593 while lo < hi {
594 let mid = lo + (hi - lo).div_ceil(2);
595 if self.total_training_vram_bytes_shared(mid) <= vram_bytes {
596 lo = mid;
597 } else {
598 hi = mid - 1;
599 }
600 }
601
602 Some(lo)
603 }
604
605 pub fn max_seq_len_for_vram(&self, vram_bytes: usize) -> Option<usize> {
613 if self.total_training_vram_bytes(1) > vram_bytes {
614 return None;
615 }
616
617 let mut lo: usize = 1;
618 let mut hi: usize = self.max_position_embeddings;
619
620 while lo < hi {
621 let mid = lo + (hi - lo).div_ceil(2);
622 if self.total_training_vram_bytes(mid) <= vram_bytes {
623 lo = mid;
624 } else {
625 hi = mid - 1;
626 }
627 }
628
629 Some(lo)
630 }
631}
632
633#[cfg(test)]
634mod tests {
635 use super::*;
636
/// LLaMA-2 7B preset: hidden width, head count and derived head dimension.
#[test]
fn test_transformer_config_llama2() {
    let cfg = TransformerConfig::llama2_7b();
    assert_eq!(cfg.hidden_size, 4096);
    assert_eq!(cfg.num_attention_heads, 32);
    assert_eq!(cfg.head_dim(), 128);
}
644
/// Tiny preset: hidden width, head count and derived head dimension.
#[test]
fn test_transformer_config_tiny() {
    let cfg = TransformerConfig::tiny();
    assert_eq!(cfg.hidden_size, 64);
    assert_eq!(cfg.num_attention_heads, 2);
    assert_eq!(cfg.head_dim(), 32);
}
652
/// A configuration survives a JSON round trip.
#[test]
fn test_config_serialization() {
    let original = TransformerConfig::llama2_7b();
    let encoded = serde_json::to_string(&original).expect("JSON serialization should succeed");
    let decoded: TransformerConfig =
        serde_json::from_str(&encoded).expect("JSON deserialization should succeed");
    assert_eq!(decoded.hidden_size, original.hidden_size);
    assert_eq!(decoded.num_attention_heads, original.num_attention_heads);
}
662
/// Mistral preset uses 8 key/value heads for 32 query heads.
#[test]
fn test_mistral_config() {
    let cfg = TransformerConfig::mistral_7b();
    assert_eq!(cfg.num_kv_heads, 8);
    assert_eq!(cfg.num_attention_heads, 32);
}
670
/// Pins every field of the Qwen2 0.5B preset to its expected value.
#[test]
fn qwen2_0_5b_matches_hf_config_2026_05_04() {
    let cfg = TransformerConfig::qwen2_0_5b();

    // Integer-valued shape fields.
    assert_eq!(cfg.hidden_size, 896, "hidden_size");
    assert_eq!(cfg.num_attention_heads, 14, "num_attention_heads");
    assert_eq!(cfg.num_kv_heads, 2, "num_kv_heads (GQA-7:1)");
    assert_eq!(cfg.intermediate_size, 4864, "intermediate_size");
    assert_eq!(cfg.num_hidden_layers, 24, "num_hidden_layers");
    assert_eq!(cfg.vocab_size, 151_936, "vocab_size");
    assert_eq!(cfg.max_position_embeddings, 32_768, "max_position_embeddings");

    // Floating-point fields are compared with an epsilon tolerance.
    let eps_delta = (cfg.rms_norm_eps - 1e-6).abs();
    assert!(eps_delta < f32::EPSILON, "rms_norm_eps={}, want 1e-6", cfg.rms_norm_eps);
    let theta_delta = (cfg.rope_theta - 1_000_000.0).abs();
    assert!(
        theta_delta < f32::EPSILON,
        "rope_theta={}, want 1_000_000.0",
        cfg.rope_theta
    );

    // Flags and derived ratio.
    assert!(cfg.use_bias, "use_bias must be true (Qwen2 quirk)");
    assert!(
        cfg.tie_word_embeddings,
        "tie_word_embeddings must be true for Qwen2.5 0.5B (HF config 2026-05-04)"
    );
    assert_eq!(cfg.architecture, ModelArchitecture::Decoder);
    assert_eq!(cfg.num_attention_heads / cfg.num_kv_heads, 7);
}
708
/// A Qwen2 0.5B transformer must expose `2 + 24 * 12` named parameters.
#[test]
fn falsify_qwen2_0_5b_named_parameters_count_matches_hf() {
    use super::super::Transformer;

    let cfg = TransformerConfig::qwen2_0_5b();
    let transformer = Transformer::new(&cfg);
    let actual = transformer.named_parameters().len();
    let expected = 2 + 24 * 12;

    assert_eq!(
        actual, expected,
        "FALSIFY-APR-PRETRAIN-INIT-POPULATE-COVERAGE-001: \
         Transformer::new(qwen2_0_5b()).named_parameters().len() = {actual}, \
         expected {expected}. Missing params likely include Q/K/V \
         projection biases (24 layers × 3 = 72 expected biases) — \
         MultiHeadAttention::new must allocate them when \
         config.use_bias == true. See evidence/section-59-5g-2-\
         dispatch-2026-05-09/README.md for the val_loss=0.0008 \
         anomaly that surfaced this gap.",
    );
}
759
/// Every one of the 24 layers must expose q/k/v projection bias parameters
/// when the configuration enables bias.
#[test]
fn falsify_qwen2_0_5b_layers_expose_qkv_biases_when_use_bias_true() {
    use super::super::Transformer;

    let cfg = TransformerConfig::qwen2_0_5b();
    assert!(cfg.use_bias, "qwen2_0_5b config must declare use_bias=true");

    let transformer = Transformer::new(&cfg);
    let params = transformer.named_parameters();
    let names: std::collections::BTreeSet<&str> =
        params.iter().map(|(name, _)| name.as_str()).collect();

    for layer_idx in 0..24 {
        for proj in &["q_proj", "k_proj", "v_proj"] {
            let key = format!("model.layers.{layer_idx}.self_attn.{proj}.bias");
            assert!(
                names.contains(key.as_str()),
                "FALSIFY-APR-PRETRAIN-INIT-POPULATE-COVERAGE-002: \
                 missing named parameter `{key}` despite use_bias=true. \
                 MultiHeadAttention::new MUST allocate b_{} when \
                 config.use_bias is true; today it hardcodes None.",
                proj.split('_').next().unwrap_or(proj)
            );
        }
    }
}
791
/// The 1.5B preset must carry the same (true) tie flag as the 0.5B preset.
#[test]
fn qwen2_1_5b_inherits_tie_word_embeddings_from_0_5b() {
    let base = TransformerConfig::qwen2_0_5b();
    let derived = TransformerConfig::qwen2_1_5b();
    assert_eq!(
        derived.tie_word_embeddings, base.tie_word_embeddings,
        "qwen2_1_5b must inherit tie_word_embeddings from qwen2_0_5b — both are HF tie=true"
    );
    assert!(
        derived.tie_word_embeddings,
        "qwen2_1_5b tie_word_embeddings must be true (HF config 2026-05-04)"
    );
}
808
/// The 7B preset must leave the tie flag unset.
#[test]
fn qwen2_7b_does_not_tie_embeddings() {
    let cfg = TransformerConfig::qwen2_7b();
    let tied = cfg.tie_word_embeddings;
    assert!(
        !tied,
        "qwen2_7b tie_word_embeddings MUST be false per HF config 2026-05-04 — \
         larger Qwen variants pay param cost for untied weights"
    );
}
821
/// Qwen2 0.5B preset enables bias and uses the Qwen2 vocabulary size.
#[test]
fn test_qwen2_config() {
    let cfg = TransformerConfig::qwen2_0_5b();
    assert!(cfg.use_bias);
    assert_eq!(cfg.vocab_size, 151936);
}
828
/// LLaMA-2 13B preset: width, head count, depth and derived head dimension.
#[test]
fn test_llama2_13b_config() {
    let cfg = TransformerConfig::llama2_13b();
    assert_eq!(cfg.hidden_size, 5120);
    assert_eq!(cfg.num_attention_heads, 40);
    assert_eq!(cfg.num_hidden_layers, 40);
    assert_eq!(cfg.head_dim(), 128);
}
837
/// A configuration survives a YAML round trip.
#[test]
fn test_config_yaml_serialization() {
    let original = TransformerConfig::tiny();
    let encoded = serde_yaml::to_string(&original).expect("config should be valid");
    let decoded: TransformerConfig =
        serde_yaml::from_str(&encoded).expect("config should be valid");
    assert_eq!(decoded.hidden_size, original.hidden_size);
    assert_eq!(decoded.num_hidden_layers, original.num_hidden_layers);
}
847
/// Mistral preset shares each key/value head among four query heads.
#[test]
fn test_grouped_query_attention_ratio() {
    let cfg = TransformerConfig::mistral_7b();
    let heads_per_kv = cfg.num_attention_heads / cfg.num_kv_heads;
    assert_eq!(heads_per_kv, 4);
}
854
/// Cloning preserves field values.
#[test]
fn test_config_clone() {
    let original = TransformerConfig::llama2_7b();
    let copy = original.clone();
    assert_eq!(original.hidden_size, copy.hidden_size);
    assert_eq!(original.vocab_size, copy.vocab_size);
}
862
/// Qwen3.5 9B preset: shape fields and bias flag.
#[test]
fn test_qwen3_5_9b_config() {
    let cfg = TransformerConfig::qwen3_5_9b();
    assert_eq!(cfg.hidden_size, 4096);
    assert_eq!(cfg.num_attention_heads, 16);
    assert_eq!(cfg.num_kv_heads, 4);
    assert_eq!(cfg.intermediate_size, 12288);
    assert_eq!(cfg.num_hidden_layers, 32);
    assert_eq!(cfg.vocab_size, 248320);
    assert_eq!(cfg.max_position_embeddings, 262144);
    assert!(!cfg.use_bias);
}
875
/// Qwen3.5 9B preset derives a head dimension of 256 (4096 / 16).
#[test]
fn test_qwen3_5_9b_head_dim() {
    let cfg = TransformerConfig::qwen3_5_9b();
    assert_eq!(cfg.head_dim(), 256);
}
882
/// Qwen3.5 9B preset shares each key/value head among four query heads.
#[test]
fn test_qwen3_5_9b_gqa_ratio() {
    let cfg = TransformerConfig::qwen3_5_9b();
    let heads_per_kv = cfg.num_attention_heads / cfg.num_kv_heads;
    assert_eq!(heads_per_kv, 4);
}
889
/// Complete `qwen3` metadata yields a bias-free config with head dimension 128.
#[test]
fn test_from_apr_metadata_qwen3_8b() {
    let cfg = TransformerConfig::from_apr_metadata(
        Some(4096),
        Some(32),
        Some(8),
        Some(12288),
        Some(36),
        Some(151936),
        Some(40960),
        Some(1e-6),
        Some(1e6),
        Some("qwen3"),
    )
    .expect("all required fields present");

    assert_eq!(cfg.hidden_size, 4096);
    assert_eq!(cfg.num_attention_heads, 32);
    assert_eq!(cfg.num_kv_heads, 8);
    assert_eq!(cfg.num_hidden_layers, 36);
    assert_eq!(cfg.vocab_size, 151936);
    assert_eq!(cfg.head_dim(), 128);
    assert!(!cfg.use_bias);
}
919
/// Complete `qwen2` metadata yields a bias-enabled config with head dimension 128.
#[test]
fn test_from_apr_metadata_qwen2_7b() {
    let parsed = TransformerConfig::from_apr_metadata(
        Some(3584),
        Some(28),
        Some(4),
        Some(18944),
        Some(28),
        Some(152064),
        Some(32768),
        Some(1e-6),
        Some(1e6),
        Some("qwen2"),
    );
    let cfg = parsed.expect("all required fields present");

    assert!(cfg.use_bias);
    assert_eq!(cfg.head_dim(), 128);
}
940
/// Omitting `hidden_size` or `num_layers` makes the constructor return `None`.
#[test]
fn test_from_apr_metadata_missing_required_returns_none() {
    let without_hidden_size = TransformerConfig::from_apr_metadata(
        None,
        Some(32),
        Some(8),
        Some(12288),
        Some(36),
        Some(151936),
        Some(40960),
        Some(1e-6),
        Some(1e6),
        Some("qwen3"),
    );
    assert!(without_hidden_size.is_none());

    let without_num_layers = TransformerConfig::from_apr_metadata(
        Some(4096),
        Some(32),
        Some(8),
        Some(12288),
        None,
        Some(151936),
        Some(40960),
        Some(1e-6),
        Some(1e6),
        Some("qwen3"),
    );
    assert!(without_num_layers.is_none());
}
973
/// The per-layer-scratch estimate must grow strictly with sequence length.
#[test]
fn falsify_vram_monotonic_in_seq_len() {
    let cfg = TransformerConfig::qwen3_4b();
    let mut prev = cfg.total_training_vram_bytes(1);
    for s in [2, 4, 8, 16, 32, 64, 128, 256, 512] {
        let cur = cfg.total_training_vram_bytes(s);
        assert!(
            cur > prev,
            "VRAM must increase: seq_len={s} ({cur}) should exceed prev ({prev})"
        );
        prev = cur;
    }
}
995
/// Whenever the solver returns a length, it must fit the budget and be tight
/// (one more token would exceed it, unless capped by the position limit).
#[test]
fn falsify_vram_solver_postcondition() {
    let cfg = TransformerConfig::qwen3_4b();
    let budget = 24 * 1024 * 1024 * 1024_usize;

    // Nothing to check when the solver reports that no length fits.
    let Some(max_s) = cfg.max_seq_len_for_vram(budget) else {
        return;
    };

    let used = cfg.total_training_vram_bytes(max_s);
    assert!(
        used <= budget,
        "Solver returned seq_len={max_s} using {used} bytes > budget {budget}"
    );

    if max_s < cfg.max_position_embeddings {
        let over = cfg.total_training_vram_bytes(max_s + 1);
        assert!(
            over > budget,
            "Solver not tight: seq_len={} uses {over} <= budget {budget}",
            max_s + 1
        );
    }
}
1018
/// A 1 KiB budget cannot hold the model, so the solver must return `None`.
#[test]
fn falsify_vram_solver_returns_none_when_impossible() {
    let cfg = TransformerConfig::qwen3_4b();
    let tiny_budget = 1024;
    let solved = cfg.max_seq_len_for_vram(tiny_budget);
    assert!(
        solved.is_none(),
        "Solver should return None when budget is too small"
    );
}
1029
/// Prints a VRAM budget report for Qwen3-4B and checks that the per-layer
/// estimate at sequence length 512 exceeds 23 GiB.
#[test]
fn falsify_qwen3_4b_vram_matches_oom_observation() {
    let cfg = TransformerConfig::qwen3_4b();

    let vram_512 = cfg.total_training_vram_bytes(512);
    let usable_vram = 23 * 1024 * 1024 * 1024_usize;
    let vram_1 = cfg.total_training_vram_bytes(1);
    let shared_128 = cfg.total_training_vram_bytes_shared(128);
    let shared_512 = cfg.total_training_vram_bytes_shared(512);
    let solved = cfg.max_seq_len_for_vram_shared(24 * 1024 * 1024 * 1024);

    // Diagnostic report; only the final assertion decides the test outcome.
    eprintln!("=== Qwen3-4B VRAM Budget ===");
    eprintln!(
        " Per-layer weights: {:.1} MB",
        cfg.per_layer_weight_elements() as f64 * 4.0 / 1e6
    );
    eprintln!(
        " Per-layer grad scratch: {:.1} MB",
        cfg.per_layer_grad_weight_elements() as f64 * 4.0 / 1e6
    );
    eprintln!(" Per-layer (S=512): {:.1} MB", (vram_512 / 36) as f64 / 1e6);
    eprintln!(" 36 layers S=1 (per-layer scratch): {:.1} GB", vram_1 as f64 / 1e9);
    eprintln!(" 36 layers S=512 (per-layer scratch): {:.1} GB", vram_512 as f64 / 1e9);
    eprintln!(" 36 layers S=128 (SHARED scratch): {:.1} GB", shared_128 as f64 / 1e9);
    eprintln!(" 36 layers S=512 (SHARED scratch): {:.1} GB", shared_512 as f64 / 1e9);
    eprintln!(" Max seq_len for 24 GB (shared): {solved:?}");

    assert!(
        vram_512 > usable_vram,
        "Formula says {:.1} GB for seq_len=512, but we OOM'd on 23 GB — formula is wrong",
        vram_512 as f64 / 1e9
    );
}
1065
/// The Qwen2 0.5B estimate at sequence length 512 must stay below 24 GiB.
#[test]
fn falsify_qwen2_0_5b_fits_on_4090() {
    let cfg = TransformerConfig::qwen2_0_5b();
    let total_vram = 24 * 1024 * 1024 * 1024_usize;
    let vram_512 = cfg.total_training_vram_bytes(512);
    assert!(
        vram_512 < total_vram,
        "Formula says {:.1} GB for Qwen2-0.5B at seq_len=512, but it fit on 4090",
        vram_512 as f64 / 1e9
    );
}
1079
/// Pins the Qwen3-4B per-layer weight count and the 24 GiB fit/no-fit outcomes.
#[test]
fn falsify_vram_budget_concrete_values() {
    let cfg = TransformerConfig::qwen3_4b();
    let budget_24gb = 24 * 1024 * 1024 * 1024_usize;

    // Hand-expanded weight count with q_dim=4096, kv_dim=1024, hidden=2560,
    // intermediate=9728.
    let expected_weights =
        4096 * 2560 + 1024 * 2560 * 2 + 2560 * 4096 + 9728 * 2560 * 3 + 2560 * 2;
    assert_eq!(cfg.per_layer_weight_elements(), expected_weights);

    let per_layer_fit = cfg.max_seq_len_for_vram(budget_24gb);
    assert!(
        per_layer_fit.is_none(),
        "Qwen3-4B per-layer scratch CANNOT fit 24 GB — proves shared scratch needed"
    );

    let shared_budget = cfg.total_training_vram_bytes_shared(128);
    assert!(
        shared_budget < budget_24gb,
        "Qwen3-4B shared scratch at seq_len=128 should fit 24 GB, got {:.1} GB",
        shared_budget as f64 / 1e9
    );
}
1111
/// The default architecture is `Decoder`.
#[test]
fn test_model_architecture_default() {
    let default_arch = ModelArchitecture::default();
    assert_eq!(default_arch, ModelArchitecture::Decoder);
}
1119
/// Variants serialize to snake_case JSON strings and parse back.
#[test]
fn test_model_architecture_serialization() {
    let encoder_json = serde_json::to_string(&ModelArchitecture::Encoder).expect("serialize");
    assert_eq!(encoder_json, "\"encoder\"");

    let decoder_json = serde_json::to_string(&ModelArchitecture::Decoder).expect("serialize");
    assert_eq!(decoder_json, "\"decoder\"");

    let parsed: ModelArchitecture = serde_json::from_str("\"encoder\"").expect("deserialize");
    assert_eq!(parsed, ModelArchitecture::Encoder);
}
1132
/// CodeBERT preset: shape fields, bias flag, encoder architecture and head dimension.
#[test]
fn test_codebert_config() {
    let cfg = TransformerConfig::codebert();
    assert_eq!(cfg.hidden_size, 768);
    assert_eq!(cfg.num_attention_heads, 12);
    assert_eq!(cfg.num_kv_heads, 12);
    assert_eq!(cfg.intermediate_size, 3072);
    assert_eq!(cfg.num_hidden_layers, 12);
    assert_eq!(cfg.vocab_size, 50265);
    assert_eq!(cfg.max_position_embeddings, 514);
    assert!(cfg.use_bias);
    assert_eq!(cfg.architecture, ModelArchitecture::Encoder);
    assert!(cfg.is_encoder());
    assert_eq!(cfg.head_dim(), 64);
}
1148
/// Only the CodeBERT preset reports itself as an encoder.
#[test]
fn test_is_encoder() {
    let encoder = TransformerConfig::codebert();
    assert!(encoder.is_encoder());

    let llama = TransformerConfig::llama2_7b();
    assert!(!llama.is_encoder());
    let tiny = TransformerConfig::tiny();
    assert!(!tiny.is_encoder());
    let qwen2 = TransformerConfig::qwen2_0_5b();
    assert!(!qwen2.is_encoder());
}
1156
/// Without an explicit override the architecture name is inferred per preset.
#[test]
fn test_hf_architecture_name_inferred() {
    let codebert = TransformerConfig::codebert();
    assert_eq!(codebert.hf_architecture_name(), "BertModel");

    let qwen2 = TransformerConfig::qwen2_0_5b();
    assert_eq!(qwen2.hf_architecture_name(), "Qwen2ForCausalLM");

    let llama = TransformerConfig::llama2_7b();
    assert_eq!(llama.hf_architecture_name(), "LlamaForCausalLM");
}
1166
/// An explicit `hf_architecture` takes precedence over inference.
#[test]
fn test_hf_architecture_name_override() {
    let mut cfg = TransformerConfig::tiny();
    cfg.hf_architecture = Some("CustomModel".to_string());
    assert_eq!(cfg.hf_architecture_name(), "CustomModel");
}
1173
/// Without an explicit override the model type is inferred per preset.
#[test]
fn test_hf_model_type_str_inferred() {
    let codebert = TransformerConfig::codebert();
    assert_eq!(codebert.hf_model_type_str(), "roberta");

    let qwen2 = TransformerConfig::qwen2_0_5b();
    assert_eq!(qwen2.hf_model_type_str(), "qwen2");

    let llama = TransformerConfig::llama2_7b();
    assert_eq!(llama.hf_model_type_str(), "llama");
}
1180
/// An explicit `hf_model_type` takes precedence over inference.
#[test]
fn test_hf_model_type_str_override() {
    let mut cfg = TransformerConfig::tiny();
    cfg.hf_model_type = Some("custom_type".to_string());
    assert_eq!(cfg.hf_model_type_str(), "custom_type");
}
1187
/// `ties_embeddings` is true for Qwen2 0.5B, false for LLaMA-2 7B, and true
/// once the flag is set explicitly.
#[test]
fn test_ties_embeddings() {
    let qwen2 = TransformerConfig::qwen2_0_5b();
    assert!(qwen2.ties_embeddings());

    let llama = TransformerConfig::llama2_7b();
    assert!(!llama.ties_embeddings());

    let mut forced = TransformerConfig::llama2_7b();
    forced.tie_word_embeddings = true;
    assert!(forced.ties_embeddings());
}
1199
/// Qwen3 4B relies on the override: the derived quotient is not 128.
#[test]
fn test_head_dim_override() {
    let cfg = TransformerConfig::qwen3_4b();
    assert_eq!(cfg.head_dim_override, Some(128));
    assert_eq!(cfg.head_dim(), 128);

    let derived = cfg.hidden_size / cfg.num_attention_heads;
    assert_ne!(derived, 128);
}
1208
/// LLaMA-2 7B has no override and derives a head dimension of 128.
#[test]
fn test_head_dim_no_override() {
    let cfg = TransformerConfig::llama2_7b();
    assert!(cfg.head_dim_override.is_none());
    assert_eq!(cfg.head_dim(), 128);
}
1215
/// Both Qwen3 4B and LLaMA-2 7B have a total query width of 4096.
#[test]
fn test_q_dim() {
    let qwen3 = TransformerConfig::qwen3_4b();
    assert_eq!(qwen3.q_dim(), 4096);

    let llama = TransformerConfig::llama2_7b();
    assert_eq!(llama.q_dim(), 4096);
}
1226
/// With the head-dimension override, Qwen3 4B's query width differs from its hidden width.
#[test]
fn test_q_dim_differs_from_hidden() {
    let cfg = TransformerConfig::qwen3_4b();
    let query_width = cfg.q_dim();
    assert_ne!(query_width, cfg.hidden_size);
}
1233
/// Pins the Qwen3 4B widths and the element counts derived from them.
#[test]
fn test_qwen3_4b_projection_shapes() {
    let cfg = TransformerConfig::qwen3_4b();

    assert_eq!(cfg.hidden_size, 2560);
    assert_eq!(cfg.num_attention_heads, 32);
    assert_eq!(cfg.num_kv_heads, 8);
    assert_eq!(cfg.head_dim(), 128);
    assert_eq!(cfg.head_dim_override, Some(128));

    let q_dim = cfg.q_dim();
    let kv_dim = cfg.kv_dim();
    assert_eq!(q_dim, 4096);
    assert_eq!(kv_dim, 1024);

    let hidden = cfg.hidden_size;
    assert_eq!(q_dim * hidden, 10_485_760);
    assert_eq!(kv_dim * hidden, 2_621_440);
    assert_eq!(kv_dim * hidden, 2_621_440);
    assert_eq!(hidden * q_dim, 10_485_760);
}
1268
/// The gradient element count must be built from `q_dim`, which for Qwen3 4B
/// is larger than the hidden width.
#[test]
fn test_qwen3_4b_grad_weight_elements_uses_q_dim() {
    let cfg = TransformerConfig::qwen3_4b();
    let h = cfg.hidden_size;
    let q = cfg.q_dim();
    let kv = cfg.kv_dim();
    let i = cfg.intermediate_size;

    let expected = h * 2 + h * i * 3 + q * h + h * q + h * kv * 2;
    assert_eq!(cfg.per_layer_grad_weight_elements(), expected);

    assert!(q * h > h * h, "q_dim*hidden > hidden*hidden for Qwen3-4B");
}
1289
/// Every alias accepted by `from_size_str` must resolve, including the
/// `1.5B` family; a failure names the offending alias.
#[test]
fn test_from_size_str_known_sizes() {
    let aliases = [
        "codebert",
        "codebert-base",
        "125M",
        "0.5B",
        "500M",
        "qwen2-0.5b",
        "1.5B",
        "qwen2.5-1.5b",
        "qwen2-1.5b",
        "7B",
        "qwen2.5-7b",
        "4B",
        "qwen3-4b",
        "qwen3",
        "9B",
        "qwen3.5-9b",
        "qwen3_5",
        "qwen3.5",
    ];
    for alias in aliases {
        assert!(
            TransformerConfig::from_size_str(alias).is_ok(),
            "alias `{alias}` should resolve to a preset"
        );
    }
}
1308
/// An unrecognised size string must yield an error whose text contains both
/// the generic "Unknown model size" phrase and the offending input.
#[test]
fn test_from_size_str_unknown() {
    let message = TransformerConfig::from_size_str("99B").unwrap_err();

    let required_fragments = ["Unknown model size", "99B"];
    for fragment in required_fragments {
        assert!(message.contains(fragment));
    }
}
1315
/// Spot-checks that three size strings resolve to configs with the expected
/// hidden size and flags.
#[test]
fn test_from_size_str_configs_correct() {
    let load = |size: &str| TransformerConfig::from_size_str(size).unwrap();

    // "codebert" -> 768-wide encoder.
    {
        let cfg = load("codebert");
        assert_eq!(cfg.hidden_size, 768);
        assert!(cfg.is_encoder());
    }

    // "0.5B" -> 896-wide config with bias enabled.
    {
        let cfg = load("0.5B");
        assert_eq!(cfg.hidden_size, 896);
        assert!(cfg.use_bias);
    }

    // "4B" -> 2560-wide config with bias disabled.
    {
        let cfg = load("4B");
        assert_eq!(cfg.hidden_size, 2560);
        assert!(!cfg.use_bias);
    }
}
1330
/// `from_apr_metadata` must return `None` when the second argument is absent.
///
/// NOTE(review): going by the test name, the second positional argument is
/// the attention-head count — confirm against the function signature.
#[test]
fn test_from_apr_metadata_missing_num_heads() {
    let parsed = TransformerConfig::from_apr_metadata(
        Some(4096),
        None, // the omitted field under test
        Some(8),
        Some(12288),
        Some(36),
        Some(151936),
        None,
        None,
        None,
        None,
    );

    assert!(parsed.is_none());
}
1347
/// `from_apr_metadata` must return `None` when the sixth argument is absent.
///
/// NOTE(review): going by the test name, the sixth positional argument is the
/// vocabulary size — confirm against the function signature.
#[test]
fn test_from_apr_metadata_missing_vocab_size() {
    let parsed = TransformerConfig::from_apr_metadata(
        Some(4096),
        Some(32),
        Some(8),
        Some(12288),
        Some(36),
        None, // the omitted field under test
        None,
        None,
        None,
        None,
    );

    assert!(parsed.is_none());
}
1364
/// `from_apr_metadata` must return `None` when the fourth argument is absent.
///
/// NOTE(review): going by the test name, the fourth positional argument is
/// the intermediate (feed-forward) size — confirm against the signature.
#[test]
fn test_from_apr_metadata_missing_intermediate_size() {
    let parsed = TransformerConfig::from_apr_metadata(
        Some(4096),
        Some(32),
        Some(8),
        None, // the omitted field under test
        Some(36),
        Some(151936),
        None,
        None,
        None,
        None,
    );

    assert!(parsed.is_none());
}
1381
/// With the third and the last four arguments left as `None`,
/// `from_apr_metadata` still builds a config; this test pins the values the
/// resulting config reports for the fields that were not supplied.
#[test]
fn test_from_apr_metadata_defaults() {
    let parsed = TransformerConfig::from_apr_metadata(
        Some(512),
        Some(8),
        None,
        Some(2048),
        Some(6),
        Some(32000),
        None,
        None,
        None,
        None,
    );
    let cfg = parsed.unwrap();

    // KV-head count comes out as 8, the same value passed as the second argument.
    assert_eq!(cfg.num_kv_heads, 8);
    assert_eq!(cfg.max_position_embeddings, 32768);

    // Float fields are compared with a tolerance rather than `==`.
    let eps_error = (cfg.rms_norm_eps - 1e-6).abs();
    assert!(eps_error < 1e-10);
    let theta_error = (cfg.rope_theta - 10000.0).abs();
    assert!(theta_error < 0.1);

    assert_eq!(cfg.architecture, ModelArchitecture::Decoder);
    assert!(!cfg.use_bias);
}
1405
/// Passing `"codebert"` as the final argument must produce a config whose
/// architecture is `ModelArchitecture::Encoder`.
#[test]
fn test_from_apr_metadata_encoder_architecture() {
    let parsed = TransformerConfig::from_apr_metadata(
        Some(768),
        Some(12),
        Some(12),
        Some(3072),
        Some(12),
        Some(50265),
        Some(514),
        Some(1e-5),
        Some(0.0),
        Some("codebert"),
    );
    let cfg = parsed.unwrap();

    assert_eq!(cfg.architecture, ModelArchitecture::Encoder);
}
1423
/// Passing `"roberta"` as the final argument (with the three preceding
/// optional values omitted) must also produce an encoder config.
#[test]
fn test_from_apr_metadata_roberta_architecture() {
    let parsed = TransformerConfig::from_apr_metadata(
        Some(768),
        Some(12),
        Some(12),
        Some(3072),
        Some(12),
        Some(50265),
        None,
        None,
        None,
        Some("roberta"),
    );
    let cfg = parsed.unwrap();

    assert_eq!(cfg.architecture, ModelArchitecture::Encoder);
}
1441
/// For `"qwen3-4b"` metadata (2560 hidden, 32 heads) the resulting config
/// must carry an explicit head-dim override of 128 and have bias disabled.
///
/// Note that 2560 / 32 = 80, so 128 cannot be obtained by dividing the two
/// supplied values.
#[test]
fn test_from_apr_metadata_qwen3_head_dim_override() {
    let parsed = TransformerConfig::from_apr_metadata(
        Some(2560),
        Some(32),
        Some(8),
        Some(9728),
        Some(36),
        Some(151936),
        Some(40960),
        Some(1e-6),
        Some(1e6),
        Some("qwen3-4b"),
    );
    let cfg = parsed.unwrap();

    assert_eq!(cfg.head_dim_override, Some(128));
    assert_eq!(cfg.head_dim(), 128);
    assert!(!cfg.use_bias);
}
1462
/// For `"qwen3-8b"` metadata (4096 hidden, 32 heads) no head-dim override
/// is stored, yet `head_dim()` still reports 128 (4096 / 32 = 128).
#[test]
fn test_from_apr_metadata_qwen3_no_override_needed() {
    let parsed = TransformerConfig::from_apr_metadata(
        Some(4096),
        Some(32),
        Some(8),
        Some(12288),
        Some(36),
        Some(151936),
        None,
        None,
        None,
        Some("qwen3-8b"),
    );
    let cfg = parsed.unwrap();

    assert!(cfg.head_dim_override.is_none());
    assert_eq!(cfg.head_dim(), 128);
}
1482
/// Pins the field values of the `qwen2_7b()` preset.
#[test]
fn test_qwen2_7b_config() {
    let cfg = TransformerConfig::qwen2_7b();

    // Attention geometry: (hidden, heads, kv heads).
    assert_eq!(
        (cfg.hidden_size, cfg.num_attention_heads, cfg.num_kv_heads),
        (3584, 28, 4)
    );
    // Feed-forward width, depth and vocabulary.
    assert_eq!(
        (cfg.intermediate_size, cfg.num_hidden_layers, cfg.vocab_size),
        (18944, 28, 152064)
    );
    assert!(cfg.use_bias);
    // 3584 / 28 = 128.
    assert_eq!(cfg.head_dim(), 128);
}
1495
/// Pins the field values of the `qwen3_4b()` preset.
#[test]
fn test_qwen3_4b_config() {
    let cfg = TransformerConfig::qwen3_4b();

    // Attention geometry: (hidden, heads, kv heads).
    assert_eq!(
        (cfg.hidden_size, cfg.num_attention_heads, cfg.num_kv_heads),
        (2560, 32, 8)
    );
    // Feed-forward width and depth.
    assert_eq!((cfg.intermediate_size, cfg.num_hidden_layers), (9728, 36));
    assert!(!cfg.use_bias);
    // 128 is not 2560 / 32 (= 80); the preset reports 128 regardless.
    assert_eq!(cfg.head_dim(), 128);
}
1507
/// `per_layer_weight_elements()` must be strictly positive for each of the
/// presets listed below.
#[test]
fn test_per_layer_weight_elements_positive() {
    let presets = [
        TransformerConfig::tiny(),
        TransformerConfig::codebert(),
        TransformerConfig::qwen2_0_5b(),
        TransformerConfig::qwen3_4b(),
    ];

    for preset in presets {
        let elements = preset.per_layer_weight_elements();
        assert!(elements > 0);
    }
}
1519
/// At sequence length 128 on the Qwen2-0.5B preset, the "shared" VRAM
/// estimate must be strictly smaller than the non-shared one.
#[test]
fn test_vram_shared_less_than_per_layer() {
    let seq_len = 128;
    let cfg = TransformerConfig::qwen2_0_5b();

    let per_layer = cfg.total_training_vram_bytes(seq_len);
    let shared = cfg.total_training_vram_bytes_shared(seq_len);

    assert!(
        per_layer > shared,
        "Shared ({shared}) should be less than per-layer ({per_layer})"
    );
}
1531
/// The shared VRAM estimate must strictly increase along the sequence
/// lengths 1, 2, 4, ..., 128 for the Qwen2-0.5B preset.
#[test]
fn test_vram_shared_monotonic() {
    let config = TransformerConfig::qwen2_0_5b();
    let baseline = config.total_training_vram_bytes_shared(1);

    // Thread the previous estimate through the fold; each step asserts growth
    // and hands its own estimate to the next step.
    let _largest = [2, 4, 8, 16, 32, 64, 128].iter().fold(baseline, |prev, &s| {
        let cur = config.total_training_vram_bytes_shared(s);
        assert!(cur > prev, "Shared VRAM must increase: seq_len={s}");
        cur
    });
}
1542
/// With an 8 GiB budget, `max_seq_len_for_vram_shared` must return some
/// sequence length for the Qwen2-0.5B preset, and the shared VRAM estimate at
/// that length must not exceed the budget.
#[test]
fn test_max_seq_len_for_vram_shared() {
    let config = TransformerConfig::qwen2_0_5b();
    // 8 * 1024^3 bytes = 8 GiB.
    let budget = 8 * 1024 * 1024 * 1024_usize;

    // `expect` replaces the former `is_some()` + `unwrap()` pair so the
    // failure names the violated expectation directly.
    let s = config
        .max_seq_len_for_vram_shared(budget)
        .expect("an 8 GiB budget should admit some sequence length for Qwen2-0.5B");

    let used = config.total_training_vram_bytes_shared(s);
    assert!(
        used <= budget,
        "seq_len {s} needs {used} bytes, exceeding the {budget}-byte budget"
    );
}
1552
/// A 1024-byte budget must admit no sequence length at all for the Qwen3-4B
/// preset.
#[test]
fn test_max_seq_len_for_vram_shared_impossible() {
    let tiny_budget = 1024;
    let cfg = TransformerConfig::qwen3_4b();

    let fit = cfg.max_seq_len_for_vram_shared(tiny_budget);
    assert_eq!(fit, None);
}
1559
/// When a sequence length is returned for a 10 MiB budget on the tiny preset,
/// it must fit the budget and — unless it already equals or exceeds
/// `max_position_embeddings` — the next length up must not fit.
///
/// NOTE(review): if no length is returned the test passes without checking
/// anything; confirm that this is the intended behaviour.
#[test]
fn test_max_seq_len_for_vram_shared_tightness() {
    let config = TransformerConfig::tiny();
    // 10 * 1024^2 bytes = 10 MiB.
    let budget = 10 * 1024 * 1024_usize;

    let s = match config.max_seq_len_for_vram_shared(budget) {
        Some(len) => len,
        None => return,
    };

    assert!(config.total_training_vram_bytes_shared(s) <= budget);

    let at_position_cap = s >= config.max_position_embeddings;
    if !at_position_cap {
        assert!(config.total_training_vram_bytes_shared(s + 1) > budget);
    }
}
1571
/// Pins `kv_dim()` for the Qwen3-4B and LLaMA-2 7B presets.
#[test]
fn test_kv_dim() {
    let qwen3_kv = TransformerConfig::qwen3_4b().kv_dim();
    assert_eq!(qwen3_kv, 1024);

    let llama_kv = TransformerConfig::llama2_7b().kv_dim();
    assert_eq!(llama_kv, 4096);
}
1577
/// All per-layer scratch coefficients and the gradient element count must be
/// strictly positive for the tiny preset.
#[test]
fn test_per_layer_scratch_coefficients() {
    let cfg = TransformerConfig::tiny();

    let linear = cfg.per_layer_scratch_linear_coeff();
    assert!(linear > 0);

    // The quadratic helper returns a pair; both members must be positive.
    let (n_quad, n_hd_linear) = cfg.per_layer_scratch_quadratic_coeff();
    assert!(n_quad > 0);
    assert!(n_hd_linear > 0);

    let grad_elements = cfg.per_layer_grad_weight_elements();
    assert!(grad_elements > 0);
}
1586}