entrenar/finetune/instruct_pipeline/
constructors.rs

//! InstructPipeline constructors (`new`, `from_pretrained`, `from_apr`) and their
//! helpers: `extract_embedded_tokenizer`, `build_lora_layers`, `inject_adapter_weights`.
3
4#[allow(clippy::wildcard_imports)]
5use super::*;
6use provable_contracts_macros::{ensures, requires};
7
8impl InstructPipeline {
9    /// Create a new pipeline with random weights.
10    pub fn new(model_config: &TransformerConfig, instruct_config: InstructConfig) -> Self {
11        let model = Transformer::new(model_config);
12        let mut lora_layers = Self::build_lora_layers(&model, model_config, &instruct_config);
13
14        for lora in &mut lora_layers {
15            for param in lora.trainable_params() {
16                param.set_requires_grad(true);
17            }
18        }
19
20        let optimizer = AdamW::default_params(instruct_config.learning_rate);
21
22        #[allow(unused_mut)]
23        let mut pipeline = Self {
24            model,
25            lora_layers,
26            config: instruct_config,
27            optimizer,
28            tokenizer: None,
29            model_dir: None,
30            profiler: StepProfiler::disabled(),
31            #[cfg(feature = "cuda")]
32            cuda_trainer: None,
33            #[cfg(feature = "cuda")]
34            cuda_blocks: None,
35            #[cfg(feature = "cuda")]
36            shared_scratch: None,
37            #[cfg(feature = "cuda")]
38            cuda_nan_count: 0,
39            #[cfg(feature = "cuda")]
40            gpu_training: None,
41            #[cfg(feature = "cuda")]
42            cuda_lora_grad_workspace: None,
43            #[cfg(feature = "cuda")]
44            lora_fused_clip: None,
45            #[cfg(feature = "cuda")]
46            cuda_lora_optimizer_states: None,
47            #[cfg(feature = "cuda")]
48            nf4_lora_step: 0,
49            #[cfg(feature = "cuda")]
50            vram_guard: None,
51            #[cfg(feature = "gpu")]
52            wgpu_training: None,
53        };
54
55        #[cfg(feature = "cuda")]
56        if pipeline.config.quantize_nf4 {
57            pipeline.init_cuda(model_config);
58        }
59
60        // Initialize wgpu training if CUDA is not available
61        #[cfg(feature = "gpu")]
62        if pipeline.wgpu_training.is_none() {
63            #[cfg(feature = "cuda")]
64            let cuda_active = pipeline.cuda_blocks.is_some();
65            #[cfg(not(feature = "cuda"))]
66            let cuda_active = false;
67
68            if !cuda_active {
69                pipeline.try_init_wgpu(model_config);
70            }
71        }
72
73        pipeline
74    }
75
76    /// Create pipeline from pretrained model weights.
77    ///
78    /// Loads transformer from SafeTensors and optionally a BPE tokenizer.
79    ///
80    /// # Errors
81    /// Returns error if model files cannot be loaded.
82    pub fn from_pretrained(
83        model_dir: &Path,
84        model_config: &TransformerConfig,
85        instruct_config: InstructConfig,
86    ) -> crate::Result<Self> {
87        let model = Transformer::from_safetensors(model_dir, model_config)?;
88        let mut lora_layers = Self::build_lora_layers(&model, model_config, &instruct_config);
89
90        // ENT-269: Auto-load trained LoRA adapter if present in model directory.
91        let adapter_path = model_dir.join("adapter_model.safetensors");
92        if adapter_path.exists() {
93            match crate::lora::load_adapter_peft(model_dir) {
94                Ok((_config, weights)) => {
95                    Self::inject_adapter_weights(
96                        &mut lora_layers,
97                        &weights,
98                        model_config.num_hidden_layers,
99                    );
100                    eprintln!(
101                        "[adapter] Loaded trained LoRA adapter ({} tensors) from {}",
102                        weights.len(),
103                        model_dir.display()
104                    );
105                }
106                Err(e) => {
107                    eprintln!(
108                        "[adapter] Warning: adapter_model.safetensors found but failed to load: {e}"
109                    );
110                }
111            }
112        }
113
114        for lora in &mut lora_layers {
115            for param in lora.trainable_params() {
116                param.set_requires_grad(true);
117            }
118        }
119
120        let optimizer = AdamW::default_params(instruct_config.learning_rate);
121
122        // CONTRACT: Training requires a BPE tokenizer — byte-fallback is not acceptable.
123        let tokenizer_path = model_dir.join("tokenizer.json");
124        let tokenizer = if tokenizer_path.exists() {
125            Some(HfTokenizer::from_file(&tokenizer_path).map_err(|e| {
126                crate::Error::ConfigError(format!(
127                    "Failed to load tokenizer from '{}': {e}. \
128                     Training requires a BPE tokenizer.",
129                    tokenizer_path.display(),
130                ))
131            })?)
132        } else {
133            return Err(crate::Error::ConfigError(format!(
134                "No tokenizer.json found in '{}'. Training requires a BPE tokenizer.",
135                model_dir.display(),
136            )));
137        };
138
139        #[allow(unused_mut)]
140        let mut pipeline = Self {
141            model,
142            lora_layers,
143            config: instruct_config,
144            optimizer,
145            tokenizer,
146            model_dir: Some(model_dir.to_path_buf()),
147            profiler: StepProfiler::disabled(),
148            #[cfg(feature = "cuda")]
149            cuda_trainer: None,
150            #[cfg(feature = "cuda")]
151            cuda_blocks: None,
152            #[cfg(feature = "cuda")]
153            shared_scratch: None,
154            #[cfg(feature = "cuda")]
155            cuda_nan_count: 0,
156            #[cfg(feature = "cuda")]
157            gpu_training: None,
158            #[cfg(feature = "cuda")]
159            cuda_lora_grad_workspace: None,
160            #[cfg(feature = "cuda")]
161            lora_fused_clip: None,
162            #[cfg(feature = "cuda")]
163            cuda_lora_optimizer_states: None,
164            #[cfg(feature = "cuda")]
165            nf4_lora_step: 0,
166            #[cfg(feature = "cuda")]
167            vram_guard: None,
168            #[cfg(feature = "gpu")]
169            wgpu_training: None,
170        };
171
172        #[cfg(feature = "cuda")]
173        if pipeline.config.quantize_nf4 {
174            pipeline.init_cuda(model_config);
175        }
176
177        Ok(pipeline)
178    }
179
180    /// Create pipeline from APR model file (.apr format).
181    ///
182    /// Loads transformer weights from the APR binary, dequantizing from any
183    /// stored dtype (F16, Q4K, etc.) to F32. Loads sibling tokenizer if present
184    /// (e.g., `model.tokenizer.json` next to `model.apr`).
185    ///
186    /// # Errors
187    /// Returns error if APR file cannot be loaded or weights are invalid.
188    /// CONTRACT L5: apr_tokenizer_embedding (model-format-conversion-v1.yaml)
189    /// APR files are self-contained — tokenizer is extracted from embedded metadata.
190    /// Sibling .tokenizer.json is a legacy fallback only.
191    #[requires(apr_path.exists())]
192    pub fn from_apr(
193        apr_path: &Path,
194        model_config: &TransformerConfig,
195        instruct_config: InstructConfig,
196    ) -> crate::Result<Self> {
197        let model = Transformer::from_apr(apr_path, model_config)?;
198        let mut lora_layers = Self::build_lora_layers(&model, model_config, &instruct_config);
199
200        for lora in &mut lora_layers {
201            for param in lora.trainable_params() {
202                param.set_requires_grad(true);
203            }
204        }
205
206        let optimizer = AdamW::default_params(instruct_config.learning_rate);
207
208        // Tokenizer resolution: APR is an embedded format — extract from metadata first.
209        // Fallback 1: Sibling {stem}.tokenizer.json next to the .apr file
210        // Fallback 2: Error — training requires a BPE tokenizer.
211        let tokenizer = {
212            // PRIMARY: Extract embedded tokenizer from APR metadata
213            let embedded = Self::extract_embedded_tokenizer(apr_path);
214
215            if let Some(tok) = embedded {
216                eprintln!(
217                    "[tokenizer] Loaded embedded BPE tokenizer from APR metadata (vocab_size={})",
218                    tok.vocab_size(),
219                );
220                Some(tok)
221            } else {
222                // FALLBACK: Sibling tokenizer.json file
223                let sibling = apr_path.file_stem().and_then(|stem| {
224                    apr_path
225                        .parent()
226                        .map(|p| p.join(format!("{}.tokenizer.json", stem.to_str().unwrap_or(""))))
227                });
228
229                match sibling {
230                    Some(ref path) if path.exists() => {
231                        let tok = HfTokenizer::from_file(path).map_err(|e| {
232                            crate::Error::ConfigError(format!(
233                                "Failed to load tokenizer from '{}': {e}. \
234                                 Training requires a BPE tokenizer.",
235                                path.display(),
236                            ))
237                        })?;
238                        eprintln!(
239                            "[tokenizer] Loaded BPE tokenizer from sibling {} (vocab_size={})",
240                            path.display(),
241                            tok.vocab_size(),
242                        );
243                        Some(tok)
244                    }
245                    _ => {
246                        return Err(crate::Error::ConfigError(format!(
247                            "No tokenizer found for '{}'. APR metadata has no embedded \
248                             tokenizer, and no sibling '{}.tokenizer.json' found. \
249                             Re-import with `apr import` to embed the tokenizer, or \
250                             place a tokenizer.json file next to the .apr file.",
251                            apr_path.display(),
252                            apr_path.file_stem().unwrap_or_default().to_str().unwrap_or(""),
253                        )));
254                    }
255                }
256            }
257        };
258
259        #[allow(unused_mut)]
260        let mut pipeline = Self {
261            model,
262            lora_layers,
263            config: instruct_config,
264            optimizer,
265            tokenizer,
266            model_dir: Some(apr_path.to_path_buf()),
267            profiler: StepProfiler::disabled(),
268            #[cfg(feature = "cuda")]
269            cuda_trainer: None,
270            #[cfg(feature = "cuda")]
271            cuda_blocks: None,
272            #[cfg(feature = "cuda")]
273            shared_scratch: None,
274            #[cfg(feature = "cuda")]
275            cuda_nan_count: 0,
276            #[cfg(feature = "cuda")]
277            gpu_training: None,
278            #[cfg(feature = "cuda")]
279            cuda_lora_grad_workspace: None,
280            #[cfg(feature = "cuda")]
281            lora_fused_clip: None,
282            #[cfg(feature = "cuda")]
283            cuda_lora_optimizer_states: None,
284            #[cfg(feature = "cuda")]
285            nf4_lora_step: 0,
286            #[cfg(feature = "cuda")]
287            vram_guard: None,
288            #[cfg(feature = "gpu")]
289            wgpu_training: None,
290        };
291
292        #[cfg(feature = "cuda")]
293        if pipeline.config.quantize_nf4 {
294            pipeline.init_cuda(model_config);
295        }
296
297        Ok(pipeline)
298    }
299
300    /// Extract BPE tokenizer from APR file's embedded metadata.
301    ///
302    /// CONTRACT: apr_tokenizer_embedding (model-format-conversion-v1.yaml, PMAT-154)
303    /// APR files store tokenizer vocabulary and merges in the metadata section.
304    /// This reconstructs a HuggingFace-compatible tokenizer.json from those fields.
305    ///
306    /// Returns None if the APR file lacks embedded tokenizer data (pre-PMAT-154 files).
307    // CONTRACT L5: If tokenizer is extracted, it must have non-zero vocab
308    #[ensures(ret.as_ref().is_none_or(|t| t.vocab_size() > 0))]
309    fn extract_embedded_tokenizer(apr_path: &Path) -> Option<HfTokenizer> {
310        use aprender::serialization::apr::AprReader;
311
312        let reader = AprReader::open(apr_path).ok()?;
313
314        // Extract vocabulary: tokenizer.vocabulary is an array of token strings
315        let vocab_array = reader.metadata.get("tokenizer.vocabulary")?;
316        let vocab: Vec<&str> = vocab_array.as_array()?.iter().filter_map(|v| v.as_str()).collect();
317
318        if vocab.is_empty() {
319            return None;
320        }
321
322        // Extract merges: tokenizer.merges is an array of "token1 token2" strings
323        let merges: Vec<&str> = reader
324            .metadata
325            .get("tokenizer.merges")
326            .and_then(|v| v.as_array())
327            .map(|arr| arr.iter().filter_map(|v| v.as_str()).collect())
328            .unwrap_or_default();
329
330        // Reconstruct HuggingFace tokenizer.json format
331        // Format: {"model": {"type": "BPE", "vocab": {"token": id, ...}, "merges": [...]}, "added_tokens": []}
332        let mut vocab_map = serde_json::Map::new();
333        for (id, token) in vocab.iter().enumerate() {
334            vocab_map.insert(
335                (*token).to_string(),
336                serde_json::Value::Number(serde_json::Number::from(id)),
337            );
338        }
339
340        let merges_json: Vec<serde_json::Value> =
341            merges.iter().map(|m| serde_json::Value::String((*m).to_string())).collect();
342
343        let tokenizer_json = serde_json::json!({
344            "model": {
345                "type": "BPE",
346                "vocab": vocab_map,
347                "merges": merges_json,
348            },
349            "added_tokens": [],
350        });
351
352        let json_str = serde_json::to_string(&tokenizer_json).ok()?;
353        HfTokenizer::from_json(&json_str).ok()
354    }
355
356    /// Build LoRA layers for Q and V projections (same pattern as ClassifyPipeline).
357    /// Build LoRA layers for Q and V projections of each transformer layer.
358    pub fn build_lora_layers(
359        model: &Transformer,
360        model_config: &TransformerConfig,
361        config: &InstructConfig,
362    ) -> Vec<LoRALayer> {
363        // rank=0 means no LoRA — return empty (no trainable adapters)
364        if config.lora_rank == 0 {
365            return Vec::new();
366        }
367
368        let hidden = model_config.hidden_size;
369        let head_dim =
370            model_config.head_dim_override.unwrap_or(hidden / model_config.num_attention_heads);
371
372        let mut lora_layers = Vec::new();
373
374        for layer in &model.layers {
375            let attn = &layer.self_attn;
376
377            // Q projection LoRA
378            let q_dim = model_config.num_attention_heads * head_dim;
379            let q_weight = Tensor::from_vec(
380                attn.w_q.data().as_slice().expect("contiguous w_q").to_vec(),
381                false,
382            );
383            lora_layers.push(LoRALayer::new(
384                q_weight,
385                q_dim,
386                hidden,
387                config.lora_rank,
388                config.lora_alpha,
389            ));
390
391            // V projection LoRA
392            let v_dim = model_config.num_kv_heads * head_dim;
393            let v_weight = Tensor::from_vec(
394                attn.w_v.data().as_slice().expect("contiguous w_v").to_vec(),
395                false,
396            );
397            lora_layers.push(LoRALayer::new(
398                v_weight,
399                v_dim,
400                hidden,
401                config.lora_rank,
402                config.lora_alpha,
403            ));
404        }
405
406        lora_layers
407    }
408
409    /// Inject trained adapter weights from PEFT format into LoRA layers (ENT-269).
410    ///
411    /// Maps PEFT tensor names (e.g., `base_model.model.model.layers.0.self_attn.q_proj.lora_A.weight`)
412    /// to the corresponding LoRA layer index. Layers are ordered as [Q(0), V(0), Q(1), V(1), ...].
413    fn inject_adapter_weights(
414        lora_layers: &mut [LoRALayer],
415        weights: &[(String, Vec<f32>)],
416        num_layers: usize,
417    ) {
418        let mut loaded = 0usize;
419        for (name, data) in weights {
420            // Parse layer index from "layers.{idx}" in the tensor name
421            let parts: Vec<&str> = name.split('.').collect();
422            let layer_idx = parts
423                .iter()
424                .position(|&p| p == "layers")
425                .and_then(|i| parts.get(i + 1))
426                .and_then(|s| s.parse::<usize>().ok());
427
428            let is_q = name.contains("q_proj");
429            let is_a = name.contains("lora_A");
430
431            if let Some(idx) = layer_idx {
432                if idx >= num_layers {
433                    continue;
434                }
435                let lora_idx = idx * 2 + usize::from(!is_q);
436                if lora_idx >= lora_layers.len() {
437                    continue;
438                }
439
440                let tensor = Tensor::from_vec(data.clone(), true);
441                if is_a {
442                    *lora_layers[lora_idx].lora_a_mut() = tensor;
443                } else {
444                    *lora_layers[lora_idx].lora_b_mut() = tensor;
445                }
446                loaded += 1;
447            }
448        }
449        eprintln!("[adapter] Injected {loaded}/{} weight tensors", weights.len());
450    }
451}
entrenar/finetune/instruct_pipeline/constructors.rs

entrenar/finetune/instruct_pipeline/
constructors.rs