entrenar/finetune/instruct_pipeline/
constructors.rs1#[allow(clippy::wildcard_imports)]
5use super::*;
6use provable_contracts_macros::{ensures, requires};
7
8impl InstructPipeline {
9 pub fn new(model_config: &TransformerConfig, instruct_config: InstructConfig) -> Self {
11 let model = Transformer::new(model_config);
12 let mut lora_layers = Self::build_lora_layers(&model, model_config, &instruct_config);
13
14 for lora in &mut lora_layers {
15 for param in lora.trainable_params() {
16 param.set_requires_grad(true);
17 }
18 }
19
20 let optimizer = AdamW::default_params(instruct_config.learning_rate);
21
22 #[allow(unused_mut)]
23 let mut pipeline = Self {
24 model,
25 lora_layers,
26 config: instruct_config,
27 optimizer,
28 tokenizer: None,
29 model_dir: None,
30 profiler: StepProfiler::disabled(),
31 #[cfg(feature = "cuda")]
32 cuda_trainer: None,
33 #[cfg(feature = "cuda")]
34 cuda_blocks: None,
35 #[cfg(feature = "cuda")]
36 shared_scratch: None,
37 #[cfg(feature = "cuda")]
38 cuda_nan_count: 0,
39 #[cfg(feature = "cuda")]
40 gpu_training: None,
41 #[cfg(feature = "cuda")]
42 cuda_lora_grad_workspace: None,
43 #[cfg(feature = "cuda")]
44 lora_fused_clip: None,
45 #[cfg(feature = "cuda")]
46 cuda_lora_optimizer_states: None,
47 #[cfg(feature = "cuda")]
48 nf4_lora_step: 0,
49 #[cfg(feature = "cuda")]
50 vram_guard: None,
51 #[cfg(feature = "gpu")]
52 wgpu_training: None,
53 };
54
55 #[cfg(feature = "cuda")]
56 if pipeline.config.quantize_nf4 {
57 pipeline.init_cuda(model_config);
58 }
59
60 #[cfg(feature = "gpu")]
62 if pipeline.wgpu_training.is_none() {
63 #[cfg(feature = "cuda")]
64 let cuda_active = pipeline.cuda_blocks.is_some();
65 #[cfg(not(feature = "cuda"))]
66 let cuda_active = false;
67
68 if !cuda_active {
69 pipeline.try_init_wgpu(model_config);
70 }
71 }
72
73 pipeline
74 }
75
76 pub fn from_pretrained(
83 model_dir: &Path,
84 model_config: &TransformerConfig,
85 instruct_config: InstructConfig,
86 ) -> crate::Result<Self> {
87 let model = Transformer::from_safetensors(model_dir, model_config)?;
88 let mut lora_layers = Self::build_lora_layers(&model, model_config, &instruct_config);
89
90 let adapter_path = model_dir.join("adapter_model.safetensors");
92 if adapter_path.exists() {
93 match crate::lora::load_adapter_peft(model_dir) {
94 Ok((_config, weights)) => {
95 Self::inject_adapter_weights(
96 &mut lora_layers,
97 &weights,
98 model_config.num_hidden_layers,
99 );
100 eprintln!(
101 "[adapter] Loaded trained LoRA adapter ({} tensors) from {}",
102 weights.len(),
103 model_dir.display()
104 );
105 }
106 Err(e) => {
107 eprintln!(
108 "[adapter] Warning: adapter_model.safetensors found but failed to load: {e}"
109 );
110 }
111 }
112 }
113
114 for lora in &mut lora_layers {
115 for param in lora.trainable_params() {
116 param.set_requires_grad(true);
117 }
118 }
119
120 let optimizer = AdamW::default_params(instruct_config.learning_rate);
121
122 let tokenizer_path = model_dir.join("tokenizer.json");
124 let tokenizer = if tokenizer_path.exists() {
125 Some(HfTokenizer::from_file(&tokenizer_path).map_err(|e| {
126 crate::Error::ConfigError(format!(
127 "Failed to load tokenizer from '{}': {e}. \
128 Training requires a BPE tokenizer.",
129 tokenizer_path.display(),
130 ))
131 })?)
132 } else {
133 return Err(crate::Error::ConfigError(format!(
134 "No tokenizer.json found in '{}'. Training requires a BPE tokenizer.",
135 model_dir.display(),
136 )));
137 };
138
139 #[allow(unused_mut)]
140 let mut pipeline = Self {
141 model,
142 lora_layers,
143 config: instruct_config,
144 optimizer,
145 tokenizer,
146 model_dir: Some(model_dir.to_path_buf()),
147 profiler: StepProfiler::disabled(),
148 #[cfg(feature = "cuda")]
149 cuda_trainer: None,
150 #[cfg(feature = "cuda")]
151 cuda_blocks: None,
152 #[cfg(feature = "cuda")]
153 shared_scratch: None,
154 #[cfg(feature = "cuda")]
155 cuda_nan_count: 0,
156 #[cfg(feature = "cuda")]
157 gpu_training: None,
158 #[cfg(feature = "cuda")]
159 cuda_lora_grad_workspace: None,
160 #[cfg(feature = "cuda")]
161 lora_fused_clip: None,
162 #[cfg(feature = "cuda")]
163 cuda_lora_optimizer_states: None,
164 #[cfg(feature = "cuda")]
165 nf4_lora_step: 0,
166 #[cfg(feature = "cuda")]
167 vram_guard: None,
168 #[cfg(feature = "gpu")]
169 wgpu_training: None,
170 };
171
172 #[cfg(feature = "cuda")]
173 if pipeline.config.quantize_nf4 {
174 pipeline.init_cuda(model_config);
175 }
176
177 Ok(pipeline)
178 }
179
180 #[requires(apr_path.exists())]
192 pub fn from_apr(
193 apr_path: &Path,
194 model_config: &TransformerConfig,
195 instruct_config: InstructConfig,
196 ) -> crate::Result<Self> {
197 let model = Transformer::from_apr(apr_path, model_config)?;
198 let mut lora_layers = Self::build_lora_layers(&model, model_config, &instruct_config);
199
200 for lora in &mut lora_layers {
201 for param in lora.trainable_params() {
202 param.set_requires_grad(true);
203 }
204 }
205
206 let optimizer = AdamW::default_params(instruct_config.learning_rate);
207
208 let tokenizer = {
212 let embedded = Self::extract_embedded_tokenizer(apr_path);
214
215 if let Some(tok) = embedded {
216 eprintln!(
217 "[tokenizer] Loaded embedded BPE tokenizer from APR metadata (vocab_size={})",
218 tok.vocab_size(),
219 );
220 Some(tok)
221 } else {
222 let sibling = apr_path.file_stem().and_then(|stem| {
224 apr_path
225 .parent()
226 .map(|p| p.join(format!("{}.tokenizer.json", stem.to_str().unwrap_or(""))))
227 });
228
229 match sibling {
230 Some(ref path) if path.exists() => {
231 let tok = HfTokenizer::from_file(path).map_err(|e| {
232 crate::Error::ConfigError(format!(
233 "Failed to load tokenizer from '{}': {e}. \
234 Training requires a BPE tokenizer.",
235 path.display(),
236 ))
237 })?;
238 eprintln!(
239 "[tokenizer] Loaded BPE tokenizer from sibling {} (vocab_size={})",
240 path.display(),
241 tok.vocab_size(),
242 );
243 Some(tok)
244 }
245 _ => {
246 return Err(crate::Error::ConfigError(format!(
247 "No tokenizer found for '{}'. APR metadata has no embedded \
248 tokenizer, and no sibling '{}.tokenizer.json' found. \
249 Re-import with `apr import` to embed the tokenizer, or \
250 place a tokenizer.json file next to the .apr file.",
251 apr_path.display(),
252 apr_path.file_stem().unwrap_or_default().to_str().unwrap_or(""),
253 )));
254 }
255 }
256 }
257 };
258
259 #[allow(unused_mut)]
260 let mut pipeline = Self {
261 model,
262 lora_layers,
263 config: instruct_config,
264 optimizer,
265 tokenizer,
266 model_dir: Some(apr_path.to_path_buf()),
267 profiler: StepProfiler::disabled(),
268 #[cfg(feature = "cuda")]
269 cuda_trainer: None,
270 #[cfg(feature = "cuda")]
271 cuda_blocks: None,
272 #[cfg(feature = "cuda")]
273 shared_scratch: None,
274 #[cfg(feature = "cuda")]
275 cuda_nan_count: 0,
276 #[cfg(feature = "cuda")]
277 gpu_training: None,
278 #[cfg(feature = "cuda")]
279 cuda_lora_grad_workspace: None,
280 #[cfg(feature = "cuda")]
281 lora_fused_clip: None,
282 #[cfg(feature = "cuda")]
283 cuda_lora_optimizer_states: None,
284 #[cfg(feature = "cuda")]
285 nf4_lora_step: 0,
286 #[cfg(feature = "cuda")]
287 vram_guard: None,
288 #[cfg(feature = "gpu")]
289 wgpu_training: None,
290 };
291
292 #[cfg(feature = "cuda")]
293 if pipeline.config.quantize_nf4 {
294 pipeline.init_cuda(model_config);
295 }
296
297 Ok(pipeline)
298 }
299
300 #[ensures(ret.as_ref().is_none_or(|t| t.vocab_size() > 0))]
309 fn extract_embedded_tokenizer(apr_path: &Path) -> Option<HfTokenizer> {
310 use aprender::serialization::apr::AprReader;
311
312 let reader = AprReader::open(apr_path).ok()?;
313
314 let vocab_array = reader.metadata.get("tokenizer.vocabulary")?;
316 let vocab: Vec<&str> = vocab_array.as_array()?.iter().filter_map(|v| v.as_str()).collect();
317
318 if vocab.is_empty() {
319 return None;
320 }
321
322 let merges: Vec<&str> = reader
324 .metadata
325 .get("tokenizer.merges")
326 .and_then(|v| v.as_array())
327 .map(|arr| arr.iter().filter_map(|v| v.as_str()).collect())
328 .unwrap_or_default();
329
330 let mut vocab_map = serde_json::Map::new();
333 for (id, token) in vocab.iter().enumerate() {
334 vocab_map.insert(
335 (*token).to_string(),
336 serde_json::Value::Number(serde_json::Number::from(id)),
337 );
338 }
339
340 let merges_json: Vec<serde_json::Value> =
341 merges.iter().map(|m| serde_json::Value::String((*m).to_string())).collect();
342
343 let tokenizer_json = serde_json::json!({
344 "model": {
345 "type": "BPE",
346 "vocab": vocab_map,
347 "merges": merges_json,
348 },
349 "added_tokens": [],
350 });
351
352 let json_str = serde_json::to_string(&tokenizer_json).ok()?;
353 HfTokenizer::from_json(&json_str).ok()
354 }
355
356 pub fn build_lora_layers(
359 model: &Transformer,
360 model_config: &TransformerConfig,
361 config: &InstructConfig,
362 ) -> Vec<LoRALayer> {
363 if config.lora_rank == 0 {
365 return Vec::new();
366 }
367
368 let hidden = model_config.hidden_size;
369 let head_dim =
370 model_config.head_dim_override.unwrap_or(hidden / model_config.num_attention_heads);
371
372 let mut lora_layers = Vec::new();
373
374 for layer in &model.layers {
375 let attn = &layer.self_attn;
376
377 let q_dim = model_config.num_attention_heads * head_dim;
379 let q_weight = Tensor::from_vec(
380 attn.w_q.data().as_slice().expect("contiguous w_q").to_vec(),
381 false,
382 );
383 lora_layers.push(LoRALayer::new(
384 q_weight,
385 q_dim,
386 hidden,
387 config.lora_rank,
388 config.lora_alpha,
389 ));
390
391 let v_dim = model_config.num_kv_heads * head_dim;
393 let v_weight = Tensor::from_vec(
394 attn.w_v.data().as_slice().expect("contiguous w_v").to_vec(),
395 false,
396 );
397 lora_layers.push(LoRALayer::new(
398 v_weight,
399 v_dim,
400 hidden,
401 config.lora_rank,
402 config.lora_alpha,
403 ));
404 }
405
406 lora_layers
407 }
408
409 fn inject_adapter_weights(
414 lora_layers: &mut [LoRALayer],
415 weights: &[(String, Vec<f32>)],
416 num_layers: usize,
417 ) {
418 let mut loaded = 0usize;
419 for (name, data) in weights {
420 let parts: Vec<&str> = name.split('.').collect();
422 let layer_idx = parts
423 .iter()
424 .position(|&p| p == "layers")
425 .and_then(|i| parts.get(i + 1))
426 .and_then(|s| s.parse::<usize>().ok());
427
428 let is_q = name.contains("q_proj");
429 let is_a = name.contains("lora_A");
430
431 if let Some(idx) = layer_idx {
432 if idx >= num_layers {
433 continue;
434 }
435 let lora_idx = idx * 2 + usize::from(!is_q);
436 if lora_idx >= lora_layers.len() {
437 continue;
438 }
439
440 let tensor = Tensor::from_vec(data.clone(), true);
441 if is_a {
442 *lora_layers[lora_idx].lora_a_mut() = tensor;
443 } else {
444 *lora_layers[lora_idx].lora_b_mut() = tensor;
445 }
446 loaded += 1;
447 }
448 }
449 eprintln!("[adapter] Injected {loaded}/{} weight tensors", weights.len());
450 }
451}