use crate::Example;
use anyhow::Result;

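/// Getting logits with `DummyGPTModel` (page 97): builds the dummy model
/// from `Config::gpt2_124m()` and prints the logits for a two-sequence batch.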
pub struct EG01;

impl Example for EG01 {
    fn description(&self) -> String {
        String::from("Getting logits with `DummyGPTModel`.")
    }

    fn page_source(&self) -> usize {
        97_usize
    }

    fn main(&self) -> Result<()> {
        use crate::listings::ch04::{Config, DummyGPTModel};
        use candle_core::{DType, IndexOp, Module};
        use candle_nn::{VarBuilder, VarMap};

        let batch = addons::get_batch_for_gpts()?;
        println!("batch: {:?}", batch.to_vec2::<u32>());

        // build the dummy model with freshly initialized weights
        let varmap = VarMap::new();
        let vb = VarBuilder::from_varmap(&varmap, DType::F32, batch.device());
        let model = DummyGPTModel::new(Config::gpt2_124m(), vb)?;

        // forward pass to get next-token logits
        let logits = model.forward(&batch)?;
        println!("output shape: {:?}", logits.shape());

        // print only the first 10 logits of each token for brevity
        println!("logits: {:?}", logits.i((.., .., 0..10))?.to_vec3::<f32>());
        Ok(())
    }
}

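/// Manual computation of layer normalization (page 100): normalizes
/// activations as `(x - mean) / sqrt(var)` along the last dimension and
/// verifies the result has zero mean and unit variance.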
pub struct EG02;

impl Example for EG02 {
    fn description(&self) -> String {
        String::from("Manual computation of layer normalization.")
    }

    fn page_source(&self) -> usize {
        100_usize
    }

    fn main(&self) -> Result<()> {
        use candle_core::{DType, Device, Module, Tensor, D};
        use candle_nn::{linear_b, seq, Activation, VarBuilder, VarMap};

        let dev = Device::cuda_if_available(0)?;
        let varmap = VarMap::new();
        let vb = VarBuilder::from_varmap(&varmap, DType::F32, &dev);

        // a random batch of two 5-dimensional inputs
        let batch_example = Tensor::rand(0f32, 1f32, (2_usize, 5_usize), vb.device())?;

        // a small Linear + ReLU layer whose activations we will normalize
        let layer = seq()
            .add(linear_b(5_usize, 6_usize, false, vb.pp("linear"))?)
            .add(Activation::Relu);

        let out = layer.forward(&batch_example)?;
        println!("out: {:?}", out.to_vec2::<f32>());

        // mean and variance along the last dimension
        let mean = out.mean_keepdim(D::Minus1)?;
        let var = out.var_keepdim(D::Minus1)?;
        println!("mean: {:?}", mean.to_vec2::<f32>());
        println!("variance: {:?}", var.to_vec2::<f32>());

        // normalize as (out - mean) / sqrt(var), then re-check the statistics
        let out_norm = out.broadcast_sub(&mean)?.broadcast_div(&var.sqrt()?)?;
        let mean = out_norm.mean_keepdim(D::Minus1)?;
        let var = out_norm.var_keepdim(D::Minus1)?;
        println!("normalized out: {:?}", out_norm.to_vec2::<f32>());
        println!("mean: {:?}", mean.to_vec2::<f32>());
        println!("variance: {:?}", var.to_vec2::<f32>());
        Ok(())
    }
}

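/// Example usage of `LayerNorm` (page 104).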
pub struct EG03;

impl Example for EG03 {
    fn description(&self) -> String {
        String::from("Example usage of `LayerNorm`.")
    }

    fn page_source(&self) -> usize {
        104_usize
    }

    fn main(&self) -> Result<()> {
        use crate::listings::ch04::LayerNorm;
        use candle_core::{DType, Device, Module, Tensor, D};
        use candle_nn::{VarBuilder, VarMap};

        let dev = Device::cuda_if_available(0)?;
        let varmap = VarMap::new();
        let vb = VarBuilder::from_varmap(&varmap, DType::F32, &dev);

        // a random batch of two 5-dimensional inputs
        let batch_example = Tensor::rand(0f32, 1f32, (2_usize, 5_usize), vb.device())?;

        // apply `LayerNorm` over the embedding dimension
        let emb_dim = 5_usize;
        let ln = LayerNorm::new(emb_dim, vb.pp("layer_norm"))?;
        let out_ln = ln.forward(&batch_example)?;

        // the normalized outputs should have (approximately) zero mean and unit variance
        let mean = out_ln.mean_keepdim(D::Minus1)?;
        let var = out_ln.var_keepdim(D::Minus1)?;
        println!("mean: {:?}", mean.to_vec2::<f32>());
        println!("variance: {:?}", var.to_vec2::<f32>());
        Ok(())
    }
}

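/// Example usage of the `FeedForward` module (page 108).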
pub struct EG04;

impl Example for EG04 {
    fn description(&self) -> String {
        String::from("Example usage of `FeedForward` Module.")
    }

    fn page_source(&self) -> usize {
        108_usize
    }

    fn main(&self) -> Result<()> {
        use crate::listings::ch04::{Config, FeedForward};
        use candle_core::{DType, Device, IndexOp, Module, Tensor};
        use candle_nn::{VarBuilder, VarMap};

        let dev = Device::cuda_if_available(0)?;
        let varmap = VarMap::new();
        let vb = VarBuilder::from_varmap(&varmap, DType::F32, &dev);
        let cfg = Config::gpt2_124m();

        // random input of shape (batch_size, seq_len, emb_dim)
        let (batch_size, seq_len) = (2_usize, 3_usize);
        let x = Tensor::rand(0f32, 1f32, (batch_size, seq_len, cfg.emb_dim), vb.device())?;

        // the feed-forward block preserves the input shape
        let ffn = FeedForward::new(cfg, vb.pp("ffn"))?;
        let out = ffn.forward(&x)?;

        println!("{:?}", out);
        println!("{:?}", out.i((0, 0, 0..10))?.to_vec1::<f32>());
        Ok(())
    }
}

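/// Comparison of gradients with and without shortcut connections (page 111):
/// without shortcuts, gradient magnitudes shrink toward the earlier layers
/// (vanishing gradients).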
pub struct EG05;

impl Example for EG05 {
    fn description(&self) -> String {
        String::from("Comparison of gradients with and without shortcut connections.")
    }

    fn page_source(&self) -> usize {
        111_usize
    }

    fn main(&self) -> Result<()> {
        use crate::listings::ch04::ExampleDeepNeuralNetwork;
        use candle_core::{DType, Device, Tensor};
        use candle_nn::{VarBuilder, VarMap};

        let dev = Device::cuda_if_available(0)?;
        let varmap = VarMap::new();
        let vb = VarBuilder::from_varmap(&varmap, DType::F32, &dev);

        // two identical deep networks that differ only in shortcut connections
        let layer_sizes = &[3_usize, 3, 3, 3, 3, 1];
        let sample_input = Tensor::new(&[[1_f32, 0., -1.]], vb.device())?;
        let model_without_shortcut =
            ExampleDeepNeuralNetwork::new(layer_sizes, false, vb.pp("model_wout_shortcut"))?;

        let model_with_shortcut =
            ExampleDeepNeuralNetwork::new(layer_sizes, true, vb.pp("model_with_shortcut"))?;

        println!("model_without_shortcut gradients:");
        addons::print_gradients(model_without_shortcut, &sample_input)?;
        println!("model_with_shortcut gradients:");
        addons::print_gradients(model_with_shortcut, &sample_input)?;
        Ok(())
    }
}

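/// Example usage of `TransformerBlock` (page 116).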
pub struct EG06;

impl Example for EG06 {
    fn description(&self) -> String {
        String::from("Example usage of `TransformerBlock`.")
    }

    fn page_source(&self) -> usize {
        116_usize
    }

    fn main(&self) -> Result<()> {
        use crate::listings::ch04::{Config, TransformerBlock};
        use candle_core::{DType, Device, IndexOp, Tensor};
        use candle_nn::{VarBuilder, VarMap};

        // build a transformer block from the GPT-2 124M config
        let dev = Device::cuda_if_available(0)?;
        let varmap = VarMap::new();
        let vb = VarBuilder::from_varmap(&varmap, DType::F32, &dev);
        let cfg = Config::gpt2_124m();
        let block = TransformerBlock::new(cfg, vb.pp("block"))?;

        // random input of shape (batch_size, num_tokens, emb_dim)
        let (batch_size, num_tokens) = (2_usize, 4_usize);
        let x = Tensor::rand(
            0f32,
            1f32,
            (batch_size, num_tokens, cfg.emb_dim),
            vb.device(),
        )?;

        // the block preserves the input shape
        let output = block.forward(&x)?;

        println!("Input shape: {:?}", x.shape());
        println!("Output shape: {:?}", output.shape());

        // print the first 10 features of every token of the first batch item
        println!(
            "Output: {:?}",
            output.i((0..1, .., 0..10))?.to_vec3::<f32>()
        );
        Ok(())
    }
}

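/// Example usage of `GPTModel` (page 120): prints logits, counts parameters,
/// and shows the parameter savings from weight tying (the output head shares
/// the token-embedding shape, so `vocab_size * emb_dim` weights drop out).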
pub struct EG07;

impl Example for EG07 {
    fn description(&self) -> String {
        String::from("Example usage of `GPTModel`.")
    }

    fn page_source(&self) -> usize {
        120_usize
    }

    fn main(&self) -> Result<()> {
        use crate::listings::ch04::{Config, GPTModel};
        use candle_core::{DType, Error, IndexOp, ModuleT};
        use candle_nn::{VarBuilder, VarMap};

        let batch = addons::get_batch_for_gpts()?;
        println!("batch: {:?}", batch.to_vec2::<u32>());

        // build the full GPT model with freshly initialized weights
        let varmap = VarMap::new();
        let vb = VarBuilder::from_varmap(&varmap, DType::F32, batch.device());
        let model = GPTModel::new(Config::gpt2_124m(), vb)?;

        // forward pass in inference mode (train = false)
        let logits = model.forward_t(&batch, false)?;
        println!("output shape: {:?}", logits.shape());

        // print only the first 10 logits of each token for brevity
        println!("logits: {:?}", logits.i((.., .., 0..10))?.to_vec3::<f32>());

        // count all trainable parameters
        let mut total_params = 0_usize;
        for t in varmap.all_vars().iter() {
            total_params += t.elem_count();
        }
        println!("Total number of parameters: {}", total_params);

        // look up the shapes of the token-embedding and output-head weights
        let varmap_binding = varmap.data().lock().unwrap();
        let tok_emb_dims = varmap_binding
            .get("tok_emb.weight")
            .ok_or_else(|| {
                Error::CannotFindTensor {
                    path: "tok_emb.weight".to_string(),
                }
                .bt()
            })?
            .dims();
        println!("Token embedding layer shape {:?}", tok_emb_dims);
        let out_head_dims = varmap_binding
            .get("out_head.weight")
            .ok_or_else(|| {
                Error::CannotFindTensor {
                    path: "out_head.weight".to_string(),
                }
                .bt()
            })?
            .dims();
        println!("Output layer shape {:?}", out_head_dims);

        // with weight tying, the output head reuses the token-embedding matrix,
        // so its parameters are not counted separately
        let total_params_gpt2 = total_params - (out_head_dims[0] * out_head_dims[1]);
        println!(
            "Number of trainable parameters considering weight tying {}",
            total_params_gpt2
        );

        // memory footprint assuming 4 bytes per f32 parameter
        let total_size_bytes = total_params * 4;
        let total_size_mb = total_size_bytes as f32 / (1024_f32 * 1024.);
        println!("Total size of the model: {} MB", total_size_mb);
        Ok(())
    }
}

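/// Example usage of `generate_text_simple` (page 125): greedy decoding with
/// an untrained model, so the continuation is gibberish.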
pub struct EG08;

impl Example for EG08 {
    fn description(&self) -> String {
        String::from("Example usage of `generate_text_simple`.")
    }

    fn page_source(&self) -> usize {
        125_usize
    }

    fn main(&self) -> Result<()> {
        use crate::listings::ch04::{generate_text_simple, Config, GPTModel};
        use candle_core::{DType, Device, Tensor};
        use candle_nn::{VarBuilder, VarMap};
        use tiktoken_rs::get_bpe_from_model;

        // encode the start context into a (1, num_tokens) tensor
        let dev = Device::cuda_if_available(0)?;
        let start_context = "Hello, I am";
        let tokenizer = get_bpe_from_model("gpt2")?;
        let encoded = tokenizer.encode_with_special_tokens(start_context);
        let num_tokens = encoded.len();
        println!("encoded: {:?}", encoded);
        let encoded_tensor = Tensor::from_vec(encoded, (1_usize, num_tokens), &dev)?;
        println!("encoded_tensor.shape {:?}", encoded_tensor.shape());

        // build the model with freshly initialized (i.e., untrained) weights
        let varmap = VarMap::new();
        let vb = VarBuilder::from_varmap(&varmap, DType::F32, &dev);
        let cfg = Config::gpt2_124m();
        let model = GPTModel::new(cfg, vb)?;

        // generate 6 new tokens
        let out = generate_text_simple(&model, encoded_tensor, 6_usize, cfg.context_length)?;
        println!("Output: {:?}", out.to_vec2::<u32>());
        println!("Output length: {}", out.dims()[1]);

        // decode the generated token ids back to text
        let decoded_text = tokenizer.decode(out.reshape(out.dims()[1])?.to_vec1::<u32>()?);
        println!("{:?}", decoded_text);
        Ok(())
    }
}

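/// Shared helpers for the examples in this module.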
pub mod addons {
    use crate::listings::ch04::ExampleDeepNeuralNetwork;
    use candle_core::{Device, Error, Module, Result, Tensor};
    use tiktoken_rs::get_bpe_from_model;

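    /// Tokenizes two example prompts ("Every effort moves you", "Every day
    /// holds a") with the GPT-2 BPE tokenizer and stacks them into a `(2, 4)`
    /// batch of token ids.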
    pub fn get_batch_for_gpts() -> Result<Tensor> {
        let dev = Device::cuda_if_available(0)?;

        // encode both prompts (4 tokens each) into a single flat buffer
        let mut batch_tokens: Vec<u32> = Vec::new();
        let tokenizer =
            get_bpe_from_model("gpt2").map_err(|e| Error::Msg(format!("Tokenizer error: {e}")))?;
        batch_tokens.append(&mut tokenizer.encode_with_special_tokens("Every effort moves you"));
        batch_tokens.append(&mut tokenizer.encode_with_special_tokens("Every day holds a"));

        Tensor::from_vec(batch_tokens, (2_usize, 4_usize), &dev)
    }

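    /// Computes an MSE loss for `model` on `x` against a zero target, runs
    /// backprop, and prints the mean absolute gradient of each layer's
    /// weight tensor.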
    pub fn print_gradients(model: ExampleDeepNeuralNetwork, x: &Tensor) -> Result<()> {
        use candle_nn::loss::mse;

        let output = model.forward(x)?;
        let target = Tensor::new(&[[0_f32]], x.device())?;

        let loss = mse(&output, &target)?;
        let grads = loss.backward()?;

        for (ix, tensor_id) in model.tensor_ids.iter().enumerate() {
            let grad_tensor = grads.get_id(tensor_id.to_owned()).ok_or_else(|| {
                Error::CannotFindTensor {
                    path: format!("{:?}", tensor_id),
                }
                .bt()
            })?;
            println!(
                "layer.{}.weight has gradient mean of {:?}",
                ix,
                grad_tensor.abs()?.mean_all()?.to_scalar::<f32>()?
            );
        }
        println!("\n");
        Ok(())
    }
}