pub struct GPTModel<F: Float + Debug + ScalarOperand + Send + Sync> { /* private fields */ }
Expand description
GPT model implementation
Implementations§
Source§impl<F: Float + Debug + ScalarOperand + Send + Sync> GPTModel<F>
impl<F: Float + Debug + ScalarOperand + Send + Sync> GPTModel<F>
Sourcepub fn new(config: GPTConfig) -> Result<Self>
pub fn new(config: GPTConfig) -> Result<Self>
Create a new GPT model
Examples found in repository?
examples/gpt_example.rs (line 18)
5fn main() -> Result<(), Box<dyn std::error::Error>> {
6 println!("GPT Model Example");
7
8 // Create a small GPT model for demonstration
9 println!("Creating a small GPT model...");
10
11 let config = GPTConfig::custom(
12 10000, // vocab_size
13 128, // hidden_size
14 2, // num_hidden_layers
15 2, // num_attention_heads
16 );
17
18 let model = GPTModel::<f32>::new(config)?;
19
20 // Create dummy input (batch_size=2, seq_len=16)
21 // Input tensor contains token IDs
22 let input = Array::from_shape_fn(
23 IxDyn(&[2, 16]),
24 |_| rand::random::<f32>() * 100.0, // Random token IDs between 0 and 100
25 );
26
27 println!("Input shape: {:?}", input.shape());
28
29 // Forward pass to get hidden states
30 let hidden_states = model.forward(&input)?;
31
32 println!("Hidden states shape: {:?}", hidden_states.shape());
33
34 // Calculate logits for next-token prediction
35 let logits = model.logits(&input)?;
36
37 println!("Logits shape: {:?}", logits.shape());
38 println!("Vocabulary size: {}", logits.shape()[2]);
39
40 // Let's create a GPT-2 Small model
41 println!("\nCreating a GPT-2 Small model...");
42
43 let gpt2_small = GPTModel::<f32>::gpt2_small()?;
44
45 // Create dummy input for a longer sequence
46 let small_input = Array::from_shape_fn(
47 IxDyn(&[1, 32]),
48 |_| rand::random::<f32>() * 1000.0, // Random token IDs
49 );
50
51 println!("GPT-2 Small input shape: {:?}", small_input.shape());
52
53 // Forward pass
54 let small_hidden_states = gpt2_small.forward(&small_input)?;
55
56 println!(
57 "GPT-2 Small hidden states shape: {:?}",
58 small_hidden_states.shape()
59 );
60 println!(
61 "GPT-2 Small hidden dimension: {}",
62 small_hidden_states.shape()[2]
63 );
64
65 // For text generation (logits for next token prediction)
66 let small_logits = gpt2_small.logits(&small_input)?;
67 println!("GPT-2 Small logits shape: {:?}", small_logits.shape());
68 println!("GPT-2 Small vocabulary size: {}", small_logits.shape()[2]);
69
70 println!("\nGPT example completed successfully!");
71
72 Ok(())
73}
Sourcepub fn gpt2_small() -> Result<Self>
pub fn gpt2_small() -> Result<Self>
Create a GPT-2 Small model
Examples found in repository?
examples/gpt_example.rs (line 43)
5fn main() -> Result<(), Box<dyn std::error::Error>> {
6 println!("GPT Model Example");
7
8 // Create a small GPT model for demonstration
9 println!("Creating a small GPT model...");
10
11 let config = GPTConfig::custom(
12 10000, // vocab_size
13 128, // hidden_size
14 2, // num_hidden_layers
15 2, // num_attention_heads
16 );
17
18 let model = GPTModel::<f32>::new(config)?;
19
20 // Create dummy input (batch_size=2, seq_len=16)
21 // Input tensor contains token IDs
22 let input = Array::from_shape_fn(
23 IxDyn(&[2, 16]),
24 |_| rand::random::<f32>() * 100.0, // Random token IDs between 0 and 100
25 );
26
27 println!("Input shape: {:?}", input.shape());
28
29 // Forward pass to get hidden states
30 let hidden_states = model.forward(&input)?;
31
32 println!("Hidden states shape: {:?}", hidden_states.shape());
33
34 // Calculate logits for next-token prediction
35 let logits = model.logits(&input)?;
36
37 println!("Logits shape: {:?}", logits.shape());
38 println!("Vocabulary size: {}", logits.shape()[2]);
39
40 // Let's create a GPT-2 Small model
41 println!("\nCreating a GPT-2 Small model...");
42
43 let gpt2_small = GPTModel::<f32>::gpt2_small()?;
44
45 // Create dummy input for a longer sequence
46 let small_input = Array::from_shape_fn(
47 IxDyn(&[1, 32]),
48 |_| rand::random::<f32>() * 1000.0, // Random token IDs
49 );
50
51 println!("GPT-2 Small input shape: {:?}", small_input.shape());
52
53 // Forward pass
54 let small_hidden_states = gpt2_small.forward(&small_input)?;
55
56 println!(
57 "GPT-2 Small hidden states shape: {:?}",
58 small_hidden_states.shape()
59 );
60 println!(
61 "GPT-2 Small hidden dimension: {}",
62 small_hidden_states.shape()[2]
63 );
64
65 // For text generation (logits for next token prediction)
66 let small_logits = gpt2_small.logits(&small_input)?;
67 println!("GPT-2 Small logits shape: {:?}", small_logits.shape());
68 println!("GPT-2 Small vocabulary size: {}", small_logits.shape()[2]);
69
70 println!("\nGPT example completed successfully!");
71
72 Ok(())
73}
Sourcepub fn gpt2_medium() -> Result<Self>
pub fn gpt2_medium() -> Result<Self>
Create a GPT-2 Medium model
Sourcepub fn gpt2_large() -> Result<Self>
pub fn gpt2_large() -> Result<Self>
Create a GPT-2 Large model
Sourcepub fn custom(
vocab_size: usize,
hidden_size: usize,
num_hidden_layers: usize,
num_attention_heads: usize,
) -> Result<Self>
pub fn custom( vocab_size: usize, hidden_size: usize, num_hidden_layers: usize, num_attention_heads: usize, ) -> Result<Self>
Create a custom GPT model
Sourcepub fn logits(&self, input: &Array<F, IxDyn>) -> Result<Array<F, IxDyn>>
pub fn logits(&self, input: &Array<F, IxDyn>) -> Result<Array<F, IxDyn>>
Calculate logits (prediction scores) for next tokens
Examples found in repository?
examples/gpt_example.rs (line 35)
5fn main() -> Result<(), Box<dyn std::error::Error>> {
6 println!("GPT Model Example");
7
8 // Create a small GPT model for demonstration
9 println!("Creating a small GPT model...");
10
11 let config = GPTConfig::custom(
12 10000, // vocab_size
13 128, // hidden_size
14 2, // num_hidden_layers
15 2, // num_attention_heads
16 );
17
18 let model = GPTModel::<f32>::new(config)?;
19
20 // Create dummy input (batch_size=2, seq_len=16)
21 // Input tensor contains token IDs
22 let input = Array::from_shape_fn(
23 IxDyn(&[2, 16]),
24 |_| rand::random::<f32>() * 100.0, // Random token IDs between 0 and 100
25 );
26
27 println!("Input shape: {:?}", input.shape());
28
29 // Forward pass to get hidden states
30 let hidden_states = model.forward(&input)?;
31
32 println!("Hidden states shape: {:?}", hidden_states.shape());
33
34 // Calculate logits for next-token prediction
35 let logits = model.logits(&input)?;
36
37 println!("Logits shape: {:?}", logits.shape());
38 println!("Vocabulary size: {}", logits.shape()[2]);
39
40 // Let's create a GPT-2 Small model
41 println!("\nCreating a GPT-2 Small model...");
42
43 let gpt2_small = GPTModel::<f32>::gpt2_small()?;
44
45 // Create dummy input for a longer sequence
46 let small_input = Array::from_shape_fn(
47 IxDyn(&[1, 32]),
48 |_| rand::random::<f32>() * 1000.0, // Random token IDs
49 );
50
51 println!("GPT-2 Small input shape: {:?}", small_input.shape());
52
53 // Forward pass
54 let small_hidden_states = gpt2_small.forward(&small_input)?;
55
56 println!(
57 "GPT-2 Small hidden states shape: {:?}",
58 small_hidden_states.shape()
59 );
60 println!(
61 "GPT-2 Small hidden dimension: {}",
62 small_hidden_states.shape()[2]
63 );
64
65 // For text generation (logits for next token prediction)
66 let small_logits = gpt2_small.logits(&small_input)?;
67 println!("GPT-2 Small logits shape: {:?}", small_logits.shape());
68 println!("GPT-2 Small vocabulary size: {}", small_logits.shape()[2]);
69
70 println!("\nGPT example completed successfully!");
71
72 Ok(())
73}
Trait Implementations§
Source§impl<F: Float + Debug + ScalarOperand + Send + Sync> Layer<F> for GPTModel<F>
impl<F: Float + Debug + ScalarOperand + Send + Sync> Layer<F> for GPTModel<F>
Source§fn forward(&self, input: &Array<F, IxDyn>) -> Result<Array<F, IxDyn>>
fn forward(&self, input: &Array<F, IxDyn>) -> Result<Array<F, IxDyn>>
Forward pass of the layer Read more
Source§fn backward(
&self,
_input: &Array<F, IxDyn>,
grad_output: &Array<F, IxDyn>,
) -> Result<Array<F, IxDyn>>
fn backward( &self, _input: &Array<F, IxDyn>, grad_output: &Array<F, IxDyn>, ) -> Result<Array<F, IxDyn>>
Backward pass of the layer to compute gradients Read more
Source§fn update(&mut self, learning_rate: F) -> Result<()>
fn update(&mut self, learning_rate: F) -> Result<()>
Update the layer parameters with the given gradients Read more
Source§fn as_any_mut(&mut self) -> &mut dyn Any
fn as_any_mut(&mut self) -> &mut dyn Any
Get the layer as a mutable dyn Any for downcasting Read more
Source§fn gradients(&self) -> Vec<Array<F, IxDyn>> ⓘ
fn gradients(&self) -> Vec<Array<F, IxDyn>> ⓘ
Get the gradients of the layer parameters Read more
Source§fn set_gradients(&mut self, _gradients: &[Array<F, IxDyn>]) -> Result<()>
fn set_gradients(&mut self, _gradients: &[Array<F, IxDyn>]) -> Result<()>
Set the gradients of the layer parameters Read more
Source§fn set_params(&mut self, _params: &[Array<F, IxDyn>]) -> Result<()>
fn set_params(&mut self, _params: &[Array<F, IxDyn>]) -> Result<()>
Set the parameters of the layer Read more
Source§fn set_training(&mut self, _training: bool)
fn set_training(&mut self, _training: bool)
Set the layer to training mode (true) or evaluation mode (false) Read more
Source§fn is_training(&self) -> bool
fn is_training(&self) -> bool
Get the current training mode Read more
Source§fn layer_type(&self) -> &str
fn layer_type(&self) -> &str
Get the type of the layer (e.g., “Dense”, “Conv2D”) Read more
Source§fn parameter_count(&self) -> usize
fn parameter_count(&self) -> usize
Get the number of trainable parameters in this layer Read more
Source§fn layer_description(&self) -> String
fn layer_description(&self) -> String
Get a detailed description of this layer Read more
Auto Trait Implementations§
impl<F> Freeze for GPTModel<F> where
F: Freeze,
impl<F> !RefUnwindSafe for GPTModel<F>
impl<F> !Send for GPTModel<F>
impl<F> !Sync for GPTModel<F>
impl<F> Unpin for GPTModel<F> where
F: Unpin,
impl<F> !UnwindSafe for GPTModel<F>
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts `self` into a `Left` variant of `Either<Self, Self>` if `into_left` is `true`. Converts `self` into a `Right` variant of `Either<Self, Self>` otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts `self` into a `Left` variant of `Either<Self, Self>` if `into_left(&self)` returns `true`. Converts `self` into a `Right` variant of `Either<Self, Self>` otherwise. Read more