pub struct Gpt2Model { /* private fields */ }
The GPT-2 natural language ML model.
§Example Usage
// Load tokenizer and GPT-2 model.
let tokenizer = Tokenizer::new(bpe_path, encoder_path);
let gpt_model = Gpt2Model::new(model_path, batch_size, sequence_length).unwrap();
// Convert input text to a token sequence.
let text_in = "Horses aren't real; they can't hurt you.";
let (tokens_in, padding) = tokenizer.encode_to_length(text_in, sequence_length);
// Convert token sequence to an input tensor, and get
// an inference from the model.
let tensor_in = gpt_model.tensor_from_tokens(&[tokens_in]);
let (inference, hidden_layers) = gpt_model.infer(tensor_in);
// Generate the next tokens based on the inference,
// and convert the tokens to text.
let tokens_out = gpt_model.tokens_from_inference(inference, &[padding]);
let generated_text = tokenizer.decode(tokens_out);
// Bonus: Extract the embedding of the input text from
// the hidden layers.
let text_embedding = gpt_model.embeddings_from_layers(&hidden_layers, &[padding], 11);

Implementations§
impl Gpt2Model

pub fn new(
    onnx_model_path: &str,
    batch_size: usize,
    sequence_length: usize,
) -> Result<Self>
Creates a new GPT-2 model from the ONNX
model saved at onnx_model_path, with fixed
batch_size and sequence_length.
batch_size specifies the maximum number of
texts (“token sequences”) that can be processed
during each inference request.
sequence_length specifies the number of tokens
that can be processed by the model in a single
token sequence. Sequences will be truncated and/or
padded to match this length.
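A minimal sketch of the truncate-or-pad shaping that a fixed sequence_length implies. The function name and the pad token id (50256, GPT-2's <|endoftext|>) are assumptions for illustration; the crate's actual padding strategy is internal to the tokenizer and model.

```rust
// Assumed pad token id; GPT-2 commonly reuses <|endoftext|> (50256) for padding.
const PAD_TOKEN: i32 = 50256;

/// Returns the shaped sequence and the number of padding tokens added.
fn shape_to_length(mut tokens: Vec<i32>, sequence_length: usize) -> (Vec<i32>, usize) {
    // Longer sequences are cut down to the fixed length...
    tokens.truncate(sequence_length);
    // ...and shorter ones are padded up to it.
    let padding = sequence_length - tokens.len();
    tokens.resize(sequence_length, PAD_TOKEN);
    (tokens, padding)
}

fn main() {
    let (seq, pad) = shape_to_length(vec![1, 2, 3], 5);
    println!("{:?} padding={}", seq, pad); // [1, 2, 3, 50256, 50256] padding=2
}
```

The returned padding count is what the example at the top threads through to tokens_from_inference and embeddings_from_layers.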
pub fn tensor_from_tokens(&self, tokens: &[Vec<i32>]) -> Array<i32, Ix2>
Converts a slice of one or more token sequences into a single tensor which may be passed into the GPT-2 model.
§Panics
If tokens contains any token sequences not
matching this model’s sequence_length, or if
the number of token sequences in tokens does
not match this model’s batch_size.
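The two panic conditions can be sketched as explicit checks. This is a hypothetical illustration of the contract, not the crate's own code; the function name and panic messages are invented.

```rust
/// Illustrative check mirroring tensor_from_tokens's documented panics:
/// the batch must contain exactly `batch_size` sequences, each exactly
/// `sequence_length` tokens long.
fn validate_batch(tokens: &[Vec<i32>], batch_size: usize, sequence_length: usize) {
    assert_eq!(
        tokens.len(),
        batch_size,
        "expected {} token sequences, got {}",
        batch_size,
        tokens.len()
    );
    for (i, seq) in tokens.iter().enumerate() {
        assert_eq!(
            seq.len(),
            sequence_length,
            "sequence {} has length {}, expected {}",
            i,
            seq.len(),
            sequence_length
        );
    }
}

fn main() {
    // Matching dimensions pass silently; any mismatch would panic.
    validate_batch(&[vec![0; 4], vec![1; 4]], 2, 4);
    println!("ok");
}
```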
pub fn infer(
    &self,
    tensor: Array<i32, Ix2>,
) -> (Array<f32, Ix3>, Array<f32, Ix6>)
Runs the model to generate an inference for tensor.
The returned tuple will contain (inference, hidden_layers),
where inference is a 3D tensor of shape
[batch_size, sequence_length, vocabulary size],
and hidden_layers is a 6D tensor of shape
[batch_size, layers, 2, head count, sequence_length, embeddings per head].
For most GPT-2 models, the vocabulary size is 50257.
For the 124M (“small”) GPT-2 model, there will be
12 layers, 12 heads, and 64 embeddings per head,
for a total of 768 embeddings per layer.
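The sizes quoted above multiply out as follows; the batch and sequence values here are assumed for illustration, while the vocabulary and 124M-model dimensions come from the text.

```rust
// Element counts for the two tensors returned by infer, given the shapes above.
fn inference_len(batch: usize, seq: usize, vocab: usize) -> usize {
    batch * seq * vocab
}

fn hidden_len(batch: usize, layers: usize, heads: usize, seq: usize, per_head: usize) -> usize {
    // The extra factor of 2 is the third axis of the 6D hidden-layers tensor.
    batch * layers * 2 * heads * seq * per_head
}

fn main() {
    // Assumed batch_size = 1 and sequence_length = 64, with the 124M dimensions.
    println!("{}", inference_len(1, 64, 50257)); // 3216448
    println!("{}", hidden_len(1, 12, 12, 64, 64)); // 1179648
    println!("{}", 12 * 64); // 768 embeddings per layer, as stated above
}
```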
pub fn count_layers(&self, hidden_layers: &Array<f32, Ix6>) -> usize
Returns the number of hidden layers within hidden_layers.
pub fn tokens_from_inference(
    &self,
    inference: Array<f32, Ix3>,
    tokens_padding: &[usize],
) -> Vec<i32>
Samples inference to select the next token for each sequence in the batch.
tokens_padding must be a slice of the
same length as batch_size, where each
element corresponds to the number of padding
tokens added onto the input token sequence
for that batch element.
Returns a 1D tensor of shape [batch_size],
where each batch entry is the next token in a sequence.
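The docs do not say how the inference is sampled; the sketch below assumes greedy (argmax) decoding for a single batch entry, reading the logits at the last non-padding position. Everything here is illustrative, using plain Vecs in place of the ndarray types.

```rust
/// Hypothetical greedy sampler for one batch entry: `logits` holds one
/// vocabulary-sized row per position, and `padding` is the number of
/// padding tokens appended to that entry's input sequence.
fn next_token(logits: &[Vec<f32>], padding: usize) -> i32 {
    // Last position that holds an inference over non-padding input,
    // clamped to 0 when the entire sequence is padding.
    let idx = logits.len().saturating_sub(padding + 1);
    let row = &logits[idx];
    // Greedy choice: the highest-scoring vocabulary id.
    let mut best = 0usize;
    for (i, &v) in row.iter().enumerate() {
        if v > row[best] {
            best = i;
        }
    }
    best as i32
}

fn main() {
    // Two vocabulary entries, three positions, one padding token:
    // position 1 is the last non-padding position.
    let logits = vec![
        vec![0.0, 1.0],
        vec![0.7, 0.2],
        vec![0.0, 0.0], // padding position, ignored
    ];
    println!("{}", next_token(&logits, 1)); // 0
}
```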
pub fn embeddings_from_layers(
    &self,
    hidden_layers: &Array<f32, Ix6>,
    tokens_padding: &[usize],
    hidden_layer_index: usize,
) -> Array<f32, Ix2>
Post-processes hidden_layers to extract
the embedding of each sequence in the batch.
Returns a 2D tensor of shape [batch_size, embeddings per layer],
where each batch entry is the embedding of the
entire input sequence for that entry.
For the 124M (“small”) GPT-2 model, there
are 768 embeddings per layer.
tokens_padding must be a slice of the
same length as batch_size, where each
element corresponds to the number of padding
tokens added onto the input token sequence
for that batch element.
pub fn last_token_inference_index(
    token_sequence_length: usize,
    token_padding: usize,
) -> usize
Returns the last index which should contain an inference on non-padding token data.
In the case where token_padding == token_sequence_length,
0 will be returned.
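The index rule above is pure arithmetic, so it can be written as a self-contained function; saturating subtraction gives the documented clamp to 0 when the whole sequence is padding.

```rust
// The last index holding an inference over non-padding token data:
// sequence_length - padding - 1, saturating at 0.
fn last_token_inference_index(token_sequence_length: usize, token_padding: usize) -> usize {
    token_sequence_length.saturating_sub(token_padding + 1)
}

fn main() {
    println!("{}", last_token_inference_index(8, 2)); // 5
    println!("{}", last_token_inference_index(8, 8)); // 0, all-padding case
}
```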
Auto Trait Implementations§
impl Freeze for Gpt2Model
impl !RefUnwindSafe for Gpt2Model
impl Send for Gpt2Model
impl Sync for Gpt2Model
impl Unpin for Gpt2Model
impl !UnwindSafe for Gpt2Model