// Copyright 2019-present Microsoft
// Copyright 2020-present, the HuggingFace Inc. team.
// Copyright 2020 Guillaume Becquin
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//     http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! # Multi-turn dialogue
//! Conversation model based on Microsoft's [DialoGPT](https://github.com/microsoft/DialoGPT).
//! This pipeline allows the generation of single or multi-turn conversations between a human and a model.
//! The DialoGPT page states that:
//! > The human evaluation results indicate that the response generated from DialoGPT is comparable to human response quality
//! > under a single-turn conversation Turing test. ([DialoGPT repository](https://github.com/microsoft/DialoGPT))
//!
//!
//! The dependencies will be downloaded to the user's home directory, under ~/.cache/.rustbert/dialogpt-medium
//!
//! ```no_run
//! # fn main() -> failure::Fallible<()> {
//! use rust_bert::pipelines::conversation::{ConversationManager, ConversationModel};
//! let conversation_model = ConversationModel::new(Default::default())?;
//! let mut conversation_manager = ConversationManager::new();
//!
//! let conversation_id =
//!     conversation_manager.create("Going to the movies tonight - any suggestions?");
//! let output = conversation_model.generate_responses(&mut conversation_manager);
//! # Ok(())
//! # }
//! ```
//!
//! Example output: \
//! ```no_run
//! # let output =
//! "The Big Lebowski."
//! # ;
//! ```
//!
//! # Disclaimer
//! The authors of this repository are not responsible for any generation
//! from the 3rd party utilization of the pretrained system.
//!
use crate::common::resources::{RemoteResource, Resource};
use crate::gpt2::{
    Gpt2ConfigResources, Gpt2MergesResources, Gpt2ModelResources, Gpt2VocabResources,
};
use crate::pipelines::generation::private_generation_utils::PrivateLanguageGenerator;
use crate::pipelines::generation::{GPT2Generator, GenerateConfig, LanguageGenerator};
use itertools::Itertools;
use rust_tokenizers::Tokenizer;
use std::collections::HashMap;
use tch::{Device, Tensor};
use uuid::Uuid;

/// # Configuration for multi-turn conversation
/// Contains information regarding the model to load, mirrors the `GenerateConfig`, with a
/// different set of default parameters and sets the device to place the model on.
pub struct ConversationConfig {
    /// Model weights resource (default: DialoGPT-medium)
    pub model_resource: Resource,
    /// Config resource (default: DialoGPT-medium)
    pub config_resource: Resource,
    /// Vocab resource (default: DialoGPT-medium)
    pub vocab_resource: Resource,
    /// Merges resource (default: DialoGPT-medium)
    pub merges_resource: Resource,
    /// Minimum sequence length (default: 0)
    pub min_length: u64,
    /// Maximum sequence length (default: 1000)
    pub max_length: u64,
    /// Minimum free length available for generated responses (default: 32). This value is
    /// subtracted from `max_length` to obtain the maximum context length (history and new
    /// user input) that is passed to the model.
    pub min_length_for_response: u64,
    /// Sampling flag. If true, will perform top-k and/or nucleus sampling on generated tokens, otherwise greedy (deterministic) decoding (default: true)
    pub do_sample: bool,
    /// Early stopping flag indicating if the beam search should stop as soon as `num_beam` hypotheses have been generated (default: false)
    pub early_stopping: bool,
    /// Number of beams for beam search (default: 1)
    pub num_beams: u64,
    /// Temperature setting. Values higher than 1 will improve originality at the risk of reducing relevance (default: 1.0)
    pub temperature: f64,
    /// Top_k values for sampling tokens. Value higher than 0 will enable the feature (default: 50)
    pub top_k: u64,
    /// Top_p value for [Nucleus sampling, Holtzman et al.](http://arxiv.org/abs/1904.09751). Keep top tokens until cumulative probability reaches top_p (default: 0.9)
    pub top_p: f64,
    /// Repetition penalty (mostly useful for CTRL decoders). Values higher than 1 will penalize tokens that have been already generated. (default: 1.0)
    pub repetition_penalty: f64,
    /// Exponential penalty based on the length of the hypotheses generated (default: 1.0)
    pub length_penalty: f64,
    /// Number of allowed repetitions of n-grams. Values higher than 0 turn on this feature (default: 0)
    pub no_repeat_ngram_size: u64,
    /// Number of sequences to return for each prompt text (default: 1)
    pub num_return_sequences: u64,
    /// Device to place the model on (default: CUDA/GPU when available)
    pub device: Device,
}

impl Default for ConversationConfig {
    /// Builds the default configuration: the four DialoGPT-medium remote resources,
    /// sampling-based decoding with a single beam, and placement on CUDA when available.
    fn default() -> Self {
        // Resources are resolved in the same order as the struct fields.
        let model_resource = Resource::Remote(RemoteResource::from_pretrained(
            Gpt2ModelResources::DIALOGPT_MEDIUM,
        ));
        let config_resource = Resource::Remote(RemoteResource::from_pretrained(
            Gpt2ConfigResources::DIALOGPT_MEDIUM,
        ));
        let vocab_resource = Resource::Remote(RemoteResource::from_pretrained(
            Gpt2VocabResources::DIALOGPT_MEDIUM,
        ));
        let merges_resource = Resource::Remote(RemoteResource::from_pretrained(
            Gpt2MergesResources::DIALOGPT_MEDIUM,
        ));

        Self {
            model_resource,
            config_resource,
            vocab_resource,
            merges_resource,
            // Length constraints
            min_length: 0,
            max_length: 1000,
            min_length_for_response: 32,
            // Decoding strategy
            do_sample: true,
            early_stopping: false,
            num_beams: 1,
            temperature: 1.0,
            top_k: 50,
            top_p: 0.9,
            // Penalties
            repetition_penalty: 1.0,
            length_penalty: 1.0,
            no_repeat_ngram_size: 0,
            // Output and placement
            num_return_sequences: 1,
            device: Device::cuda_if_available(),
        }
    }
}

/// Data structure keeping track of a conversation in the system. It contains past user inputs and
/// generated answers, a history of the tokens generated and a placeholder for new user inputs to be
/// processed by the system if submitted for prediction
#[derive(Debug, Clone)]
pub struct Conversation {
    /// Past user inputs that have already been processed
    pub past_user_inputs: Vec<String>,
    /// Past system generated responses
    pub generated_responses: Vec<String>,
    /// New user input that needs to be processed
    pub new_user_input: Option<String>,
    /// History of the tokens passed as an input and generated so far used as context for next turn generation
    pub history: Vec<i64>,
}

impl Conversation {
    /// Build a new `Conversation` with an initial user input
    ///
    /// # Arguments
    ///
    /// * `text` - `&str` with the initial user input to start a conversation
    ///
    /// # Example
    ///
    /// ```no_run
    /// use rust_bert::pipelines::conversation::Conversation;
    ///
    /// let conversation = Conversation::new("Hi there!");
    /// ```
    pub fn new(text: &str) -> Conversation {
        Conversation {
            past_user_inputs: vec![],
            generated_responses: vec![],
            new_user_input: Some(text.to_string()),
            history: vec![],
        }
    }

    /// Build a new `Conversation` placeholder without user input
    ///
    /// # Example
    ///
    /// ```no_run
    /// use rust_bert::pipelines::conversation::Conversation;
    ///
    /// let conversation = Conversation::new_empty();
    /// ```
    pub fn new_empty() -> Conversation {
        Conversation {
            past_user_inputs: vec![],
            generated_responses: vec![],
            new_user_input: None,
            history: vec![],
        }
    }

    /// Adds a new user input to the conversation. This method returns an error if an unprocessed
    /// user input already exists
    ///
    /// # Arguments
    ///
    /// * `text` - `&str` with the additional user input to continue a conversation
    ///
    /// # Errors
    ///
    /// Returns `Err` with a static message when the conversation already holds an unprocessed
    /// user input; the existing input is left untouched in that case.
    ///
    /// # Example
    ///
    /// ```no_run
    /// use rust_bert::pipelines::conversation::Conversation;
    ///
    /// let mut conversation = Conversation::new_empty();
    /// conversation.add_user_input("Hi there!").unwrap();
    /// ```
    pub fn add_user_input(&mut self, text: &str) -> Result<(), &'static str> {
        if self.new_user_input.is_some() {
            return Err("User input already provided for this conversation");
        }
        self.new_user_input = Some(text.to_string());
        Ok(())
    }

    /// Adds a new user input to the conversation. If an unprocessed user input already exists,
    /// its contents are overwritten by the new value provided.
    ///
    /// # Arguments
    ///
    /// * `text` - `&str` with the additional user input to continue a conversation
    ///
    /// # Returns
    ///
    /// * `Option<String>` containing overwritten string if applicable
    ///
    /// # Example
    ///
    /// ```no_run
    /// use rust_bert::pipelines::conversation::Conversation;
    ///
    /// let mut conversation = Conversation::new_empty();
    /// conversation.add_user_input("This input will not be used").unwrap();
    /// let unused_string = conversation.add_user_input_with_overwrite("Hi there!");
    /// ```
    pub fn add_user_input_with_overwrite(&mut self, text: &str) -> Option<String> {
        // `replace` hands back the previous value by move, so no clone is needed.
        self.new_user_input.replace(text.to_string())
    }

    /// Returns `true` if the conversation contains new user inputs to process
    ///
    /// # Returns
    ///
    /// * `bool` flag indicating if the conversation contains new inputs to process
    ///
    /// # Example
    ///
    /// ```no_run
    /// use rust_bert::pipelines::conversation::Conversation;
    ///
    /// let mut conversation = Conversation::new_empty();
    /// let false_value = conversation.contains_new_input();
    /// conversation.add_user_input("Hi there!").unwrap();
    /// let true_value = conversation.contains_new_input();
    /// ```
    pub fn contains_new_input(&self) -> bool {
        self.new_user_input.is_some()
    }

    /// Marks the conversation as processed and moves the user input that was up for
    /// processing to the past user inputs. Does nothing when there is no pending input.
    ///
    /// # Example
    ///
    /// ```no_run
    /// use rust_bert::pipelines::conversation::Conversation;
    ///
    /// let mut conversation = Conversation::new_empty();
    /// let false_value = conversation.contains_new_input();
    /// conversation.add_user_input("Hi there!").unwrap();
    /// let true_value = conversation.contains_new_input();
    /// conversation.mark_processed();
    /// let false_value = conversation.contains_new_input();
    /// assert_eq!(conversation.past_user_inputs.len(), 1usize);
    /// ```
    pub fn mark_processed(&mut self) {
        // `take` moves the pending input out and leaves `None` behind in one step.
        if let Some(processed_input) = self.new_user_input.take() {
            self.past_user_inputs.push(processed_input);
        }
    }

    /// Returns the last user input provided (including non-processed inputs).
    ///
    /// # Returns
    ///
    /// * `Option<&str>` representation of the last user input provided
    ///
    /// # Example
    ///
    /// ```no_run
    /// use rust_bert::pipelines::conversation::Conversation;
    ///
    /// let mut conversation = Conversation::new_empty();
    /// let none_value = conversation.get_last_input();
    /// conversation.add_user_input("Hi there!").unwrap();
    /// let last_provided_input = conversation.get_last_input();
    /// assert_eq!(last_provided_input, Some("Hi there!"));
    /// ```
    pub fn get_last_input(&self) -> Option<&str> {
        // A pending input takes precedence over already processed inputs.
        self.new_user_input
            .as_deref()
            .or_else(|| self.past_user_inputs.last().map(String::as_str))
    }

    /// Returns the last response generated by the system.
    ///
    /// # Returns
    ///
    /// * `Option<&str>` representation of the last response generated by the system.
    ///
    /// # Example
    ///
    /// ```no_run
    /// use rust_bert::pipelines::conversation::Conversation;
    ///
    /// let mut conversation = Conversation::new("Hi There");
    /// let none_value = conversation.get_last_response();
    /// ```
    pub fn get_last_response(&self) -> Option<&str> {
        self.generated_responses.last().map(String::as_str)
    }
}

/// Data structure allowing the management of conversations and main input to the dialogue model.
/// It contains a `HashMap` of conversations with `UUID` keys
#[derive(Debug)]
pub struct ConversationManager {
    /// Registered conversations, keyed by the `Uuid` assigned when they were added
    conversations: HashMap<Uuid, Conversation>,
}

impl ConversationManager {
    /// Build a new `ConversationManager`
    ///
    /// # Example
    ///
    /// ```no_run
    /// use rust_bert::pipelines::conversation::ConversationManager;
    ///
    /// let conversation_manager = ConversationManager::new();
    /// ```
    pub fn new() -> ConversationManager {
        ConversationManager {
            conversations: HashMap::new(),
        }
    }

    /// Returns a list of the active conversations (containing new inputs to be processed by the model)
    ///
    /// # Returns
    ///
    /// * `(Vec<&Uuid>, Vec<&mut Conversation>)` Tuple of vectors with the active `UUID` and `Conversations`.
    ///   Both vectors have the same length and matching positions refer to the same conversation.
    ///
    /// # Example
    ///
    /// ```no_run
    /// use rust_bert::pipelines::conversation::{Conversation, ConversationManager};
    ///
    /// let mut conversation_manager = ConversationManager::new();
    ///
    /// let conversation = Conversation::new("Hi there!");
    /// let empty_conversation = Conversation::new_empty();
    /// let conversation_id = conversation_manager.add(conversation);
    /// let empty_conversation_id = conversation_manager.add(empty_conversation);
    ///
    /// let active_conversations = conversation_manager.get_active_conversations();
    /// assert_eq!(active_conversations.0.len(), 1usize);
    /// ```
    pub fn get_active_conversations(&mut self) -> (Vec<&Uuid>, Vec<&mut Conversation>) {
        // `unzip` splits the (key, value) pairs while keeping both vectors aligned.
        self.conversations
            .iter_mut()
            .filter(|(_, conversation)| conversation.contains_new_input())
            .unzip()
    }

    /// Returns a mutable reference to the conversation with the provided UUID
    ///
    /// # Arguments
    ///
    /// * `uuid` - `&Uuid` of the conversation to retrieve
    ///
    /// # Returns
    ///
    /// * `Option<&mut Conversation>` Optional mutable reference to the conversation matching the UUID provided
    ///
    /// # Example
    ///
    /// ```no_run
    /// use rust_bert::pipelines::conversation::{Conversation, ConversationManager};
    ///
    /// let mut conversation_manager = ConversationManager::new();
    ///
    /// let conversation = Conversation::new("Hi there!");
    /// let conversation_id = conversation_manager.add(conversation);
    ///
    /// let conversation_ref = conversation_manager.get(&conversation_id);
    /// ```
    pub fn get(&mut self, uuid: &Uuid) -> Option<&mut Conversation> {
        self.conversations.get_mut(uuid)
    }

    /// Returns a HashMap containing references to all conversations stored in the manager
    ///
    /// Only shared access is required: the returned map borrows the keys and conversations
    /// held by the manager without modifying them.
    ///
    /// # Example
    ///
    /// ```no_run
    /// use rust_bert::pipelines::conversation::{Conversation, ConversationManager};
    ///
    /// let mut conversation_manager = ConversationManager::new();
    ///
    /// let conversation = Conversation::new("Hi there!");
    /// let conversation_id = conversation_manager.add(conversation);
    ///
    /// let all_conversations = conversation_manager.get_all();
    /// ```
    pub fn get_all(&self) -> HashMap<&Uuid, &Conversation> {
        self.conversations.iter().collect()
    }

    /// Creates a conversation and adds it to the conversation manager
    ///
    /// # Arguments
    ///
    /// * `text` - `&str` string slice with an original user input
    ///
    /// # Returns
    ///
    /// * `Uuid` for the conversation created
    ///
    /// # Example
    ///
    /// ```no_run
    /// use rust_bert::pipelines::conversation::{Conversation, ConversationManager};
    ///
    /// let mut conversation_manager = ConversationManager::new();
    ///
    /// let conversation_id = conversation_manager.create("Hi there!");
    /// ```
    pub fn create(&mut self, text: &str) -> Uuid {
        self.add(Conversation::new(text))
    }

    /// Creates an empty conversation and adds it to the conversation manager
    ///
    /// # Returns
    ///
    /// * `Uuid` for the conversation created
    ///
    /// # Example
    ///
    /// ```no_run
    /// use rust_bert::pipelines::conversation::{Conversation, ConversationManager};
    ///
    /// let mut conversation_manager = ConversationManager::new();
    ///
    /// let conversation_id = conversation_manager.create_empty();
    /// ```
    pub fn create_empty(&mut self) -> Uuid {
        self.add(Conversation::new_empty())
    }

    /// Adds an existing conversation to the conversation manager
    ///
    /// # Arguments
    ///
    /// * `conversation` - `Conversation` to be added to the conversation manager
    ///
    /// # Returns
    ///
    /// * `Uuid` for the conversation created
    ///
    /// # Example
    ///
    /// ```no_run
    /// use rust_bert::pipelines::conversation::{Conversation, ConversationManager};
    ///
    /// let mut conversation_manager = ConversationManager::new();
    ///
    /// let conversation = Conversation::new("Hi there!");
    /// let conversation_id = conversation_manager.add(conversation);
    /// ```
    pub fn add(&mut self, conversation: Conversation) -> Uuid {
        // Draw identifiers until one is not already registered, so that an existing
        // conversation is never silently replaced.
        let mut uuid = Uuid::new_v4();
        while self.conversations.contains_key(&uuid) {
            uuid = Uuid::new_v4();
        }
        self.conversations.insert(uuid, conversation);
        uuid
    }

    /// Deregister a conversation from the conversation manager
    ///
    /// # Arguments
    ///
    /// * `uuid` - `&Uuid` of the conversation to deregister from the conversation manager
    ///
    /// # Returns
    ///
    /// * `Option<Conversation>` deregistered conversation
    ///
    /// # Example
    ///
    /// ```no_run
    /// use rust_bert::pipelines::conversation::{Conversation, ConversationManager};
    ///
    /// let mut conversation_manager = ConversationManager::new();
    ///
    /// let conversation_id = conversation_manager.create("Hi there!");
    /// conversation_manager.remove(&conversation_id);
    /// ```
    pub fn remove(&mut self, uuid: &Uuid) -> Option<Conversation> {
        self.conversations.remove(uuid)
    }

    /// Clear all conversations from the conversation manager, and returns the conversations and their
    /// former UUID.
    ///
    /// # Returns
    ///
    /// * `HashMap<Uuid, Conversation>` deregistered conversations
    ///
    /// # Example
    ///
    /// ```no_run
    /// use rust_bert::pipelines::conversation::{Conversation, ConversationManager};
    ///
    /// let mut conversation_manager = ConversationManager::new();
    ///
    /// let conversation_id = conversation_manager.create("Hi there!");
    /// let conversations = conversation_manager.clear();
    /// ```
    pub fn clear(&mut self) -> HashMap<Uuid, Conversation> {
        // Move the whole map out and leave an empty one behind; no conversation is cloned.
        std::mem::take(&mut self.conversations)
    }
}

/// # Conversation model
/// Processes a ConversationManager and generates system responses for active conversations.
pub struct ConversationModel {
    /// Underlying GPT2 generator used to produce the response tokens
    model: GPT2Generator,
    /// End-of-sequence token id: appended to every encoded user prompt and used as the
    /// padding token when the model does not define one
    eos_token_id: i64,
    /// Context length budget (in tokens) for history and new input, derived at construction
    /// from the configured `max_length` minus `min_length_for_response`
    max_allowed_context_length: u64,
}

impl ConversationModel {
    /// Build a new `ConversationModel`
    ///
    /// # Arguments
    ///
    /// * `conversation_config` - `ConversationConfig` object containing the resource references (model, vocabulary, configuration), conversation options and device placement (CPU/GPU)
    ///
    /// # Errors
    ///
    /// Propagates any error returned while building the underlying `GPT2Generator`.
    ///
    /// # Panics
    ///
    /// Panics if the generator does not expose at least one end-of-sequence token id.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # fn main() -> failure::Fallible<()> {
    /// use rust_bert::pipelines::conversation::ConversationModel;
    ///
    /// let conversation_model = ConversationModel::new(Default::default())?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn new(conversation_config: ConversationConfig) -> failure::Fallible<ConversationModel> {
        // Saturating subtraction: a `max_length` smaller than the space reserved for the
        // response must not underflow (panic in debug builds, wrap around in release builds).
        let max_allowed_context_length = conversation_config
            .max_length
            .saturating_sub(conversation_config.min_length_for_response);

        let generate_config = GenerateConfig {
            model_resource: conversation_config.model_resource,
            config_resource: conversation_config.config_resource,
            merges_resource: conversation_config.merges_resource,
            vocab_resource: conversation_config.vocab_resource,
            min_length: conversation_config.min_length,
            max_length: conversation_config.max_length,
            do_sample: conversation_config.do_sample,
            early_stopping: conversation_config.early_stopping,
            num_beams: conversation_config.num_beams,
            temperature: conversation_config.temperature,
            top_k: conversation_config.top_k,
            top_p: conversation_config.top_p,
            repetition_penalty: conversation_config.repetition_penalty,
            length_penalty: conversation_config.length_penalty,
            no_repeat_ngram_size: conversation_config.no_repeat_ngram_size,
            num_return_sequences: conversation_config.num_return_sequences,
            device: conversation_config.device,
        };

        let model = GPT2Generator::new(generate_config)?;
        let eos_token_id = *model.get_eos_ids().as_ref().unwrap().first().unwrap();
        Ok(ConversationModel {
            model,
            eos_token_id,
            max_allowed_context_length,
        })
    }

    /// Perform a multi-turn conversation based on user input
    ///
    /// For every active conversation (one holding an unprocessed user input), the new input is
    /// encoded, appended to the conversation history and passed to the generator. The decoded
    /// answer is pushed to the conversation's generated responses, the history is replaced by
    /// the cleaned generated sequence and the input is marked as processed.
    ///
    /// # Arguments
    ///
    /// * `conversation_manager` - `&mut ConversationManager` Conversation manager keeping track of active conversations
    ///
    /// # Returns
    /// * `HashMap<&Uuid, &str>` Responses from the model for each active conversation, referenced by Uuid.
    ///   The map is empty when no conversation is active.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # fn main() -> failure::Fallible<()> {
    /// use rust_bert::pipelines::conversation::{ConversationManager, ConversationModel};
    /// use rust_bert::pipelines::generation::LanguageGenerator;
    /// let model = ConversationModel::new(Default::default())?;
    ///
    /// let mut conversation_manager = ConversationManager::new();
    /// conversation_manager.create("Hello, how are you?");
    ///
    /// let output = model.generate_responses(&mut conversation_manager);
    /// # Ok(())
    /// # }
    /// ```
    pub fn generate_responses<'a>(
        &self,
        conversation_manager: &'a mut ConversationManager,
    ) -> HashMap<&'a Uuid, &'a str> {
        let (active_uuid, active_conversations) = conversation_manager.get_active_conversations();
        if active_uuid.is_empty() {
            return HashMap::new();
        }

        // Active conversations are selected on the presence of a new input, so the unwrap
        // below cannot fail.
        let texts = active_conversations
            .iter()
            .map(|c| c.new_user_input.as_ref().unwrap().as_str())
            .collect_vec();

        let history = active_conversations
            .iter()
            .map(|c| &c.history)
            .collect_vec();

        let prompt_ids = self.encode_prompts(texts.as_slice());
        let input_tensor = self.concat_input_history(prompt_ids, history);
        let input_length = *input_tensor.size().last().unwrap() as usize;
        let mut generated = self.model.generate_from_ids_and_past(input_tensor, None);
        let removed_padding_quantities = self.clean_padding_indices(&mut generated);

        let mut output = HashMap::with_capacity(active_uuid.len());

        for (((conversation, generated_sequence), uuid), removed_padding) in
            active_conversations
                .into_iter()
                .zip(generated.into_iter())
                .zip(active_uuid.into_iter())
                .zip(removed_padding_quantities.into_iter())
        {
            // The leading padding was stripped from the sequence, so the response starts at the
            // padded input length minus the number of padding tokens removed at the front.
            let response_start = input_length.saturating_sub(removed_padding.0);
            // Bounds-checked slicing: when nothing remains after the prompt (e.g. an empty user
            // input) the response is empty instead of triggering an out-of-range panic.
            let response_ids = generated_sequence
                .get(response_start..)
                .map_or_else(Vec::new, |ids| ids.to_vec());
            conversation
                .generated_responses
                .push(self.model.get_tokenizer().decode(response_ids, true, true));
            conversation.history = generated_sequence;
            conversation.mark_processed();
            output.insert(uuid, conversation.get_last_response().unwrap());
        }
        output
    }

    /// Returns the padding token id defined by the model, falling back to the
    /// end-of-sequence token id when the model does not define one.
    fn pad_token_id(&self) -> i64 {
        match self.model.get_pad_id() {
            Some(value) => *value,
            None => self.eos_token_id,
        }
    }

    /// Strips the padding surrounding each generated sequence, in place.
    ///
    /// In case inputs are sent as a batch, this cleans the padding indices in the history for
    /// shorter outputs. All leading padding tokens are removed; one trailing padding token is
    /// kept so the stored history still ends with a terminator.
    ///
    /// # Returns
    ///
    /// * One `(leading, trailing)` tuple per sequence with the number of padding tokens found at
    ///   the start and at the end of the sequence before cleaning.
    fn clean_padding_indices(&self, model_output: &mut Vec<Vec<i64>>) -> Vec<(usize, usize)> {
        let pad_token = self.pad_token_id();
        model_output
            .iter_mut()
            .map(|sequence| {
                // A sequence made only of padding counts as entirely leading padding.
                let leading = sequence
                    .iter()
                    .position(|&token| token != pad_token)
                    .unwrap_or(sequence.len());
                let trailing = sequence
                    .iter()
                    .rev()
                    .position(|&token| token != pad_token)
                    .unwrap_or(0);
                // Only trim when trailing padding exists: without this guard the cut position
                // would be `len + 1`, which is out of range.
                if trailing > 0 {
                    let keep = sequence.len() - trailing + 1;
                    sequence.truncate(keep);
                }
                sequence.drain(..leading);
                (leading, trailing)
            })
            .collect()
    }

    /// Concatenates the history token indices with the new user input and builds the batched
    /// model input.
    ///
    /// Each row is `history + input`. Rows longer than the allowed context length are truncated
    /// from the left at a turn boundary (see `get_truncated_input_index`), then all rows are
    /// left-padded to the length of the longest remaining row so they can be stacked.
    ///
    /// # Panics
    ///
    /// Panics if `inputs` and `history` do not have the same length.
    fn concat_input_history(&self, inputs: Vec<Vec<i64>>, history: Vec<&Vec<i64>>) -> Tensor {
        let pad_token = self.pad_token_id();

        assert_eq!(
            inputs.len(),
            history.len(),
            "Length of inputs should equal length of history"
        );

        let max_context = self.max_allowed_context_length as usize;
        let contexts: Vec<Vec<i64>> = inputs
            .iter()
            .zip(history.iter())
            .map(|(input, past)| {
                let mut tokens = Vec::with_capacity(past.len() + input.len());
                tokens.extend_from_slice(past);
                tokens.extend_from_slice(input);
                if tokens.len() > max_context {
                    let start = self.get_truncated_input_index(&tokens, max_context, pad_token);
                    tokens.drain(..start);
                }
                tokens
            })
            .collect();

        // The common length is computed AFTER truncation so that every row ends up with exactly
        // the same number of tokens, which stacking requires. The only caller passes at least
        // one row.
        let max_len = contexts.iter().map(Vec::len).max().unwrap_or(0);

        let rows = contexts
            .into_iter()
            .map(|tokens| {
                let mut padded = vec![pad_token; max_len - tokens.len()];
                padded.extend_from_slice(&tokens);
                padded
            })
            .map(|tokens| Tensor::of_slice(&tokens).to(self.model.get_var_store().device()))
            .collect::<Vec<Tensor>>();
        Tensor::stack(&rows, 0)
    }

    /// Finds where an over-long context should start so that it fits the allowed length.
    ///
    /// Looks for the first separator (`pad_token`) that is not the last token of `history` and
    /// whose position `i` satisfies `history.len() < max_length + i`, and returns the index just
    /// after it. Returns `0` (no truncation) when no such separator exists.
    fn get_truncated_input_index(
        &self,
        history: &[i64],
        max_length: usize,
        pad_token: i64,
    ) -> usize {
        let start_length = history.len();
        history
            .iter()
            .enumerate()
            .find(|&(i, &token)| {
                token == pad_token && i != start_length - 1 && start_length < max_length + i
            })
            .map_or(0, |(i, _)| i + 1)
    }

    /// Encodes the user prompts into token ids, terminating each one with the
    /// end-of-sequence token.
    fn encode_prompts(&self, texts: &[&str]) -> Vec<Vec<i64>> {
        let tokens = self.model.get_tokenizer().tokenize_list(texts.to_vec());

        tokens
            .into_iter()
            .map(|prompt_tokens| {
                let mut ids = self
                    .model
                    .get_tokenizer()
                    .convert_tokens_to_ids(&prompt_tokens);
                ids.push(self.eos_token_id);
                ids
            })
            .collect::<Vec<Vec<i64>>>()
    }
}