gte/commons/input/
encoded.rs

1use composable::Composable;
2use crate::tokenizer::Tokenizer;
3
4/// Encoded text input (using a tokenizer)
5pub struct EncodedInput {
6    pub input_ids: ndarray::Array2<i64>,
7    pub attn_masks: ndarray::Array2<i64>,
8}
9
10
11pub struct TextInputEncoder<'a> {
12    tokenizer: &'a Tokenizer,
13}
14
15
16impl<'a> TextInputEncoder<'a> {
17    pub fn new(tokenizer: &'a Tokenizer) -> Self {
18        Self { tokenizer }
19    }
20}
21
22
23impl<'a, T> Composable<T, EncodedInput> for TextInputEncoder<'a> where T: super::text::TextInput<'a> {
24    fn apply(&self, input: T) -> composable::Result<EncodedInput> {
25        let input = input.into_encode_input();
26        let (input_ids, attn_masks) = self.tokenizer.tokenize(input)?;
27        Ok(EncodedInput{
28            input_ids,
29            attn_masks,
30        })
31    }
32}