// gte/commons/input/encoded.rs
use composable::Composable;
use crate::tokenizer::Tokenizer;
3
4pub struct EncodedInput {
6 pub input_ids: ndarray::Array2<i64>,
7 pub attn_masks: ndarray::Array2<i64>,
8}
9
10
11pub struct TextInputEncoder<'a> {
12 tokenizer: &'a Tokenizer,
13}
14
15
16impl<'a> TextInputEncoder<'a> {
17 pub fn new(tokenizer: &'a Tokenizer) -> Self {
18 Self { tokenizer }
19 }
20}
21
22
23impl<'a, T> Composable<T, EncodedInput> for TextInputEncoder<'a> where T: super::text::TextInput<'a> {
24 fn apply(&self, input: T) -> composable::Result<EncodedInput> {
25 let input = input.into_encode_input();
26 let (input_ids, attn_masks) = self.tokenizer.tokenize(input)?;
27 Ok(EncodedInput{
28 input_ids,
29 attn_masks,
30 })
31 }
32}