1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
pub mod bpe;
pub mod ctc;
pub mod wordpiece;
pub use super::pre_tokenizers::byte_level;
pub use super::pre_tokenizers::metaspace;
use serde::{Deserialize, Serialize};
use crate::decoders::bpe::BPEDecoder;
use crate::decoders::ctc::CTC;
use crate::decoders::wordpiece::WordPiece;
use crate::pre_tokenizers::byte_level::ByteLevel;
use crate::pre_tokenizers::metaspace::Metaspace;
use crate::{Decoder, Result};
#[derive(Serialize, Deserialize, Clone, Debug)]
#[serde(untagged)]
pub enum DecoderWrapper {
BPE(BPEDecoder),
ByteLevel(ByteLevel),
WordPiece(WordPiece),
Metaspace(Metaspace),
CTC(CTC),
}
impl Decoder for DecoderWrapper {
fn decode(&self, tokens: Vec<String>) -> Result<Vec<String>> {
match self {
Self::BPE(bpe) => bpe.decode(tokens),
Self::ByteLevel(bl) => bl.decode(tokens),
Self::Metaspace(ms) => ms.decode(tokens),
Self::WordPiece(wp) => wp.decode(tokens),
Self::CTC(ctc) => ctc.decode(tokens),
}
}
}
impl_enum_from!(BPEDecoder, DecoderWrapper, BPE);
impl_enum_from!(ByteLevel, DecoderWrapper, ByteLevel);
impl_enum_from!(Metaspace, DecoderWrapper, Metaspace);
impl_enum_from!(WordPiece, DecoderWrapper, WordPiece);
impl_enum_from!(CTC, DecoderWrapper, CTC);