Skip to main content

mako/tokenization/hf_tokenizers/decoders/
mod.rs

1pub mod bpe;
2pub mod wordpiece;
3
// Re-export the `byte_level` and `metaspace` pre-tokenizer modules, since their types are also used as decoders
5pub use super::pre_tokenizers::byte_level;
6pub use super::pre_tokenizers::metaspace;
7
8use serde::{Deserialize, Serialize};
9
10use crate::tokenization::hf_tokenizers::decoders::bpe::BPEDecoder;
11use crate::tokenization::hf_tokenizers::decoders::wordpiece::WordPiece;
12use crate::tokenization::hf_tokenizers::pre_tokenizers::byte_level::ByteLevel;
13use crate::tokenization::hf_tokenizers::pre_tokenizers::metaspace::Metaspace;
14use crate::tokenization::hf_tokenizers::{Decoder, Result};
15
16#[derive(Serialize, Deserialize, Clone, Debug)]
17#[serde(untagged)]
18pub enum DecoderWrapper {
19    BPE(BPEDecoder),
20    ByteLevel(ByteLevel),
21    WordPiece(WordPiece),
22    Metaspace(Metaspace),
23}
24
25impl Decoder for DecoderWrapper {
26    fn decode(&self, tokens: Vec<String>) -> Result<String> {
27        match self {
28            DecoderWrapper::BPE(bpe) => bpe.decode(tokens),
29            DecoderWrapper::ByteLevel(bl) => bl.decode(tokens),
30            DecoderWrapper::Metaspace(ms) => ms.decode(tokens),
31            DecoderWrapper::WordPiece(wp) => wp.decode(tokens),
32        }
33    }
34}
35
36impl_enum_from!(BPEDecoder, DecoderWrapper, BPE);
37impl_enum_from!(ByteLevel, DecoderWrapper, ByteLevel);
38impl_enum_from!(Metaspace, DecoderWrapper, Metaspace);
39impl_enum_from!(WordPiece, DecoderWrapper, WordPiece);