1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
use crate::tokenizer::{Decoder, Result}; /// Allows decoding Original BPE by joining all the tokens and then replacing /// the suffix used to identify end-of-words by whitespaces pub struct BPEDecoder { suffix: String, } impl BPEDecoder { pub fn new(suffix: String) -> Self { BPEDecoder { suffix } } } impl Default for BPEDecoder { fn default() -> Self { BPEDecoder::new("</w>".into()) } } impl Decoder for BPEDecoder { fn decode(&self, tokens: Vec<String>) -> Result<String> { Ok(tokens.join("").replace(&self.suffix, " ")) } }