tokenizers/decoders/
bpe.rs1use crate::tokenizer::{Decoder, Result};
2
3use serde::{Deserialize, Serialize};
4
5#[derive(Deserialize, Clone, Debug, Serialize)]
6#[serde(tag = "type")]
9#[non_exhaustive]
10pub struct BPEDecoder {
11 pub suffix: String,
12}
13
14impl BPEDecoder {
15 pub fn new(suffix: String) -> Self {
16 Self { suffix }
17 }
18}
19
20impl Default for BPEDecoder {
21 fn default() -> Self {
22 Self::new("</w>".into())
23 }
24}
25
26impl Decoder for BPEDecoder {
27 fn decode_chain(&self, tokens: Vec<String>) -> Result<Vec<String>> {
28 let n = tokens.len() - 1;
29 Ok(tokens
30 .into_iter()
31 .enumerate()
32 .map(|(i, token)| {
33 let replacement = if i == n { "" } else { " " };
34 token.replace(&self.suffix, replacement)
35 })
36 .collect())
37 }
38}