svod_model/
sentencepiece.rs1use std::path::{Path, PathBuf};
6
7use snafu::{ResultExt, Snafu};
8
9#[derive(prost::Message)]
13struct SpModelProto {
14 #[prost(message, repeated, tag = "1")]
15 pieces: Vec<SpPiece>,
16}
17
18#[derive(prost::Message)]
19struct SpPiece {
20 #[prost(string, optional, tag = "1")]
23 piece: Option<String>,
24 #[prost(int32, optional, tag = "3")]
26 r#type: Option<i32>,
27}
28
29#[derive(Debug, Snafu)]
30#[snafu(visibility(pub))]
31pub enum Error {
32 #[snafu(display("reading SentencePiece model from {}: {source}", path.display()))]
33 Io { path: PathBuf, source: std::io::Error },
34 #[snafu(display("parsing SentencePiece model at {}: {source}", path.display()))]
35 Decode { path: PathBuf, source: prost::DecodeError },
36}
37
38pub type Result<T> = std::result::Result<T, Error>;
39
40pub fn load_vocab(path: &Path) -> Result<Vec<String>> {
46 use prost::Message;
47 let bytes = std::fs::read(path).context(IoSnafu { path: path.to_path_buf() })?;
48 let proto = SpModelProto::decode(&*bytes).context(DecodeSnafu { path: path.to_path_buf() })?;
49 let mut pieces = Vec::with_capacity(proto.pieces.len());
50 for p in proto.pieces {
51 let kind = p.r#type.unwrap_or(1);
52 let s = if kind == 1 || kind == 4 { p.piece.unwrap_or_default() } else { String::new() };
56 pieces.push(s);
57 }
58 Ok(pieces)
59}