// bpe-tokenizer 0.1.4

/// Marker that denotes a word break in the tokenized output.
///
/// The value is the single code point U+2581 (`▁`). It is stored as a
/// `&str` rather than a `char`, so it occupies three bytes in UTF-8.
pub(crate) const WORD_BREAK_CHAR: &'static str = "▁";

/// Token that marks the start of a sentence.
///
/// The literal text is `<s>`.
pub(crate) const SENTENCE_START_TOKEN: &'static str = "<s>";

/// Token that marks the end of a sentence.
///
/// The literal text is `</s>`.
pub(crate) const SENTENCE_END_TOKEN: &'static str = "</s>";

/// Token that stands in for unknown words or subwords.
///
/// The literal text is `<unk>`.
pub(crate) const UNKNOWN_TOKEN: &'static str = "<unk>";