/// Marker that denotes a word break in the tokenized output.
///
/// The value is the single Unicode character U+2581 (LOWER ONE EIGHTH BLOCK),
/// which looks similar to, but is distinct from, the ASCII underscore `_`.
/// Despite the name, this is a `&str` rather than a `char`; it occupies three
/// bytes when encoded as UTF-8.
pub const WORD_BREAK_CHAR: &str = "▁";
/// Special token marking where a sentence begins.
///
/// The literal text is `<s>`.
pub const SENTENCE_START_TOKEN: &str = "<s>";
/// Special token marking where a sentence ends.
///
/// The literal text is `</s>`.
pub const SENTENCE_END_TOKEN: &str = "</s>";
/// Placeholder token standing in for an unknown word or subword.
///
/// The literal text is `<unk>`.
pub const UNKNOWN_TOKEN: &str = "<unk>";