Function rust_tokenizers::preprocessing::tokenizer::tokenization_utils::split_on_char

pub fn split_on_char<'a, F>(
    token: TokenRef<'a>,
    test_character: F,
    add_separators: bool,
    set_mask: Mask
) -> Vec<TokenRef<'a>> where
    F: Fn(&char) -> bool

Split a token on one or more characters (given a character test function)

  • token: The token to split
  • test_character: A function that borrows a char and returns a boolean. If it returns true for a character, the token is split at that character
  • add_separators: Whether to also add the separating characters to the returned tokens (bool). Separator tokens are indicated in the returned mask by the value given in set_mask
  • set_mask: The mask value used to indicate separator tokens in the returned tokens