Skip to main content

reconcile_text/
tokenizer.rs

1mod character_tokenizer;
2mod line_tokenizer;
3mod markdown_tokenizer;
4mod word_tokenizer;
5
6use std::ops::Deref;
7
8#[cfg(feature = "serde")]
9use serde::{Deserialize, Serialize};
10use token::Token;
11#[cfg(feature = "wasm")]
12use wasm_bindgen::prelude::*;
13
14pub mod token;
15
16/// Type alias for tokenizer functions that split a string into tokens
17pub type Tokenizer<T> = dyn Fn(&str) -> Vec<Token<T>>;
18
/// Selects one of the tokenizers bundled with this module (`wasm` build).
///
/// This definition is compiled only when the `wasm` feature is enabled. Each
/// variant carries a string value equal to its own name and the enum is
/// passed through `wasm_bindgen`. The `Deref` implementation in this module
/// maps every variant to the tokenizer function it stands for.
// NOTE(review): the `wasm_bindgen` attribute is kept ahead of the derives on
// purpose; the string discriminants below are only meaningful to that macro.
#[cfg_attr(feature = "wasm", wasm_bindgen)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg(feature = "wasm")]
pub enum BuiltinTokenizer {
    // Dispatches to `character_tokenizer::character_tokenizer`.
    Character = "Character",
    // Dispatches to `line_tokenizer::line_tokenizer`.
    Line = "Line",
    // Dispatches to `markdown_tokenizer::markdown_tokenizer`.
    Markdown = "Markdown",
    // Dispatches to `word_tokenizer::word_tokenizer`.
    Word = "Word",
}
29
/// Selects one of the tokenizers bundled with this module (non-`wasm` build).
///
/// This definition is compiled only when the `wasm` feature is disabled.
/// With the `serde` feature enabled it also derives `Serialize` and
/// `Deserialize`. The `Deref` implementation in this module maps every
/// variant to the tokenizer function it stands for.
#[cfg(not(feature = "wasm"))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BuiltinTokenizer {
    /// Dispatches to `character_tokenizer::character_tokenizer`.
    Character,
    /// Dispatches to `line_tokenizer::line_tokenizer`.
    Line,
    /// Dispatches to `markdown_tokenizer::markdown_tokenizer`.
    Markdown,
    /// Dispatches to `word_tokenizer::word_tokenizer`.
    Word,
}
39
40impl Deref for BuiltinTokenizer {
41    type Target = Tokenizer<String>;
42
43    fn deref(&self) -> &Self::Target {
44        match self {
45            BuiltinTokenizer::Character => &character_tokenizer::character_tokenizer,
46            BuiltinTokenizer::Line => &line_tokenizer::line_tokenizer,
47            BuiltinTokenizer::Markdown => &markdown_tokenizer::markdown_tokenizer,
48            BuiltinTokenizer::Word => &word_tokenizer::word_tokenizer,
49            #[cfg(feature = "wasm")]
50            BuiltinTokenizer::__Invalid => panic!("Unexpected tokenizer type"),
51        }
52    }
53}