use std::borrow::Cow;
use std::sync::Arc;
use ahash::{AHashMap, HashMapExt};
use aho_corasick_unsafe::{
AhoCorasick, AhoCorasickBuilder, AhoCorasickKind, MatchKind as AhoCorasickMatchKind,
};
#[cfg(feature = "prebuilt")]
use daachorse::CharwiseDoubleArrayAhoCorasick;
#[cfg(feature = "runtime_build")]
use daachorse::{
CharwiseDoubleArrayAhoCorasick, CharwiseDoubleArrayAhoCorasickBuilder,
MatchKind as DoubleArrayAhoCorasickMatchKind,
};
use lazy_static::lazy_static;
use nohash_hasher::{IntMap, IntSet};
use parking_lot::RwLock;
#[cfg(feature = "serde")]
use sonic_rs::{Deserialize, Serialize};
use tinyvec::ArrayVec;
#[cfg(feature = "prebuilt")]
use crate::process::constants::prebuilt_feature::*;
#[cfg(feature = "runtime_build")]
use crate::process::constants::runtime_build_feature::*;
use crate::SimpleMatchType;
/// Shape of the process-wide matcher cache: a read/write lock around a map from a
/// `SimpleMatchType` to the shared `(replacement list, matcher)` pair built for it.
type ProcessMatcherCache =
RwLock<IntMap<SimpleMatchType, Arc<(Vec<&'static str>, ProcessMatcher)>>>;
lazy_static! {
/// Lazily initialised global cache consulted and filled by `get_process_matcher`,
/// so a matcher that is already cached is handed out as an `Arc` clone instead of
/// being rebuilt.
pub static ref PROCESS_MATCHER_CACHE: ProcessMatcherCache =
RwLock::new(IntMap::with_capacity(8));
}
/// Multi-pattern matcher backing one text transformation.
///
/// The two variants wrap different Aho-Corasick implementations; both are driven
/// through `replace_all` / `delete_all`.
#[derive(Clone)]
pub enum ProcessMatcher {
/// Character-wise double-array automaton whose match value is a `u32` index into
/// the replacement list. `get_process_matcher` uses it for `Fanjian`, `PinYin`
/// and `PinYinChar`.
Chinese(CharwiseDoubleArrayAhoCorasick<u32>),
/// Byte-oriented Aho-Corasick automaton whose pattern id indexes the replacement
/// list. `get_process_matcher` uses it for every other transformation.
Others(AhoCorasick),
}
impl ProcessMatcher {
    /// Rewrites `text`, substituting every match with its replacement string.
    ///
    /// `process_replace_list` is indexed by the id the matcher reports for a match:
    /// the stored value for `Chinese`, the pattern index for `Others`.
    ///
    /// Returns `(true, Cow::Owned(rewritten))` when at least one match was found and
    /// `(false, Cow::Borrowed(text))` otherwise; no output buffer is allocated in the
    /// latter case.
    ///
    /// # Panics
    ///
    /// Panics if a reported id has no entry in `process_replace_list`. The lookup is
    /// bounds-checked on purpose: this is a safe public function, so a list that does
    /// not belong to this matcher must not be able to cause undefined behaviour.
    #[inline(always)]
    pub fn replace_all<'a>(
        &self,
        text: &'a str,
        process_replace_list: &[&str],
    ) -> (bool, Cow<'a, str>) {
        match self {
            ProcessMatcher::Chinese(ac) => Self::splice(
                text,
                ac.find_iter(text).map(|mat| {
                    (
                        mat.start(),
                        mat.end(),
                        process_replace_list[mat.value() as usize],
                    )
                }),
            ),
            ProcessMatcher::Others(ac) => Self::splice(
                text,
                ac.find_iter(text).map(|mat| {
                    (
                        mat.start(),
                        mat.end(),
                        process_replace_list[mat.pattern().as_usize()],
                    )
                }),
            ),
        }
    }

    /// Rewrites `text` with every match removed.
    ///
    /// Returns `(true, Cow::Owned(rewritten))` when at least one match was found and
    /// `(false, Cow::Borrowed(text))` otherwise; no output buffer is allocated in the
    /// latter case.
    #[inline(always)]
    pub fn delete_all<'a>(&self, text: &'a str) -> (bool, Cow<'a, str>) {
        match self {
            ProcessMatcher::Chinese(ac) => Self::splice(
                text,
                ac.find_iter(text).map(|mat| (mat.start(), mat.end(), "")),
            ),
            ProcessMatcher::Others(ac) => Self::splice(
                text,
                ac.find_iter(text).map(|mat| (mat.start(), mat.end(), "")),
            ),
        }
    }

    /// Rebuilds `text` with every `(start, end, replacement)` span swapped for its
    /// replacement; shared by `replace_all` and `delete_all`.
    ///
    /// Callers must pass spans reported by a matcher that scanned this same `text`:
    /// left to right, non-overlapping, with byte offsets on `char` boundaries.
    #[inline(always)]
    fn splice<'a, 'r>(
        text: &'a str,
        mut spans: impl Iterator<Item = (usize, usize, &'r str)>,
    ) -> (bool, Cow<'a, str>) {
        // Defer the allocation until a match is known to exist, so the common
        // "nothing to rewrite" path stays allocation-free.
        let Some(first) = spans.next() else {
            return (false, Cow::Borrowed(text));
        };

        let mut result = String::with_capacity(text.len());
        let mut last_end = 0;
        for (start, end, replacement) in std::iter::once(first).chain(spans) {
            // SAFETY: `start` comes from a matcher run over `text` and spans arrive in
            // increasing order, so `last_end..start` is in bounds and both ends sit on
            // char boundaries.
            result.push_str(unsafe { text.get_unchecked(last_end..start) });
            result.push_str(replacement);
            last_end = end;
        }
        // SAFETY: `last_end` is the end offset of the final match inside `text`.
        result.push_str(unsafe { text.get_unchecked(last_end..) });
        (true, Cow::Owned(result))
    }
}
/// Returns the shared `(replacement list, matcher)` pair for `smt_bit`, building it
/// from the embedded conversion tables on first use and caching it afterwards.
///
/// The replacement list and the matcher's patterns are collected from the same map
/// without modifying it in between, so the id reported for a match indexes the
/// matching replacement.
///
/// # Panics
///
/// Panics if a table line lacks a tab-separated second field or if a matcher cannot
/// be built from the collected patterns.
#[cfg(feature = "runtime_build")]
pub fn get_process_matcher(smt_bit: SimpleMatchType) -> Arc<(Vec<&'static str>, ProcessMatcher)> {
    // Splits one `key<TAB>value` table line into its first two fields.
    fn split_pair(line: &str) -> (&str, &str) {
        let mut fields = line.split('\t');
        (fields.next().unwrap(), fields.next().unwrap())
    }

    // Fast path: hand out the cached entry. The read guard is dropped at the end of
    // this scope, before any building or write locking happens.
    {
        let cache = PROCESS_MATCHER_CACHE.read();
        if let Some(hit) = cache.get(&smt_bit) {
            return Arc::clone(hit);
        }
    }

    // Slow path: gather `pattern -> replacement` pairs for the requested transformation.
    let mut process_dict = AHashMap::default();
    match smt_bit {
        SimpleMatchType::None => {}
        SimpleMatchType::Fanjian => {
            process_dict.extend(FANJIAN.trim().lines().map(split_pair));
        }
        SimpleMatchType::WordDelete => {
            process_dict.extend(WHITE_SPACE.iter().map(|&blank| (blank, "")));
        }
        SimpleMatchType::TextDelete => {
            process_dict.extend(TEXT_DELETE.trim().lines().map(|pattern| (pattern, "")));
            process_dict.extend(WHITE_SPACE.iter().map(|&blank| (blank, "")));
        }
        SimpleMatchType::Normalize => {
            for table in [NORM, NUM_NORM] {
                process_dict.extend(table.trim().lines().map(split_pair));
            }
        }
        SimpleMatchType::PinYin => {
            process_dict.extend(PINYIN.trim().lines().map(split_pair));
        }
        SimpleMatchType::PinYinChar => {
            // Same table as `PinYin`, with the delimiter character stripped from both
            // ends of each replacement.
            process_dict.extend(PINYIN.trim().lines().map(|line| {
                let (key, value) = split_pair(line);
                (key, value.trim_matches('␀'))
            }));
        }
        _ => {}
    }

    // A pattern that maps to itself would be a no-op rewrite; drop it.
    process_dict.retain(|&key, &mut value| key != value);

    // Keys and values are read in the same iteration order because the map is not
    // touched between the two passes.
    let process_replace_list: Vec<&'static str> = process_dict.values().copied().collect();
    let patterns: Vec<&str> = process_dict.keys().copied().collect();

    let process_matcher = match smt_bit {
        SimpleMatchType::Fanjian | SimpleMatchType::PinYin | SimpleMatchType::PinYinChar => {
            ProcessMatcher::Chinese(
                CharwiseDoubleArrayAhoCorasickBuilder::new()
                    .match_kind(DoubleArrayAhoCorasickMatchKind::Standard)
                    .build(patterns)
                    .unwrap(),
            )
        }
        _ => ProcessMatcher::Others(
            AhoCorasickBuilder::new()
                .kind(Some(AhoCorasickKind::DFA))
                .match_kind(AhoCorasickMatchKind::LeftmostLongest)
                .build(patterns)
                .unwrap(),
        ),
    };

    // Publish the freshly built pair; an entry inserted by another thread in the
    // meantime is simply replaced.
    let uncached_result = Arc::new((process_replace_list, process_matcher));
    PROCESS_MATCHER_CACHE
        .write()
        .insert(smt_bit, Arc::clone(&uncached_result));
    uncached_result
}
/// Returns the shared `(replacement list, matcher)` pair for `smt_bit`, using the
/// prebuilt data shipped with the crate, and caches it for later calls.
///
/// `Fanjian`, `PinYin` and `PinYinChar` deserialize an embedded automaton;
/// `Normalize` builds its matcher from embedded pattern lines; the delete variants
/// build theirs from the delete tables and carry an empty replacement list.
///
/// # Panics
///
/// Panics (`unreachable!`) for any `smt_bit` other than the single values matched
/// below, and if building one of the Aho-Corasick matchers fails.
#[cfg(feature = "prebuilt")]
pub fn get_process_matcher(smt_bit: SimpleMatchType) -> Arc<(Vec<&'static str>, ProcessMatcher)> {
// Fast path: return the cached entry. The read guard is released at the end of
// this scope, before anything is built.
{
let process_matcher_cache = PROCESS_MATCHER_CACHE.read();
if let Some(cached_result) = process_matcher_cache.get(&smt_bit) {
return Arc::clone(cached_result);
}
}
{
let (process_replace_list, process_matcher) = match smt_bit {
// No transformation: a matcher over zero patterns and no replacements.
SimpleMatchType::None => {
let empty_patterns: Vec<&str> = Vec::new();
(
Vec::new(),
ProcessMatcher::Others(AhoCorasick::new(&empty_patterns).unwrap()),
)
}
// NOTE(review): `deserialize_unchecked` trusts the embedded bytes; presumably
// they were produced at build time by serializing a matcher of this exact type
// whose values index the lines of the replacement list — confirm against the
// build script.
SimpleMatchType::Fanjian => (
FANJIAN_PROCESS_REPLACE_LIST_STR.lines().collect(),
ProcessMatcher::Chinese(unsafe {
CharwiseDoubleArrayAhoCorasick::<u32>::deserialize_unchecked(
FANJIAN_PROCESS_MATCHER_BYTES,
)
.0
}),
),
SimpleMatchType::WordDelete => {
let mut process_dict = AHashMap::default();
process_dict.extend(WHITE_SPACE.iter().map(|&c| (c, "")));
// Every value is "", so this only drops an empty-string pattern.
process_dict.retain(|&key, &mut value| key != value);
let process_list = process_dict
.iter()
.map(|(&key, _)| key)
.collect::<Vec<&str>>();
// Deletion needs no replacements, hence the empty list.
// NOTE(review): this matcher is always a DFA, whereas `TextDelete` and
// `Normalize` below only use a DFA when the `dfa` feature is enabled —
// confirm the difference is intentional.
(
Vec::new(),
ProcessMatcher::Others(
AhoCorasickBuilder::new()
.kind(Some(AhoCorasickKind::DFA))
.match_kind(AhoCorasickMatchKind::LeftmostLongest)
.build(&process_list)
.unwrap(),
),
)
}
SimpleMatchType::TextDelete => {
let mut process_dict = AHashMap::default();
process_dict.extend(TEXT_DELETE.trim().lines().map(|pair_str| (pair_str, "")));
process_dict.extend(WHITE_SPACE.iter().map(|&c| (c, "")));
// Every value is "", so this only drops an empty-string pattern.
process_dict.retain(|&key, &mut value| key != value);
let process_list = process_dict
.iter()
.map(|(&key, _)| key)
.collect::<Vec<&str>>();
// Exactly one of the two cfg-gated arguments below survives compilation:
// a DFA with the `dfa` feature, a contiguous NFA without it.
(
Vec::new(),
ProcessMatcher::Others(
#[cfg(feature = "dfa")]
AhoCorasickBuilder::new()
.kind(Some(AhoCorasickKind::DFA))
.match_kind(AhoCorasickMatchKind::LeftmostLongest)
.build(&process_list)
.unwrap(),
#[cfg(not(feature = "dfa"))]
AhoCorasickBuilder::new()
.kind(Some(AhoCorasickKind::ContiguousNFA))
.match_kind(AhoCorasickMatchKind::LeftmostLongest)
.build(&process_list)
.unwrap(),
),
)
}
// Patterns and replacements come from two embedded texts read line by line;
// presumably line i of one corresponds to line i of the other — verify against
// the build script.
SimpleMatchType::Normalize => (
NORMALIZE_PROCESS_REPLACE_LIST_STR.lines().collect(),
ProcessMatcher::Others(
#[cfg(feature = "dfa")]
AhoCorasickBuilder::new()
.kind(Some(AhoCorasickKind::DFA))
.match_kind(AhoCorasickMatchKind::LeftmostLongest)
.build(NORMALIZE_PROCESS_LIST_STR.lines())
.unwrap(),
#[cfg(not(feature = "dfa"))]
AhoCorasickBuilder::new()
.kind(Some(AhoCorasickKind::ContiguousNFA))
.match_kind(AhoCorasickMatchKind::LeftmostLongest)
.build(NORMALIZE_PROCESS_LIST_STR.lines())
.unwrap(),
),
),
SimpleMatchType::PinYin => (
PINYIN_PROCESS_REPLACE_LIST_STR.lines().collect(),
ProcessMatcher::Chinese(unsafe {
CharwiseDoubleArrayAhoCorasick::<u32>::deserialize_unchecked(
PINYIN_PROCESS_MATCHER_BYTES,
)
.0
}),
),
// Reuses the `PinYin` automaton bytes; only the replacement list differs.
SimpleMatchType::PinYinChar => (
PINYINCHAR_PROCESS_REPLACE_LIST_STR.lines().collect(),
ProcessMatcher::Chinese(unsafe {
CharwiseDoubleArrayAhoCorasick::<u32>::deserialize_unchecked(
PINYIN_PROCESS_MATCHER_BYTES,
)
.0
}),
),
// Combined or unknown values are not supported here and panic.
_ => unreachable!(),
};
// Publish the freshly built pair; an entry inserted by another thread since the
// read lock was released is simply replaced.
let uncached_result = Arc::new((process_replace_list, process_matcher));
let mut process_matcher_cache = PROCESS_MATCHER_CACHE.write();
process_matcher_cache.insert(smt_bit, Arc::clone(&uncached_result));
uncached_result
}
}
/// Applies the single transformation selected by `smt_bit` to `text`.
///
/// Returns the rewritten text as `Cow::Owned` when the transformation changed
/// something, and `Cow::Borrowed(text)` when it did not (always the case for
/// `SimpleMatchType::None`).
///
/// # Errors
///
/// Returns an error message when `smt_bit` contains more than one flag.
#[inline(always)]
pub fn text_process(smt_bit: SimpleMatchType, text: &str) -> Result<Cow<'_, str>, &'static str> {
    if smt_bit.iter().count() > 1 {
        return Err("text_process function only accept one bit of simple_match_type");
    }

    let cached = get_process_matcher(smt_bit);
    let (replacements, matcher) = cached.as_ref();

    // Delete variants drop matches; every other transformation substitutes them.
    let outcome = match smt_bit {
        SimpleMatchType::None => (false, Cow::Borrowed(text)),
        SimpleMatchType::TextDelete | SimpleMatchType::WordDelete => matcher.delete_all(text),
        _ => matcher.replace_all(text, replacements),
    };

    Ok(match outcome {
        (true, Cow::Owned(rewritten)) => Cow::Owned(rewritten),
        (false, _) => Cow::Borrowed(text),
        // The matcher reports `true` only together with an owned string.
        (_, _) => unreachable!(),
    })
}
/// Applies each flag of `simple_match_type` in turn, feeding every step the most
/// recently produced text, and returns all distinct stages.
///
/// The list starts with the original `text`; a step appends a new entry only when
/// it actually changed its input.
#[inline(always)]
pub fn reduce_text_process<'a>(
    simple_match_type: SimpleMatchType,
    text: &'a str,
) -> ArrayVec<[Cow<'a, str>; 8]> {
    let mut processed_text_list: ArrayVec<[Cow<'a, str>; 8]> = ArrayVec::new();
    processed_text_list.push(Cow::Borrowed(text));

    for smt_bit in simple_match_type.iter() {
        let cached = get_process_matcher(smt_bit);
        let (replacements, matcher) = cached.as_ref();
        // SAFETY: the list was seeded with the original text above and entries are
        // never removed, so it is non-empty here.
        let latest = unsafe { processed_text_list.last_mut().unwrap_unchecked() };

        let outcome = match smt_bit {
            SimpleMatchType::None => continue,
            SimpleMatchType::TextDelete | SimpleMatchType::WordDelete => {
                matcher.delete_all(latest.as_ref())
            }
            _ => matcher.replace_all(latest.as_ref(), replacements),
        };

        match outcome {
            (true, Cow::Owned(rewritten)) => processed_text_list.push(Cow::Owned(rewritten)),
            (false, _) => {}
            // The matcher reports `true` only together with an owned string.
            (_, _) => unreachable!(),
        }
    }

    processed_text_list
}
/// Variant of `reduce_text_process` in which `Fanjian` and `Normalize` overwrite the
/// most recent entry instead of appending a new one.
///
/// The list starts with the original `text`. Delete steps and all remaining
/// replacement steps append their output when they changed their input; `Fanjian`
/// and `Normalize` replace the latest entry in place when they changed it.
#[inline(always)]
pub fn reduce_text_process_emit<'a>(
    simple_match_type: SimpleMatchType,
    text: &'a str,
) -> ArrayVec<[Cow<'a, str>; 8]> {
    let mut processed_text_list: ArrayVec<[Cow<'a, str>; 8]> = ArrayVec::new();
    processed_text_list.push(Cow::Borrowed(text));

    for smt_bit in simple_match_type.iter() {
        let cached = get_process_matcher(smt_bit);
        let (replacements, matcher) = cached.as_ref();
        // SAFETY: the list was seeded with the original text above and entries are
        // never removed, so it is non-empty here.
        let latest = unsafe { processed_text_list.last_mut().unwrap_unchecked() };

        let overwrites_latest = matches!(
            smt_bit,
            SimpleMatchType::Fanjian | SimpleMatchType::Normalize
        );

        let outcome = match smt_bit {
            SimpleMatchType::None => continue,
            SimpleMatchType::TextDelete | SimpleMatchType::WordDelete => {
                matcher.delete_all(latest.as_ref())
            }
            _ => matcher.replace_all(latest.as_ref(), replacements),
        };

        match outcome {
            (true, Cow::Owned(rewritten)) => {
                if overwrites_latest {
                    *latest = Cow::Owned(rewritten);
                } else {
                    processed_text_list.push(Cow::Owned(rewritten));
                }
            }
            (false, _) => {}
            // The matcher reports `true` only together with an owned string.
            (_, _) => unreachable!(),
        }
    }

    processed_text_list
}
/// One node of the transformation tree built by `build_smt_tree`.
///
/// Nodes live in a flat `Vec`; `children` holds positions in that `Vec`. A path
/// from the root spells out the flags of a `SimpleMatchType` in iteration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct SimpleMatchTypeBitNode {
/// Every requested `SimpleMatchType` whose flag path passes through this node.
smt_list: ArrayVec<[SimpleMatchType; 8]>,
/// The single flag this node applies on top of its parent's text.
smt_bit: SimpleMatchType,
/// Index into the processed-text list of the text this node resolves to.
processed_text_index: usize,
/// Positions of this node's children in the tree `Vec`.
children: ArrayVec<[usize; 8]>,
}
/// Maps each `SimpleMatchType` to the set of processed-text indices recorded for it.
pub type SimpleMatchTypeIndexSetMap = IntMap<SimpleMatchType, IntSet<usize>>;
/// Builds the prefix tree of flag sequences for `smt_list`.
///
/// Index 0 is the root (flag `None`). For every requested type, its flags are walked
/// in iteration order from the root: an existing child with the same flag is reused
/// and records the type in its `smt_list`; otherwise a new node is appended to the
/// end of the vector and linked under the current node. Children therefore always
/// have a larger index than their parent.
///
/// # Panics
///
/// Panics if a node would need more than 8 children or more than 8 recorded types
/// (the fixed capacity of the per-node arrays).
pub fn build_smt_tree(smt_list: &[SimpleMatchType]) -> Vec<SimpleMatchTypeBitNode> {
    let mut root_types = ArrayVec::new();
    root_types.push(SimpleMatchType::None);
    let mut smt_tree = vec![SimpleMatchTypeBitNode {
        smt_list: root_types,
        smt_bit: SimpleMatchType::None,
        processed_text_index: 0,
        children: ArrayVec::new(),
    }];

    for &simple_match_type in smt_list {
        // Position of the node reached so far while walking this type's flags.
        let mut cursor = 0;

        for smt_bit in simple_match_type.iter() {
            let existing_child = smt_tree[cursor]
                .children
                .iter()
                .copied()
                .find(|&child| smt_bit == smt_tree[child].smt_bit);

            cursor = match existing_child {
                Some(child) => {
                    smt_tree[child].smt_list.push(simple_match_type);
                    child
                }
                None => {
                    let mut passing_types = ArrayVec::new();
                    passing_types.push(simple_match_type);
                    smt_tree.push(SimpleMatchTypeBitNode {
                        smt_list: passing_types,
                        smt_bit,
                        processed_text_index: 0,
                        children: ArrayVec::new(),
                    });
                    let new_index = smt_tree.len() - 1;
                    smt_tree[cursor].children.push(new_index);
                    new_index
                }
            };
        }
    }

    smt_tree
}
/// Runs every transformation of a prebuilt tree over `text`, processing each shared
/// flag prefix only once.
///
/// `smt_tree` must have the layout produced by `build_smt_tree`: the root at index 0
/// and every child stored at a larger index than its parent. The unchecked indexing
/// below relies on that layout.
///
/// Returns the map from each `SimpleMatchType` to the indices of the texts recorded
/// for it, together with the list of distinct texts (index 0 is the original `text`).
/// A child whose transformation changes nothing, or whose flag is `None`, resolves
/// to its parent's text.
///
/// # Panics
///
/// Panics if more than 8 distinct texts are produced (capacity of the result list).
#[inline(always)]
pub fn reduce_text_process_with_tree<'a>(
    smt_tree: &[SimpleMatchTypeBitNode],
    text: &'a str,
) -> (SimpleMatchTypeIndexSetMap, ArrayVec<[Cow<'a, str>; 8]>) {
    // Working copy whose `processed_text_index` fields are filled in as we go.
    let mut smt_tree_copied: Vec<SimpleMatchTypeBitNode> = smt_tree.to_vec();

    let mut smt_index_list_map = IntMap::with_capacity(8);
    let mut processed_text_list: ArrayVec<[Cow<'a, str>; 8]> = ArrayVec::new();
    processed_text_list.push(Cow::Borrowed(text));

    for (current_node_index, current_node) in smt_tree.iter().enumerate() {
        // SAFETY: `smt_tree_copied` has the same length as `smt_tree`, so
        // `current_node_index + 1` is at most its length and cannot overflow.
        let (left_tree, right_tree) =
            unsafe { smt_tree_copied.split_at_mut_unchecked(current_node_index.unchecked_add(1)) };
        // SAFETY: `left_tree` has exactly `current_node_index + 1` elements.
        let current_copied_node = unsafe { left_tree.get_unchecked(current_node_index) };
        // The parent's index was written when the parent was visited as a child of
        // an earlier node (the root keeps its initial 0).
        let parent_text_index = current_copied_node.processed_text_index;
        // A raw pointer lets the parent's text be read while new texts are pushed.
        // The string data is either the caller's `text` or a heap allocation owned by
        // an entry that is never modified, so it stays valid across pushes.
        // SAFETY: `parent_text_index` refers to an entry that was pushed earlier.
        let current_text_ptr =
            unsafe { processed_text_list.get_unchecked(parent_text_index) }.as_ref() as *const str;

        for child_node_index in current_node.children {
            // SAFETY: children are stored after their parent, so the child lies in
            // `right_tree` at offset `child_node_index - current_node_index - 1`.
            let child_node = unsafe {
                right_tree.get_unchecked_mut(
                    child_node_index
                        .unchecked_sub(current_node_index)
                        .unchecked_sub(1),
                )
            };

            let cached_result = get_process_matcher(child_node.smt_bit);
            let (process_replace_list, process_matcher) = cached_result.as_ref();

            // Computed afresh for every child so that a child which produces no new
            // text (including a `None` child) falls back to the parent's text rather
            // than to whatever a previous sibling produced.
            let current_index = match child_node.smt_bit {
                SimpleMatchType::None => parent_text_index,
                SimpleMatchType::TextDelete | SimpleMatchType::WordDelete => {
                    // SAFETY: see `current_text_ptr` above.
                    match process_matcher.delete_all(unsafe { &*current_text_ptr }) {
                        (true, Cow::Owned(pt)) => {
                            processed_text_list.push(Cow::Owned(pt));
                            processed_text_list.len() - 1
                        }
                        (false, _) => parent_text_index,
                        (_, _) => unreachable!(),
                    }
                }
                // SAFETY: see `current_text_ptr` above.
                _ => match process_matcher
                    .replace_all(unsafe { &*current_text_ptr }, process_replace_list)
                {
                    (true, Cow::Owned(pt)) => {
                        processed_text_list.push(Cow::Owned(pt));
                        processed_text_list.len() - 1
                    }
                    (false, _) => parent_text_index,
                    (_, _) => unreachable!(),
                },
            };

            child_node.processed_text_index = current_index;

            for simple_match_type in child_node.smt_list {
                let index_list = smt_index_list_map
                    .entry(simple_match_type)
                    .or_insert_with(IntSet::default);
                index_list.insert(current_index);
            }
        }
    }

    (smt_index_list_map, processed_text_list)
}
/// Processes `text` for every type in `smt_list`, building the flag prefix tree on
/// the fly so that a flag prefix shared by several types is processed only once.
///
/// Returns the map from each `SimpleMatchType` to the indices of the texts recorded
/// for it (one per flag on its path), together with the list of distinct texts
/// (index 0 is the original `text`). A step that changes nothing resolves to the
/// text it was given.
///
/// # Panics
///
/// Panics if more than 8 distinct texts are produced, or if a node would need more
/// than 8 children or recorded types (fixed array capacities).
#[inline(always)]
#[allow(dead_code)]
pub fn reduce_text_process_with_list<'a>(
    smt_list: &[SimpleMatchType],
    text: &'a str,
) -> (SimpleMatchTypeIndexSetMap, ArrayVec<[Cow<'a, str>; 8]>) {
    let mut smt_tree = Vec::new();
    let mut root = SimpleMatchTypeBitNode {
        smt_list: ArrayVec::new(),
        smt_bit: SimpleMatchType::None,
        processed_text_index: 0,
        children: ArrayVec::new(),
    };
    root.smt_list.push(SimpleMatchType::None);
    smt_tree.push(root);

    let mut smt_index_list_map = IntMap::with_capacity(8);
    let mut processed_text_list: ArrayVec<[Cow<'a, str>; 8]> = ArrayVec::new();
    processed_text_list.push(Cow::Borrowed(text));

    for &simple_match_type in smt_list.iter() {
        // Each type starts at the root with the original text. Throughout the walk,
        // `current_index` is the text index of the node at `current_node_index`.
        let mut current_text = text;
        let mut current_index = 0;
        let mut current_node_index = 0;

        for smt_bit in simple_match_type.iter() {
            let mut is_found = false;
            // SAFETY: `current_node_index` is 0 (the root) or the position of a node
            // that was pushed earlier; child indices are positions of pushed nodes.
            let current_node = unsafe { smt_tree.get_unchecked(current_node_index) };
            for &child_node_index in &current_node.children {
                if smt_bit == unsafe { smt_tree.get_unchecked(child_node_index) }.smt_bit {
                    current_node_index = child_node_index;
                    is_found = true;
                    break;
                }
            }

            if !is_found {
                let cached_result = get_process_matcher(smt_bit);
                let (process_replace_list, process_matcher) = cached_result.as_ref();

                let outcome = match smt_bit {
                    SimpleMatchType::None => (false, Cow::Borrowed(current_text)),
                    SimpleMatchType::TextDelete | SimpleMatchType::WordDelete => {
                        process_matcher.delete_all(current_text)
                    }
                    _ => process_matcher.replace_all(current_text, process_replace_list),
                };
                match outcome {
                    (true, Cow::Owned(pt)) => {
                        processed_text_list.push(Cow::Owned(pt));
                        current_index = processed_text_list.len() - 1;
                    }
                    // Nothing changed: keep pointing at the text we were given.
                    (false, _) => {}
                    (_, _) => unreachable!(),
                }

                // Record the step as a new node under the current one — including
                // directly under the root — so later types sharing this prefix reuse
                // the text instead of processing it again.
                let mut child = SimpleMatchTypeBitNode {
                    smt_list: ArrayVec::new(),
                    smt_bit,
                    processed_text_index: current_index,
                    children: ArrayVec::new(),
                };
                child.smt_list.push(simple_match_type);
                smt_tree.push(child);
                let new_node_index = smt_tree.len() - 1;
                // SAFETY: `current_node_index` is the position of an existing node.
                let current_node = unsafe { smt_tree.get_unchecked_mut(current_node_index) };
                current_node.children.push(new_node_index);
                current_node_index = new_node_index;
            } else {
                // SAFETY: `current_node_index` was just set to an existing child.
                let found_node = unsafe { smt_tree.get_unchecked_mut(current_node_index) };
                current_index = found_node.processed_text_index;
                found_node.smt_list.push(simple_match_type);
            }

            let index_list = smt_index_list_map
                .entry(simple_match_type)
                .or_insert_with(IntSet::default);
            index_list.insert(current_index);

            // SAFETY: `current_index` is 0 or the position of a text pushed earlier.
            current_text = unsafe { processed_text_list.get_unchecked(current_index) }.as_ref();
        }
    }

    (smt_index_list_map, processed_text_list)
}
// Smoke tests: each one drives a public entry point with a fixed input and prints
// the result for manual inspection. They fail only if the call panics.
#[cfg(test)]
mod test_text_process {
    use super::*;

    #[test]
    fn test_text_process() {
        let processed = text_process(SimpleMatchType::Fanjian, "躶軆");
        println!("{processed:?}");
    }

    #[test]
    fn test_reduce_text_process() {
        let stages = reduce_text_process(SimpleMatchType::FanjianDeleteNormalize, "~ᗩ~躶~𝚩~軆~Ⲉ~");
        println!("{stages:?}");
    }

    #[test]
    fn test_reduce_text_process_emit() {
        let stages =
            reduce_text_process_emit(SimpleMatchType::FanjianDeleteNormalize, "~ᗩ~躶~𝚩~軆~Ⲉ~");
        println!("{stages:?}");
    }

    #[test]
    fn test_build_smt_tree() {
        let requested = [
            SimpleMatchType::Fanjian | SimpleMatchType::TextDelete,
            SimpleMatchType::Fanjian,
            SimpleMatchType::Normalize,
            SimpleMatchType::Fanjian | SimpleMatchType::Normalize,
            SimpleMatchType::TextDelete,
            SimpleMatchType::TextDelete | SimpleMatchType::Normalize,
        ];
        let tree = build_smt_tree(&requested);
        println!("{tree:?}");
    }

    #[test]
    fn test_reduce_text_process_with_tree() {
        let requested = [
            SimpleMatchType::Fanjian,
            SimpleMatchType::DeleteNormalize,
            SimpleMatchType::FanjianDeleteNormalize,
            SimpleMatchType::Delete,
            SimpleMatchType::Normalize,
        ];
        let tree = build_smt_tree(&requested);
        let input = "《西游记》";
        let (index_map, texts) = reduce_text_process_with_tree(&tree, input);
        println!("{index_map:?}, {texts:?}");
    }

    #[test]
    fn test_reduce_text_process_with_list() {
        let requested = [
            SimpleMatchType::Fanjian | SimpleMatchType::TextDelete,
            SimpleMatchType::Fanjian,
            SimpleMatchType::Normalize,
            SimpleMatchType::Fanjian | SimpleMatchType::Normalize,
            SimpleMatchType::TextDelete,
            SimpleMatchType::TextDelete | SimpleMatchType::Normalize,
        ];
        let input = "test爽-︻";
        let (index_map, texts) = reduce_text_process_with_list(&requested, input);
        println!("{index_map:?}, {texts:?}");
    }
}