use std::borrow::Cow;
use std::collections::HashSet;
use tinyvec::TinyVec;
use crate::process::process_type::ProcessType;
use crate::process::registry::get_transform_step;
use crate::process::step::TransformStep;
use crate::process::variant::{
ProcessedTextMasks, TRANSFORM_STATE, TextVariant, return_string_to_pool,
};
/// One node of the flat process-type tree produced by `build_process_type_tree`.
///
/// Nodes live in a single `Vec` and refer to each other by index into that
/// vector; index 0 is the root.
#[derive(Clone)]
pub struct ProcessTypeBitNode {
/// Process types recorded on this node. `build_process_type_tree` pushes a
/// process type onto every non-root node along the path of its bits; the root
/// only ever receives `ProcessType::None`.
process_type_list: TinyVec<[ProcessType; 4]>,
/// The single process-type bit this node stands for (`ProcessType::None` on the root).
pub(crate) process_type_bit: ProcessType,
/// Indices (into the tree vector) of this node's child nodes.
pub(crate) children: TinyVec<[usize; 4]>,
/// Transform step looked up via `get_transform_step(process_type_bit)` when the
/// node is created; `None` on the root.
pub(crate) step: Option<&'static TransformStep>,
/// Bitmask with one bit per entry of `process_type_list`. The builder sets bit
/// `process_type.bits()`; `recompute_mask_with_index` replaces it with bits taken
/// from an index table.
pub(crate) pt_index_mask: u64,
}
impl ProcessTypeBitNode {
    /// Rebuilds `pt_index_mask` from `process_type_list`.
    ///
    /// For every process type recorded on this node, the bit at position
    /// `pt_index_table[pt.bits()]` is set. The previous mask is discarded.
    ///
    /// # Panics
    ///
    /// Panics if `pt.bits()` is not a valid index into the 64-entry table.
    pub(crate) fn recompute_mask_with_index(&mut self, pt_index_table: &[u8; 64]) {
        let mut rebuilt = 0u64;
        for pt in self.process_type_list.iter() {
            let slot = pt_index_table[pt.bits() as usize];
            rebuilt |= 1u64 << slot;
        }
        // Assign once at the end so the stored mask is only replaced by a complete value.
        self.pt_index_mask = rebuilt;
    }
}
pub fn build_process_type_tree(process_type_set: &HashSet<ProcessType>) -> Vec<ProcessTypeBitNode> {
let max_nodes: usize = 1 + process_type_set
.iter()
.map(|pt| pt.bits().count_ones() as usize)
.sum::<usize>();
let mut process_type_tree = Vec::with_capacity(max_nodes);
let mut root = ProcessTypeBitNode {
process_type_list: TinyVec::new(),
process_type_bit: ProcessType::None,
children: TinyVec::new(),
step: None,
pt_index_mask: 0,
};
if process_type_set.contains(&ProcessType::None) {
root.process_type_list.push(ProcessType::None);
root.pt_index_mask |= 1u64 << ProcessType::None.bits();
}
process_type_tree.push(root);
for &process_type in process_type_set.iter() {
let mut current_node_index = 0;
for process_type_bit in process_type.iter() {
let current_node = &process_type_tree[current_node_index];
if current_node.process_type_bit == process_type_bit {
continue;
}
let found_child = current_node
.children
.iter()
.find(|&&idx| process_type_tree[idx].process_type_bit == process_type_bit)
.copied();
if let Some(child_idx) = found_child {
current_node_index = child_idx;
process_type_tree[current_node_index]
.process_type_list
.push(process_type);
process_type_tree[current_node_index].pt_index_mask |= 1u64 << process_type.bits();
} else {
let mut child = ProcessTypeBitNode {
process_type_list: TinyVec::new(),
process_type_bit,
children: TinyVec::new(),
step: Some(get_transform_step(process_type_bit)),
pt_index_mask: 0,
};
child.process_type_list.push(process_type);
child.pt_index_mask |= 1u64 << process_type.bits();
process_type_tree.push(child);
let new_node_index = process_type_tree.len() - 1;
process_type_tree[current_node_index]
.children
.push(new_node_index);
current_node_index = new_node_index;
}
}
}
process_type_tree
}
/// Resolves the variant index for the result of a transform step.
///
/// * `changed == None`: the step left the text untouched, so the parent's index
///   (`current_index`) is returned.
/// * `changed == Some(s)` and an existing variant has identical text: `s` is
///   handed to `return_string_to_pool` and the existing index is returned.
/// * otherwise: a new variant owning `s` is appended with an empty mask and the
///   given `is_ascii` flag, and its index is returned.
fn dedup_insert(
    text_masks: &mut ProcessedTextMasks<'_>,
    current_index: usize,
    changed: Option<String>,
    is_ascii: bool,
) -> usize {
    let candidate = match changed {
        Some(text) => text,
        None => return current_index,
    };

    // Compare lengths first so most mismatches are rejected without a byte comparison.
    let duplicate_of = text_masks.iter().position(|variant| {
        variant.text.len() == candidate.len() && variant.text.as_ref() == candidate.as_str()
    });
    if let Some(existing_index) = duplicate_of {
        return_string_to_pool(candidate);
        return existing_index;
    }

    let new_index = text_masks.len();
    text_masks.push(TextVariant {
        text: Cow::Owned(candidate),
        mask: 0u64,
        is_ascii,
    });
    new_index
}
/// Walks the process-type tree over `text`, collecting every distinct text
/// variant together with the mask of process types that produce it.
///
/// Variant 0 is always the borrowed input text, tagged with the root node's
/// mask. Each non-root node applies its transform step to its parent's variant.
/// The result is either the parent's variant (step changed nothing), an existing
/// variant with identical text, or a newly appended one. The node's
/// `pt_index_mask` is OR-ed into whichever variant it resolves to.
///
/// When `LAZY` is `true`, `on_variant(text, variant_index, mask, is_ascii)` is
/// invoked:
/// * for the root variant, if its mask is non-zero;
/// * for each newly created variant as soon as it appears, with the mask it has
///   at that moment;
/// * after the walk, once per variant for any mask bits that were added after
///   that variant was first reported (only the new bits are passed).
///
/// If the callback returns `true` the walk stops immediately. When `LAZY` is
/// `false` the callback is never invoked.
///
/// # Returns
///
/// The collected variants and a flag that is `true` only if `on_variant`
/// requested an early stop.
///
/// # Panics
///
/// Panics if `process_type_tree` is empty, or if a non-root node has no cached
/// transform step.
#[inline(always)]
pub fn walk_process_tree<'a, const LAZY: bool, F>(
    process_type_tree: &[ProcessTypeBitNode],
    text: &'a str,
    on_variant: &mut F,
) -> (ProcessedTextMasks<'a>, bool)
where
    F: FnMut(&str, usize, u64, bool) -> bool,
{
    // SAFETY: NOTE(review) — this creates a unique reference from the raw pointer
    // returned by `TRANSFORM_STATE.get()`. It is only sound if no other reference
    // to that state is live for the duration of this call (including from inside
    // `on_variant`); presumably the state is per-thread — confirm at the definition.
    let ts = unsafe { &mut *TRANSFORM_STATE.get() };
    let pooled: Option<ProcessedTextMasks<'static>> = ts.masks_pool.pop();
    // SAFETY: only the lifetime parameter changes (`'static` -> `'a`), and the
    // container is cleared immediately below before any element is read, so
    // nothing from a previous use is observed under the new lifetime.
    let mut text_masks: ProcessedTextMasks<'a> =
        unsafe { std::mem::transmute(pooled.unwrap_or_default()) };
    text_masks.clear();

    let root_is_ascii = text.is_ascii();
    text_masks.push(TextVariant {
        text: Cow::Borrowed(text),
        mask: process_type_tree[0].pt_index_mask,
        is_ascii: root_is_ascii,
    });

    // Per-variant record of the mask bits already reported to `on_variant` (LAZY only).
    let mut scanned_masks: TinyVec<[u64; 8]> = TinyVec::new();
    if LAZY {
        scanned_masks.push(0u64);
        let root_mask = process_type_tree[0].pt_index_mask;
        if root_mask != 0 && on_variant(text, 0, root_mask, root_is_ascii) {
            return (text_masks, true);
        }
        scanned_masks[0] = root_mask;
    }

    if process_type_tree[0].children.is_empty() {
        return (text_masks, false);
    }

    // Maps each tree node to the variant index holding that node's output text.
    ts.tree_node_indices.clear();
    ts.tree_node_indices.resize(process_type_tree.len(), 0);

    let mut stopped = false;
    'walk: for (current_node_index, current_node) in process_type_tree.iter().enumerate() {
        let current_index = ts.tree_node_indices[current_node_index];
        let parent_is_ascii = text_masks[current_index].is_ascii;
        for &child_node_index in &current_node.children {
            let child_node = &process_type_tree[child_node_index];
            let step = child_node
                .step
                .expect("non-root process tree nodes always cache a transform step");
            let current_text = text_masks[current_index].text.as_ref();
            let output = step.apply(current_text, parent_is_ascii);

            // Variants at or beyond this length after insertion are brand new.
            let old_len = if LAZY { text_masks.len() } else { 0 };
            let child_index = dedup_insert(
                &mut text_masks,
                current_index,
                output.changed,
                output.is_ascii,
            );
            if LAZY {
                // Keep `scanned_masks` index-aligned with `text_masks`.
                while scanned_masks.len() < text_masks.len() {
                    scanned_masks.push(0u64);
                }
            }

            ts.tree_node_indices[child_node_index] = child_index;
            text_masks[child_index].mask |= child_node.pt_index_mask;

            if LAZY && child_index >= old_len {
                let mask = text_masks[child_index].mask;
                let is_ascii = text_masks[child_index].is_ascii;
                if mask != 0
                    && on_variant(
                        text_masks[child_index].text.as_ref(),
                        child_index,
                        mask,
                        is_ascii,
                    )
                {
                    stopped = true;
                    break 'walk;
                }
                scanned_masks[child_index] = mask;
            }
        }
    }

    if LAZY {
        if stopped {
            return (text_masks, true);
        }
        // Report mask bits that were merged into a variant after it was first scanned.
        for i in 0..text_masks.len() {
            let delta = text_masks[i].mask & !scanned_masks[i];
            if delta != 0
                && on_variant(
                    text_masks[i].text.as_ref(),
                    i,
                    delta,
                    text_masks[i].is_ascii,
                )
            {
                return (text_masks, true);
            }
        }
    }

    (text_masks, false)
}