use crate::cea_source::CodePointSource;
use crate::collator::CollationContext;
use crate::consts::INCLUDED_UNASSIGNED;
use crate::tables::CollationTable;
use crate::weights::pack_weights;
use unicode_canonical_combining_class::get_canonical_combining_class_u32 as get_ccc;
/// Builds the first of the two implicit-weight values for `cp`.
///
/// A 16-bit base is selected from the range `cp` falls in. Every base except the three
/// fixed ones (`0xFB00`, `0xFB01`, `0xFB02`) has `cp >> 15` added to it. Code points listed
/// in `INCLUDED_UNASSIGNED` always take the `0xFBC0` base, even when they lie inside one of
/// the explicit ranges below.
///
/// The value is handed to `pack_weights` together with `false`, `32` and `2`
/// (presumably the variable flag, secondary and tertiary weights — confirm against
/// `pack_weights`).
pub fn implicit_a(cp: u32) -> u32 {
    let high_bits = cp >> 15;

    let base = match cp {
        // Must be tested first: these code points can sit inside the ranges below.
        _ if INCLUDED_UNASSIGNED.contains(&cp) => 0xFBC0 + high_bits,
        // CJK extension ranges
        0x3400..=0x4DBF | 0x20000..=0x2A6DF | 0x2A700..=0x2EE5D | 0x30000..=0x323AF => {
            0xFB80 + high_bits
        }
        // Core CJK unified and compatibility ideographs
        0x4E00..=0x9FFF | 0xF900..=0xFAFF => 0xFB40 + high_bits,
        // Tangut
        0x17000..=0x18AFF | 0x18D00..=0x18D8F => 0xFB00,
        // Khitan
        0x18B00..=0x18CFF => 0xFB02,
        // Nushu
        0x1B170..=0x1B2FF => 0xFB01,
        // Anything else
        _ => 0xFBC0 + high_bits,
    };

    // The narrowing cast is deliberate; every base plus `cp >> 15` fits in 16 bits for
    // code points up to U+10FFFF.
    #[allow(clippy::cast_possible_truncation)]
    let first_weight = base as u16;

    pack_weights(false, first_weight, 32, 2)
}
/// Builds the second of the two implicit-weight values for `cp`.
///
/// For the three ranges with a dedicated base, the value is the distance of `cp` from the
/// start of its range (`0x17000`, `0x18B00` or `0x1B170`). For everything else — including
/// all code points in `INCLUDED_UNASSIGNED` — it is the low 15 bits of `cp`. Bit `0x8000`
/// is then always set.
///
/// The value is handed to `pack_weights` together with `false`, `0` and `0`
/// (presumably the variable flag, secondary and tertiary weights — confirm against
/// `pack_weights`).
pub fn implicit_b(cp: u32) -> u32 {
    let offset = match cp {
        // Must be tested first: these code points can sit inside the ranges below.
        _ if INCLUDED_UNASSIGNED.contains(&cp) => cp & 0x7FFF,
        // Tangut
        0x17000..=0x18AFF | 0x18D00..=0x18D8F => cp - 0x17000,
        // Khitan
        0x18B00..=0x18CFF => cp - 0x18B00,
        // Nushu
        0x1B170..=0x1B2FF => cp - 0x1B170,
        // Anything else
        _ => cp & 0x7FFF,
    };

    // The high bit is set unconditionally before narrowing to 16 bits.
    #[allow(clippy::cast_possible_truncation)]
    let second_weight = (offset | 0x8000) as u16;

    pack_weights(false, second_weight, 0, 0)
}
/// Deletes the code point at index `i` from `char_vals` and, when `try_two` is set, the
/// one at `i - 1` as well, decreasing `*input_length` by the number of elements removed.
///
/// `input_length` is tracked separately from `char_vals.len()`; this function only
/// subtracts from it and never reads the vector's own length.
///
/// # Panics
///
/// Panics if `i` is out of bounds for `char_vals`, or if `try_two` is set and `i` is `0`.
pub fn remove_pulled(char_vals: &mut Vec<u32>, i: usize, input_length: &mut usize, try_two: bool) {
    // With `try_two` the doomed elements are the adjacent pair ending at `i`.
    let first_removed = if try_two { i - 1 } else { i };

    let removed = char_vals.drain(first_removed..=i).count();
    *input_length -= removed;
}
/// Attempts to extend a two-code-point match into a three-code-point contraction by
/// skipping over exactly one code point.
///
/// Only runs when `entry` is a contraction entry (per `CollationTable::is_contraction`) and
/// `match_len` is exactly `2`. The code point at offset `match_len` is the one skipped; the
/// code point at offset `match_len + 1` is the candidate. The lookup is attempted only if
/// the skipped code point has a non-zero canonical combining class and the candidate's
/// class is strictly greater.
///
/// Returns the row from `table.get3(entry, <code point at offset 1>, <candidate>)`, or
/// `None` if any precondition fails, no candidate exists, or the table has no such row.
///
/// # Panics
///
/// Panics if `source.peek` returns `None` for offset `match_len` or offset `1` after it has
/// returned `Some` for offset `match_len + 1`.
pub fn try_discontiguous_contraction<'a>(
    table: &'a CollationTable,
    entry: u64,
    source: &mut impl CodePointSource,
    match_len: usize,
) -> Option<&'a [u32]> {
    let eligible = CollationTable::is_contraction(entry) && match_len == 2;
    if !eligible {
        return None;
    }

    // No candidate beyond the skipped code point means there is nothing to try.
    let candidate = source.peek(match_len + 1)?;
    let skipped = source.peek(match_len).unwrap();

    let skipped_ccc = get_ccc(skipped) as u8;
    let candidate_ccc = get_ccc(candidate) as u8;

    // The skipped code point must be a non-starter with a lower class than the candidate.
    let unblocked = skipped_ccc > 0 && candidate_ccc > skipped_ccc;
    if !unblocked {
        return None;
    }

    let second = source.peek(1).unwrap();
    table.get3(entry, second, candidate)
}
/// Searches the code points after the current match for one (or two) that can be pulled
/// forward to form a contraction with `entry`.
///
/// Up to three code points past the match are considered, so the farthest candidate offset
/// is `match_len + 2`. The offset `match_len` itself is never used as a candidate. For each
/// candidate offset, farthest first:
///
/// 1. The code points from `match_len` through the candidate must pass `ccc_sequence_ok`;
///    otherwise the candidate moves one step closer.
/// 2. At the farthest offset (`match_len + 2`), and only when `ctx.cldr` is set, a
///    two-code-point pull is tried first via `ctx.table.get3` using the code points at
///    `candidate - 1` and `candidate`.
/// 3. A single-code-point pull is tried via `ctx.table.get2` using the code point at
///    `candidate`.
///
/// On success returns `(offset of the last pulled code point, whether two were pulled,
/// matching row)`. Returns `None` when no candidate produces a row.
///
/// # Panics
///
/// Panics if `source.peek` returns `None` for an examined offset. In builds with overflow
/// checks, also panics if `match_len` exceeds `source.remaining()`.
pub fn try_pulled_contraction<'a>(
    ctx: &'a CollationContext,
    entry: u64,
    source: &mut impl CodePointSource,
    match_len: usize,
) -> Option<(usize, bool, &'a [u32])> {
    // How far past `match_len` the farthest candidate may sit (0, 1 or 2).
    let unmatched = source.remaining() - match_len;
    let reach = match unmatched {
        0 | 1 => 0,
        2 => 1,
        _ => 2,
    };

    let mut candidate = match_len + reach;
    let mut pull_pair = reach == 2 && ctx.cldr;

    while candidate > match_len {
        if ccc_sequence_ok(source, match_len, candidate) {
            let hit = if pull_pair {
                let earlier = source.peek(candidate - 1).unwrap();
                let later = source.peek(candidate).unwrap();
                ctx.table.get3(entry, earlier, later)
            } else {
                let pulled = source.peek(candidate).unwrap();
                ctx.table.get2(entry, pulled)
            };

            if let Some(row) = hit {
                return Some((candidate, pull_pair, row));
            }

            if pull_pair {
                // The pair lookup missed: retry this same offset with a single code point.
                pull_pair = false;
                continue;
            }
        }

        // Either the combining-class sequence was unusable or the single lookup missed;
        // in both cases give up on pairs and move one code point closer.
        pull_pair = false;
        candidate -= 1;
    }

    None
}
/// Reports whether the code points at offsets `start_offset..=end_offset` of `source` have
/// non-zero, strictly increasing canonical combining classes.
///
/// Scanning stops at the first code point that breaks the rule, so later offsets are not
/// peeked in that case. An empty offset range yields `true`.
///
/// # Panics
///
/// Panics if `source.peek` returns `None` for an offset that gets examined.
fn ccc_sequence_ok(
    source: &mut impl CodePointSource,
    start_offset: usize,
    end_offset: usize,
) -> bool {
    // Highest class accepted so far; starting from zero means the first code point only
    // needs a non-zero class.
    let mut highest = 0u8;

    (start_offset..=end_offset).all(|offset| {
        let class = get_ccc(source.peek(offset).unwrap()) as u8;
        let rising = class != 0 && class > highest;
        if rising {
            highest = class;
        }
        rising
    })
}