use crate::cea_match::{
implicit_a, implicit_b, try_discontiguous_contraction, try_pulled_contraction,
};
use crate::cea_source::{CodePointSource, Utf8Source, VecSource};
use crate::collator::CollationContext;
#[cfg(feature = "pipeline-stats")]
use crate::collator::PipelineStats;
use crate::tables::CollationTable;
use crate::weights::{primary, shift_weights, variability};
use std::cmp::Ordering;
const PENDING_CE_CAPACITY: usize = 20;
pub enum LazyPrimaryResult {
Decided(Ordering),
NeedsFullFallback,
}
pub fn compare_primary_streaming(
a_cea: &mut Vec<u32>,
b_cea: &mut Vec<u32>,
a_chars: &mut Vec<u32>,
b_chars: &mut Vec<u32>,
ctx: &CollationContext,
left: usize,
#[cfg(feature = "pipeline-stats")] stats: &mut PipelineStats,
) -> Option<Ordering> {
a_cea.clear();
b_cea.clear();
let mut a_cursor = CeaCursor::new(VecSource::new(a_chars, left), ctx);
let mut b_cursor = CeaCursor::new(VecSource::new(b_chars, left), ctx);
loop {
let a_p = next_primary(&mut a_cursor, a_cea, ctx.shifting);
let b_p = next_primary(&mut b_cursor, b_cea, ctx.shifting);
if a_p != b_p {
#[cfg(feature = "pipeline-stats")]
{
stats.codepoints_consumed_primary +=
u64::try_from(a_cursor.consumed() + b_cursor.consumed()).unwrap_or(u64::MAX);
}
return Some(a_p.cmp(&b_p));
}
if a_p == 0 {
a_cea.push(u32::MAX);
b_cea.push(u32::MAX);
#[cfg(feature = "pipeline-stats")]
{
stats.codepoints_consumed_primary +=
u64::try_from(a_cursor.consumed() + b_cursor.consumed()).unwrap_or(u64::MAX);
}
return None;
}
}
}
pub fn compare_primary_streaming_utf8(
a_cea: &mut Vec<u32>,
b_cea: &mut Vec<u32>,
a_bytes: &[u8],
b_bytes: &[u8],
ctx: &CollationContext,
) -> LazyPrimaryResult {
a_cea.clear();
b_cea.clear();
let mut a_cursor = CeaCursor::new(Utf8Source::new(a_bytes), ctx);
let mut b_cursor = CeaCursor::new(Utf8Source::new(b_bytes), ctx);
loop {
let a_p = next_primary(&mut a_cursor, a_cea, ctx.shifting);
let b_p = next_primary(&mut b_cursor, b_cea, ctx.shifting);
if a_cursor.is_blocked() || b_cursor.is_blocked() {
return LazyPrimaryResult::NeedsFullFallback;
}
if a_p != b_p {
return LazyPrimaryResult::Decided(a_p.cmp(&b_p));
}
if a_p == 0 {
a_cea.push(u32::MAX);
b_cea.push(u32::MAX);
return LazyPrimaryResult::NeedsFullFallback;
}
}
}
fn next_primary(
cursor: &mut CeaCursor<'_, impl CodePointSource>,
buffer: &mut Vec<u32>,
shifting: bool,
) -> u16 {
while let Some(weights) = cursor.next_ce() {
buffer.push(weights);
if shifting && variability(weights) {
continue;
}
let primary = primary(weights);
if primary != 0 {
return primary;
}
}
0
}
struct CeaCursor<'a, S> {
source: S,
ctx: &'a CollationContext,
pending: [u32; PENDING_CE_CAPACITY],
pending_start: usize,
pending_len: usize,
last_variable: bool,
}
impl<'a, S: CodePointSource> CeaCursor<'a, S> {
const fn new(source: S, ctx: &'a CollationContext) -> Self {
Self {
source,
ctx,
pending: [0; PENDING_CE_CAPACITY],
pending_start: 0,
pending_len: 0,
last_variable: false,
}
}
#[cfg(feature = "pipeline-stats")]
fn consumed(&self) -> usize {
self.source.consumed()
}
fn is_blocked(&self) -> bool {
self.source.is_blocked()
}
fn next_ce(&mut self) -> Option<u32> {
loop {
if self.pending_start < self.pending_len {
let weights = self.pending[self.pending_start];
self.pending_start += 1;
return Some(weights);
}
if self.source.is_empty() || self.source.is_blocked() {
return None;
}
self.pending_len = 0;
self.pending_start = 0;
self.queue_next_match();
if self.pending_len == 0 && self.source.is_blocked() {
return None;
}
}
}
fn queue_next_match(&mut self) {
let Some(left_val) = self.source.peek(0) else {
return;
};
if left_val < 0xB7 && left_val != 0x6C && left_val != 0x4C {
self.queue_weight(self.ctx.low[left_val as usize]);
self.source.consume(1);
return;
}
let table = self.ctx.table;
let entry = table.entry(left_val);
let lookahead = table.max_len(entry);
if lookahead == 1 {
if CollationTable::is_missing(entry) {
self.queue_raw_weight(implicit_a(left_val));
self.queue_raw_weight(implicit_b(left_val));
} else {
self.queue_row(table.simple_row(entry));
}
self.source.consume(1);
return;
}
if self.source.remaining() == 1 {
self.queue_row(table.simple_row(entry));
self.source.consume(1);
return;
}
let mut match_len = self.source.remaining().min(lookahead);
while match_len > 0 {
if match_len == 1 {
let row = table.simple_row(entry);
if let Some((pull_index, pulled_two, new_row)) =
try_pulled_contraction(self.ctx, entry, &mut self.source, match_len)
{
self.queue_row(new_row);
self.source.remove_pulled_lookahead(pull_index, pulled_two);
self.source.consume(1);
return;
}
self.queue_row(row);
self.source.consume(1);
return;
}
let row = match match_len {
2 => table.get2(entry, self.source.peek(1).unwrap()),
3 => table.get3(
entry,
self.source.peek(1).unwrap(),
self.source.peek(2).unwrap(),
),
_ => unreachable!(),
};
if let Some(row) = row {
if let Some(new_row) =
try_discontiguous_contraction(table, entry, &mut self.source, match_len)
{
self.queue_row(new_row);
self.source.remove_pulled_lookahead(match_len + 1, false);
self.source.consume(match_len);
return;
}
self.queue_row(row);
self.source.consume(match_len);
return;
}
match_len -= 1;
}
unreachable!();
}
fn queue_row(&mut self, row: &[u32]) {
assert!(row.len() <= PENDING_CE_CAPACITY);
for &weights in row {
self.queue_weight(weights);
}
}
const fn queue_weight(&mut self, weights: u32) {
let weights = if self.ctx.shifting {
shift_weights(weights, &mut self.last_variable)
} else {
weights
};
self.queue_raw_weight(weights);
}
const fn queue_raw_weight(&mut self, weights: u32) {
self.pending[self.pending_len] = weights;
self.pending_len += 1;
}
}