use crate::forward::Doc;
use rayon::iter::IntoParallelRefMutIterator;
use rayon::iter::ParallelIterator;
use rayon::slice::ParallelSliceMut;
use std::cmp::Ordering::Equal;
use std::cmp::{min, max};
const QUICKSELECT:bool = true;
const PSORT:bool = true;
const COOLING:bool = true;
const LOG2_E: f32 = 1.44269504089;
const LOG_PRECOMP_LIMIT: i32 = 4096;
lazy_static::lazy_static! {
static ref LOG2: Vec<f32> = (0..LOG_PRECOMP_LIMIT).map(|i|
match i {
0 => 0.0,
_ => (i as f32).log2()
}).collect();
}
#[inline]
fn cmp_log2(x: i32) -> f32 {
if x < LOG_PRECOMP_LIMIT {
LOG2[x as usize]
} else {
(x as f32).log2()
}
}
fn partition_quickselect(docs: &mut [Doc], k: usize, tolerance: f32) {
if not_partitioned(docs, k, tolerance) {
floydrivest(docs, k, 0, docs.len() - 1);
}
}
fn not_partitioned(
docs: &mut [Doc],
median_idx: usize,
tolerance: f32,
) -> bool {
let median_val = docs[median_idx].gain;
let (left, right) = docs.split_at(median_idx);
left.iter().any(|d| (d.gain - tolerance) > median_val) || right.iter().any(|d| (d.gain + tolerance) < median_val)
}
fn swap_docs(docs: &mut [Doc], left: usize, right: usize, median_idx: usize) {
if (left < median_idx && right < median_idx) || (left >= median_idx && right >= median_idx) {
docs.swap(left, right);
} else {
docs[left].leaf_id += 1; docs[right].leaf_id -= 1; docs.swap(left, right);
}
}
fn floydrivest(docs: &mut [Doc], k: usize, in_left: usize, in_right: usize)
{
let mut i: usize;
let mut j: usize;
let mut t_idx: usize;
let mut left = in_left;
let mut right = in_right;
const MAX_PARTITION: usize = 600; const C: f32 = 0.5;
while right > left {
if right - left > MAX_PARTITION {
let no_elements: f32 = (right - left + 1) as f32;
let i: f32 = (k - left + 1) as f32;
let z: f32 = no_elements.ln();
let s: f32 = C * (z * (2.0 / 3.0)).exp();
let sn: f32 = s / no_elements;
let sd: f32 = C * (z * s * (1.0 - sn)).sqrt() * (i - no_elements * 0.5).signum();
let isn: f32 = i * s / no_elements;
let inner: f32 = k as f32 - isn + sd;
let ll: usize = max(left, inner as usize);
let rr: usize = min(right, (inner + s) as usize);
floydrivest(docs, k, ll, rr);
}
i = left + 1;
j = right - 1;
swap_docs(docs, left, k, k);
if docs[left].gain >= docs[right].gain {
swap_docs(docs, left, right, k);
t_idx = right;
} else {
t_idx = left;
}
while docs[i].gain < docs[t_idx].gain {
i+=1 ;
}
while docs[j].gain > docs[t_idx].gain {
j-=1;
}
while i < j {
swap_docs(docs, i, j, k);
i += 1;
j -= 1;
while docs[i].gain < docs[t_idx].gain {
i+=1 ;
}
while docs[j].gain > docs[t_idx].gain {
j-=1;
}
}
if left == t_idx {
swap_docs(docs, left, j, k);
} else {
j += 1;
swap_docs(docs, j, right, k);
}
if j <= k {
left = j + 1;
}
if k <= j {
right = j.saturating_sub(1);
}
}
}
fn fix_degrees(
docs: &[Doc],
left_degs: &mut [i32],
right_degs: &mut [i32],
) -> usize {
let mut num_swaps = 0;
for doc in docs.iter() {
if doc.leaf_id == -1 {
for term in &doc.terms {
left_degs[*term as usize] += 1;
right_degs[*term as usize] -= 1;
}
num_swaps += 1;
}
else if doc.leaf_id == 1 {
for term in &doc.terms {
left_degs[*term as usize] -= 1;
right_degs[*term as usize] += 1;
}
num_swaps += 1;
}
}
num_swaps
}
fn swap_documents(
left: &mut [Doc],
right: &mut [Doc],
left_degs: &mut [i32],
right_degs: &mut [i32],
tolerance: f32,
) -> usize {
let mut num_swaps = 0;
for (l, r) in left.iter_mut().zip(right.iter_mut()) {
if l.gain - r.gain > tolerance {
for term in &l.terms {
left_degs[*term as usize] -= 1;
right_degs[*term as usize] += 1;
}
for term in &r.terms {
left_degs[*term as usize] += 1;
right_degs[*term as usize] -= 1;
}
std::mem::swap(l, r);
num_swaps += 1;
} else {
break;
}
}
num_swaps
}
fn compute_degrees(docs: &[Doc], num_terms: usize) -> Vec<i32> {
let mut degrees = vec![0; num_terms];
for doc in docs {
for term in &doc.terms {
degrees[*term as usize] += 1;
}
}
degrees
}
fn compute_degrees_l(docs: &[Doc], num_terms: usize) -> Vec<i32> {
let (left, _) = docs.split_at(docs.len() / 2);
compute_degrees(left, num_terms)
}
fn compute_degrees_r(docs: &[Doc], num_terms: usize) -> Vec<i32> {
let (_, right) = docs.split_at(docs.len() / 2);
compute_degrees(right, num_terms)
}
fn expb(log_from: f32, log_to: f32, deg1: i32, deg2: i32) -> f32 {
let d1f = deg1 as f32;
let d2f = deg2 as f32;
let a = d1f * log_from;
let b = d1f * cmp_log2(deg1 + 1);
let c = d2f * log_to;
let d = d2f * cmp_log2(deg2 + 1);
a - b + c - d
}
fn approx_one_a(_log_to: f32, _log_from: f32, deg_to: i32, deg_from: i32) -> f32 {
cmp_log2(deg_to + 2) - cmp_log2(deg_from) - LOG2_E / (1.0 + deg_to as f32)
}
fn approx_two_s(_log_to: f32, _log_from: f32, deg_to: i32, deg_from: i32) -> f32 {
cmp_log2(deg_to) - cmp_log2(deg_from)
}
fn compute_move_gains_default_l2r(
from: &mut [Doc],
log2_from: f32,
log2_to: f32,
fdeg: &[i32],
tdeg: &[i32],
) {
from.par_iter_mut().for_each(|doc| {
let mut doc_gain = 0.0f32;
for term in &doc.terms {
let from_deg = fdeg[*term as usize];
let to_deg = tdeg[*term as usize];
let term_gain = expb(log2_from, log2_to, from_deg, to_deg)
- expb(log2_from, log2_to, from_deg - 1, to_deg + 1);
doc_gain += term_gain;
}
doc.gain = doc_gain;
doc.leaf_id = 0;
});
}
fn compute_move_gains_default_r2l(
from: &mut [Doc],
log2_from: f32,
log2_to: f32,
fdeg: &[i32],
tdeg: &[i32],
) {
from.par_iter_mut().for_each(|doc| {
let mut doc_gain = 0.0f32;
for term in &doc.terms {
let from_deg = fdeg[*term as usize];
let to_deg = tdeg[*term as usize];
let term_gain = expb(log2_from, log2_to, from_deg, to_deg)
- expb(log2_from, log2_to, from_deg - 1, to_deg + 1);
doc_gain -= term_gain;
}
doc.gain = doc_gain;
doc.leaf_id = 0;
});
}
fn compute_move_gains_a1_l2r(
from: &mut [Doc],
log2_from: f32,
log2_to: f32,
fdeg: &[i32],
tdeg: &[i32],
) {
from.par_iter_mut().for_each(|doc| {
let mut doc_gain = 0.0f32;
for term in &doc.terms {
let from_deg = fdeg[*term as usize];
let to_deg = tdeg[*term as usize];
let term_gain = approx_one_a(log2_to, log2_from, to_deg, from_deg);
doc_gain += term_gain;
}
doc.gain = doc_gain;
doc.leaf_id = 0;
});
}
fn compute_move_gains_a1_r2l(
from: &mut [Doc],
log2_from: f32,
log2_to: f32,
fdeg: &[i32],
tdeg: &[i32],
) {
from.par_iter_mut().for_each(|doc| {
let mut doc_gain = 0.0f32;
for term in &doc.terms {
let from_deg = fdeg[*term as usize];
let to_deg = tdeg[*term as usize];
let term_gain = approx_one_a(log2_to, log2_from, to_deg, from_deg);
doc_gain -= term_gain; }
doc.gain = doc_gain;
doc.leaf_id = 0;
});
}
fn compute_move_gains_a2(
from: &mut [Doc],
log2_from: f32,
log2_to: f32,
fdeg: &[i32],
tdeg: &[i32],
) {
from.par_iter_mut().for_each(|doc| {
let mut doc_gain = 0.0f32;
for term in &doc.terms {
let from_deg = fdeg[*term as usize];
let to_deg = tdeg[*term as usize];
let term_gain = approx_two_s(log2_to, log2_from, to_deg, from_deg);
doc_gain += term_gain;
}
doc.gain = doc_gain;
doc.leaf_id = 0;
});
}
fn compute_move_gains_default_l2r_seq(
from: &mut [Doc],
log2_from: f32,
log2_to: f32,
fdeg: &[i32],
tdeg: &[i32],
) {
from.iter_mut().for_each(|doc| {
let mut doc_gain = 0.0f32;
for term in &doc.terms {
let from_deg = fdeg[*term as usize];
let to_deg = tdeg[*term as usize];
let term_gain = expb(log2_from, log2_to, from_deg, to_deg)
- expb(log2_from, log2_to, from_deg - 1, to_deg + 1);
doc_gain += term_gain;
}
doc.gain = doc_gain;
doc.leaf_id = 0;
});
}
fn compute_move_gains_default_r2l_seq(
from: &mut [Doc],
log2_from: f32,
log2_to: f32,
fdeg: &[i32],
tdeg: &[i32],
) {
from.iter_mut().for_each(|doc| {
let mut doc_gain = 0.0f32;
for term in &doc.terms {
let from_deg = fdeg[*term as usize];
let to_deg = tdeg[*term as usize];
let term_gain = expb(log2_from, log2_to, from_deg, to_deg)
- expb(log2_from, log2_to, from_deg - 1, to_deg + 1);
doc_gain -= term_gain;
}
doc.gain = doc_gain;
doc.leaf_id = 0;
});
}
fn compute_move_gains_a1_l2r_seq(
from: &mut [Doc],
log2_from: f32,
log2_to: f32,
fdeg: &[i32],
tdeg: &[i32],
) {
from.iter_mut().for_each(|doc| {
let mut doc_gain = 0.0f32;
for term in &doc.terms {
let from_deg = fdeg[*term as usize];
let to_deg = tdeg[*term as usize];
let term_gain = approx_one_a(log2_to, log2_from, to_deg, from_deg);
doc_gain += term_gain;
}
doc.gain = doc_gain;
doc.leaf_id = 0;
});
}
fn compute_move_gains_a1_r2l_seq(
from: &mut [Doc],
log2_from: f32,
log2_to: f32,
fdeg: &[i32],
tdeg: &[i32],
) {
from.iter_mut().for_each(|doc| {
let mut doc_gain = 0.0f32;
for term in &doc.terms {
let from_deg = fdeg[*term as usize];
let to_deg = tdeg[*term as usize];
let term_gain = approx_one_a(log2_to, log2_from, to_deg, from_deg);
doc_gain -= term_gain; }
doc.gain = doc_gain;
doc.leaf_id = 0;
});
}
fn compute_move_gains_a2_seq(
from: &mut [Doc],
log2_from: f32,
log2_to: f32,
fdeg: &[i32],
tdeg: &[i32],
) {
from.iter_mut().for_each(|doc| {
let mut doc_gain = 0.0f32;
for term in &doc.terms {
let from_deg = fdeg[*term as usize];
let to_deg = tdeg[*term as usize];
let term_gain = approx_two_s(log2_to, log2_from, to_deg, from_deg);
doc_gain += term_gain;
}
doc.gain = doc_gain;
doc.leaf_id = 0;
});
}
fn compute_gains(mut left: &mut [Doc], mut right: &mut [Doc], ldeg: &[i32], rdeg: &[i32]) {
let gain_func: &str = std::option_env!("GAIN").unwrap_or("approx_1");
let log2_left = 0.0;
let log2_right = 0.0;
match gain_func {
"default" => {
let log2_left = (left.len() as f32).log2();
let log2_right = (right.len() as f32).log2();
rayon::scope(|s| {
s.spawn(|_| {
compute_move_gains_default_l2r(&mut left, log2_left, log2_right, &ldeg, &rdeg);
});
s.spawn(|_| {
compute_move_gains_default_r2l(&mut right, log2_right, log2_left, &rdeg, &ldeg);
});
});
}
"approx_1" => {
rayon::scope(|s| {
s.spawn(|_| {
compute_move_gains_a1_l2r(&mut left, log2_left, log2_right, &ldeg, &rdeg);
});
s.spawn(|_| {
compute_move_gains_a1_r2l(&mut right, log2_right, log2_left, &rdeg, &ldeg);
});
});
}
"approx_2" => {
rayon::scope(|s| {
s.spawn(|_| {
compute_move_gains_a2(&mut left, log2_left, log2_right, &ldeg, &rdeg);
});
s.spawn(|_| {
compute_move_gains_a2(&mut right, log2_right, log2_left, &ldeg, &rdeg);
});
});
}
_ => {
log::info!("Error: Couldn't match the gain function.");
}
}
}
fn compute_gains_seq(mut left: &mut [Doc], mut right: &mut [Doc], ldeg: &[i32], rdeg: &[i32]) {
let gain_func: &str = std::option_env!("GAIN").unwrap_or("approx_1");
let log2_left = 0.0;
let log2_right = 0.0;
match gain_func {
"default" => {
let log2_left = (left.len() as f32).log2();
let log2_right = (right.len() as f32).log2();
compute_move_gains_default_l2r_seq(&mut left, log2_left, log2_right, &ldeg, &rdeg);
compute_move_gains_default_r2l_seq(&mut right, log2_right, log2_left, &rdeg, &ldeg);
}
"approx_1" => {
compute_move_gains_a1_l2r_seq(&mut left, log2_left, log2_right, &ldeg, &rdeg);
compute_move_gains_a1_r2l_seq(&mut right, log2_right, log2_left, &rdeg, &ldeg);
}
"approx_2" => {
compute_move_gains_a2_seq(&mut left, log2_left, log2_right, &ldeg, &rdeg);
compute_move_gains_a2_seq(&mut right, log2_right, log2_left, &ldeg, &rdeg);
}
_ => {
log::info!("Error: Couldn't match the gain function.");
}
}
}
fn process_partitions(
mut docs: &mut [Doc],
num_terms: usize,
iterations: usize,
) {
let (mut left_deg, mut right_deg) = rayon::join(
|| compute_degrees_l(&docs, num_terms),
|| compute_degrees_r(&docs, num_terms),
);
for _iter in 0..iterations {
if QUICKSELECT {
let (mut left, mut right) = docs.split_at_mut(docs.len() / 2);
compute_gains(&mut left, &mut right, &left_deg[..], &right_deg[..]);
let median_idx = docs.len() / 2;
if COOLING {
partition_quickselect(&mut docs, median_idx, (_iter as f32) * 0.5); } else {
partition_quickselect(&mut docs, median_idx, 0.0);
}
let nswaps = fix_degrees(docs, &mut left_deg[..], &mut right_deg[..]);
if nswaps == 0 {
break;
}
}
else {
let (mut left, mut right) = docs.split_at_mut(docs.len() / 2);
compute_gains(&mut left, &mut right, &left_deg[..], &right_deg[..]);
if PSORT {
left.par_sort_by(|a, b| b.gain.partial_cmp(&a.gain).unwrap_or(Equal)); right.par_sort_by(|a, b| a.gain.partial_cmp(&b.gain).unwrap_or(Equal)); } else {
left.sort_by(|a, b| b.gain.partial_cmp(&a.gain).unwrap_or(Equal)); right.sort_by(|a, b| a.gain.partial_cmp(&b.gain).unwrap_or(Equal)); }
let nswaps = if COOLING {
swap_documents(&mut left, &mut right, &mut left_deg[..], &mut right_deg[..], _iter as f32) } else {
swap_documents(&mut left, &mut right, &mut left_deg[..], &mut right_deg[..], 0.0)
};
if nswaps == 0 {
break;
}
}
}
}
fn process_partitions_seq(
mut docs: &mut [Doc],
num_terms: usize,
iterations: usize,
) {
let mut left_deg = compute_degrees_l(&docs, num_terms);
let mut right_deg = compute_degrees_r(&docs, num_terms);
for _iter in 0..iterations {
if QUICKSELECT {
let (mut left, mut right) = docs.split_at_mut(docs.len() / 2);
compute_gains_seq(&mut left, &mut right, &left_deg[..], &right_deg[..]);
let median_idx = docs.len() / 2;
if COOLING {
partition_quickselect(&mut docs, median_idx, (_iter as f32) * 0.5); } else {
partition_quickselect(&mut docs, median_idx, 0.0);
}
let nswaps = fix_degrees(docs, &mut left_deg[..], &mut right_deg[..]);
if nswaps == 0 {
break;
}
}
else {
let (mut left, mut right) = docs.split_at_mut(docs.len() / 2);
compute_gains_seq(&mut left, &mut right, &left_deg[..], &right_deg[..]);
if PSORT {
left.par_sort_by(|a, b| b.gain.partial_cmp(&a.gain).unwrap_or(Equal)); right.par_sort_by(|a, b| a.gain.partial_cmp(&b.gain).unwrap_or(Equal)); } else {
left.sort_by(|a, b| b.gain.partial_cmp(&a.gain).unwrap_or(Equal)); right.sort_by(|a, b| a.gain.partial_cmp(&b.gain).unwrap_or(Equal)); }
let nswaps = if COOLING {
swap_documents(&mut left, &mut right, &mut left_deg[..], &mut right_deg[..], _iter as f32) } else {
swap_documents(&mut left, &mut right, &mut left_deg[..], &mut right_deg[..], 0.0)
};
if nswaps == 0 {
break;
}
}
}
}
pub fn recursive_graph_bisection(
docs: &mut [Doc],
num_terms: usize,
iterations: usize,
min_partition_size: usize,
max_depth: usize,
parallel_switch: usize,
depth: usize,
sort_leaf: bool,
id: usize,
) {
if docs.len() <= min_partition_size || depth > max_depth {
if sort_leaf {
docs.sort_by(|a, b| a.org_id.cmp(&b.org_id));
}
return;
}
if depth > parallel_switch { process_partitions_seq(docs, num_terms, iterations);
} else {
process_partitions(docs, num_terms, iterations);
}
let (mut left, mut right) = docs.split_at_mut(docs.len() / 2);
rayon::scope(|s| {
s.spawn(|_| {
recursive_graph_bisection(&mut left, num_terms, iterations, min_partition_size, max_depth, parallel_switch, depth+1, sort_leaf, 2*id);
});
s.spawn(|_| {
recursive_graph_bisection(&mut right, num_terms, iterations, min_partition_size, max_depth, parallel_switch, depth+1, sort_leaf, 2*id+1);
});
});
}
const START_DEPTH:usize = 0;
pub fn recursive_graph_bisection_iterative(
docs: &mut [Doc],
num_terms: usize,
iterations: usize,
min_partition_size: usize,
max_depth: usize,
parallel_switch: usize,
_depth: usize,
sort_leaf: bool,
_id: usize,
) {
let mut all_slices = Vec::new();
all_slices.push(&mut docs[..]);
for _ in 0..START_DEPTH {
all_slices = all_slices
.into_iter()
.map(|s| s.split_at_mut(s.len() / 2))
.map(|(left, right)| vec![left, right])
.flatten()
.filter(|s| s.len() >= min_partition_size)
.collect();
}
let mut current_depth = START_DEPTH;
while !all_slices.is_empty() {
current_depth +=1;
if current_depth > parallel_switch {
all_slices
.par_iter_mut()
.for_each(|slice| process_partitions_seq(slice, num_terms, iterations));
} else {
all_slices
.par_iter_mut()
.for_each(|slice| process_partitions(slice, num_terms, iterations));
}
all_slices = all_slices
.into_iter()
.map(|s| s.split_at_mut(s.len() / 2))
.map(|(left, right)| vec![left, right])
.flatten()
.filter(|s| s.len() >= min_partition_size)
.collect();
if current_depth == max_depth {
break;
}
}
if sort_leaf {
docs.sort_by(|a, b| a.org_id.cmp(&b.org_id));
}
}