extern crate ndarray;
extern crate num_cpus;
extern crate rand;
use crate::clustering::{Clusterings, WorkingClustering};
use crate::log2cache::Log2Cache;
use crate::loss::*;
use crate::*;
use dahl_partition::*;
use dahl_roxido::mk_rng_isaac;
use ndarray::{Array2, Array3, Axis};
use rand::distributions::{Distribution, Uniform};
use rand::seq::SliceRandom;
use rand::{Rng, SeedableRng};
use rand_isaac::IsaacRng;
use std::convert::TryFrom;
use std::slice;
use std::sync::mpsc;
use std::time::Instant;
pub trait CMLossComputer {
#[allow(unused_variables)]
fn initialize(&mut self, state: &WorkingClustering, cms: &Array3<CountType>) {}
fn compute_loss(&self, state: &WorkingClustering, cms: &Array3<CountType>) -> f64;
fn change_in_loss(
&self,
item_index: usize,
to_label: LabelType,
from_label_option: Option<LabelType>,
state: &WorkingClustering,
cms: &Array3<CountType>,
draws: &Clusterings,
) -> f64;
#[allow(unused_variables)]
fn decision_callback(
&mut self,
item_index: usize,
to_label_option: Option<LabelType>,
from_label_option: Option<LabelType>,
state: &WorkingClustering,
cms: &Array3<CountType>,
draws: &Clusterings,
) {
}
}
pub struct BinderCMLossComputer {
a: f64,
}
impl BinderCMLossComputer {
pub fn new(a: f64) -> Self {
Self { a }
}
pub fn n_squared(x: CountType) -> f64 {
let x = x as f64;
x * x
}
}
impl CMLossComputer for BinderCMLossComputer {
fn compute_loss(&self, state: &WorkingClustering, cms: &Array3<CountType>) -> f64 {
let n_draws = cms.len_of(Axis(2));
let sum2 = state
.occupied_clusters()
.iter()
.map(|i| BinderCMLossComputer::n_squared(state.size_of(*i)))
.sum::<f64>()
* n_draws as f64;
let mut sum1 = 0.0;
let mut sum3 = 0.0;
for draw_index in 0..n_draws {
for other_index in 0..cms.len_of(Axis(1)) {
let n = cms[(0, other_index, draw_index)];
if n > 0 {
sum1 += BinderCMLossComputer::n_squared(cms[(0, other_index, draw_index)]);
for main_label in state.occupied_clusters().iter() {
sum3 += BinderCMLossComputer::n_squared(
cms[(*main_label as usize + 1, other_index, draw_index)],
);
}
}
}
}
(self.a * sum1 + sum2 - (self.a + 1.0) * sum3)
/ (n_draws as f64 * BinderCMLossComputer::n_squared(state.n_items()))
}
fn change_in_loss(
&self,
item_index: usize,
to_label: LabelType,
from_label_option: Option<LabelType>,
state: &WorkingClustering,
cms: &Array3<CountType>,
draws: &Clusterings,
) -> f64 {
let offset = if from_label_option.is_some() && to_label == from_label_option.unwrap() {
1
} else {
0
};
let n_draws = cms.len_of(Axis(2));
let sum2 = (n_draws as f64) * ((state.size_of(to_label) - offset) as f64);
let to_index = to_label as usize + 1;
let mut sum3 = 0.0;
for draw_index in 0..n_draws {
let other_index = draws.label(draw_index, item_index) as usize;
sum3 += (cms[(to_index, other_index, draw_index)] - offset) as f64;
}
sum2 - (self.a + 1.0) * sum3
}
}
pub struct OMARICMLossComputer {
n: CountType,
sum: f64,
sums: Array2<f64>,
}
impl OMARICMLossComputer {
pub fn new(n_draws: usize) -> Self {
Self {
n: 0,
sum: 0.0,
sums: Array2::<f64>::zeros((n_draws, 2)),
}
}
pub fn n_choose_2_times_2(x: CountType) -> f64 {
let x = x as f64;
x * (x - 1.0)
}
}
impl CMLossComputer for OMARICMLossComputer {
fn initialize(&mut self, state: &WorkingClustering, cms: &Array3<CountType>) {
self.n = state.n_items();
self.sum = state
.occupied_clusters()
.iter()
.map(|i| OMARICMLossComputer::n_choose_2_times_2(state.size_of(*i)))
.sum();
let n_draws = cms.len_of(Axis(2));
for draw_index in 0..n_draws {
for other_index in 0..cms.len_of(Axis(1)) {
let n = cms[(0, other_index, draw_index)];
if n > 0 {
self.sums[(draw_index, 0)] += OMARICMLossComputer::n_choose_2_times_2(n);
for main_label in state.occupied_clusters().iter() {
self.sums[(draw_index, 1)] += OMARICMLossComputer::n_choose_2_times_2(
cms[(*main_label as usize + 1, other_index, draw_index)],
);
}
}
}
}
}
fn compute_loss(&self, _state: &WorkingClustering, _cms: &Array3<CountType>) -> f64 {
let mut sum = 0.0;
let sum2 = self.sum;
let sum2_div_denom = sum2 / OMARICMLossComputer::n_choose_2_times_2(self.n);
let n_draws = self.sums.len_of(Axis(0));
for draw_index in 0..n_draws {
let sum1 = self.sums[(draw_index, 0)];
let offset = sum1 * sum2_div_denom;
let denominator = 0.5 * (sum1 + sum2) - offset;
if denominator > 0.0 {
let numerator = self.sums[(draw_index, 1)] - offset;
sum += numerator / denominator;
}
}
1.0 - sum / (n_draws as f64)
}
fn change_in_loss(
&self,
item_index: usize,
to_label: LabelType,
from_label_option: Option<LabelType>,
state: &WorkingClustering,
cms: &Array3<CountType>,
draws: &Clusterings,
) -> f64 {
let offset = if from_label_option.is_some() && to_label == from_label_option.unwrap() {
1
} else {
0
};
let mut sum2 = if offset == 0 {
self.sum + 2.0 * state.size_of(to_label) as f64
} else {
self.sum
};
let to_index = to_label as usize + 1;
let mut n = self.n;
let from_index = match from_label_option {
Some(from_label) => {
if offset == 0 {
sum2 -= 2.0 * (state.size_of(from_label) - 1) as f64;
}
from_label as usize + 1
}
None => {
n += 1;
0
}
};
let sum2_div_denom = sum2 / OMARICMLossComputer::n_choose_2_times_2(n);
let mut sum = 0.0;
let n_draws = self.sums.len_of(Axis(0));
for draw_index in 0..n_draws {
let other_index = draws.label(draw_index, item_index) as usize;
let sum1 = self.sums[(draw_index, 0)]
+ if from_label_option.is_none() {
2.0 * n as f64
} else {
0.0
};
let chance_adjustment = sum1 * sum2_div_denom;
let denominator = 0.5 * (sum1 + sum2) - chance_adjustment;
if denominator > 0.0 {
let mut unadjusted_numerator = self.sums[(draw_index, 1)];
if offset == 0 {
unadjusted_numerator += 2.0 * cms[(to_index, other_index, draw_index)] as f64;
if from_label_option.is_some() {
unadjusted_numerator -=
2.0 * (cms[(from_index, other_index, draw_index)] - 1) as f64;
}
}
let numerator = unadjusted_numerator - chance_adjustment;
sum += numerator / denominator;
}
}
-sum
}
fn decision_callback(
&mut self,
item_index: usize,
to_label_option: Option<LabelType>,
from_label_option: Option<LabelType>,
state: &WorkingClustering,
cms: &Array3<CountType>,
draws: &Clusterings,
) {
let to_index = match to_label_option {
Some(to_label) => {
self.sum += 2.0 * state.size_of(to_label) as f64;
to_label as usize + 1
}
None => {
self.n -= 1;
0
}
};
let from_index = match from_label_option {
Some(from_label) => {
self.sum -= 2.0 * (state.size_of(from_label) - 1) as f64;
from_label as usize + 1
}
None => {
self.n += 1;
0
}
};
let n_draws = draws.n_clusterings();
for draw_index in 0..n_draws {
let other_index = draws.label(draw_index, item_index) as usize;
if from_label_option.is_some() {
self.sums[(draw_index, 1)] -=
2.0 * (cms[(from_index, other_index, draw_index)] - 1) as f64;
} else {
self.sums[(draw_index, 0)] += 2.0 * cms[(0, other_index, draw_index)] as f64;
}
if to_label_option.is_some() {
self.sums[(draw_index, 1)] += 2.0 * cms[(to_index, other_index, draw_index)] as f64;
} else {
self.sums[(draw_index, 0)] -= 2.0 * (cms[(0, other_index, draw_index)] - 1) as f64;
}
}
}
}
pub struct VICMLossComputer<'a> {
a: f64,
cache: &'a Log2Cache,
}
impl<'a> VICMLossComputer<'a> {
pub fn new(a: f64, cache: &'a Log2Cache) -> Self {
Self { a, cache }
}
}
impl<'a> CMLossComputer for VICMLossComputer<'a> {
fn compute_loss(&self, state: &WorkingClustering, cms: &Array3<CountType>) -> f64 {
let sum2: f64 = state
.occupied_clusters()
.iter()
.map(|i| self.cache.nlog2n(state.size_of(*i)))
.sum();
let a_plus_1 = self.a + 1.0;
let n_draws = cms.len_of(Axis(2));
let mut sum = 0.0;
for draw_index in 0..n_draws {
let mut sum1 = 0.0;
let mut sum3 = 0.0;
for other_index in 0..cms.len_of(Axis(1)) {
let n = cms[(0, other_index, draw_index)];
if n > 0 {
sum1 += self.cache.nlog2n(cms[(0, other_index, draw_index)]);
for main_label in state.occupied_clusters().iter() {
sum3 += self
.cache
.nlog2n(cms[(*main_label as usize + 1, other_index, draw_index)]);
}
}
}
sum += (self.a * sum1 + sum2 - a_plus_1 * sum3) / (state.n_items() as f64);
}
sum / (n_draws as f64)
}
fn change_in_loss(
&self,
item_index: usize,
to_label: LabelType,
from_label_option: Option<LabelType>,
state: &WorkingClustering,
cms: &Array3<CountType>,
draws: &Clusterings,
) -> f64 {
let offset = if from_label_option.is_some() && to_label == from_label_option.unwrap() {
1
} else {
0
};
let n_draws = cms.len_of(Axis(2));
let sum2 = (n_draws as f64)
* self
.cache
.nlog2n_difference(state.size_of(to_label) - offset);
let to_index = to_label as usize + 1;
let mut sum3 = 0.0;
for draw_index in 0..n_draws {
let other_index = draws.label(draw_index, item_index) as usize;
sum3 += self
.cache
.nlog2n_difference(cms[(to_index, other_index, draw_index)] - offset);
}
sum2 - (self.a + 1.0) * sum3
}
}
pub struct GeneralInformationBasedCMLossComputer<'a, T>
where
T: InformationBasedLoss,
{
cache: &'a Log2Cache,
n: CountType,
sum: f64,
sums: Array2<f64>,
ibloss: T,
}
impl<'a, T> GeneralInformationBasedCMLossComputer<'a, T>
where
T: InformationBasedLoss,
{
pub fn new(n_draws: usize, cache: &'a Log2Cache, ibloss: T) -> Self {
Self {
cache,
n: 0,
sum: 0.0,
sums: Array2::<f64>::zeros((n_draws, 2)),
ibloss,
}
}
}
impl<'a, T> CMLossComputer for GeneralInformationBasedCMLossComputer<'a, T>
where
T: InformationBasedLoss,
{
fn initialize(&mut self, state: &WorkingClustering, cms: &Array3<CountType>) {
self.n = state.n_items();
self.sum = state
.occupied_clusters()
.iter()
.map(|i| self.cache.nlog2n(state.size_of(*i)))
.sum();
let n_draws = cms.len_of(Axis(2));
for draw_index in 0..n_draws {
for other_index in 0..cms.len_of(Axis(1)) {
let n = cms[(0, other_index, draw_index)];
if n > 0 {
self.sums[(draw_index, 0)] +=
self.cache.nlog2n(cms[(0, other_index, draw_index)]);
for main_label in state.occupied_clusters().iter() {
self.sums[(draw_index, 1)] += self
.cache
.nlog2n(cms[(*main_label as usize + 1, other_index, draw_index)]);
}
}
}
}
}
fn compute_loss(&self, _state: &WorkingClustering, _cms: &Array3<CountType>) -> f64 {
let ni = self.n as f64;
let nlog2n = ni * ni.log2();
let u = self.sum;
let mut sum = 0.0;
let n_draws = self.sums.len_of(Axis(0));
for draw_index in 0..n_draws {
let v = self.sums[(draw_index, 0)];
let uv = self.sums[(draw_index, 1)];
sum += self.ibloss.compute_kernel(u, v, uv, nlog2n);
}
self.ibloss.finalize(sum, n_draws as f64, ni)
}
fn change_in_loss(
&self,
item_index: usize,
to_label: LabelType,
from_label_option: Option<LabelType>,
state: &WorkingClustering,
cms: &Array3<CountType>,
draws: &Clusterings,
) -> f64 {
let offset = if from_label_option.is_some() && to_label == from_label_option.unwrap() {
1
} else {
0
};
let mut v = if offset == 0 {
self.sum + self.cache.nlog2n_difference(state.size_of(to_label))
} else {
self.sum
};
let to_index = to_label as usize + 1;
let (n, from_index) = match from_label_option {
Some(from_label) => {
if offset == 0 {
v -= self.cache.nlog2n_difference(state.size_of(from_label) - 1);
}
(self.n, from_label as usize + 1)
}
None => (self.n + 1, 0),
};
let ni = n as f64;
let nlog2n = ni * ni.log2();
let mut sum = 0.0;
let n_draws = self.sums.len_of(Axis(0));
for draw_index in 0..n_draws {
let other_index = draws.label(draw_index, item_index) as usize;
let u = self.sums[(draw_index, 0)]
+ if from_label_option.is_none() {
self.cache.nlog2n_difference(n)
} else {
0.0
};
let mut uv = self.sums[(draw_index, 1)];
if offset == 0 {
uv += self
.cache
.nlog2n_difference(cms[(to_index, other_index, draw_index)]);
if from_label_option.is_some() {
uv -= self
.cache
.nlog2n_difference(cms[(from_index, other_index, draw_index)] - 1)
}
}
sum += self.ibloss.compute_kernel(u, v, uv, nlog2n);
}
self.ibloss.score(sum)
}
fn decision_callback(
&mut self,
item_index: usize,
to_label_option: Option<LabelType>,
from_label_option: Option<LabelType>,
state: &WorkingClustering,
cms: &Array3<CountType>,
draws: &Clusterings,
) {
let to_index = match to_label_option {
Some(to_label) => {
self.sum += self.cache.nlog2n_difference(state.size_of(to_label));
to_label as usize + 1
}
None => {
self.n -= 1;
0
}
};
let from_index = match from_label_option {
Some(from_label) => {
self.sum -= self.cache.nlog2n_difference(state.size_of(from_label) - 1);
from_label as usize + 1
}
None => {
self.n += 1;
0
}
};
let n_draws = draws.n_clusterings();
for draw_index in 0..n_draws {
let other_index = draws.label(draw_index, item_index) as usize;
if from_label_option.is_some() {
self.sums[(draw_index, 1)] -= self
.cache
.nlog2n_difference(cms[(from_index, other_index, draw_index)] - 1);
} else {
self.sums[(draw_index, 0)] += self
.cache
.nlog2n_difference(cms[(0, other_index, draw_index)]);
}
if to_label_option.is_some() {
self.sums[(draw_index, 1)] += self
.cache
.nlog2n_difference(cms[(to_index, other_index, draw_index)]);
} else {
self.sums[(draw_index, 0)] -= self
.cache
.nlog2n_difference(cms[(0, other_index, draw_index)] - 1);
}
}
}
}
pub trait InformationBasedLoss {
fn compute_kernel(&self, u: f64, v: f64, uv: f64, nlog2n: f64) -> f64;
fn finalize(&self, sum: f64, n_draws: f64, n_items: f64) -> f64;
fn score(&self, sum: f64) -> f64;
}
pub struct NVIInformationBasedLoss;
impl InformationBasedLoss for NVIInformationBasedLoss {
fn compute_kernel(&self, u: f64, v: f64, uv: f64, nlog2n: f64) -> f64 {
(u + v - 2.0 * uv) / (nlog2n - uv)
}
fn finalize(&self, sum: f64, n_draws: f64, _n_items: f64) -> f64 {
sum / n_draws
}
fn score(&self, sum: f64) -> f64 {
sum
}
}
pub struct IDInformationBasedLoss;
impl InformationBasedLoss for IDInformationBasedLoss {
fn compute_kernel(&self, u: f64, v: f64, uv: f64, _nlog2n: f64) -> f64 {
u + v - uv - u.min(v)
}
fn finalize(&self, sum: f64, n_draws: f64, n_items: f64) -> f64 {
sum / (n_draws * n_items)
}
fn score(&self, sum: f64) -> f64 {
sum
}
}
pub struct NIDInformationBasedLoss;
impl InformationBasedLoss for NIDInformationBasedLoss {
fn compute_kernel(&self, u: f64, v: f64, uv: f64, nlog2n: f64) -> f64 {
(nlog2n + uv - u - v) / (nlog2n - u.min(v))
}
fn finalize(&self, sum: f64, n_draws: f64, _n_items: f64) -> f64 {
1.0 - sum / n_draws
}
fn score(&self, sum: f64) -> f64 {
-sum
}
}
fn find_label_of_minimum<I: Iterator<Item = (LabelType, f64)>>(pairs: I) -> LabelType {
let mut s0 = f64::INFINITY;
let mut l0 = 0;
for pair in pairs {
if pair.1 < s0 {
s0 = pair.1;
l0 = pair.0;
}
}
l0
}
fn allocation_scan<T: CMLossComputer>(
sweetening_scan: bool,
singletons_initialization: bool,
state: &mut WorkingClustering,
cms: &mut Array3<CountType>,
permutation: &[usize],
loss_computer: &mut T,
draws: &Clusterings,
) -> bool {
let mut state_changed = false;
let mut chosen_label_option = None;
for item_index in permutation {
let item_index = *item_index;
let label_of_empty_cluster = state.label_of_empty_cluster();
let from_label_option = match sweetening_scan {
true => Some(state.get(item_index)),
false => None,
};
let to_label = match label_of_empty_cluster {
Some(loec) if !sweetening_scan && singletons_initialization => loec,
Some(_) | None => {
let iter = state
.occupied_clusters()
.iter()
.chain(label_of_empty_cluster.iter())
.map(|to_label| {
(
*to_label,
loss_computer.change_in_loss(
item_index,
*to_label,
from_label_option,
&state,
&cms,
&draws,
),
)
});
find_label_of_minimum(iter)
}
};
if !sweetening_scan {
state.assign(item_index, to_label, loss_computer, cms, draws);
if chosen_label_option.is_none() {
chosen_label_option = Some(to_label);
state_changed = state.size_of(to_label) != 1;
}
if to_label != chosen_label_option.unwrap() {
state_changed = true;
}
} else if to_label != from_label_option.unwrap() {
state.reassign(item_index, to_label, loss_computer, cms, draws);
state_changed = true;
}
}
state_changed
}
fn sweetening_scans<T: CMLossComputer, U: Rng>(
state: &mut WorkingClustering,
cms: &mut Array3<CountType>,
permutation: &mut Vec<usize>,
loss_computer: &mut T,
scan_counter: &mut u32,
draws: &Clusterings,
p: &SALSOParameters,
rng: &mut U,
) -> bool {
let mut global_state_changed = false;
let mut state_changed = true;
while state_changed && *scan_counter < p.max_scans {
*scan_counter += 1;
permutation.shuffle(rng);
state_changed = allocation_scan(true, false, state, cms, permutation, loss_computer, draws);
global_state_changed = global_state_changed || state_changed;
}
global_state_changed
}
pub fn minimize_once_by_salso_v2<'a, T: CMLossComputer, U: Rng>(
loss_computer_factory: Box<dyn Fn() -> T + 'a>,
draws: &Clusterings,
p: &SALSOParameters,
seconds_target: f64,
rng: &mut U,
) -> SALSOResults {
let n_items = draws.n_items();
let max_size = match p.max_size {
0 => draws.max_clusters(),
_ => p.max_size,
};
let mut permutation: Vec<usize> = (0..p.n_items).collect();
let mut best = SALSOResults::dummy(max_size);
let start_time = Instant::now();
let uniform_0_1 = Uniform::from(0.0..1.0);
for run_counter in 1..=p.n_runs {
let mut loss_computer = loss_computer_factory();
let singletons_initialization = uniform_0_1.sample(rng) < p.prob_singletons_initialization;
let (mut state, mut cms, initialization_method) =
if uniform_0_1.sample(rng) < p.prob_sequential_allocation {
let mut state = WorkingClustering::empty(n_items, max_size);
let mut cms = Array3::<CountType>::zeros((
max_size as usize + 1,
draws.max_clusters() as usize,
draws.n_clusterings(),
));
permutation.shuffle(rng);
allocation_scan(
false,
singletons_initialization,
&mut state,
&mut cms,
&permutation,
&mut loss_computer,
draws,
);
let initialization_method = if singletons_initialization {
InitializationMethod::SequentialFromSingletons
} else {
InitializationMethod::SequentialFromEmpty
};
(state, cms, initialization_method)
} else {
let state = if p.max_size_as_rf {
WorkingClustering::random_as_rf(n_items, max_size, draws.max_clusters(), rng)
} else {
WorkingClustering::random(n_items, max_size, rng)
};
let cms = draws.make_confusion_matrices(&state);
loss_computer.initialize(&state, &cms);
(
state,
cms,
InitializationMethod::SampleOne2MaxWithReplacement,
)
};
let mut scan_counter = 0;
sweetening_scans(
&mut state,
&mut cms,
&mut permutation,
&mut loss_computer,
&mut scan_counter,
draws,
&p,
rng,
);
let mut expected_loss = loss_computer.compute_loss(&state, &cms);
let (mut n_zealous_accepts, mut n_zealous_attempts) = (0, 0);
if n_zealous_attempts < p.max_zealous_updates {
let labels = {
let mut x = state.occupied_clusters().clone();
x.shuffle(rng);
x
};
for label in labels {
let s = state.size_of(label) as usize;
if s <= 1 {
continue;
}
n_zealous_attempts += 1;
if n_zealous_attempts > p.max_zealous_updates {
break;
}
let mut active_items = Vec::with_capacity(s);
for item_index in 0..(state.n_items() as usize) {
if state.get(item_index) == label {
state.remove(item_index, &mut loss_computer, &mut cms, draws);
active_items.push(item_index)
}
if active_items.len() == s {
break;
}
}
active_items.shuffle(rng);
let state_changed = allocation_scan(
false,
singletons_initialization,
&mut state,
&mut cms,
&active_items,
&mut loss_computer,
draws,
);
if state_changed {
let labels_for_undo = state.clone_labels();
let state_changed = sweetening_scans(
&mut state,
&mut cms,
&mut permutation,
&mut loss_computer,
&mut scan_counter,
draws,
&p,
rng,
);
let expected_loss_of_candidate = loss_computer.compute_loss(&state, &cms);
if expected_loss_of_candidate < expected_loss {
expected_loss = expected_loss_of_candidate;
n_zealous_accepts += 1;
} else {
if state_changed {
for (item_index, new_label) in labels_for_undo.iter().enumerate() {
state.reassign(
item_index,
*new_label,
&mut loss_computer,
&mut cms,
draws,
)
}
}
for item_index in active_items {
state.reassign(item_index, label, &mut loss_computer, &mut cms, draws)
}
}
}
}
}
if expected_loss < best.expected_loss {
let clustering = state.standardize().iter().map(|x| *x as usize).collect();
best = SALSOResults {
clustering,
expected_loss,
n_scans: scan_counter,
n_zealous_accepts,
n_zealous_attempts,
initialization_method,
max_size: state.max_clusters(),
..best
}
}
if seconds_target.is_finite() {
let seconds = start_time.elapsed().as_secs_f64();
let rc = run_counter as f64;
let seconds_per_run = seconds / rc;
let projected_seconds_if_another = (rc + 1.0) * seconds_per_run;
if projected_seconds_if_another >= seconds_target {
let prob_stop = (projected_seconds_if_another - seconds_target) / seconds_per_run;
if uniform_0_1.sample(rng) < prob_stop {
return SALSOResults {
n_runs: run_counter,
seconds,
..best
};
}
}
}
}
SALSOResults {
n_runs: p.n_runs,
seconds: start_time.elapsed().as_secs_f64(),
..best
}
}
pub trait GeneralLossComputer {
fn expected_loss_kernel(&self) -> f64;
fn speculative_add(&mut self, partition: &Partition, i: usize, subset_index: LabelType) -> f64;
fn new_subset(&mut self, partition: &mut Partition);
fn add_with_index(&mut self, partition: &mut Partition, i: usize, subset_index: LabelType);
fn remove(&mut self, partition: &mut Partition, i: usize) -> LabelType;
}
#[derive(Debug)]
struct BinderSubsetCalculations {
committed_loss: f64,
speculative_loss: f64,
}
pub struct BinderGLossComputer<'a> {
subsets: Vec<BinderSubsetCalculations>,
psm: &'a SquareMatrixBorrower<'a>,
}
impl<'a> BinderGLossComputer<'a> {
pub fn new(psm: &'a SquareMatrixBorrower<'a>) -> Self {
Self {
subsets: Vec::new(),
psm,
}
}
fn expected_loss_from_kernel(psm: &'a SquareMatrixBorrower<'a>, kernel: f64) -> f64 {
let nif = psm.n_items() as f64;
(2.0 * kernel + psm.sum_of_triangle()) * 2.0 / (nif * nif)
}
}
impl<'a> GeneralLossComputer for BinderGLossComputer<'a> {
fn expected_loss_kernel(&self) -> f64 {
self.subsets
.iter()
.fold(0.0, |s, subset| s + subset.committed_loss)
}
fn speculative_add(&mut self, partition: &Partition, i: usize, subset_index: LabelType) -> f64 {
self.subsets[subset_index as usize].speculative_loss = partition.subsets()
[subset_index as usize]
.items()
.iter()
.fold(0.0, |s, j| {
s + 0.5 - unsafe { *self.psm.get_unchecked((i, *j)) }
});
self.subsets[subset_index as usize].speculative_loss
}
fn new_subset(&mut self, partition: &mut Partition) {
partition.new_subset();
self.subsets.push(BinderSubsetCalculations {
committed_loss: 0.0,
speculative_loss: 0.0,
});
}
fn add_with_index(&mut self, partition: &mut Partition, i: usize, subset_index: LabelType) {
self.subsets[subset_index as usize].committed_loss +=
self.subsets[subset_index as usize].speculative_loss;
partition.add_with_index(i, subset_index as usize);
}
fn remove(&mut self, partition: &mut Partition, i: usize) -> LabelType {
let subset_index = partition.label_of(i).unwrap();
self.subsets[subset_index].committed_loss -= partition.subsets()[subset_index]
.items()
.iter()
.fold(0.0, |s, j| {
let jj = *j;
s + if jj != i {
0.5 - unsafe { *self.psm.get_unchecked((i, jj)) }
} else {
0.0
}
});
partition.remove_clean_and_relabel(i, |killed_subset_index, moved_subset_index| {
self.subsets.swap_remove(killed_subset_index);
assert_eq!(moved_subset_index, self.subsets.len());
});
subset_index as LabelType
}
}
#[derive(Debug)]
struct OMARIApproxSubsetCalculations {
committed_ip: f64,
committed_i: f64,
speculative_ip: f64,
speculative_i: f64,
}
pub struct OMARIApproxGLossComputer<'a> {
committed_n_items: usize,
committed_sum_psm: f64,
speculative_sum_psm: f64,
subsets: Vec<OMARIApproxSubsetCalculations>,
psm: &'a SquareMatrixBorrower<'a>,
}
impl<'a> OMARIApproxGLossComputer<'a> {
pub fn new(psm: &'a SquareMatrixBorrower<'a>) -> Self {
Self {
committed_n_items: 0,
committed_sum_psm: 0.0,
speculative_sum_psm: f64::NEG_INFINITY,
subsets: Vec::new(),
psm,
}
}
}
impl<'a> OMARIApproxGLossComputer<'a> {
fn engine(
&self,
speculative_n_items: usize,
speculative_ip: f64,
speculative_i: f64,
speculative_sum_psm: f64,
) -> f64 {
let n_items = speculative_n_items + self.committed_n_items;
if n_items <= 1 {
return f64::INFINITY;
}
let n_choose_2 = (n_items * (n_items - 1) / 2) as f64;
let all_ip = speculative_ip + self.subsets.iter().fold(0.0, |s, c| s + c.committed_ip);
let all_i = speculative_i + self.subsets.iter().fold(0.0, |s, c| s + c.committed_i);
let all_p = speculative_sum_psm + self.committed_sum_psm;
let subtractor = all_i * all_p / n_choose_2;
let numerator = all_ip - subtractor;
let denominator = 0.5 * (all_i + all_p) - subtractor;
1.0 - numerator / denominator
}
}
impl<'a> GeneralLossComputer for OMARIApproxGLossComputer<'a> {
fn expected_loss_kernel(&self) -> f64 {
self.engine(0, 0.0, 0.0, 0.0)
}
fn speculative_add(&mut self, partition: &Partition, i: usize, subset_index: LabelType) -> f64 {
let s = &partition.subsets()[subset_index as usize];
self.subsets[subset_index as usize].speculative_ip = s
.items()
.iter()
.fold(0.0, |s, j| s + unsafe { *self.psm.get_unchecked((i, *j)) });
self.subsets[subset_index as usize].speculative_i = s.n_items() as f64;
if self.speculative_sum_psm == f64::NEG_INFINITY {
self.speculative_sum_psm = partition.subsets().iter().fold(0.0, |s, subset| {
s + subset.items().iter().fold(0.0, |ss, j| {
ss + unsafe { *self.psm.get_unchecked((i, *j)) }
})
})
}
self.engine(
1,
self.subsets[subset_index as usize].speculative_ip,
self.subsets[subset_index as usize].speculative_i,
self.speculative_sum_psm,
)
}
fn new_subset(&mut self, partition: &mut Partition) {
partition.new_subset();
self.subsets.push(OMARIApproxSubsetCalculations {
committed_ip: 0.0,
committed_i: 0.0,
speculative_ip: 0.0,
speculative_i: 0.0,
});
}
fn add_with_index(&mut self, partition: &mut Partition, i: usize, subset_index: LabelType) {
let mut sc = &mut self.subsets[subset_index as usize];
sc.committed_ip += sc.speculative_ip;
sc.committed_i += sc.speculative_i;
self.committed_n_items += 1;
self.committed_sum_psm += self.speculative_sum_psm;
self.speculative_sum_psm = f64::NEG_INFINITY;
partition.add_with_index(i, subset_index as usize);
}
fn remove(&mut self, partition: &mut Partition, i: usize) -> LabelType {
let subset_index = partition.label_of(i).unwrap();
self.subsets[subset_index].committed_ip -= partition.subsets()[subset_index]
.items()
.iter()
.fold(0.0, |s, j| {
let jj = *j;
s + if jj != i {
unsafe { *self.psm.get_unchecked((i, jj)) }
} else {
0.0
}
});
self.subsets[subset_index].committed_i -=
(partition.subsets()[subset_index].n_items() - 1) as f64;
self.committed_n_items -= 1;
self.committed_sum_psm -= partition.subsets().iter().fold(0.0, |s, subset| {
s + subset.items().iter().fold(0.0, |ss, j| {
let jj = *j;
ss + if jj != i {
unsafe { *self.psm.get_unchecked((i, jj)) }
} else {
0.0
}
})
});
partition.remove_clean_and_relabel(i, |killed_subset_index, moved_subset_index| {
self.subsets.swap_remove(killed_subset_index);
assert_eq!(moved_subset_index, self.subsets.len());
});
subset_index as LabelType
}
}
#[derive(Debug)]
struct VILBCacheUnit {
item: usize,
committed_sum: f64,
committed_contribution: f64,
speculative_sum: f64,
speculative_contribution: f64,
}
#[derive(Debug)]
struct VILBSubsetCalculations {
cached_units: Vec<VILBCacheUnit>,
committed_loss: f64,
speculative_loss: f64,
}
pub struct VILBGLossComputer<'a> {
subsets: Vec<VILBSubsetCalculations>,
psm: &'a SquareMatrixBorrower<'a>,
}
impl<'a> VILBGLossComputer<'a> {
pub fn new(psm: &'a SquareMatrixBorrower<'a>) -> Self {
Self {
subsets: Vec::new(),
psm,
}
}
pub fn expected_loss_from_kernel(psm: &'a SquareMatrixBorrower<'a>, kernel: f64) -> f64 {
let nif = psm.n_items() as f64;
(kernel + vilb_expected_loss_constant(psm)) / nif
}
}
impl<'a> GeneralLossComputer for VILBGLossComputer<'a> {
fn expected_loss_kernel(&self) -> f64 {
self.subsets
.iter()
.fold(0.0, |s, subset| s + subset.committed_loss)
}
fn speculative_add(&mut self, partition: &Partition, i: usize, subset_index: LabelType) -> f64 {
let subset_of_partition = &partition.subsets()[subset_index as usize];
if subset_of_partition.n_items() == 0 {
self.subsets[subset_index as usize]
.cached_units
.push(VILBCacheUnit {
item: i,
committed_sum: 0.0,
committed_contribution: 0.0,
speculative_sum: 1.0,
speculative_contribution: 0.0,
});
return 0.0;
}
for cu in self.subsets[subset_index as usize].cached_units.iter_mut() {
cu.speculative_sum =
cu.committed_sum + unsafe { *self.psm.get_unchecked((cu.item, i)) };
cu.speculative_contribution = cu.speculative_sum.log2();
}
let sum = subset_of_partition
.items()
.iter()
.fold(0.0, |s, j| s + unsafe { *self.psm.get_unchecked((i, *j)) })
+ 1.0;
self.subsets[subset_index as usize]
.cached_units
.push(VILBCacheUnit {
item: i,
committed_sum: 0.0,
committed_contribution: 0.0,
speculative_sum: sum,
speculative_contribution: sum.log2(),
});
let nif = subset_of_partition.n_items() as f64;
let s1 = (nif + 1.0) * (nif + 1.0).log2();
let s2 = self.subsets[subset_index as usize]
.cached_units
.iter()
.fold(0.0, |s, cu| s + cu.speculative_contribution);
self.subsets[subset_index as usize].speculative_loss = s1 - 2.0 * s2;
self.subsets[subset_index as usize].speculative_loss
- self.subsets[subset_index as usize].committed_loss
}
fn new_subset(&mut self, partition: &mut Partition) {
partition.new_subset();
self.subsets.push(VILBSubsetCalculations {
cached_units: Vec::new(),
committed_loss: 0.0,
speculative_loss: 0.0,
})
}
fn add_with_index(&mut self, partition: &mut Partition, i: usize, subset_index: LabelType) {
for (index, subset) in self.subsets.iter_mut().enumerate() {
if index == (subset_index as usize) {
for cu in subset.cached_units.iter_mut() {
cu.committed_sum = cu.speculative_sum;
cu.committed_contribution = cu.speculative_contribution;
}
} else {
subset.cached_units.pop();
}
}
self.subsets[subset_index as usize].committed_loss =
self.subsets[subset_index as usize].speculative_loss;
partition.add_with_index(i, subset_index as usize);
}
fn remove(&mut self, partition: &mut Partition, i: usize) -> LabelType {
let subset_index = partition.label_of(i).unwrap();
for cu in self.subsets[subset_index].cached_units.iter_mut() {
cu.committed_sum -= unsafe { *self.psm.get_unchecked((cu.item, i)) };
cu.committed_contribution = cu.committed_sum.log2();
}
let pos = self.subsets[subset_index]
.cached_units
.iter()
.enumerate()
.find(|cu| cu.1.item == i)
.unwrap()
.0;
self.subsets[subset_index].cached_units.swap_remove(pos);
self.subsets[subset_index].committed_loss =
match partition.subsets()[subset_index].n_items() {
0 => 0.0,
ni => {
let nif = ni as f64;
nif * nif.log2()
- 2.0
* self.subsets[subset_index]
.cached_units
.iter()
.fold(0.0, |s, cu| s + cu.committed_contribution)
}
};
partition.remove_clean_and_relabel(i, |killed_subset_index, moved_subset_index| {
self.subsets.swap_remove(killed_subset_index);
assert_eq!(moved_subset_index, self.subsets.len());
});
subset_index as LabelType
}
}
fn label_of_empty_cluster<U: GeneralLossComputer>(
partition: &mut Partition,
computer: &mut U,
max_label: LabelType,
) -> Option<LabelType> {
match partition.subsets().last() {
None => {
computer.new_subset(partition);
Some(0)
}
Some(last) => {
if last.is_empty() {
Some(partition.n_subsets() as LabelType - 1)
} else if partition.n_subsets() <= max_label as usize {
computer.new_subset(partition);
Some(partition.n_subsets() as LabelType - 1)
} else {
None
}
}
}
}
fn micro_optimized_allocation<U: GeneralLossComputer>(
partition: &mut Partition,
computer: &mut U,
i: usize,
label_to_take: Option<LabelType>,
) -> LabelType {
let max_label = partition.n_subsets() as LabelType - 1;
let iter = (0..=max_label).map(|subset_index| {
let value = computer.speculative_add(partition, i, subset_index as LabelType);
match label_to_take {
Some(label_to_take) => {
if subset_index == label_to_take {
(subset_index, f64::NEG_INFINITY)
} else {
(subset_index, f64::INFINITY)
}
}
None => (subset_index, value),
}
});
let subset_index = find_label_of_minimum(iter);
computer.add_with_index(partition, i, subset_index);
subset_index as LabelType
}
pub fn minimize_once_by_salso<'a, T: Rng, U: GeneralLossComputer>(
computer_factory: Box<dyn Fn() -> U + 'a>,
p: &SALSOParameters,
seconds_target: f64,
rng: &mut T,
) -> SALSOResults {
let start_time = Instant::now();
let max_label = if p.max_size == 0 {
MAX_LABEL
} else {
p.max_size.max(1) - 1
};
let mut best = SALSOResults::dummy(max_label + 1);
let mut permutation: Vec<usize> = (0..p.n_items).collect();
let uniform_0_1 = Uniform::from(0.0..1.0);
let mut run_counter = 0;
while run_counter < p.n_runs {
let mut computer = computer_factory();
let (mut partition, initialization_method) = if uniform_0_1.sample(rng)
< p.prob_sequential_allocation
{
let mut partition = Partition::new(p.n_items);
permutation.shuffle(rng);
let singletons_initialization =
uniform_0_1.sample(rng) < p.prob_singletons_initialization;
for i in 0..p.n_items {
let ii = unsafe { *permutation.get_unchecked(i) };
let empty_label = label_of_empty_cluster(&mut partition, &mut computer, max_label);
if singletons_initialization {
micro_optimized_allocation(&mut partition, &mut computer, ii, empty_label);
} else {
micro_optimized_allocation(&mut partition, &mut computer, ii, None);
}
}
let initialization_method = if singletons_initialization {
InitializationMethod::SequentialFromSingletons
} else {
InitializationMethod::SequentialFromEmpty
};
(partition, initialization_method)
} else {
panic!("Disabled because it's problematic.");
};
let mut n_scans = 0;
let mut change = true;
while change && n_scans < p.max_scans {
change = false;
n_scans += 1;
permutation.shuffle(rng);
for i in 0..p.n_items {
label_of_empty_cluster(&mut partition, &mut computer, max_label);
let ii = unsafe { *permutation.get_unchecked(i) };
let previous_subset_index = computer.remove(&mut partition, ii);
let subset_index =
micro_optimized_allocation(&mut partition, &mut computer, ii, None);
if subset_index != previous_subset_index {
change = true;
};
}
}
let expected_loss = computer.expected_loss_kernel();
if expected_loss < best.expected_loss {
partition.canonicalize();
best = SALSOResults {
clustering: partition.labels_via_copying(),
expected_loss,
n_scans,
initialization_method,
..best
};
}
run_counter += 1;
if seconds_target.is_finite() {
let seconds = start_time.elapsed().as_secs_f64();
let rc = run_counter as f64;
let seconds_per_run = seconds / rc;
let projected_seconds_if_another = (rc + 1.0) * seconds_per_run;
if projected_seconds_if_another >= seconds_target {
let prob_stop = (projected_seconds_if_another - seconds_target) / seconds_per_run;
if uniform_0_1.sample(rng) < prob_stop {
return SALSOResults {
n_runs: run_counter,
seconds,
..best
};
}
}
}
}
SALSOResults {
n_runs: p.n_runs,
seconds: start_time.elapsed().as_secs_f64(),
..best
}
}
#[derive(Debug, Clone)]
pub struct SALSOParameters {
pub n_items: usize,
pub max_size: LabelType,
pub max_size_as_rf: bool,
pub max_scans: u32,
pub max_zealous_updates: u32,
pub n_runs: u32,
pub prob_sequential_allocation: f64,
pub prob_singletons_initialization: f64,
}
pub struct SALSOResults {
pub clustering: Vec<usize>,
pub expected_loss: f64,
pub n_scans: u32,
pub n_zealous_accepts: u32,
pub n_zealous_attempts: u32,
pub initialization_method: InitializationMethod,
pub n_runs: u32,
pub max_size: LabelType,
pub seconds: f64,
}
impl SALSOResults {
pub fn new(
clustering: Vec<usize>,
expected_loss: f64,
n_scans: u32,
n_zealous_accepts: u32,
n_zealous_attempts: u32,
initialization_method: InitializationMethod,
n_runs: u32,
max_size: LabelType,
seconds: f64,
) -> Self {
Self {
clustering,
expected_loss,
n_scans,
n_zealous_accepts,
n_zealous_attempts,
initialization_method,
n_runs,
max_size,
seconds,
}
}
pub fn dummy(max_size: LabelType) -> Self {
SALSOResults::new(
vec![0usize; 0],
std::f64::INFINITY,
0,
0,
0,
InitializationMethod::SequentialFromEmpty,
0,
max_size,
0.0,
)
}
}
pub fn minimize_by_salso<T: Rng>(
pdi: PartitionDistributionInformation,
loss_function: LossFunction,
p: &SALSOParameters,
seconds: f64,
n_cores: u32,
mut rng: &mut T,
) -> SALSOResults {
let cache = Log2Cache::new(match loss_function {
LossFunction::VI(_) | LossFunction::NVI | LossFunction::ID | LossFunction::NID => p.n_items,
_ => 0,
});
let result = if n_cores == 1 {
match loss_function {
LossFunction::BinderDraws(a) => minimize_once_by_salso_v2(
Box::new(|| BinderCMLossComputer::new(a)),
pdi.draws(),
p,
seconds,
rng,
),
LossFunction::BinderPSM => minimize_once_by_salso(
Box::new(|| BinderGLossComputer::new(pdi.psm())),
p,
seconds,
rng,
),
LossFunction::OneMinusARI => minimize_once_by_salso_v2(
Box::new(|| OMARICMLossComputer::new(pdi.draws().n_clusterings())),
pdi.draws(),
p,
seconds,
rng,
),
LossFunction::OneMinusARIapprox => minimize_once_by_salso(
Box::new(|| OMARIApproxGLossComputer::new(pdi.psm())),
p,
seconds,
rng,
),
LossFunction::VI(a) => minimize_once_by_salso_v2(
Box::new(|| VICMLossComputer::new(a, &cache)),
pdi.draws(),
p,
seconds,
rng,
),
LossFunction::VIlb => minimize_once_by_salso(
Box::new(|| VILBGLossComputer::new(pdi.psm())),
p,
seconds,
rng,
),
LossFunction::NVI => minimize_once_by_salso_v2(
Box::new(|| {
GeneralInformationBasedCMLossComputer::new(
pdi.draws().n_clusterings(),
&cache,
NVIInformationBasedLoss {},
)
}),
pdi.draws(),
p,
seconds,
rng,
),
LossFunction::ID => minimize_once_by_salso_v2(
Box::new(|| {
GeneralInformationBasedCMLossComputer::new(
pdi.draws().n_clusterings(),
&cache,
IDInformationBasedLoss {},
)
}),
pdi.draws(),
p,
seconds,
rng,
),
LossFunction::NID => minimize_once_by_salso_v2(
Box::new(|| {
GeneralInformationBasedCMLossComputer::new(
pdi.draws().n_clusterings(),
&cache,
NIDInformationBasedLoss {},
)
}),
pdi.draws(),
p,
seconds,
rng,
),
}
} else {
let (tx, rx) = mpsc::channel();
let n_cores = if n_cores == 0 {
num_cpus::get() as u32
} else {
n_cores
};
let p = p.clone();
let p = SALSOParameters {
n_runs: (p.n_runs + n_cores - 1) / n_cores,
..p
};
let cache_ref = &cache;
crossbeam::scope(|s| {
for _ in 0..n_cores {
let tx = mpsc::Sender::clone(&tx);
let mut child_rng = IsaacRng::from_rng(&mut rng).unwrap();
let p = p.clone();
s.spawn(move |_| {
let result = match loss_function {
LossFunction::BinderDraws(a) => minimize_once_by_salso_v2(
Box::new(|| BinderCMLossComputer::new(a)),
pdi.draws(),
&p,
seconds,
&mut child_rng,
),
LossFunction::BinderPSM => minimize_once_by_salso(
Box::new(|| BinderGLossComputer::new(pdi.psm())),
&p,
seconds,
&mut child_rng,
),
LossFunction::OneMinusARI => minimize_once_by_salso_v2(
Box::new(|| OMARICMLossComputer::new(pdi.draws().n_clusterings())),
pdi.draws(),
&p,
seconds,
&mut child_rng,
),
LossFunction::OneMinusARIapprox => minimize_once_by_salso(
Box::new(|| OMARIApproxGLossComputer::new(pdi.psm())),
&p,
seconds,
&mut child_rng,
),
LossFunction::VI(a) => minimize_once_by_salso_v2(
Box::new(|| VICMLossComputer::new(a, cache_ref)),
pdi.draws(),
&p,
seconds,
&mut child_rng,
),
LossFunction::VIlb => minimize_once_by_salso(
Box::new(|| VILBGLossComputer::new(pdi.psm())),
&p,
seconds,
&mut child_rng,
),
LossFunction::NVI => minimize_once_by_salso_v2(
Box::new(|| {
GeneralInformationBasedCMLossComputer::new(
pdi.draws().n_clusterings(),
cache_ref,
NVIInformationBasedLoss {},
)
}),
pdi.draws(),
&p,
seconds,
&mut child_rng,
),
LossFunction::ID => minimize_once_by_salso_v2(
Box::new(|| {
GeneralInformationBasedCMLossComputer::new(
pdi.draws().n_clusterings(),
cache_ref,
IDInformationBasedLoss {},
)
}),
pdi.draws(),
&p,
seconds,
&mut child_rng,
),
LossFunction::NID => minimize_once_by_salso_v2(
Box::new(|| {
GeneralInformationBasedCMLossComputer::new(
pdi.draws().n_clusterings(),
cache_ref,
NIDInformationBasedLoss {},
)
}),
pdi.draws(),
&p,
seconds,
&mut child_rng,
),
};
tx.send(result).unwrap();
});
}
})
.unwrap();
std::mem::drop(tx);
let mut best = SALSOResults::dummy(p.max_size);
let mut run_counter = 0;
for candidate in rx {
run_counter += candidate.n_runs;
if candidate.expected_loss < best.expected_loss {
best = candidate;
}
}
best.n_runs = run_counter;
best
};
let result = SALSOResults {
expected_loss: match loss_function {
LossFunction::BinderPSM => {
BinderGLossComputer::expected_loss_from_kernel(pdi.psm(), result.expected_loss)
}
LossFunction::VIlb => {
VILBGLossComputer::expected_loss_from_kernel(pdi.psm(), result.expected_loss)
}
_ => result.expected_loss,
},
..result
};
result
}
pub fn minimize_by_enumeration(
f: fn(&[LabelType], &SquareMatrixBorrower) -> f64,
psm: &SquareMatrixBorrower,
) -> Vec<usize> {
let (tx, rx) = mpsc::channel();
crossbeam::scope(|s| {
for iter in Partition::iter_sharded(num_cpus::get() as u32, psm.n_items()) {
let tx = mpsc::Sender::clone(&tx);
s.spawn(move |_| {
let mut working_minimum = std::f64::INFINITY;
let mut working_minimizer = vec![0usize; psm.n_items()];
for partition in iter {
let part: Vec<LabelType> = partition.iter().map(|x| *x as LabelType).collect();
let value = f(&part[..], psm);
if value < working_minimum {
working_minimum = value;
working_minimizer = partition;
}
}
tx.send(working_minimizer).unwrap();
});
}
})
.unwrap();
std::mem::drop(tx);
let mut working_minimum = std::f64::INFINITY;
let mut working_minimizer = vec![0usize; psm.n_items()];
for partition in rx {
let part: Vec<LabelType> = partition.iter().map(|x| *x as LabelType).collect();
let value = f(&part[..], psm);
if value < working_minimum {
working_minimum = value;
working_minimizer = partition;
}
}
working_minimizer
}
#[cfg(test)]
mod tests_optimize {
use super::rand::thread_rng;
use super::*;
#[test]
fn test_max_scan() {
let n_items = 5;
let mut psm = SquareMatrix::identity(n_items);
let psm_view = &psm.view();
let p = SALSOParameters {
n_items,
max_size: 2,
max_size_as_rf: true,
max_scans: 10,
max_zealous_updates: 10,
n_runs: 100,
prob_sequential_allocation: 1.0,
prob_singletons_initialization: 0.5,
};
minimize_by_salso(
PartitionDistributionInformation::PairwiseSimilarityMatrix(psm_view),
LossFunction::BinderPSM,
&p,
5.0,
1,
&mut thread_rng(),
);
}
}
#[no_mangle]
pub unsafe extern "C" fn dahl_salso__minimize_by_salso(
n_items: i32,
n_draws: i32,
draws_ptr: *mut i32,
psm_ptr: *mut f64,
loss: i32,
a: f64,
max_size: i32,
n_runs: i32,
seconds: f64,
max_scans: i32,
max_zealous_updates: i32,
prob_sequential_allocation: f64,
prob_singletons_initialization: f64,
n_cores: i32,
results_labels_ptr: *mut i32,
results_expected_loss_ptr: *mut f64,
results_n_scans_ptr: *mut i32,
results_n_zealous_accepts_ptr: *mut i32,
results_n_zealous_attempts_ptr: *mut i32,
results_n_runs_ptr: *mut i32,
results_max_size_ptr: *mut i32,
results_initialization_method_ptr: *mut i32,
results_seconds_ptr: *mut f64,
seed_ptr: *const i32,
) {
let n_items = usize::try_from(n_items).unwrap();
let nd = usize::try_from(n_draws).unwrap();
let draws = Clusterings::from_i32_column_major_order(
PartitionsHolderBorrower::from_ptr(draws_ptr, nd, n_items, true).data(),
n_items,
);
let psm = SquareMatrixBorrower::from_ptr(psm_ptr, n_items);
let (loss_function, pdi) = match LossFunction::from_code(loss, a) {
Some(loss_function) => match loss_function {
LossFunction::BinderDraws(_)
| LossFunction::OneMinusARI
| LossFunction::VI(_)
| LossFunction::NVI
| LossFunction::ID
| LossFunction::NID => (
loss_function,
PartitionDistributionInformation::Draws(&draws),
),
LossFunction::BinderPSM | LossFunction::OneMinusARIapprox | LossFunction::VIlb => (
loss_function,
PartitionDistributionInformation::PairwiseSimilarityMatrix(&psm),
),
},
None => panic!("Unsupported loss method: code = {}", loss),
};
let (max_size, max_size_as_rf) = if max_size < 0 {
(LabelType::try_from(-max_size).unwrap(), true)
} else {
(LabelType::try_from(max_size).unwrap(), false)
};
let n_runs = u32::try_from(n_runs).unwrap();
let max_scans = u32::try_from(max_scans).unwrap();
let max_zealous_updates = u32::try_from(max_zealous_updates).unwrap();
let n_cores = u32::try_from(n_cores).unwrap();
let mut rng = mk_rng_isaac(seed_ptr);
let p = SALSOParameters {
n_items,
max_size,
max_size_as_rf,
max_scans,
max_zealous_updates,
n_runs,
prob_sequential_allocation,
prob_singletons_initialization,
};
let results = minimize_by_salso(pdi, loss_function, &p, seconds, n_cores, &mut rng);
let results_slice = slice::from_raw_parts_mut(results_labels_ptr, n_items);
for (i, v) in results.clustering.iter().enumerate() {
results_slice[i] = i32::try_from(*v + 1).unwrap();
}
*results_expected_loss_ptr = results.expected_loss;
*results_n_scans_ptr = i32::try_from(results.n_scans).unwrap();
*results_n_zealous_accepts_ptr = i32::try_from(results.n_zealous_accepts).unwrap();
*results_n_zealous_attempts_ptr = i32::try_from(results.n_zealous_attempts).unwrap();
*results_n_runs_ptr = i32::try_from(results.n_runs).unwrap();
*results_max_size_ptr = i32::try_from(results.max_size).unwrap();
*results_seconds_ptr = results.seconds;
*results_initialization_method_ptr =
i32::try_from(results.initialization_method.to_code()).unwrap();
}
#[no_mangle]
pub unsafe extern "C" fn dahl_salso__minimize_by_enumeration(
n_items: i32,
psm_ptr: *mut f64,
loss: i32,
a: f64,
results_label_ptr: *mut i32,
) {
let ni = usize::try_from(n_items).unwrap();
let psm = SquareMatrixBorrower::from_ptr(psm_ptr, ni);
let f = match LossFunction::from_code(loss, a) {
Some(loss_function) => match loss_function {
LossFunction::BinderDraws(_) => panic!("No implementation for binder."),
LossFunction::BinderPSM => binder_single_kernel,
LossFunction::OneMinusARI => panic!("No implementation for omARI."),
LossFunction::OneMinusARIapprox => omariapprox_single,
LossFunction::VI(_) => panic!("No implementation for VI."),
LossFunction::VIlb => vilb_single_kernel,
LossFunction::NVI => panic!("No implementation for NVI."),
LossFunction::ID => panic!("No implementation for ID."),
LossFunction::NID => panic!("No implementation for NID."),
},
None => panic!("Unsupported loss method: code = {}", loss),
};
let minimizer = minimize_by_enumeration(f, &psm);
let results_slice = slice::from_raw_parts_mut(results_label_ptr, ni);
for (i, v) in minimizer.iter().enumerate() {
results_slice[i] = i32::try_from(*v + 1).unwrap();
}
}