#![allow(rustdoc::invalid_html_tags)]
use std::fmt;
use std::time;
extern crate num_cpus;
mod types;
pub use types::{ColorCount, DurationInSec, Interval, PointCount};
mod space;
pub use space::{ColoredMetric, Point, SpaceMatrix, SpaceND};
mod clustering;
pub use clustering::{Centers, Clustering};
mod utilities;
mod datastructures;
mod assertions;
pub use assertions::assert_clustering_problem;
use assertions::assert_optional_parameters;
mod phase1;
use phase1::gonzalez_heuristic;
mod phase2;
use phase2::make_private;
mod phase3;
use phase3::algebraic_pushing;
mod phase4;
use phase4::phase4;
mod phase5;
use phase5::phase5;
mod python_interface;
use python_interface::FFKCenter;
use pyo3::prelude::*;
#[pymodule]
#[pyo3(name = "ff_k_center")]
fn python_interface(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_class::<FFKCenter>()?;
Ok(())
}
pub struct ClusteringProblem {
pub k: PointCount, pub privacy_bound: PointCount, pub rep_intervals: Vec<Interval>, }
impl fmt::Display for ClusteringProblem {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f,
"Clustering-Problem with k = {}, privacy_bound = {} and representative_intervals: ",
self.k, self.privacy_bound
)?;
let mut iter = self.rep_intervals.iter();
if let Some(first_interval) = iter.next() {
write!(f, "({},{})", first_interval.0, first_interval.1)?;
for interval in iter {
write!(f, ", ({},{})", interval.0, interval.1)?;
}
}
write!(f, ".")
}
}
pub struct OptionalParameters {
pub verbose: Option<u8>,
pub thread_count: Option<usize>,
pub phase_2_rerun: Option<bool>,
pub phase_5_gonzalez: Option<bool>,
}
const DEFAULT_VERBOSE: u8 = 1;
const DEFAULT_PHASE2_RERUN: bool = true;
const DEFAULT_PHASE5_GONZALEZ: bool = false;
fn default_thread_count() -> usize {
num_cpus::get()
}
pub fn compute_privacy_preserving_representative_k_center<M: ColoredMetric + std::marker::Sync>(
space: &M,
prob: &ClusteringProblem,
options: Option<OptionalParameters>,
) -> (Clustering, DurationInSec) {
let time_start = time::Instant::now();
let cpu_count = num_cpus::get();
let optional_parameters = options.unwrap_or(OptionalParameters {
verbose: Some(DEFAULT_VERBOSE),
thread_count: Some(default_thread_count()),
phase_2_rerun: Some(DEFAULT_PHASE2_RERUN),
phase_5_gonzalez: Some(DEFAULT_PHASE5_GONZALEZ),
});
if let Err(msg) = assert_optional_parameters(&optional_parameters) {
println!("\n\x1b[0;31mInvalidOptionalParameters:\x1b[0m {}", msg);
panic!("Invalid parameters. Abort!")
}
let verbose = optional_parameters.verbose.unwrap_or(DEFAULT_VERBOSE);
let thread_count = optional_parameters
.thread_count
.unwrap_or(default_thread_count());
let phase_2_rerun = optional_parameters
.phase_2_rerun
.unwrap_or(DEFAULT_PHASE2_RERUN);
let phase_5_gonzalez = optional_parameters
.phase_5_gonzalez
.unwrap_or(DEFAULT_PHASE5_GONZALEZ);
if verbose >= 1 {
println!("\n**** Solving: {}", prob);
}
if let Err(msg) = assert_clustering_problem(space, prob) {
println!("\n\x1b[0;31mInvalidProblemFormulation:\x1b[0m {}", msg);
panic!("Invalid problem. Abort!")
}
let time_after_assertions = time::Instant::now();
if verbose >= 2 {
println!(
"\n - Assertions done (time: {:?}): ClusteringProblem seems well stated.",
time_after_assertions.duration_since(time_start)
);
}
let gonzalez: Centers = gonzalez_heuristic(space, prob.k);
let time_after_phase1 = time::Instant::now();
if verbose >= 2 {
println!("\n - Phase 1 done (time: {:?}): Determined k = {} centers by the Gonzalez heuristic: ({}).", time_after_phase1.duration_since(time_after_assertions), prob.k, gonzalez);
} else if verbose == 1 {
println!(
" - Phase 1 done (time: {:?}): Determined k = {} centers by the Gonzalez heuristic.",
time_after_phase1.duration_since(time_after_assertions),
prob.k
);
}
let clusterings: Vec<Clustering> =
make_private(space, prob.privacy_bound, &gonzalez, thread_count);
let time_after_phase2 = time::Instant::now();
if verbose >= 2 {
println!(
"\n - Phase 2 done (time: {:?}): Determined k = {} clusterings:",
time_after_phase2.duration_since(time_after_phase1),
prob.k
);
for (i, clustering) in clusterings.iter().enumerate() {
println!("\tC_{}:\tradius = {};", i, clustering.get_radius());
}
} else if verbose == 1 {
println!(
" - Phase 2 done (time: {:?}): Determined k = {} clusterings.",
time_after_phase2.duration_since(time_after_phase1),
prob.k
);
}
#[cfg(debug_assertions)]
{
phase2::with_sorting::make_private_with_sorting(space, prob.privacy_bound, &gonzalez);
let time_after_phase2_sorting = time::Instant::now();
println!(
" - Phase 2 with sorting done (time: {:?})",
time_after_phase2_sorting.duration_since(time_after_phase2)
);
}
let time_before_phase3 = time::Instant::now();
let (spanning_trees, opening_lists) = algebraic_pushing(space, prob, &clusterings);
let time_after_phase3 = time::Instant::now();
if verbose >= 2 {
println!(
"\n - Phase 3 done (time: {:?}): Determined the following opening lists:",
time_after_phase3.duration_since(time_before_phase3)
);
for (i, openings) in opening_lists.iter().enumerate() {
println!("\tC_{}:", i);
for o in openings.iter() {
println!("\t\t{};", o);
}
}
} else if verbose == 1 {
println!(
" - Phase 3 done (time: {:?}): Determined opening lists.",
time_after_phase3.duration_since(time_before_phase3)
);
}
let (new_centers, counts) = phase4(space, prob, opening_lists, &gonzalez, thread_count);
let time_after_phase4 = time::Instant::now();
let count_sum: usize = counts.iter().sum();
if verbose >= 2 {
println!("\n - Phase 4 done (time: {:?} with {} threads on {} cores): Number of flow problems solved: {}. Determined the following new centers:", time_after_phase4.duration_since(time_after_phase3), thread_count, cpu_count, count_sum);
for (i, centers) in new_centers.iter().enumerate() {
println!("\tC_{}:\t({}) \tassignment_radius: {};\tforrest_radius: {}; number of flow problems solved: {}", i, centers.as_points, centers.assignment_radius, centers.forrest_radius, counts[i]);
}
} else if verbose == 1 {
println!(" - Phase 4 done (time: {:?} with {} threads on {} cores): Determined new centers. Number of flow problem solved: {}.", time_after_phase4.duration_since(time_after_phase3), thread_count, cpu_count, count_sum);
}
let (best_i, mut final_clustering, phase5_radius) = phase5(
space,
prob,
new_centers,
clusterings,
spanning_trees,
thread_count,
phase_5_gonzalez,
);
let time_after_phase5 = time::Instant::now();
if verbose >= 2 {
println!("\n - Phase 5 done (time: {:?} with {} threads on {} cores): Created assignments and chose the final clustering (based on C_{}) with centers: ({}) and radius: {}.", time_after_phase5.duration_since(time_after_phase4), thread_count, cpu_count, best_i, final_clustering.get_centers(), phase5_radius);
} else if verbose == 1 {
println!(" - Phase 5 done (time: {:?} with {} threads on {} cores): Created assignments and chose the final clustering (based on C_{}) with radius: {}.", time_after_phase5.duration_since(time_after_phase4), thread_count, cpu_count, best_i, phase5_radius);
}
let mut phase2_rerun_time_str = String::from("-");
if phase_2_rerun {
final_clustering = make_private(
space,
prob.privacy_bound,
final_clustering.get_centers(),
thread_count,
)
.pop()
.unwrap();
let time_after_phase2rerun = time::Instant::now();
if verbose >= 2 {
println!("\n - Rerun of phase 2 done (time: {:?}): final clustering determined with radius: {} (improvement to phase 5: {}).", time_after_phase2rerun.duration_since(time_after_phase5), final_clustering.get_radius(), final_clustering.get_radius() - phase5_radius);
} else if verbose == 1 {
println!(" - Rerun of phase 2 done (time: {:?}): final clustering determined with radius: {} (improvement to phase 5: {}).", time_after_phase2rerun.duration_since(time_after_phase5), final_clustering.get_radius(), phase5_radius - final_clustering.get_radius());
}
phase2_rerun_time_str = format!(
"{:?}",
time_after_phase2rerun.duration_since(time_after_phase5)
);
}
let finishing_time = time::Instant::now();
let total_time = finishing_time.duration_since(time_start);
if verbose >= 2 {
println!("\n**** Algorithm done (total time: {:?}): Privacy-preserving representative k-center computed. Number of centers: {}; final radius: {}.", total_time,final_clustering.get_centers().m(),final_clustering.get_radius());
println!("\tTimes: phase 1: {:?}; phase 2: {:?}; phase 3: {:?}; phase 4: {:?}; phase 5: {:?}; phase 2 rerun: {};",
time_after_phase1.duration_since(time_start),
time_after_phase2.duration_since(time_after_phase1),
time_after_phase3.duration_since(time_after_phase2),
time_after_phase4.duration_since(time_after_phase3),
time_after_phase5.duration_since(time_after_phase4),
phase2_rerun_time_str);
} else if verbose == 1 {
println!("**** Algorithm done (total time: {:?}): Privacy-preserving representative k-center computed. Number of centers: {}; final radius: {}.", total_time,final_clustering.get_centers().m(),final_clustering.get_radius());
}
(final_clustering, total_time.as_secs_f64())
}