use scirs2_core::ndarray::{Array1, Array2};
use crate::error::{ClusteringError, Result};
#[derive(Clone)]
struct Lcg {
state: u64,
}
impl Lcg {
fn new(seed: u64) -> Self {
Self {
state: seed.wrapping_add(1),
}
}
fn next_f64(&mut self) -> f64 {
self.state = self
.state
.wrapping_mul(6_364_136_223_846_793_005)
.wrapping_add(1_442_695_040_888_963_407);
(self.state >> 11) as f64 / (1u64 << 53) as f64
}
}
#[inline]
fn sq_dist_row_to_center(
data: &Array2<f64>,
i: usize,
centers: &Array2<f64>,
k: usize,
n_features: usize,
) -> f64 {
let mut d2 = 0.0_f64;
for f in 0..n_features {
let diff = data[[i, f]] - centers[[k, f]];
d2 += diff * diff;
}
d2
}
fn compute_weighted_centers(
data: &Array2<f64>,
weights: &Array2<f64>,
m: f64,
k: usize,
n_features: usize,
) -> Result<Array2<f64>> {
let n = data.nrows();
let mut centers = Array2::<f64>::zeros((k, n_features));
for c in 0..k {
let mut wsum = 0.0_f64;
for i in 0..n {
let wm = weights[[i, c]].powf(m);
wsum += wm;
for f in 0..n_features {
centers[[c, f]] += wm * data[[i, f]];
}
}
if wsum < 1e-300 {
return Err(ClusteringError::ComputationError(format!(
"Cluster {} has near-zero total weight; consider reducing k",
c
)));
}
for f in 0..n_features {
centers[[c, f]] /= wsum;
}
}
Ok(centers)
}
#[derive(Debug, Clone)]
pub struct FuzzyCMeansConfig {
pub k: usize,
pub m: f64,
pub max_iter: usize,
pub tol: f64,
pub seed: u64,
}
impl Default for FuzzyCMeansConfig {
fn default() -> Self {
Self {
k: 2,
m: 2.0,
max_iter: 300,
tol: 1e-6,
seed: 42,
}
}
}
#[derive(Debug, Clone)]
pub struct FuzzyCMeansResult {
pub membership: Array2<f64>,
pub centers: Array2<f64>,
pub assignments: Vec<usize>,
pub objective: f64,
pub n_iter: usize,
}
pub fn fuzzy_c_means(data: &Array2<f64>, config: FuzzyCMeansConfig) -> Result<FuzzyCMeansResult> {
let n = data.nrows();
let d = data.ncols();
let k = config.k;
if n == 0 || d == 0 {
return Err(ClusteringError::InvalidInput(
"Data matrix must be non-empty".into(),
));
}
if k < 2 {
return Err(ClusteringError::InvalidInput(
"k must be at least 2".into(),
));
}
if k > n {
return Err(ClusteringError::InvalidInput(format!(
"k ({}) must not exceed n_samples ({})",
k, n
)));
}
if config.m <= 1.0 {
return Err(ClusteringError::InvalidInput(
"Fuzziness m must be strictly greater than 1.0".into(),
));
}
let mut u = {
let mut rng = Lcg::new(config.seed);
let mut mat = Array2::<f64>::zeros((n, k));
for i in 0..n {
let mut vals: Vec<f64> = (0..k).map(|_| rng.next_f64() + 1e-10).collect();
let s: f64 = vals.iter().sum();
for (c, v) in vals.iter_mut().enumerate() {
mat[[i, c]] = *v / s;
}
}
mat
};
let exp = 2.0 / (config.m - 1.0);
let mut n_iter = 0usize;
for _iter in 0..config.max_iter {
n_iter += 1;
let centers = compute_weighted_centers(data, &u, config.m, k, d)?;
let u_new = fcm_update_membership(data, ¢ers, config.m, exp, k, d)?;
let mut change = 0.0_f64;
for i in 0..n {
for c in 0..k {
let diff = u_new[[i, c]] - u[[i, c]];
change += diff * diff;
}
}
u = u_new;
if change.sqrt() < config.tol {
break;
}
}
let centers = compute_weighted_centers(data, &u, config.m, k, d)?;
let objective = fcm_objective(data, &u, ¢ers, config.m, k, d);
let assignments = hard_assignments(&u, n, k);
Ok(FuzzyCMeansResult {
membership: u,
centers,
assignments,
objective,
n_iter,
})
}
fn fcm_update_membership(
data: &Array2<f64>,
centers: &Array2<f64>,
_m: f64,
exp: f64,
k: usize,
d: usize,
) -> Result<Array2<f64>> {
let n = data.nrows();
let mut u = Array2::<f64>::zeros((n, k));
for i in 0..n {
let mut dists: Vec<f64> = (0..k)
.map(|c| sq_dist_row_to_center(data, i, centers, c, d).sqrt())
.collect();
let n_zero = dists.iter().filter(|&&v| v < 1e-300).count();
if n_zero > 0 {
let eq_mem = 1.0 / n_zero as f64;
for c in 0..k {
u[[i, c]] = if dists[c] < 1e-300 { eq_mem } else { 0.0 };
}
continue;
}
let mut row_sum = 0.0_f64;
for c in 0..k {
let inner: f64 = (0..k).map(|j| (dists[c] / dists[j]).powf(exp)).sum();
u[[i, c]] = 1.0 / inner;
row_sum += u[[i, c]];
}
if row_sum > 1e-300 {
for c in 0..k {
u[[i, c]] /= row_sum;
}
}
}
Ok(u)
}
fn fcm_objective(
data: &Array2<f64>,
u: &Array2<f64>,
centers: &Array2<f64>,
m: f64,
k: usize,
d: usize,
) -> f64 {
let n = data.nrows();
let mut obj = 0.0_f64;
for i in 0..n {
for c in 0..k {
let dist2 = sq_dist_row_to_center(data, i, centers, c, d);
obj += u[[i, c]].powf(m) * dist2;
}
}
obj
}
fn hard_assignments(u: &Array2<f64>, n: usize, k: usize) -> Vec<usize> {
(0..n)
.map(|i| {
let mut best_c = 0;
let mut best_u = -1.0_f64;
for c in 0..k {
if u[[i, c]] > best_u {
best_u = u[[i, c]];
best_c = c;
}
}
best_c
})
.collect()
}
#[derive(Debug, Clone)]
pub struct PossibilisticConfig {
pub k: usize,
pub m: f64,
pub eta: f64,
pub max_iter: usize,
pub tol: f64,
pub seed: u64,
}
impl Default for PossibilisticConfig {
fn default() -> Self {
Self {
k: 2,
m: 2.0,
eta: 0.0, max_iter: 300,
tol: 1e-6,
seed: 42,
}
}
}
#[derive(Debug, Clone)]
pub struct PossibilisticResult {
pub typicality: Array2<f64>,
pub centers: Array2<f64>,
pub assignments: Vec<usize>,
}
pub fn possibilistic_c_means(
data: &Array2<f64>,
config: PossibilisticConfig,
) -> Result<PossibilisticResult> {
let n = data.nrows();
let d = data.ncols();
let k = config.k;
if n == 0 || d == 0 {
return Err(ClusteringError::InvalidInput(
"Data matrix must be non-empty".into(),
));
}
if k < 2 {
return Err(ClusteringError::InvalidInput(
"k must be at least 2".into(),
));
}
if k > n {
return Err(ClusteringError::InvalidInput(format!(
"k ({}) must not exceed n_samples ({})",
k, n
)));
}
if config.m <= 1.0 {
return Err(ClusteringError::InvalidInput(
"Fuzziness m must be strictly greater than 1.0".into(),
));
}
let fcm_config = FuzzyCMeansConfig {
k,
m: config.m,
max_iter: 50,
tol: 1e-4,
seed: config.seed,
};
let fcm_result = fuzzy_c_means(data, fcm_config)?;
let mut centers = fcm_result.centers.clone();
let eta_vec: Vec<f64> = if config.eta > 0.0 {
vec![config.eta; k]
} else {
let u = &fcm_result.membership;
(0..k)
.map(|c| {
let mut num = 0.0_f64;
let mut den = 0.0_f64;
for i in 0..n {
let wm = u[[i, c]].powf(config.m);
num += wm * sq_dist_row_to_center(data, i, ¢ers, c, d);
den += wm;
}
if den < 1e-300 {
1.0
} else {
(num / den).max(1e-10)
}
})
.collect()
};
let mut t = pcm_update_typicality(data, ¢ers, &eta_vec, config.m, k, d)?;
for _iter in 0..config.max_iter {
let centers_new = compute_weighted_centers(data, &t, config.m, k, d)?;
let t_new = pcm_update_typicality(data, ¢ers_new, &eta_vec, config.m, k, d)?;
let mut change = 0.0_f64;
for i in 0..n {
for c in 0..k {
let diff = t_new[[i, c]] - t[[i, c]];
change += diff * diff;
}
}
centers = centers_new;
t = t_new;
if change.sqrt() < config.tol {
break;
}
}
let assignments = hard_assignments(&t, n, k);
Ok(PossibilisticResult {
typicality: t,
centers,
assignments,
})
}
fn pcm_update_typicality(
data: &Array2<f64>,
centers: &Array2<f64>,
eta: &[f64],
m: f64,
k: usize,
d: usize,
) -> Result<Array2<f64>> {
let n = data.nrows();
let exp = 1.0 / (m - 1.0);
let mut t = Array2::<f64>::zeros((n, k));
for i in 0..n {
for c in 0..k {
let dist2 = sq_dist_row_to_center(data, i, centers, c, d);
let eta_c = eta[c].max(1e-300);
t[[i, c]] = 1.0 / (1.0 + (dist2 / eta_c).powf(exp));
}
}
Ok(t)
}
#[cfg(test)]
mod tests {
use super::*;
use scirs2_core::ndarray::Array2;
fn two_cluster_data() -> Array2<f64> {
Array2::from_shape_vec(
(16, 2),
vec![
0.0, 0.0, 0.1, 0.1, -0.1, 0.0, 0.0, -0.1, 0.2, 0.0, -0.2, 0.0, 0.0, 0.2, 0.0,
-0.2, 8.0, 8.0, 8.1, 8.1, 7.9, 8.0, 8.0, 7.9, 8.2, 8.0, 7.8, 8.0, 8.0, 8.2,
8.0, 7.8,
],
)
.expect("create test data")
}
#[test]
fn test_fcm_membership_rows_sum_to_one() {
let data = two_cluster_data();
let config = FuzzyCMeansConfig {
k: 2,
m: 2.0,
max_iter: 200,
tol: 1e-6,
seed: 1,
};
let result = fuzzy_c_means(&data, config).expect("operation should succeed");
let n = result.membership.nrows();
let k = result.membership.ncols();
for i in 0..n {
let row_sum: f64 = (0..k).map(|c| result.membership[[i, c]]).sum();
assert!(
(row_sum - 1.0).abs() < 1e-9,
"Row {} membership sum = {}, expected 1.0",
i,
row_sum
);
}
}
#[test]
fn test_fcm_membership_values_in_range() {
let data = two_cluster_data();
let config = FuzzyCMeansConfig::default();
let result = fuzzy_c_means(&data, config).expect("operation should succeed");
for &v in result.membership.iter() {
assert!(v >= 0.0 && v <= 1.0 + 1e-12, "Membership {} out of [0,1]", v);
}
}
#[test]
fn test_fcm_well_separated_clusters_membership_sharp() {
let data = two_cluster_data();
let config = FuzzyCMeansConfig {
k: 2,
m: 2.0,
max_iter: 300,
tol: 1e-7,
seed: 7,
};
let result = fuzzy_c_means(&data, config).expect("operation should succeed");
let n = data.nrows();
for i in 0..n {
let max_u = (0..2).map(|c| result.membership[[i, c]]).fold(0.0_f64, f64::max);
assert!(
max_u > 0.8,
"Point {}: max membership {} too low (clusters not well-separated?)",
i,
max_u
);
}
}
#[test]
fn test_fcm_hard_assignments_correct() {
let data = two_cluster_data();
let config = FuzzyCMeansConfig {
k: 2,
m: 2.0,
max_iter: 300,
tol: 1e-7,
seed: 3,
};
let result = fuzzy_c_means(&data, config).expect("operation should succeed");
let first_label = result.assignments[0];
let second_label = result.assignments[8];
assert_ne!(first_label, second_label, "Two clusters should differ");
for i in 0..8 {
assert_eq!(
result.assignments[i], first_label,
"Point {} should be in cluster {}",
i, first_label
);
}
for i in 8..16 {
assert_eq!(
result.assignments[i], second_label,
"Point {} should be in cluster {}",
i, second_label
);
}
}
#[test]
fn test_fcm_centers_shape() {
let data = two_cluster_data();
let config = FuzzyCMeansConfig {
k: 3,
..FuzzyCMeansConfig::default()
};
let result = fuzzy_c_means(&data, config).expect("operation should succeed");
assert_eq!(result.centers.shape(), &[3, 2]);
}
#[test]
fn test_fcm_objective_positive() {
let data = two_cluster_data();
let config = FuzzyCMeansConfig::default();
let result = fuzzy_c_means(&data, config).expect("operation should succeed");
assert!(result.objective >= 0.0, "Objective must be non-negative");
}
#[test]
fn test_fcm_invalid_m() {
let data = two_cluster_data();
let config = FuzzyCMeansConfig {
m: 1.0,
..FuzzyCMeansConfig::default()
};
assert!(fuzzy_c_means(&data, config).is_err());
}
#[test]
fn test_fcm_k_exceeds_n() {
let data =
Array2::from_shape_vec((3, 2), vec![0.0, 0.0, 1.0, 1.0, 2.0, 2.0]).expect("data");
let config = FuzzyCMeansConfig {
k: 5,
..FuzzyCMeansConfig::default()
};
assert!(fuzzy_c_means(&data, config).is_err());
}
#[test]
fn test_pcm_typicality_shape() {
let data = two_cluster_data();
let config = PossibilisticConfig {
k: 2,
..PossibilisticConfig::default()
};
let result = possibilistic_c_means(&data, config).expect("operation should succeed");
assert_eq!(result.typicality.shape(), &[16, 2]);
}
#[test]
fn test_pcm_typicality_in_01() {
let data = two_cluster_data();
let config = PossibilisticConfig {
k: 2,
m: 2.0,
eta: 0.0,
max_iter: 100,
tol: 1e-5,
seed: 5,
};
let result = possibilistic_c_means(&data, config).expect("operation should succeed");
for &v in result.typicality.iter() {
assert!(
v >= 0.0 && v <= 1.0 + 1e-10,
"Typicality {} out of [0,1]",
v
);
}
}
#[test]
fn test_pcm_typicality_does_not_sum_to_one() {
let data = two_cluster_data();
let config = PossibilisticConfig {
k: 2,
m: 2.0,
eta: 0.0,
max_iter: 100,
tol: 1e-5,
seed: 8,
};
let result = possibilistic_c_means(&data, config).expect("operation should succeed");
let n = result.typicality.nrows();
let k = result.typicality.ncols();
let mut all_one = true;
for i in 0..n {
let s: f64 = (0..k).map(|c| result.typicality[[i, c]]).sum();
if (s - 1.0).abs() > 1e-3 {
all_one = false;
break;
}
}
assert!(
!all_one,
"PCM typicality rows should generally NOT sum to 1.0 for well-separated data"
);
}
#[test]
fn test_pcm_centers_shape() {
let data = two_cluster_data();
let config = PossibilisticConfig {
k: 2,
..PossibilisticConfig::default()
};
let result = possibilistic_c_means(&data, config).expect("operation should succeed");
assert_eq!(result.centers.shape(), &[2, 2]);
}
#[test]
fn test_pcm_invalid_k() {
let data = two_cluster_data();
let config = PossibilisticConfig {
k: 1,
..PossibilisticConfig::default()
};
assert!(possibilistic_c_means(&data, config).is_err());
}
#[test]
fn test_pcm_explicit_eta() {
let data = two_cluster_data();
let config = PossibilisticConfig {
k: 2,
m: 2.0,
eta: 5.0,
max_iter: 100,
tol: 1e-5,
seed: 9,
};
let result = possibilistic_c_means(&data, config);
assert!(result.is_ok(), "PCM with explicit eta should succeed");
}
}