1use crate::types::Float;
6use scirs2_core::ndarray::Array1;
8
/// Derives a seed from the current wall-clock time.
///
/// The seed is the number of nanoseconds between [`std::time::UNIX_EPOCH`]
/// and now, truncated to its low 64 bits. If the system clock is set *before*
/// the epoch, the magnitude of that offset is used instead, so the function
/// never panics on a misconfigured clock.
///
/// The value is derived purely from the clock and is therefore predictable;
/// do not use it where an unguessable seed is required.
pub fn generate_random_seed() -> u64 {
    use std::time::{SystemTime, UNIX_EPOCH};

    let offset_from_epoch = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        // A clock set before 1970 is an environment quirk, not a program bug:
        // use the (positive) distance to the epoch rather than panicking.
        .unwrap_or_else(|err| err.duration());

    // `as_nanos` yields a `u128`; keeping only the low 64 bits is intentional,
    // since any 64-bit pattern is an acceptable seed.
    offset_from_epoch.as_nanos() as u64
}
17
18pub fn entropy(probabilities: &Array1<Float>) -> Float {
36 probabilities
37 .iter()
38 .filter(|&&p| p > 0.0)
39 .map(|&p| -p * p.log2())
40 .sum()
41}
42
43pub fn gini_impurity(probabilities: &Array1<Float>) -> Float {
61 1.0 - probabilities.iter().map(|&p| p * p).sum::<Float>()
62}
63
64pub fn standardize(array: &Array1<Float>) -> Array1<Float> {
83 let mean = array.mean().unwrap();
84 let std = array.std(0.0);
85
86 if std > 1e-10 {
87 (array - mean) / std
88 } else {
89 array.clone()
90 }
91}
92
93pub fn min_max_normalize(array: &Array1<Float>) -> Array1<Float> {
112 let min_val = array.iter().fold(Float::INFINITY, |a, &b| a.min(b));
113 let max_val = array.iter().fold(Float::NEG_INFINITY, |a, &b| a.max(b));
114 let range = max_val - min_val;
115
116 if range > 1e-10 {
117 (array - min_val) / range
118 } else {
119 Array1::zeros(array.len())
120 }
121}
122
123pub fn cosine_similarity(a: &Array1<Float>, b: &Array1<Float>) -> Float {
143 if a.len() != b.len() {
144 panic!("Arrays must have the same length");
145 }
146
147 let dot_product = a.iter().zip(b.iter()).map(|(&x, &y)| x * y).sum::<Float>();
148 let norm_a = a.iter().map(|&x| x * x).sum::<Float>().sqrt();
149 let norm_b = b.iter().map(|&x| x * x).sum::<Float>().sqrt();
150
151 if norm_a > 1e-10 && norm_b > 1e-10 {
152 dot_product / (norm_a * norm_b)
153 } else {
154 0.0
155 }
156}
157
158pub fn euclidean_distance(a: &Array1<Float>, b: &Array1<Float>) -> Float {
178 if a.len() != b.len() {
179 panic!("Arrays must have the same length");
180 }
181
182 a.iter()
183 .zip(b.iter())
184 .map(|(&x, &y)| (x - y).powi(2))
185 .sum::<Float>()
186 .sqrt()
187}
188
189pub fn manhattan_distance(a: &Array1<Float>, b: &Array1<Float>) -> Float {
209 if a.len() != b.len() {
210 panic!("Arrays must have the same length");
211 }
212
213 a.iter().zip(b.iter()).map(|(&x, &y)| (x - y).abs()).sum()
214}
215
216pub fn is_zero(value: Float, tolerance: Option<Float>) -> bool {
225 let tol = tolerance.unwrap_or(1e-10);
226 value.abs() < tol
227}
228
229pub fn clamp(value: Float, min_val: Float, max_val: Float) -> Float {
239 value.clamp(min_val, max_val)
240}
241
/// Binomial coefficient "n choose k": the number of ways to pick `k` items
/// out of `n` without regard to order.
///
/// Returns `0` when `k > n`.
///
/// The running product is held in a `u128`, so every result that fits in
/// `usize` is computed exactly; intermediate products can no longer overflow
/// before the final division.
///
/// # Panics
///
/// Panics if the binomial coefficient itself does not fit in `usize`.
pub fn combinations(n: usize, k: usize) -> usize {
    if k > n {
        return 0;
    }

    // C(n, k) == C(n, n - k); iterating over the smaller side means fewer
    // steps and keeps every partial result no larger than the final one.
    let k = k.min(n - k);

    let mut result: usize = 1;
    for i in 0..k {
        // Invariant: `result == C(n, i)`. The product `C(n, i) * (n - i)` is
        // always divisible by `i + 1` and equals `C(n, i + 1) * (i + 1)`.
        // Both factors are at most `usize::MAX`, so the product fits in `u128`.
        let widened = result as u128 * (n - i) as u128 / (i as u128 + 1);
        assert!(
            widened <= usize::MAX as u128,
            "combinations: binomial coefficient does not fit in usize"
        );
        result = widened as usize;
    }

    result
}
267
268pub fn multivariate_normal_samples<R: scirs2_core::random::Rng>(
294 mean: &Array1<Float>,
295 n_samples: usize,
296 rng: &mut R,
297) -> scirs2_core::ndarray::Array2<Float> {
298 use scirs2_core::ndarray::Array2;
299 use scirs2_core::random::essentials::Normal;
300 use scirs2_core::Distribution;
301
302 let n_features = mean.len();
303 let mut samples = Array2::zeros((n_samples, n_features));
304
305 let standard_normal =
307 Normal::new(0.0, 1.0).expect("Failed to create standard normal distribution");
308
309 for i in 0..n_samples {
311 for j in 0..n_features {
312 let z = standard_normal.sample(rng);
313 samples[(i, j)] = mean[j] + z;
314 }
315 }
316
317 samples
318}
319
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    /// Absolute tolerance shared by the exact-value checks below.
    const TOL: Float = 1e-10;

    /// Asserts that `actual` lies strictly within [`TOL`] of `expected`.
    #[track_caller]
    fn assert_close(actual: Float, expected: Float) {
        assert!((actual - expected).abs() < TOL);
    }

    /// Four equally likely outcomes carry exactly two bits of entropy.
    #[test]
    fn test_entropy_uniform() {
        assert_close(entropy(&array![0.25, 0.25, 0.25, 0.25]), 2.0);
    }

    /// A certain outcome carries no entropy.
    #[test]
    fn test_entropy_certain() {
        assert_close(entropy(&array![1.0, 0.0, 0.0, 0.0]), 0.0);
    }

    /// Two equally likely classes give a Gini impurity of one half.
    #[test]
    fn test_gini_impurity_uniform() {
        assert_close(gini_impurity(&array![0.5, 0.5]), 0.5);
    }

    /// A pure node has zero Gini impurity.
    #[test]
    fn test_gini_impurity_pure() {
        assert_close(gini_impurity(&array![1.0, 0.0]), 0.0);
    }

    /// Standardized data has zero mean and unit standard deviation.
    #[test]
    fn test_standardize() {
        let standardized = standardize(&array![1.0, 2.0, 3.0, 4.0, 5.0]);

        assert_close(standardized.mean().unwrap(), 0.0);
        assert_close(standardized.std(0.0), 1.0);
    }

    /// The smallest element maps to 0 and the largest to 1.
    #[test]
    fn test_min_max_normalize() {
        let scaled = min_max_normalize(&array![1.0, 2.0, 3.0, 4.0, 5.0]);

        assert_close(scaled[0], 0.0);
        assert_close(scaled[4], 1.0);
    }

    /// Identical vectors have similarity 1; orthogonal vectors have similarity 0.
    #[test]
    fn test_cosine_similarity() {
        let x_axis = array![1.0, 0.0];
        let same_direction = array![1.0, 0.0];
        assert_close(cosine_similarity(&x_axis, &same_direction), 1.0);

        let y_axis = array![0.0, 1.0];
        assert_close(cosine_similarity(&x_axis, &y_axis), 0.0);
    }

    /// The 3-4-5 right triangle: the hypotenuse is 5.
    #[test]
    fn test_euclidean_distance() {
        let origin = array![0.0, 0.0];
        let point = array![3.0, 4.0];
        assert_close(euclidean_distance(&origin, &point), 5.0);
    }

    /// The same points are 3 + 4 = 7 apart in L1 distance.
    #[test]
    fn test_manhattan_distance() {
        let origin = array![0.0, 0.0];
        let point = array![3.0, 4.0];
        assert_close(manhattan_distance(&origin, &point), 7.0);
    }

    /// With a fixed seed, sample statistics land near the requested mean and unit std.
    #[test]
    fn test_multivariate_normal_samples() {
        use scirs2_core::random::rngs::StdRng;
        use scirs2_core::random::SeedableRng;

        let mut rng = StdRng::seed_from_u64(42);
        let mean = array![0.0, 1.0];
        let samples = multivariate_normal_samples(&mean, 100, &mut rng);

        assert_eq!(samples.shape(), &[100, 2]);

        // (column, expected mean, failure message)
        let mean_checks: [(usize, Float, &str); 2] = [
            (0, 0.0, "Mean of first component should be close to 0.0"),
            (1, 1.0, "Mean of second component should be close to 1.0"),
        ];
        for (column, expected, message) in mean_checks {
            let sample_mean = samples.column(column).mean().unwrap();
            assert!((sample_mean - expected).abs() < 0.3, "{}", message);
        }

        // (column, failure message)
        let std_checks: [(usize, &str); 2] = [
            (0, "Std of first component should be close to 1.0"),
            (1, "Std of second component should be close to 1.0"),
        ];
        for (column, message) in std_checks {
            let sample_std = samples.column(column).std(0.0);
            assert!(0.7 < sample_std && sample_std < 1.3, "{}", message);
        }
    }

    /// Covers the default tolerance and an explicit one.
    #[test]
    fn test_is_zero() {
        // (value, tolerance, expected verdict)
        let cases: [(Float, Option<Float>, bool); 4] = [
            (0.0, None, true),
            (1e-12, None, true),
            (1e-8, None, false),
            (0.01, Some(0.1), true),
        ];
        for (value, tolerance, expected) in cases {
            assert_eq!(is_zero(value, tolerance), expected);
        }
    }

    /// Values inside the interval are untouched; values outside snap to the nearer bound.
    #[test]
    fn test_clamp() {
        // (input, expected output) for the interval [0, 10]
        let cases: [(Float, Float); 3] = [(5.0, 5.0), (-1.0, 0.0), (15.0, 10.0)];
        for (input, expected) in cases {
            assert_eq!(clamp(input, 0.0, 10.0), expected);
        }
    }

    /// Spot-checks small binomial coefficients, including `k > n`.
    #[test]
    fn test_combinations() {
        // (n, k, expected C(n, k))
        let cases: [(usize, usize, usize); 6] = [
            (5, 0, 1),
            (5, 1, 5),
            (5, 2, 10),
            (5, 3, 10),
            (5, 5, 1),
            (3, 5, 0),
        ];
        for (n, k, expected) in cases {
            assert_eq!(combinations(n, k), expected);
        }
    }

    /// Two seeds taken a moment apart must differ.
    #[test]
    fn test_generate_random_seed() {
        let earlier = generate_random_seed();
        std::thread::sleep(std::time::Duration::from_micros(1));
        let later = generate_random_seed();
        assert_ne!(earlier, later);
    }
}