1use ferray_core::error::{FerrayError, FerrayResult};
4use ferray_core::{Array, Element, Ix1, Ix2, IxDyn};
5use num_traits::Float;
6
7#[derive(Debug, Clone)]
13pub enum Bins<T> {
14 Count(usize),
16 Edges(Vec<T>),
18}
19
20pub fn histogram<T>(
34 a: &Array<T, Ix1>,
35 bins: Bins<T>,
36 range: Option<(T, T)>,
37 density: bool,
38) -> FerrayResult<(Array<f64, Ix1>, Array<T, Ix1>)>
39where
40 T: Element + Float,
41{
42 let data: Vec<T> = a.iter().copied().collect();
43
44 let (lo, hi) = if let Some((l, h)) = range {
46 if l >= h {
47 return Err(FerrayError::invalid_value(
48 "range lower bound must be less than upper",
49 ));
50 }
51 (l, h)
52 } else {
53 if data.is_empty() {
54 return Err(FerrayError::invalid_value(
55 "cannot compute histogram of empty array without range",
56 ));
57 }
58 let lo = data
59 .iter()
60 .copied()
61 .fold(T::infinity(), num_traits::Float::min);
62 let hi = data
63 .iter()
64 .copied()
65 .fold(T::neg_infinity(), num_traits::Float::max);
66 if lo == hi {
67 (lo - <T as Element>::one(), hi + <T as Element>::one())
68 } else {
69 (lo, hi)
70 }
71 };
72
73 let edges = match bins {
75 Bins::Count(n) => {
76 if n == 0 {
77 return Err(FerrayError::invalid_value("number of bins must be > 0"));
78 }
79 let step = (hi - lo) / T::from(n).unwrap();
80 let mut edges = Vec::with_capacity(n + 1);
81 for i in 0..n {
82 edges.push(lo + step * T::from(i).unwrap());
83 }
84 edges.push(hi);
85 edges
86 }
87 Bins::Edges(e) => {
88 if e.len() < 2 {
89 return Err(FerrayError::invalid_value(
90 "bin edges must have at least 2 elements",
91 ));
92 }
93 e
94 }
95 };
96
97 let nbins = edges.len() - 1;
98 let mut counts = vec![0u64; nbins];
99
100 for &x in &data {
101 if x.is_nan() {
102 continue;
103 }
104 if x < edges[0] || x > edges[nbins] {
105 continue;
106 }
107 let bin = match edges[..nbins]
109 .binary_search_by(|e| e.partial_cmp(&x).unwrap_or(std::cmp::Ordering::Equal))
110 {
111 Ok(i) => i,
112 Err(i) => i.saturating_sub(1),
113 };
114 let bin = bin.min(nbins - 1);
116 counts[bin] += 1;
117 }
118
119 let result: Vec<f64> = if density {
120 let total: f64 = counts.iter().sum::<u64>() as f64;
122 if total == 0.0 {
123 vec![0.0; nbins]
124 } else {
125 counts
126 .iter()
127 .enumerate()
128 .map(|(i, &c)| {
129 let bin_width = (edges[i + 1] - edges[i]).to_f64().unwrap();
130 if bin_width == 0.0 {
131 0.0
132 } else {
133 (c as f64) / (total * bin_width)
134 }
135 })
136 .collect()
137 }
138 } else {
139 counts.iter().map(|&c| c as f64).collect()
140 };
141
142 let counts_arr = Array::from_vec(Ix1::new([nbins]), result)?;
143 let edges_arr = Array::from_vec(Ix1::new([edges.len()]), edges)?;
144 Ok((counts_arr, edges_arr))
145}
146
147pub fn histogram_bin_edges<T>(
158 a: &Array<T, Ix1>,
159 bins: Bins<T>,
160 range: Option<(T, T)>,
161) -> FerrayResult<Array<T, Ix1>>
162where
163 T: Element + Float,
164{
165 let edges = match bins {
166 Bins::Count(n) => {
167 if n == 0 {
168 return Err(FerrayError::invalid_value("number of bins must be > 0"));
169 }
170 let data: Vec<T> = a.iter().copied().collect();
171 let (lo, hi) = if let Some((l, h)) = range {
172 if l >= h {
173 return Err(FerrayError::invalid_value(
174 "range lower bound must be less than upper",
175 ));
176 }
177 (l, h)
178 } else {
179 if data.is_empty() {
180 return Err(FerrayError::invalid_value(
181 "cannot compute histogram_bin_edges of empty array without range",
182 ));
183 }
184 let lo = data
185 .iter()
186 .copied()
187 .fold(T::infinity(), num_traits::Float::min);
188 let hi = data
189 .iter()
190 .copied()
191 .fold(T::neg_infinity(), num_traits::Float::max);
192 if lo == hi {
193 (lo - <T as Element>::one(), hi + <T as Element>::one())
194 } else {
195 (lo, hi)
196 }
197 };
198 let step = (hi - lo) / T::from(n).unwrap();
199 let mut edges = Vec::with_capacity(n + 1);
200 for i in 0..n {
201 edges.push(lo + step * T::from(i).unwrap());
202 }
203 edges.push(hi);
204 edges
205 }
206 Bins::Edges(e) => {
207 if e.len() < 2 {
208 return Err(FerrayError::invalid_value(
209 "bin edges must have at least 2 elements",
210 ));
211 }
212 e
213 }
214 };
215
216 Array::from_vec(Ix1::new([edges.len()]), edges)
217}
218
219#[allow(clippy::type_complexity)]
229pub fn histogram2d<T>(
230 x: &Array<T, Ix1>,
231 y: &Array<T, Ix1>,
232 bins: (usize, usize),
233) -> FerrayResult<(Array<u64, Ix2>, Array<T, Ix1>, Array<T, Ix1>)>
234where
235 T: Element + Float,
236{
237 let xdata: Vec<T> = x.iter().copied().collect();
238 let ydata: Vec<T> = y.iter().copied().collect();
239
240 if xdata.len() != ydata.len() {
241 return Err(FerrayError::shape_mismatch(
242 "x and y must have the same length",
243 ));
244 }
245 if xdata.is_empty() {
246 return Err(FerrayError::invalid_value(
247 "cannot compute histogram2d of empty arrays",
248 ));
249 }
250
251 let (nx, ny) = bins;
252 if nx == 0 || ny == 0 {
253 return Err(FerrayError::invalid_value("number of bins must be > 0"));
254 }
255
256 let x_min = xdata
257 .iter()
258 .copied()
259 .fold(T::infinity(), num_traits::Float::min);
260 let x_max = xdata
261 .iter()
262 .copied()
263 .fold(T::neg_infinity(), num_traits::Float::max);
264 let y_min = ydata
265 .iter()
266 .copied()
267 .fold(T::infinity(), num_traits::Float::min);
268 let y_max = ydata
269 .iter()
270 .copied()
271 .fold(T::neg_infinity(), num_traits::Float::max);
272
273 let (x_lo, x_hi) = if x_min == x_max {
274 (x_min - <T as Element>::one(), x_max + <T as Element>::one())
275 } else {
276 (x_min, x_max)
277 };
278 let (y_lo, y_hi) = if y_min == y_max {
279 (y_min - <T as Element>::one(), y_max + <T as Element>::one())
280 } else {
281 (y_min, y_max)
282 };
283
284 let x_step = (x_hi - x_lo) / T::from(nx).unwrap();
285 let y_step = (y_hi - y_lo) / T::from(ny).unwrap();
286
287 let mut x_edges = Vec::with_capacity(nx + 1);
288 for i in 0..nx {
289 x_edges.push(x_lo + x_step * T::from(i).unwrap());
290 }
291 x_edges.push(x_hi);
292
293 let mut y_edges = Vec::with_capacity(ny + 1);
294 for i in 0..ny {
295 y_edges.push(y_lo + y_step * T::from(i).unwrap());
296 }
297 y_edges.push(y_hi);
298
299 let mut counts = vec![0u64; nx * ny];
300
301 for (&xv, &yv) in xdata.iter().zip(ydata.iter()) {
302 if xv.is_nan() || yv.is_nan() {
303 continue;
304 }
305 let xi = bin_index(xv, x_lo, x_step, nx);
306 let yi = bin_index(yv, y_lo, y_step, ny);
307 if let (Some(xi), Some(yi)) = (xi, yi) {
308 counts[xi * ny + yi] += 1;
309 }
310 }
311
312 let counts_arr = Array::from_vec(Ix2::new([nx, ny]), counts)?;
313 let x_edges_arr = Array::from_vec(Ix1::new([x_edges.len()]), x_edges)?;
314 let y_edges_arr = Array::from_vec(Ix1::new([y_edges.len()]), y_edges)?;
315
316 Ok((counts_arr, x_edges_arr, y_edges_arr))
317}
318
319fn bin_index<T: Float>(val: T, lo: T, step: T, nbins: usize) -> Option<usize> {
321 if val < lo {
322 return None;
323 }
324 let idx = ((val - lo) / step).floor().to_usize().unwrap_or(nbins);
325 if idx >= nbins {
326 Some(nbins - 1) } else {
328 Some(idx)
329 }
330}
331
332#[allow(clippy::type_complexity)]
345pub fn histogramdd<T>(
346 sample: &Array<T, Ix2>,
347 bins: &[usize],
348) -> FerrayResult<(Array<u64, IxDyn>, Vec<Array<T, Ix1>>)>
349where
350 T: Element + Float,
351{
352 let shape = sample.shape();
353 let (npoints, ndims) = (shape[0], shape[1]);
354 let data: Vec<T> = sample.iter().copied().collect();
355
356 if bins.len() != ndims {
357 return Err(FerrayError::shape_mismatch(format!(
358 "bins length {} does not match sample dimensions {}",
359 bins.len(),
360 ndims
361 )));
362 }
363 for &b in bins {
364 if b == 0 {
365 return Err(FerrayError::invalid_value("number of bins must be > 0"));
366 }
367 }
368
369 let mut lo = vec![T::infinity(); ndims];
371 let mut hi = vec![T::neg_infinity(); ndims];
372 for i in 0..npoints {
373 for j in 0..ndims {
374 let v = data[i * ndims + j];
375 if !v.is_nan() {
376 if v < lo[j] {
377 lo[j] = v;
378 }
379 if v > hi[j] {
380 hi[j] = v;
381 }
382 }
383 }
384 }
385 for j in 0..ndims {
386 if lo[j] == hi[j] {
387 lo[j] = lo[j] - <T as Element>::one();
388 hi[j] = hi[j] + <T as Element>::one();
389 }
390 }
391
392 let mut all_edges = Vec::with_capacity(ndims);
394 let mut steps = Vec::with_capacity(ndims);
395 for j in 0..ndims {
396 let step = (hi[j] - lo[j]) / T::from(bins[j]).unwrap();
397 steps.push(step);
398 let mut edges = Vec::with_capacity(bins[j] + 1);
399 for k in 0..bins[j] {
400 edges.push(lo[j] + step * T::from(k).unwrap());
401 }
402 edges.push(hi[j]);
403 all_edges.push(edges);
404 }
405
406 let out_size: usize = bins.iter().product();
408 let mut out_strides = vec![1usize; ndims];
409 for j in (0..ndims.saturating_sub(1)).rev() {
410 out_strides[j] = out_strides[j + 1] * bins[j + 1];
411 }
412
413 let mut counts = vec![0u64; out_size];
414 for i in 0..npoints {
415 let mut flat_idx = 0usize;
416 let mut valid = true;
417 for j in 0..ndims {
418 let v = data[i * ndims + j];
419 if v.is_nan() {
420 valid = false;
421 break;
422 }
423 if let Some(bi) = bin_index(v, lo[j], steps[j], bins[j]) {
424 flat_idx += bi * out_strides[j];
425 } else {
426 valid = false;
427 break;
428 }
429 }
430 if valid {
431 counts[flat_idx] += 1;
432 }
433 }
434
435 let counts_arr = Array::from_vec(IxDyn::new(bins), counts)?;
436 let edge_arrs: Vec<Array<T, Ix1>> = all_edges
437 .into_iter()
438 .map(|e| {
439 let n = e.len();
440 Array::from_vec(Ix1::new([n]), e).unwrap()
441 })
442 .collect();
443
444 Ok((counts_arr, edge_arrs))
445}
446
447pub fn bincount_u64(x: &Array<u64, Ix1>, minlength: usize) -> FerrayResult<Array<u64, Ix1>> {
460 let data: Vec<u64> = x.iter().copied().collect();
461 let max_val = data.iter().copied().max().unwrap_or(0) as usize;
462 let out_len = (max_val + 1).max(minlength);
463 let mut result = vec![0u64; out_len];
464 for &v in &data {
465 result[v as usize] += 1;
466 }
467 Array::from_vec(Ix1::new([out_len]), result)
468}
469
470pub fn bincount_weighted(
476 x: &Array<u64, Ix1>,
477 weights: &Array<f64, Ix1>,
478 minlength: usize,
479) -> FerrayResult<Array<f64, Ix1>> {
480 if weights.size() != x.size() {
481 return Err(FerrayError::shape_mismatch(
482 "x and weights must have the same length",
483 ));
484 }
485 let data: Vec<u64> = x.iter().copied().collect();
486 let wdata: Vec<f64> = weights.iter().copied().collect();
487 let max_val = data.iter().copied().max().unwrap_or(0) as usize;
488 let out_len = (max_val + 1).max(minlength);
489 let mut result = vec![0.0_f64; out_len];
490 for (i, &v) in data.iter().enumerate() {
491 result[v as usize] += wdata[i];
492 }
493 Array::from_vec(Ix1::new([out_len]), result)
494}
495
496pub fn bincount(
507 x: &Array<u64, Ix1>,
508 weights: Option<&Array<f64, Ix1>>,
509 minlength: usize,
510) -> FerrayResult<Array<f64, Ix1>> {
511 if let Some(w) = weights {
512 bincount_weighted(x, w, minlength)
513 } else {
514 let counts = bincount_u64(x, minlength)?;
515 let data: Vec<f64> = counts.iter().map(|&c| c as f64).collect();
516 Array::from_vec(Ix1::new([counts.size()]), data)
517 }
518}
519
520pub fn digitize<T>(
533 x: &Array<T, Ix1>,
534 bins: &Array<T, Ix1>,
535 right: bool,
536) -> FerrayResult<Array<u64, Ix1>>
537where
538 T: Element + PartialOrd + Copy,
539{
540 let xdata: Vec<T> = x.iter().copied().collect();
541 let bdata: Vec<T> = bins.iter().copied().collect();
542
543 if bdata.is_empty() {
544 return Err(FerrayError::invalid_value("bins must not be empty"));
545 }
546
547 let increasing = bdata.len() < 2 || bdata[0] <= bdata[bdata.len() - 1];
549
550 let mut result = Vec::with_capacity(xdata.len());
551 for &v in &xdata {
552 let idx = if increasing {
553 if right {
554 bdata.partition_point(|b| b < &v)
556 } else {
557 bdata.partition_point(|b| b <= &v)
559 }
560 } else {
561 if right {
563 bdata.len()
564 - bdata
565 .iter()
566 .rev()
567 .position(|b| b < &v)
568 .unwrap_or(bdata.len())
569 } else {
570 bdata.len()
571 - bdata
572 .iter()
573 .rev()
574 .position(|b| b <= &v)
575 .unwrap_or(bdata.len())
576 }
577 };
578 result.push(idx as u64);
579 }
580
581 let n = result.len();
582 Array::from_vec(Ix1::new([n]), result)
583}
584
585#[cfg(test)]
586mod tests {
587 use super::*;
588
589 #[test]
590 fn test_histogram_basic() {
591 let a =
592 Array::<f64, Ix1>::from_vec(Ix1::new([6]), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
593 let (counts, edges) = histogram(&a, Bins::Count(3), None, false).unwrap();
594 assert_eq!(counts.shape(), &[3]);
595 assert_eq!(edges.shape(), &[4]);
596 let c: Vec<f64> = counts.iter().copied().collect();
597 assert!((c.iter().sum::<f64>() - 6.0).abs() < 1e-12);
598 }
599
600 #[test]
601 fn test_histogram_with_range() {
602 let a = Array::<f64, Ix1>::from_vec(Ix1::new([5]), vec![1.0, 2.0, 3.0, 4.0, 5.0]).unwrap();
603 let (counts, edges) = histogram(&a, Bins::Count(5), Some((0.0, 5.0)), false).unwrap();
604 assert_eq!(counts.shape(), &[5]);
605 assert_eq!(edges.shape(), &[6]);
606 }
607
608 #[test]
609 fn test_histogram_explicit_edges() {
610 let a = Array::<f64, Ix1>::from_vec(Ix1::new([5]), vec![0.5, 1.5, 2.5, 3.5, 4.5]).unwrap();
611 let (counts, _) = histogram(
612 &a,
613 Bins::Edges(vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0]),
614 None,
615 false,
616 )
617 .unwrap();
618 let c: Vec<f64> = counts.iter().copied().collect();
619 assert_eq!(c, vec![1.0, 1.0, 1.0, 1.0, 1.0]);
620 }
621
622 #[test]
623 fn test_histogram_density() {
624 let a = Array::<f64, Ix1>::from_vec(Ix1::new([5]), vec![0.5, 1.5, 2.5, 3.5, 4.5]).unwrap();
626 let (density, edges) = histogram(
627 &a,
628 Bins::Edges(vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0]),
629 None,
630 true,
631 )
632 .unwrap();
633 let d: Vec<f64> = density.iter().copied().collect();
634 let e: Vec<f64> = edges.iter().copied().collect();
635 for &v in &d {
638 assert!((v - 0.2).abs() < 1e-12, "expected 0.2, got {v}");
639 }
640 let integral: f64 = d
642 .iter()
643 .enumerate()
644 .map(|(i, &di)| di * (e[i + 1] - e[i]))
645 .sum();
646 assert!(
647 (integral - 1.0).abs() < 1e-12,
648 "density integral should be 1.0, got {integral}"
649 );
650 }
651
652 #[test]
653 fn test_histogram_density_unequal_bins() {
654 let a = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![0.5, 1.5, 3.0, 4.0]).unwrap();
656 let (density, edges) = histogram(&a, Bins::Edges(vec![0.0, 2.0, 5.0]), None, true).unwrap();
657 let d: Vec<f64> = density.iter().copied().collect();
658 let e: Vec<f64> = edges.iter().copied().collect();
659 assert!((d[0] - 0.25).abs() < 1e-12, "expected 0.25, got {}", d[0]);
662 assert!(
663 (d[1] - 2.0 / 12.0).abs() < 1e-12,
664 "expected 1/6, got {}",
665 d[1]
666 );
667 let integral: f64 = d
669 .iter()
670 .enumerate()
671 .map(|(i, &di)| di * (e[i + 1] - e[i]))
672 .sum();
673 assert!(
674 (integral - 1.0).abs() < 1e-12,
675 "density integral should be 1.0, got {integral}"
676 );
677 }
678
679 #[test]
680 fn test_histogram2d() {
681 let x = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![0.0, 1.0, 2.0, 3.0]).unwrap();
682 let y = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![0.0, 1.0, 2.0, 3.0]).unwrap();
683 let (counts, _xe, _ye) = histogram2d(&x, &y, (2, 2)).unwrap();
684 assert_eq!(counts.shape(), &[2, 2]);
685 let c: Vec<u64> = counts.iter().copied().collect();
686 assert_eq!(c.iter().sum::<u64>(), 4);
687 }
688
689 #[test]
690 fn test_bincount() {
691 let x = Array::<u64, Ix1>::from_vec(Ix1::new([6]), vec![0, 1, 1, 2, 2, 2]).unwrap();
692 let bc = bincount(&x, None, 0).unwrap();
693 let data: Vec<f64> = bc.iter().copied().collect();
694 assert_eq!(data, vec![1.0, 2.0, 3.0]);
695 }
696
697 #[test]
698 fn test_bincount_weighted() {
699 let x = Array::<u64, Ix1>::from_vec(Ix1::new([3]), vec![0, 1, 1]).unwrap();
700 let w = Array::<f64, Ix1>::from_vec(Ix1::new([3]), vec![0.5, 1.0, 1.5]).unwrap();
701 let bc = bincount(&x, Some(&w), 0).unwrap();
702 let data: Vec<f64> = bc.iter().copied().collect();
703 assert!((data[0] - 0.5).abs() < 1e-12);
704 assert!((data[1] - 2.5).abs() < 1e-12);
705 }
706
707 #[test]
708 fn test_bincount_minlength() {
709 let x = Array::<u64, Ix1>::from_vec(Ix1::new([2]), vec![0, 1]).unwrap();
710 let bc = bincount(&x, None, 5).unwrap();
711 assert_eq!(bc.shape(), &[5]);
712 }
713
714 #[test]
715 fn test_digitize_basic() {
716 let x = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![0.5, 1.5, 2.5, 3.5]).unwrap();
717 let bins = Array::<f64, Ix1>::from_vec(Ix1::new([3]), vec![1.0, 2.0, 3.0]).unwrap();
718 let d = digitize(&x, &bins, false).unwrap();
719 let data: Vec<u64> = d.iter().copied().collect();
720 assert_eq!(data, vec![0, 1, 2, 3]);
725 }
726
727 #[test]
728 fn test_digitize_right() {
729 let x = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![0.5, 1.0, 2.5, 3.5]).unwrap();
730 let bins = Array::<f64, Ix1>::from_vec(Ix1::new([3]), vec![1.0, 2.0, 3.0]).unwrap();
731 let d = digitize(&x, &bins, true).unwrap();
732 let data: Vec<u64> = d.iter().copied().collect();
733 assert_eq!(data, vec![0, 0, 2, 3]);
739 }
740
741 #[test]
742 fn test_digitize_decreasing_bins() {
743 let x = Array::<f64, Ix1>::from_vec(Ix1::new([3]), vec![0.5, 1.5, 2.5]).unwrap();
751 let bins = Array::<f64, Ix1>::from_vec(Ix1::new([3]), vec![3.0, 2.0, 1.0]).unwrap();
752 let d = digitize(&x, &bins, false);
753 assert!(d.is_ok());
757 assert_eq!(d.unwrap().shape(), &[3]);
758 }
759
760 #[test]
761 fn test_histogramdd() {
762 let sample = Array::<f64, Ix2>::from_vec(
763 Ix2::new([4, 2]),
764 vec![0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0],
765 )
766 .unwrap();
767 let (counts, edges) = histogramdd(&sample, &[2, 2]).unwrap();
768 assert_eq!(counts.shape(), &[2, 2]);
769 let c: Vec<u64> = counts.iter().copied().collect();
770 assert_eq!(c.iter().sum::<u64>(), 4);
771 assert_eq!(edges.len(), 2);
772 }
773
774 #[test]
775 fn test_histogram_bin_edges_count() {
776 let a = Array::<f64, Ix1>::from_vec(Ix1::new([5]), vec![0.0, 1.0, 2.0, 3.0, 4.0]).unwrap();
777 let edges = histogram_bin_edges(&a, Bins::Count(4), None).unwrap();
778 let data: Vec<f64> = edges.iter().copied().collect();
779 assert_eq!(data, vec![0.0, 1.0, 2.0, 3.0, 4.0]);
780 }
781
782 #[test]
783 fn test_histogram_bin_edges_explicit_range() {
784 let a = Array::<f64, Ix1>::from_vec(Ix1::new([3]), vec![0.5, 1.5, 2.5]).unwrap();
785 let edges = histogram_bin_edges(&a, Bins::Count(2), Some((0.0, 4.0))).unwrap();
786 let data: Vec<f64> = edges.iter().copied().collect();
787 assert_eq!(data, vec![0.0, 2.0, 4.0]);
788 }
789
790 #[test]
791 fn test_histogram_bin_edges_explicit_edges_passthrough() {
792 let a = Array::<f64, Ix1>::from_vec(Ix1::new([0]), vec![]).unwrap();
793 let edges = histogram_bin_edges(&a, Bins::Edges(vec![0.0, 1.0, 5.0]), None).unwrap();
794 let data: Vec<f64> = edges.iter().copied().collect();
795 assert_eq!(data, vec![0.0, 1.0, 5.0]);
796 }
797}