1use ferray_core::error::{FerrayError, FerrayResult};
4use ferray_core::{Array, Element, Ix1, Ix2, IxDyn};
5use num_traits::Float;
6
7#[derive(Debug, Clone)]
13pub enum Bins<T> {
14 Count(usize),
16 Edges(Vec<T>),
18}
19
20pub fn histogram<T>(
34 a: &Array<T, Ix1>,
35 bins: Bins<T>,
36 range: Option<(T, T)>,
37 density: bool,
38) -> FerrayResult<(Array<f64, Ix1>, Array<T, Ix1>)>
39where
40 T: Element + Float,
41{
42 let data: Vec<T> = a.iter().copied().collect();
43
44 let (lo, hi) = match range {
46 Some((l, h)) => {
47 if l >= h {
48 return Err(FerrayError::invalid_value(
49 "range lower bound must be less than upper",
50 ));
51 }
52 (l, h)
53 }
54 None => {
55 if data.is_empty() {
56 return Err(FerrayError::invalid_value(
57 "cannot compute histogram of empty array without range",
58 ));
59 }
60 let lo = data.iter().copied().fold(T::infinity(), |a, b| a.min(b));
61 let hi = data
62 .iter()
63 .copied()
64 .fold(T::neg_infinity(), |a, b| a.max(b));
65 if lo == hi {
66 (lo - <T as Element>::one(), hi + <T as Element>::one())
67 } else {
68 (lo, hi)
69 }
70 }
71 };
72
73 let edges = match bins {
75 Bins::Count(n) => {
76 if n == 0 {
77 return Err(FerrayError::invalid_value("number of bins must be > 0"));
78 }
79 let step = (hi - lo) / T::from(n).unwrap();
80 let mut edges = Vec::with_capacity(n + 1);
81 for i in 0..n {
82 edges.push(lo + step * T::from(i).unwrap());
83 }
84 edges.push(hi);
85 edges
86 }
87 Bins::Edges(e) => {
88 if e.len() < 2 {
89 return Err(FerrayError::invalid_value(
90 "bin edges must have at least 2 elements",
91 ));
92 }
93 e
94 }
95 };
96
97 let nbins = edges.len() - 1;
98 let mut counts = vec![0u64; nbins];
99
100 for &x in &data {
101 if x.is_nan() {
102 continue;
103 }
104 if x < edges[0] || x > edges[nbins] {
105 continue;
106 }
107 let bin = match edges[..nbins]
109 .binary_search_by(|e| e.partial_cmp(&x).unwrap_or(std::cmp::Ordering::Equal))
110 {
111 Ok(i) => i,
112 Err(i) => i.saturating_sub(1),
113 };
114 let bin = bin.min(nbins - 1);
116 counts[bin] += 1;
117 }
118
119 let result: Vec<f64> = if density {
120 let total: f64 = counts.iter().sum::<u64>() as f64;
122 if total == 0.0 {
123 vec![0.0; nbins]
124 } else {
125 counts
126 .iter()
127 .enumerate()
128 .map(|(i, &c)| {
129 let bin_width = (edges[i + 1] - edges[i]).to_f64().unwrap();
130 if bin_width == 0.0 {
131 0.0
132 } else {
133 (c as f64) / (total * bin_width)
134 }
135 })
136 .collect()
137 }
138 } else {
139 counts.iter().map(|&c| c as f64).collect()
140 };
141
142 let counts_arr = Array::from_vec(Ix1::new([nbins]), result)?;
143 let edges_arr = Array::from_vec(Ix1::new([edges.len()]), edges)?;
144 Ok((counts_arr, edges_arr))
145}
146
147#[allow(clippy::type_complexity)]
157pub fn histogram2d<T>(
158 x: &Array<T, Ix1>,
159 y: &Array<T, Ix1>,
160 bins: (usize, usize),
161) -> FerrayResult<(Array<u64, Ix2>, Array<T, Ix1>, Array<T, Ix1>)>
162where
163 T: Element + Float,
164{
165 let xdata: Vec<T> = x.iter().copied().collect();
166 let ydata: Vec<T> = y.iter().copied().collect();
167
168 if xdata.len() != ydata.len() {
169 return Err(FerrayError::shape_mismatch(
170 "x and y must have the same length",
171 ));
172 }
173 if xdata.is_empty() {
174 return Err(FerrayError::invalid_value(
175 "cannot compute histogram2d of empty arrays",
176 ));
177 }
178
179 let (nx, ny) = bins;
180 if nx == 0 || ny == 0 {
181 return Err(FerrayError::invalid_value("number of bins must be > 0"));
182 }
183
184 let x_min = xdata.iter().copied().fold(T::infinity(), |a, b| a.min(b));
185 let x_max = xdata
186 .iter()
187 .copied()
188 .fold(T::neg_infinity(), |a, b| a.max(b));
189 let y_min = ydata.iter().copied().fold(T::infinity(), |a, b| a.min(b));
190 let y_max = ydata
191 .iter()
192 .copied()
193 .fold(T::neg_infinity(), |a, b| a.max(b));
194
195 let (x_lo, x_hi) = if x_min == x_max {
196 (x_min - <T as Element>::one(), x_max + <T as Element>::one())
197 } else {
198 (x_min, x_max)
199 };
200 let (y_lo, y_hi) = if y_min == y_max {
201 (y_min - <T as Element>::one(), y_max + <T as Element>::one())
202 } else {
203 (y_min, y_max)
204 };
205
206 let x_step = (x_hi - x_lo) / T::from(nx).unwrap();
207 let y_step = (y_hi - y_lo) / T::from(ny).unwrap();
208
209 let mut x_edges = Vec::with_capacity(nx + 1);
210 for i in 0..nx {
211 x_edges.push(x_lo + x_step * T::from(i).unwrap());
212 }
213 x_edges.push(x_hi);
214
215 let mut y_edges = Vec::with_capacity(ny + 1);
216 for i in 0..ny {
217 y_edges.push(y_lo + y_step * T::from(i).unwrap());
218 }
219 y_edges.push(y_hi);
220
221 let mut counts = vec![0u64; nx * ny];
222
223 for (&xv, &yv) in xdata.iter().zip(ydata.iter()) {
224 if xv.is_nan() || yv.is_nan() {
225 continue;
226 }
227 let xi = bin_index(xv, x_lo, x_step, nx);
228 let yi = bin_index(yv, y_lo, y_step, ny);
229 if let (Some(xi), Some(yi)) = (xi, yi) {
230 counts[xi * ny + yi] += 1;
231 }
232 }
233
234 let counts_arr = Array::from_vec(Ix2::new([nx, ny]), counts)?;
235 let x_edges_arr = Array::from_vec(Ix1::new([x_edges.len()]), x_edges)?;
236 let y_edges_arr = Array::from_vec(Ix1::new([y_edges.len()]), y_edges)?;
237
238 Ok((counts_arr, x_edges_arr, y_edges_arr))
239}
240
241fn bin_index<T: Float>(val: T, lo: T, step: T, nbins: usize) -> Option<usize> {
243 if val < lo {
244 return None;
245 }
246 let idx = ((val - lo) / step).floor().to_usize().unwrap_or(nbins);
247 if idx >= nbins {
248 Some(nbins - 1) } else {
250 Some(idx)
251 }
252}
253
254#[allow(clippy::type_complexity)]
267pub fn histogramdd<T>(
268 sample: &Array<T, Ix2>,
269 bins: &[usize],
270) -> FerrayResult<(Array<u64, IxDyn>, Vec<Array<T, Ix1>>)>
271where
272 T: Element + Float,
273{
274 let shape = sample.shape();
275 let (npoints, ndims) = (shape[0], shape[1]);
276 let data: Vec<T> = sample.iter().copied().collect();
277
278 if bins.len() != ndims {
279 return Err(FerrayError::shape_mismatch(format!(
280 "bins length {} does not match sample dimensions {}",
281 bins.len(),
282 ndims
283 )));
284 }
285 for &b in bins {
286 if b == 0 {
287 return Err(FerrayError::invalid_value("number of bins must be > 0"));
288 }
289 }
290
291 let mut lo = vec![T::infinity(); ndims];
293 let mut hi = vec![T::neg_infinity(); ndims];
294 for i in 0..npoints {
295 for j in 0..ndims {
296 let v = data[i * ndims + j];
297 if !v.is_nan() {
298 if v < lo[j] {
299 lo[j] = v;
300 }
301 if v > hi[j] {
302 hi[j] = v;
303 }
304 }
305 }
306 }
307 for j in 0..ndims {
308 if lo[j] == hi[j] {
309 lo[j] = lo[j] - <T as Element>::one();
310 hi[j] = hi[j] + <T as Element>::one();
311 }
312 }
313
314 let mut all_edges = Vec::with_capacity(ndims);
316 let mut steps = Vec::with_capacity(ndims);
317 for j in 0..ndims {
318 let step = (hi[j] - lo[j]) / T::from(bins[j]).unwrap();
319 steps.push(step);
320 let mut edges = Vec::with_capacity(bins[j] + 1);
321 for k in 0..bins[j] {
322 edges.push(lo[j] + step * T::from(k).unwrap());
323 }
324 edges.push(hi[j]);
325 all_edges.push(edges);
326 }
327
328 let out_size: usize = bins.iter().product();
330 let mut out_strides = vec![1usize; ndims];
331 for j in (0..ndims.saturating_sub(1)).rev() {
332 out_strides[j] = out_strides[j + 1] * bins[j + 1];
333 }
334
335 let mut counts = vec![0u64; out_size];
336 for i in 0..npoints {
337 let mut flat_idx = 0usize;
338 let mut valid = true;
339 for j in 0..ndims {
340 let v = data[i * ndims + j];
341 if v.is_nan() {
342 valid = false;
343 break;
344 }
345 match bin_index(v, lo[j], steps[j], bins[j]) {
346 Some(bi) => flat_idx += bi * out_strides[j],
347 None => {
348 valid = false;
349 break;
350 }
351 }
352 }
353 if valid {
354 counts[flat_idx] += 1;
355 }
356 }
357
358 let counts_arr = Array::from_vec(IxDyn::new(bins), counts)?;
359 let edge_arrs: Vec<Array<T, Ix1>> = all_edges
360 .into_iter()
361 .map(|e| {
362 let n = e.len();
363 Array::from_vec(Ix1::new([n]), e).unwrap()
364 })
365 .collect();
366
367 Ok((counts_arr, edge_arrs))
368}
369
370pub fn bincount_u64(x: &Array<u64, Ix1>, minlength: usize) -> FerrayResult<Array<u64, Ix1>> {
383 let data: Vec<u64> = x.iter().copied().collect();
384 let max_val = data.iter().copied().max().unwrap_or(0) as usize;
385 let out_len = (max_val + 1).max(minlength);
386 let mut result = vec![0u64; out_len];
387 for &v in &data {
388 result[v as usize] += 1;
389 }
390 Array::from_vec(Ix1::new([out_len]), result)
391}
392
393pub fn bincount_weighted(
399 x: &Array<u64, Ix1>,
400 weights: &Array<f64, Ix1>,
401 minlength: usize,
402) -> FerrayResult<Array<f64, Ix1>> {
403 if weights.size() != x.size() {
404 return Err(FerrayError::shape_mismatch(
405 "x and weights must have the same length",
406 ));
407 }
408 let data: Vec<u64> = x.iter().copied().collect();
409 let wdata: Vec<f64> = weights.iter().copied().collect();
410 let max_val = data.iter().copied().max().unwrap_or(0) as usize;
411 let out_len = (max_val + 1).max(minlength);
412 let mut result = vec![0.0_f64; out_len];
413 for (i, &v) in data.iter().enumerate() {
414 result[v as usize] += wdata[i];
415 }
416 Array::from_vec(Ix1::new([out_len]), result)
417}
418
419pub fn bincount(
430 x: &Array<u64, Ix1>,
431 weights: Option<&Array<f64, Ix1>>,
432 minlength: usize,
433) -> FerrayResult<Array<f64, Ix1>> {
434 match weights {
435 Some(w) => bincount_weighted(x, w, minlength),
436 None => {
437 let counts = bincount_u64(x, minlength)?;
438 let data: Vec<f64> = counts.iter().map(|&c| c as f64).collect();
439 Array::from_vec(Ix1::new([counts.size()]), data)
440 }
441 }
442}
443
444pub fn digitize<T>(
457 x: &Array<T, Ix1>,
458 bins: &Array<T, Ix1>,
459 right: bool,
460) -> FerrayResult<Array<u64, Ix1>>
461where
462 T: Element + PartialOrd + Copy,
463{
464 let xdata: Vec<T> = x.iter().copied().collect();
465 let bdata: Vec<T> = bins.iter().copied().collect();
466
467 if bdata.is_empty() {
468 return Err(FerrayError::invalid_value("bins must not be empty"));
469 }
470
471 let increasing = bdata.len() < 2 || bdata[0] <= bdata[bdata.len() - 1];
473
474 let mut result = Vec::with_capacity(xdata.len());
475 for &v in &xdata {
476 let idx = if increasing {
477 if right {
478 bdata.partition_point(|b| b < &v)
480 } else {
481 bdata.partition_point(|b| b <= &v)
483 }
484 } else {
485 if right {
487 bdata.len()
488 - bdata
489 .iter()
490 .rev()
491 .position(|b| b < &v)
492 .unwrap_or(bdata.len())
493 } else {
494 bdata.len()
495 - bdata
496 .iter()
497 .rev()
498 .position(|b| b <= &v)
499 .unwrap_or(bdata.len())
500 }
501 };
502 result.push(idx as u64);
503 }
504
505 let n = result.len();
506 Array::from_vec(Ix1::new([n]), result)
507}
508
509#[cfg(test)]
510mod tests {
511 use super::*;
512
513 #[test]
514 fn test_histogram_basic() {
515 let a =
516 Array::<f64, Ix1>::from_vec(Ix1::new([6]), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
517 let (counts, edges) = histogram(&a, Bins::Count(3), None, false).unwrap();
518 assert_eq!(counts.shape(), &[3]);
519 assert_eq!(edges.shape(), &[4]);
520 let c: Vec<f64> = counts.iter().copied().collect();
521 assert!((c.iter().sum::<f64>() - 6.0).abs() < 1e-12);
522 }
523
524 #[test]
525 fn test_histogram_with_range() {
526 let a = Array::<f64, Ix1>::from_vec(Ix1::new([5]), vec![1.0, 2.0, 3.0, 4.0, 5.0]).unwrap();
527 let (counts, edges) = histogram(&a, Bins::Count(5), Some((0.0, 5.0)), false).unwrap();
528 assert_eq!(counts.shape(), &[5]);
529 assert_eq!(edges.shape(), &[6]);
530 }
531
532 #[test]
533 fn test_histogram_explicit_edges() {
534 let a = Array::<f64, Ix1>::from_vec(Ix1::new([5]), vec![0.5, 1.5, 2.5, 3.5, 4.5]).unwrap();
535 let (counts, _) = histogram(
536 &a,
537 Bins::Edges(vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0]),
538 None,
539 false,
540 )
541 .unwrap();
542 let c: Vec<f64> = counts.iter().copied().collect();
543 assert_eq!(c, vec![1.0, 1.0, 1.0, 1.0, 1.0]);
544 }
545
546 #[test]
547 fn test_histogram_density() {
548 let a = Array::<f64, Ix1>::from_vec(Ix1::new([5]), vec![0.5, 1.5, 2.5, 3.5, 4.5]).unwrap();
550 let (density, edges) = histogram(
551 &a,
552 Bins::Edges(vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0]),
553 None,
554 true,
555 )
556 .unwrap();
557 let d: Vec<f64> = density.iter().copied().collect();
558 let e: Vec<f64> = edges.iter().copied().collect();
559 for &v in &d {
562 assert!((v - 0.2).abs() < 1e-12, "expected 0.2, got {}", v);
563 }
564 let integral: f64 = d
566 .iter()
567 .enumerate()
568 .map(|(i, &di)| di * (e[i + 1] - e[i]))
569 .sum();
570 assert!(
571 (integral - 1.0).abs() < 1e-12,
572 "density integral should be 1.0, got {}",
573 integral
574 );
575 }
576
577 #[test]
578 fn test_histogram_density_unequal_bins() {
579 let a = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![0.5, 1.5, 3.0, 4.0]).unwrap();
581 let (density, edges) = histogram(&a, Bins::Edges(vec![0.0, 2.0, 5.0]), None, true).unwrap();
582 let d: Vec<f64> = density.iter().copied().collect();
583 let e: Vec<f64> = edges.iter().copied().collect();
584 assert!((d[0] - 0.25).abs() < 1e-12, "expected 0.25, got {}", d[0]);
587 assert!(
588 (d[1] - 2.0 / 12.0).abs() < 1e-12,
589 "expected 1/6, got {}",
590 d[1]
591 );
592 let integral: f64 = d
594 .iter()
595 .enumerate()
596 .map(|(i, &di)| di * (e[i + 1] - e[i]))
597 .sum();
598 assert!(
599 (integral - 1.0).abs() < 1e-12,
600 "density integral should be 1.0, got {}",
601 integral
602 );
603 }
604
605 #[test]
606 fn test_histogram2d() {
607 let x = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![0.0, 1.0, 2.0, 3.0]).unwrap();
608 let y = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![0.0, 1.0, 2.0, 3.0]).unwrap();
609 let (counts, _xe, _ye) = histogram2d(&x, &y, (2, 2)).unwrap();
610 assert_eq!(counts.shape(), &[2, 2]);
611 let c: Vec<u64> = counts.iter().copied().collect();
612 assert_eq!(c.iter().sum::<u64>(), 4);
613 }
614
615 #[test]
616 fn test_bincount() {
617 let x = Array::<u64, Ix1>::from_vec(Ix1::new([6]), vec![0, 1, 1, 2, 2, 2]).unwrap();
618 let bc = bincount(&x, None, 0).unwrap();
619 let data: Vec<f64> = bc.iter().copied().collect();
620 assert_eq!(data, vec![1.0, 2.0, 3.0]);
621 }
622
623 #[test]
624 fn test_bincount_weighted() {
625 let x = Array::<u64, Ix1>::from_vec(Ix1::new([3]), vec![0, 1, 1]).unwrap();
626 let w = Array::<f64, Ix1>::from_vec(Ix1::new([3]), vec![0.5, 1.0, 1.5]).unwrap();
627 let bc = bincount(&x, Some(&w), 0).unwrap();
628 let data: Vec<f64> = bc.iter().copied().collect();
629 assert!((data[0] - 0.5).abs() < 1e-12);
630 assert!((data[1] - 2.5).abs() < 1e-12);
631 }
632
633 #[test]
634 fn test_bincount_minlength() {
635 let x = Array::<u64, Ix1>::from_vec(Ix1::new([2]), vec![0, 1]).unwrap();
636 let bc = bincount(&x, None, 5).unwrap();
637 assert_eq!(bc.shape(), &[5]);
638 }
639
640 #[test]
641 fn test_digitize_basic() {
642 let x = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![0.5, 1.5, 2.5, 3.5]).unwrap();
643 let bins = Array::<f64, Ix1>::from_vec(Ix1::new([3]), vec![1.0, 2.0, 3.0]).unwrap();
644 let d = digitize(&x, &bins, false).unwrap();
645 let data: Vec<u64> = d.iter().copied().collect();
646 assert_eq!(data, vec![0, 1, 2, 3]);
651 }
652
653 #[test]
654 fn test_digitize_right() {
655 let x = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![0.5, 1.0, 2.5, 3.5]).unwrap();
656 let bins = Array::<f64, Ix1>::from_vec(Ix1::new([3]), vec![1.0, 2.0, 3.0]).unwrap();
657 let d = digitize(&x, &bins, true).unwrap();
658 let data: Vec<u64> = d.iter().copied().collect();
659 assert_eq!(data, vec![0, 0, 2, 3]);
665 }
666
667 #[test]
668 fn test_digitize_decreasing_bins() {
669 let x = Array::<f64, Ix1>::from_vec(Ix1::new([3]), vec![0.5, 1.5, 2.5]).unwrap();
677 let bins = Array::<f64, Ix1>::from_vec(Ix1::new([3]), vec![3.0, 2.0, 1.0]).unwrap();
678 let d = digitize(&x, &bins, false);
679 assert!(d.is_ok());
683 assert_eq!(d.unwrap().shape(), &[3]);
684 }
685
686 #[test]
687 fn test_histogramdd() {
688 let sample = Array::<f64, Ix2>::from_vec(
689 Ix2::new([4, 2]),
690 vec![0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0],
691 )
692 .unwrap();
693 let (counts, edges) = histogramdd(&sample, &[2, 2]).unwrap();
694 assert_eq!(counts.shape(), &[2, 2]);
695 let c: Vec<u64> = counts.iter().copied().collect();
696 assert_eq!(c.iter().sum::<u64>(), 4);
697 assert_eq!(edges.len(), 2);
698 }
699}