1use ferray_core::error::{FerrayError, FerrayResult};
4use ferray_core::{Array, Element, Ix1, Ix2, IxDyn};
5use num_traits::Float;
6
7#[derive(Debug, Clone)]
13pub enum Bins<T> {
14 Count(usize),
16 Edges(Vec<T>),
18}
19
20pub fn histogram<T>(
34 a: &Array<T, Ix1>,
35 bins: Bins<T>,
36 range: Option<(T, T)>,
37 density: bool,
38) -> FerrayResult<(Array<f64, Ix1>, Array<T, Ix1>)>
39where
40 T: Element + Float,
41{
42 let data: Vec<T> = a.iter().copied().collect();
43
44 let (lo, hi) = match range {
46 Some((l, h)) => {
47 if l >= h {
48 return Err(FerrayError::invalid_value(
49 "range lower bound must be less than upper",
50 ));
51 }
52 (l, h)
53 }
54 None => {
55 if data.is_empty() {
56 return Err(FerrayError::invalid_value(
57 "cannot compute histogram of empty array without range",
58 ));
59 }
60 let lo = data.iter().copied().fold(T::infinity(), |a, b| a.min(b));
61 let hi = data
62 .iter()
63 .copied()
64 .fold(T::neg_infinity(), |a, b| a.max(b));
65 if lo == hi {
66 (lo - <T as Element>::one(), hi + <T as Element>::one())
67 } else {
68 (lo, hi)
69 }
70 }
71 };
72
73 let edges = match bins {
75 Bins::Count(n) => {
76 if n == 0 {
77 return Err(FerrayError::invalid_value("number of bins must be > 0"));
78 }
79 let step = (hi - lo) / T::from(n).unwrap();
80 let mut edges = Vec::with_capacity(n + 1);
81 for i in 0..n {
82 edges.push(lo + step * T::from(i).unwrap());
83 }
84 edges.push(hi);
85 edges
86 }
87 Bins::Edges(e) => {
88 if e.len() < 2 {
89 return Err(FerrayError::invalid_value(
90 "bin edges must have at least 2 elements",
91 ));
92 }
93 e
94 }
95 };
96
97 let nbins = edges.len() - 1;
98 let mut counts = vec![0u64; nbins];
99
100 for &x in &data {
101 if x.is_nan() {
102 continue;
103 }
104 if x < edges[0] || x > edges[nbins] {
105 continue;
106 }
107 let bin = match edges[..nbins]
109 .binary_search_by(|e| e.partial_cmp(&x).unwrap_or(std::cmp::Ordering::Equal))
110 {
111 Ok(i) => i,
112 Err(i) => i.saturating_sub(1),
113 };
114 let bin = bin.min(nbins - 1);
116 counts[bin] += 1;
117 }
118
119 let result: Vec<f64> = if density {
120 let total: f64 = counts.iter().sum::<u64>() as f64;
122 if total == 0.0 {
123 vec![0.0; nbins]
124 } else {
125 counts
126 .iter()
127 .enumerate()
128 .map(|(i, &c)| {
129 let bin_width = (edges[i + 1] - edges[i]).to_f64().unwrap();
130 if bin_width == 0.0 {
131 0.0
132 } else {
133 (c as f64) / (total * bin_width)
134 }
135 })
136 .collect()
137 }
138 } else {
139 counts.iter().map(|&c| c as f64).collect()
140 };
141
142 let counts_arr = Array::from_vec(Ix1::new([nbins]), result)?;
143 let edges_arr = Array::from_vec(Ix1::new([edges.len()]), edges)?;
144 Ok((counts_arr, edges_arr))
145}
146
147#[allow(clippy::type_complexity)]
157pub fn histogram2d<T>(
158 x: &Array<T, Ix1>,
159 y: &Array<T, Ix1>,
160 bins: (usize, usize),
161) -> FerrayResult<(Array<u64, Ix2>, Array<T, Ix1>, Array<T, Ix1>)>
162where
163 T: Element + Float,
164{
165 let xdata: Vec<T> = x.iter().copied().collect();
166 let ydata: Vec<T> = y.iter().copied().collect();
167
168 if xdata.len() != ydata.len() {
169 return Err(FerrayError::shape_mismatch(
170 "x and y must have the same length",
171 ));
172 }
173 if xdata.is_empty() {
174 return Err(FerrayError::invalid_value(
175 "cannot compute histogram2d of empty arrays",
176 ));
177 }
178
179 let (nx, ny) = bins;
180 if nx == 0 || ny == 0 {
181 return Err(FerrayError::invalid_value("number of bins must be > 0"));
182 }
183
184 let x_min = xdata.iter().copied().fold(T::infinity(), |a, b| a.min(b));
185 let x_max = xdata
186 .iter()
187 .copied()
188 .fold(T::neg_infinity(), |a, b| a.max(b));
189 let y_min = ydata.iter().copied().fold(T::infinity(), |a, b| a.min(b));
190 let y_max = ydata
191 .iter()
192 .copied()
193 .fold(T::neg_infinity(), |a, b| a.max(b));
194
195 let (x_lo, x_hi) = if x_min == x_max {
196 (x_min - <T as Element>::one(), x_max + <T as Element>::one())
197 } else {
198 (x_min, x_max)
199 };
200 let (y_lo, y_hi) = if y_min == y_max {
201 (y_min - <T as Element>::one(), y_max + <T as Element>::one())
202 } else {
203 (y_min, y_max)
204 };
205
206 let x_step = (x_hi - x_lo) / T::from(nx).unwrap();
207 let y_step = (y_hi - y_lo) / T::from(ny).unwrap();
208
209 let mut x_edges = Vec::with_capacity(nx + 1);
210 for i in 0..nx {
211 x_edges.push(x_lo + x_step * T::from(i).unwrap());
212 }
213 x_edges.push(x_hi);
214
215 let mut y_edges = Vec::with_capacity(ny + 1);
216 for i in 0..ny {
217 y_edges.push(y_lo + y_step * T::from(i).unwrap());
218 }
219 y_edges.push(y_hi);
220
221 let mut counts = vec![0u64; nx * ny];
222
223 for (&xv, &yv) in xdata.iter().zip(ydata.iter()) {
224 if xv.is_nan() || yv.is_nan() {
225 continue;
226 }
227 let xi = bin_index(xv, x_lo, x_step, nx);
228 let yi = bin_index(yv, y_lo, y_step, ny);
229 if let (Some(xi), Some(yi)) = (xi, yi) {
230 counts[xi * ny + yi] += 1;
231 }
232 }
233
234 let counts_arr = Array::from_vec(Ix2::new([nx, ny]), counts)?;
235 let x_edges_arr = Array::from_vec(Ix1::new([x_edges.len()]), x_edges)?;
236 let y_edges_arr = Array::from_vec(Ix1::new([y_edges.len()]), y_edges)?;
237
238 Ok((counts_arr, x_edges_arr, y_edges_arr))
239}
240
241fn bin_index<T: Float>(val: T, lo: T, step: T, nbins: usize) -> Option<usize> {
243 if val < lo {
244 return None;
245 }
246 let idx = ((val - lo) / step).floor().to_usize().unwrap_or(nbins);
247 if idx >= nbins {
248 Some(nbins - 1) } else {
250 Some(idx)
251 }
252}
253
254#[allow(clippy::type_complexity)]
267pub fn histogramdd<T>(
268 sample: &Array<T, Ix2>,
269 bins: &[usize],
270) -> FerrayResult<(Array<u64, IxDyn>, Vec<Array<T, Ix1>>)>
271where
272 T: Element + Float,
273{
274 let shape = sample.shape();
275 let (npoints, ndims) = (shape[0], shape[1]);
276 let data: Vec<T> = sample.iter().copied().collect();
277
278 if bins.len() != ndims {
279 return Err(FerrayError::shape_mismatch(format!(
280 "bins length {} does not match sample dimensions {}",
281 bins.len(),
282 ndims
283 )));
284 }
285 for &b in bins {
286 if b == 0 {
287 return Err(FerrayError::invalid_value("number of bins must be > 0"));
288 }
289 }
290
291 let mut lo = vec![T::infinity(); ndims];
293 let mut hi = vec![T::neg_infinity(); ndims];
294 for i in 0..npoints {
295 for j in 0..ndims {
296 let v = data[i * ndims + j];
297 if !v.is_nan() {
298 if v < lo[j] {
299 lo[j] = v;
300 }
301 if v > hi[j] {
302 hi[j] = v;
303 }
304 }
305 }
306 }
307 for j in 0..ndims {
308 if lo[j] == hi[j] {
309 lo[j] = lo[j] - <T as Element>::one();
310 hi[j] = hi[j] + <T as Element>::one();
311 }
312 }
313
314 let mut all_edges = Vec::with_capacity(ndims);
316 let mut steps = Vec::with_capacity(ndims);
317 for j in 0..ndims {
318 let step = (hi[j] - lo[j]) / T::from(bins[j]).unwrap();
319 steps.push(step);
320 let mut edges = Vec::with_capacity(bins[j] + 1);
321 for k in 0..bins[j] {
322 edges.push(lo[j] + step * T::from(k).unwrap());
323 }
324 edges.push(hi[j]);
325 all_edges.push(edges);
326 }
327
328 let out_size: usize = bins.iter().product();
330 let mut out_strides = vec![1usize; ndims];
331 for j in (0..ndims.saturating_sub(1)).rev() {
332 out_strides[j] = out_strides[j + 1] * bins[j + 1];
333 }
334
335 let mut counts = vec![0u64; out_size];
336 for i in 0..npoints {
337 let mut flat_idx = 0usize;
338 let mut valid = true;
339 for j in 0..ndims {
340 let v = data[i * ndims + j];
341 if v.is_nan() {
342 valid = false;
343 break;
344 }
345 match bin_index(v, lo[j], steps[j], bins[j]) {
346 Some(bi) => flat_idx += bi * out_strides[j],
347 None => {
348 valid = false;
349 break;
350 }
351 }
352 }
353 if valid {
354 counts[flat_idx] += 1;
355 }
356 }
357
358 let counts_arr = Array::from_vec(IxDyn::new(bins), counts)?;
359 let edge_arrs: Vec<Array<T, Ix1>> = all_edges
360 .into_iter()
361 .map(|e| {
362 let n = e.len();
363 Array::from_vec(Ix1::new([n]), e).unwrap()
364 })
365 .collect();
366
367 Ok((counts_arr, edge_arrs))
368}
369
370pub fn bincount(
383 x: &Array<u64, Ix1>,
384 weights: Option<&Array<f64, Ix1>>,
385 minlength: usize,
386) -> FerrayResult<Array<f64, Ix1>> {
387 let data: Vec<u64> = x.iter().copied().collect();
388
389 if let Some(w) = weights {
390 if w.size() != x.size() {
391 return Err(FerrayError::shape_mismatch(
392 "x and weights must have the same length",
393 ));
394 }
395 }
396
397 let max_val = data.iter().copied().max().unwrap_or(0) as usize;
398 let out_len = (max_val + 1).max(minlength);
399 let mut result = vec![0.0_f64; out_len];
400
401 match weights {
402 Some(w) => {
403 let wdata: Vec<f64> = w.iter().copied().collect();
404 for (i, &v) in data.iter().enumerate() {
405 result[v as usize] += wdata[i];
406 }
407 }
408 None => {
409 for &v in &data {
410 result[v as usize] += 1.0;
411 }
412 }
413 }
414
415 Array::from_vec(Ix1::new([out_len]), result)
416}
417
418pub fn digitize<T>(
431 x: &Array<T, Ix1>,
432 bins: &Array<T, Ix1>,
433 right: bool,
434) -> FerrayResult<Array<u64, Ix1>>
435where
436 T: Element + PartialOrd + Copy,
437{
438 let xdata: Vec<T> = x.iter().copied().collect();
439 let bdata: Vec<T> = bins.iter().copied().collect();
440
441 if bdata.is_empty() {
442 return Err(FerrayError::invalid_value("bins must not be empty"));
443 }
444
445 let increasing = bdata.len() < 2 || bdata[0] <= bdata[bdata.len() - 1];
447
448 let mut result = Vec::with_capacity(xdata.len());
449 for &v in &xdata {
450 let idx = if increasing {
451 if right {
452 bdata.partition_point(|b| b < &v)
454 } else {
455 bdata.partition_point(|b| b <= &v)
457 }
458 } else {
459 if right {
461 bdata.len()
462 - bdata
463 .iter()
464 .rev()
465 .position(|b| b < &v)
466 .unwrap_or(bdata.len())
467 } else {
468 bdata.len()
469 - bdata
470 .iter()
471 .rev()
472 .position(|b| b <= &v)
473 .unwrap_or(bdata.len())
474 }
475 };
476 result.push(idx as u64);
477 }
478
479 let n = result.len();
480 Array::from_vec(Ix1::new([n]), result)
481}
482
483#[cfg(test)]
484mod tests {
485 use super::*;
486
487 #[test]
488 fn test_histogram_basic() {
489 let a =
490 Array::<f64, Ix1>::from_vec(Ix1::new([6]), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
491 let (counts, edges) = histogram(&a, Bins::Count(3), None, false).unwrap();
492 assert_eq!(counts.shape(), &[3]);
493 assert_eq!(edges.shape(), &[4]);
494 let c: Vec<f64> = counts.iter().copied().collect();
495 assert!((c.iter().sum::<f64>() - 6.0).abs() < 1e-12);
496 }
497
498 #[test]
499 fn test_histogram_with_range() {
500 let a = Array::<f64, Ix1>::from_vec(Ix1::new([5]), vec![1.0, 2.0, 3.0, 4.0, 5.0]).unwrap();
501 let (counts, edges) = histogram(&a, Bins::Count(5), Some((0.0, 5.0)), false).unwrap();
502 assert_eq!(counts.shape(), &[5]);
503 assert_eq!(edges.shape(), &[6]);
504 }
505
506 #[test]
507 fn test_histogram_explicit_edges() {
508 let a = Array::<f64, Ix1>::from_vec(Ix1::new([5]), vec![0.5, 1.5, 2.5, 3.5, 4.5]).unwrap();
509 let (counts, _) = histogram(
510 &a,
511 Bins::Edges(vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0]),
512 None,
513 false,
514 )
515 .unwrap();
516 let c: Vec<f64> = counts.iter().copied().collect();
517 assert_eq!(c, vec![1.0, 1.0, 1.0, 1.0, 1.0]);
518 }
519
520 #[test]
521 fn test_histogram_density() {
522 let a = Array::<f64, Ix1>::from_vec(Ix1::new([5]), vec![0.5, 1.5, 2.5, 3.5, 4.5]).unwrap();
524 let (density, edges) = histogram(
525 &a,
526 Bins::Edges(vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0]),
527 None,
528 true,
529 )
530 .unwrap();
531 let d: Vec<f64> = density.iter().copied().collect();
532 let e: Vec<f64> = edges.iter().copied().collect();
533 for &v in &d {
536 assert!((v - 0.2).abs() < 1e-12, "expected 0.2, got {}", v);
537 }
538 let integral: f64 = d
540 .iter()
541 .enumerate()
542 .map(|(i, &di)| di * (e[i + 1] - e[i]))
543 .sum();
544 assert!(
545 (integral - 1.0).abs() < 1e-12,
546 "density integral should be 1.0, got {}",
547 integral
548 );
549 }
550
551 #[test]
552 fn test_histogram_density_unequal_bins() {
553 let a = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![0.5, 1.5, 3.0, 4.0]).unwrap();
555 let (density, edges) = histogram(
556 &a,
557 Bins::Edges(vec![0.0, 2.0, 5.0]),
558 None,
559 true,
560 )
561 .unwrap();
562 let d: Vec<f64> = density.iter().copied().collect();
563 let e: Vec<f64> = edges.iter().copied().collect();
564 assert!((d[0] - 0.25).abs() < 1e-12, "expected 0.25, got {}", d[0]);
567 assert!(
568 (d[1] - 2.0 / 12.0).abs() < 1e-12,
569 "expected 1/6, got {}",
570 d[1]
571 );
572 let integral: f64 = d
574 .iter()
575 .enumerate()
576 .map(|(i, &di)| di * (e[i + 1] - e[i]))
577 .sum();
578 assert!(
579 (integral - 1.0).abs() < 1e-12,
580 "density integral should be 1.0, got {}",
581 integral
582 );
583 }
584
585 #[test]
586 fn test_histogram2d() {
587 let x = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![0.0, 1.0, 2.0, 3.0]).unwrap();
588 let y = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![0.0, 1.0, 2.0, 3.0]).unwrap();
589 let (counts, _xe, _ye) = histogram2d(&x, &y, (2, 2)).unwrap();
590 assert_eq!(counts.shape(), &[2, 2]);
591 let c: Vec<u64> = counts.iter().copied().collect();
592 assert_eq!(c.iter().sum::<u64>(), 4);
593 }
594
595 #[test]
596 fn test_bincount() {
597 let x = Array::<u64, Ix1>::from_vec(Ix1::new([6]), vec![0, 1, 1, 2, 2, 2]).unwrap();
598 let bc = bincount(&x, None, 0).unwrap();
599 let data: Vec<f64> = bc.iter().copied().collect();
600 assert_eq!(data, vec![1.0, 2.0, 3.0]);
601 }
602
603 #[test]
604 fn test_bincount_weighted() {
605 let x = Array::<u64, Ix1>::from_vec(Ix1::new([3]), vec![0, 1, 1]).unwrap();
606 let w = Array::<f64, Ix1>::from_vec(Ix1::new([3]), vec![0.5, 1.0, 1.5]).unwrap();
607 let bc = bincount(&x, Some(&w), 0).unwrap();
608 let data: Vec<f64> = bc.iter().copied().collect();
609 assert!((data[0] - 0.5).abs() < 1e-12);
610 assert!((data[1] - 2.5).abs() < 1e-12);
611 }
612
613 #[test]
614 fn test_bincount_minlength() {
615 let x = Array::<u64, Ix1>::from_vec(Ix1::new([2]), vec![0, 1]).unwrap();
616 let bc = bincount(&x, None, 5).unwrap();
617 assert_eq!(bc.shape(), &[5]);
618 }
619
620 #[test]
621 fn test_digitize_basic() {
622 let x = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![0.5, 1.5, 2.5, 3.5]).unwrap();
623 let bins = Array::<f64, Ix1>::from_vec(Ix1::new([3]), vec![1.0, 2.0, 3.0]).unwrap();
624 let d = digitize(&x, &bins, false).unwrap();
625 let data: Vec<u64> = d.iter().copied().collect();
626 assert_eq!(data, vec![0, 1, 2, 3]);
631 }
632
633 #[test]
634 fn test_digitize_right() {
635 let x = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![0.5, 1.0, 2.5, 3.5]).unwrap();
636 let bins = Array::<f64, Ix1>::from_vec(Ix1::new([3]), vec![1.0, 2.0, 3.0]).unwrap();
637 let d = digitize(&x, &bins, true).unwrap();
638 let data: Vec<u64> = d.iter().copied().collect();
639 assert_eq!(data, vec![0, 0, 2, 3]);
645 }
646
647 #[test]
648 fn test_histogramdd() {
649 let sample = Array::<f64, Ix2>::from_vec(
650 Ix2::new([4, 2]),
651 vec![0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0],
652 )
653 .unwrap();
654 let (counts, edges) = histogramdd(&sample, &[2, 2]).unwrap();
655 assert_eq!(counts.shape(), &[2, 2]);
656 let c: Vec<u64> = counts.iter().copied().collect();
657 assert_eq!(c.iter().sum::<u64>(), 4);
658 assert_eq!(edges.len(), 2);
659 }
660}