1use ferray_core::error::{FerrayError, FerrayResult};
4use ferray_core::{Array, Element, Ix1, Ix2, IxDyn};
5use num_traits::Float;
6
7#[derive(Debug, Clone)]
13pub enum Bins<T> {
14 Count(usize),
16 Edges(Vec<T>),
18}
19
20pub fn histogram<T>(
34 a: &Array<T, Ix1>,
35 bins: Bins<T>,
36 range: Option<(T, T)>,
37 density: bool,
38) -> FerrayResult<(Array<u64, Ix1>, Array<T, Ix1>)>
39where
40 T: Element + Float,
41{
42 let data: Vec<T> = a.iter().copied().collect();
43
44 let (lo, hi) = match range {
46 Some((l, h)) => {
47 if l >= h {
48 return Err(FerrayError::invalid_value(
49 "range lower bound must be less than upper",
50 ));
51 }
52 (l, h)
53 }
54 None => {
55 if data.is_empty() {
56 return Err(FerrayError::invalid_value(
57 "cannot compute histogram of empty array without range",
58 ));
59 }
60 let lo = data.iter().copied().fold(T::infinity(), |a, b| a.min(b));
61 let hi = data
62 .iter()
63 .copied()
64 .fold(T::neg_infinity(), |a, b| a.max(b));
65 if lo == hi {
66 (lo - <T as Element>::one(), hi + <T as Element>::one())
67 } else {
68 (lo, hi)
69 }
70 }
71 };
72
73 let edges = match bins {
75 Bins::Count(n) => {
76 if n == 0 {
77 return Err(FerrayError::invalid_value("number of bins must be > 0"));
78 }
79 let step = (hi - lo) / T::from(n).unwrap();
80 let mut edges = Vec::with_capacity(n + 1);
81 for i in 0..n {
82 edges.push(lo + step * T::from(i).unwrap());
83 }
84 edges.push(hi);
85 edges
86 }
87 Bins::Edges(e) => {
88 if e.len() < 2 {
89 return Err(FerrayError::invalid_value(
90 "bin edges must have at least 2 elements",
91 ));
92 }
93 e
94 }
95 };
96
97 let nbins = edges.len() - 1;
98 let mut counts = vec![0u64; nbins];
99
100 for &x in &data {
101 if x.is_nan() {
102 continue;
103 }
104 if x < edges[0] || x > edges[nbins] {
105 continue;
106 }
107 let bin = match edges[..nbins]
109 .binary_search_by(|e| e.partial_cmp(&x).unwrap_or(std::cmp::Ordering::Equal))
110 {
111 Ok(i) => i,
112 Err(i) => i.saturating_sub(1),
113 };
114 let bin = bin.min(nbins - 1);
116 counts[bin] += 1;
117 }
118
119 let counts_arr = Array::from_vec(Ix1::new([nbins]), counts.clone())?;
120
121 if density {
122 let _total = T::from(data.iter().filter(|x| !x.is_nan()).count()).unwrap();
126 }
127
128 let edges_arr = Array::from_vec(Ix1::new([edges.len()]), edges)?;
129 Ok((counts_arr, edges_arr))
130}
131
132#[allow(clippy::type_complexity)]
142pub fn histogram2d<T>(
143 x: &Array<T, Ix1>,
144 y: &Array<T, Ix1>,
145 bins: (usize, usize),
146) -> FerrayResult<(Array<u64, Ix2>, Array<T, Ix1>, Array<T, Ix1>)>
147where
148 T: Element + Float,
149{
150 let xdata: Vec<T> = x.iter().copied().collect();
151 let ydata: Vec<T> = y.iter().copied().collect();
152
153 if xdata.len() != ydata.len() {
154 return Err(FerrayError::shape_mismatch(
155 "x and y must have the same length",
156 ));
157 }
158 if xdata.is_empty() {
159 return Err(FerrayError::invalid_value(
160 "cannot compute histogram2d of empty arrays",
161 ));
162 }
163
164 let (nx, ny) = bins;
165 if nx == 0 || ny == 0 {
166 return Err(FerrayError::invalid_value("number of bins must be > 0"));
167 }
168
169 let x_min = xdata.iter().copied().fold(T::infinity(), |a, b| a.min(b));
170 let x_max = xdata
171 .iter()
172 .copied()
173 .fold(T::neg_infinity(), |a, b| a.max(b));
174 let y_min = ydata.iter().copied().fold(T::infinity(), |a, b| a.min(b));
175 let y_max = ydata
176 .iter()
177 .copied()
178 .fold(T::neg_infinity(), |a, b| a.max(b));
179
180 let (x_lo, x_hi) = if x_min == x_max {
181 (x_min - <T as Element>::one(), x_max + <T as Element>::one())
182 } else {
183 (x_min, x_max)
184 };
185 let (y_lo, y_hi) = if y_min == y_max {
186 (y_min - <T as Element>::one(), y_max + <T as Element>::one())
187 } else {
188 (y_min, y_max)
189 };
190
191 let x_step = (x_hi - x_lo) / T::from(nx).unwrap();
192 let y_step = (y_hi - y_lo) / T::from(ny).unwrap();
193
194 let mut x_edges = Vec::with_capacity(nx + 1);
195 for i in 0..nx {
196 x_edges.push(x_lo + x_step * T::from(i).unwrap());
197 }
198 x_edges.push(x_hi);
199
200 let mut y_edges = Vec::with_capacity(ny + 1);
201 for i in 0..ny {
202 y_edges.push(y_lo + y_step * T::from(i).unwrap());
203 }
204 y_edges.push(y_hi);
205
206 let mut counts = vec![0u64; nx * ny];
207
208 for (&xv, &yv) in xdata.iter().zip(ydata.iter()) {
209 if xv.is_nan() || yv.is_nan() {
210 continue;
211 }
212 let xi = bin_index(xv, x_lo, x_step, nx);
213 let yi = bin_index(yv, y_lo, y_step, ny);
214 if let (Some(xi), Some(yi)) = (xi, yi) {
215 counts[xi * ny + yi] += 1;
216 }
217 }
218
219 let counts_arr = Array::from_vec(Ix2::new([nx, ny]), counts)?;
220 let x_edges_arr = Array::from_vec(Ix1::new([x_edges.len()]), x_edges)?;
221 let y_edges_arr = Array::from_vec(Ix1::new([y_edges.len()]), y_edges)?;
222
223 Ok((counts_arr, x_edges_arr, y_edges_arr))
224}
225
226fn bin_index<T: Float>(val: T, lo: T, step: T, nbins: usize) -> Option<usize> {
228 if val < lo {
229 return None;
230 }
231 let idx = ((val - lo) / step).floor().to_usize().unwrap_or(nbins);
232 if idx >= nbins {
233 Some(nbins - 1) } else {
235 Some(idx)
236 }
237}
238
239#[allow(clippy::type_complexity)]
252pub fn histogramdd<T>(
253 sample: &Array<T, Ix2>,
254 bins: &[usize],
255) -> FerrayResult<(Array<u64, IxDyn>, Vec<Array<T, Ix1>>)>
256where
257 T: Element + Float,
258{
259 let shape = sample.shape();
260 let (npoints, ndims) = (shape[0], shape[1]);
261 let data: Vec<T> = sample.iter().copied().collect();
262
263 if bins.len() != ndims {
264 return Err(FerrayError::shape_mismatch(format!(
265 "bins length {} does not match sample dimensions {}",
266 bins.len(),
267 ndims
268 )));
269 }
270 for &b in bins {
271 if b == 0 {
272 return Err(FerrayError::invalid_value("number of bins must be > 0"));
273 }
274 }
275
276 let mut lo = vec![T::infinity(); ndims];
278 let mut hi = vec![T::neg_infinity(); ndims];
279 for i in 0..npoints {
280 for j in 0..ndims {
281 let v = data[i * ndims + j];
282 if !v.is_nan() {
283 if v < lo[j] {
284 lo[j] = v;
285 }
286 if v > hi[j] {
287 hi[j] = v;
288 }
289 }
290 }
291 }
292 for j in 0..ndims {
293 if lo[j] == hi[j] {
294 lo[j] = lo[j] - <T as Element>::one();
295 hi[j] = hi[j] + <T as Element>::one();
296 }
297 }
298
299 let mut all_edges = Vec::with_capacity(ndims);
301 let mut steps = Vec::with_capacity(ndims);
302 for j in 0..ndims {
303 let step = (hi[j] - lo[j]) / T::from(bins[j]).unwrap();
304 steps.push(step);
305 let mut edges = Vec::with_capacity(bins[j] + 1);
306 for k in 0..bins[j] {
307 edges.push(lo[j] + step * T::from(k).unwrap());
308 }
309 edges.push(hi[j]);
310 all_edges.push(edges);
311 }
312
313 let out_size: usize = bins.iter().product();
315 let mut out_strides = vec![1usize; ndims];
316 for j in (0..ndims.saturating_sub(1)).rev() {
317 out_strides[j] = out_strides[j + 1] * bins[j + 1];
318 }
319
320 let mut counts = vec![0u64; out_size];
321 for i in 0..npoints {
322 let mut flat_idx = 0usize;
323 let mut valid = true;
324 for j in 0..ndims {
325 let v = data[i * ndims + j];
326 if v.is_nan() {
327 valid = false;
328 break;
329 }
330 match bin_index(v, lo[j], steps[j], bins[j]) {
331 Some(bi) => flat_idx += bi * out_strides[j],
332 None => {
333 valid = false;
334 break;
335 }
336 }
337 }
338 if valid {
339 counts[flat_idx] += 1;
340 }
341 }
342
343 let counts_arr = Array::from_vec(IxDyn::new(bins), counts)?;
344 let edge_arrs: Vec<Array<T, Ix1>> = all_edges
345 .into_iter()
346 .map(|e| {
347 let n = e.len();
348 Array::from_vec(Ix1::new([n]), e).unwrap()
349 })
350 .collect();
351
352 Ok((counts_arr, edge_arrs))
353}
354
355pub fn bincount(
368 x: &Array<u64, Ix1>,
369 weights: Option<&Array<f64, Ix1>>,
370 minlength: usize,
371) -> FerrayResult<Array<f64, Ix1>> {
372 let data: Vec<u64> = x.iter().copied().collect();
373
374 if let Some(w) = weights {
375 if w.size() != x.size() {
376 return Err(FerrayError::shape_mismatch(
377 "x and weights must have the same length",
378 ));
379 }
380 }
381
382 let max_val = data.iter().copied().max().unwrap_or(0) as usize;
383 let out_len = (max_val + 1).max(minlength);
384 let mut result = vec![0.0_f64; out_len];
385
386 match weights {
387 Some(w) => {
388 let wdata: Vec<f64> = w.iter().copied().collect();
389 for (i, &v) in data.iter().enumerate() {
390 result[v as usize] += wdata[i];
391 }
392 }
393 None => {
394 for &v in &data {
395 result[v as usize] += 1.0;
396 }
397 }
398 }
399
400 Array::from_vec(Ix1::new([out_len]), result)
401}
402
403pub fn digitize<T>(
416 x: &Array<T, Ix1>,
417 bins: &Array<T, Ix1>,
418 right: bool,
419) -> FerrayResult<Array<u64, Ix1>>
420where
421 T: Element + PartialOrd + Copy,
422{
423 let xdata: Vec<T> = x.iter().copied().collect();
424 let bdata: Vec<T> = bins.iter().copied().collect();
425
426 if bdata.is_empty() {
427 return Err(FerrayError::invalid_value("bins must not be empty"));
428 }
429
430 let increasing = bdata.len() < 2 || bdata[0] <= bdata[bdata.len() - 1];
432
433 let mut result = Vec::with_capacity(xdata.len());
434 for &v in &xdata {
435 let idx = if increasing {
436 if right {
437 bdata.partition_point(|b| b < &v)
439 } else {
440 bdata.partition_point(|b| b <= &v)
442 }
443 } else {
444 if right {
446 bdata.len()
447 - bdata
448 .iter()
449 .rev()
450 .position(|b| b < &v)
451 .unwrap_or(bdata.len())
452 } else {
453 bdata.len()
454 - bdata
455 .iter()
456 .rev()
457 .position(|b| b <= &v)
458 .unwrap_or(bdata.len())
459 }
460 };
461 result.push(idx as u64);
462 }
463
464 let n = result.len();
465 Array::from_vec(Ix1::new([n]), result)
466}
467
468#[cfg(test)]
469mod tests {
470 use super::*;
471
472 #[test]
473 fn test_histogram_basic() {
474 let a =
475 Array::<f64, Ix1>::from_vec(Ix1::new([6]), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
476 let (counts, edges) = histogram(&a, Bins::Count(3), None, false).unwrap();
477 assert_eq!(counts.shape(), &[3]);
478 assert_eq!(edges.shape(), &[4]);
479 let c: Vec<u64> = counts.iter().copied().collect();
480 assert_eq!(c.iter().sum::<u64>(), 6);
481 }
482
483 #[test]
484 fn test_histogram_with_range() {
485 let a = Array::<f64, Ix1>::from_vec(Ix1::new([5]), vec![1.0, 2.0, 3.0, 4.0, 5.0]).unwrap();
486 let (counts, edges) = histogram(&a, Bins::Count(5), Some((0.0, 5.0)), false).unwrap();
487 assert_eq!(counts.shape(), &[5]);
488 assert_eq!(edges.shape(), &[6]);
489 }
490
491 #[test]
492 fn test_histogram_explicit_edges() {
493 let a = Array::<f64, Ix1>::from_vec(Ix1::new([5]), vec![0.5, 1.5, 2.5, 3.5, 4.5]).unwrap();
494 let (counts, _) = histogram(
495 &a,
496 Bins::Edges(vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0]),
497 None,
498 false,
499 )
500 .unwrap();
501 let c: Vec<u64> = counts.iter().copied().collect();
502 assert_eq!(c, vec![1, 1, 1, 1, 1]);
503 }
504
505 #[test]
506 fn test_histogram2d() {
507 let x = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![0.0, 1.0, 2.0, 3.0]).unwrap();
508 let y = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![0.0, 1.0, 2.0, 3.0]).unwrap();
509 let (counts, _xe, _ye) = histogram2d(&x, &y, (2, 2)).unwrap();
510 assert_eq!(counts.shape(), &[2, 2]);
511 let c: Vec<u64> = counts.iter().copied().collect();
512 assert_eq!(c.iter().sum::<u64>(), 4);
513 }
514
515 #[test]
516 fn test_bincount() {
517 let x = Array::<u64, Ix1>::from_vec(Ix1::new([6]), vec![0, 1, 1, 2, 2, 2]).unwrap();
518 let bc = bincount(&x, None, 0).unwrap();
519 let data: Vec<f64> = bc.iter().copied().collect();
520 assert_eq!(data, vec![1.0, 2.0, 3.0]);
521 }
522
523 #[test]
524 fn test_bincount_weighted() {
525 let x = Array::<u64, Ix1>::from_vec(Ix1::new([3]), vec![0, 1, 1]).unwrap();
526 let w = Array::<f64, Ix1>::from_vec(Ix1::new([3]), vec![0.5, 1.0, 1.5]).unwrap();
527 let bc = bincount(&x, Some(&w), 0).unwrap();
528 let data: Vec<f64> = bc.iter().copied().collect();
529 assert!((data[0] - 0.5).abs() < 1e-12);
530 assert!((data[1] - 2.5).abs() < 1e-12);
531 }
532
533 #[test]
534 fn test_bincount_minlength() {
535 let x = Array::<u64, Ix1>::from_vec(Ix1::new([2]), vec![0, 1]).unwrap();
536 let bc = bincount(&x, None, 5).unwrap();
537 assert_eq!(bc.shape(), &[5]);
538 }
539
540 #[test]
541 fn test_digitize_basic() {
542 let x = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![0.5, 1.5, 2.5, 3.5]).unwrap();
543 let bins = Array::<f64, Ix1>::from_vec(Ix1::new([3]), vec![1.0, 2.0, 3.0]).unwrap();
544 let d = digitize(&x, &bins, false).unwrap();
545 let data: Vec<u64> = d.iter().copied().collect();
546 assert_eq!(data, vec![0, 1, 2, 3]);
551 }
552
553 #[test]
554 fn test_digitize_right() {
555 let x = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![0.5, 1.0, 2.5, 3.5]).unwrap();
556 let bins = Array::<f64, Ix1>::from_vec(Ix1::new([3]), vec![1.0, 2.0, 3.0]).unwrap();
557 let d = digitize(&x, &bins, true).unwrap();
558 let data: Vec<u64> = d.iter().copied().collect();
559 assert_eq!(data, vec![0, 0, 2, 3]);
565 }
566
567 #[test]
568 fn test_histogramdd() {
569 let sample = Array::<f64, Ix2>::from_vec(
570 Ix2::new([4, 2]),
571 vec![0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0],
572 )
573 .unwrap();
574 let (counts, edges) = histogramdd(&sample, &[2, 2]).unwrap();
575 assert_eq!(counts.shape(), &[2, 2]);
576 let c: Vec<u64> = counts.iter().copied().collect();
577 assert_eq!(c.iter().sum::<u64>(), 4);
578 assert_eq!(edges.len(), 2);
579 }
580}