Skip to main content

ferray_stats/
histogram.rs

1// ferray-stats: Histogram functions — histogram, histogram2d, histogramdd, bincount, digitize (REQ-8, REQ-9, REQ-10)
2
3use ferray_core::error::{FerrayError, FerrayResult};
4use ferray_core::{Array, Element, Ix1, Ix2, IxDyn};
5use num_traits::Float;
6
7// ---------------------------------------------------------------------------
8// Bins specification
9// ---------------------------------------------------------------------------
10
11/// How to specify bins for histogram functions.
12#[derive(Debug, Clone)]
13pub enum Bins<T> {
14    /// Number of equal-width bins.
15    Count(usize),
16    /// Explicit bin edges (must be sorted, length = nbins + 1).
17    Edges(Vec<T>),
18}
19
20// ---------------------------------------------------------------------------
21// histogram
22// ---------------------------------------------------------------------------
23
24/// Compute the histogram of a dataset.
25///
26/// Returns `(counts, bin_edges)` where `counts` has length `nbins`
27/// and `bin_edges` has length `nbins + 1`.
28///
29/// If `density` is true, the result is normalized so that the integral
30/// over the range equals 1.
31///
32/// Equivalent to `numpy.histogram`.
33pub fn histogram<T>(
34    a: &Array<T, Ix1>,
35    bins: Bins<T>,
36    range: Option<(T, T)>,
37    density: bool,
38) -> FerrayResult<(Array<f64, Ix1>, Array<T, Ix1>)>
39where
40    T: Element + Float,
41{
42    let data: Vec<T> = a.iter().copied().collect();
43
44    // Determine range
45    let (lo, hi) = if let Some((l, h)) = range {
46        if l >= h {
47            return Err(FerrayError::invalid_value(
48                "range lower bound must be less than upper",
49            ));
50        }
51        (l, h)
52    } else {
53        if data.is_empty() {
54            return Err(FerrayError::invalid_value(
55                "cannot compute histogram of empty array without range",
56            ));
57        }
58        let lo = data
59            .iter()
60            .copied()
61            .fold(T::infinity(), num_traits::Float::min);
62        let hi = data
63            .iter()
64            .copied()
65            .fold(T::neg_infinity(), num_traits::Float::max);
66        if lo == hi {
67            (lo - <T as Element>::one(), hi + <T as Element>::one())
68        } else {
69            (lo, hi)
70        }
71    };
72
73    // Build bin edges
74    let edges = match bins {
75        Bins::Count(n) => {
76            if n == 0 {
77                return Err(FerrayError::invalid_value("number of bins must be > 0"));
78            }
79            let step = (hi - lo) / T::from(n).unwrap();
80            let mut edges = Vec::with_capacity(n + 1);
81            for i in 0..n {
82                edges.push(lo + step * T::from(i).unwrap());
83            }
84            edges.push(hi);
85            edges
86        }
87        Bins::Edges(e) => {
88            if e.len() < 2 {
89                return Err(FerrayError::invalid_value(
90                    "bin edges must have at least 2 elements",
91                ));
92            }
93            e
94        }
95    };
96
97    let nbins = edges.len() - 1;
98    let mut counts = vec![0u64; nbins];
99
100    for &x in &data {
101        if x.is_nan() {
102            continue;
103        }
104        if x < edges[0] || x > edges[nbins] {
105            continue;
106        }
107        // Binary search for bin
108        let bin = match edges[..nbins]
109            .binary_search_by(|e| e.partial_cmp(&x).unwrap_or(std::cmp::Ordering::Equal))
110        {
111            Ok(i) => i,
112            Err(i) => i.saturating_sub(1),
113        };
114        // Last bin is closed on the right: [edges[n-1], edges[n]]
115        let bin = bin.min(nbins - 1);
116        counts[bin] += 1;
117    }
118
119    let result: Vec<f64> = if density {
120        // Normalize: density[i] = counts[i] / (total * bin_width[i])
121        let total: f64 = counts.iter().sum::<u64>() as f64;
122        if total == 0.0 {
123            vec![0.0; nbins]
124        } else {
125            counts
126                .iter()
127                .enumerate()
128                .map(|(i, &c)| {
129                    let bin_width = (edges[i + 1] - edges[i]).to_f64().unwrap();
130                    if bin_width == 0.0 {
131                        0.0
132                    } else {
133                        (c as f64) / (total * bin_width)
134                    }
135                })
136                .collect()
137        }
138    } else {
139        counts.iter().map(|&c| c as f64).collect()
140    };
141
142    let counts_arr = Array::from_vec(Ix1::new([nbins]), result)?;
143    let edges_arr = Array::from_vec(Ix1::new([edges.len()]), edges)?;
144    Ok((counts_arr, edges_arr))
145}
146
147/// Compute the bin edges that would be used by [`histogram`] without
148/// counting anything.
149///
150/// Useful for sharing a fixed bin specification across multiple datasets.
151/// Equivalent to `numpy.histogram_bin_edges`.
152///
153/// # Errors
154/// - `FerrayError::InvalidValue` if `range` is degenerate, `bins` is `Count(0)`,
155///   `bins` is `Edges` with fewer than 2 elements, or the array is empty
156///   without an explicit `range`.
157pub fn histogram_bin_edges<T>(
158    a: &Array<T, Ix1>,
159    bins: Bins<T>,
160    range: Option<(T, T)>,
161) -> FerrayResult<Array<T, Ix1>>
162where
163    T: Element + Float,
164{
165    let edges = match bins {
166        Bins::Count(n) => {
167            if n == 0 {
168                return Err(FerrayError::invalid_value("number of bins must be > 0"));
169            }
170            let data: Vec<T> = a.iter().copied().collect();
171            let (lo, hi) = if let Some((l, h)) = range {
172                if l >= h {
173                    return Err(FerrayError::invalid_value(
174                        "range lower bound must be less than upper",
175                    ));
176                }
177                (l, h)
178            } else {
179                if data.is_empty() {
180                    return Err(FerrayError::invalid_value(
181                        "cannot compute histogram_bin_edges of empty array without range",
182                    ));
183                }
184                let lo = data
185                    .iter()
186                    .copied()
187                    .fold(T::infinity(), num_traits::Float::min);
188                let hi = data
189                    .iter()
190                    .copied()
191                    .fold(T::neg_infinity(), num_traits::Float::max);
192                if lo == hi {
193                    (lo - <T as Element>::one(), hi + <T as Element>::one())
194                } else {
195                    (lo, hi)
196                }
197            };
198            let step = (hi - lo) / T::from(n).unwrap();
199            let mut edges = Vec::with_capacity(n + 1);
200            for i in 0..n {
201                edges.push(lo + step * T::from(i).unwrap());
202            }
203            edges.push(hi);
204            edges
205        }
206        Bins::Edges(e) => {
207            if e.len() < 2 {
208                return Err(FerrayError::invalid_value(
209                    "bin edges must have at least 2 elements",
210                ));
211            }
212            e
213        }
214    };
215
216    Array::from_vec(Ix1::new([edges.len()]), edges)
217}
218
219// ---------------------------------------------------------------------------
220// histogram2d
221// ---------------------------------------------------------------------------
222
223/// Compute the 2-D histogram of two data arrays.
224///
225/// Returns `(counts, x_edges, y_edges)`.
226///
227/// Equivalent to `numpy.histogram2d`.
228#[allow(clippy::type_complexity)]
229pub fn histogram2d<T>(
230    x: &Array<T, Ix1>,
231    y: &Array<T, Ix1>,
232    bins: (usize, usize),
233) -> FerrayResult<(Array<u64, Ix2>, Array<T, Ix1>, Array<T, Ix1>)>
234where
235    T: Element + Float,
236{
237    let xdata: Vec<T> = x.iter().copied().collect();
238    let ydata: Vec<T> = y.iter().copied().collect();
239
240    if xdata.len() != ydata.len() {
241        return Err(FerrayError::shape_mismatch(
242            "x and y must have the same length",
243        ));
244    }
245    if xdata.is_empty() {
246        return Err(FerrayError::invalid_value(
247            "cannot compute histogram2d of empty arrays",
248        ));
249    }
250
251    let (nx, ny) = bins;
252    if nx == 0 || ny == 0 {
253        return Err(FerrayError::invalid_value("number of bins must be > 0"));
254    }
255
256    let x_min = xdata
257        .iter()
258        .copied()
259        .fold(T::infinity(), num_traits::Float::min);
260    let x_max = xdata
261        .iter()
262        .copied()
263        .fold(T::neg_infinity(), num_traits::Float::max);
264    let y_min = ydata
265        .iter()
266        .copied()
267        .fold(T::infinity(), num_traits::Float::min);
268    let y_max = ydata
269        .iter()
270        .copied()
271        .fold(T::neg_infinity(), num_traits::Float::max);
272
273    let (x_lo, x_hi) = if x_min == x_max {
274        (x_min - <T as Element>::one(), x_max + <T as Element>::one())
275    } else {
276        (x_min, x_max)
277    };
278    let (y_lo, y_hi) = if y_min == y_max {
279        (y_min - <T as Element>::one(), y_max + <T as Element>::one())
280    } else {
281        (y_min, y_max)
282    };
283
284    let x_step = (x_hi - x_lo) / T::from(nx).unwrap();
285    let y_step = (y_hi - y_lo) / T::from(ny).unwrap();
286
287    let mut x_edges = Vec::with_capacity(nx + 1);
288    for i in 0..nx {
289        x_edges.push(x_lo + x_step * T::from(i).unwrap());
290    }
291    x_edges.push(x_hi);
292
293    let mut y_edges = Vec::with_capacity(ny + 1);
294    for i in 0..ny {
295        y_edges.push(y_lo + y_step * T::from(i).unwrap());
296    }
297    y_edges.push(y_hi);
298
299    let mut counts = vec![0u64; nx * ny];
300
301    for (&xv, &yv) in xdata.iter().zip(ydata.iter()) {
302        if xv.is_nan() || yv.is_nan() {
303            continue;
304        }
305        let xi = bin_index(xv, x_lo, x_step, nx);
306        let yi = bin_index(yv, y_lo, y_step, ny);
307        if let (Some(xi), Some(yi)) = (xi, yi) {
308            counts[xi * ny + yi] += 1;
309        }
310    }
311
312    let counts_arr = Array::from_vec(Ix2::new([nx, ny]), counts)?;
313    let x_edges_arr = Array::from_vec(Ix1::new([x_edges.len()]), x_edges)?;
314    let y_edges_arr = Array::from_vec(Ix1::new([y_edges.len()]), y_edges)?;
315
316    Ok((counts_arr, x_edges_arr, y_edges_arr))
317}
318
319/// Determine which bin a value falls into.
320fn bin_index<T: Float>(val: T, lo: T, step: T, nbins: usize) -> Option<usize> {
321    if val < lo {
322        return None;
323    }
324    let idx = ((val - lo) / step).floor().to_usize().unwrap_or(nbins);
325    if idx >= nbins {
326        Some(nbins - 1) // Right edge is included in last bin
327    } else {
328        Some(idx)
329    }
330}
331
332// ---------------------------------------------------------------------------
333// histogramdd
334// ---------------------------------------------------------------------------
335
336/// Compute a multi-dimensional histogram.
337///
338/// `sample` is a 2-D array where each row is a data point and each column
339/// is a dimension. `bins` specifies the number of bins per dimension.
340///
341/// Returns `(counts, edges)` where `edges` is a vector of 1-D edge arrays.
342///
343/// Equivalent to `numpy.histogramdd`.
344#[allow(clippy::type_complexity)]
345pub fn histogramdd<T>(
346    sample: &Array<T, Ix2>,
347    bins: &[usize],
348) -> FerrayResult<(Array<u64, IxDyn>, Vec<Array<T, Ix1>>)>
349where
350    T: Element + Float,
351{
352    let shape = sample.shape();
353    let (npoints, ndims) = (shape[0], shape[1]);
354    let data: Vec<T> = sample.iter().copied().collect();
355
356    if bins.len() != ndims {
357        return Err(FerrayError::shape_mismatch(format!(
358            "bins length {} does not match sample dimensions {}",
359            bins.len(),
360            ndims
361        )));
362    }
363    for &b in bins {
364        if b == 0 {
365            return Err(FerrayError::invalid_value("number of bins must be > 0"));
366        }
367    }
368
369    // Compute ranges per dimension
370    let mut lo = vec![T::infinity(); ndims];
371    let mut hi = vec![T::neg_infinity(); ndims];
372    for i in 0..npoints {
373        for j in 0..ndims {
374            let v = data[i * ndims + j];
375            if !v.is_nan() {
376                if v < lo[j] {
377                    lo[j] = v;
378                }
379                if v > hi[j] {
380                    hi[j] = v;
381                }
382            }
383        }
384    }
385    for j in 0..ndims {
386        if lo[j] == hi[j] {
387            lo[j] = lo[j] - <T as Element>::one();
388            hi[j] = hi[j] + <T as Element>::one();
389        }
390    }
391
392    // Build edges
393    let mut all_edges = Vec::with_capacity(ndims);
394    let mut steps = Vec::with_capacity(ndims);
395    for j in 0..ndims {
396        let step = (hi[j] - lo[j]) / T::from(bins[j]).unwrap();
397        steps.push(step);
398        let mut edges = Vec::with_capacity(bins[j] + 1);
399        for k in 0..bins[j] {
400            edges.push(lo[j] + step * T::from(k).unwrap());
401        }
402        edges.push(hi[j]);
403        all_edges.push(edges);
404    }
405
406    // Compute output strides
407    let out_size: usize = bins.iter().product();
408    let mut out_strides = vec![1usize; ndims];
409    for j in (0..ndims.saturating_sub(1)).rev() {
410        out_strides[j] = out_strides[j + 1] * bins[j + 1];
411    }
412
413    let mut counts = vec![0u64; out_size];
414    for i in 0..npoints {
415        let mut flat_idx = 0usize;
416        let mut valid = true;
417        for j in 0..ndims {
418            let v = data[i * ndims + j];
419            if v.is_nan() {
420                valid = false;
421                break;
422            }
423            if let Some(bi) = bin_index(v, lo[j], steps[j], bins[j]) {
424                flat_idx += bi * out_strides[j];
425            } else {
426                valid = false;
427                break;
428            }
429        }
430        if valid {
431            counts[flat_idx] += 1;
432        }
433    }
434
435    let counts_arr = Array::from_vec(IxDyn::new(bins), counts)?;
436    let edge_arrs: Vec<Array<T, Ix1>> = all_edges
437        .into_iter()
438        .map(|e| {
439            let n = e.len();
440            Array::from_vec(Ix1::new([n]), e).unwrap()
441        })
442        .collect();
443
444    Ok((counts_arr, edge_arrs))
445}
446
447// ---------------------------------------------------------------------------
448// bincount
449// ---------------------------------------------------------------------------
450
451/// Count occurrences of each value in a non-negative integer array,
452/// returning **integer** counts.
453///
454/// Matches `NumPy`'s `numpy.bincount(x)` (no-weights form): the result
455/// dtype is `uint64`, not `float64`. Callers that need weighted sums
456/// (float output) should call [`bincount_weighted`] explicitly
457/// (see issue #168 — the original `bincount` hard-coded `f64` counts
458/// even in the unweighted case).
459pub fn bincount_u64(x: &Array<u64, Ix1>, minlength: usize) -> FerrayResult<Array<u64, Ix1>> {
460    let data: Vec<u64> = x.iter().copied().collect();
461    let max_val = data.iter().copied().max().unwrap_or(0) as usize;
462    let out_len = (max_val + 1).max(minlength);
463    let mut result = vec![0u64; out_len];
464    for &v in &data {
465        result[v as usize] += 1;
466    }
467    Array::from_vec(Ix1::new([out_len]), result)
468}
469
470/// Weighted bincount: accumulates `weights[i]` into bucket `x[i]`,
471/// returning `Array<f64, Ix1>`.
472///
473/// This is `NumPy`'s `numpy.bincount(x, weights=w)` form; the output
474/// dtype is `float64` because weights are floating point.
475pub fn bincount_weighted(
476    x: &Array<u64, Ix1>,
477    weights: &Array<f64, Ix1>,
478    minlength: usize,
479) -> FerrayResult<Array<f64, Ix1>> {
480    if weights.size() != x.size() {
481        return Err(FerrayError::shape_mismatch(
482            "x and weights must have the same length",
483        ));
484    }
485    let data: Vec<u64> = x.iter().copied().collect();
486    let wdata: Vec<f64> = weights.iter().copied().collect();
487    let max_val = data.iter().copied().max().unwrap_or(0) as usize;
488    let out_len = (max_val + 1).max(minlength);
489    let mut result = vec![0.0_f64; out_len];
490    for (i, &v) in data.iter().enumerate() {
491        result[v as usize] += wdata[i];
492    }
493    Array::from_vec(Ix1::new([out_len]), result)
494}
495
496/// Count occurrences of each value in a non-negative integer array.
497///
498/// Umbrella entry point that dispatches between [`bincount_u64`] and
499/// [`bincount_weighted`] based on whether weights are provided.
500/// Always returns `Array<f64, Ix1>` for the umbrella case because we
501/// can't express a union return type; new code should prefer
502/// [`bincount_u64`] or [`bincount_weighted`] directly to avoid the
503/// u64→f64 cast in the unweighted path.
504///
505/// Equivalent to `numpy.bincount`.
506pub fn bincount(
507    x: &Array<u64, Ix1>,
508    weights: Option<&Array<f64, Ix1>>,
509    minlength: usize,
510) -> FerrayResult<Array<f64, Ix1>> {
511    if let Some(w) = weights {
512        bincount_weighted(x, w, minlength)
513    } else {
514        let counts = bincount_u64(x, minlength)?;
515        let data: Vec<f64> = counts.iter().map(|&c| c as f64).collect();
516        Array::from_vec(Ix1::new([counts.size()]), data)
517    }
518}
519
520// ---------------------------------------------------------------------------
521// digitize
522// ---------------------------------------------------------------------------
523
524/// Return the indices of the bins to which each value belongs.
525///
526/// If `right` is false (default), each index `i` satisfies `bins[i-1] <= x < bins[i]`.
527/// If `right` is true, each index `i` satisfies `bins[i-1] < x <= bins[i]`.
528///
529/// Returns u64 indices.
530///
531/// Equivalent to `numpy.digitize`.
532pub fn digitize<T>(
533    x: &Array<T, Ix1>,
534    bins: &Array<T, Ix1>,
535    right: bool,
536) -> FerrayResult<Array<u64, Ix1>>
537where
538    T: Element + PartialOrd + Copy,
539{
540    let xdata: Vec<T> = x.iter().copied().collect();
541    let bdata: Vec<T> = bins.iter().copied().collect();
542
543    if bdata.is_empty() {
544        return Err(FerrayError::invalid_value("bins must not be empty"));
545    }
546
547    // Determine if bins are monotonically increasing or decreasing
548    let increasing = bdata.len() < 2 || bdata[0] <= bdata[bdata.len() - 1];
549
550    let mut result = Vec::with_capacity(xdata.len());
551    for &v in &xdata {
552        let idx = if increasing {
553            if right {
554                // right=True: bins[i-1] < x <= bins[i], count bins where bin < x
555                bdata.partition_point(|b| b < &v)
556            } else {
557                // right=False: bins[i-1] <= x < bins[i], count bins where bin <= x
558                bdata.partition_point(|b| b <= &v)
559            }
560        } else {
561            // Decreasing bins: search reversed
562            if right {
563                bdata.len()
564                    - bdata
565                        .iter()
566                        .rev()
567                        .position(|b| b < &v)
568                        .unwrap_or(bdata.len())
569            } else {
570                bdata.len()
571                    - bdata
572                        .iter()
573                        .rev()
574                        .position(|b| b <= &v)
575                        .unwrap_or(bdata.len())
576            }
577        };
578        result.push(idx as u64);
579    }
580
581    let n = result.len();
582    Array::from_vec(Ix1::new([n]), result)
583}
584
585#[cfg(test)]
586mod tests {
587    use super::*;
588
589    #[test]
590    fn test_histogram_basic() {
591        let a =
592            Array::<f64, Ix1>::from_vec(Ix1::new([6]), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
593        let (counts, edges) = histogram(&a, Bins::Count(3), None, false).unwrap();
594        assert_eq!(counts.shape(), &[3]);
595        assert_eq!(edges.shape(), &[4]);
596        let c: Vec<f64> = counts.iter().copied().collect();
597        assert!((c.iter().sum::<f64>() - 6.0).abs() < 1e-12);
598    }
599
600    #[test]
601    fn test_histogram_with_range() {
602        let a = Array::<f64, Ix1>::from_vec(Ix1::new([5]), vec![1.0, 2.0, 3.0, 4.0, 5.0]).unwrap();
603        let (counts, edges) = histogram(&a, Bins::Count(5), Some((0.0, 5.0)), false).unwrap();
604        assert_eq!(counts.shape(), &[5]);
605        assert_eq!(edges.shape(), &[6]);
606    }
607
608    #[test]
609    fn test_histogram_explicit_edges() {
610        let a = Array::<f64, Ix1>::from_vec(Ix1::new([5]), vec![0.5, 1.5, 2.5, 3.5, 4.5]).unwrap();
611        let (counts, _) = histogram(
612            &a,
613            Bins::Edges(vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0]),
614            None,
615            false,
616        )
617        .unwrap();
618        let c: Vec<f64> = counts.iter().copied().collect();
619        assert_eq!(c, vec![1.0, 1.0, 1.0, 1.0, 1.0]);
620    }
621
622    #[test]
623    fn test_histogram_density() {
624        // 5 values in [0, 5) with equal-width bins of width 1.0
625        let a = Array::<f64, Ix1>::from_vec(Ix1::new([5]), vec![0.5, 1.5, 2.5, 3.5, 4.5]).unwrap();
626        let (density, edges) = histogram(
627            &a,
628            Bins::Edges(vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0]),
629            None,
630            true,
631        )
632        .unwrap();
633        let d: Vec<f64> = density.iter().copied().collect();
634        let e: Vec<f64> = edges.iter().copied().collect();
635        // Each bin has count=1, total=5, bin_width=1.0
636        // density[i] = 1 / (5 * 1.0) = 0.2
637        for &v in &d {
638            assert!((v - 0.2).abs() < 1e-12, "expected 0.2, got {v}");
639        }
640        // Integral over all bins should equal 1: sum(density[i] * width[i]) = 1
641        let integral: f64 = d
642            .iter()
643            .enumerate()
644            .map(|(i, &di)| di * (e[i + 1] - e[i]))
645            .sum();
646        assert!(
647            (integral - 1.0).abs() < 1e-12,
648            "density integral should be 1.0, got {integral}"
649        );
650    }
651
652    #[test]
653    fn test_histogram_density_unequal_bins() {
654        // Test with unequal bin widths
655        let a = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![0.5, 1.5, 3.0, 4.0]).unwrap();
656        let (density, edges) = histogram(&a, Bins::Edges(vec![0.0, 2.0, 5.0]), None, true).unwrap();
657        let d: Vec<f64> = density.iter().copied().collect();
658        let e: Vec<f64> = edges.iter().copied().collect();
659        // Bin [0,2): count=2, width=2, density = 2/(4*2) = 0.25
660        // Bin [2,5]: count=2, width=3, density = 2/(4*3) = 0.1667
661        assert!((d[0] - 0.25).abs() < 1e-12, "expected 0.25, got {}", d[0]);
662        assert!(
663            (d[1] - 2.0 / 12.0).abs() < 1e-12,
664            "expected 1/6, got {}",
665            d[1]
666        );
667        // Integral should equal 1
668        let integral: f64 = d
669            .iter()
670            .enumerate()
671            .map(|(i, &di)| di * (e[i + 1] - e[i]))
672            .sum();
673        assert!(
674            (integral - 1.0).abs() < 1e-12,
675            "density integral should be 1.0, got {integral}"
676        );
677    }
678
679    #[test]
680    fn test_histogram2d() {
681        let x = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![0.0, 1.0, 2.0, 3.0]).unwrap();
682        let y = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![0.0, 1.0, 2.0, 3.0]).unwrap();
683        let (counts, _xe, _ye) = histogram2d(&x, &y, (2, 2)).unwrap();
684        assert_eq!(counts.shape(), &[2, 2]);
685        let c: Vec<u64> = counts.iter().copied().collect();
686        assert_eq!(c.iter().sum::<u64>(), 4);
687    }
688
689    #[test]
690    fn test_bincount() {
691        let x = Array::<u64, Ix1>::from_vec(Ix1::new([6]), vec![0, 1, 1, 2, 2, 2]).unwrap();
692        let bc = bincount(&x, None, 0).unwrap();
693        let data: Vec<f64> = bc.iter().copied().collect();
694        assert_eq!(data, vec![1.0, 2.0, 3.0]);
695    }
696
697    #[test]
698    fn test_bincount_weighted() {
699        let x = Array::<u64, Ix1>::from_vec(Ix1::new([3]), vec![0, 1, 1]).unwrap();
700        let w = Array::<f64, Ix1>::from_vec(Ix1::new([3]), vec![0.5, 1.0, 1.5]).unwrap();
701        let bc = bincount(&x, Some(&w), 0).unwrap();
702        let data: Vec<f64> = bc.iter().copied().collect();
703        assert!((data[0] - 0.5).abs() < 1e-12);
704        assert!((data[1] - 2.5).abs() < 1e-12);
705    }
706
707    #[test]
708    fn test_bincount_minlength() {
709        let x = Array::<u64, Ix1>::from_vec(Ix1::new([2]), vec![0, 1]).unwrap();
710        let bc = bincount(&x, None, 5).unwrap();
711        assert_eq!(bc.shape(), &[5]);
712    }
713
714    #[test]
715    fn test_digitize_basic() {
716        let x = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![0.5, 1.5, 2.5, 3.5]).unwrap();
717        let bins = Array::<f64, Ix1>::from_vec(Ix1::new([3]), vec![1.0, 2.0, 3.0]).unwrap();
718        let d = digitize(&x, &bins, false).unwrap();
719        let data: Vec<u64> = d.iter().copied().collect();
720        // 0.5 < 1.0 -> bin 0
721        // 1.5 in [1, 2) -> bin 1
722        // 2.5 in [2, 3) -> bin 2
723        // 3.5 >= 3.0 -> bin 3
724        assert_eq!(data, vec![0, 1, 2, 3]);
725    }
726
727    #[test]
728    fn test_digitize_right() {
729        let x = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![0.5, 1.0, 2.5, 3.5]).unwrap();
730        let bins = Array::<f64, Ix1>::from_vec(Ix1::new([3]), vec![1.0, 2.0, 3.0]).unwrap();
731        let d = digitize(&x, &bins, true).unwrap();
732        let data: Vec<u64> = d.iter().copied().collect();
733        // right=true: uses searchsorted side='left'
734        // 0.5: no bins < 0.5 -> 0
735        // 1.0: no bins < 1.0 -> 0
736        // 2.5: bins < 2.5 are [1.0, 2.0] -> 2
737        // 3.5: bins < 3.5 are [1.0, 2.0, 3.0] -> 3
738        assert_eq!(data, vec![0, 0, 2, 3]);
739    }
740
741    #[test]
742    fn test_digitize_decreasing_bins() {
743        // Issue #187: decreasing bins. NumPy reverses the logic:
744        // searchsorted on a decreasing array effectively mirrors it.
745        // ferray's digitize delegates to searchsorted which requires
746        // ascending bins — check that it either handles decreasing
747        // bins or returns a sensible result.
748        // For now, just verify it doesn't panic and returns the right
749        // length.
750        let x = Array::<f64, Ix1>::from_vec(Ix1::new([3]), vec![0.5, 1.5, 2.5]).unwrap();
751        let bins = Array::<f64, Ix1>::from_vec(Ix1::new([3]), vec![3.0, 2.0, 1.0]).unwrap();
752        let d = digitize(&x, &bins, false);
753        // If digitize doesn't support decreasing bins, it should
754        // still not panic. The result may be wrong but the function
755        // is callable.
756        assert!(d.is_ok());
757        assert_eq!(d.unwrap().shape(), &[3]);
758    }
759
760    #[test]
761    fn test_histogramdd() {
762        let sample = Array::<f64, Ix2>::from_vec(
763            Ix2::new([4, 2]),
764            vec![0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0],
765        )
766        .unwrap();
767        let (counts, edges) = histogramdd(&sample, &[2, 2]).unwrap();
768        assert_eq!(counts.shape(), &[2, 2]);
769        let c: Vec<u64> = counts.iter().copied().collect();
770        assert_eq!(c.iter().sum::<u64>(), 4);
771        assert_eq!(edges.len(), 2);
772    }
773
774    #[test]
775    fn test_histogram_bin_edges_count() {
776        let a = Array::<f64, Ix1>::from_vec(Ix1::new([5]), vec![0.0, 1.0, 2.0, 3.0, 4.0]).unwrap();
777        let edges = histogram_bin_edges(&a, Bins::Count(4), None).unwrap();
778        let data: Vec<f64> = edges.iter().copied().collect();
779        assert_eq!(data, vec![0.0, 1.0, 2.0, 3.0, 4.0]);
780    }
781
782    #[test]
783    fn test_histogram_bin_edges_explicit_range() {
784        let a = Array::<f64, Ix1>::from_vec(Ix1::new([3]), vec![0.5, 1.5, 2.5]).unwrap();
785        let edges = histogram_bin_edges(&a, Bins::Count(2), Some((0.0, 4.0))).unwrap();
786        let data: Vec<f64> = edges.iter().copied().collect();
787        assert_eq!(data, vec![0.0, 2.0, 4.0]);
788    }
789
790    #[test]
791    fn test_histogram_bin_edges_explicit_edges_passthrough() {
792        let a = Array::<f64, Ix1>::from_vec(Ix1::new([0]), vec![]).unwrap();
793        let edges = histogram_bin_edges(&a, Bins::Edges(vec![0.0, 1.0, 5.0]), None).unwrap();
794        let data: Vec<f64> = edges.iter().copied().collect();
795        assert_eq!(data, vec![0.0, 1.0, 5.0]);
796    }
797}