fp_columnar/
lib.rs

1#![forbid(unsafe_code)]
2#![warn(rustdoc::broken_intra_doc_links)]
3
4//! Columnar storage layer for **frankenpandas** — provides the
5//! [`Column`] container that backs every `DataFrame` column and
6//! `Series` value buffer in fp-frame.
7//!
8//! A column is a typed value buffer ([`DType`]) plus a separate
9//! [`ValidityMask`] tracking which cells are missing. This split
10//! mirrors Apache Arrow's storage layout and lets the type system
11//! enforce correctness on the dense-value side while keeping
12//! pandas-style missing-value semantics ([`NullKind::Null`],
13//! [`NullKind::NaN`], [`NullKind::NaT`]) on the validity side.
14//!
15//! ## Public surface
16//!
17//! - [`Column`]: the public columnar container. Built from a
18//!   [`DType`] + a `Vec<Scalar>`. Exposes value access
19//!   ([`Column::value`], [`Column::values`]), reductions
20//!   ([`Column::sum`], [`Column::mean`], [`Column::count`], the
21//!   nan-aware aggregations from fp-types), and typed binary
22//!   operations dispatched through [`ArithmeticOp`] /
23//!   [`ComparisonOp`].
24//! - [`ColumnData`]: the inner enum holding the dense buffer. Most
25//!   callers go through `Column` rather than touching this directly.
26//! - [`SparseColumn`]: opt-in sparse encoding (paired value buffer +
27//!   index-of-non-fill positions). Stored alongside the dense
28//!   `Column` for backwards compat when consumers only need
29//!   [`Column`].
30//! - [`ValidityMask`]: per-cell missing-value bitmap. Stored on
31//!   [`Column`]; exposed for users that want to compose masks
32//!   directly (logical masking, conditional updates, etc.).
33//! - [`ArithmeticOp`] / [`ComparisonOp`]: enum tags for typed
34//!   binary-op dispatch (used by fp-frame's expression engine and
35//!   Series arithmetic).
36//! - [`CrackIndex`]: an internal positional index used by the
37//!   "cracking" optimisation for repeated boolean-mask filters.
38//!
39//! ## Error reporting
40//!
41//! [`ColumnError`] enumerates the failure modes (length mismatch,
42//! dtype mismatch, missing-value-in-required-slot, etc.). All
43//! Column-mutating fns return `Result<_, ColumnError>` so callers
44//! get explicit error categories.
45//!
46//! ## Relationship to other crates
47//!
48//! - **fp-types** supplies the [`DType`] / [`Scalar`] /
49//!   [`NullKind`] / `nan*` reduction primitives this crate composes
50//!   on top of.
51//! - **fp-frame** stores a `Vec<Column>` per `DataFrame` (one column
52//!   per data column) plus a separate `Index` from fp-index for the
53//!   row labels.
54//! - **fp-index** uses [`Column`] internally for some MultiIndex
55//!   level storage.
56
57use std::sync::{Arc, OnceLock};
58
59use fp_types::{
60    DType, Interval, IntervalClosed, NullKind, Scalar, SparseDType, Timedelta, Timestamp,
61    TypeError, cast_scalar, cast_scalar_owned, common_dtype, infer_dtype, nanall, nanany,
62    nanargmax, nanargmin, nancummax, nancummin, nancumprod, nancumsum, nankurt, nanmax, nanmean,
63    nanmedian, nanmin, nannunique, nanprod, nanptp, nanquantile, nansem, nanskew, nanstd, nansum,
64    nanvar,
65};
66use rustc_hash::{FxHashMap, FxHashSet};
67use serde::{Deserialize, Serialize};
68use thiserror::Error;
69
70#[derive(Debug, Clone, Eq)]
71pub struct ValidityMask {
72    words: Vec<u64>,
73    len: usize,
74}
75
76impl ValidityMask {
77    fn is_all_valid_sentinel(&self) -> bool {
78        self.len > 0 && self.words.is_empty()
79    }
80
81    fn materialized_all_valid_words(len: usize) -> Vec<u64> {
82        let word_count = len.div_ceil(64);
83        let mut words = vec![u64::MAX; word_count];
84        let remainder = len % 64;
85        if remainder > 0
86            && let Some(last) = words.last_mut()
87        {
88            *last = (1_u64 << remainder) - 1;
89        }
90        words
91    }
92
93    fn words_are_all_valid(words: &[u64], len: usize) -> bool {
94        if len == 0 {
95            return words.is_empty();
96        }
97        let word_count = len.div_ceil(64);
98        if words.len() != word_count {
99            return false;
100        }
101        let full_words = len / 64;
102        if words.iter().take(full_words).any(|&word| word != u64::MAX) {
103            return false;
104        }
105        let remainder = len % 64;
106        if remainder == 0 {
107            return true;
108        }
109        words.get(full_words).copied() == Some((1_u64 << remainder) - 1)
110    }
111
112    fn materialize_if_all_valid_sentinel(&mut self) {
113        if self.is_all_valid_sentinel() {
114            self.words = Self::materialized_all_valid_words(self.len);
115        }
116    }
117
118    #[must_use]
119    pub fn from_values(values: &[Scalar]) -> Self {
120        let len = values.len();
121        let word_count = len.div_ceil(64);
122        let mut words = vec![0_u64; word_count];
123        let mut all_valid = true;
124        for (idx, value) in values.iter().enumerate() {
125            if !value.is_missing() {
126                words[idx / 64] |= 1_u64 << (idx % 64);
127            } else {
128                all_valid = false;
129            }
130        }
131        if all_valid {
132            return Self::all_valid(len);
133        }
134        Self { words, len }
135    }
136
137    /// Build a validity mask from a contiguous `f64` buffer, marking NaN
138    /// positions invalid. pandas treats float NaN as missing, so this mirrors
139    /// what `from_values` would produce for the equivalent `Scalar::Float64`
140    /// values (`Scalar::is_missing` is true for NaN). See
141    /// [`Column::from_f64_values`].
142    #[must_use]
143    pub fn from_f64(data: &[f64]) -> Self {
144        let len = data.len();
145        let word_count = len.div_ceil(64);
146        let mut words = vec![0_u64; word_count];
147        let mut all_valid = true;
148        for (idx, &v) in data.iter().enumerate() {
149            if !v.is_nan() {
150                words[idx / 64] |= 1_u64 << (idx % 64);
151            } else {
152                all_valid = false;
153            }
154        }
155        if all_valid {
156            return Self::all_valid(len);
157        }
158        Self { words, len }
159    }
160
161    #[must_use]
162    pub fn all_valid(len: usize) -> Self {
163        Self {
164            words: Vec::new(),
165            len,
166        }
167    }
168
169    /// Build a mask from pre-packed validity words (LSB-first within each
170    /// word, bit `i` of word `i / 64` = row `i` valid). Bits at positions
171    /// `>= len` must be zero. Public (hidden) for typed builders that compute
172    /// validity in bulk (br-frankenpandas-7wxoc).
173    #[must_use]
174    #[doc(hidden)]
175    pub fn from_words(words: Vec<u64>, len: usize) -> Self {
176        debug_assert_eq!(words.len(), len.div_ceil(64));
177        debug_assert!(
178            len.is_multiple_of(64) || words.last().is_none_or(|w| w >> (len % 64) == 0),
179            "validity bits beyond len must be zero"
180        );
181        if Self::words_are_all_valid(&words, len) {
182            return Self::all_valid(len);
183        }
184        Self { words, len }
185    }
186
187    #[must_use]
188    pub fn all_invalid(len: usize) -> Self {
189        let word_count = len.div_ceil(64);
190        Self {
191            words: vec![0_u64; word_count],
192            len,
193        }
194    }
195
196    #[must_use]
197    pub fn get(&self, idx: usize) -> bool {
198        if idx >= self.len {
199            return false;
200        }
201        if self.is_all_valid_sentinel() {
202            return true;
203        }
204        (self.words[idx / 64] >> (idx % 64)) & 1 == 1
205    }
206
207    pub fn set(&mut self, idx: usize, value: bool) {
208        if idx >= self.len {
209            return;
210        }
211        if self.is_all_valid_sentinel() {
212            if value {
213                return;
214            }
215            self.materialize_if_all_valid_sentinel();
216        }
217        if value {
218            self.words[idx / 64] |= 1_u64 << (idx % 64);
219        } else {
220            self.words[idx / 64] &= !(1_u64 << (idx % 64));
221        }
222    }
223
224    #[must_use]
225    pub fn count_valid(&self) -> usize {
226        if self.is_all_valid_sentinel() {
227            return self.len;
228        }
229        let full_words = self.len / 64;
230        let mut count: u32 = self.words[..full_words]
231            .iter()
232            .map(|w| w.count_ones())
233            .sum();
234        let remainder = self.len % 64;
235        if remainder > 0 && full_words < self.words.len() {
236            let mask = (1_u64 << remainder) - 1;
237            count += (self.words[full_words] & mask).count_ones();
238        }
239        count as usize
240    }
241
242    #[must_use]
243    pub fn len(&self) -> usize {
244        self.len
245    }
246
247    #[must_use]
248    pub fn is_empty(&self) -> bool {
249        self.len == 0
250    }
251
252    #[must_use]
253    pub fn and_mask(&self, other: &Self) -> Self {
254        let len = self.len.min(other.len);
255        if len == 0 {
256            return Self::all_invalid(0);
257        }
258        if self.is_all_valid_sentinel() && other.is_all_valid_sentinel() {
259            return Self::all_valid(len);
260        }
261        if self.is_all_valid_sentinel() {
262            return other.slice(0, len);
263        }
264        if other.is_all_valid_sentinel() {
265            return self.slice(0, len);
266        }
267        let word_count = len.div_ceil(64);
268        let words = self.words[..word_count]
269            .iter()
270            .zip(&other.words[..word_count])
271            .map(|(a, b)| a & b)
272            .collect();
273        Self { words, len }
274    }
275
276    #[must_use]
277    pub fn or_mask(&self, other: &Self) -> Self {
278        let len = self.len.min(other.len);
279        if len == 0 {
280            return Self::all_invalid(0);
281        }
282        if self.is_all_valid_sentinel() || other.is_all_valid_sentinel() {
283            return Self::all_valid(len);
284        }
285        let word_count = len.div_ceil(64);
286        let words = self.words[..word_count]
287            .iter()
288            .zip(&other.words[..word_count])
289            .map(|(a, b)| a | b)
290            .collect();
291        Self { words, len }
292    }
293
294    #[must_use]
295    pub fn not_mask(&self) -> Self {
296        if self.is_all_valid_sentinel() {
297            return Self::all_invalid(self.len);
298        }
299        let mut words: Vec<u64> = self.words.iter().map(|w| !w).collect();
300        let remainder = self.len % 64;
301        if remainder > 0 && !words.is_empty() {
302            let last = words.len() - 1;
303            words[last] &= (1_u64 << remainder) - 1;
304        }
305        Self {
306            words,
307            len: self.len,
308        }
309    }
310
311    /// Returns an iterator yielding bool values, compatible with the previous
312    /// `&[bool]` API. Materializes from the packed representation.
313    pub fn bits(&self) -> impl Iterator<Item = bool> + '_ {
314        (0..self.len).map(|idx| self.get(idx))
315    }
316
317    /// Number of invalid (cleared) positions.
318    #[must_use]
319    pub fn count_invalid(&self) -> usize {
320        self.len.saturating_sub(self.count_valid())
321    }
322
323    /// Whether any bit is set.
324    #[must_use]
325    pub fn any(&self) -> bool {
326        if self.is_all_valid_sentinel() {
327            return true;
328        }
329        self.count_valid() > 0
330    }
331
332    /// Whether all bits are set.
333    #[must_use]
334    pub fn all(&self) -> bool {
335        if self.is_all_valid_sentinel() {
336            return true;
337        }
338        self.count_valid() == self.len
339    }
340
341    /// Bitwise XOR (symmetric difference) with another mask.
342    ///
343    /// Produced length is `min(self.len, other.len)`.
344    #[must_use]
345    pub fn xor_mask(&self, other: &Self) -> Self {
346        let len = self.len.min(other.len);
347        if len == 0 {
348            return Self::all_invalid(0);
349        }
350        if self.is_all_valid_sentinel() && other.is_all_valid_sentinel() {
351            return Self::all_invalid(len);
352        }
353        if self.is_all_valid_sentinel() {
354            return other.slice(0, len).not_mask();
355        }
356        if other.is_all_valid_sentinel() {
357            return self.slice(0, len).not_mask();
358        }
359        let word_count = len.div_ceil(64);
360        let mut words: Vec<u64> = self.words[..word_count]
361            .iter()
362            .zip(&other.words[..word_count])
363            .map(|(a, b)| a ^ b)
364            .collect();
365        let remainder = len % 64;
366        if remainder > 0 && !words.is_empty() {
367            let last = words.len() - 1;
368            words[last] &= (1_u64 << remainder) - 1;
369        }
370        Self { words, len }
371    }
372
373    /// Extract a contiguous sub-range as a new mask.
374    ///
375    /// `start..start+len` is clamped to the available tail — callers
376    /// don't need to pre-validate against `self.len`.
377    #[must_use]
378    pub fn slice(&self, start: usize, len: usize) -> Self {
379        if start >= self.len {
380            return Self::all_invalid(0);
381        }
382        let effective_len = len.min(self.len - start);
383        if self.is_all_valid_sentinel() {
384            return Self::all_valid(effective_len);
385        }
386        let mut out = Self::all_invalid(effective_len);
387        for i in 0..effective_len {
388            if self.get(start + i) {
389                out.set(i, true);
390            }
391        }
392        out
393    }
394
395    /// Append another mask's bits to the end of this one.
396    #[must_use]
397    pub fn concat(&self, other: &Self) -> Self {
398        let total = self.len + other.len;
399        if self.all() && other.all() {
400            return Self::all_valid(total);
401        }
402        let mut out = Self::all_invalid(total);
403        for i in 0..self.len {
404            if self.get(i) {
405                out.set(i, true);
406            }
407        }
408        for i in 0..other.len {
409            if other.get(i) {
410                out.set(self.len + i, true);
411            }
412        }
413        out
414    }
415
416    /// Position of the first valid bit.
417    #[must_use]
418    pub fn first_valid(&self) -> Option<usize> {
419        if self.is_all_valid_sentinel() {
420            return Some(0);
421        }
422        (0..self.len).find(|&i| self.get(i))
423    }
424
425    /// Position of the last valid bit.
426    #[must_use]
427    pub fn last_valid(&self) -> Option<usize> {
428        if self.is_all_valid_sentinel() {
429            return Some(self.len - 1);
430        }
431        (0..self.len).rev().find(|&i| self.get(i))
432    }
433}
434
435impl PartialEq for ValidityMask {
436    fn eq(&self, other: &Self) -> bool {
437        self.len == other.len && self.bits().eq(other.bits())
438    }
439}
440
441impl Serialize for ValidityMask {
442    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
443        use serde::ser::SerializeStruct;
444        let bits: Vec<bool> = self.bits().collect();
445        let mut state = serializer.serialize_struct("ValidityMask", 1)?;
446        state.serialize_field("bits", &bits)?;
447        state.end()
448    }
449}
450
451impl<'de> Deserialize<'de> for ValidityMask {
452    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
453        #[derive(Deserialize)]
454        struct Raw {
455            bits: Vec<bool>,
456        }
457        let raw = Raw::deserialize(deserializer)?;
458        let len = raw.bits.len();
459        let word_count = len.div_ceil(64);
460        let mut words = vec![0_u64; word_count];
461        for (idx, &valid) in raw.bits.iter().enumerate() {
462            if valid {
463                words[idx / 64] |= 1_u64 << (idx % 64);
464            }
465        }
466        Ok(Self::from_words(words, len))
467    }
468}
469
470/// AG-10: Typed array representation for vectorized batch execution.
471///
472/// Stores column data as contiguous typed arrays rather than `Vec<Scalar>`.
473/// Validity is tracked by `ValidityMask`; invalid positions hold unspecified
474/// values in the typed array (callers must check validity before reading).
475///
476/// This eliminates per-element enum dispatch for arithmetic operations,
477/// enabling SIMD auto-vectorization on `&[f64]` / `&[i64]` slices.
478#[derive(Debug, Clone, PartialEq)]
479pub enum ColumnData {
480    Float64(Vec<f64>),
481    Int64(Vec<i64>),
482    Bool(Vec<bool>),
483    Utf8(Vec<String>),
484    Timedelta64(Vec<i64>),
485    Datetime64(Vec<i64>),
486    Period(Vec<i64>),
487    Interval(Vec<Interval>),
488}
489
490impl ColumnData {
491    /// Materialize typed array from a `Vec<Scalar>` and its `ValidityMask`.
492    ///
493    /// Invalid positions get a default sentinel (0 / 0.0 / false / "").
494    /// The caller must pair this with the corresponding `ValidityMask` to
495    /// interpret which positions are actually valid.
496    #[must_use]
497    pub fn from_scalars(values: &[Scalar], dtype: DType) -> Self {
498        match dtype {
499            DType::Float64 => {
500                let data: Vec<f64> = values
501                    .iter()
502                    .map(|v| match v {
503                        Scalar::Float64(f) => *f,
504                        Scalar::Int64(i) => *i as f64,
505                        Scalar::Bool(true) => 1.0,
506                        Scalar::Bool(false) => 0.0,
507                        _ => 0.0, // sentinel for invalid positions
508                    })
509                    .collect();
510                Self::Float64(data)
511            }
512            DType::Int64 | DType::Int64Nullable => {
513                let data: Vec<i64> = values
514                    .iter()
515                    .map(|v| match v {
516                        Scalar::Int64(i) => *i,
517                        Scalar::Bool(b) => i64::from(*b),
518                        _ => 0, // sentinel for invalid positions
519                    })
520                    .collect();
521                Self::Int64(data)
522            }
523            DType::Categorical => {
524                let data: Vec<i64> = values
525                    .iter()
526                    .map(|v| match v {
527                        Scalar::Int64(i) => *i,
528                        _ => -1,
529                    })
530                    .collect();
531                Self::Int64(data)
532            }
533            DType::Bool | DType::BoolNullable => {
534                let data: Vec<bool> = values
535                    .iter()
536                    .map(|v| match v {
537                        Scalar::Bool(b) => *b,
538                        _ => false,
539                    })
540                    .collect();
541                Self::Bool(data)
542            }
543            DType::Utf8 => {
544                let data: Vec<String> = values
545                    .iter()
546                    .map(|v| match v {
547                        Scalar::Utf8(s) => s.clone(),
548                        _ => String::new(),
549                    })
550                    .collect();
551                Self::Utf8(data)
552            }
553            DType::Null => Self::Float64(vec![0.0; values.len()]),
554            DType::Sparse => Self::Utf8(vec![String::new(); values.len()]),
555            DType::Timedelta64 => {
556                let data: Vec<i64> = values
557                    .iter()
558                    .map(|v| match v {
559                        Scalar::Timedelta64(n) => *n,
560                        Scalar::Int64(i) => *i,
561                        _ => Timedelta::NAT,
562                    })
563                    .collect();
564                Self::Timedelta64(data)
565            }
566            DType::Datetime64 => {
567                let data: Vec<i64> = values
568                    .iter()
569                    .map(|v| match v {
570                        Scalar::Datetime64(n) => *n,
571                        Scalar::Int64(i) => *i,
572                        _ => Timestamp::NAT,
573                    })
574                    .collect();
575                Self::Datetime64(data)
576            }
577            DType::Period => {
578                let data: Vec<i64> = values
579                    .iter()
580                    .map(|v| match v {
581                        Scalar::Period(n) => *n,
582                        Scalar::Int64(i) => *i,
583                        _ => i64::MIN, // NaT sentinel for Period
584                    })
585                    .collect();
586                Self::Period(data)
587            }
588            DType::Interval => {
589                let data: Vec<Interval> = values
590                    .iter()
591                    .map(|v| match v {
592                        Scalar::Interval(interval) => *interval,
593                        _ => Interval::new(0.0, 0.0, IntervalClosed::Right),
594                    })
595                    .collect();
596                Self::Interval(data)
597            }
598        }
599    }
600
601    /// Convert typed array back to `Vec<Scalar>`, respecting `ValidityMask`.
602    #[must_use]
603    pub fn to_scalars(&self, dtype: DType, validity: &ValidityMask) -> Vec<Scalar> {
604        match self {
605            Self::Float64(data) => data
606                .iter()
607                .enumerate()
608                .map(|(i, v)| {
609                    if !validity.get(i) {
610                        Scalar::missing_for_dtype(dtype)
611                    } else {
612                        Scalar::Float64(*v)
613                    }
614                })
615                .collect(),
616            Self::Int64(data) => data
617                .iter()
618                .enumerate()
619                .map(|(i, v)| {
620                    if !validity.get(i) {
621                        Scalar::missing_for_dtype(dtype)
622                    } else {
623                        Scalar::Int64(*v)
624                    }
625                })
626                .collect(),
627            Self::Bool(data) => data
628                .iter()
629                .enumerate()
630                .map(|(i, v)| {
631                    if !validity.get(i) {
632                        Scalar::missing_for_dtype(dtype)
633                    } else {
634                        Scalar::Bool(*v)
635                    }
636                })
637                .collect(),
638            Self::Utf8(data) => data
639                .iter()
640                .enumerate()
641                .map(|(i, v)| {
642                    if !validity.get(i) {
643                        Scalar::missing_for_dtype(dtype)
644                    } else {
645                        Scalar::Utf8(v.clone())
646                    }
647                })
648                .collect(),
649            Self::Timedelta64(data) => data
650                .iter()
651                .enumerate()
652                .map(|(i, v)| {
653                    if !validity.get(i) || *v == Timedelta::NAT {
654                        Scalar::Timedelta64(Timedelta::NAT)
655                    } else {
656                        Scalar::Timedelta64(*v)
657                    }
658                })
659                .collect(),
660            Self::Datetime64(data) => data
661                .iter()
662                .enumerate()
663                .map(|(i, v)| {
664                    if !validity.get(i) || *v == Timestamp::NAT {
665                        Scalar::Datetime64(Timestamp::NAT)
666                    } else {
667                        Scalar::Datetime64(*v)
668                    }
669                })
670                .collect(),
671            Self::Period(data) => data
672                .iter()
673                .enumerate()
674                .map(|(i, v)| {
675                    if !validity.get(i) || *v == i64::MIN {
676                        Scalar::Period(i64::MIN)
677                    } else {
678                        Scalar::Period(*v)
679                    }
680                })
681                .collect(),
682            Self::Interval(data) => data
683                .iter()
684                .enumerate()
685                .map(|(i, v)| {
686                    if !validity.get(i) {
687                        Scalar::missing_for_dtype(dtype)
688                    } else {
689                        Scalar::Interval(*v)
690                    }
691                })
692                .collect(),
693        }
694    }
695
696    #[must_use]
697    pub fn len(&self) -> usize {
698        match self {
699            Self::Float64(d) => d.len(),
700            Self::Int64(d) => d.len(),
701            Self::Bool(d) => d.len(),
702            Self::Utf8(d) => d.len(),
703            Self::Timedelta64(d) => d.len(),
704            Self::Datetime64(d) => d.len(),
705            Self::Period(d) => d.len(),
706            Self::Interval(d) => d.len(),
707        }
708    }
709
710    #[must_use]
711    pub fn is_empty(&self) -> bool {
712        self.len() == 0
713    }
714}
715
716/// Compare two non-missing scalars using the given comparison operator.
717///
718/// Both scalars are converted to `f64` for comparison. For `Utf8` values,
719/// lexicographic ordering is used. Returns `Err` for incompatible types.
720fn scalar_compare(left: &Scalar, right: &Scalar, op: ComparisonOp) -> Result<bool, ColumnError> {
721    // Coerce differing numeric types to avoid precision loss (e.g. Bool vs Int64).
722    let left_dtype = left.dtype();
723    let right_dtype = right.dtype();
724    if left_dtype != right_dtype
725        && let Ok(common) = fp_types::common_dtype(left_dtype, right_dtype)
726        && common == DType::Int64
727    {
728        let l_cast = fp_types::cast_scalar(left, common)?;
729        let r_cast = fp_types::cast_scalar(right, common)?;
730        // Handle Int64 comparisons to avoid precision loss in f64 cast.
731        if let (Scalar::Int64(a), Scalar::Int64(b)) = (&l_cast, &r_cast) {
732            return Ok(match op {
733                ComparisonOp::Gt => a > b,
734                ComparisonOp::Lt => a < b,
735                ComparisonOp::Eq => a == b,
736                ComparisonOp::Ne => a != b,
737                ComparisonOp::Ge => a >= b,
738                ComparisonOp::Le => a <= b,
739            });
740        }
741    }
742
743    // Handle Utf8 comparisons separately (lexicographic).
744    if let (Scalar::Utf8(a), Scalar::Utf8(b)) = (left, right) {
745        return Ok(match op {
746            ComparisonOp::Gt => a > b,
747            ComparisonOp::Lt => a < b,
748            ComparisonOp::Eq => a == b,
749            ComparisonOp::Ne => a != b,
750            ComparisonOp::Ge => a >= b,
751            ComparisonOp::Le => a <= b,
752        });
753    }
754
755    // Handle Bool comparisons (false < true).
756    if let (Scalar::Bool(a), Scalar::Bool(b)) = (left, right) {
757        return Ok(match op {
758            ComparisonOp::Gt => *a && !*b,
759            ComparisonOp::Lt => !*a && *b,
760            ComparisonOp::Eq => a == b,
761            ComparisonOp::Ne => a != b,
762            ComparisonOp::Ge => *a >= *b,
763            ComparisonOp::Le => *a <= *b,
764        });
765    }
766
767    // Handle Int64 comparisons to avoid precision loss in f64 cast.
768    if let (Scalar::Int64(a), Scalar::Int64(b)) = (left, right) {
769        return Ok(match op {
770            ComparisonOp::Gt => a > b,
771            ComparisonOp::Lt => a < b,
772            ComparisonOp::Eq => a == b,
773            ComparisonOp::Ne => a != b,
774            ComparisonOp::Ge => a >= b,
775            ComparisonOp::Le => a <= b,
776        });
777    }
778
779    // Numeric: convert both to f64.
780    let lhs = left.to_f64()?;
781    let rhs = right.to_f64()?;
782
783    Ok(match op {
784        ComparisonOp::Gt => lhs > rhs,
785        ComparisonOp::Lt => lhs < rhs,
786        ComparisonOp::Eq => lhs == rhs,
787        ComparisonOp::Ne => lhs != rhs,
788        ComparisonOp::Ge => lhs >= rhs,
789        ComparisonOp::Le => lhs <= rhs,
790    })
791}
792
793/// AG-10: Vectorized binary arithmetic on `&[f64]` slices.
794///
795/// Both inputs must have the same length. The combined validity mask
796/// determines which positions produce a valid output; invalid positions
797/// get 0.0 as a sentinel. Returns `(result_data, result_validity)`.
798fn vectorized_binary_f64(
799    left: &[f64],
800    right: &[f64],
801    left_validity: &ValidityMask,
802    right_validity: &ValidityMask,
803    op: ArithmeticOp,
804) -> (Vec<f64>, ValidityMask) {
805    let combined = left_validity.and_mask(right_validity);
806
807    // Zip iterators over contiguous slices — auto-vectorizable by LLVM.
808    let apply = binary_f64_apply(op);
809
810    let out: Vec<f64> = left
811        .iter()
812        .zip(right.iter())
813        .enumerate()
814        .map(|(i, (&l, &r))| {
815            if combined.get(i) {
816                apply(l, r)
817            } else {
818                0.0 // sentinel for invalid positions
819            }
820        })
821        .collect();
822
823    (out, combined)
824}
825
826fn binary_f64_apply(op: ArithmeticOp) -> fn(f64, f64) -> f64 {
827    match op {
828        ArithmeticOp::Add => |a, b| a + b,
829        ArithmeticOp::Sub => |a, b| a - b,
830        ArithmeticOp::Mul => |a, b| a * b,
831        ArithmeticOp::Div => |a, b| a / b,
832        ArithmeticOp::Mod => python_mod_f64,
833        ArithmeticOp::Pow => |a, b| a.powf(b),
834        ArithmeticOp::FloorDiv => python_floor_div_f64,
835    }
836}
837
838/// Apply a binary f64 op over two equal-length slices with the operation
839/// monomorphized into each arm (br-frankenpandas-f64simd). Unlike a
840/// `fn(f64,f64)->f64` pointer applied per element, the closed-form arms let LLVM
841/// autovectorize Add/Sub/Mul/Div to packed SIMD; Mod/Pow/FloorDiv keep their
842/// scalar helpers but still avoid the indirect call. Element-for-element
843/// identical to `binary_f64_apply(op)` applied in order.
844#[inline]
845fn apply_f64_slices(op: ArithmeticOp, a: &[f64], b: &[f64]) -> Vec<f64> {
846    match op {
847        ArithmeticOp::Add => a.iter().zip(b).map(|(x, y)| x + y).collect(),
848        ArithmeticOp::Sub => a.iter().zip(b).map(|(x, y)| x - y).collect(),
849        ArithmeticOp::Mul => a.iter().zip(b).map(|(x, y)| x * y).collect(),
850        ArithmeticOp::Div => a.iter().zip(b).map(|(x, y)| x / y).collect(),
851        ArithmeticOp::Mod => a
852            .iter()
853            .zip(b)
854            .map(|(x, y)| python_mod_f64(*x, *y))
855            .collect(),
856        ArithmeticOp::Pow => a.iter().zip(b).map(|(x, y)| x.powf(*y)).collect(),
857        ArithmeticOp::FloorDiv => a
858            .iter()
859            .zip(b)
860            .map(|(x, y)| python_floor_div_f64(*x, *y))
861            .collect(),
862    }
863}
864
/// Number of integers in the inclusive range `start..=end`.
///
/// Returns `None` when `end - start + 1` overflows `i64`, is negative, or
/// does not fit in `usize`. A range with `end == start - 1` has length zero.
fn unit_range_len(start: i64, end: i64) -> Option<usize> {
    let span = end.checked_sub(start)?;
    let count = span.checked_add(1)?;
    usize::try_from(count).ok()
}
868
/// Float modulo with Python / NumPy semantics: a non-zero result takes the
/// sign of the divisor.
///
/// The result is derived from the exact IEEE remainder (`%`, i.e. `fmod`)
/// rather than from `lhs - floor(lhs / rhs) * rhs`. The latter rounds the
/// quotient before multiplying back and can lose the remainder entirely:
/// it yields `0.0` for `1.0 % 0.1` and for `1e17 % 3.0`, where Python gives
/// `0.09999999999999995` and `1.0`.
///
/// Special cases:
/// - NaN in either operand, a zero divisor, or an infinite dividend gives
///   NaN.
/// - An infinite divisor with a finite dividend gives the dividend when the
///   signs agree and the divisor otherwise; a zero dividend gives a zero
///   carrying the divisor's sign.
/// - An exact multiple gives a zero carrying the divisor's sign.
fn python_mod_f64(lhs: f64, rhs: f64) -> f64 {
    if lhs.is_nan() || rhs.is_nan() {
        return f64::NAN;
    }

    // Truncated remainder: exact, carries the sign of `lhs`, and is NaN for a
    // zero divisor or an infinite dividend.
    let remainder = lhs % rhs;
    if remainder.is_nan() {
        return f64::NAN;
    }
    if remainder == 0.0 {
        return 0.0_f64.copysign(rhs);
    }
    // Move a remainder with the wrong sign into the divisor's half-line.
    if (remainder < 0.0) != (rhs < 0.0) {
        remainder + rhs
    } else {
        remainder
    }
}
890
/// Python/NumPy-style floating-point floor division (`lhs // rhs`).
///
/// Special cases:
/// - any NaN operand yields NaN;
/// - an infinite divisor yields NaN for an infinite dividend, a signed zero
///   for a zero dividend, `0.0` when the signs agree and `-1.0` otherwise;
/// - a zero divisor yields `lhs / rhs` (±inf, or NaN for `0 / 0`);
/// - an infinite dividend with a finite non-zero divisor yields NaN.
///
/// The finite case derives the quotient from the exact IEEE remainder
/// (`fmod`) the way CPython and NumPy's `divmod` do. Flooring the rounded
/// quotient `lhs / rhs` directly is off by one whenever that rounding lands
/// on the next integer (`1.0 // 0.1` must be `9.0`, not `10.0`).
fn python_floor_div_f64(lhs: f64, rhs: f64) -> f64 {
    if lhs.is_nan() || rhs.is_nan() {
        return f64::NAN;
    }

    if rhs.is_infinite() {
        if lhs.is_infinite() {
            return f64::NAN;
        }
        if lhs == 0.0 {
            return (lhs / rhs).floor();
        }
        return if lhs.is_sign_positive() == rhs.is_sign_positive() {
            0.0
        } else {
            -1.0
        };
    }

    // Finite divisor from here on.
    if rhs == 0.0 {
        return lhs / rhs;
    }
    if lhs.is_infinite() {
        return f64::NAN;
    }

    // `lhs - remainder` is (very nearly) an exact multiple of `rhs`.
    let remainder = lhs % rhs;
    let mut quotient = (lhs - remainder) / rhs;
    if remainder != 0.0 && ((rhs < 0.0) != (remainder < 0.0)) {
        // The truncated quotient sits one above the floor when the remainder
        // and the divisor disagree in sign.
        quotient -= 1.0;
    }

    if quotient == 0.0 {
        // Preserve the sign of a zero quotient.
        return 0.0_f64.copysign(lhs / rhs);
    }

    // Snap to the nearest integral value to absorb the rounding of the division.
    let floored = quotient.floor();
    if quotient - floored > 0.5 {
        floored + 1.0
    } else {
        floored
    }
}
914
/// Python-style integer floor division: the quotient is rounded toward
/// negative infinity rather than toward zero.
///
/// `rhs` must be non-zero (checked with a debug assertion). The single
/// overflowing case, `i64::MIN // -1`, returns `i64::MIN`.
fn python_floor_div_i64(lhs: i64, rhs: i64) -> i64 {
    debug_assert_ne!(rhs, 0);
    match (lhs, rhs) {
        (i64::MIN, -1) => i64::MIN,
        _ => {
            let truncated = lhs / rhs;
            // A non-zero remainder carries the dividend's sign, so truncation
            // rounded the wrong way exactly when the operands' signs differ.
            let inexact = lhs % rhs != 0;
            let signs_differ = (lhs < 0) != (rhs < 0);
            if inexact && signs_differ {
                truncated - 1
            } else {
                truncated
            }
        }
    }
}
929
930fn python_mod_i64(lhs: i64, rhs: i64) -> i64 {
931    debug_assert_ne!(rhs, 0);
932    if lhs == i64::MIN && rhs == -1 {
933        return 0;
934    }
935
936    let quotient = i128::from(python_floor_div_i64(lhs, rhs));
937    let remainder = i128::from(lhs) - quotient * i128::from(rhs);
938    let Ok(value) = i64::try_from(remainder) else {
939        return 0;
940    };
941    value
942}
943
944/// AG-10: Vectorized binary arithmetic on `&[i64]` slices.
945///
946/// Produces `i64` results for Add/Sub/Mul. For Div, returns `None`
947/// to signal the caller should use the `f64` path instead.
948fn vectorized_binary_i64(
949    left: &[i64],
950    right: &[i64],
951    left_validity: &ValidityMask,
952    right_validity: &ValidityMask,
953    op: ArithmeticOp,
954) -> Option<(Vec<i64>, ValidityMask)> {
955    let combined = left_validity.and_mask(right_validity);
956
957    // Div and Pow always produce floats
958    if matches!(op, ArithmeticOp::Div | ArithmeticOp::Pow) {
959        return None;
960    }
961
962    // For Mod/FloorDiv: if any non-missing right operand is zero, fall back
963    // to float. We must NOT gate on `combined` (left AND right validity) —
964    // pandas promotes the entire result dtype to Float64 whenever a zero
965    // divisor appears in the right operand, regardless of whether the
966    // matching left position is missing. Gating on combined caused
967    // promotion to be skipped when the zero divisor's left counterpart
968    // was Null, drifting the column dtype against the conformance oracle
969    // (fuzz_column_arith corpus surfaced this on the seed
970    // [97, 4, 11, 0, 0, 0, 0, 0, 0, 0, 10]).
971    if matches!(op, ArithmeticOp::Mod | ArithmeticOp::FloorDiv) {
972        let has_zero_divisor = right
973            .iter()
974            .enumerate()
975            .any(|(i, &r)| right_validity.get(i) && r == 0);
976        if has_zero_divisor {
977            return None;
978        }
979    }
980
981    let apply: fn(i64, i64) -> i64 = match op {
982        ArithmeticOp::Add => |a, b| a.wrapping_add(b),
983        ArithmeticOp::Sub => |a, b| a.wrapping_sub(b),
984        ArithmeticOp::Mul => |a, b| a.wrapping_mul(b),
985        ArithmeticOp::Mod => python_mod_i64,
986        ArithmeticOp::FloorDiv => python_floor_div_i64,
987        ArithmeticOp::Div | ArithmeticOp::Pow => unreachable!("handled by early return above"),
988    };
989
990    let out: Vec<i64> = left
991        .iter()
992        .zip(right.iter())
993        .enumerate()
994        .map(|(i, (&l, &r))| {
995            if combined.get(i) {
996                apply(l, r)
997            } else {
998                0 // sentinel for invalid positions
999            }
1000        })
1001        .collect();
1002
1003    Some((out, combined))
1004}
1005
/// Backing storage for a column's `Scalar` view.
///
/// `Eager` holds the `Vec<Scalar>` directly. Every `Lazy*` variant holds a
/// typed or compressed representation plus a `values: OnceLock<Vec<Scalar>>`
/// cell that `ScalarValues::as_slice` fills on first use and then reuses.
enum ScalarValues {
    /// Already-materialized scalars; `as_slice` returns them as-is.
    Eager(Vec<Scalar>),
    /// `Arc`-shared `i64` buffer; each element materializes as
    /// `Scalar::Int64`.
    LazyAllValidInt64 {
        data: Arc<[i64]>,
        values: OnceLock<Vec<Scalar>>,
    },
    /// `Arc`-shared `f64` buffer; each element materializes as
    /// `Scalar::Float64`.
    LazyAllValidFloat64 {
        data: Arc<[f64]>,
        values: OnceLock<Vec<Scalar>>,
    },
    /// Contiguous `f64` data + validity. A slot materializes as
    /// `Scalar::Float64` when its validity bit is set or the stored value is
    /// NaN, and as `Scalar::Null(NullKind::NaN)` otherwise.
    LazyNullableFloat64 {
        data: Vec<f64>,
        validity: ValidityMask,
        values: OnceLock<Vec<Scalar>>,
    },
    /// Nullable Int64 mirror of `LazyNullableFloat64`
    /// (br-frankenpandas-lt5qx): contiguous `i64` data + validity, where an
    /// invalid slot materializes `Scalar::Null(NullKind::Null)` — exactly
    /// `Scalar::missing_for_dtype(DType::Int64)`. Unlike Float64 there is no
    /// NaN-as-missing ambiguity: missingness is the validity bit alone.
    LazyNullableInt64 {
        data: Vec<i64>,
        validity: ValidityMask,
        values: OnceLock<Vec<Scalar>>,
    },
    /// `Arc`-shared `bool` buffer; each element materializes as
    /// `Scalar::Bool`.
    LazyAllValidBool {
        data: Arc<[bool]>,
        values: OnceLock<Vec<Scalar>>,
    },
    /// Contiguous backing for all-valid Utf8 columns
    /// (br-frankenpandas-2krr0): one rolling byte buffer + n+1 offsets (row
    /// `i` = `bytes[offsets[i]..offsets[i+1]]`, always valid UTF-8 by
    /// construction — only built from `&str` data). String-output ops write
    /// here without a per-row heap `String`; the `Vec<Scalar::Utf8>` view
    /// materializes once on demand.
    ///
    /// The byte buffer and offsets are `Arc`-shared (br-frankenpandas-oifvy):
    /// the backing is immutable after construction (string ops always build a
    /// fresh buffer, never mutate in place), so `Column::clone` shares the
    /// `Arc` instead of deep-copying the (often large) byte buffer — O(1)
    /// instead of O(n), and observationally a deep copy because the data can
    /// never change underneath a shared reader.
    ///
    /// `strictly_increasing` and `fixed_width` are lazily filled caches.
    /// NOTE(review): presumably they hold the results of
    /// `contiguous_utf8_offsets_are_strictly_increasing` and
    /// `contiguous_utf8_fixed_width` for these buffers — confirm at the fill
    /// sites.
    LazyContiguousUtf8 {
        bytes: Arc<[u8]>,
        offsets: Arc<[usize]>,
        strictly_increasing: OnceLock<bool>,
        fixed_width: OnceLock<Option<usize>>,
        values: OnceLock<Vec<Scalar>>,
    },
    /// Run-length backing for all-valid Int64 columns whose values arrive as
    /// repeat runs (br-frankenpandas-3ad4n) — e.g. the left lanes of a dense
    /// inner join, where every matched left value repeats `bucket_len` times.
    /// Carries O(runs) memory until a consumer asks for the contiguous i64
    /// buffer (`as_i64_slice`) or the Scalar view, each expanded once on
    /// demand.
    ///
    /// `runs` holds `(value, run_len)` pairs, `total_len` is the sum of the
    /// run lengths, and `data` caches the expanded `i64` buffer.
    LazyRepeatRunsInt64 {
        runs: Vec<(i64, usize)>,
        total_len: usize,
        data: OnceLock<Vec<i64>>,
        values: OnceLock<Vec<Scalar>>,
    },
    /// Repeat-run backing where the per-run values and run lengths are stored
    /// separately so multiple dense-join output lanes can share one immutable
    /// run-length descriptor.
    ///
    /// `run_values[i]` repeats `run_lens[i]` times; `data` caches the
    /// expanded `i64` buffer.
    LazyRepeatValuesInt64 {
        run_values: Vec<i64>,
        run_lens: Arc<[usize]>,
        total_len: usize,
        data: OnceLock<Vec<i64>>,
        values: OnceLock<Vec<Scalar>>,
    },
    /// Repeated-slice backing for all-valid Int64 columns whose values arrive
    /// as slices of one shared tape (e.g. dense join right lanes). Each
    /// segment is `(start, len)` into `data`, and segment order is the
    /// observable row order.
    ///
    /// `expanded` caches the gathered contiguous `i64` buffer.
    LazyRepeatedSlicesInt64 {
        data: Vec<i64>,
        segments: Arc<[(usize, usize)]>,
        total_len: usize,
        expanded: OnceLock<Vec<i64>>,
        values: OnceLock<Vec<Scalar>>,
    },
    /// Zero-copy contiguous row-range VIEW over an `Arc`-shared contiguous-Utf8
    /// backing (br-frankenpandas-jbyuc.1.1.1). Row `i` (`0..len`) is
    /// `bytes[offsets[start + i] .. offsets[start + i + 1]]` — the same shared
    /// `bytes`/`offsets` as the source `LazyContiguousUtf8`, with `start`/`len`
    /// selecting a contiguous window. `take_positions` returns this in O(1)
    /// (two `Arc::clone`s) when the requested positions are a contiguous
    /// ascending range, deferring the per-row byte gather until a consumer
    /// actually materializes the Scalar view. The byte content is identical to
    /// the eager gather: same bytes, same order, all-valid.
    LazyUtf8Slice {
        bytes: Arc<[u8]>,
        offsets: Arc<[usize]>,
        start: usize,
        len: usize,
        values: OnceLock<Vec<Scalar>>,
    },
}
1105
/// A shared byte buffer, a shared `usize` offsets buffer, and one `usize`.
/// NOTE(review): presumably the `(bytes, offsets, start)` source that a
/// `LazyUtf8Slice` view is built from — confirm against the code that
/// produces and consumes this tuple.
type Utf8ArcViewSource = (Arc<[u8]>, Arc<[usize]>, usize);
1107
1108impl ScalarValues {
    /// Wrap an already-materialized `Vec<Scalar>` as the `Eager` backing; no
    /// lazy typed buffer is attached.
    fn from_vec(values: Vec<Scalar>) -> Self {
        Self::Eager(values)
    }
1112
1113    fn lazy_all_valid_int64(data: Vec<i64>) -> Self {
1114        Self::lazy_all_valid_int64_arc(Arc::from(data))
1115    }
1116
1117    /// Share an existing `Arc` i64 buffer in O(1) (used by `Clone`).
1118    /// (br-frankenpandas-tin7r: immutable typed buffers clone by Arc, the
1119    /// numeric mirror of the utf8 oifvy lever.)
1120    fn lazy_all_valid_int64_arc(data: Arc<[i64]>) -> Self {
1121        Self::LazyAllValidInt64 {
1122            data,
1123            values: OnceLock::new(),
1124        }
1125    }
1126
1127    fn lazy_all_valid_float64(data: Vec<f64>) -> Self {
1128        Self::lazy_all_valid_float64_arc(Arc::from(data))
1129    }
1130
1131    /// Share an existing `Arc` f64 buffer in O(1) (used by `Clone`).
1132    fn lazy_all_valid_float64_arc(data: Arc<[f64]>) -> Self {
1133        Self::LazyAllValidFloat64 {
1134            data,
1135            values: OnceLock::new(),
1136        }
1137    }
1138
1139    fn lazy_nullable_float64(data: Vec<f64>, validity: ValidityMask) -> Self {
1140        Self::LazyNullableFloat64 {
1141            data,
1142            validity,
1143            values: OnceLock::new(),
1144        }
1145    }
1146
1147    fn lazy_nullable_int64(data: Vec<i64>, validity: ValidityMask) -> Self {
1148        Self::LazyNullableInt64 {
1149            data,
1150            validity,
1151            values: OnceLock::new(),
1152        }
1153    }
1154
1155    fn lazy_all_valid_bool(data: Vec<bool>) -> Self {
1156        Self::lazy_all_valid_bool_arc(Arc::from(data))
1157    }
1158
1159    /// Share an existing `Arc` bool buffer in O(1) (used by `Clone`).
1160    fn lazy_all_valid_bool_arc(data: Arc<[bool]>) -> Self {
1161        Self::LazyAllValidBool {
1162            data,
1163            values: OnceLock::new(),
1164        }
1165    }
1166
1167    fn lazy_contiguous_utf8(bytes: Vec<u8>, offsets: Vec<usize>) -> Self {
1168        debug_assert!(!offsets.is_empty(), "offsets must hold n+1 entries");
1169        debug_assert_eq!(*offsets.last().expect("non-empty"), bytes.len());
1170        Self::lazy_contiguous_utf8_arc(Arc::from(bytes), Arc::from(offsets))
1171    }
1172
1173    /// Construct a `LazyContiguousUtf8` from already-`Arc`-shared buffers,
1174    /// sharing them in O(1) instead of re-allocating. Used by `Clone` so two
1175    /// contiguous-Utf8 columns can share one immutable byte buffer
1176    /// (br-frankenpandas-oifvy). The witness caches start fresh — they are pure
1177    /// functions of the (identical) shared buffers, so a clone recomputes the
1178    /// same value lazily if asked.
1179    fn lazy_contiguous_utf8_arc(bytes: Arc<[u8]>, offsets: Arc<[usize]>) -> Self {
1180        debug_assert!(!offsets.is_empty(), "offsets must hold n+1 entries");
1181        debug_assert_eq!(*offsets.last().expect("non-empty"), bytes.len());
1182        Self::LazyContiguousUtf8 {
1183            bytes,
1184            offsets,
1185            strictly_increasing: OnceLock::new(),
1186            fixed_width: OnceLock::new(),
1187            values: OnceLock::new(),
1188        }
1189    }
1190
1191    /// Build a zero-copy contiguous row-range view over a shared contiguous-Utf8
1192    /// backing (br-frankenpandas-jbyuc.1.1.1). Rows `start..start+len` of the
1193    /// source become rows `0..len` of the view. Shares `bytes`/`offsets` in
1194    /// O(1); the Scalar view materializes on demand.
1195    fn lazy_utf8_slice(bytes: Arc<[u8]>, offsets: Arc<[usize]>, start: usize, len: usize) -> Self {
1196        debug_assert!(
1197            start + len < offsets.len(),
1198            "view window must lie within the source offsets"
1199        );
1200        Self::LazyUtf8Slice {
1201            bytes,
1202            offsets,
1203            start,
1204            len,
1205            values: OnceLock::new(),
1206        }
1207    }
1208
1209    fn lazy_repeat_runs_int64(runs: Vec<(i64, usize)>, total_len: usize) -> Self {
1210        debug_assert_eq!(
1211            runs.iter().map(|&(_, run_len)| run_len).sum::<usize>(),
1212            total_len
1213        );
1214        Self::LazyRepeatRunsInt64 {
1215            runs,
1216            total_len,
1217            data: OnceLock::new(),
1218            values: OnceLock::new(),
1219        }
1220    }
1221
1222    fn lazy_repeat_values_int64(
1223        run_values: Vec<i64>,
1224        run_lens: Arc<[usize]>,
1225        total_len: usize,
1226    ) -> Self {
1227        debug_assert_eq!(run_values.len(), run_lens.len());
1228        debug_assert_eq!(run_lens.iter().sum::<usize>(), total_len);
1229        Self::LazyRepeatValuesInt64 {
1230            run_values,
1231            run_lens,
1232            total_len,
1233            data: OnceLock::new(),
1234            values: OnceLock::new(),
1235        }
1236    }
1237
1238    fn lazy_repeated_slices_int64(
1239        data: Vec<i64>,
1240        segments: Vec<(usize, usize)>,
1241        total_len: usize,
1242    ) -> Self {
1243        Self::lazy_repeated_slices_int64_shared(data, Arc::from(segments), total_len)
1244    }
1245
1246    fn lazy_repeated_slices_int64_shared(
1247        data: Vec<i64>,
1248        segments: Arc<[(usize, usize)]>,
1249        total_len: usize,
1250    ) -> Self {
1251        debug_assert_eq!(
1252            segments.iter().map(|&(_, len)| len).sum::<usize>(),
1253            total_len
1254        );
1255        debug_assert!(
1256            segments
1257                .iter()
1258                .all(|&(start, len)| start.checked_add(len).is_some_and(|end| end <= data.len()))
1259        );
1260        Self::LazyRepeatedSlicesInt64 {
1261            data,
1262            segments,
1263            total_len,
1264            expanded: OnceLock::new(),
1265            values: OnceLock::new(),
1266        }
1267    }
1268
1269    fn expand_repeat_values_i64(
1270        run_values: &[i64],
1271        run_lens: &[usize],
1272        total_len: usize,
1273    ) -> Vec<i64> {
1274        const PARALLEL_MIN_VALUES: usize = 1 << 18;
1275        const PARALLEL_MAX_CHUNKS: usize = 16;
1276
1277        debug_assert_eq!(run_values.len(), run_lens.len());
1278        let thread_count = std::thread::available_parallelism()
1279            .map_or(1, usize::from)
1280            .min(PARALLEL_MAX_CHUNKS);
1281        if total_len < PARALLEL_MIN_VALUES || thread_count < 2 || run_values.is_empty() {
1282            let mut out = Vec::with_capacity(total_len);
1283            for (&value, &run_len) in run_values.iter().zip(run_lens.iter()) {
1284                out.resize(out.len() + run_len, value);
1285            }
1286            return out;
1287        }
1288
1289        let target = total_len.div_ceil(thread_count).max(1);
1290        let mut boundaries = vec![(0usize, 0usize)];
1291        let mut cumulative = 0usize;
1292        let mut next_target = target;
1293        for (run_idx, &run_len) in run_lens.iter().enumerate() {
1294            cumulative += run_len;
1295            if cumulative >= next_target && run_idx + 1 < run_lens.len() {
1296                boundaries.push((run_idx + 1, cumulative));
1297                next_target = cumulative.saturating_add(target);
1298            }
1299        }
1300        debug_assert_eq!(cumulative, total_len);
1301        boundaries.push((run_lens.len(), total_len));
1302
1303        let mut out = vec![0i64; total_len];
1304        let mut chunk_slices = Vec::with_capacity(boundaries.len() - 1);
1305        let mut rest: &mut [i64] = out.as_mut_slice();
1306        let mut prev = 0usize;
1307        for window in boundaries.windows(2) {
1308            let (chunk_slice, tail) = rest.split_at_mut(window[1].1 - prev);
1309            prev = window[1].1;
1310            rest = tail;
1311            chunk_slices.push(chunk_slice);
1312        }
1313
1314        std::thread::scope(|scope| {
1315            let mut handles = Vec::with_capacity(chunk_slices.len());
1316            for (chunk_idx, chunk_slice) in chunk_slices.into_iter().enumerate() {
1317                let (run_start, _) = boundaries[chunk_idx];
1318                let (run_end, _) = boundaries[chunk_idx + 1];
1319                let run_values = &run_values[run_start..run_end];
1320                let run_lens = &run_lens[run_start..run_end];
1321                handles.push(scope.spawn(move || {
1322                    let mut cursor = 0usize;
1323                    for (&value, &run_len) in run_values.iter().zip(run_lens.iter()) {
1324                        chunk_slice[cursor..cursor + run_len].fill(value);
1325                        cursor += run_len;
1326                    }
1327                    debug_assert_eq!(cursor, chunk_slice.len());
1328                }));
1329            }
1330            for handle in handles {
1331                handle
1332                    .join()
1333                    .expect("repeat-value expansion worker must not panic");
1334            }
1335        });
1336        out
1337    }
1338
1339    fn expand_repeated_slices_i64(
1340        data: &[i64],
1341        segments: &[(usize, usize)],
1342        total_len: usize,
1343    ) -> Vec<i64> {
1344        const PARALLEL_MIN_VALUES: usize = 1 << 18;
1345        const PARALLEL_MAX_CHUNKS: usize = 16;
1346
1347        let thread_count = std::thread::available_parallelism()
1348            .map_or(1, usize::from)
1349            .min(PARALLEL_MAX_CHUNKS);
1350        if total_len < PARALLEL_MIN_VALUES || thread_count < 2 || segments.is_empty() {
1351            let mut out = Vec::with_capacity(total_len);
1352            for &(start, len) in segments {
1353                out.extend_from_slice(&data[start..start + len]);
1354            }
1355            return out;
1356        }
1357
1358        let target = total_len.div_ceil(thread_count).max(1);
1359        let mut boundaries = vec![(0usize, 0usize)];
1360        let mut cumulative = 0usize;
1361        let mut next_target = target;
1362        for (segment_idx, &(_, len)) in segments.iter().enumerate() {
1363            cumulative += len;
1364            if cumulative >= next_target && segment_idx + 1 < segments.len() {
1365                boundaries.push((segment_idx + 1, cumulative));
1366                next_target = cumulative.saturating_add(target);
1367            }
1368        }
1369        debug_assert_eq!(cumulative, total_len);
1370        boundaries.push((segments.len(), total_len));
1371
1372        let mut out = vec![0i64; total_len];
1373        let mut chunk_slices = Vec::with_capacity(boundaries.len() - 1);
1374        let mut rest: &mut [i64] = out.as_mut_slice();
1375        let mut prev = 0usize;
1376        for window in boundaries.windows(2) {
1377            let (chunk_slice, tail) = rest.split_at_mut(window[1].1 - prev);
1378            prev = window[1].1;
1379            rest = tail;
1380            chunk_slices.push(chunk_slice);
1381        }
1382
1383        std::thread::scope(|scope| {
1384            let mut handles = Vec::with_capacity(chunk_slices.len());
1385            for (chunk_idx, chunk_slice) in chunk_slices.into_iter().enumerate() {
1386                let (segment_start, _) = boundaries[chunk_idx];
1387                let (segment_end, _) = boundaries[chunk_idx + 1];
1388                let segments = &segments[segment_start..segment_end];
1389                handles.push(scope.spawn(move || {
1390                    let mut cursor = 0usize;
1391                    for &(start, len) in segments {
1392                        chunk_slice[cursor..cursor + len]
1393                            .copy_from_slice(&data[start..start + len]);
1394                        cursor += len;
1395                    }
1396                    debug_assert_eq!(cursor, chunk_slice.len());
1397                }));
1398            }
1399            for handle in handles {
1400                handle
1401                    .join()
1402                    .expect("repeated-slice expansion worker must not panic");
1403            }
1404        });
1405        out
1406    }
1407
1408    /// The expanded contiguous `i64` buffer of a repeat-run backing, built
1409    /// once on first request. `None` for every other representation.
1410    ///
1411    /// Large expansions are row-chunked across scoped threads (disjoint
1412    /// `split_at_mut` slices, same scheme as the dense join fill) so a
1413    /// consumer that forces materialization pays the same parallel fill the
1414    /// eager path would have, not a serial one.
1415    fn repeat_runs_i64_data(&self) -> Option<&[i64]> {
1416        const PARALLEL_MIN_VALUES: usize = 1 << 18;
1417        const PARALLEL_MAX_CHUNKS: usize = 16;
1418
1419        if let Self::LazyRepeatRunsInt64 {
1420            runs,
1421            total_len,
1422            data,
1423            ..
1424        } = self
1425        {
1426            return Some(
1427                data.get_or_init(|| {
1428                    let thread_count = std::thread::available_parallelism()
1429                        .map_or(1, usize::from)
1430                        .min(PARALLEL_MAX_CHUNKS);
1431                    if *total_len < PARALLEL_MIN_VALUES || thread_count < 2 || runs.is_empty() {
1432                        let mut out = Vec::with_capacity(*total_len);
1433                        for &(value, run_len) in runs {
1434                            out.resize(out.len() + run_len, value);
1435                        }
1436                        return out;
1437                    }
1438
1439                    // Chunk boundaries (run_idx, out_pos) balanced by output
1440                    // size; each worker fills its disjoint output slice.
1441                    let target = total_len.div_ceil(thread_count).max(1);
1442                    let mut boundaries = vec![(0usize, 0usize)];
1443                    let mut cumulative = 0usize;
1444                    let mut next_target = target;
1445                    for (run_idx, &(_, run_len)) in runs.iter().enumerate() {
1446                        cumulative += run_len;
1447                        if cumulative >= next_target && run_idx + 1 < runs.len() {
1448                            boundaries.push((run_idx + 1, cumulative));
1449                            next_target = cumulative.saturating_add(target);
1450                        }
1451                    }
1452                    debug_assert_eq!(cumulative, *total_len);
1453                    boundaries.push((runs.len(), *total_len));
1454
1455                    let mut out = vec![0i64; *total_len];
1456                    let mut chunk_slices = Vec::with_capacity(boundaries.len() - 1);
1457                    let mut rest: &mut [i64] = out.as_mut_slice();
1458                    let mut prev = 0usize;
1459                    for window in boundaries.windows(2) {
1460                        let (chunk_slice, tail) = rest.split_at_mut(window[1].1 - prev);
1461                        prev = window[1].1;
1462                        rest = tail;
1463                        chunk_slices.push(chunk_slice);
1464                    }
1465
1466                    std::thread::scope(|scope| {
1467                        let mut handles = Vec::with_capacity(chunk_slices.len());
1468                        for (chunk_idx, chunk_slice) in chunk_slices.into_iter().enumerate() {
1469                            let (run_start, _) = boundaries[chunk_idx];
1470                            let (run_end, _) = boundaries[chunk_idx + 1];
1471                            let runs = &runs[run_start..run_end];
1472                            handles.push(scope.spawn(move || {
1473                                let mut cursor = 0usize;
1474                                for &(value, run_len) in runs {
1475                                    chunk_slice[cursor..cursor + run_len].fill(value);
1476                                    cursor += run_len;
1477                                }
1478                                debug_assert_eq!(cursor, chunk_slice.len());
1479                            }));
1480                        }
1481                        for handle in handles {
1482                            handle
1483                                .join()
1484                                .expect("repeat-run expansion worker must not panic");
1485                        }
1486                    });
1487                    out
1488                })
1489                .as_slice(),
1490            );
1491        }
1492        if let Self::LazyRepeatValuesInt64 {
1493            run_values,
1494            run_lens,
1495            total_len,
1496            data,
1497            ..
1498        } = self
1499        {
1500            return Some(
1501                data.get_or_init(|| {
1502                    Self::expand_repeat_values_i64(run_values, run_lens, *total_len)
1503                })
1504                .as_slice(),
1505            );
1506        }
1507        None
1508    }
1509
1510    fn repeated_slices_i64_data(&self) -> Option<&[i64]> {
1511        if let Self::LazyRepeatedSlicesInt64 {
1512            data,
1513            segments,
1514            total_len,
1515            expanded,
1516            ..
1517        } = self
1518        {
1519            return Some(
1520                expanded
1521                    .get_or_init(|| Self::expand_repeated_slices_i64(data, segments, *total_len))
1522                    .as_slice(),
1523            );
1524        }
1525        None
1526    }
1527
1528    fn as_slice(&self) -> &[Scalar] {
1529        match self {
1530            Self::Eager(values) => values,
1531            Self::LazyAllValidInt64 { data, values } => values
1532                .get_or_init(|| data.iter().copied().map(Scalar::Int64).collect())
1533                .as_slice(),
1534            Self::LazyAllValidFloat64 { data, values } => values
1535                .get_or_init(|| data.iter().copied().map(Scalar::Float64).collect())
1536                .as_slice(),
1537            Self::LazyNullableFloat64 {
1538                data,
1539                validity,
1540                values,
1541            } => values
1542                .get_or_init(|| {
1543                    data.iter()
1544                        .enumerate()
1545                        .map(|(idx, value)| {
1546                            if validity.get(idx) || value.is_nan() {
1547                                Scalar::Float64(*value)
1548                            } else {
1549                                Scalar::Null(NullKind::NaN)
1550                            }
1551                        })
1552                        .collect()
1553                })
1554                .as_slice(),
1555            Self::LazyAllValidBool { data, values } => values
1556                .get_or_init(|| data.iter().copied().map(Scalar::Bool).collect())
1557                .as_slice(),
1558            Self::LazyContiguousUtf8 {
1559                bytes,
1560                offsets,
1561                values,
1562                ..
1563            } => values
1564                .get_or_init(|| {
1565                    offsets
1566                        .windows(2)
1567                        .map(|w| {
1568                            Scalar::Utf8(
1569                                std::str::from_utf8(&bytes[w[0]..w[1]])
1570                                    .expect("contiguous utf8 buffer is valid by construction")
1571                                    .to_owned(),
1572                            )
1573                        })
1574                        .collect()
1575                })
1576                .as_slice(),
1577            Self::LazyNullableInt64 {
1578                data,
1579                validity,
1580                values,
1581            } => values
1582                .get_or_init(|| {
1583                    data.iter()
1584                        .enumerate()
1585                        .map(|(idx, value)| {
1586                            if validity.get(idx) {
1587                                Scalar::Int64(*value)
1588                            } else {
1589                                Scalar::Null(NullKind::Null)
1590                            }
1591                        })
1592                        .collect()
1593                })
1594                .as_slice(),
1595            Self::LazyRepeatRunsInt64 {
1596                runs,
1597                total_len,
1598                values,
1599                ..
1600            } => values
1601                .get_or_init(|| {
1602                    let mut out = Vec::with_capacity(*total_len);
1603                    for &(value, run_len) in runs {
1604                        out.resize(out.len() + run_len, Scalar::Int64(value));
1605                    }
1606                    out
1607                })
1608                .as_slice(),
1609            Self::LazyRepeatValuesInt64 {
1610                run_values,
1611                run_lens,
1612                total_len,
1613                values,
1614                ..
1615            } => values
1616                .get_or_init(|| {
1617                    let mut out = Vec::with_capacity(*total_len);
1618                    for (&value, &run_len) in run_values.iter().zip(run_lens.iter()) {
1619                        out.resize(out.len() + run_len, Scalar::Int64(value));
1620                    }
1621                    out
1622                })
1623                .as_slice(),
1624            Self::LazyRepeatedSlicesInt64 {
1625                data,
1626                segments,
1627                total_len,
1628                values,
1629                ..
1630            } => values
1631                .get_or_init(|| {
1632                    Self::expand_repeated_slices_i64(data, segments, *total_len)
1633                        .into_iter()
1634                        .map(Scalar::Int64)
1635                        .collect()
1636                })
1637                .as_slice(),
1638            Self::LazyUtf8Slice {
1639                bytes,
1640                offsets,
1641                start,
1642                len,
1643                values,
1644            } => values
1645                .get_or_init(|| {
1646                    (0..*len)
1647                        .map(|i| {
1648                            let lo = offsets[start + i];
1649                            let hi = offsets[start + i + 1];
1650                            Scalar::Utf8(
1651                                std::str::from_utf8(&bytes[lo..hi])
1652                                    .expect("contiguous utf8 buffer is valid by construction")
1653                                    .to_owned(),
1654                            )
1655                        })
1656                        .collect()
1657                })
1658                .as_slice(),
1659        }
1660    }
1661
1662    fn len(&self) -> usize {
1663        match self {
1664            Self::Eager(values) => values.len(),
1665            Self::LazyAllValidInt64 { data, .. } => data.len(),
1666            Self::LazyAllValidFloat64 { data, .. } => data.len(),
1667            Self::LazyNullableFloat64 { data, .. } => data.len(),
1668            Self::LazyAllValidBool { data, .. } => data.len(),
1669            Self::LazyContiguousUtf8 { offsets, .. } => offsets.len() - 1,
1670            Self::LazyNullableInt64 { data, .. } => data.len(),
1671            Self::LazyRepeatRunsInt64 { total_len, .. } => *total_len,
1672            Self::LazyRepeatValuesInt64 { total_len, .. } => *total_len,
1673            Self::LazyRepeatedSlicesInt64 { total_len, .. } => *total_len,
1674            Self::LazyUtf8Slice { len, .. } => *len,
1675        }
1676    }
1677
1678    fn is_empty(&self) -> bool {
1679        self.len() == 0
1680    }
1681}
1682
/// Reports whether the strings stored in a contiguous UTF-8 buffer are in
/// strictly increasing byte-lexicographic order (sorted with no duplicates).
///
/// Despite the name, it is the string *values* `bytes[offsets[i]..offsets[i + 1]]`
/// that are compared, not the offsets themselves. An empty `offsets` slice
/// yields `false`; zero or one string is trivially ordered and yields `true`
/// without touching `bytes`. Slicing panics if the offsets do not describe
/// valid ranges of `bytes`.
fn contiguous_utf8_offsets_are_strictly_increasing(bytes: &[u8], offsets: &[usize]) -> bool {
    if offsets.is_empty() {
        return false;
    }
    // Fewer than three boundaries means fewer than two strings.
    if offsets.len() < 3 {
        return true;
    }

    // Lazily slice one string per adjacent pair of boundaries, in order.
    let mut cells = offsets
        .windows(2)
        .map(|bounds| &bytes[bounds[0]..bounds[1]]);
    let Some(mut previous) = cells.next() else {
        return true;
    };
    for current in cells {
        if previous >= current {
            return false;
        }
        previous = current;
    }
    true
}
1701
/// Returns the common byte width of every string described by `offsets`, or
/// `None` when the widths differ.
///
/// `offsets` is a boundary list (one more entry than strings). An empty list
/// gives `None`; a list describing zero strings gives `Some(0)`. A decreasing
/// pair of boundaries (negative width) also gives `None`.
fn contiguous_utf8_fixed_width(offsets: &[usize]) -> Option<usize> {
    if offsets.is_empty() {
        return None;
    }
    let mut widths = offsets
        .windows(2)
        .map(|pair| pair[1].checked_sub(pair[0]));
    // No adjacent pair at all: zero strings, width is vacuously 0.
    let Some(first) = widths.next() else {
        return Some(0);
    };
    let width = first?;
    for candidate in widths {
        if candidate? != width {
            return None;
        }
    }
    Some(width)
}
1715
/// If `positions` is a non-empty contiguous ascending run
/// (`positions[i] == positions[0] + i`), return its start. Returns `None` for
/// an empty slice or at the first out-of-sequence position — so a
/// non-contiguous take pays only until the first gap (typically O(1)).
///
/// The expected position is computed with `checked_add`: a run that would
/// have to continue past `usize::MAX` is not contiguous, so it yields `None`
/// instead of overflowing (a panic in debug builds, and a wrapped — hence
/// wrongly matching — value in release builds).
/// (br-frankenpandas-jbyuc.1.1.1)
fn contiguous_ascending_start(positions: &[usize]) -> Option<usize> {
    let first = *positions.first()?;
    // `all` short-circuits on the first mismatch, preserving the early exit.
    let contiguous = positions
        .iter()
        .enumerate()
        .all(|(i, &pos)| first.checked_add(i) == Some(pos));
    contiguous.then_some(first)
}
1730
1731impl Clone for ScalarValues {
1732    fn clone(&self) -> Self {
1733        match self {
1734            Self::Eager(values) => Self::Eager(values.clone()),
1735            Self::LazyAllValidInt64 { data, .. } => Self::lazy_all_valid_int64_arc(Arc::clone(data)),
1736            Self::LazyAllValidFloat64 { data, .. } => {
1737                Self::lazy_all_valid_float64_arc(Arc::clone(data))
1738            }
1739            Self::LazyNullableFloat64 { data, validity, .. } => {
1740                Self::lazy_nullable_float64(data.clone(), validity.clone())
1741            }
1742            Self::LazyAllValidBool { data, .. } => Self::lazy_all_valid_bool_arc(Arc::clone(data)),
1743            Self::LazyContiguousUtf8 { bytes, offsets, .. } => {
1744                Self::lazy_contiguous_utf8_arc(Arc::clone(bytes), Arc::clone(offsets))
1745            }
1746            Self::LazyNullableInt64 { data, validity, .. } => {
1747                Self::lazy_nullable_int64(data.clone(), validity.clone())
1748            }
1749            Self::LazyRepeatRunsInt64 {
1750                runs, total_len, ..
1751            } => Self::lazy_repeat_runs_int64(runs.clone(), *total_len),
1752            Self::LazyRepeatValuesInt64 {
1753                run_values,
1754                run_lens,
1755                total_len,
1756                ..
1757            } => {
1758                Self::lazy_repeat_values_int64(run_values.clone(), Arc::clone(run_lens), *total_len)
1759            }
1760            Self::LazyRepeatedSlicesInt64 {
1761                data,
1762                segments,
1763                total_len,
1764                ..
1765            } => Self::lazy_repeated_slices_int64_shared(
1766                data.clone(),
1767                Arc::clone(segments),
1768                *total_len,
1769            ),
1770            Self::LazyUtf8Slice {
1771                bytes,
1772                offsets,
1773                start,
1774                len,
1775                ..
1776            } => Self::lazy_utf8_slice(Arc::clone(bytes), Arc::clone(offsets), *start, *len),
1777        }
1778    }
1779}
1780
1781impl std::ops::Deref for ScalarValues {
1782    type Target = [Scalar];
1783
1784    fn deref(&self) -> &Self::Target {
1785        self.as_slice()
1786    }
1787}
1788
1789impl<'a> IntoIterator for &'a ScalarValues {
1790    type Item = &'a Scalar;
1791    type IntoIter = std::slice::Iter<'a, Scalar>;
1792
1793    fn into_iter(self) -> Self::IntoIter {
1794        self.as_slice().iter()
1795    }
1796}
1797
1798impl PartialEq for ScalarValues {
1799    fn eq(&self, other: &Self) -> bool {
1800        self.as_slice() == other.as_slice()
1801    }
1802}
1803
1804impl std::fmt::Debug for ScalarValues {
1805    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1806        self.as_slice().fmt(f)
1807    }
1808}
1809
1810impl Serialize for ScalarValues {
1811    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
1812        self.as_slice().serialize(serializer)
1813    }
1814}
1815
1816impl<'de> Deserialize<'de> for ScalarValues {
1817    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
1818        Vec::<Scalar>::deserialize(deserializer).map(Self::Eager)
1819    }
1820}
1821
/// Columnar container: a [`DType`] together with its values and a
/// [`ValidityMask`].
///
/// Serialization covers `dtype`, `values` and `validity`; the `data` field is
/// skipped by serde.
#[derive(Serialize, Deserialize)]
pub struct Column {
    /// Declared element type of the column.
    dtype: DType,
    /// Per-cell values, exposed as `Scalar`s through `ScalarValues`.
    values: ScalarValues,
    /// Per-cell missing-value mask.
    validity: ValidityMask,
    /// Optional typed buffer. Never serialized, and set to `None` by `Clone`.
    #[serde(skip)]
    data: Option<ColumnData>,
}
1830
1831impl Clone for Column {
1832    fn clone(&self) -> Self {
1833        Self {
1834            dtype: self.dtype,
1835            values: self
1836                .clone_dense_values_from_cache()
1837                .unwrap_or_else(|| self.values.clone()),
1838            validity: self.validity.clone(),
1839            data: None,
1840        }
1841    }
1842}
1843
1844impl PartialEq for Column {
1845    fn eq(&self, other: &Self) -> bool {
1846        self.dtype == other.dtype && self.values == other.values && self.validity == other.validity
1847    }
1848}
1849
1850impl std::fmt::Debug for Column {
1851    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1852        f.debug_struct("Column")
1853            .field("dtype", &self.dtype)
1854            .field("values", &self.values)
1855            .field("validity", &self.validity)
1856            .finish()
1857    }
1858}
1859
/// Sparse encoding of a column: only the cells that differ from the dtype's
/// fill value are stored, together with their positions.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct SparseColumn {
    /// Sparse dtype; supplies the value dtype and the fill value.
    dtype: SparseDType,
    /// Length of the dense column this encodes.
    len: usize,
    /// Dense positions of the stored cells, paired index-for-index with `values`.
    indices: Vec<usize>,
    /// Stored (non-fill) values, in the same order as `indices`.
    values: Vec<Scalar>,
}
1867
/// Tag selecting a binary arithmetic operation.
///
/// Serialized with snake_case variant names (`add`, `sub`, …, `floor_div`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ArithmeticOp {
    Add,
    Sub,
    Mul,
    Div,
    Mod,
    Pow,
    FloorDiv,
}
1879
/// Element-wise comparison operations that produce `Bool`-typed columns.
///
/// Null propagation: any missing/NaN input produces a missing output.
/// This matches pandas nullable-integer semantics (`pd.NA` propagation).
///
/// Serialized with snake_case variant names (`gt`, `lt`, `eq`, `ne`, `ge`,
/// `le`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ComparisonOp {
    Gt,
    Lt,
    Eq,
    Ne,
    Ge,
    Le,
}
1894
1895fn nkeep_impl(col: &Column, n: usize, keep: &str, ascending: bool) -> Result<Column, ColumnError> {
1896    if !matches!(keep, "first" | "last" | "all") {
1897        return Err(ColumnError::Type(TypeError::NonNumericValue {
1898            value: keep.to_string(),
1899            dtype: col.dtype(),
1900        }));
1901    }
1902    // Annotate each value with its original position, then sort
1903    // (position is the secondary key; Rust's sort_by is stable, so
1904    // "first" falls out for free on equal primary keys).
1905    let mut indexed: Vec<(usize, &Scalar)> = col.values().iter().enumerate().collect();
1906    indexed.sort_by(|a, b| {
1907        let primary = compare_scalars_na_last(a.1, b.1, ascending);
1908        match (primary, keep) {
1909            // "last" policy: on ties, prefer later positions.
1910            (std::cmp::Ordering::Equal, "last") => b.0.cmp(&a.0),
1911            (std::cmp::Ordering::Equal, _) => a.0.cmp(&b.0),
1912            _ => primary,
1913        }
1914    });
1915    let take = n.min(indexed.len());
1916    let mut end = take;
1917    if keep == "all" && take > 0 && take < indexed.len() {
1918        let boundary = indexed[take - 1].1;
1919        while end < indexed.len() {
1920            let same = compare_scalars_na_last(indexed[end].1, boundary, ascending).is_eq();
1921            if !same {
1922                break;
1923            }
1924            end += 1;
1925        }
1926    }
1927    let values: Vec<Scalar> = indexed[..end].iter().map(|(_, v)| (*v).clone()).collect();
1928    Column::new(col.dtype(), values)
1929}
1930
1931fn is_monotonic_in_direction(values: &[Scalar], increasing: bool) -> bool {
1932    let mut prev: Option<&Scalar> = None;
1933    for v in values {
1934        if v.is_missing() {
1935            continue;
1936        }
1937        if let Some(p) = prev {
1938            let ord = compare_scalars_na_last(p, v, true);
1939            // `p` should come before `v` in the requested direction. With
1940            // ascending compare: Less/Equal → non-decreasing OK; Greater
1941            // breaks. For decreasing we flip the expectation.
1942            let ok = matches!(
1943                (ord, increasing),
1944                (std::cmp::Ordering::Less, true)
1945                    | (std::cmp::Ordering::Equal, _)
1946                    | (std::cmp::Ordering::Greater, false)
1947            );
1948            if !ok {
1949                return false;
1950            }
1951        }
1952        prev = Some(v);
1953    }
1954    true
1955}
1956
1957fn compare_scalars_na_last(left: &Scalar, right: &Scalar, ascending: bool) -> std::cmp::Ordering {
1958    use std::cmp::Ordering;
1959    match (left.is_missing(), right.is_missing()) {
1960        (true, true) => Ordering::Equal,
1961        // Missing always sorts to the end, regardless of direction.
1962        (true, false) => Ordering::Greater,
1963        (false, true) => Ordering::Less,
1964        (false, false) => {
1965            let ord = match (left, right) {
1966                (Scalar::Int64(a), Scalar::Int64(b)) => a.cmp(b),
1967                (Scalar::Float64(a), Scalar::Float64(b)) => {
1968                    a.partial_cmp(b).unwrap_or(Ordering::Equal)
1969                }
1970                (Scalar::Bool(a), Scalar::Bool(b)) => a.cmp(b),
1971                (Scalar::Utf8(a), Scalar::Utf8(b)) => a.cmp(b),
1972                (Scalar::Timedelta64(a), Scalar::Timedelta64(b)) => a.cmp(b),
1973                (a, b) => match (a.to_f64(), b.to_f64()) {
1974                    (Ok(af), Ok(bf)) => af.partial_cmp(&bf).unwrap_or(Ordering::Equal),
1975                    _ => Ordering::Equal,
1976                },
1977            };
1978            if ascending { ord } else { ord.reverse() }
1979        }
1980    }
1981}
1982
/// `keep=` policy for `duplicated`/`drop_duplicates`.
#[derive(Clone, Copy)]
enum DupPolicy {
    /// Flag every occurrence after the first.
    First,
    /// Flag every occurrence before the last.
    Last,
    /// Flag every occurrence of any key that appears more than once.
    None,
}
1990
1991/// Per-element duplicate flags over a contiguous slice of `Copy + Hash + Eq`
1992/// keys, using a fast (`FxHashSet`) hasher — the typed counterpart to the
1993/// `Scalar`-enum + SipHash path. Semantics are identical: `First` flags every
1994/// occurrence after the first; `Last` flags every occurrence before the last;
1995/// `None` flags every occurrence of any key that appears more than once.
1996fn duplicated_flags_typed<T>(keys: &[T], policy: DupPolicy) -> Vec<bool>
1997where
1998    T: std::hash::Hash + Eq + Copy,
1999{
2000    let n = keys.len();
2001    let mut flags = vec![false; n];
2002    match policy {
2003        DupPolicy::First => {
2004            let mut seen: FxHashSet<T> = FxHashSet::with_capacity_and_hasher(n, Default::default());
2005            for (idx, &k) in keys.iter().enumerate() {
2006                flags[idx] = !seen.insert(k);
2007            }
2008        }
2009        DupPolicy::Last => {
2010            let mut seen: FxHashSet<T> = FxHashSet::with_capacity_and_hasher(n, Default::default());
2011            for (idx, &k) in keys.iter().enumerate().rev() {
2012                flags[idx] = !seen.insert(k);
2013            }
2014        }
2015        DupPolicy::None => {
2016            let mut seen_once: FxHashSet<T> =
2017                FxHashSet::with_capacity_and_hasher(n, Default::default());
2018            let mut seen_multiple: FxHashSet<T> = FxHashSet::default();
2019            for &k in keys {
2020                if !seen_once.insert(k) {
2021                    seen_multiple.insert(k);
2022                }
2023            }
2024            for (idx, &k) in keys.iter().enumerate() {
2025                flags[idx] = seen_multiple.contains(&k);
2026            }
2027        }
2028    }
2029    flags
2030}
2031
/// Upper bound (in entries) on the direct-address table used for integer
/// dedup. With 16M one-byte entries the `seen`/`count` table is ~16MB —
/// L3-resident on a typical server — and dedup is a hash-free O(n) scan.
/// Above this the table stops being cache-friendly and callers fall back to
/// the FxHash set.
const DUP_DIRECT_ADDRESS_CAP: u128 = 1 << 24;

/// Returns `(min, table_size)` for a direct-address integer dedup over
/// `data`, or `None` when a dense table is not worthwhile.
///
/// `table_size` is `max - min + 1`. It is rejected when it exceeds
/// [`DUP_DIRECT_ADDRESS_CAP`] or 16x the row count, so a handful of
/// widely-separated values cannot trigger a giant allocation. Empty input
/// yields `None`.
fn i64_direct_address_range(data: &[i64]) -> Option<(i64, usize)> {
    let (&first, rest) = data.split_first()?;
    let (min, max) = rest
        .iter()
        .fold((first, first), |(lo, hi), &v| (lo.min(v), hi.max(v)));
    // Widen to i128 so `max - min + 1` cannot overflow for any i64 pair.
    let span = (i128::from(max) - i128::from(min) + 1) as u128;
    let row_budget = (data.len() as u128).saturating_mul(16);
    (span <= DUP_DIRECT_ADDRESS_CAP && span <= row_budget).then(|| (min, span as usize))
}
2059
2060/// Hash-free duplicate flags for a bounded-range `i64` slice via a dense
2061/// direct-address table (no per-element hashing or `Scalar` enum). Identical
2062/// semantics to [`duplicated_flags_typed`]. `min`/`range` come from
2063/// [`i64_direct_address_range`], so `(v - min)` is always in `0..range`.
2064fn duplicated_flags_i64_direct(
2065    data: &[i64],
2066    min: i64,
2067    range: usize,
2068    policy: DupPolicy,
2069) -> Vec<bool> {
2070    let n = data.len();
2071    let mut flags = vec![false; n];
2072    let slot = |v: i64| (v as i128 - min as i128) as usize;
2073    match policy {
2074        DupPolicy::First => {
2075            let mut seen = vec![false; range];
2076            for (idx, &v) in data.iter().enumerate() {
2077                let s = slot(v);
2078                flags[idx] = seen[s];
2079                seen[s] = true;
2080            }
2081        }
2082        DupPolicy::Last => {
2083            let mut seen = vec![false; range];
2084            for (idx, &v) in data.iter().enumerate().rev() {
2085                let s = slot(v);
2086                flags[idx] = seen[s];
2087                seen[s] = true;
2088            }
2089        }
2090        DupPolicy::None => {
2091            // Saturating occupancy count (we only care about 1 vs >1).
2092            let mut count = vec![0u8; range];
2093            for &v in data {
2094                let s = slot(v);
2095                if count[s] < 2 {
2096                    count[s] += 1;
2097                }
2098            }
2099            for (idx, &v) in data.iter().enumerate() {
2100                flags[idx] = count[slot(v)] > 1;
2101            }
2102        }
2103    }
2104    flags
2105}
2106
/// Order-preserving `u64` radix key for an `i64`: toggling the sign bit makes
/// two's-complement negatives sort below non-negatives in unsigned order.
#[inline]
fn i64_radix_key(value: i64) -> u64 {
    // `i64::MIN` has only the sign bit set, so XOR with it flips that bit.
    (value ^ i64::MIN) as u64
}
2113
/// Order-preserving `u64` radix key for an `f64` (the standard IEEE-754
/// "sortable bits" transform): non-negative values get only their sign bit
/// flipped, negative values get every bit flipped. The mapping is monotonic
/// across the whole finite range; callers guarantee no NaN (`as_f64_slice`
/// only yields all-valid buffers and FP models NaN as missing).
///
/// `-0.0` is normalized to `+0.0` first: the two compare equal under
/// `partial_cmp` but have distinct bit patterns, and normalizing keeps the
/// radix order identical to the comparator path.
#[inline]
fn f64_radix_key(value: f64) -> u64 {
    const SIGN_BIT: u64 = 1 << 63;
    // `-0.0 == 0.0` is true, so both zeros take the canonical `+0.0`.
    let canonical = if value == 0.0 { 0.0 } else { value };
    let bits = canonical.to_bits();
    let flip_mask = if bits & SIGN_BIT == 0 {
        SIGN_BIT
    } else {
        u64::MAX
    };
    bits ^ flip_mask
}
2130
/// Stable least-significant-digit radix argsort over pre-computed `u64` keys:
/// eight counting-sort passes, one per byte. The returned permutation `perm`
/// satisfies `keys[perm[0]] <= keys[perm[1]] <= ...`, and equal keys keep
/// their original relative order (the same tie behavior as the stable
/// comparator path). Each pass is O(n) and comparison-free, replacing the
/// O(n log n) `Scalar`-enum comparator for all-valid numeric columns.
fn radix_argsort_u64(keys: &[u64]) -> Vec<usize> {
    let n = keys.len();
    let mut order: Vec<usize> = (0..n).collect();
    if n < 2 {
        return order;
    }
    let mut buffer = vec![0usize; n];
    for pass in 0..8u32 {
        let shift = pass * 8;
        // Byte of the key examined by this pass.
        let digit = |key: u64| usize::from((key >> shift) as u8);

        let mut histogram = [0usize; 256];
        for &key in keys {
            histogram[digit(key)] += 1;
        }
        // A byte that is identical for every key cannot reorder anything
        // (common for clustered / small-magnitude data): leave `order` as is.
        if histogram.iter().any(|&count| count == n) {
            continue;
        }
        // Turn counts into exclusive prefix sums: bucket start offsets.
        let mut next_start = 0usize;
        for cell in &mut histogram {
            let count = std::mem::replace(cell, next_start);
            next_start += count;
        }
        // Scatter in current order so ties keep their relative position.
        for &row in &order {
            let bucket = digit(keys[row]);
            buffer[histogram[bucket]] = row;
            histogram[bucket] += 1;
        }
        std::mem::swap(&mut order, &mut buffer);
    }
    order
}
2169
2170/// Stable LSD radix argsort of an `i64` slice (br-frankenpandas-y5s15): the
2171/// permutation that orders `values` ascending (or descending), equal values
2172/// keeping their original order. Bit-identical to a stable `sort_by(i64::cmp)`:
2173/// `i64_radix_key` is order-preserving and the counting sort is stable;
2174/// descending flips the key (`!key`) so equal values still keep original order
2175/// (matching a reversed comparator whose `Equal` arm doesn't reorder). Reusable
2176/// for any all-Int64 ordering (index labels, single columns).
2177#[must_use]
2178pub fn radix_argsort_i64(values: &[i64], ascending: bool) -> Vec<usize> {
2179    let keys: Vec<u64> = if ascending {
2180        values.iter().map(|&v| i64_radix_key(v)).collect()
2181    } else {
2182        values.iter().map(|&v| !i64_radix_key(v)).collect()
2183    };
2184    radix_argsort_u64(&keys)
2185}
2186
/// Stable LSD radix lexsort over several `u64` key columns
/// (br-frankenpandas-lnsu6).
///
/// Returns the permutation ordering rows lexicographically by
/// `keys_by_col[0]`, then `keys_by_col[1]`, and so on, with fully equal rows
/// keeping their original order — exactly a stable multi-key `sort_by`.
/// Because the overall least-significant digit is the low byte of the last
/// column, columns are processed last-to-first, each with eight stable
/// counting-sort passes that thread the running permutation; this leaves the
/// first column as the most significant. O(n·k) and comparison-free.
///
/// All key vectors must have the same length; the row count is taken from
/// the first column. Callers bake per-column ascending/descending into the
/// keys. With no columns the result is empty.
pub fn radix_argsort_multi_u64(keys_by_col: &[Vec<u64>]) -> Vec<usize> {
    let Some(leading) = keys_by_col.first() else {
        return Vec::new();
    };
    let n = leading.len();
    let mut order: Vec<usize> = (0..n).collect();
    if n < 2 {
        return order;
    }
    let mut buffer = vec![0usize; n];
    for column in keys_by_col.iter().rev() {
        for pass in 0..8u32 {
            let shift = pass * 8;
            // Byte of the key examined by this pass.
            let digit = |key: u64| usize::from((key >> shift) as u8);

            let mut histogram = [0usize; 256];
            for &key in column {
                histogram[digit(key)] += 1;
            }
            // Constant byte across the column: the pass would be a no-op.
            if histogram.iter().any(|&count| count == n) {
                continue;
            }
            // Counts -> exclusive prefix sums (bucket start offsets).
            let mut next_start = 0usize;
            for cell in &mut histogram {
                let count = std::mem::replace(cell, next_start);
                next_start += count;
            }
            // Scatter in current order so earlier passes' ordering survives.
            for &row in &order {
                let bucket = digit(column[row]);
                buffer[histogram[bucket]] = row;
                histogram[bucket] += 1;
            }
            std::mem::swap(&mut order, &mut buffer);
        }
    }
    order
}
2228
2229/// Stable MSD byte-radix argsort over UTF-8 strings.
2230///
2231/// Produces the exact permutation of a stable `sort_by` with `String::cmp`
2232/// (byte-lexicographic, shorter-prefix-first), comparison-free at scale:
2233/// each level counting-sorts the bucket by the byte at `depth`, with a
2234/// virtual end-of-string bucket ordered before every byte (ascending) /
2235/// after every byte (descending) — exactly `cmp`'s prefix rule. Counting
2236/// scatters preserve relative order and the small-bucket cutoff uses the
2237/// stable `sort_by` on the (equal-prefix-stripped) suffix, so ties keep
2238/// their original order at every level, matching the stable comparison
2239/// sort bit-for-bit in both directions.
2240fn utf8_msd_argsort(strs: &[&str], ascending: bool) -> Vec<usize> {
2241    let n = strs.len();
2242    let mut idx: Vec<usize> = (0..n).collect();
2243    if n > 1 {
2244        let mut aux: Vec<usize> = vec![0; n];
2245        utf8_msd_sort_range(strs, &mut idx, &mut aux, 0, n, 0, ascending);
2246    }
2247    idx
2248}
2249
/// Recursive worker for the MSD string argsort: stably orders the row indices
/// in `idx[lo..hi]` by the bytes of their strings from `depth` onward.
///
/// Precondition: every string referenced in `idx[lo..hi]` shares its first
/// `depth` bytes (so each is at least `depth` bytes long). `aux` is scratch
/// space of the same length as `idx`; only `aux[lo..hi]` is touched.
/// `ascending` selects the output direction.
fn utf8_msd_sort_range(
    strs: &[&str],
    idx: &mut [usize],
    aux: &mut [usize],
    lo: usize,
    hi: usize,
    depth: usize,
    ascending: bool,
) {
    // Buckets at or below this size go to the comparison sort.
    const CUTOFF: usize = 48;
    // Recursion bound for pathologically long shared prefixes.
    const MAX_DEPTH: usize = 1024;
    // 256 byte buckets plus one virtual end-of-string bucket.
    const BUCKETS: usize = 257;

    let span = hi - lo;
    if span < 2 {
        return;
    }

    // Small or very deep bucket: because the first `depth` bytes are shared,
    // stable-sorting on the suffix gives full-string order.
    if span <= CUTOFF || depth >= MAX_DEPTH {
        idx[lo..hi].sort_by(|&left, &right| {
            let left_suffix = &strs[left].as_bytes()[depth..];
            let right_suffix = &strs[right].as_bytes()[depth..];
            if ascending {
                left_suffix.cmp(right_suffix)
            } else {
                right_suffix.cmp(left_suffix)
            }
        });
        return;
    }

    // Bucket numbering follows output order:
    //   ascending  — end-of-string is bucket 0, byte b is bucket b + 1;
    //   descending — byte b is bucket 255 - b, end-of-string is bucket 256.
    let eos_bucket = if ascending { 0 } else { BUCKETS - 1 };
    let bucket_of = |s: &str| -> usize {
        match s.as_bytes().get(depth) {
            Some(&byte) if ascending => usize::from(byte) + 1,
            Some(&byte) => 255 - usize::from(byte),
            None => eos_bucket,
        }
    };

    // Histogram shifted by one, then prefix-summed, so that `starts[k]` is the
    // offset of bucket k within [lo, hi) and `starts[k + 1]` is its end.
    let mut starts = [0usize; BUCKETS + 1];
    for &row in &idx[lo..hi] {
        starts[bucket_of(strs[row]) + 1] += 1;
    }
    for k in 1..starts.len() {
        starts[k] += starts[k - 1];
    }

    // Stable scatter into the scratch range, then copy back.
    let mut cursor = starts;
    for &row in &idx[lo..hi] {
        let slot = &mut cursor[bucket_of(strs[row])];
        aux[lo + *slot] = row;
        *slot += 1;
    }
    idx[lo..hi].copy_from_slice(&aux[lo..hi]);

    // Recurse into the byte buckets. The end-of-string bucket is skipped: its
    // strings are fully equal (length == depth) and already in original
    // relative order.
    for k in (0..BUCKETS).filter(|&k| k != eos_bucket) {
        let (bucket_lo, bucket_hi) = (lo + starts[k], lo + starts[k + 1]);
        if bucket_hi - bucket_lo > 1 {
            utf8_msd_sort_range(strs, idx, aux, bucket_lo, bucket_hi, depth + 1, ascending);
        }
    }
}
2323
/// Bit pattern of `value` with `-0.0` folded into `+0.0`, so both zeros share
/// one key. Every other value (including NaN) keeps its own bits.
fn normalized_float_bits(value: f64) -> u64 {
    // `-0.0 == 0.0` holds, so this branch catches both zeros.
    if value == 0.0 {
        0.0_f64.to_bits()
    } else {
        value.to_bits()
    }
}
2328
2329fn interval_key(interval: &Interval) -> (u64, u64, IntervalClosed) {
2330    (
2331        normalized_float_bits(interval.left),
2332        normalized_float_bits(interval.right),
2333        interval.closed,
2334    )
2335}
2336
/// Hashable membership key for a non-missing scalar — the same equivalence
/// `Column::unique` uses (Float64 ±0.0 normalized to one key). `None` for
/// missing values. Lets the np set-ops (`setdiff1d`/`intersect1d`/`setxor1d`/
/// `in1d`) replace an O(N·M) linear `semantic_eq` scan over the other operand
/// with an O(1) hash-set probe. Because every operand is first passed through
/// `unique()` (which dedups by this exact key) and missing/NaN values are
/// filtered out, key equality matches the `semantic_eq` test on the values that
/// actually flow through.
#[derive(Hash, PartialEq, Eq)]
enum SetMemberKey<'a> {
    Bool(bool),
    Int64(i64),
    /// Bit pattern of an `f64` after `-0.0` has been normalized to `+0.0`.
    FloatBits(u64),
    /// Borrows the string from the scalar it was built from.
    Utf8(&'a str),
    Timedelta64(i64),
    Datetime64(i64),
    Period(i64),
    /// Normalized bits of the left and right endpoints, plus the closed tag.
    Interval(u64, u64, IntervalClosed),
}
2356
2357fn set_member_key(v: &Scalar) -> Option<SetMemberKey<'_>> {
2358    Some(match v {
2359        Scalar::Bool(b) => SetMemberKey::Bool(*b),
2360        Scalar::Int64(i) => SetMemberKey::Int64(*i),
2361        Scalar::Float64(f) => {
2362            let norm = if *f == 0.0 { 0.0 } else { *f };
2363            SetMemberKey::FloatBits(norm.to_bits())
2364        }
2365        Scalar::Utf8(s) => SetMemberKey::Utf8(s.as_str()),
2366        Scalar::Timedelta64(v) => SetMemberKey::Timedelta64(*v),
2367        Scalar::Datetime64(v) => SetMemberKey::Datetime64(*v),
2368        Scalar::Period(v) => SetMemberKey::Period(*v),
2369        Scalar::Interval(v) => {
2370            let (left, right, closed) = interval_key(v);
2371            SetMemberKey::Interval(left, right, closed)
2372        }
2373        Scalar::Null(_) => return None,
2374    })
2375}
2376
/// Failure modes reported by column operations. Display text comes from the
/// `#[error(...)]` attribute on each variant.
#[derive(Debug, Error, Clone, PartialEq)]
pub enum ColumnError {
    /// The two operands have different lengths.
    #[error("column length mismatch: left={left}, right={right}")]
    LengthMismatch { left: usize, right: usize },
    /// `operation` needs exactly `expected` element(s) but received `actual`.
    #[error("{operation} requires exactly {expected} element(s), got {actual}")]
    InvalidLength {
        operation: &'static str,
        expected: usize,
        actual: usize,
    },
    /// A sorter permutation is not valid for a column of length `len`;
    /// `reason` carries the detail.
    #[error("invalid sorter permutation for column of length {len}: {reason}")]
    InvalidSorter { len: usize, reason: String },
    /// A mask was supplied whose dtype is not `Bool`.
    #[error("mask must be Bool dtype; found {dtype:?}")]
    InvalidMaskType { dtype: DType },
    /// The two operands have different dtypes.
    #[error("column dtype mismatch: left={left:?}, right={right:?}")]
    DTypeMismatch { left: DType, right: DType },
    /// An integer was raised to a negative integer power.
    #[error("Integers to negative integer powers are not allowed.")]
    NegativeIntegerPower,
    /// A `TypeError`, forwarded transparently (also reachable via `?`/`From`).
    #[error(transparent)]
    Type(#[from] TypeError),
}
2398
2399impl SparseColumn {
2400    pub fn from_dense(dtype: SparseDType, values: Vec<Scalar>) -> Result<Self, ColumnError> {
2401        let len = values.len();
2402        let value_dtype = dtype.value_dtype;
2403        let fill_value = dtype.fill_value.clone();
2404        let mut indices = Vec::new();
2405        let mut sparse_values = Vec::new();
2406
2407        for (idx, value) in values.into_iter().enumerate() {
2408            let value = if value.dtype() == value_dtype || value.dtype() == DType::Null {
2409                Column::normalize_missing_for_dtype(value, value_dtype)
2410            } else {
2411                cast_scalar_owned(value, value_dtype)?
2412            };
2413
2414            if !value.semantic_eq(&fill_value) {
2415                indices.push(idx);
2416                sparse_values.push(value);
2417            }
2418        }
2419
2420        Ok(Self {
2421            dtype,
2422            len,
2423            indices,
2424            values: sparse_values,
2425        })
2426    }
2427
2428    pub fn from_dense_column(dtype: SparseDType, column: &Column) -> Result<Self, ColumnError> {
2429        Self::from_dense(dtype, column.values().to_vec())
2430    }
2431
2432    #[must_use]
2433    pub fn sparse_dtype(&self) -> &SparseDType {
2434        &self.dtype
2435    }
2436
2437    #[must_use]
2438    pub fn value_dtype(&self) -> DType {
2439        self.dtype.value_dtype
2440    }
2441
2442    #[must_use]
2443    pub fn fill_value(&self) -> &Scalar {
2444        &self.dtype.fill_value
2445    }
2446
2447    #[must_use]
2448    pub fn len(&self) -> usize {
2449        self.len
2450    }
2451
2452    #[must_use]
2453    pub fn is_empty(&self) -> bool {
2454        self.len == 0
2455    }
2456
2457    #[must_use]
2458    pub fn indices(&self) -> &[usize] {
2459        &self.indices
2460    }
2461
2462    #[must_use]
2463    pub fn stored_values(&self) -> &[Scalar] {
2464        &self.values
2465    }
2466
2467    #[must_use]
2468    pub fn npoints(&self) -> usize {
2469        self.values.len()
2470    }
2471
2472    #[must_use]
2473    pub fn density(&self) -> f64 {
2474        if self.len == 0 {
2475            0.0
2476        } else {
2477            self.values.len() as f64 / self.len as f64
2478        }
2479    }
2480
2481    #[must_use]
2482    pub fn to_dense_values(&self) -> Vec<Scalar> {
2483        let mut values = vec![self.dtype.fill_value.clone(); self.len];
2484        for (&idx, value) in self.indices.iter().zip(self.values.iter()) {
2485            values[idx] = value.clone();
2486        }
2487        values
2488    }
2489
2490    pub fn to_dense_column(&self) -> Result<Column, ColumnError> {
2491        Column::new(self.dtype.value_dtype, self.to_dense_values())
2492    }
2493}
2494
/// Converts `value` to `usize`, clamping negatives to `0` and values too
/// large for `usize` to `usize::MAX`.
fn saturating_i64_to_usize(value: i64) -> usize {
    // After clamping at zero the conversion can only fail by being too large.
    usize::try_from(value.max(0)).unwrap_or(usize::MAX)
}
2502
/// Absolute value of `value` as a `usize`, saturating at `usize::MAX` when it
/// does not fit. `unsigned_abs` makes `i64::MIN` safe.
fn saturating_i64_abs_to_usize(value: i64) -> usize {
    let magnitude: u64 = value.unsigned_abs();
    match usize::try_from(magnitude) {
        Ok(fits) => fits,
        Err(_) => usize::MAX,
    }
}
2506
2507fn normalize_head_take(n: i64, len: usize) -> usize {
2508    if n >= 0 {
2509        saturating_i64_to_usize(n).min(len)
2510    } else {
2511        len.saturating_sub(saturating_i64_abs_to_usize(n))
2512    }
2513}
2514
2515fn normalize_tail_window(n: i64, len: usize) -> (usize, usize) {
2516    if n >= 0 {
2517        let take = saturating_i64_to_usize(n).min(len);
2518        (len - take, take)
2519    } else {
2520        let skip = saturating_i64_abs_to_usize(n).min(len);
2521        (skip, len - skip)
2522    }
2523}
2524
/// Round `value` to a multiple of `10^|decimals|` for a negative `decimals`,
/// breaking exact ties toward the even multiple.
///
/// The arithmetic runs in `i128` so neither the magnitude of `i64::MIN` nor
/// the rounded-up multiple can overflow. If `10^|decimals|` itself does not
/// fit in `i128` the result is `0`; a rounded value outside the `i64` range
/// saturates to `i64::MIN` / `i64::MAX`.
fn round_i64_negative_decimals(value: i64, decimals: i32) -> i64 {
    use std::cmp::Ordering;

    debug_assert!(decimals < 0);
    let step = match 10_i128.checked_pow(decimals.unsigned_abs()) {
        Some(step) => step,
        None => return 0,
    };

    let magnitude = i128::from(value).abs();
    let whole_steps = magnitude / step;
    let leftover = magnitude % step;

    // Compare twice the leftover with the step to classify below/above/at
    // the midpoint without fractional arithmetic.
    let bump = match (leftover * 2).cmp(&step) {
        Ordering::Less => false,
        Ordering::Greater => true,
        // Exact tie: bump only when that lands on an even multiple.
        Ordering::Equal => whole_steps % 2 != 0,
    };
    let rounded_magnitude = (whole_steps + i128::from(bump)) * step;
    let signed = if value < 0 {
        -rounded_magnitude
    } else {
        rounded_magnitude
    };

    match i64::try_from(signed) {
        Ok(fits) => fits,
        Err(_) => {
            if signed < 0 {
                i64::MIN
            } else {
                i64::MAX
            }
        }
    }
}
2551
2552impl Column {
2553    fn clone_dense_values_from_cache(&self) -> Option<ScalarValues> {
2554        if self.validity.len() != self.values.len()
2555            || self.validity.count_valid() != self.values.len()
2556        {
2557            return None;
2558        }
2559
2560        match (&self.data, self.dtype) {
2561            (Some(ColumnData::Bool(data)), DType::Bool)
2562                if data.len() == self.values.len() =>
2563            {
2564                // Carry the contiguous bool buffer through the clone as a lazy
2565                // all-valid backing (mirrors the Float64 arm) so `as_bool_slice`
2566                // stays available on the clone — otherwise every bool dense fast
2567                // path (filter masks, duplicated, isin) bails after a clone. The
2568                // Scalar view materializes identically (`map(Scalar::Bool)`) on
2569                // demand. BoolNullable is excluded: an all-valid clone of a
2570                // nullable-bool column must stay Eager so its dtype-tagged Scalar
2571                // view is preserved.
2572                Some(ScalarValues::lazy_all_valid_bool(data.clone()))
2573            }
2574            (Some(ColumnData::Int64(data)), DType::Int64)
2575                if data.len() == self.values.len() =>
2576            {
2577                // Carry the contiguous i64 buffer through the clone as a lazy
2578                // all-valid backing (mirrors the Float64 arm) so `as_i64_slice`
2579                // stays available on the clone. Previously the clone eagerly
2580                // materialized `Vec<Scalar::Int64>`, which both cost a full
2581                // Scalar build AND dropped slice availability — so cloned Int64
2582                // columns silently missed every dense/direct-address fast path
2583                // (groupby, value_counts, dedup, joins). Bit-identical Scalar
2584                // view (`map(Scalar::Int64)`) materializes on demand.
2585                // Int64Nullable is excluded to preserve its dtype-tagged view.
2586                Some(ScalarValues::lazy_all_valid_int64(data.clone()))
2587            }
2588            (Some(ColumnData::Float64(data)), DType::Float64)
2589                if data.len() == self.values.len() =>
2590            {
2591                Some(ScalarValues::lazy_all_valid_float64(data.clone()))
2592            }
2593            (Some(ColumnData::Timedelta64(data)), DType::Timedelta64)
2594                if data.len() == self.values.len() =>
2595            {
2596                Some(ScalarValues::from_vec(
2597                    data.iter().copied().map(Scalar::Timedelta64).collect(),
2598                ))
2599            }
2600            (Some(ColumnData::Datetime64(data)), DType::Datetime64)
2601                if data.len() == self.values.len() =>
2602            {
2603                Some(ScalarValues::from_vec(
2604                    data.iter().copied().map(Scalar::Datetime64).collect(),
2605                ))
2606            }
2607            (Some(ColumnData::Period(data)), DType::Period) if data.len() == self.values.len() => {
2608                Some(ScalarValues::from_vec(
2609                    data.iter().copied().map(Scalar::Period).collect(),
2610                ))
2611            }
2612            _ => None,
2613        }
2614    }
2615
2616    fn cached_data_for_values(dtype: DType, values: &[Scalar]) -> Option<ColumnData> {
2617        match dtype {
2618            DType::Bool
2619            | DType::BoolNullable
2620            | DType::Int64
2621            | DType::Int64Nullable
2622            | DType::Float64
2623            | DType::Timedelta64
2624            | DType::Datetime64
2625            | DType::Period => Some(ColumnData::from_scalars(values, dtype)),
2626            _ => None,
2627        }
2628    }
2629
2630    fn normalize_missing_for_dtype(value: Scalar, dtype: DType) -> Scalar {
2631        match value {
2632            Scalar::Null(NullKind::NaN) => Scalar::Null(NullKind::NaN),
2633            Scalar::Null(NullKind::NaT) => Scalar::Null(NullKind::NaT),
2634            Scalar::Null(_) => Scalar::missing_for_dtype(dtype),
2635            other => other,
2636        }
2637    }
2638
2639    /// Construct a column, coercing values to the target dtype.
2640    /// AG-03: takes ownership of the values vec and uses `cast_scalar_owned`
2641    /// to skip cloning when values already have the correct dtype.
2642    pub fn new(dtype: DType, values: Vec<Scalar>) -> Result<Self, ColumnError> {
2643        let preserve_utf8_object_bucket = matches!(dtype, DType::Utf8)
2644            && values.iter().any(|value| matches!(value, Scalar::Utf8(_)))
2645            && values
2646                .iter()
2647                .any(|value| !matches!(value, Scalar::Utf8(_) | Scalar::Null(_)));
2648        let needs_coercion = values.iter().any(|v| {
2649            let d = v.dtype();
2650            d != dtype && d != DType::Null
2651        }) && !preserve_utf8_object_bucket;
2652
2653        let coerced = if preserve_utf8_object_bucket {
2654            values
2655                .into_iter()
2656                .map(|value| Self::normalize_missing_for_dtype(value, dtype))
2657                .collect()
2658        } else if needs_coercion {
2659            values
2660                .into_iter()
2661                .map(|value| {
2662                    // Constructing a typed column with an explicit dtype is
2663                    // STRICT: a non-integer float cannot be coerced to int64
2664                    // (pandas DataFrame(dtype='int64') raises "Trying to coerce
2665                    // float values to integers"), UNLIKE astype which truncates
2666                    // toward zero. astype pre-truncates via cast_scalar, so this
2667                    // coercion path only ever sees raw floats from the explicit
2668                    // constructor. (br-frankenpandas-8nupg)
2669                    if matches!(dtype, DType::Int64 | DType::Int64Nullable)
2670                        && let Scalar::Float64(v) = &value
2671                        && v.is_finite()
2672                        && v.fract() != 0.0
2673                    {
2674                        return Err(TypeError::LossyFloatToInt { value: *v });
2675                    }
2676                    cast_scalar_owned(value, dtype)
2677                })
2678                .collect::<Result<Vec<_>, _>>()?
2679        } else {
2680            // No coercion needed: values already match dtype.
2681            // Preserve explicit NaN/NaT markers; remap generic Null to dtype-specific missing.
2682            values
2683                .into_iter()
2684                .map(|value| Self::normalize_missing_for_dtype(value, dtype))
2685                .collect()
2686        };
2687
2688        let validity = ValidityMask::from_values(&coerced);
2689
2690        Ok(Self {
2691            dtype,
2692            validity,
2693            data: Self::cached_data_for_values(dtype, &coerced),
2694            values: ScalarValues::from_vec(coerced),
2695        })
2696    }
2697
2698    pub fn from_values(values: Vec<Scalar>) -> Result<Self, ColumnError> {
2699        let dtype = infer_dtype(&values)?;
2700        Self::new(dtype, values)
2701    }
2702
2703    /// Build an all-valid Int64 column from already-typed contiguous values.
2704    ///
2705    /// This carries parser/vector-kernel dtype proofs directly into the
2706    /// columnar representation and delays `Scalar` materialization until a
2707    /// caller explicitly asks for scalar values.
2708    #[must_use]
2709    pub fn from_i64_values(data: Vec<i64>) -> Self {
2710        let len = data.len();
2711        Self {
2712            dtype: DType::Int64,
2713            values: ScalarValues::lazy_all_valid_int64(data),
2714            validity: ValidityMask::all_valid(len),
2715            data: None,
2716        }
2717    }
2718
2719    /// Build an all-valid Utf8 column from a contiguous byte buffer + n+1
2720    /// offsets (br-frankenpandas-2krr0). `bytes[offsets[i]..offsets[i+1]]`
2721    /// must be valid UTF-8 for every row — string-output ops guarantee this
2722    /// by writing only `&str` data. Semantically identical to
2723    /// `Column::new(DType::Utf8, scalars)` over the same strings, but the
2724    /// per-row `String`/`Scalar` boxing is deferred until a consumer reads
2725    /// the Scalar view.
2726    #[must_use]
2727    #[doc(hidden)]
2728    pub fn from_utf8_contiguous(bytes: Vec<u8>, offsets: Vec<usize>) -> Self {
2729        let len = offsets.len().saturating_sub(1);
2730        Self {
2731            dtype: DType::Utf8,
2732            values: ScalarValues::lazy_contiguous_utf8(bytes, offsets),
2733            validity: ValidityMask::all_valid(len),
2734            data: None,
2735        }
2736    }
2737
2738    /// Build an all-valid Int64 column from `(value, run_len)` repeat runs
2739    /// (br-frankenpandas-3ad4n). Semantically identical to
2740    /// `from_i64_values(expanded)` where `expanded` repeats each `value`
2741    /// `run_len` times, but carries only O(runs) memory until a consumer
2742    /// forces the contiguous buffer or Scalar view.
2743    #[must_use]
2744    #[doc(hidden)]
2745    pub fn from_i64_repeat_runs(runs: Vec<(i64, usize)>) -> Self {
2746        let total_len = runs.iter().map(|&(_, run_len)| run_len).sum();
2747        Self {
2748            dtype: DType::Int64,
2749            values: ScalarValues::lazy_repeat_runs_int64(runs, total_len),
2750            validity: ValidityMask::all_valid(total_len),
2751            data: None,
2752        }
2753    }
2754
2755    /// Build an all-valid Int64 column from per-run values plus a shared
2756    /// run-length descriptor. Semantically identical to
2757    /// [`Column::from_i64_repeat_runs`] over `run_values.zip(run_lens)`, but
2758    /// dense join lanes can share `run_lens` across columns.
2759    #[must_use]
2760    #[doc(hidden)]
2761    pub fn from_i64_repeat_values_run_lengths(
2762        run_values: Vec<i64>,
2763        run_lens: Arc<[usize]>,
2764    ) -> Self {
2765        let total_len = run_lens.iter().sum();
2766        Self {
2767            dtype: DType::Int64,
2768            values: ScalarValues::lazy_repeat_values_int64(run_values, run_lens, total_len),
2769            validity: ValidityMask::all_valid(total_len),
2770            data: None,
2771        }
2772    }
2773
2774    /// Build an all-valid Int64 column from repeated slices of one shared
2775    /// tape. Semantically identical to concatenating
2776    /// `data[start..start+len]` for each segment and calling
2777    /// [`Column::from_i64_values`].
2778    #[must_use]
2779    #[doc(hidden)]
2780    pub fn from_i64_repeated_slices(data: Vec<i64>, segments: Vec<(usize, usize)>) -> Self {
2781        let total_len = segments.iter().map(|&(_, len)| len).sum();
2782        Self {
2783            dtype: DType::Int64,
2784            values: ScalarValues::lazy_repeated_slices_int64(data, segments, total_len),
2785            validity: ValidityMask::all_valid(total_len),
2786            data: None,
2787        }
2788    }
2789
2790    /// Shared-descriptor counterpart of [`Column::from_i64_repeated_slices`].
2791    /// `total_len` must equal the sum of all segment lengths.
2792    #[must_use]
2793    #[doc(hidden)]
2794    pub fn from_i64_repeated_slices_shared(
2795        data: Vec<i64>,
2796        segments: Arc<[(usize, usize)]>,
2797        total_len: usize,
2798    ) -> Self {
2799        Self {
2800            dtype: DType::Int64,
2801            values: ScalarValues::lazy_repeated_slices_int64_shared(data, segments, total_len),
2802            validity: ValidityMask::all_valid(total_len),
2803            data: None,
2804        }
2805    }
2806
2807    /// Build an all-valid Float64 column from already-typed contiguous values.
2808    ///
2809    /// This is the typed ingestion counterpart to `Column::new(DType::Float64,
2810    /// Vec<Scalar>)` for sources that have already proven every value is a
2811    /// valid f64.
2812    #[must_use]
2813    pub fn from_f64_values(data: Vec<f64>) -> Self {
2814        let len = data.len();
2815        // pandas treats NaN in a float column as MISSING. The Scalar path
2816        // (Column::new -> ValidityMask::from_values) already marks NaN invalid
2817        // via Scalar::is_missing, so this typed-ingestion path must agree —
2818        // otherwise a caller passing NaN gets a column claiming all-valid and
2819        // as_f64_slice would hand the NaN out as a real value. Fast all-valid
2820        // path (cheap u64::MAX fill) when no NaN is present; per-bit mask only
2821        // when one is. (br-frankenpandas-jyhf7)
2822        let validity = if data.iter().any(|v| v.is_nan()) {
2823            ValidityMask::from_f64(&data)
2824        } else {
2825            ValidityMask::all_valid(len)
2826        };
2827        Self {
2828            dtype: DType::Float64,
2829            values: ScalarValues::lazy_all_valid_float64(data),
2830            validity,
2831            data: None,
2832        }
2833    }
2834
2835    /// Public (hidden) for fp-join's fused dense outer-merge builder
2836    /// (br-frankenpandas-343ho); invalid slots carry the 0.0-datum convention
2837    /// and materialize `Scalar::Null(NullKind::NaN)`.
2838    #[doc(hidden)]
2839    pub fn from_f64_values_with_validity(data: Vec<f64>, validity: ValidityMask) -> Self {
2840        debug_assert_eq!(data.len(), validity.len());
2841        if validity.all() {
2842            return Self::from_f64_values(data);
2843        }
2844        Self {
2845            dtype: DType::Float64,
2846            values: ScalarValues::lazy_nullable_float64(data, validity.clone()),
2847            validity,
2848            data: None,
2849        }
2850    }
2851
2852    /// Nullable Int64 counterpart of `from_f64_values_with_validity`
2853    /// (br-frankenpandas-lt5qx): invalid slots materialize
2854    /// `Scalar::Null(NullKind::Null)` (= `missing_for_dtype(Int64)`), valid
2855    /// slots `Scalar::Int64(data[i])`. Public (hidden) for fp-join's fused
2856    /// dense left-merge builder (br-frankenpandas-7wxoc).
2857    #[doc(hidden)]
2858    pub fn from_i64_values_with_validity(data: Vec<i64>, validity: ValidityMask) -> Self {
2859        debug_assert_eq!(data.len(), validity.len());
2860        if validity.all() {
2861            return Self::from_i64_values(data);
2862        }
2863        Self {
2864            dtype: DType::Int64,
2865            values: ScalarValues::lazy_nullable_int64(data, validity.clone()),
2866            validity,
2867            data: None,
2868        }
2869    }
2870
2871    /// Null-introducing positional reindex with Float64 promotion
2872    /// (br-frankenpandas-1bvcl): gather an all-valid Int64/Float64 column by
2873    /// `Option<usize>` positions into a nullable Float64 column without the
2874    /// per-row `Scalar` clone + `cast_scalar_owned` + `Column::new`
2875    /// revalidation. `None` (or out-of-range) slots take the established
2876    /// aligned-binary gap convention — 0.0 datum + invalid bit — which
2877    /// materializes `Scalar::Null(NullKind::NaN)`, exactly what the eager
2878    /// path's `missing_for_dtype(Float64)` cast produces; matched Int64 slots
2879    /// use `v as f64`, the same conversion as the `cast_scalar_owned`
2880    /// Int64->Float64 arm. Returns `None` for any other source (nullable,
2881    /// non-numeric), where the caller's `Scalar` path is the one that must
2882    /// reason about missingness.
2883    #[must_use]
2884    #[doc(hidden)]
2885    pub fn reindex_promote_float64_by_optional_positions(
2886        &self,
2887        positions: &[Option<usize>],
2888    ) -> Option<Self> {
2889        enum TypedSource<'a> {
2890            Int64(&'a [i64]),
2891            Float64(&'a [f64]),
2892        }
2893        let source = if let Some(slice) = self.as_i64_slice() {
2894            TypedSource::Int64(slice)
2895        } else {
2896            let slice = self.as_f64_slice()?;
2897            TypedSource::Float64(slice)
2898        };
2899
2900        let n = positions.len();
2901        let len = self.len();
2902        let mut data = Vec::with_capacity(n);
2903        let mut words = vec![0_u64; n.div_ceil(64)];
2904        for (out_idx, slot) in positions.iter().enumerate() {
2905            match slot {
2906                Some(idx) if *idx < len => {
2907                    let value = match source {
2908                        TypedSource::Int64(slice) => slice[*idx] as f64,
2909                        TypedSource::Float64(slice) => slice[*idx],
2910                    };
2911                    data.push(value);
2912                    // All-valid sources carry no NaN (from_f64_values marks
2913                    // NaN invalid), so every matched slot is valid.
2914                    words[out_idx / 64] |= 1_u64 << (out_idx % 64);
2915                }
2916                _ => data.push(0.0),
2917            }
2918        }
2919        Some(Self::from_f64_values_with_validity(
2920            data,
2921            ValidityMask { words, len: n },
2922        ))
2923    }
2924
2925    /// Borrow the column's contiguous `f64` buffer when this is an all-valid
2926    /// `Float64` column, enabling typed/SIMD reductions without the per-element
2927    /// `Scalar` match. Returns `None` for any other dtype or when the column
2928    /// has missing values — callers fall back to the `Scalar` path, which is
2929    /// the only path that must reason about missingness. Per
2930    /// br-frankenpandas-lei31.
2931    #[must_use]
2932    pub fn as_f64_slice(&self) -> Option<&[f64]> {
2933        if self.dtype == DType::Float64 && self.validity.all() {
2934            if let Some(ColumnData::Float64(data)) = &self.data {
2935                return Some(data.as_slice());
2936            }
2937            if let ScalarValues::LazyAllValidFloat64 { data, .. } = &self.values {
2938                return Some(data.as_ref());
2939            }
2940        }
2941        None
2942    }
2943
2944    /// Borrow the column's contiguous `i64` buffer when this is an all-valid
2945    /// `Int64` column. See [`Column::as_f64_slice`].
2946    #[must_use]
2947    pub fn as_i64_slice(&self) -> Option<&[i64]> {
2948        if self.dtype == DType::Int64 && self.validity.all() {
2949            if let Some(ColumnData::Int64(data)) = &self.data {
2950                return Some(data.as_slice());
2951            }
2952            if let ScalarValues::LazyAllValidInt64 { data, .. } = &self.values {
2953                return Some(data.as_ref());
2954            }
2955            if let Some(data) = self.values.repeat_runs_i64_data() {
2956                return Some(data);
2957            }
2958            if let Some(data) = self.values.repeated_slices_i64_data() {
2959                return Some(data);
2960            }
2961        }
2962        None
2963    }
2964
2965    /// Build an all-valid `Bool` column from already-typed contiguous values.
2966    ///
2967    /// The typed-ingestion counterpart for boolean results (comparison masks,
2968    /// predicates) — see [`Column::from_f64_values`]. Defers `Scalar`
2969    /// materialization until a caller asks for scalar values.
2970    #[must_use]
2971    pub fn from_bool_values(data: Vec<bool>) -> Self {
2972        let len = data.len();
2973        Self {
2974            dtype: DType::Bool,
2975            values: ScalarValues::lazy_all_valid_bool(data),
2976            validity: ValidityMask::all_valid(len),
2977            data: None,
2978        }
2979    }
2980
2981    /// Borrow the column's contiguous Utf8 backing — `(bytes, offsets)` with
2982    /// row `i` = `bytes[offsets[i]..offsets[i+1]]`, always valid UTF-8 —
2983    /// when this is an all-valid Utf8 column carrying the
2984    /// `LazyContiguousUtf8` representation (br-frankenpandas-2krr0 rung 3).
2985    /// Lets chained string ops read the previous op's output without ever
2986    /// materializing its `Vec<Scalar>` view. Returns `None` for Scalar-backed
2987    /// or nullable columns — callers fall back to `values()`.
2988    #[must_use]
2989    #[doc(hidden)]
2990    pub fn as_utf8_contiguous(&self) -> Option<(&[u8], &[usize])> {
2991        if self.dtype == DType::Utf8
2992            && self.validity.all()
2993            && let ScalarValues::LazyContiguousUtf8 { bytes, offsets, .. } = &self.values
2994        {
2995            return Some((bytes.as_ref(), offsets.as_ref()));
2996        }
2997        None
2998    }
2999
3000    /// Share the `Arc` contiguous-Utf8 backing plus the source-row offset of
3001    /// row 0, for an all-valid `LazyContiguousUtf8` (offset 0) or an existing
3002    /// `LazyUtf8Slice` view (offset `start`). The two `Arc::clone`s are O(1) and
3003    /// let `take_positions` return a contiguous-range view without copying
3004    /// (br-frankenpandas-jbyuc.1.1.1).
3005    fn utf8_arc_view_source(&self) -> Option<Utf8ArcViewSource> {
3006        if self.dtype != DType::Utf8 || !self.validity.all() {
3007            return None;
3008        }
3009        match &self.values {
3010            ScalarValues::LazyContiguousUtf8 { bytes, offsets, .. } => {
3011                Some((Arc::clone(bytes), Arc::clone(offsets), 0))
3012            }
3013            ScalarValues::LazyUtf8Slice {
3014                bytes,
3015                offsets,
3016                start,
3017                ..
3018            } => Some((Arc::clone(bytes), Arc::clone(offsets), *start)),
3019            _ => None,
3020        }
3021    }
3022
3023    /// Borrow the contiguous Utf8 backing only when its byte spans are already
3024    /// strictly increasing. The witness is cached on the immutable contiguous
3025    /// backing so repeated ordered joins do not rescan both key columns.
3026    #[must_use]
3027    #[doc(hidden)]
3028    pub fn as_strictly_increasing_utf8_contiguous(&self) -> Option<(&[u8], &[usize])> {
3029        if self.dtype == DType::Utf8
3030            && self.validity.all()
3031            && let ScalarValues::LazyContiguousUtf8 {
3032                bytes,
3033                offsets,
3034                strictly_increasing,
3035                ..
3036            } = &self.values
3037            && *strictly_increasing
3038                .get_or_init(|| contiguous_utf8_offsets_are_strictly_increasing(bytes, offsets))
3039        {
3040            return Some((bytes.as_ref(), offsets.as_ref()));
3041        }
3042        None
3043    }
3044
3045    /// Borrow a strict contiguous-Utf8 backing and its fixed row byte width.
3046    ///
3047    /// The fixed-width witness is cached next to the strict-increasing witness:
3048    /// ordered string joins can then detect a long equal byte window once and
3049    /// emit a whole range of 1:1 matches without per-row byte-span comparisons.
3050    #[must_use]
3051    pub fn as_fixed_width_strictly_increasing_utf8_contiguous(
3052        &self,
3053    ) -> Option<(&[u8], &[usize], usize)> {
3054        if self.dtype == DType::Utf8
3055            && self.validity.all()
3056            && let ScalarValues::LazyContiguousUtf8 {
3057                bytes,
3058                offsets,
3059                strictly_increasing,
3060                fixed_width,
3061                ..
3062            } = &self.values
3063            && *strictly_increasing
3064                .get_or_init(|| contiguous_utf8_offsets_are_strictly_increasing(bytes, offsets))
3065        {
3066            let width = fixed_width
3067                .get_or_init(|| contiguous_utf8_fixed_width(offsets))
3068                .as_ref()
3069                .copied()?;
3070            return Some((bytes.as_ref(), offsets.as_ref(), width));
3071        }
3072        None
3073    }
3074
3075    /// Borrow the column's contiguous `bool` buffer when this is an all-valid
3076    /// `Bool` column. See [`Column::as_f64_slice`].
3077    #[must_use]
3078    pub fn as_bool_slice(&self) -> Option<&[bool]> {
3079        if self.dtype == DType::Bool && self.validity.all() {
3080            if let Some(ColumnData::Bool(data)) = &self.data {
3081                return Some(data.as_slice());
3082            }
3083            if let ScalarValues::LazyAllValidBool { data, .. } = &self.values {
3084                return Some(data.as_ref());
3085            }
3086        }
3087        None
3088    }
3089
3090    /// Gather a new column from the given row positions of `self`.
3091    ///
3092    /// This is the fast path for materialization (`take`, `iloc`, boolean
3093    /// filter, `sort_values`, `drop_duplicates`, `reindex`, `head`/`tail`,
3094    /// groupby row selection). Because every gathered value originates from
3095    /// `self` it already matches `self.dtype` (no coercion needed), so this
3096    /// skips the dtype-coercion and object-bucket detection scans that
3097    /// `Column::new` performs. All-valid source columns clone values directly
3098    /// and emit an all-valid mask; missing-bearing columns fold the
3099    /// missing-normalization and validity rebuild into a single pass.
3100    ///
3101    /// The output is bit-for-bit identical to
3102    /// `Column::new(self.dtype(), positions.iter().map(|&p| self.values[p].clone()).collect())`
3103    /// (the no-coercion branch `Column::new` takes for same-dtype input): each
3104    /// gathered value is missing-normalized via `normalize_missing_for_dtype`
3105    /// (generic `Null` → dtype-specific missing, e.g. `NaT` for datetime), and
3106    /// the validity mask is recomputed from the normalized values'
3107    /// `is_missing()` exactly as `ValidityMask::from_values` would.
3108    ///
3109    /// # Panics
3110    /// Panics if any position is out of bounds (callers materialize from
3111    /// validated index positions; this mirrors the prior `values()[pos]` index).
3112    #[must_use]
3113    pub fn take_positions(&self, positions: &[usize]) -> Self {
3114        let n = positions.len();
3115        if self.validity.all() {
3116            if let Some(data) = self.take_cached_all_valid_float64_positions(positions) {
3117                return Self {
3118                    dtype: self.dtype,
3119                    values: ScalarValues::lazy_all_valid_float64(data),
3120                    validity: ValidityMask::all_valid(n),
3121                    data: None,
3122                };
3123            }
3124
3125            // Symmetric to the Float64 path: gather the contiguous i64 buffer and
3126            // keep the output lazily typed instead of materializing a
3127            // Vec<Scalar::Int64> (32 B/elem). Bit-identical — lazy_all_valid_int64
3128            // materializes Scalar::Int64(data[i]) exactly as the primitive path
3129            // would, with the same all-valid mask. (br-frankenpandas-uza04)
3130            if let Some(data) = self.take_cached_all_valid_int64_positions(positions) {
3131                return Self {
3132                    dtype: self.dtype,
3133                    values: ScalarValues::lazy_all_valid_int64(data),
3134                    validity: ValidityMask::all_valid(n),
3135                    data: None,
3136                };
3137            }
3138
3139            // Zero-copy contiguous-range view (br-frankenpandas-jbyuc.1.1.1):
3140            // when the requested positions are a contiguous ascending range over
3141            // an Arc-shared contiguous-Utf8 backing, share the source `bytes`/
3142            // `offsets` and defer the per-row byte gather instead of copying.
3143            // Bit-identical to the eager gather below: the view materializes
3144            // `Scalar::Utf8` of `bytes[off[start+i]..off[start+i+1]]` for
3145            // `i in 0..n` — the exact same spans, same order, all-valid mask.
3146            //
3147            // Gated `n >= 64`: a view keeps the *whole* source buffer alive via
3148            // `Arc`, so a tiny contiguous take (head/tail/single-row iloc) would
3149            // pin a potentially large buffer to hold a handful of rows. Small
3150            // takes fall through to the eager gather (a cheap, independent copy);
3151            // only sizeable contiguous ranges — the join-output shape this lever
3152            // targets — take the zero-copy view.
3153            if n >= 64
3154                && let Some((src_bytes, src_offsets, src_start)) = self.utf8_arc_view_source()
3155                && let Some(range_start) = contiguous_ascending_start(positions)
3156            {
3157                return Self {
3158                    dtype: self.dtype,
3159                    values: ScalarValues::lazy_utf8_slice(
3160                        src_bytes,
3161                        src_offsets,
3162                        src_start + range_start,
3163                        n,
3164                    ),
3165                    validity: ValidityMask::all_valid(n),
3166                    data: None,
3167                };
3168            }
3169
3170            // Contiguous-Utf8 gather (br-frankenpandas-nl1tw): an all-valid
3171            // `LazyContiguousUtf8` column gathers its selected byte spans into one
3172            // fresh `bytes` buffer + `offsets`, keeping the output lazily typed —
3173            // no per-row `String` heap clone and no lazy Scalar re-materialization.
3174            // Bit-identical to the Scalar-clone path: each output slot materializes
3175            // `Scalar::Utf8` of the exact same span bytes in the same order, with an
3176            // all-valid mask (the source is all-valid by the enclosing branch).
3177            if let Some((bytes, offsets)) = self.as_utf8_contiguous() {
3178                let total: usize = positions
3179                    .iter()
3180                    .map(|&pos| offsets[pos + 1] - offsets[pos])
3181                    .sum();
3182                let mut new_bytes = Vec::with_capacity(total);
3183                let mut new_offsets = Vec::with_capacity(n + 1);
3184                new_offsets.push(0);
3185                for &pos in positions {
3186                    new_bytes.extend_from_slice(&bytes[offsets[pos]..offsets[pos + 1]]);
3187                    new_offsets.push(new_bytes.len());
3188                }
3189                return Self {
3190                    dtype: self.dtype,
3191                    values: ScalarValues::lazy_contiguous_utf8(new_bytes, new_offsets),
3192                    validity: ValidityMask::all_valid(n),
3193                    data: None,
3194                };
3195            }
3196
3197            let values = self
3198                .take_all_valid_primitive_positions(positions)
3199                .unwrap_or_else(|| {
3200                    positions
3201                        .iter()
3202                        .map(|&pos| self.values[pos].clone())
3203                        .collect()
3204                });
3205            return Self {
3206                dtype: self.dtype,
3207                values: ScalarValues::from_vec(values),
3208                validity: ValidityMask::all_valid(n),
3209                data: None,
3210            };
3211        }
3212
3213        // Typed nullable Float64 gather: when the source carries a contiguous
3214        // f64 buffer with a validity mask (LazyNullableFloat64), gather the data
3215        // and the validity bits directly instead of cloning a Scalar per row.
3216        // Bit-identical: that variant materializes Float64(data[i]) when
3217        // valid-or-NaN and Null(NaN) otherwise, so the missingness of slot `pos`
3218        // is `validity.get(pos) && !data[pos].is_nan()`; carrying that exact bit
3219        // (and the raw datum) into from_f64_values_with_validity reproduces the
3220        // same Scalar at every slot, while skipping the 32 B/elem Vec<Scalar>.
3221        if let ScalarValues::LazyNullableFloat64 { data: src, .. } = &self.values {
3222            let mut data = Vec::with_capacity(n);
3223            let mut words = vec![0_u64; n.div_ceil(64)];
3224            for (out_idx, &pos) in positions.iter().enumerate() {
3225                let x = src[pos];
3226                data.push(x);
3227                if self.validity.get(pos) && !x.is_nan() {
3228                    words[out_idx / 64] |= 1_u64 << (out_idx % 64);
3229                }
3230            }
3231            return Self::from_f64_values_with_validity(data, ValidityMask { words, len: n });
3232        }
3233
3234        let mut values = Vec::with_capacity(n);
3235        let mut words = vec![0_u64; n.div_ceil(64)];
3236        for (out_idx, &pos) in positions.iter().enumerate() {
3237            let value = Self::normalize_missing_for_dtype(self.values[pos].clone(), self.dtype);
3238            if !value.is_missing() {
3239                words[out_idx / 64] |= 1_u64 << (out_idx % 64);
3240            }
3241            values.push(value);
3242        }
3243        Self {
3244            dtype: self.dtype,
3245            values: ScalarValues::from_vec(values),
3246            validity: ValidityMask { words, len: n },
3247            data: None,
3248        }
3249    }
3250
3251    fn take_cached_all_valid_float64_positions(&self, positions: &[usize]) -> Option<Vec<f64>> {
3252        let data = self.as_f64_slice()?;
3253        let mut values = Vec::with_capacity(positions.len());
3254        for &pos in positions {
3255            values.push(data[pos]);
3256        }
3257        Some(values)
3258    }
3259
3260    fn take_cached_all_valid_int64_positions(&self, positions: &[usize]) -> Option<Vec<i64>> {
3261        let data = self.as_i64_slice()?;
3262        let mut values = Vec::with_capacity(positions.len());
3263        for &pos in positions {
3264            values.push(data[pos]);
3265        }
3266        Some(values)
3267    }
3268
3269    fn take_all_valid_primitive_positions(&self, positions: &[usize]) -> Option<Vec<Scalar>> {
3270        if let Some(values) = self.take_cached_all_valid_primitive_positions(positions) {
3271            return Some(values);
3272        }
3273
3274        let mut values = Vec::with_capacity(positions.len());
3275        match self.dtype {
3276            DType::Bool | DType::BoolNullable => {
3277                for &pos in positions {
3278                    match &self.values[pos] {
3279                        Scalar::Bool(value) => values.push(Scalar::Bool(*value)),
3280                        _ => return None,
3281                    }
3282                }
3283            }
3284            DType::Int64 | DType::Int64Nullable => {
3285                for &pos in positions {
3286                    match &self.values[pos] {
3287                        Scalar::Int64(value) => values.push(Scalar::Int64(*value)),
3288                        _ => return None,
3289                    }
3290                }
3291            }
3292            DType::Float64 => {
3293                for &pos in positions {
3294                    match &self.values[pos] {
3295                        Scalar::Float64(value) => values.push(Scalar::Float64(*value)),
3296                        _ => return None,
3297                    }
3298                }
3299            }
3300            DType::Timedelta64 => {
3301                for &pos in positions {
3302                    match &self.values[pos] {
3303                        Scalar::Timedelta64(value) => values.push(Scalar::Timedelta64(*value)),
3304                        _ => return None,
3305                    }
3306                }
3307            }
3308            DType::Datetime64 => {
3309                for &pos in positions {
3310                    match &self.values[pos] {
3311                        Scalar::Datetime64(value) => values.push(Scalar::Datetime64(*value)),
3312                        _ => return None,
3313                    }
3314                }
3315            }
3316            DType::Period => {
3317                for &pos in positions {
3318                    match &self.values[pos] {
3319                        Scalar::Period(value) => values.push(Scalar::Period(*value)),
3320                        _ => return None,
3321                    }
3322                }
3323            }
3324            _ => return None,
3325        }
3326        Some(values)
3327    }
3328
3329    fn take_cached_all_valid_primitive_positions(
3330        &self,
3331        positions: &[usize],
3332    ) -> Option<Vec<Scalar>> {
3333        match self.dtype {
3334            DType::Bool => {
3335                if let Some(data) = self.as_bool_slice() {
3336                    let mut values = Vec::with_capacity(positions.len());
3337                    for &pos in positions {
3338                        values.push(Scalar::Bool(data[pos]));
3339                    }
3340                    return Some(values);
3341                }
3342            }
3343            DType::Int64 => {
3344                if let Some(data) = self.as_i64_slice() {
3345                    let mut values = Vec::with_capacity(positions.len());
3346                    for &pos in positions {
3347                        values.push(Scalar::Int64(data[pos]));
3348                    }
3349                    return Some(values);
3350                }
3351            }
3352            DType::Float64 => {
3353                if let Some(data) = self.as_f64_slice() {
3354                    let mut values = Vec::with_capacity(positions.len());
3355                    for &pos in positions {
3356                        values.push(Scalar::Float64(data[pos]));
3357                    }
3358                    return Some(values);
3359                }
3360            }
3361            _ => {}
3362        }
3363
3364        let data = self.data.as_ref()?;
3365        let mut values = Vec::with_capacity(positions.len());
3366        match (self.dtype, data) {
3367            (DType::Bool | DType::BoolNullable, ColumnData::Bool(data)) => {
3368                for &pos in positions {
3369                    values.push(Scalar::Bool(data[pos]));
3370                }
3371            }
3372            (DType::Int64 | DType::Int64Nullable, ColumnData::Int64(data)) => {
3373                for &pos in positions {
3374                    values.push(Scalar::Int64(data[pos]));
3375                }
3376            }
3377            (DType::Float64, ColumnData::Float64(data)) => {
3378                for &pos in positions {
3379                    values.push(Scalar::Float64(data[pos]));
3380                }
3381            }
3382            (DType::Timedelta64, ColumnData::Timedelta64(data)) => {
3383                for &pos in positions {
3384                    values.push(Scalar::Timedelta64(data[pos]));
3385                }
3386            }
3387            (DType::Datetime64, ColumnData::Datetime64(data)) => {
3388                for &pos in positions {
3389                    values.push(Scalar::Datetime64(data[pos]));
3390                }
3391            }
3392            (DType::Period, ColumnData::Period(data)) => {
3393                for &pos in positions {
3394                    values.push(Scalar::Period(data[pos]));
3395                }
3396            }
3397            _ => return None,
3398        }
3399        Some(values)
3400    }
3401
3402    /// Create a column filled with zeros.
3403    ///
3404    /// Matches np.zeros().
3405    pub fn zeros(n: usize, dtype: DType) -> Result<Self, ColumnError> {
3406        let zero = match dtype {
3407            DType::Int64 => Scalar::Int64(0),
3408            DType::Float64 => Scalar::Float64(0.0),
3409            DType::Bool => Scalar::Bool(false),
3410            _ => Scalar::Int64(0),
3411        };
3412        Self::new(dtype, vec![zero; n])
3413    }
3414
3415    /// Create a column filled with ones.
3416    ///
3417    /// Matches np.ones().
3418    pub fn ones(n: usize, dtype: DType) -> Result<Self, ColumnError> {
3419        let one = match dtype {
3420            DType::Int64 => Scalar::Int64(1),
3421            DType::Float64 => Scalar::Float64(1.0),
3422            DType::Bool => Scalar::Bool(true),
3423            _ => Scalar::Int64(1),
3424        };
3425        Self::new(dtype, vec![one; n])
3426    }
3427
3428    /// Create a column filled with a given value.
3429    ///
3430    /// Matches np.full().
3431    pub fn full(n: usize, fill_value: Scalar) -> Result<Self, ColumnError> {
3432        let dtype = fill_value.dtype();
3433        Self::new(dtype, vec![fill_value; n])
3434    }
3435
3436    /// Create a zeros column with same shape and dtype as self.
3437    pub fn zeros_like(&self) -> Result<Self, ColumnError> {
3438        Self::zeros(self.len(), self.dtype)
3439    }
3440
3441    /// Create a ones column with same shape and dtype as self.
3442    pub fn ones_like(&self) -> Result<Self, ColumnError> {
3443        Self::ones(self.len(), self.dtype)
3444    }
3445
3446    /// Create a column filled with fill_value with same shape as self.
3447    pub fn full_like(&self, fill_value: Scalar) -> Result<Self, ColumnError> {
3448        Self::new(self.dtype, vec![fill_value; self.len()])
3449    }
3450
3451    /// Create an empty column with same dtype as self.
3452    pub fn empty_like(&self) -> Result<Self, ColumnError> {
3453        Self::new(self.dtype, Vec::new())
3454    }
3455
3456    /// Create a column with evenly spaced values in [start, stop).
3457    ///
3458    /// Matches np.arange().
3459    pub fn arange(start: f64, stop: f64, step: f64) -> Result<Self, ColumnError> {
3460        if step == 0.0 {
3461            return Err(ColumnError::Type(TypeError::NonNumericValue {
3462                value: "step cannot be zero".to_string(),
3463                dtype: DType::Float64,
3464            }));
3465        }
3466        let mut values = Vec::new();
3467        let mut x = start;
3468        if step > 0.0 {
3469            while x < stop {
3470                values.push(Scalar::Float64(x));
3471                x += step;
3472            }
3473        } else {
3474            while x > stop {
3475                values.push(Scalar::Float64(x));
3476                x += step;
3477            }
3478        }
3479        Self::new(DType::Float64, values)
3480    }
3481
3482    /// Create a column with evenly spaced values over [start, stop].
3483    ///
3484    /// Matches np.linspace().
3485    pub fn linspace(start: f64, stop: f64, num: usize) -> Result<Self, ColumnError> {
3486        if num == 0 {
3487            return Self::new(DType::Float64, Vec::new());
3488        }
3489        if num == 1 {
3490            return Self::new(DType::Float64, vec![Scalar::Float64(start)]);
3491        }
3492        let step = (stop - start) / (num - 1) as f64;
3493        let values: Vec<Scalar> = (0..num)
3494            .map(|i| Scalar::Float64(start + step * i as f64))
3495            .collect();
3496        Self::new(DType::Float64, values)
3497    }
3498
3499    /// Create a column with evenly spaced values on a log scale.
3500    ///
3501    /// Matches np.logspace().
3502    pub fn logspace(start: f64, stop: f64, num: usize) -> Result<Self, ColumnError> {
3503        let lin = Self::linspace(start, stop, num)?;
3504        let values: Vec<Scalar> = lin
3505            .values()
3506            .iter()
3507            .map(|v| match v {
3508                Scalar::Float64(x) => Scalar::Float64(10.0_f64.powf(*x)),
3509                _ => v.clone(),
3510            })
3511            .collect();
3512        Self::new(DType::Float64, values)
3513    }
3514
3515    /// Create values evenly spaced on a log scale (geometric progression).
3516    ///
3517    /// Matches np.geomspace(start, stop, num). Unlike logspace, start and stop
3518    /// are the actual boundary values (not exponents).
3519    pub fn geomspace(start: f64, stop: f64, num: usize) -> Result<Self, ColumnError> {
3520        if num == 0 {
3521            return Self::new(DType::Float64, vec![]);
3522        }
3523        if start == 0.0 || stop == 0.0 {
3524            return Err(ColumnError::Type(TypeError::NonNumericValue {
3525                value: "geomspace endpoints cannot be zero".to_owned(),
3526                dtype: DType::Float64,
3527            }));
3528        }
3529        if num == 1 {
3530            return Self::new(DType::Float64, vec![Scalar::Float64(start)]);
3531        }
3532
3533        let log_start = start.ln();
3534        let log_stop = stop.ln();
3535        let step = (log_stop - log_start) / (num - 1) as f64;
3536        let values: Vec<Scalar> = (0..num)
3537            .map(|i| Scalar::Float64((log_start + step * i as f64).exp()))
3538            .collect();
3539        Self::new(DType::Float64, values)
3540    }
3541
3542    /// Generate a Hann (Hanning) window.
3543    ///
3544    /// Matches np.hanning(M). Returns a raised cosine window of length M.
3545    pub fn hanning(m: usize) -> Result<Self, ColumnError> {
3546        if m == 0 {
3547            return Self::new(DType::Float64, vec![]);
3548        }
3549        if m == 1 {
3550            return Self::new(DType::Float64, vec![Scalar::Float64(1.0)]);
3551        }
3552        let values: Vec<Scalar> = (0..m)
3553            .map(|n| {
3554                let val =
3555                    0.5 - 0.5 * (2.0 * std::f64::consts::PI * n as f64 / (m - 1) as f64).cos();
3556                Scalar::Float64(val)
3557            })
3558            .collect();
3559        Self::new(DType::Float64, values)
3560    }
3561
3562    /// Generate a Hamming window.
3563    ///
3564    /// Matches np.hamming(M). Returns a Hamming window of length M.
3565    pub fn hamming(m: usize) -> Result<Self, ColumnError> {
3566        if m == 0 {
3567            return Self::new(DType::Float64, vec![]);
3568        }
3569        if m == 1 {
3570            return Self::new(DType::Float64, vec![Scalar::Float64(1.0)]);
3571        }
3572        let values: Vec<Scalar> = (0..m)
3573            .map(|n| {
3574                let val =
3575                    0.54 - 0.46 * (2.0 * std::f64::consts::PI * n as f64 / (m - 1) as f64).cos();
3576                Scalar::Float64(val)
3577            })
3578            .collect();
3579        Self::new(DType::Float64, values)
3580    }
3581
3582    /// Generate a Blackman window.
3583    ///
3584    /// Matches np.blackman(M). Returns a Blackman window of length M.
3585    pub fn blackman(m: usize) -> Result<Self, ColumnError> {
3586        if m == 0 {
3587            return Self::new(DType::Float64, vec![]);
3588        }
3589        if m == 1 {
3590            return Self::new(DType::Float64, vec![Scalar::Float64(1.0)]);
3591        }
3592        let values: Vec<Scalar> = (0..m)
3593            .map(|n| {
3594                let x = n as f64 / (m - 1) as f64;
3595                let val = 0.42 - 0.5 * (2.0 * std::f64::consts::PI * x).cos()
3596                    + 0.08 * (4.0 * std::f64::consts::PI * x).cos();
3597                Scalar::Float64(val)
3598            })
3599            .collect();
3600        Self::new(DType::Float64, values)
3601    }
3602
3603    /// Generate a Bartlett (triangular) window.
3604    ///
3605    /// Matches np.bartlett(M). Returns a triangular window of length M.
3606    pub fn bartlett(m: usize) -> Result<Self, ColumnError> {
3607        if m == 0 {
3608            return Self::new(DType::Float64, vec![]);
3609        }
3610        if m == 1 {
3611            return Self::new(DType::Float64, vec![Scalar::Float64(1.0)]);
3612        }
3613        let half = (m - 1) as f64 / 2.0;
3614        let values: Vec<Scalar> = (0..m)
3615            .map(|n| {
3616                let val = 1.0 - ((n as f64 - half) / half).abs();
3617                Scalar::Float64(val)
3618            })
3619            .collect();
3620        Self::new(DType::Float64, values)
3621    }
3622
3623    #[must_use]
3624    pub fn dtype(&self) -> DType {
3625        self.dtype
3626    }
3627
3628    /// Returns true if this column contains any null/missing values.
3629    #[must_use]
3630    pub fn has_nulls(&self) -> bool {
3631        self.validity.count_invalid() > 0
3632    }
3633
3634    /// Promote the dtype to its nullable variant if the column has nulls.
3635    ///
3636    /// For Int64 with nulls → Int64Nullable, Bool with nulls → BoolNullable.
3637    /// For already-nullable or other dtypes, returns a clone unchanged.
3638    #[must_use]
3639    pub fn promote_to_nullable(&self) -> Self {
3640        if !self.has_nulls() {
3641            return self.clone();
3642        }
3643        let new_dtype = self.dtype.to_nullable();
3644        if new_dtype == self.dtype {
3645            return self.clone();
3646        }
3647        Self {
3648            dtype: new_dtype,
3649            values: self.values.clone(),
3650            validity: self.validity.clone(),
3651            data: self.data.clone(),
3652        }
3653    }
3654
3655    /// Create a new column with a different dtype, preserving the same values.
3656    ///
3657    /// This is a low-level operation that only changes the dtype metadata
3658    /// without converting values. Use only when the values are already valid
3659    /// for the target dtype.
3660    #[must_use]
3661    pub fn with_dtype(&self, dtype: DType) -> Self {
3662        Self {
3663            dtype,
3664            values: self.values.clone(),
3665            validity: self.validity.clone(),
3666            data: None,
3667        }
3668    }
3669
3670    #[must_use]
3671    pub fn len(&self) -> usize {
3672        self.values.len()
3673    }
3674
3675    /// Number of elements, matching `pd.Series.size`.
3676    #[must_use]
3677    pub fn size(&self) -> usize {
3678        self.len()
3679    }
3680
3681    /// One-dimensional shape, matching `pd.Series.shape`.
3682    #[must_use]
3683    pub fn shape(&self) -> (usize,) {
3684        (self.len(),)
3685    }
3686
3687    /// Number of array dimensions, matching `pd.Series.ndim`.
3688    #[must_use]
3689    pub fn ndim(&self) -> usize {
3690        1
3691    }
3692
3693    #[must_use]
3694    pub fn is_empty(&self) -> bool {
3695        self.values.is_empty()
3696    }
3697
3698    /// Alias for [`is_empty`](Self::is_empty), matching `pd.Series.empty`.
3699    #[must_use]
3700    pub fn empty(&self) -> bool {
3701        self.is_empty()
3702    }
3703
3704    /// Return a deep copy of this column.
3705    ///
3706    /// Matches `pd.Series.copy(deep=True)` at the column storage layer.
3707    #[must_use]
3708    pub fn copy(&self) -> Self {
3709        self.clone()
3710    }
3711
3712    /// Return an immutable view-shaped clone of this column.
3713    ///
3714    /// Matches `pd.Series.view()` at the column storage layer.
3715    #[must_use]
3716    pub fn view(&self) -> Self {
3717        self.clone()
3718    }
3719
3720    /// One-dimensional transpose is identity.
3721    ///
3722    /// Matches `pd.Series.transpose()` at the column storage layer.
3723    #[must_use]
3724    pub fn transpose(&self) -> Self {
3725        self.clone()
3726    }
3727
3728    /// Lowercase alias for [`transpose`](Self::transpose).
3729    #[must_use]
3730    pub fn t(&self) -> Self {
3731        self.transpose()
3732    }
3733
3734    /// Uppercase pandas spelling for [`transpose`](Self::transpose).
3735    #[allow(non_snake_case)]
3736    #[must_use]
3737    pub fn T(&self) -> Self {
3738        self.transpose()
3739    }
3740
3741    #[must_use]
3742    pub fn values(&self) -> &[Scalar] {
3743        &self.values
3744    }
3745
3746    #[must_use]
3747    pub fn value(&self, idx: usize) -> Option<&Scalar> {
3748        self.values.get(idx)
3749    }
3750
3751    /// Extract scalar value from a single-element column.
3752    ///
3753    /// Matches `pd.Series.item()` at the column storage layer. Returns an
3754    /// error unless the column has exactly one element.
3755    pub fn item(&self) -> Result<Scalar, ColumnError> {
3756        match self.values.as_slice() {
3757            [value] => Ok(value.clone()),
3758            values => Err(ColumnError::InvalidLength {
3759                operation: "item()",
3760                expected: 1,
3761                actual: values.len(),
3762            }),
3763        }
3764    }
3765
3766    #[must_use]
3767    pub fn validity(&self) -> &ValidityMask {
3768        &self.validity
3769    }
3770
3771    /// Borrow-returning iterator over the column's scalars.
3772    ///
3773    /// Convenience over `self.values().iter()` so call sites don't
3774    /// have to reach through the slice accessor. Preserves
3775    /// position order.
3776    pub fn iter_values(&self) -> std::slice::Iter<'_, Scalar> {
3777        self.values.iter()
3778    }
3779
3780    /// Materialize the column's values into an owned `Vec<Scalar>`.
3781    ///
3782    /// Matches `pd.Series.to_list()`. Equivalent to
3783    /// `self.values().to_vec()`; the shorthand survives refactors
3784    /// that change the internal storage shape.
3785    #[must_use]
3786    pub fn to_vec(&self) -> Vec<Scalar> {
3787        self.values.to_vec()
3788    }
3789
3790    /// Alias for [`to_vec`](Self::to_vec), matching `pd.Series.to_list()`.
3791    #[must_use]
3792    pub fn to_list(&self) -> Vec<Scalar> {
3793        self.to_vec()
3794    }
3795
3796    /// Alias for [`to_list`](Self::to_list), matching `pd.Series.tolist()`.
3797    #[must_use]
3798    pub fn tolist(&self) -> Vec<Scalar> {
3799        self.to_list()
3800    }
3801
3802    /// Owned scalar materialization, matching `pd.Series.to_numpy()`.
3803    #[must_use]
3804    pub fn to_numpy(&self) -> Vec<Scalar> {
3805        self.to_vec()
3806    }
3807
3808    /// Flatten values to a one-dimensional vector, matching `pd.Series.ravel()`.
3809    #[must_use]
3810    pub fn ravel(&self) -> Vec<Scalar> {
3811        self.to_numpy()
3812    }
3813
3814    /// Flatten values to a copy, matching `np.ndarray.flatten()`.
3815    ///
3816    /// For 1D arrays this is equivalent to ravel() but explicitly returns
3817    /// an owned copy rather than potentially a view.
3818    #[must_use]
3819    pub fn flatten(&self) -> Vec<Scalar> {
3820        self.values.to_vec()
3821    }
3822
3823    /// Convert to array, matching `np.asarray()`.
3824    ///
3825    /// For Column this returns a clone since we're already array-like.
3826    #[must_use]
3827    pub fn asarray(&self) -> Self {
3828        self.clone()
3829    }
3830
3831    /// Owned scalar materialization, matching `pd.Series.array`.
3832    #[must_use]
3833    pub fn array(&self) -> Vec<Scalar> {
3834        self.to_vec()
3835    }
3836
3837    /// Whether any value in the column is missing.
3838    ///
3839    /// Matches `pd.Series.isna().any()` in one pass. Faster than
3840    /// calling `isnull()` and scanning — returns on the first
3841    /// missing value seen.
3842    #[must_use]
3843    pub fn has_any_missing(&self) -> bool {
3844        self.values.iter().any(Scalar::is_missing)
3845    }
3846
3847    /// Whether any value is missing, matching `pd.Series.hasnans`.
3848    #[must_use]
3849    pub fn hasnans(&self) -> bool {
3850        self.has_any_missing()
3851    }
3852
3853    /// Whether every value in the column is missing.
3854    ///
3855    /// Matches `pd.Series.isna().all()`. Empty columns return true
3856    /// (vacuously), mirroring `ValidityMask::all`'s empty-case
3857    /// convention.
3858    #[must_use]
3859    pub fn all_missing(&self) -> bool {
3860        self.values.iter().all(Scalar::is_missing)
3861    }
3862
3863    /// First value in the column (index 0), or `None` when empty.
3864    ///
3865    /// Matches `pd.Series.iloc[0]` shorthand. Returns the raw Scalar
3866    /// including missing markers; callers who want skipna semantics
3867    /// can pair with `has_any_missing`.
3868    #[must_use]
3869    pub fn first(&self) -> Option<&Scalar> {
3870        self.values.first()
3871    }
3872
3873    /// Last value in the column, or `None` when empty.
3874    #[must_use]
3875    pub fn last(&self) -> Option<&Scalar> {
3876        self.values.last()
3877    }
3878
3879    /// Count values for which `predicate` returns true.
3880    ///
3881    /// Complement to `apply_bool` that yields only the count rather
3882    /// than materializing a Bool column. Missing inputs are treated
3883    /// as a non-match (consistent with `apply_bool`'s
3884    /// missing→Bool(false) contract).
3885    pub fn count_matching<F>(&self, mut predicate: F) -> usize
3886    where
3887        F: FnMut(&Scalar) -> bool,
3888    {
3889        self.values
3890            .iter()
3891            .filter(|v| !v.is_missing() && predicate(v))
3892            .count()
3893    }
3894
3895    /// Elementwise combine with another column via a user function.
3896    ///
3897    /// Matches `pd.Series.combine(other, func)` at the Column layer
3898    /// without the pandas `fill_value=None` null-propagation policy
3899    /// — `zip_with` always invokes `func`, passing through missing
3900    /// values as-is so the caller decides whether to short-circuit
3901    /// nulls. Length mismatch returns `LengthMismatch`.
3902    pub fn zip_with<F>(&self, other: &Self, mut func: F) -> Result<Self, ColumnError>
3903    where
3904        F: FnMut(&Scalar, &Scalar) -> Scalar,
3905    {
3906        if self.values.len() != other.values.len() {
3907            return Err(ColumnError::LengthMismatch {
3908                left: self.values.len(),
3909                right: other.values.len(),
3910            });
3911        }
3912        let out: Vec<Scalar> = self
3913            .values
3914            .iter()
3915            .zip(other.values.iter())
3916            .map(|(a, b)| func(a, b))
3917            .collect();
3918        let inferred = infer_dtype(&out).unwrap_or(self.dtype);
3919        Self::new(inferred, out)
3920    }
3921
3922    /// `(position, scalar)` iterator.
3923    ///
3924    /// Shortcut for `iter_values().enumerate()`. Convenience for
3925    /// callers that need both positions and values and don't want
3926    /// to reach through the slice accessor.
3927    pub fn iter_enumerate(&self) -> std::iter::Enumerate<std::slice::Iter<'_, Scalar>> {
3928        self.values.iter().enumerate()
3929    }
3930
3931    /// Apply a predicate per value and collect the results into a
3932    /// Bool column.
3933    ///
3934    /// Like `Column::map` but specialized for predicate functions
3935    /// returning `bool`. Missing inputs produce `Scalar::Bool(false)`
3936    /// by default — callers that need null propagation should use
3937    /// `map` instead so they can emit `Null(NaN)` explicitly.
3938    pub fn apply_bool<F>(&self, mut predicate: F) -> Result<Self, ColumnError>
3939    where
3940        F: FnMut(&Scalar) -> bool,
3941    {
3942        let out: Vec<Scalar> = self
3943            .values
3944            .iter()
3945            .map(|v| {
3946                if v.is_missing() {
3947                    Scalar::Bool(false)
3948                } else {
3949                    Scalar::Bool(predicate(v))
3950                }
3951            })
3952            .collect();
3953        Self::new(DType::Bool, out)
3954    }
3955
3956    pub fn reindex_by_positions(&self, positions: &[Option<usize>]) -> Result<Self, ColumnError> {
3957        let mut present_positions = Vec::with_capacity(positions.len());
3958        let mut all_present = true;
3959        for position in positions {
3960            match position {
3961                Some(idx) if *idx < self.len() => present_positions.push(*idx),
3962                Some(_) | None => {
3963                    all_present = false;
3964                    break;
3965                }
3966            }
3967        }
3968        if all_present {
3969            return Ok(self.take_positions(&present_positions));
3970        }
3971
3972        // Typed null-introducing gather (br-frankenpandas-lt5qx): an
3973        // all-valid Int64/Float64 source skips the per-row Scalar clone +
3974        // Column::new revalidation. Missing slots (None or out-of-range)
3975        // produce exactly missing_for_dtype: Null(NullKind::Null) via the
3976        // nullable-Int64 backing, Null(NullKind::NaN) via the 0.0-datum
3977        // nullable-Float64 convention; valid slots clone the raw datum.
3978        let n = positions.len();
3979        if let Some(slice) = self.as_i64_slice() {
3980            let mut data = Vec::with_capacity(n);
3981            let mut words = vec![0_u64; n.div_ceil(64)];
3982            for (out_idx, slot) in positions.iter().enumerate() {
3983                match slot {
3984                    Some(idx) if *idx < slice.len() => {
3985                        data.push(slice[*idx]);
3986                        words[out_idx / 64] |= 1_u64 << (out_idx % 64);
3987                    }
3988                    _ => data.push(0),
3989                }
3990            }
3991            return Ok(Self::from_i64_values_with_validity(
3992                data,
3993                ValidityMask { words, len: n },
3994            ));
3995        }
3996        if let Some(slice) = self.as_f64_slice() {
3997            let mut data = Vec::with_capacity(n);
3998            let mut words = vec![0_u64; n.div_ceil(64)];
3999            for (out_idx, slot) in positions.iter().enumerate() {
4000                match slot {
4001                    Some(idx) if *idx < slice.len() => {
4002                        data.push(slice[*idx]);
4003                        words[out_idx / 64] |= 1_u64 << (out_idx % 64);
4004                    }
4005                    _ => data.push(0.0),
4006                }
4007            }
4008            return Ok(Self::from_f64_values_with_validity(
4009                data,
4010                ValidityMask { words, len: n },
4011            ));
4012        }
4013
4014        let values = positions
4015            .iter()
4016            .map(|slot| match slot {
4017                Some(idx) => self
4018                    .values
4019                    .get(*idx)
4020                    .cloned()
4021                    .unwrap_or_else(|| Scalar::missing_for_dtype(self.dtype)),
4022                None => Scalar::missing_for_dtype(self.dtype),
4023            })
4024            .collect::<Vec<_>>();
4025
4026        Self::new(self.dtype, values)
4027    }
4028
4029    /// AG-10: Attempt vectorized typed-array path for binary arithmetic.
4030    ///
4031    /// Preconditions: both columns same length, out_dtype already computed.
4032    /// Returns `Some(Column)` if vectorized path succeeded, `None` to
4033    /// signal fallback to the scalar path.
4034    fn try_vectorized_binary(
4035        &self,
4036        right: &Self,
4037        op: ArithmeticOp,
4038        out_dtype: DType,
4039    ) -> Option<Result<Self, ColumnError>> {
4040        // Vectorized path: both sides same numeric dtype, no NaN-vs-Null
4041        // distinction needed (i.e. both Int64, or both Float64 / promoted to Float64).
4042        match out_dtype {
4043            DType::Float64 => {
4044                // Typed-input fast path: both operands are already all-valid
4045                // contiguous Float64 (as_f64_slice => validity.all() AND no NaN),
4046                // so read the buffers directly — no Scalar materialization, no
4047                // from_scalars copy, no nan-aware validity scan. Bit-identical to
4048                // the general arm's all-valid branch: with both inputs valid and
4049                // NaN-free, the combined validity is all-valid, so it returns
4050                // from_f64_values(apply(l,r)) too (and from_f64_values still marks
4051                // any operation-produced NaN missing, identically).
4052                if let (Some(l), Some(r)) = (self.as_f64_slice(), right.as_f64_slice()) {
4053                    let apply = binary_f64_apply(op);
4054                    let result: Vec<f64> = l.iter().zip(r).map(|(&a, &b)| apply(a, b)).collect();
4055                    return Some(Ok(Self::from_f64_values(result)));
4056                }
4057                let left_data = ColumnData::from_scalars(&self.values, DType::Float64);
4058                let right_data = ColumnData::from_scalars(&right.values, DType::Float64);
4059                let (ColumnData::Float64(l), ColumnData::Float64(r)) = (&left_data, &right_data)
4060                else {
4061                    return None;
4062                };
4063
4064                // We need NaN-aware validity: original validity + NaN propagation.
4065                // Build validity masks that treat NaN source scalars as invalid.
4066                let left_nan_aware = self.nan_aware_validity();
4067                let right_nan_aware = right.nan_aware_validity();
4068
4069                let (result_data, result_validity) =
4070                    vectorized_binary_f64(l, r, &left_nan_aware, &right_nan_aware, op);
4071
4072                // All inputs valid: preserve the typed result buffer directly
4073                // instead of rebuilding Vec<Scalar> and rescanning validity.
4074                // Operation-produced NaN is still marked missing by
4075                // from_f64_values, exactly like the Scalar::Float64(NaN) path.
4076                if result_validity.all() {
4077                    return Some(Ok(Self::from_f64_values(result_data)));
4078                }
4079
4080                // Build output scalars respecting NaN propagation: if either
4081                // input was NaN (not just Null), mark output as Null(NaN).
4082                let values: Vec<Scalar> = result_data
4083                    .iter()
4084                    .enumerate()
4085                    .map(|(i, v)| {
4086                        if !result_validity.get(i) {
4087                            // Preserve NaN vs Null distinction from inputs.
4088                            if self.is_nan_at(i) || right.is_nan_at(i) {
4089                                Scalar::Null(NullKind::NaN)
4090                            } else {
4091                                Scalar::missing_for_dtype(out_dtype)
4092                            }
4093                        } else {
4094                            Scalar::Float64(*v)
4095                        }
4096                    })
4097                    .collect();
4098
4099                Some(Self::new(out_dtype, values))
4100            }
4101            DType::Int64 if !matches!(op, ArithmeticOp::Div) => {
4102                // Both must actually be Int64 for the i64 fast path.
4103                if self.dtype != DType::Int64 || right.dtype != DType::Int64 {
4104                    return None;
4105                }
4106                // Typed-input fast path (see the Float64 arm): both operands are
4107                // all-valid contiguous i64 buffers, so feed vectorized_binary_i64
4108                // directly — no from_scalars materialization. All-valid inputs =>
4109                // all-valid result, so from_i64_values, identical to the general
4110                // arm's all-valid branch.
4111                if let (Some(l), Some(r)) = (self.as_i64_slice(), right.as_i64_slice()) {
4112                    let (result_data, _validity) =
4113                        vectorized_binary_i64(l, r, &self.validity, &right.validity, op)?;
4114                    return Some(Ok(Self::from_i64_values(result_data)));
4115                }
4116                let left_data = ColumnData::from_scalars(&self.values, DType::Int64);
4117                let right_data = ColumnData::from_scalars(&right.values, DType::Int64);
4118                let (ColumnData::Int64(l), ColumnData::Int64(r)) = (&left_data, &right_data) else {
4119                    return None;
4120                };
4121
4122                let (result_data, result_validity) =
4123                    vectorized_binary_i64(l, r, &self.validity, &right.validity, op)?;
4124
4125                // All inputs valid: keep the typed i64 result buffer as the
4126                // column source of truth and skip Scalar materialization.
4127                if result_validity.all() {
4128                    return Some(Ok(Self::from_i64_values(result_data)));
4129                }
4130
4131                let values: Vec<Scalar> = result_data
4132                    .iter()
4133                    .enumerate()
4134                    .map(|(i, v)| {
4135                        if !result_validity.get(i) {
4136                            Scalar::missing_for_dtype(out_dtype)
4137                        } else {
4138                            Scalar::Int64(*v)
4139                        }
4140                    })
4141                    .collect();
4142
4143                Some(Self::new(out_dtype, values))
4144            }
4145            _ => None, // Bool, Utf8, etc. — use scalar fallback
4146        }
4147    }
4148
4149    /// AG-10 fused outer-alignment arithmetic for two Float64 columns.
4150    ///
4151    /// Equivalent to `self.reindex_by_positions(lp)?.binary_numeric(
4152    /// &right.reindex_by_positions(rp)?, op)` for the Float64-output case, but it
4153    /// gathers `f64` directly from the *original* columns into the union layout
4154    /// in one pass instead of materializing two intermediate `Vec<Scalar>` and
4155    /// re-deriving their `f64` views. Provably isomorphic: `from_scalars` is
4156    /// element-wise and `reindex` is a gather, so `gather(from_scalars(src)) ==
4157    /// from_scalars(reindex(src))`; the nan-aware validity gathers identically
4158    /// (a `None` slot reindexes to `missing_for_dtype(Float64) = Null(NaN)`,
4159    /// i.e. invalid, exactly as the gathered mask marks it). For Float64 output
4160    /// every invalid position is `Null(NaN)` — matching both arms of
4161    /// `try_vectorized_binary`'s invalid branch. Caller guarantees both columns
4162    /// are `Float64`.
4163    pub fn aligned_binary_f64(
4164        &self,
4165        right: &Self,
4166        left_positions: &[Option<usize>],
4167        right_positions: &[Option<usize>],
4168        op: ArithmeticOp,
4169    ) -> Result<Self, ColumnError> {
4170        debug_assert_eq!(left_positions.len(), right_positions.len());
4171        let out_len = left_positions.len();
4172
4173        let lsrc = self.float64_binary_data();
4174        let rsrc = right.float64_binary_data();
4175        let lvalid = self.nan_aware_validity();
4176        let rvalid = right.nan_aware_validity();
4177
4178        let apply = binary_f64_apply(op);
4179
4180        let mut data = Vec::with_capacity(out_len);
4181        let mut words = vec![0_u64; out_len.div_ceil(64)];
4182        let mut all_valid = true;
4183        for (k, left_slot) in left_positions.iter().enumerate() {
4184            if let Some(i) = *left_slot
4185                && let Some(j) = right_positions.get(k).copied().flatten()
4186                && lvalid.get(i)
4187                && rvalid.get(j)
4188            {
4189                let value = apply(lsrc[i], rsrc[j]);
4190                data.push(value);
4191                if value.is_nan() {
4192                    all_valid = false;
4193                } else {
4194                    words[k / 64] |= 1_u64 << (k % 64);
4195                }
4196            } else {
4197                data.push(0.0);
4198                all_valid = false;
4199            }
4200        }
4201        if all_valid {
4202            return Ok(Self::from_f64_values(data));
4203        }
4204        Ok(Self::from_f64_values_with_validity(
4205            data,
4206            ValidityMask {
4207                words,
4208                len: out_len,
4209            },
4210        ))
4211    }
4212
4213    /// Fused Float64 arithmetic for two aligned contiguous `Int64` unit ranges.
4214    ///
4215    /// The caller has proven the left and right indexes are `[start, end]`
4216    /// integer ranges and the output index is their contiguous union. This is
4217    /// isomorphic to [`Self::aligned_binary_f64`] with arithmetic positions, but
4218    /// it fills the overlapped span directly and leaves non-overlap slots
4219    /// invalid, avoiding the two `Vec<Option<usize>>` alignment buffers.
4220    pub fn aligned_binary_f64_int64_unit_ranges(
4221        &self,
4222        right: &Self,
4223        left_range: (i64, i64),
4224        right_range: (i64, i64),
4225        union_range: (i64, i64),
4226        op: ArithmeticOp,
4227    ) -> Result<Self, ColumnError> {
4228        if !matches!(self.dtype, DType::Float64) || !matches!(right.dtype, DType::Float64) {
4229            return Err(ColumnError::DTypeMismatch {
4230                left: self.dtype,
4231                right: right.dtype,
4232            });
4233        }
4234
4235        let (left_start, left_end) = left_range;
4236        let (right_start, right_end) = right_range;
4237        let (union_start, union_end) = union_range;
4238
4239        let Some(left_len) = unit_range_len(left_start, left_end) else {
4240            return Err(ColumnError::LengthMismatch {
4241                left: self.len(),
4242                right: right.len(),
4243            });
4244        };
4245        let Some(right_len) = unit_range_len(right_start, right_end) else {
4246            return Err(ColumnError::LengthMismatch {
4247                left: self.len(),
4248                right: right.len(),
4249            });
4250        };
4251        let Some(out_len) = unit_range_len(union_start, union_end) else {
4252            return Err(ColumnError::LengthMismatch {
4253                left: self.len(),
4254                right: right.len(),
4255            });
4256        };
4257        if left_len != self.len() || right_len != right.len() {
4258            return Err(ColumnError::LengthMismatch {
4259                left: self.len(),
4260                right: right.len(),
4261            });
4262        }
4263
4264        let lsrc = self.float64_binary_data();
4265        let rsrc = right.float64_binary_data();
4266        let lvalid = self.nan_aware_validity();
4267        let rvalid = right.nan_aware_validity();
4268        let apply = binary_f64_apply(op);
4269
4270        let mut data = vec![0.0; out_len];
4271        let mut words = vec![0_u64; out_len.div_ceil(64)];
4272        let overlap_start = left_start.max(right_start);
4273        let overlap_end = left_end.min(right_end);
4274        let mut all_valid = overlap_start == union_start && overlap_end == union_end;
4275
4276        if overlap_start <= overlap_end {
4277            for value in overlap_start..=overlap_end {
4278                let out_idx = (value - union_start) as usize;
4279                let left_idx = (value - left_start) as usize;
4280                let right_idx = (value - right_start) as usize;
4281                if lvalid.get(left_idx) && rvalid.get(right_idx) {
4282                    let result = apply(lsrc[left_idx], rsrc[right_idx]);
4283                    data[out_idx] = result;
4284                    if result.is_nan() {
4285                        all_valid = false;
4286                    } else {
4287                        words[out_idx / 64] |= 1_u64 << (out_idx % 64);
4288                    }
4289                } else {
4290                    all_valid = false;
4291                }
4292            }
4293        }
4294
4295        if all_valid {
4296            return Ok(Self::from_f64_values(data));
4297        }
4298        Ok(Self::from_f64_values_with_validity(
4299            data,
4300            ValidityMask {
4301                words,
4302                len: out_len,
4303            },
4304        ))
4305    }
4306
4307    /// Same-index Float64 arithmetic fast path.
4308    ///
4309    /// Isomorphic to calling [`Self::aligned_binary_f64`] with
4310    /// `Some(i)`/`Some(i)` positions for every row, but avoids allocating and
4311    /// walking the identity alignment vectors.
4312    pub fn aligned_binary_f64_same_positions(
4313        &self,
4314        right: &Self,
4315        op: ArithmeticOp,
4316    ) -> Result<Self, ColumnError> {
4317        debug_assert_eq!(self.len(), right.len());
4318        let out_len = self.len();
4319
4320        let lsrc = self.float64_binary_data();
4321        let rsrc = right.float64_binary_data();
4322
4323        // Fully-valid fast path (br-frankenpandas-f64simd): when neither side has
4324        // a null bit or a NaN, every output position is valid, so emit a single
4325        // monomorphized (autovectorizing) slice op and skip both the per-element
4326        // `nan_aware_validity` mask builds and the per-element fn-pointer/validity
4327        // gating. Bit-identical to the general path under all-valid inputs: the
4328        // arithmetic and the typed `from_f64_values` constructor are the same.
4329        if self.validity.all()
4330            && right.validity.all()
4331            && !lsrc.iter().any(|x| x.is_nan())
4332            && !rsrc.iter().any(|x| x.is_nan())
4333        {
4334            return Ok(Self::from_f64_values(apply_f64_slices(op, &lsrc, &rsrc)));
4335        }
4336
4337        let lvalid = self.nan_aware_validity();
4338        let rvalid = right.nan_aware_validity();
4339        let apply = binary_f64_apply(op);
4340
4341        let mut data = Vec::with_capacity(out_len);
4342        let mut all_valid = true;
4343        for i in 0..out_len {
4344            if lvalid.get(i) && rvalid.get(i) {
4345                data.push(apply(lsrc[i], rsrc[i]));
4346            } else {
4347                all_valid = false;
4348                break;
4349            }
4350        }
4351        if all_valid {
4352            return Ok(Self::from_f64_values(data));
4353        }
4354
4355        let mut values = Vec::with_capacity(out_len);
4356        for i in 0..out_len {
4357            if lvalid.get(i) && rvalid.get(i) {
4358                values.push(Scalar::Float64(apply(lsrc[i], rsrc[i])));
4359            } else {
4360                values.push(Scalar::Null(NullKind::NaN));
4361            }
4362        }
4363        Self::new(DType::Float64, values)
4364    }
4365
4366    fn cached_float64_data(&self) -> Option<&[f64]> {
4367        match &self.data {
4368            Some(ColumnData::Float64(data)) if data.len() == self.values.len() => {
4369                return Some(data.as_slice());
4370            }
4371            _ => {}
4372        }
4373
4374        match &self.values {
4375            ScalarValues::LazyAllValidFloat64 { data, .. } if data.len() == self.validity.len() => {
4376                Some(data.as_ref())
4377            }
4378            ScalarValues::LazyNullableFloat64 { data, .. } if data.len() == self.validity.len() => {
4379                Some(data.as_slice())
4380            }
4381            _ => None,
4382        }
4383    }
4384
4385    fn float64_binary_data(&self) -> std::borrow::Cow<'_, [f64]> {
4386        if let Some(data) = self.cached_float64_data() {
4387            return std::borrow::Cow::Borrowed(data);
4388        }
4389
4390        match ColumnData::from_scalars(&self.values, DType::Float64) {
4391            ColumnData::Float64(data) => std::borrow::Cow::Owned(data),
4392            _ => unreachable!("Float64 materialization must produce Float64 data"),
4393        }
4394    }
4395
4396    /// Validity mask that also marks NaN float values as invalid.
4397    #[must_use]
4398    fn nan_aware_validity(&self) -> ValidityMask {
4399        let mut mask = self.validity.clone();
4400
4401        if let Some(data) = self.cached_float64_data() {
4402            for (i, value) in data.iter().enumerate() {
4403                if value.is_nan() {
4404                    mask.set(i, false);
4405                }
4406            }
4407            return mask;
4408        }
4409
4410        for (i, value) in self.values.iter().enumerate() {
4411            if matches!(value, Scalar::Float64(f) if f.is_nan()) {
4412                mask.set(i, false);
4413            }
4414        }
4415        mask
4416    }
4417
4418    /// Check if position `i` holds a NaN-class missing value.
4419    fn is_nan_at(&self, i: usize) -> bool {
4420        self.values.get(i).is_some_and(|v| v.is_nan())
4421    }
4422
4423    pub fn binary_numeric(&self, right: &Self, op: ArithmeticOp) -> Result<Self, ColumnError> {
4424        if self.len() != right.len() {
4425            return Err(ColumnError::LengthMismatch {
4426                left: self.len(),
4427                right: right.len(),
4428            });
4429        }
4430
4431        let mut out_dtype = common_dtype(self.dtype, right.dtype)?;
4432        if matches!(out_dtype, DType::Bool) {
4433            out_dtype = DType::Int64;
4434        }
4435        // Div always produces Float64. Pow keeps Int64 for int**int (numpy/pandas
4436        // semantics: 2 ** 3 -> int64 8, not float), but promotes to Float64 for any
4437        // float operand. Mod and FloorDiv preserve int if there are no zero divisors.
4438        let int_pow = matches!(op, ArithmeticOp::Pow)
4439            && self.dtype == DType::Int64
4440            && right.dtype == DType::Int64;
4441        if matches!(op, ArithmeticOp::Div | ArithmeticOp::Pow) && !int_pow {
4442            out_dtype = DType::Float64;
4443        }
4444
4445        // AG-10: Try vectorized path first; fallback to scalar path.
4446        if let Some(result) = self.try_vectorized_binary(right, op, out_dtype) {
4447            return result;
4448        }
4449
4450        // For Mod/FloorDiv: if vectorized failed (likely due to zero divisors), use Float64
4451        if matches!(op, ArithmeticOp::Mod | ArithmeticOp::FloorDiv)
4452            && matches!(out_dtype, DType::Int64)
4453        {
4454            out_dtype = DType::Float64;
4455        }
4456
4457        // Scalar fallback path (original implementation).
4458        let values = self
4459            .values
4460            .iter()
4461            .zip(&right.values)
4462            .map(|(left, right)| {
4463                if left.is_missing() || right.is_missing() {
4464                    return Ok::<_, ColumnError>(if left.is_nan() || right.is_nan() {
4465                        Scalar::Null(NullKind::NaN)
4466                    } else {
4467                        Scalar::missing_for_dtype(out_dtype)
4468                    });
4469                }
4470
4471                if matches!(out_dtype, DType::Int64) {
4472                    let lhs_i64 = match cast_scalar(left, DType::Int64)? {
4473                        Scalar::Int64(v) => v,
4474                        _ => unreachable!(),
4475                    };
4476                    let rhs_i64 = match cast_scalar(right, DType::Int64)? {
4477                        Scalar::Int64(v) => v,
4478                        _ => unreachable!(),
4479                    };
4480                    let result = match op {
4481                        ArithmeticOp::Add => lhs_i64.wrapping_add(rhs_i64),
4482                        ArithmeticOp::Sub => lhs_i64.wrapping_sub(rhs_i64),
4483                        ArithmeticOp::Mul => lhs_i64.wrapping_mul(rhs_i64),
4484                        // int ** int stays int64 (numpy/pandas). A negative integer
4485                        // exponent raises, matching numpy's "Integers to negative
4486                        // integer powers are not allowed." Overflow wraps like int64.
4487                        ArithmeticOp::Pow => {
4488                            if rhs_i64 < 0 {
4489                                return Err(ColumnError::NegativeIntegerPower);
4490                            }
4491                            lhs_i64.wrapping_pow(u32::try_from(rhs_i64).unwrap_or(u32::MAX))
4492                        }
4493                        ArithmeticOp::Div | ArithmeticOp::Mod | ArithmeticOp::FloorDiv => {
4494                            unreachable!()
4495                        }
4496                    };
4497                    return Ok(Scalar::Int64(result));
4498                }
4499
4500                let lhs = left.to_f64()?;
4501                let rhs = right.to_f64()?;
4502                let result = match op {
4503                    ArithmeticOp::Add => lhs + rhs,
4504                    ArithmeticOp::Sub => lhs - rhs,
4505                    ArithmeticOp::Mul => lhs * rhs,
4506                    ArithmeticOp::Div => lhs / rhs,
4507                    ArithmeticOp::Mod => python_mod_f64(lhs, rhs),
4508                    ArithmeticOp::Pow => lhs.powf(rhs),
4509                    ArithmeticOp::FloorDiv => python_floor_div_f64(lhs, rhs),
4510                };
4511
4512                Ok(Scalar::Float64(result))
4513            })
4514            .collect::<Result<Vec<_>, _>>()?;
4515
4516        Self::new(out_dtype, values)
4517    }
4518
    /// Element-wise addition, matching `pd.Series.add()`.
    ///
    /// Evaluates `self + right` row by row by delegating to
    /// [`Self::binary_numeric`] with [`ArithmeticOp::Add`].
    ///
    /// # Errors
    ///
    /// Returns whatever [`Self::binary_numeric`] reports, e.g.
    /// [`ColumnError::LengthMismatch`] when the columns differ in length.
    pub fn add(&self, right: &Self) -> Result<Self, ColumnError> {
        self.binary_numeric(right, ArithmeticOp::Add)
    }
4523
    /// Reverse element-wise addition, matching `pd.Series.radd()`.
    ///
    /// Evaluates `left + self`: the operands are swapped before delegating to
    /// [`Self::binary_numeric`] with [`ArithmeticOp::Add`].
    ///
    /// # Errors
    ///
    /// Returns whatever [`Self::binary_numeric`] reports, e.g.
    /// [`ColumnError::LengthMismatch`] when the columns differ in length.
    pub fn radd(&self, left: &Self) -> Result<Self, ColumnError> {
        left.binary_numeric(self, ArithmeticOp::Add)
    }
4528
    /// Element-wise subtraction, matching `pd.Series.sub()`.
    ///
    /// Evaluates `self - right` row by row by delegating to
    /// [`Self::binary_numeric`] with [`ArithmeticOp::Sub`].
    ///
    /// # Errors
    ///
    /// Returns whatever [`Self::binary_numeric`] reports, e.g.
    /// [`ColumnError::LengthMismatch`] when the columns differ in length.
    pub fn sub(&self, right: &Self) -> Result<Self, ColumnError> {
        self.binary_numeric(right, ArithmeticOp::Sub)
    }
4533
    /// Reverse element-wise subtraction, matching `pd.Series.rsub()`.
    ///
    /// Evaluates `left - self`: the operands are swapped before delegating to
    /// [`Self::binary_numeric`] with [`ArithmeticOp::Sub`].
    ///
    /// # Errors
    ///
    /// Returns whatever [`Self::binary_numeric`] reports, e.g.
    /// [`ColumnError::LengthMismatch`] when the columns differ in length.
    pub fn rsub(&self, left: &Self) -> Result<Self, ColumnError> {
        left.binary_numeric(self, ArithmeticOp::Sub)
    }
4538
    /// Alias for [`sub`](Self::sub), matching `pd.Series.subtract()`.
    ///
    /// Forwards `right` to [`Self::sub`] unchanged and returns its result.
    ///
    /// # Errors
    ///
    /// Same as [`Self::sub`].
    pub fn subtract(&self, right: &Self) -> Result<Self, ColumnError> {
        self.sub(right)
    }
4543
    /// Element-wise multiplication, matching `pd.Series.mul()`.
    ///
    /// Evaluates `self * right` row by row by delegating to
    /// [`Self::binary_numeric`] with [`ArithmeticOp::Mul`].
    ///
    /// # Errors
    ///
    /// Returns whatever [`Self::binary_numeric`] reports, e.g.
    /// [`ColumnError::LengthMismatch`] when the columns differ in length.
    pub fn mul(&self, right: &Self) -> Result<Self, ColumnError> {
        self.binary_numeric(right, ArithmeticOp::Mul)
    }
4548
    /// Reverse element-wise multiplication, matching `pd.Series.rmul()`.
    ///
    /// Evaluates `left * self`: the operands are swapped before delegating to
    /// [`Self::binary_numeric`] with [`ArithmeticOp::Mul`].
    ///
    /// # Errors
    ///
    /// Returns whatever [`Self::binary_numeric`] reports, e.g.
    /// [`ColumnError::LengthMismatch`] when the columns differ in length.
    pub fn rmul(&self, left: &Self) -> Result<Self, ColumnError> {
        left.binary_numeric(self, ArithmeticOp::Mul)
    }
4553
    /// Alias for [`mul`](Self::mul), matching `pd.Series.multiply()`.
    ///
    /// Forwards `right` to [`Self::mul`] unchanged and returns its result.
    ///
    /// # Errors
    ///
    /// Same as [`Self::mul`].
    pub fn multiply(&self, right: &Self) -> Result<Self, ColumnError> {
        self.mul(right)
    }
4558
    /// Element-wise true division, matching `pd.Series.div()`.
    ///
    /// Evaluates `self / right` row by row by delegating to
    /// [`Self::binary_numeric`] with [`ArithmeticOp::Div`], which forces a
    /// Float64 output dtype for division.
    ///
    /// # Errors
    ///
    /// Returns whatever [`Self::binary_numeric`] reports, e.g.
    /// [`ColumnError::LengthMismatch`] when the columns differ in length.
    pub fn div(&self, right: &Self) -> Result<Self, ColumnError> {
        self.binary_numeric(right, ArithmeticOp::Div)
    }
4563
    /// Reverse element-wise true division, matching `pd.Series.rdiv()`.
    ///
    /// Evaluates `left / self`: the operands are swapped before delegating to
    /// [`Self::binary_numeric`] with [`ArithmeticOp::Div`], which forces a
    /// Float64 output dtype for division.
    ///
    /// # Errors
    ///
    /// Returns whatever [`Self::binary_numeric`] reports, e.g.
    /// [`ColumnError::LengthMismatch`] when the columns differ in length.
    pub fn rdiv(&self, left: &Self) -> Result<Self, ColumnError> {
        left.binary_numeric(self, ArithmeticOp::Div)
    }
4568
    /// Alias for [`div`](Self::div), matching `pd.Series.divide()`.
    ///
    /// Forwards `right` to [`Self::div`] unchanged and returns its result.
    ///
    /// # Errors
    ///
    /// Same as [`Self::div`].
    pub fn divide(&self, right: &Self) -> Result<Self, ColumnError> {
        self.div(right)
    }
4573
    /// Alias for [`div`](Self::div), matching `pd.Series.truediv()`.
    ///
    /// Forwards `right` to [`Self::div`] unchanged and returns its result.
    ///
    /// # Errors
    ///
    /// Same as [`Self::div`].
    pub fn truediv(&self, right: &Self) -> Result<Self, ColumnError> {
        self.div(right)
    }
4578
    /// Alias for [`rdiv`](Self::rdiv), matching `pd.Series.rtruediv()`.
    ///
    /// Forwards `left` to [`Self::rdiv`] unchanged, so the result is
    /// `left / self`.
    ///
    /// # Errors
    ///
    /// Same as [`Self::rdiv`].
    pub fn rtruediv(&self, left: &Self) -> Result<Self, ColumnError> {
        self.rdiv(left)
    }
4583
    /// Element-wise floor division, matching `pd.Series.floordiv()`.
    ///
    /// Evaluates `self // right` row by row by delegating to
    /// [`Self::binary_numeric`] with [`ArithmeticOp::FloorDiv`]. An Int64
    /// request that the typed path declines is computed as Float64 instead.
    ///
    /// # Errors
    ///
    /// Returns whatever [`Self::binary_numeric`] reports, e.g.
    /// [`ColumnError::LengthMismatch`] when the columns differ in length.
    pub fn floordiv(&self, right: &Self) -> Result<Self, ColumnError> {
        self.binary_numeric(right, ArithmeticOp::FloorDiv)
    }
4588
    /// Reverse element-wise floor division, matching `pd.Series.rfloordiv()`.
    ///
    /// Evaluates `left // self`: the operands are swapped before delegating to
    /// [`Self::binary_numeric`] with [`ArithmeticOp::FloorDiv`].
    ///
    /// # Errors
    ///
    /// Returns whatever [`Self::binary_numeric`] reports, e.g.
    /// [`ColumnError::LengthMismatch`] when the columns differ in length.
    pub fn rfloordiv(&self, left: &Self) -> Result<Self, ColumnError> {
        left.binary_numeric(self, ArithmeticOp::FloorDiv)
    }
4593
    /// Element-wise modulo, matching `pd.Series.mod()`.
    ///
    /// Evaluates `self % right` row by row by delegating to
    /// [`Self::binary_numeric`] with [`ArithmeticOp::Mod`]. An Int64 request
    /// that the typed path declines is computed as Float64 instead. The
    /// method is spelled `r#mod` because `mod` is a Rust keyword.
    ///
    /// # Errors
    ///
    /// Returns whatever [`Self::binary_numeric`] reports, e.g.
    /// [`ColumnError::LengthMismatch`] when the columns differ in length.
    pub fn r#mod(&self, right: &Self) -> Result<Self, ColumnError> {
        self.binary_numeric(right, ArithmeticOp::Mod)
    }
4598
    /// Reverse element-wise modulo, matching `pd.Series.rmod()`.
    ///
    /// Evaluates `left % self`: the operands are swapped before delegating to
    /// [`Self::binary_numeric`] with [`ArithmeticOp::Mod`].
    ///
    /// # Errors
    ///
    /// Returns whatever [`Self::binary_numeric`] reports, e.g.
    /// [`ColumnError::LengthMismatch`] when the columns differ in length.
    pub fn rmod(&self, left: &Self) -> Result<Self, ColumnError> {
        left.binary_numeric(self, ArithmeticOp::Mod)
    }
4603
    /// Element-wise exponentiation, matching `pd.Series.pow()`.
    ///
    /// Evaluates `self ** right` row by row by delegating to
    /// [`Self::binary_numeric`] with [`ArithmeticOp::Pow`]. The output stays
    /// Int64 only when both operands are Int64; any other combination is
    /// computed as Float64.
    ///
    /// # Errors
    ///
    /// Returns whatever [`Self::binary_numeric`] reports, e.g.
    /// [`ColumnError::LengthMismatch`] when the columns differ in length, or
    /// [`ColumnError::NegativeIntegerPower`] from its scalar path for an
    /// integer power with a negative exponent.
    pub fn pow(&self, right: &Self) -> Result<Self, ColumnError> {
        self.binary_numeric(right, ArithmeticOp::Pow)
    }
4608
    /// Reverse element-wise exponentiation, matching `pd.Series.rpow()`.
    ///
    /// Evaluates `left ** self`: the operands are swapped before delegating
    /// to [`Self::binary_numeric`] with [`ArithmeticOp::Pow`], so `self`
    /// supplies the exponents.
    ///
    /// # Errors
    ///
    /// Returns whatever [`Self::binary_numeric`] reports, e.g.
    /// [`ColumnError::LengthMismatch`] when the columns differ in length, or
    /// [`ColumnError::NegativeIntegerPower`] from its scalar path for an
    /// integer power with a negative exponent.
    pub fn rpow(&self, left: &Self) -> Result<Self, ColumnError> {
        left.binary_numeric(self, ArithmeticOp::Pow)
    }
4613
    /// Alias for [`pow`](Self::pow), matching NumPy naming.
    ///
    /// Forwards `right` to [`Self::pow`] unchanged and returns its result.
    ///
    /// # Errors
    ///
    /// Same as [`Self::pow`].
    pub fn power(&self, right: &Self) -> Result<Self, ColumnError> {
        self.pow(right)
    }
4618
4619    /// Float power, always returning Float64.
4620    ///
4621    /// Matches np.float_power(x, y). Unlike power(), this always returns
4622    /// Float64 and returns NaN for negative bases with non-integer exponents
4623    /// (where the result would be complex).
4624    pub fn float_power(&self, right: &Self) -> Result<Self, ColumnError> {
4625        if self.len() != right.len() {
4626            return Err(ColumnError::LengthMismatch {
4627                left: self.len(),
4628                right: right.len(),
4629            });
4630        }
4631        if let Some(out) = self.typed_float_binary(right, |b, e| b.powf(e)) {
4632            return Ok(out);
4633        }
4634        let mut out = Vec::with_capacity(self.values.len());
4635        for (base, exp) in self.values.iter().zip(&right.values) {
4636            if base.is_missing() || exp.is_missing() {
4637                out.push(Scalar::Float64(f64::NAN));
4638                continue;
4639            }
4640            let b = base.to_f64().map_err(ColumnError::Type)?;
4641            let e = exp.to_f64().map_err(ColumnError::Type)?;
4642            let result = b.powf(e);
4643            out.push(Scalar::Float64(result));
4644        }
4645        Self::new(DType::Float64, out)
4646    }
4647
    /// Alias for [`mod`](Self::mod), matching NumPy naming.
    ///
    /// Forwards `right` to [`Self::mod`] unchanged and returns its result.
    ///
    /// # Errors
    ///
    /// Same as [`Self::mod`].
    pub fn remainder(&self, right: &Self) -> Result<Self, ColumnError> {
        self.r#mod(right)
    }
4652
    /// Alias for [`floordiv`](Self::floordiv), matching NumPy naming.
    ///
    /// Forwards `right` to [`Self::floordiv`] unchanged and returns its
    /// result.
    ///
    /// # Errors
    ///
    /// Same as [`Self::floordiv`].
    pub fn floor_divide(&self, right: &Self) -> Result<Self, ColumnError> {
        self.floordiv(right)
    }
4657
    /// Alias for [`div`](Self::div), matching NumPy naming.
    ///
    /// Forwards `right` to [`Self::div`] unchanged and returns its result.
    ///
    /// # Errors
    ///
    /// Same as [`Self::div`].
    pub fn true_divide(&self, right: &Self) -> Result<Self, ColumnError> {
        self.div(right)
    }
4662
4663    /// Element-wise arctangent of y/x.
4664    pub fn atan2(&self, other: &Self) -> Result<Self, ColumnError> {
4665        if self.len() != other.len() {
4666            return Err(ColumnError::LengthMismatch {
4667                left: self.len(),
4668                right: other.len(),
4669            });
4670        }
4671        if let Some(out) = self.typed_float_binary(other, |y, x| y.atan2(x)) {
4672            return Ok(out);
4673        }
4674        let mut out = Vec::with_capacity(self.values.len());
4675        for (y, x) in self.values.iter().zip(&other.values) {
4676            if y.is_missing() || x.is_missing() {
4677                out.push(Scalar::Float64(f64::NAN));
4678                continue;
4679            }
4680            let yf = y.to_f64().map_err(ColumnError::Type)?;
4681            let xf = x.to_f64().map_err(ColumnError::Type)?;
4682            out.push(Scalar::Float64(yf.atan2(xf)));
4683        }
4684        Self::new(DType::Float64, out)
4685    }
4686
4687    /// Element-wise Euclidean distance sqrt(x^2 + y^2).
4688    pub fn hypot(&self, other: &Self) -> Result<Self, ColumnError> {
4689        if self.len() != other.len() {
4690            return Err(ColumnError::LengthMismatch {
4691                left: self.len(),
4692                right: other.len(),
4693            });
4694        }
4695        if let Some(out) = self.typed_float_binary(other, |a, b| a.hypot(b)) {
4696            return Ok(out);
4697        }
4698        let mut out = Vec::with_capacity(self.values.len());
4699        for (a, b) in self.values.iter().zip(&other.values) {
4700            if a.is_missing() || b.is_missing() {
4701                out.push(Scalar::Float64(f64::NAN));
4702                continue;
4703            }
4704            let af = a.to_f64().map_err(ColumnError::Type)?;
4705            let bf = b.to_f64().map_err(ColumnError::Type)?;
4706            out.push(Scalar::Float64(af.hypot(bf)));
4707        }
4708        Self::new(DType::Float64, out)
4709    }
4710
4711    /// Element-wise floating-point remainder (fmod).
4712    pub fn fmod(&self, other: &Self) -> Result<Self, ColumnError> {
4713        if self.len() != other.len() {
4714            return Err(ColumnError::LengthMismatch {
4715                left: self.len(),
4716                right: other.len(),
4717            });
4718        }
4719        if let Some(out) = self.typed_float_binary(other, |a, b| a % b) {
4720            return Ok(out);
4721        }
4722        let mut out = Vec::with_capacity(self.values.len());
4723        for (a, b) in self.values.iter().zip(&other.values) {
4724            if a.is_missing() || b.is_missing() {
4725                out.push(Scalar::Float64(f64::NAN));
4726                continue;
4727            }
4728            let af = a.to_f64().map_err(ColumnError::Type)?;
4729            let bf = b.to_f64().map_err(ColumnError::Type)?;
4730            out.push(Scalar::Float64(af % bf));
4731        }
4732        Self::new(DType::Float64, out)
4733    }
4734
4735    /// Element-wise copysign: magnitude of self with sign of other.
4736    pub fn copysign(&self, other: &Self) -> Result<Self, ColumnError> {
4737        if self.len() != other.len() {
4738            return Err(ColumnError::LengthMismatch {
4739                left: self.len(),
4740                right: other.len(),
4741            });
4742        }
4743        if let Some(out) = self.typed_float_binary(other, |m, s| m.copysign(s)) {
4744            return Ok(out);
4745        }
4746        let mut out = Vec::with_capacity(self.values.len());
4747        for (mag, sign) in self.values.iter().zip(&other.values) {
4748            if mag.is_missing() || sign.is_missing() {
4749                out.push(Scalar::Float64(f64::NAN));
4750                continue;
4751            }
4752            let mf = mag.to_f64().map_err(ColumnError::Type)?;
4753            let sf = sign.to_f64().map_err(ColumnError::Type)?;
4754            out.push(Scalar::Float64(mf.copysign(sf)));
4755        }
4756        Self::new(DType::Float64, out)
4757    }
4758
4759    /// Element-wise sign: -1, 0, or 1.
4760    pub fn sign(&self) -> Result<Self, ColumnError> {
4761        // Typed, dtype-preserving fast path (all-valid only): Int64 -> Int64
4762        // (-1/0/1), Float64 -> Float64. all-valid Float64 has no NaN so the
4763        // is_nan branch never fires; -0.0 -> 0.0 (neither >0.0 nor <0.0), exactly
4764        // as the scalar loop. Bit-identical.
4765        if let Some(data) = self.as_i64_slice() {
4766            return Ok(Self::from_i64_values(
4767                data.iter()
4768                    .map(|&x| {
4769                        if x > 0 {
4770                            1
4771                        } else if x < 0 {
4772                            -1
4773                        } else {
4774                            0
4775                        }
4776                    })
4777                    .collect(),
4778            ));
4779        }
4780        if let Some(data) = self.as_f64_slice() {
4781            return Ok(Self::from_f64_values(
4782                data.iter()
4783                    .map(|&x| {
4784                        if x > 0.0 {
4785                            1.0
4786                        } else if x < 0.0 {
4787                            -1.0
4788                        } else {
4789                            0.0
4790                        }
4791                    })
4792                    .collect(),
4793            ));
4794        }
4795        let mut out = Vec::with_capacity(self.values.len());
4796        for v in &self.values {
4797            if v.is_missing() {
4798                out.push(Scalar::Float64(f64::NAN));
4799                continue;
4800            }
4801            match v {
4802                Scalar::Int64(x) => {
4803                    let s = if *x > 0 {
4804                        1
4805                    } else if *x < 0 {
4806                        -1
4807                    } else {
4808                        0
4809                    };
4810                    out.push(Scalar::Int64(s));
4811                }
4812                Scalar::Float64(x) => {
4813                    let s = if x.is_nan() {
4814                        f64::NAN
4815                    } else if *x > 0.0 {
4816                        1.0
4817                    } else if *x < 0.0 {
4818                        -1.0
4819                    } else {
4820                        0.0
4821                    };
4822                    out.push(Scalar::Float64(s));
4823                }
4824                _ => {
4825                    return Err(ColumnError::Type(TypeError::NonNumericValue {
4826                        value: format!("{v:?}"),
4827                        dtype: self.dtype,
4828                    }));
4829                }
4830            }
4831        }
4832        let dtype = match self.dtype {
4833            DType::Int64 => DType::Int64,
4834            _ => DType::Float64,
4835        };
4836        Self::new(dtype, out)
4837    }
4838
4839    /// Test element-wise for negative sign bit.
4840    ///
4841    /// Matches np.signbit(x). Returns True for negative values including -0.0.
4842    pub fn signbit(&self) -> Result<Self, ColumnError> {
4843        // Typed fast path (all-valid only, output Bool): Int64 sign via x < 0,
4844        // Float64 via is_sign_negative (so -0.0 -> true). Bit-identical; all-valid
4845        // ⇒ the missing -> Bool(false) branch never applies.
4846        if let Some(data) = self.as_i64_slice() {
4847            return Ok(Self::from_bool_values(
4848                data.iter().map(|&x| x < 0).collect(),
4849            ));
4850        }
4851        if let Some(data) = self.as_f64_slice() {
4852            return Ok(Self::from_bool_values(
4853                data.iter().map(|&x| x.is_sign_negative()).collect(),
4854            ));
4855        }
4856        let mut out = Vec::with_capacity(self.values.len());
4857        for v in &self.values {
4858            if v.is_missing() {
4859                out.push(Scalar::Bool(false));
4860                continue;
4861            }
4862            match v {
4863                Scalar::Int64(x) => out.push(Scalar::Bool(*x < 0)),
4864                Scalar::Float64(x) => out.push(Scalar::Bool(x.is_sign_negative())),
4865                _ => {
4866                    return Err(ColumnError::Type(TypeError::NonNumericValue {
4867                        value: format!("{v:?}"),
4868                        dtype: self.dtype,
4869                    }));
4870                }
4871            }
4872        }
4873        Self::new(DType::Bool, out)
4874    }
4875
4876    /// Compute the Heaviside step function.
4877    ///
4878    /// Matches np.heaviside(x, h0). Returns:
4879    /// - 0 where x < 0
4880    /// - h0 where x == 0
4881    /// - 1 where x > 0
4882    pub fn heaviside(&self, h0: f64) -> Result<Self, ColumnError> {
4883        let mut out = Vec::with_capacity(self.values.len());
4884        for v in &self.values {
4885            if v.is_missing() {
4886                out.push(Scalar::Float64(f64::NAN));
4887                continue;
4888            }
4889            match v {
4890                Scalar::Int64(x) => {
4891                    let val = if *x < 0 {
4892                        0.0
4893                    } else if *x > 0 {
4894                        1.0
4895                    } else {
4896                        h0
4897                    };
4898                    out.push(Scalar::Float64(val));
4899                }
4900                Scalar::Float64(x) => {
4901                    let val = if x.is_nan() {
4902                        f64::NAN
4903                    } else if *x < 0.0 {
4904                        0.0
4905                    } else if *x > 0.0 {
4906                        1.0
4907                    } else {
4908                        h0
4909                    };
4910                    out.push(Scalar::Float64(val));
4911                }
4912                _ => {
4913                    return Err(ColumnError::Type(TypeError::NonNumericValue {
4914                        value: format!("{v:?}"),
4915                        dtype: self.dtype,
4916                    }));
4917                }
4918            }
4919        }
4920        Self::new(DType::Float64, out)
4921    }
4922
4923    /// Element-wise greatest common divisor.
4924    ///
4925    /// Matches np.gcd(x, y). Works on integer values.
4926    pub fn gcd(&self, other: &Self) -> Result<Self, ColumnError> {
4927        if self.len() != other.len() {
4928            return Err(ColumnError::LengthMismatch {
4929                left: self.len(),
4930                right: other.len(),
4931            });
4932        }
4933        fn compute_gcd(mut a: i64, mut b: i64) -> i64 {
4934            a = a.abs();
4935            b = b.abs();
4936            while b != 0 {
4937                let t = b;
4938                b = a % b;
4939                a = t;
4940            }
4941            a
4942        }
4943        let mut out = Vec::with_capacity(self.values.len());
4944        for (a, b) in self.values.iter().zip(&other.values) {
4945            if a.is_missing() || b.is_missing() {
4946                out.push(Scalar::Null(NullKind::Null));
4947                continue;
4948            }
4949            match (a, b) {
4950                (Scalar::Int64(x), Scalar::Int64(y)) => {
4951                    out.push(Scalar::Int64(compute_gcd(*x, *y)));
4952                }
4953                _ => {
4954                    return Err(ColumnError::Type(TypeError::NonNumericValue {
4955                        value: format!("{a:?}"),
4956                        dtype: self.dtype,
4957                    }));
4958                }
4959            }
4960        }
4961        Self::new(DType::Int64, out)
4962    }
4963
4964    /// Element-wise least common multiple.
4965    ///
4966    /// Matches np.lcm(x, y). Works on integer values.
4967    pub fn lcm(&self, other: &Self) -> Result<Self, ColumnError> {
4968        if self.len() != other.len() {
4969            return Err(ColumnError::LengthMismatch {
4970                left: self.len(),
4971                right: other.len(),
4972            });
4973        }
4974        fn compute_gcd(mut a: i64, mut b: i64) -> i64 {
4975            a = a.abs();
4976            b = b.abs();
4977            while b != 0 {
4978                let t = b;
4979                b = a % b;
4980                a = t;
4981            }
4982            a
4983        }
4984        let mut out = Vec::with_capacity(self.values.len());
4985        for (a, b) in self.values.iter().zip(&other.values) {
4986            if a.is_missing() || b.is_missing() {
4987                out.push(Scalar::Null(NullKind::Null));
4988                continue;
4989            }
4990            match (a, b) {
4991                (Scalar::Int64(x), Scalar::Int64(y)) => {
4992                    let g = compute_gcd(*x, *y);
4993                    let result = if g == 0 { 0 } else { (x.abs() / g) * y.abs() };
4994                    out.push(Scalar::Int64(result));
4995                }
4996                _ => {
4997                    return Err(ColumnError::Type(TypeError::NonNumericValue {
4998                        value: format!("{a:?}"),
4999                        dtype: self.dtype,
5000                    }));
5001                }
5002            }
5003        }
5004        Self::new(DType::Int64, out)
5005    }
5006
5007    /// Element-wise bitwise AND.
5008    pub fn bitwise_and(&self, other: &Self) -> Result<Self, ColumnError> {
5009        if self.len() != other.len() {
5010            return Err(ColumnError::LengthMismatch {
5011                left: self.len(),
5012                right: other.len(),
5013            });
5014        }
5015        let mut out = Vec::with_capacity(self.values.len());
5016        for (a, b) in self.values.iter().zip(&other.values) {
5017            if a.is_missing() || b.is_missing() {
5018                out.push(Scalar::Null(NullKind::Null));
5019                continue;
5020            }
5021            match (a, b) {
5022                (Scalar::Int64(x), Scalar::Int64(y)) => out.push(Scalar::Int64(x & y)),
5023                (Scalar::Bool(x), Scalar::Bool(y)) => out.push(Scalar::Bool(*x && *y)),
5024                _ => {
5025                    return Err(ColumnError::Type(TypeError::NonNumericValue {
5026                        value: format!("{a:?}"),
5027                        dtype: self.dtype,
5028                    }));
5029                }
5030            }
5031        }
5032        Self::new(self.dtype, out)
5033    }
5034
5035    /// Element-wise bitwise OR.
5036    pub fn bitwise_or(&self, other: &Self) -> Result<Self, ColumnError> {
5037        if self.len() != other.len() {
5038            return Err(ColumnError::LengthMismatch {
5039                left: self.len(),
5040                right: other.len(),
5041            });
5042        }
5043        let mut out = Vec::with_capacity(self.values.len());
5044        for (a, b) in self.values.iter().zip(&other.values) {
5045            if a.is_missing() || b.is_missing() {
5046                out.push(Scalar::Null(NullKind::Null));
5047                continue;
5048            }
5049            match (a, b) {
5050                (Scalar::Int64(x), Scalar::Int64(y)) => out.push(Scalar::Int64(x | y)),
5051                (Scalar::Bool(x), Scalar::Bool(y)) => out.push(Scalar::Bool(*x || *y)),
5052                _ => {
5053                    return Err(ColumnError::Type(TypeError::NonNumericValue {
5054                        value: format!("{a:?}"),
5055                        dtype: self.dtype,
5056                    }));
5057                }
5058            }
5059        }
5060        Self::new(self.dtype, out)
5061    }
5062
5063    /// Element-wise bitwise XOR.
5064    pub fn bitwise_xor(&self, other: &Self) -> Result<Self, ColumnError> {
5065        if self.len() != other.len() {
5066            return Err(ColumnError::LengthMismatch {
5067                left: self.len(),
5068                right: other.len(),
5069            });
5070        }
5071        let mut out = Vec::with_capacity(self.values.len());
5072        for (a, b) in self.values.iter().zip(&other.values) {
5073            if a.is_missing() || b.is_missing() {
5074                out.push(Scalar::Null(NullKind::Null));
5075                continue;
5076            }
5077            match (a, b) {
5078                (Scalar::Int64(x), Scalar::Int64(y)) => out.push(Scalar::Int64(x ^ y)),
5079                (Scalar::Bool(x), Scalar::Bool(y)) => out.push(Scalar::Bool(*x ^ *y)),
5080                _ => {
5081                    return Err(ColumnError::Type(TypeError::NonNumericValue {
5082                        value: format!("{a:?}"),
5083                        dtype: self.dtype,
5084                    }));
5085                }
5086            }
5087        }
5088        Self::new(self.dtype, out)
5089    }
5090
5091    /// Element-wise left bit shift.
5092    ///
5093    /// Matches np.left_shift(x, y). Shifts bits of x left by y positions.
5094    pub fn left_shift(&self, other: &Self) -> Result<Self, ColumnError> {
5095        if self.len() != other.len() {
5096            return Err(ColumnError::LengthMismatch {
5097                left: self.len(),
5098                right: other.len(),
5099            });
5100        }
5101        let mut out = Vec::with_capacity(self.values.len());
5102        for (a, b) in self.values.iter().zip(&other.values) {
5103            if a.is_missing() || b.is_missing() {
5104                out.push(Scalar::Null(NullKind::Null));
5105                continue;
5106            }
5107            match (a, b) {
5108                (Scalar::Int64(x), Scalar::Int64(y)) => {
5109                    let shift = (*y).clamp(0, 63) as u32;
5110                    out.push(Scalar::Int64(x.wrapping_shl(shift)));
5111                }
5112                _ => {
5113                    return Err(ColumnError::Type(TypeError::NonNumericValue {
5114                        value: format!("{a:?}"),
5115                        dtype: self.dtype,
5116                    }));
5117                }
5118            }
5119        }
5120        Self::new(DType::Int64, out)
5121    }
5122
5123    /// Element-wise right bit shift.
5124    ///
5125    /// Matches np.right_shift(x, y). Shifts bits of x right by y positions.
5126    pub fn right_shift(&self, other: &Self) -> Result<Self, ColumnError> {
5127        if self.len() != other.len() {
5128            return Err(ColumnError::LengthMismatch {
5129                left: self.len(),
5130                right: other.len(),
5131            });
5132        }
5133        let mut out = Vec::with_capacity(self.values.len());
5134        for (a, b) in self.values.iter().zip(&other.values) {
5135            if a.is_missing() || b.is_missing() {
5136                out.push(Scalar::Null(NullKind::Null));
5137                continue;
5138            }
5139            match (a, b) {
5140                (Scalar::Int64(x), Scalar::Int64(y)) => {
5141                    let shift = (*y).clamp(0, 63) as u32;
5142                    out.push(Scalar::Int64(x.wrapping_shr(shift)));
5143                }
5144                _ => {
5145                    return Err(ColumnError::Type(TypeError::NonNumericValue {
5146                        value: format!("{a:?}"),
5147                        dtype: self.dtype,
5148                    }));
5149                }
5150            }
5151        }
5152        Self::new(DType::Int64, out)
5153    }
5154
5155    /// Element-wise bitwise NOT (invert).
5156    pub fn bitwise_not(&self) -> Result<Self, ColumnError> {
5157        let mut out = Vec::with_capacity(self.values.len());
5158        for v in &self.values {
5159            if v.is_missing() {
5160                out.push(Scalar::Null(NullKind::Null));
5161                continue;
5162            }
5163            match v {
5164                Scalar::Int64(x) => out.push(Scalar::Int64(!x)),
5165                Scalar::Bool(x) => out.push(Scalar::Bool(!x)),
5166                _ => {
5167                    return Err(ColumnError::Type(TypeError::NonNumericValue {
5168                        value: format!("{v:?}"),
5169                        dtype: self.dtype,
5170                    }));
5171                }
5172            }
5173        }
5174        Self::new(self.dtype, out)
5175    }
5176
5177    /// Alias for bitwise_not.
5178    pub fn invert(&self) -> Result<Self, ColumnError> {
5179        self.bitwise_not()
5180    }
5181
5182    /// Element-wise maximum, NaN propagates.
5183    pub fn maximum(&self, other: &Self) -> Result<Self, ColumnError> {
5184        if self.len() != other.len() {
5185            return Err(ColumnError::LengthMismatch {
5186                left: self.len(),
5187                right: other.len(),
5188            });
5189        }
5190        // all-valid ⇒ no NaN (NaN floats mark the column invalid), so the scalar
5191        // loop's is_nan branch never fires and the result is af.max(bf).
5192        if let Some(out) = self.typed_float_binary(other, f64::max) {
5193            return Ok(out);
5194        }
5195        let mut out = Vec::with_capacity(self.values.len());
5196        for (a, b) in self.values.iter().zip(&other.values) {
5197            if a.is_missing() || b.is_missing() {
5198                out.push(Scalar::Float64(f64::NAN));
5199                continue;
5200            }
5201            let af = a.to_f64().map_err(ColumnError::Type)?;
5202            let bf = b.to_f64().map_err(ColumnError::Type)?;
5203            if af.is_nan() || bf.is_nan() {
5204                out.push(Scalar::Float64(f64::NAN));
5205            } else {
5206                out.push(Scalar::Float64(af.max(bf)));
5207            }
5208        }
5209        Self::new(DType::Float64, out)
5210    }
5211
5212    /// Element-wise minimum, NaN propagates.
5213    pub fn minimum(&self, other: &Self) -> Result<Self, ColumnError> {
5214        if self.len() != other.len() {
5215            return Err(ColumnError::LengthMismatch {
5216                left: self.len(),
5217                right: other.len(),
5218            });
5219        }
5220        if let Some(out) = self.typed_float_binary(other, f64::min) {
5221            return Ok(out);
5222        }
5223        let mut out = Vec::with_capacity(self.values.len());
5224        for (a, b) in self.values.iter().zip(&other.values) {
5225            if a.is_missing() || b.is_missing() {
5226                out.push(Scalar::Float64(f64::NAN));
5227                continue;
5228            }
5229            let af = a.to_f64().map_err(ColumnError::Type)?;
5230            let bf = b.to_f64().map_err(ColumnError::Type)?;
5231            if af.is_nan() || bf.is_nan() {
5232                out.push(Scalar::Float64(f64::NAN));
5233            } else {
5234                out.push(Scalar::Float64(af.min(bf)));
5235            }
5236        }
5237        Self::new(DType::Float64, out)
5238    }
5239
5240    /// Element-wise maximum, ignoring NaN.
5241    pub fn fmax(&self, other: &Self) -> Result<Self, ColumnError> {
5242        if self.len() != other.len() {
5243            return Err(ColumnError::LengthMismatch {
5244                left: self.len(),
5245                right: other.len(),
5246            });
5247        }
5248        // all-valid numeric ⇒ both to_f64().ok() are Some(non-NaN), so the result
5249        // is x.max(y) — same as `maximum` on this domain.
5250        if let Some(out) = self.typed_float_binary(other, f64::max) {
5251            return Ok(out);
5252        }
5253        let mut out = Vec::with_capacity(self.values.len());
5254        for (a, b) in self.values.iter().zip(&other.values) {
5255            let af = a.to_f64().ok();
5256            let bf = b.to_f64().ok();
5257            let result = match (af, bf) {
5258                (Some(x), Some(y)) if x.is_nan() => y,
5259                (Some(x), Some(y)) if y.is_nan() => x,
5260                (Some(x), Some(y)) => x.max(y),
5261                (Some(x), None) => x,
5262                (None, Some(y)) => y,
5263                (None, None) => f64::NAN,
5264            };
5265            out.push(Scalar::Float64(result));
5266        }
5267        Self::new(DType::Float64, out)
5268    }
5269
5270    /// Element-wise minimum, ignoring NaN.
5271    pub fn fmin(&self, other: &Self) -> Result<Self, ColumnError> {
5272        if self.len() != other.len() {
5273            return Err(ColumnError::LengthMismatch {
5274                left: self.len(),
5275                right: other.len(),
5276            });
5277        }
5278        if let Some(out) = self.typed_float_binary(other, f64::min) {
5279            return Ok(out);
5280        }
5281        let mut out = Vec::with_capacity(self.values.len());
5282        for (a, b) in self.values.iter().zip(&other.values) {
5283            let af = a.to_f64().ok();
5284            let bf = b.to_f64().ok();
5285            let result = match (af, bf) {
5286                (Some(x), Some(y)) if x.is_nan() => y,
5287                (Some(x), Some(y)) if y.is_nan() => x,
5288                (Some(x), Some(y)) => x.min(y),
5289                (Some(x), None) => x,
5290                (None, Some(y)) => y,
5291                (None, None) => f64::NAN,
5292            };
5293            out.push(Scalar::Float64(result));
5294        }
5295        Self::new(DType::Float64, out)
5296    }
5297
5298    /// Logical AND between two boolean columns.
5299    pub fn logical_and(&self, other: &Self) -> Result<Self, ColumnError> {
5300        if self.len() != other.len() {
5301            return Err(ColumnError::LengthMismatch {
5302                left: self.len(),
5303                right: other.len(),
5304            });
5305        }
5306        let mut out = Vec::with_capacity(self.values.len());
5307        for (a, b) in self.values.iter().zip(&other.values) {
5308            if a.is_missing() || b.is_missing() {
5309                out.push(Scalar::Null(NullKind::Null));
5310                continue;
5311            }
5312            let av = match a {
5313                Scalar::Bool(x) => *x,
5314                _ => a.to_f64().map(|v| v != 0.0).unwrap_or(false),
5315            };
5316            let bv = match b {
5317                Scalar::Bool(x) => *x,
5318                _ => b.to_f64().map(|v| v != 0.0).unwrap_or(false),
5319            };
5320            out.push(Scalar::Bool(av && bv));
5321        }
5322        Self::new(DType::Bool, out)
5323    }
5324
5325    /// Logical OR between two boolean columns.
5326    pub fn logical_or(&self, other: &Self) -> Result<Self, ColumnError> {
5327        if self.len() != other.len() {
5328            return Err(ColumnError::LengthMismatch {
5329                left: self.len(),
5330                right: other.len(),
5331            });
5332        }
5333        let mut out = Vec::with_capacity(self.values.len());
5334        for (a, b) in self.values.iter().zip(&other.values) {
5335            if a.is_missing() || b.is_missing() {
5336                out.push(Scalar::Null(NullKind::Null));
5337                continue;
5338            }
5339            let av = match a {
5340                Scalar::Bool(x) => *x,
5341                _ => a.to_f64().map(|v| v != 0.0).unwrap_or(false),
5342            };
5343            let bv = match b {
5344                Scalar::Bool(x) => *x,
5345                _ => b.to_f64().map(|v| v != 0.0).unwrap_or(false),
5346            };
5347            out.push(Scalar::Bool(av || bv));
5348        }
5349        Self::new(DType::Bool, out)
5350    }
5351
5352    /// Logical XOR between two boolean columns.
5353    pub fn logical_xor(&self, other: &Self) -> Result<Self, ColumnError> {
5354        if self.len() != other.len() {
5355            return Err(ColumnError::LengthMismatch {
5356                left: self.len(),
5357                right: other.len(),
5358            });
5359        }
5360        let mut out = Vec::with_capacity(self.values.len());
5361        for (a, b) in self.values.iter().zip(&other.values) {
5362            if a.is_missing() || b.is_missing() {
5363                out.push(Scalar::Null(NullKind::Null));
5364                continue;
5365            }
5366            let av = match a {
5367                Scalar::Bool(x) => *x,
5368                _ => a.to_f64().map(|v| v != 0.0).unwrap_or(false),
5369            };
5370            let bv = match b {
5371                Scalar::Bool(x) => *x,
5372                _ => b.to_f64().map(|v| v != 0.0).unwrap_or(false),
5373            };
5374            out.push(Scalar::Bool(av ^ bv));
5375        }
5376        Self::new(DType::Bool, out)
5377    }
5378
5379    /// Logical NOT (element-wise negation to boolean).
5380    pub fn logical_not(&self) -> Result<Self, ColumnError> {
5381        let mut out = Vec::with_capacity(self.values.len());
5382        for v in &self.values {
5383            if v.is_missing() {
5384                out.push(Scalar::Null(NullKind::Null));
5385                continue;
5386            }
5387            let bv = match v {
5388                Scalar::Bool(x) => *x,
5389                _ => v.to_f64().map(|x| x != 0.0).unwrap_or(false),
5390            };
5391            out.push(Scalar::Bool(!bv));
5392        }
5393        Self::new(DType::Bool, out)
5394    }
5395
5396    /// Element-wise comparison producing a `Bool`-typed column.
5397    ///
5398    /// Both columns must have the same length. Missing values (Null or NaN)
5399    /// propagate: if either operand is missing, the result is missing.
5400    pub fn binary_comparison(&self, right: &Self, op: ComparisonOp) -> Result<Self, ColumnError> {
5401        if self.len() != right.len() {
5402            return Err(ColumnError::LengthMismatch {
5403                left: self.len(),
5404                right: right.len(),
5405            });
5406        }
5407
5408        // Typed fast path: both operands are all-valid contiguous Float64 (resp.
5409        // Int64), so compare over the buffers and build the Bool result via
5410        // from_bool_values — no Scalar materialization or per-element dispatch.
5411        // Bit-identical to scalar_compare's same-dtype arm (the identical `a <op>
5412        // b`); all-valid inputs mean no Null branch, and the comparisons never
5413        // see a NaN (as_f64_slice excludes it).
5414        if let (Some(l), Some(r)) = (self.as_f64_slice(), right.as_f64_slice()) {
5415            let bools: Vec<bool> = l
5416                .iter()
5417                .zip(r)
5418                .map(|(&a, &b)| match op {
5419                    ComparisonOp::Gt => a > b,
5420                    ComparisonOp::Lt => a < b,
5421                    ComparisonOp::Eq => a == b,
5422                    ComparisonOp::Ne => a != b,
5423                    ComparisonOp::Ge => a >= b,
5424                    ComparisonOp::Le => a <= b,
5425                })
5426                .collect();
5427            return Ok(Self::from_bool_values(bools));
5428        }
5429        if let (Some(l), Some(r)) = (self.as_i64_slice(), right.as_i64_slice()) {
5430            let bools: Vec<bool> = l
5431                .iter()
5432                .zip(r)
5433                .map(|(&a, &b)| match op {
5434                    ComparisonOp::Gt => a > b,
5435                    ComparisonOp::Lt => a < b,
5436                    ComparisonOp::Eq => a == b,
5437                    ComparisonOp::Ne => a != b,
5438                    ComparisonOp::Ge => a >= b,
5439                    ComparisonOp::Le => a <= b,
5440                })
5441                .collect();
5442            return Ok(Self::from_bool_values(bools));
5443        }
5444
5445        let values = self
5446            .values
5447            .iter()
5448            .zip(&right.values)
5449            .map(|(l, r)| -> Result<Scalar, ColumnError> {
5450                if l.is_missing() || r.is_missing() {
5451                    return Ok(Scalar::Null(NullKind::Null));
5452                }
5453                let result = scalar_compare(l, r, op)?;
5454                Ok(Scalar::Bool(result))
5455            })
5456            .collect::<Result<Vec<_>, _>>()?;
5457
5458        Self::new(DType::Bool, values)
5459    }
5460
5461    /// Element-wise equality, matching `pd.Series.eq()`.
5462    pub fn eq(&self, right: &Self) -> Result<Self, ColumnError> {
5463        self.binary_comparison(right, ComparisonOp::Eq)
5464    }
5465
5466    /// Element-wise inequality, matching `pd.Series.ne()`.
5467    pub fn ne(&self, right: &Self) -> Result<Self, ColumnError> {
5468        self.binary_comparison(right, ComparisonOp::Ne)
5469    }
5470
5471    /// Element-wise less-than comparison, matching `pd.Series.lt()`.
5472    pub fn lt(&self, right: &Self) -> Result<Self, ColumnError> {
5473        self.binary_comparison(right, ComparisonOp::Lt)
5474    }
5475
5476    /// Element-wise less-than-or-equal comparison, matching `pd.Series.le()`.
5477    pub fn le(&self, right: &Self) -> Result<Self, ColumnError> {
5478        self.binary_comparison(right, ComparisonOp::Le)
5479    }
5480
5481    /// Element-wise greater-than comparison, matching `pd.Series.gt()`.
5482    pub fn gt(&self, right: &Self) -> Result<Self, ColumnError> {
5483        self.binary_comparison(right, ComparisonOp::Gt)
5484    }
5485
5486    /// Element-wise greater-than-or-equal comparison, matching `pd.Series.ge()`.
5487    pub fn ge(&self, right: &Self) -> Result<Self, ColumnError> {
5488        self.binary_comparison(right, ComparisonOp::Ge)
5489    }
5490
5491    /// Compare every element against a scalar value, producing a `Bool`-typed column.
5492    ///
5493    /// Missing values in the column propagate as missing in the result.
5494    pub fn compare_scalar(&self, scalar: &Scalar, op: ComparisonOp) -> Result<Self, ColumnError> {
5495        if scalar.is_missing() {
5496            // Comparing against missing always produces all-missing.
5497            let values = vec![Scalar::Null(NullKind::Null); self.len()];
5498            return Self::new(DType::Bool, values);
5499        }
5500
5501        // Typed fast path (br-frankenpandas-2kpwa): when self is an all-valid
5502        // contiguous numeric buffer, compare against the scalar directly over the
5503        // typed slice and build the Bool result via from_bool_values, skipping the
5504        // per-element Scalar dispatch in scalar_compare and the 32-byte Scalar alloc
5505        // for every output cell. Bit-identical to the scalar path:
5506        //   * Float64 self vs any numeric scalar reduces in scalar_compare to the
5507        //     final "convert both to f64" branch, i.e. `v <op> scalar.to_f64()`.
5508        //   * Int64 self vs Int64 scalar takes scalar_compare's both-Int64 branch,
5509        //     i.e. the i64 comparison. (Int64 vs float scalar still uses the AoS
5510        //     path so f64-promotion semantics stay identical.)
5511        if let Some(data) = self.as_f64_slice()
5512            && let Ok(s) = scalar.to_f64()
5513        {
5514            let bools: Vec<bool> = data
5515                .iter()
5516                .map(|&v| match op {
5517                    ComparisonOp::Gt => v > s,
5518                    ComparisonOp::Lt => v < s,
5519                    ComparisonOp::Eq => v == s,
5520                    ComparisonOp::Ne => v != s,
5521                    ComparisonOp::Ge => v >= s,
5522                    ComparisonOp::Le => v <= s,
5523                })
5524                .collect();
5525            return Ok(Self::from_bool_values(bools));
5526        }
5527        if let Some(data) = self.as_i64_slice()
5528            && let Scalar::Int64(s) = scalar
5529        {
5530            let s = *s;
5531            let bools: Vec<bool> = data
5532                .iter()
5533                .map(|&v| match op {
5534                    ComparisonOp::Gt => v > s,
5535                    ComparisonOp::Lt => v < s,
5536                    ComparisonOp::Eq => v == s,
5537                    ComparisonOp::Ne => v != s,
5538                    ComparisonOp::Ge => v >= s,
5539                    ComparisonOp::Le => v <= s,
5540                })
5541                .collect();
5542            return Ok(Self::from_bool_values(bools));
5543        }
5544
5545        let values = self
5546            .values
5547            .iter()
5548            .map(|v| -> Result<Scalar, ColumnError> {
5549                if v.is_missing() {
5550                    return Ok(Scalar::Null(NullKind::Null));
5551                }
5552                let result = scalar_compare(v, scalar, op)?;
5553                Ok(Scalar::Bool(result))
5554            })
5555            .collect::<Result<Vec<_>, _>>()?;
5556
5557        Self::new(DType::Bool, values)
5558    }
5559
5560    /// Select elements where `mask` is `true`, producing a new column.
5561    ///
5562    /// The mask must be a `Bool`-typed column of the same length.
5563    /// Missing values in the mask are treated as `false` (not selected).
5564    pub fn filter_by_mask(&self, mask: &Self) -> Result<Self, ColumnError> {
5565        if mask.dtype != DType::Bool {
5566            return Err(ColumnError::InvalidMaskType { dtype: mask.dtype });
5567        }
5568        if self.len() != mask.len() {
5569            return Err(ColumnError::LengthMismatch {
5570                left: self.len(),
5571                right: mask.len(),
5572            });
5573        }
5574
5575        // Typed fast path (br-frankenpandas-lei31 family): gather the contiguous
5576        // f64/i64 buffer by the mask directly, skipping the 32-byte Scalar clone
5577        // and the dtype-coercion scan in Column::new. Bit-identical — selects the
5578        // same positions in the same order, and an all-valid source yields an
5579        // all-valid result, exactly as the Scalar path + Column::new would.
5580        //
5581        // When the mask is an all-valid Bool column (the usual shape — it just
5582        // came out of a comparison), read its contiguous `bool` buffer instead of
5583        // `mask.values` (which would force the lazy-bool mask to materialize a
5584        // full Vec<Scalar::Bool>). `as_bool_slice` returns the raw bits, and for
5585        // an all-valid bool column `bits[i] == matches!(values[i],
5586        // Scalar::Bool(true))`, so selection is identical.
5587        if let Some(mask_bits) = mask.as_bool_slice() {
5588            if let Some(data) = self.as_f64_slice() {
5589                let gathered: Vec<f64> = data
5590                    .iter()
5591                    .zip(mask_bits)
5592                    .filter_map(|(&v, &m)| m.then_some(v))
5593                    .collect();
5594                return Ok(Self::from_f64_values(gathered));
5595            }
5596            if let Some(data) = self.as_i64_slice() {
5597                let gathered: Vec<i64> = data
5598                    .iter()
5599                    .zip(mask_bits)
5600                    .filter_map(|(&v, &m)| m.then_some(v))
5601                    .collect();
5602                return Ok(Self::from_i64_values(gathered));
5603            }
5604            let values = self
5605                .values
5606                .iter()
5607                .zip(mask_bits)
5608                .filter_map(|(val, &m)| m.then_some(val.clone()))
5609                .collect::<Vec<_>>();
5610            return Self::new(self.dtype, values);
5611        }
5612
5613        if let Some(data) = self.as_f64_slice() {
5614            let gathered: Vec<f64> = data
5615                .iter()
5616                .zip(mask.values.iter())
5617                .filter_map(|(&v, m)| matches!(m, Scalar::Bool(true)).then_some(v))
5618                .collect();
5619            return Ok(Self::from_f64_values(gathered));
5620        }
5621        if let Some(data) = self.as_i64_slice() {
5622            let gathered: Vec<i64> = data
5623                .iter()
5624                .zip(mask.values.iter())
5625                .filter_map(|(&v, m)| matches!(m, Scalar::Bool(true)).then_some(v))
5626                .collect();
5627            return Ok(Self::from_i64_values(gathered));
5628        }
5629
5630        let values = self
5631            .values
5632            .iter()
5633            .zip(mask.values.iter())
5634            .filter_map(|(val, mask_val)| match mask_val {
5635                Scalar::Bool(true) => Some(val.clone()),
5636                _ => None,
5637            })
5638            .collect::<Vec<_>>();
5639
5640        Self::new(self.dtype, values)
5641    }
5642
5643    /// Fill missing values with a replacement scalar.
5644    ///
5645    /// Returns a new column where every missing position is replaced
5646    /// by `fill_value`. The fill value is cast to the column's dtype.
5647    pub fn fillna(&self, fill_value: &Scalar) -> Result<Self, ColumnError> {
5648        if self.dtype == DType::Null {
5649            let replacement_dtype = if fill_value.is_missing() {
5650                DType::Null
5651            } else {
5652                fill_value.dtype()
5653            };
5654            let values = self
5655                .values
5656                .iter()
5657                .map(|value| {
5658                    if value.is_missing() {
5659                        fill_value.clone()
5660                    } else {
5661                        value.clone()
5662                    }
5663                })
5664                .collect();
5665            return Self::new(replacement_dtype, values);
5666        }
5667
5668        let cast_fill = cast_scalar(fill_value, self.dtype)?;
5669        let values = self
5670            .values
5671            .iter()
5672            .map(|v| {
5673                if v.is_missing() {
5674                    cast_fill.clone()
5675                } else {
5676                    v.clone()
5677                }
5678            })
5679            .collect();
5680
5681        Self::new(self.dtype, values)
5682    }
5683
5684    /// Remove missing values, returning a shorter column.
5685    pub fn dropna(&self) -> Result<Self, ColumnError> {
5686        let values = self
5687            .values
5688            .iter()
5689            .filter(|v| !v.is_missing())
5690            .cloned()
5691            .collect();
5692
5693        Self::new(self.dtype, values)
5694    }
5695
5696    /// Gather rows by integer position.
5697    ///
5698    /// Matches `pd.Series.take(indices)`. Each index must fall within
5699    /// `0..len()`; out-of-range positions return
5700    /// `ColumnError::LengthMismatch` (left=length, right=offending
5701    /// index).
5702    pub fn take(&self, indices: &[usize]) -> Result<Self, ColumnError> {
5703        let mut out = Vec::with_capacity(indices.len());
5704        for &i in indices {
5705            match self.values.get(i) {
5706                Some(v) => out.push(v.clone()),
5707                None => {
5708                    return Err(ColumnError::LengthMismatch {
5709                        left: self.values.len(),
5710                        right: i,
5711                    });
5712                }
5713            }
5714        }
5715        Self::new(self.dtype, out)
5716    }
5717
5718    /// Replace elements at specified indices with given values.
5719    ///
5720    /// Matches np.put(). Returns a new column with values replaced at indices.
5721    pub fn put(&self, indices: &[usize], values: &[Scalar]) -> Result<Self, ColumnError> {
5722        if indices.len() != values.len() {
5723            return Err(ColumnError::LengthMismatch {
5724                left: indices.len(),
5725                right: values.len(),
5726            });
5727        }
5728        let mut out = self.values.to_vec();
5729        for (&i, v) in indices.iter().zip(values) {
5730            if i >= out.len() {
5731                return Err(ColumnError::LengthMismatch {
5732                    left: out.len(),
5733                    right: i,
5734                });
5735            }
5736            out[i] = v.clone();
5737        }
5738        Self::new(self.dtype, out)
5739    }
5740
5741    /// Contiguous slice by positional range `start..start+len`.
5742    ///
5743    /// Out-of-range requests are clamped to the available tail so a
5744    /// start past `len()` yields an empty column with the same dtype,
5745    /// matching pandas' permissive slice semantics.
5746    pub fn slice(&self, start: usize, len: usize) -> Result<Self, ColumnError> {
5747        if start >= self.values.len() {
5748            return Self::new(self.dtype, Vec::new());
5749        }
5750        let end = start.saturating_add(len).min(self.values.len());
5751        let values = self.values[start..end].to_vec();
5752        Self::new(self.dtype, values)
5753    }
5754
5755    /// Return the first `n` values.
5756    ///
5757    /// Matches pandas `head(n)` semantics on a 1-D array-like surface.
5758    /// Negative `n` returns all values except the last `-n`.
5759    pub fn head(&self, n: i64) -> Result<Self, ColumnError> {
5760        let take = normalize_head_take(n, self.len());
5761        self.slice(0, take)
5762    }
5763
5764    /// Return the last `n` values.
5765    ///
5766    /// Matches pandas `tail(n)` semantics on a 1-D array-like surface.
5767    /// Negative `n` returns all values except the first `-n`.
5768    pub fn tail(&self, n: i64) -> Result<Self, ColumnError> {
5769        let (start, len) = normalize_tail_window(n, self.len());
5770        self.slice(start, len)
5771    }
5772
5773    /// Split column into n equal-ish parts.
5774    ///
5775    /// Matches np.array_split(). Returns Vec of Columns.
5776    pub fn array_split(&self, n: usize) -> Result<Vec<Self>, ColumnError> {
5777        if n == 0 {
5778            return Ok(Vec::new());
5779        }
5780        let len = self.values.len();
5781        let base_size = len / n;
5782        let remainder = len % n;
5783        let mut result = Vec::with_capacity(n);
5784        let mut start = 0;
5785        for i in 0..n {
5786            let size = base_size + if i < remainder { 1 } else { 0 };
5787            let part = self.slice(start, size)?;
5788            result.push(part);
5789            start += size;
5790        }
5791        Ok(result)
5792    }
5793
5794    /// Alias for array_split.
5795    pub fn split(&self, n: usize) -> Result<Vec<Self>, ColumnError> {
5796        self.array_split(n)
5797    }
5798
5799    /// Concatenate `other` onto `self`, preserving dtype.
5800    ///
5801    /// Returns `ColumnError::DTypeMismatch` when `other.dtype()` differs
5802    /// from `self.dtype()`.
5803    pub fn concat(&self, other: &Self) -> Result<Self, ColumnError> {
5804        if self.dtype != other.dtype {
5805            return Err(ColumnError::DTypeMismatch {
5806                left: self.dtype,
5807                right: other.dtype,
5808            });
5809        }
5810        let mut values = Vec::with_capacity(self.values.len() + other.values.len());
5811        values.extend_from_slice(&self.values);
5812        values.extend_from_slice(&other.values);
5813        Self::new(self.dtype, values)
5814    }
5815
5816    /// Alias for concat, matching np.append.
5817    pub fn append(&self, other: &Self) -> Result<Self, ColumnError> {
5818        self.concat(other)
5819    }
5820
5821    /// Insert values at given index.
5822    ///
5823    /// Matches np.insert(). Returns new column with values inserted.
5824    pub fn insert(&self, index: usize, values: &[Scalar]) -> Result<Self, ColumnError> {
5825        let idx = index.min(self.values.len());
5826        let mut out = Vec::with_capacity(self.values.len() + values.len());
5827        out.extend_from_slice(&self.values[..idx]);
5828        out.extend_from_slice(values);
5829        out.extend_from_slice(&self.values[idx..]);
5830        Self::new(self.dtype, out)
5831    }
5832
5833    /// Delete values at given indices.
5834    ///
5835    /// Matches np.delete(). Returns new column with values removed.
5836    pub fn delete(&self, indices: &[usize]) -> Result<Self, ColumnError> {
5837        let mut to_delete: FxHashSet<usize> = FxHashSet::default();
5838        for &i in indices {
5839            to_delete.insert(i);
5840        }
5841        let out: Vec<Scalar> = self
5842            .values
5843            .iter()
5844            .enumerate()
5845            .filter(|(i, _)| !to_delete.contains(i))
5846            .map(|(_, v)| v.clone())
5847            .collect();
5848        Self::new(self.dtype, out)
5849    }
5850
5851    /// Resize column to new size, padding or truncating as needed.
5852    ///
5853    /// Matches np.resize(). If new size is larger, values cycle from beginning.
5854    pub fn resize(&self, new_size: usize) -> Result<Self, ColumnError> {
5855        if new_size == 0 || self.values.is_empty() {
5856            return Self::new(self.dtype, Vec::new());
5857        }
5858        let mut out = Vec::with_capacity(new_size);
5859        let mut i = 0;
5860        while out.len() < new_size {
5861            out.push(self.values[i % self.values.len()].clone());
5862            i += 1;
5863        }
5864        Self::new(self.dtype, out)
5865    }
5866
5867    /// Repeat each value `repeats` times contiguously.
5868    ///
5869    /// Matches `pd.Series.repeat(n)`. `repeats=0` yields an empty
5870    /// column; `repeats=1` is a clone.
5871    pub fn repeat(&self, repeats: usize) -> Result<Self, ColumnError> {
5872        if repeats == 0 {
5873            return Self::new(self.dtype, Vec::new());
5874        }
5875        if repeats == 1 {
5876            return Ok(self.clone());
5877        }
5878        let mut out = Vec::with_capacity(self.values.len() * repeats);
5879        for v in &self.values {
5880            for _ in 0..repeats {
5881                out.push(v.clone());
5882            }
5883        }
5884        Self::new(self.dtype, out)
5885    }
5886
5887    /// Tile (repeat) the entire column n times.
5888    ///
5889    /// Matches `np.tile()`. Unlike repeat which duplicates each element,
5890    /// tile duplicates the entire array.
5891    pub fn tile(&self, reps: usize) -> Result<Self, ColumnError> {
5892        if reps == 0 {
5893            return Self::new(self.dtype, Vec::new());
5894        }
5895        if reps == 1 {
5896            return Ok(self.clone());
5897        }
5898        let mut out = Vec::with_capacity(self.values.len() * reps);
5899        for _ in 0..reps {
5900            out.extend_from_slice(&self.values);
5901        }
5902        Self::new(self.dtype, out)
5903    }
5904
5905    /// Reverse the row order of the column.
5906    ///
5907    /// Matches `pd.Series[::-1]` / `iloc[::-1]`. Dtype is preserved.
5908    pub fn reverse(&self) -> Result<Self, ColumnError> {
5909        let mut values = self.values.to_vec();
5910        values.reverse();
5911        Self::new(self.dtype, values)
5912    }
5913
5914    /// Alias for reverse, matching np.flip.
5915    pub fn flip(&self) -> Result<Self, ColumnError> {
5916        self.reverse()
5917    }
5918
5919    /// Roll array elements along the axis.
5920    ///
5921    /// Matches np.roll(a, shift). Elements that roll beyond the last position
5922    /// are re-introduced at the first, and vice versa. Positive shift rolls
5923    /// elements to higher indices (right), negative to lower (left).
5924    pub fn roll(&self, shift: i64) -> Result<Self, ColumnError> {
5925        let len = self.len();
5926        if len == 0 {
5927            return Ok(self.clone());
5928        }
5929        let shift = ((shift % len as i64) + len as i64) as usize % len;
5930        if shift == 0 {
5931            return Ok(self.clone());
5932        }
5933        let mut out = Vec::with_capacity(len);
5934        let split = len - shift;
5935        out.extend_from_slice(&self.values[split..]);
5936        out.extend_from_slice(&self.values[..split]);
5937        Self::new(self.dtype, out)
5938    }
5939
5940    /// Filter values based on a boolean condition column.
5941    ///
5942    /// Matches `np.compress()`. Returns only values where condition is True.
5943    pub fn compress(&self, condition: &Self) -> Result<Self, ColumnError> {
5944        if self.len() != condition.len() {
5945            return Err(ColumnError::LengthMismatch {
5946                left: self.len(),
5947                right: condition.len(),
5948            });
5949        }
5950        let mut out = Vec::new();
5951        for (v, c) in self.values.iter().zip(&condition.values) {
5952            match c {
5953                Scalar::Bool(true) => out.push(v.clone()),
5954                Scalar::Bool(false) => {}
5955                _ => {
5956                    return Err(ColumnError::Type(TypeError::NonNumericValue {
5957                        value: format!("{c:?}"),
5958                        dtype: condition.dtype,
5959                    }));
5960                }
5961            }
5962        }
5963        Self::new(self.dtype, out)
5964    }
5965
5966    /// Cumulative sum, null-propagating per fp-types::nancumsum.
5967    ///
5968    /// Matches `pd.Series.cumsum()`. The resulting column is always
5969    /// Float64 (matching the numeric accumulator type used in
5970    /// nancumsum).
5971    pub fn cumsum(&self) -> Result<Self, ColumnError> {
5972        // Typed prefix scan: an all-valid Float64 column scans its contiguous
5973        // buffer, no Scalar materialization in or out. Bit-identical to
5974        // nancumsum's Float64 arm — the same `running += x` left-fold seeded at
5975        // 0.0 in the same order; an operation-produced NaN (inf - inf) is flagged
5976        // missing by from_f64_values, exactly as Self::new(Float64, ...) does.
5977        if let Some(data) = self.as_f64_slice() {
5978            let mut running = 0.0_f64;
5979            let out: Vec<f64> = data
5980                .iter()
5981                .map(|&x| {
5982                    running += x;
5983                    running
5984                })
5985                .collect();
5986            return Ok(Self::from_f64_values(out));
5987        }
5988        let out = nancumsum(&self.values);
5989        Self::new(DType::Float64, out)
5990    }
5991
5992    /// Cumulative product, null-propagating per fp-types::nancumprod.
5993    pub fn cumprod(&self) -> Result<Self, ColumnError> {
5994        // Typed prefix scan (see cumsum): nancumprod seeds `running` at 1.0.
5995        if let Some(data) = self.as_f64_slice() {
5996            let mut running = 1.0_f64;
5997            let out: Vec<f64> = data
5998                .iter()
5999                .map(|&x| {
6000                    running *= x;
6001                    running
6002                })
6003                .collect();
6004            return Ok(Self::from_f64_values(out));
6005        }
6006        let out = nancumprod(&self.values);
6007        Self::new(DType::Float64, out)
6008    }
6009
6010    /// Cumulative maximum, null-propagating per fp-types::nancummax.
6011    pub fn cummax(&self) -> Result<Self, ColumnError> {
6012        // Typed running maximum: nancummax takes the first non-missing value
6013        // then `prev.max(x)` (std f64::max semantics) — reproduced over the
6014        // contiguous buffer for an all-valid Float64 column.
6015        if let Some(data) = self.as_f64_slice() {
6016            if let Some((&first, rest)) = data.split_first() {
6017                let mut running = first;
6018                let mut out = Vec::with_capacity(data.len());
6019                out.push(running);
6020                for &x in rest {
6021                    running = running.max(x);
6022                    out.push(running);
6023                }
6024                return Ok(Self::from_f64_values(out));
6025            }
6026            return Ok(Self::from_f64_values(Vec::new()));
6027        }
6028        let out = nancummax(&self.values);
6029        Self::new(DType::Float64, out)
6030    }
6031
6032    /// Cumulative minimum, null-propagating per fp-types::nancummin.
6033    pub fn cummin(&self) -> Result<Self, ColumnError> {
6034        // Typed running minimum (see cummax).
6035        if let Some(data) = self.as_f64_slice() {
6036            if let Some((&first, rest)) = data.split_first() {
6037                let mut running = first;
6038                let mut out = Vec::with_capacity(data.len());
6039                out.push(running);
6040                for &x in rest {
6041                    running = running.min(x);
6042                    out.push(running);
6043                }
6044                return Ok(Self::from_f64_values(out));
6045            }
6046            return Ok(Self::from_f64_values(Vec::new()));
6047        }
6048        let out = nancummin(&self.values);
6049        Self::new(DType::Float64, out)
6050    }
6051
6052    /// Sum of non-missing values.
6053    ///
6054    /// Matches `pd.Series.sum()` in skipna=True mode via fp-types::nansum.
6055    /// Empty column returns 0.0 (matching pandas).
6056    #[must_use]
6057    pub fn sum(&self) -> Scalar {
6058        // Typed reduction: an all-valid Float64 column sums straight over its
6059        // contiguous buffer instead of materializing/iterating a Vec<Scalar>.
6060        // Bit-identical to nansum's Float64 arm: a sequential left-fold seeded
6061        // at 0.0 over the same values in the same order (no Timedelta/missing
6062        // branch applies to an all-valid Float64 column).
6063        if let Some(data) = self.as_f64_slice() {
6064            let mut s = 0.0_f64;
6065            for &x in data {
6066                s += x;
6067            }
6068            return Scalar::Float64(s);
6069        }
6070        nansum(&self.values)
6071    }
6072
6073    /// Arithmetic mean of non-missing values.
6074    ///
6075    /// Matches `pd.Series.mean()` via fp-types::nanmean. Empty column
6076    /// returns Null(NaN).
6077    #[must_use]
6078    pub fn mean(&self) -> Scalar {
6079        // Typed reduction (see `sum`): for an all-valid Float64 column nanmean
6080        // is `Σ / count` with count == len; an empty column stays Null(NaN).
6081        if let Some(data) = self.as_f64_slice() {
6082            if data.is_empty() {
6083                return Scalar::Null(NullKind::NaN);
6084            }
6085            let mut s = 0.0_f64;
6086            for &x in data {
6087                s += x;
6088            }
6089            return Scalar::Float64(s / data.len() as f64);
6090        }
6091        nanmean(&self.values)
6092    }
6093
6094    /// Weighted average of non-missing values.
6095    ///
6096    /// Matches np.average(a, weights=w). Returns NaN if weights sum to zero.
6097    #[must_use]
6098    pub fn weighted_mean(&self, weights: &Self) -> Scalar {
6099        if self.len() != weights.len() {
6100            return Scalar::Null(NullKind::NaN);
6101        }
6102        let mut sum = 0.0;
6103        let mut weight_sum = 0.0;
6104        for (v, w) in self.values.iter().zip(weights.values()) {
6105            if v.is_missing() || w.is_missing() {
6106                continue;
6107            }
6108            let vf = match v.to_f64() {
6109                Ok(x) => x,
6110                Err(_) => continue,
6111            };
6112            let wf = match w.to_f64() {
6113                Ok(x) => x,
6114                Err(_) => continue,
6115            };
6116            sum += vf * wf;
6117            weight_sum += wf;
6118        }
6119        if weight_sum == 0.0 {
6120            return Scalar::Null(NullKind::NaN);
6121        }
6122        Scalar::Float64(sum / weight_sum)
6123    }
6124
6125    /// Alias for weighted_mean, matching np.average naming.
6126    #[must_use]
6127    pub fn average(&self, weights: &Self) -> Scalar {
6128        self.weighted_mean(weights)
6129    }
6130
6131    /// Minimum non-missing value.
6132    ///
6133    /// Matches `pd.Series.min()` via fp-types::nanmin. Preserves dtype
6134    /// for homogeneous inputs.
6135    #[must_use]
6136    pub fn min(&self) -> Scalar {
6137        // Typed reduction: an all-valid numeric column folds the minimum straight
6138        // over its contiguous buffer (an associative reduction the compiler can
6139        // vectorize), skipping the Vec<Scalar> materialization. Bit-identical to
6140        // nanmin: it keeps the first element on a tie via strict `<` (so -0.0 vs
6141        // 0.0 ordering is preserved), and returns it dtype-preserved.
6142        if let Some(data) = self.as_f64_slice()
6143            && let Some((&first, rest)) = data.split_first()
6144        {
6145            let mut m = first;
6146            for &x in rest {
6147                if x < m {
6148                    m = x;
6149                }
6150            }
6151            return Scalar::Float64(m);
6152        }
6153        if let Some(data) = self.as_i64_slice()
6154            && let Some((&first, rest)) = data.split_first()
6155        {
6156            let mut m = first;
6157            for &x in rest {
6158                if x < m {
6159                    m = x;
6160                }
6161            }
6162            return Scalar::Int64(m);
6163        }
6164        nanmin(&self.values)
6165    }
6166
6167    /// Maximum non-missing value.
6168    ///
6169    /// Matches `pd.Series.max()` via fp-types::nanmax.
6170    #[must_use]
6171    pub fn max(&self) -> Scalar {
6172        // Typed reduction (see `min`); nanmax keeps the first element on a tie
6173        // via strict `>`, dtype-preserved.
6174        if let Some(data) = self.as_f64_slice()
6175            && let Some((&first, rest)) = data.split_first()
6176        {
6177            let mut m = first;
6178            for &x in rest {
6179                if x > m {
6180                    m = x;
6181                }
6182            }
6183            return Scalar::Float64(m);
6184        }
6185        if let Some(data) = self.as_i64_slice()
6186            && let Some((&first, rest)) = data.split_first()
6187        {
6188            let mut m = first;
6189            for &x in rest {
6190                if x > m {
6191                    m = x;
6192                }
6193            }
6194            return Scalar::Int64(m);
6195        }
6196        nanmax(&self.values)
6197    }
6198
6199    /// Median of non-missing values.
6200    ///
6201    /// Matches `pd.Series.median()` via fp-types::nanmedian.
6202    #[must_use]
6203    pub fn median(&self) -> Scalar {
6204        nanmedian(&self.values)
6205    }
6206
6207    /// Product of non-missing values.
6208    ///
6209    /// Matches `pd.Series.prod()` via fp-types::nanprod. Empty column
6210    /// returns 1.0 (matching pandas).
6211    #[must_use]
6212    pub fn prod(&self) -> Scalar {
6213        // Typed reduction (mirror of `sum`): an all-valid Float64 column
6214        // multiplies straight over its contiguous buffer instead of iterating a
6215        // Vec<Scalar>. Bit-identical to nanprod's Float64 arm: a sequential
6216        // left-fold seeded at 1.0 over the same values in the same order. all-
6217        // valid ⇒ nothing is filtered and the all-missing→Null branch can't fire,
6218        // so empty folds to Float64(1.0) exactly as nanprod returns for empty.
6219        if let Some(data) = self.as_f64_slice() {
6220            let mut p = 1.0_f64;
6221            for &x in data {
6222                p *= x;
6223            }
6224            return Scalar::Float64(p);
6225        }
6226        nanprod(&self.values)
6227    }
6228
6229    /// Alias for [`prod`](Self::prod), matching `pd.Series.product()`.
6230    #[must_use]
6231    pub fn product(&self) -> Scalar {
6232        self.prod()
6233    }
6234
6235    /// Alias for sum, matching np.nansum.
6236    #[must_use]
6237    pub fn nansum(&self) -> Scalar {
6238        self.sum()
6239    }
6240
6241    /// Alias for mean, matching np.nanmean.
6242    #[must_use]
6243    pub fn nanmean(&self) -> Scalar {
6244        self.mean()
6245    }
6246
6247    /// Alias for min, matching np.nanmin.
6248    #[must_use]
6249    pub fn nanmin(&self) -> Scalar {
6250        self.min()
6251    }
6252
6253    /// Alias for max, matching np.nanmax.
6254    #[must_use]
6255    pub fn nanmax(&self) -> Scalar {
6256        self.max()
6257    }
6258
6259    /// Alias for prod, matching np.nanprod.
6260    #[must_use]
6261    pub fn nanprod(&self) -> Scalar {
6262        self.prod()
6263    }
6264
6265    /// Alias for std, matching np.nanstd.
6266    #[must_use]
6267    pub fn nanstd(&self, ddof: usize) -> Scalar {
6268        self.std(ddof)
6269    }
6270
6271    /// Alias for var, matching np.nanvar.
6272    #[must_use]
6273    pub fn nanvar(&self, ddof: usize) -> Scalar {
6274        self.var(ddof)
6275    }
6276
6277    /// Alias for median, matching np.nanmedian.
6278    #[must_use]
6279    pub fn nanmedian(&self) -> Scalar {
6280        self.median()
6281    }
6282
6283    fn skipna_false_missing_result(&self, skipna: bool) -> Option<Scalar> {
6284        if skipna || !self.values.iter().any(Scalar::is_missing) {
6285            return None;
6286        }
6287
6288        Some(if matches!(self.dtype, DType::Timedelta64) {
6289            Scalar::Timedelta64(Timedelta::NAT)
6290        } else {
6291            Scalar::Float64(f64::NAN)
6292        })
6293    }
6294
6295    /// Sum with explicit pandas `skipna=` control.
6296    ///
6297    /// Matches `pd.Series.sum(skipna=...)`.
6298    #[must_use]
6299    pub fn sum_skipna(&self, skipna: bool) -> Scalar {
6300        self.skipna_false_missing_result(skipna)
6301            .unwrap_or_else(|| self.sum())
6302    }
6303
6304    /// Mean with explicit pandas `skipna=` control.
6305    #[must_use]
6306    pub fn mean_skipna(&self, skipna: bool) -> Scalar {
6307        self.skipna_false_missing_result(skipna)
6308            .unwrap_or_else(|| self.mean())
6309    }
6310
6311    /// Minimum with explicit pandas `skipna=` control.
6312    #[must_use]
6313    pub fn min_skipna(&self, skipna: bool) -> Scalar {
6314        self.skipna_false_missing_result(skipna)
6315            .unwrap_or_else(|| self.min())
6316    }
6317
6318    /// Maximum with explicit pandas `skipna=` control.
6319    #[must_use]
6320    pub fn max_skipna(&self, skipna: bool) -> Scalar {
6321        self.skipna_false_missing_result(skipna)
6322            .unwrap_or_else(|| self.max())
6323    }
6324
6325    /// Median with explicit pandas `skipna=` control.
6326    #[must_use]
6327    pub fn median_skipna(&self, skipna: bool) -> Scalar {
6328        self.skipna_false_missing_result(skipna)
6329            .unwrap_or_else(|| self.median())
6330    }
6331
6332    /// Product with explicit pandas `skipna=` control.
6333    #[must_use]
6334    pub fn prod_skipna(&self, skipna: bool) -> Scalar {
6335        self.skipna_false_missing_result(skipna)
6336            .unwrap_or_else(|| self.prod())
6337    }
6338
6339    /// Variance with explicit pandas `skipna=` control.
6340    #[must_use]
6341    pub fn var_skipna(&self, ddof: usize, skipna: bool) -> Scalar {
6342        self.skipna_false_missing_result(skipna)
6343            .unwrap_or_else(|| self.var(ddof))
6344    }
6345
6346    /// Standard deviation with explicit pandas `skipna=` control.
6347    #[must_use]
6348    pub fn std_skipna(&self, ddof: usize, skipna: bool) -> Scalar {
6349        self.skipna_false_missing_result(skipna)
6350            .unwrap_or_else(|| self.std(ddof))
6351    }
6352
6353    /// Standard error of the mean with explicit pandas `skipna=` control.
6354    #[must_use]
6355    pub fn sem_skipna(&self, ddof: usize, skipna: bool) -> Scalar {
6356        self.skipna_false_missing_result(skipna)
6357            .unwrap_or_else(|| self.sem(ddof))
6358    }
6359
6360    /// Count of non-missing values.
6361    ///
6362    /// Matches `pd.Series.count()`.
6363    #[must_use]
6364    pub fn count(&self) -> usize {
6365        self.values.iter().filter(|v| !v.is_missing()).count()
6366    }
6367
6368    /// Forward-fill missing values with the most recent non-missing
6369    /// value, optionally capped by `limit` consecutive fills.
6370    ///
6371    /// Matches `pd.Series.ffill(limit=None)`. Leading nulls stay null
6372    /// until the first non-missing value is seen. `limit=None` means
6373    /// unbounded; `limit=Some(k)` caps each missing run to `k` fills.
6374    pub fn ffill(&self, limit: Option<usize>) -> Result<Self, ColumnError> {
6375        let mut out = Vec::with_capacity(self.values.len());
6376        let mut last: Option<Scalar> = None;
6377        let mut run = 0usize;
6378        for v in &self.values {
6379            if !v.is_missing() {
6380                out.push(v.clone());
6381                last = Some(v.clone());
6382                run = 0;
6383                continue;
6384            }
6385            match (&last, limit) {
6386                (Some(prev), None) => out.push(prev.clone()),
6387                (Some(prev), Some(cap)) if run < cap => {
6388                    out.push(prev.clone());
6389                    run += 1;
6390                }
6391                _ => out.push(v.clone()),
6392            }
6393        }
6394        Self::new(self.dtype, out)
6395    }
6396
6397    /// Alias for [`ffill`](Self::ffill), matching deprecated `pd.Series.pad()`.
6398    pub fn pad(&self, limit: Option<usize>) -> Result<Self, ColumnError> {
6399        self.ffill(limit)
6400    }
6401
6402    /// Backward-fill missing values with the next non-missing value,
6403    /// optionally capped by `limit` consecutive fills.
6404    ///
6405    /// Matches `pd.Series.bfill(limit=None)`. Trailing nulls stay null
6406    /// if no subsequent non-missing value is observed.
6407    pub fn bfill(&self, limit: Option<usize>) -> Result<Self, ColumnError> {
6408        let mut out = vec![Scalar::Null(NullKind::NaN); self.values.len()];
6409        let mut next: Option<Scalar> = None;
6410        let mut run = 0usize;
6411        for (i, v) in self.values.iter().enumerate().rev() {
6412            if !v.is_missing() {
6413                out[i] = v.clone();
6414                next = Some(v.clone());
6415                run = 0;
6416                continue;
6417            }
6418            match (&next, limit) {
6419                (Some(nxt), None) => out[i] = nxt.clone(),
6420                (Some(nxt), Some(cap)) if run < cap => {
6421                    out[i] = nxt.clone();
6422                    run += 1;
6423                }
6424                _ => out[i] = v.clone(),
6425            }
6426        }
6427        Self::new(self.dtype, out)
6428    }
6429
6430    /// Alias for [`bfill`](Self::bfill), matching deprecated `pd.Series.backfill()`.
6431    pub fn backfill(&self, limit: Option<usize>) -> Result<Self, ColumnError> {
6432        self.bfill(limit)
6433    }
6434
6435    /// Count of distinct non-missing values.
6436    ///
6437    /// Matches `pd.Series.nunique(dropna=True)`.
6438    #[must_use]
6439    pub fn nunique(&self) -> Scalar {
6440        self.nunique_with_dropna(true)
6441    }
6442
6443    /// Count of distinct values with explicit missing-value handling.
6444    ///
6445    /// Matches `pd.Series.nunique(dropna=...)`. When `dropna=false`,
6446    /// all missing values contribute a single extra distinct bucket.
6447    #[must_use]
6448    pub fn nunique_with_dropna(&self, dropna: bool) -> Scalar {
6449        // Dense direct-address fast path: an all-valid, bounded-range Int64
6450        // column counts distinct values via a seen-bitset indexed by (v-min) —
6451        // hash-free, no Scalar enum. All-valid ⇒ no missing, so dropna does not
6452        // add a bucket; bit-identical to nannunique's distinct count. Same gate
6453        // as unique/isin/duplicated.
6454        if let Some(data) = self.as_i64_slice()
6455            && let Some((min, range)) = i64_direct_address_range(data)
6456        {
6457            let mut seen = vec![false; range];
6458            let mut distinct = 0i64;
6459            for &v in data {
6460                let slot = (v as i128 - min as i128) as usize;
6461                if !seen[slot] {
6462                    seen[slot] = true;
6463                    distinct += 1;
6464                }
6465            }
6466            return Scalar::Int64(distinct);
6467        }
6468
6469        let mut distinct = match nannunique(&self.values) {
6470            Scalar::Int64(count) => count,
6471            _ => 0,
6472        };
6473
6474        if !dropna && self.values.iter().any(Scalar::is_missing) {
6475            distinct += 1;
6476        }
6477
6478        Scalar::Int64(distinct)
6479    }
6480
6481    /// Truthiness reduction: whether any non-missing value is truthy.
6482    ///
6483    /// Matches `pd.Series.any()` in skipna=True mode. Empty column
6484    /// returns false (pandas convention).
6485    #[must_use]
6486    pub fn any(&self) -> Scalar {
6487        nanany(&self.values)
6488    }
6489
6490    /// Truthiness reduction: whether all non-missing values are truthy.
6491    ///
6492    /// Matches `pd.Series.all()` in skipna=True mode. Empty column
6493    /// returns true.
6494    #[must_use]
6495    pub fn all(&self) -> Scalar {
6496        nanall(&self.values)
6497    }
6498
6499    /// Element-wise difference between consecutive non-missing values.
6500    ///
6501    /// Unlike `diff(1)` — which inserts Null(NaN) for every missing
6502    /// input — this walker-style helper skips nulls when picking the
6503    /// "previous" value. Positions whose nearest preceding non-missing
6504    /// neighbor lies before the start of the column (i.e. the first
6505    /// non-missing value itself, or a missing input) emit Null(NaN).
6506    /// Matches the common pandas idiom `s.dropna().diff().reindex(s.index)`.
6507    pub fn diff_valid(&self) -> Result<Self, ColumnError> {
6508        let mut prev: Option<f64> = None;
6509        let mut out = Vec::with_capacity(self.values.len());
6510        for v in &self.values {
6511            if v.is_missing() {
6512                out.push(Scalar::Null(NullKind::NaN));
6513                continue;
6514            }
6515            match v.to_f64() {
6516                Ok(x) if !x.is_nan() => match prev {
6517                    Some(p) => {
6518                        out.push(Scalar::Float64(x - p));
6519                        prev = Some(x);
6520                    }
6521                    None => {
6522                        out.push(Scalar::Null(NullKind::NaN));
6523                        prev = Some(x);
6524                    }
6525                },
6526                Ok(_) => out.push(Scalar::Null(NullKind::NaN)),
6527                Err(err) => return Err(ColumnError::Type(err)),
6528            }
6529        }
6530        Self::new(DType::Float64, out)
6531    }
6532
6533    /// Deterministic uniform sampling of `n` rows with a caller-supplied
6534    /// seed.
6535    ///
6536    /// Matches the no-replacement subset of `pd.Series.sample(n,
6537    /// random_state=seed)`. `n >= len()` returns a clone. Uses an
6538    /// in-place partial Fisher-Yates shuffle driven by a stateless
6539    /// LCG so callers can reproduce samples without dragging in
6540    /// `rand`. Result dtype matches `self`.
6541    pub fn sample(&self, n: usize, seed: u64) -> Result<Self, ColumnError> {
6542        let len = self.values.len();
6543        if n >= len {
6544            return Ok(self.clone());
6545        }
6546        let mut indices: Vec<usize> = (0..len).collect();
6547        let mut state = seed.wrapping_add(0x9E3779B97F4A7C15);
6548        for i in 0..n {
6549            // Standard LCG constants from Knuth (MMIX).
6550            state = state
6551                .wrapping_mul(6364136223846793005)
6552                .wrapping_add(1442695040888963407);
6553            let bound = (len - i) as u64;
6554            let pick = i + (state.wrapping_shr(33) % bound) as usize;
6555            indices.swap(i, pick);
6556        }
6557        let values: Vec<Scalar> = indices[..n]
6558            .iter()
6559            .map(|&idx| self.values[idx].clone())
6560            .collect();
6561        Self::new(self.dtype, values)
6562    }
6563
6564    /// Position of the first non-missing value, or None when every
6565    /// value is missing.
6566    ///
6567    /// Matches `pd.Series.first_valid_index()` for positional
6568    /// indices — callers can map the returned position through their
6569    /// own Index to recover a label.
6570    #[must_use]
6571    pub fn first_valid(&self) -> Option<usize> {
6572        self.values.iter().position(|v| !v.is_missing())
6573    }
6574
6575    /// Alias for [`first_valid`](Self::first_valid), matching
6576    /// `pd.Series.first_valid_index()` for positional indices.
6577    #[must_use]
6578    pub fn first_valid_index(&self) -> Option<usize> {
6579        self.first_valid()
6580    }
6581
6582    /// Position of the last non-missing value, or None when every
6583    /// value is missing.
6584    ///
6585    /// Matches `pd.Series.last_valid_index()` for positional indices.
6586    #[must_use]
6587    pub fn last_valid(&self) -> Option<usize> {
6588        self.values.iter().rposition(|v| !v.is_missing())
6589    }
6590
6591    /// Alias for [`last_valid`](Self::last_valid), matching
6592    /// `pd.Series.last_valid_index()` for positional indices.
6593    #[must_use]
6594    pub fn last_valid_index(&self) -> Option<usize> {
6595        self.last_valid()
6596    }
6597
6598    /// Sliding-window sum over `window` consecutive positions.
6599    ///
6600    /// Matches `pd.Series.rolling(window).sum()`. Positions with fewer
6601    /// than `min_periods` non-missing values in the window emit
6602    /// `Null(NaN)`. `min_periods=0` preserves pandas' convention that
6603    /// an empty window sums to 0.0. Result dtype is always Float64.
6604    /// `window=0` returns an all-null Float64 column the same length
6605    /// as self.
6606    pub fn rolling_window_sum(
6607        &self,
6608        window: usize,
6609        min_periods: usize,
6610    ) -> Result<Self, ColumnError> {
6611        let len = self.values.len();
6612        if window == 0 {
6613            return Self::new(DType::Float64, vec![Scalar::Null(NullKind::NaN); len]);
6614        }
6615        let mut out = Vec::with_capacity(len);
6616        for i in 0..len {
6617            let start = (i + 1).saturating_sub(window);
6618            let end = i + 1;
6619            let mut sum = 0.0_f64;
6620            let mut observed = 0usize;
6621            for v in &self.values[start..end] {
6622                if v.is_missing() {
6623                    continue;
6624                }
6625                match v.to_f64() {
6626                    Ok(x) if !x.is_nan() => {
6627                        sum += x;
6628                        observed += 1;
6629                    }
6630                    Ok(_) => {}
6631                    Err(err) => return Err(ColumnError::Type(err)),
6632                }
6633            }
6634            if observed >= min_periods.max(1) || (min_periods == 0 && end - start > 0) {
6635                out.push(Scalar::Float64(sum));
6636            } else {
6637                out.push(Scalar::Null(NullKind::NaN));
6638            }
6639        }
6640        Self::new(DType::Float64, out)
6641    }
6642
6643    /// Per-row missing-value flag (Bool column).
6644    ///
6645    /// Matches `pd.Series.isna()` / `isnull()`.
6646    pub fn isnull(&self) -> Result<Self, ColumnError> {
6647        let out: Vec<Scalar> = self
6648            .values
6649            .iter()
6650            .map(|v| Scalar::Bool(v.is_missing()))
6651            .collect();
6652        Self::new(DType::Bool, out)
6653    }
6654
6655    /// Alias for [`isnull`](Self::isnull), matching `pd.Series.isna()`.
6656    pub fn isna(&self) -> Result<Self, ColumnError> {
6657        self.isnull()
6658    }
6659
6660    /// Per-row non-missing flag (Bool column).
6661    ///
6662    /// Matches `pd.Series.notna()` / `notnull()`.
6663    pub fn notnull(&self) -> Result<Self, ColumnError> {
6664        let out: Vec<Scalar> = self
6665            .values
6666            .iter()
6667            .map(|v| Scalar::Bool(!v.is_missing()))
6668            .collect();
6669        Self::new(DType::Bool, out)
6670    }
6671
6672    /// Alias for [`notnull`](Self::notnull), matching `pd.Series.notna()`.
6673    pub fn notna(&self) -> Result<Self, ColumnError> {
6674        self.notnull()
6675    }
6676
6677    /// Per-row check for finite values (not NaN or infinity).
6678    pub fn isfinite(&self) -> Result<Self, ColumnError> {
6679        let out: Vec<Scalar> = self
6680            .values
6681            .iter()
6682            .map(|v| match v {
6683                Scalar::Float64(f) => Scalar::Bool(f.is_finite()),
6684                Scalar::Int64(_) => Scalar::Bool(true),
6685                _ if v.is_missing() => Scalar::Bool(false),
6686                _ => Scalar::Bool(true),
6687            })
6688            .collect();
6689        Self::new(DType::Bool, out)
6690    }
6691
6692    /// Per-row check for infinite values.
6693    pub fn isinf(&self) -> Result<Self, ColumnError> {
6694        let out: Vec<Scalar> = self
6695            .values
6696            .iter()
6697            .map(|v| match v {
6698                Scalar::Float64(f) => Scalar::Bool(f.is_infinite()),
6699                _ => Scalar::Bool(false),
6700            })
6701            .collect();
6702        Self::new(DType::Bool, out)
6703    }
6704
6705    /// Per-row check for NaN values.
6706    pub fn isnan(&self) -> Result<Self, ColumnError> {
6707        let out: Vec<Scalar> = self
6708            .values
6709            .iter()
6710            .map(|v| match v {
6711                Scalar::Float64(f) => Scalar::Bool(f.is_nan()),
6712                Scalar::Null(NullKind::NaN) => Scalar::Bool(true),
6713                _ => Scalar::Bool(false),
6714            })
6715            .collect();
6716        Self::new(DType::Bool, out)
6717    }
6718
6719    /// Sample variance (ddof-parameterized).
6720    ///
6721    /// Matches `pd.Series.var(ddof=1)`.
6722    #[must_use]
6723    pub fn var(&self, ddof: usize) -> Scalar {
6724        // Typed two-pass reduction: an all-valid Float64 column computes the
6725        // mean then the sum of squared deviations straight over its contiguous
6726        // buffer, skipping the Vec<Scalar> materialization. Bit-identical to
6727        // nanvar's numeric arm — the exact same `Iterator::sum::<f64>()`
6728        // constructs over the same values in the same order (so seed/ordering
6729        // match), `Null(NaN)` when count <= ddof.
6730        if let Some(data) = self.as_f64_slice() {
6731            let n = data.len();
6732            if n <= ddof {
6733                return Scalar::Null(NullKind::NaN);
6734            }
6735            let mean: f64 = data.iter().sum::<f64>() / n as f64;
6736            let sum_sq: f64 = data.iter().map(|&x| (x - mean).powi(2)).sum::<f64>();
6737            return Scalar::Float64(sum_sq / (n - ddof) as f64);
6738        }
6739        nanvar(&self.values, ddof)
6740    }
6741
6742    /// Sample standard deviation (ddof-parameterized).
6743    ///
6744    /// Matches `pd.Series.std(ddof=1)`.
6745    #[must_use]
6746    pub fn std(&self, ddof: usize) -> Scalar {
6747        // For an all-valid Float64 column nanstd is sqrt(nanvar) (Float64 arm);
6748        // reuse the typed var. Non-Float64 (e.g. Timedelta) keep nanstd, which
6749        // has its own dtype-preserving path.
6750        if self.as_f64_slice().is_some() {
6751            return match self.var(ddof) {
6752                Scalar::Float64(v) => Scalar::Float64(v.sqrt()),
6753                other => other,
6754            };
6755        }
6756        nanstd(&self.values, ddof)
6757    }
6758
6759    /// Standard error of the mean (ddof-parameterized).
6760    ///
6761    /// Matches `pd.Series.sem(ddof=1)`.
6762    #[must_use]
6763    pub fn sem(&self, ddof: usize) -> Scalar {
6764        nansem(&self.values, ddof)
6765    }
6766
6767    /// Sample covariance between this column and another.
6768    ///
6769    /// Matches `pd.Series.cov(other)`. Uses ddof=1 by default.
6770    /// Returns NaN if fewer than 2 valid pairs.
6771    #[must_use]
6772    pub fn cov(&self, other: &Self) -> Scalar {
6773        self.cov_ddof(other, 1)
6774    }
6775
6776    /// Sample covariance with custom ddof.
6777    #[must_use]
6778    pub fn cov_ddof(&self, other: &Self, ddof: usize) -> Scalar {
6779        let n = self.values.len().min(other.values.len());
6780        if n == 0 {
6781            return Scalar::Null(NullKind::NaN);
6782        }
6783        let mut sum_x = 0.0;
6784        let mut sum_y = 0.0;
6785        let mut count = 0usize;
6786        for i in 0..n {
6787            let x = match self.values[i].to_f64() {
6788                Ok(v) if v.is_finite() => v,
6789                _ => continue,
6790            };
6791            let y = match other.values[i].to_f64() {
6792                Ok(v) if v.is_finite() => v,
6793                _ => continue,
6794            };
6795            sum_x += x;
6796            sum_y += y;
6797            count += 1;
6798        }
6799        if count <= ddof {
6800            return Scalar::Null(NullKind::NaN);
6801        }
6802        let mean_x = sum_x / count as f64;
6803        let mean_y = sum_y / count as f64;
6804        let mut cov_sum = 0.0;
6805        for i in 0..n {
6806            let x = match self.values[i].to_f64() {
6807                Ok(v) if v.is_finite() => v,
6808                _ => continue,
6809            };
6810            let y = match other.values[i].to_f64() {
6811                Ok(v) if v.is_finite() => v,
6812                _ => continue,
6813            };
6814            cov_sum += (x - mean_x) * (y - mean_y);
6815        }
6816        Scalar::Float64(cov_sum / (count - ddof) as f64)
6817    }
6818
6819    /// Pearson correlation coefficient between this column and another.
6820    ///
6821    /// Matches `pd.Series.corr(other)`. Returns NaN if fewer than 2 valid pairs.
6822    #[must_use]
6823    pub fn corr(&self, other: &Self) -> Scalar {
6824        let n = self.values.len().min(other.values.len());
6825        if n == 0 {
6826            return Scalar::Null(NullKind::NaN);
6827        }
6828        let mut sum_x = 0.0;
6829        let mut sum_y = 0.0;
6830        let mut sum_xx = 0.0;
6831        let mut sum_yy = 0.0;
6832        let mut sum_xy = 0.0;
6833        let mut count = 0usize;
6834        for i in 0..n {
6835            let x = match self.values[i].to_f64() {
6836                Ok(v) if v.is_finite() => v,
6837                _ => continue,
6838            };
6839            let y = match other.values[i].to_f64() {
6840                Ok(v) if v.is_finite() => v,
6841                _ => continue,
6842            };
6843            sum_x += x;
6844            sum_y += y;
6845            sum_xx += x * x;
6846            sum_yy += y * y;
6847            sum_xy += x * y;
6848            count += 1;
6849        }
6850        if count < 2 {
6851            return Scalar::Null(NullKind::NaN);
6852        }
6853        let n_f = count as f64;
6854        let numerator = n_f * sum_xy - sum_x * sum_y;
6855        let denom_x = (n_f * sum_xx - sum_x * sum_x).sqrt();
6856        let denom_y = (n_f * sum_yy - sum_y * sum_y).sqrt();
6857        if denom_x == 0.0 || denom_y == 0.0 {
6858            return Scalar::Null(NullKind::NaN);
6859        }
6860        Scalar::Float64(numerator / (denom_x * denom_y))
6861    }
6862
6863    /// Autocorrelation at a given lag.
6864    ///
6865    /// Matches `pd.Series.autocorr(lag)`. Returns NaN if fewer than 2 valid pairs.
6866    #[must_use]
6867    pub fn autocorr(&self, lag: usize) -> Scalar {
6868        if lag >= self.values.len() {
6869            return Scalar::Null(NullKind::NaN);
6870        }
6871        let shifted = match self.shift(lag as i64, Scalar::Null(NullKind::NaN)) {
6872            Ok(s) => s,
6873            Err(_) => return Scalar::Null(NullKind::NaN),
6874        };
6875        self.corr(&shifted)
6876    }
6877
6878    /// Sample skewness (bias-corrected, Fisher-Pearson).
6879    ///
6880    /// Matches `pd.Series.skew()`. Requires at least 3 non-missing
6881    /// values; returns `Null(NaN)` otherwise.
6882    #[must_use]
6883    pub fn skew(&self) -> Scalar {
6884        nanskew(&self.values)
6885    }
6886
6887    /// Excess sample kurtosis (Fisher's definition, bias-corrected).
6888    ///
6889    /// Matches `pd.Series.kurt()`. Requires at least 4 non-missing
6890    /// values; returns `Null(NaN)` otherwise.
6891    #[must_use]
6892    pub fn kurt(&self) -> Scalar {
6893        nankurt(&self.values)
6894    }
6895
6896    /// Alias for [`kurt`](Self::kurt), matching `pd.Series.kurtosis()`.
6897    #[must_use]
6898    pub fn kurtosis(&self) -> Scalar {
6899        self.kurt()
6900    }
6901
6902    /// Peak-to-peak range (max − min) over non-missing values.
6903    ///
6904    /// Matches `np.ptp`. Returns `Null(NaN)` for empty or all-missing
6905    /// columns.
6906    #[must_use]
6907    pub fn ptp(&self) -> Scalar {
6908        nanptp(&self.values)
6909    }
6910
6911    /// Whether every non-missing value is distinct.
6912    ///
6913    /// Matches `pd.Series.is_unique`.
6914    #[must_use]
6915    pub fn is_unique(&self) -> bool {
6916        !self.has_duplicates()
6917    }
6918
6919    /// Whether any non-missing value repeats.
6920    ///
6921    /// Matches `pd.Series.has_duplicates`.
6922    #[must_use]
6923    pub fn has_duplicates(&self) -> bool {
6924        #[derive(Hash, PartialEq, Eq)]
6925        enum Key<'a> {
6926            Bool(bool),
6927            Int64(i64),
6928            FloatBits(u64),
6929            Utf8(&'a str),
6930            Timedelta64(i64),
6931            Datetime64(i64),
6932            Period(i64),
6933            Interval(u64, u64, IntervalClosed),
6934        }
6935        let mut seen: FxHashSet<Key<'_>> = FxHashSet::default();
6936        for v in &self.values {
6937            if v.is_missing() {
6938                continue;
6939            }
6940            let key = match v {
6941                Scalar::Bool(b) => Key::Bool(*b),
6942                Scalar::Int64(i) => Key::Int64(*i),
6943                Scalar::Float64(f) => {
6944                    let norm = if *f == 0.0 { 0.0 } else { *f };
6945                    Key::FloatBits(norm.to_bits())
6946                }
6947                Scalar::Utf8(s) => Key::Utf8(s.as_str()),
6948                Scalar::Timedelta64(v) => Key::Timedelta64(*v),
6949                Scalar::Datetime64(v) => Key::Datetime64(*v),
6950                Scalar::Period(v) => Key::Period(*v),
6951                Scalar::Interval(v) => {
6952                    let (left, right, closed) = interval_key(v);
6953                    Key::Interval(left, right, closed)
6954                }
6955                Scalar::Null(_) => continue,
6956            };
6957            if !seen.insert(key) {
6958                return true;
6959            }
6960        }
6961        false
6962    }
6963
6964    /// Percent change between consecutive non-missing values.
6965    ///
6966    /// Matches `pd.Series.pct_change(periods=1)` (fill_method defaults
6967    /// to None on pandas 2.2+, so nulls propagate without forward fill).
6968    /// Result dtype Float64. Non-numeric inputs return TypeError. The
6969    /// leading `|periods|` positions are Null(NaN).
6970    pub fn pct_change(&self, periods: i64) -> Result<Self, ColumnError> {
6971        let len = self.values.len();
6972        if len == 0 || periods == 0 {
6973            return Self::new(DType::Float64, vec![Scalar::Null(NullKind::NaN); len]);
6974        }
6975        let abs = periods.unsigned_abs() as usize;
6976        let mut out: Vec<Scalar> = Vec::with_capacity(len);
6977        for i in 0..len {
6978            let prev_idx = if periods > 0 {
6979                i.checked_sub(abs)
6980            } else if i + abs < len {
6981                Some(i + abs)
6982            } else {
6983                None
6984            };
6985            let Some(pi) = prev_idx else {
6986                out.push(Scalar::Null(NullKind::NaN));
6987                continue;
6988            };
6989            let cur = &self.values[i];
6990            let prev = &self.values[pi];
6991            if cur.is_missing() || prev.is_missing() {
6992                out.push(Scalar::Null(NullKind::NaN));
6993                continue;
6994            }
6995            // Per br-frankenpandas-mcu90: Timedelta64 pct_change matches
6996            // pandas — ns deltas divide as dimensionless f64. Was silently
6997            // NaN before via the catch-all (Timedelta64.to_f64() errors).
6998            if let (Scalar::Timedelta64(cur_ns), Scalar::Timedelta64(prev_ns)) = (cur, prev) {
6999                if *cur_ns == Timedelta::NAT || *prev_ns == Timedelta::NAT {
7000                    out.push(Scalar::Null(NullKind::NaN));
7001                    continue;
7002                }
7003                let prev_f = *prev_ns as f64;
7004                if prev_f.abs() < f64::EPSILON {
7005                    out.push(Scalar::Null(NullKind::NaN));
7006                } else {
7007                    out.push(Scalar::Float64((*cur_ns as f64 - prev_f) / prev_f));
7008                }
7009                continue;
7010            }
7011            match (cur.to_f64(), prev.to_f64()) {
7012                (Ok(c), Ok(p)) => {
7013                    if p == 0.0 || p.is_nan() || c.is_nan() {
7014                        out.push(Scalar::Null(NullKind::NaN));
7015                    } else {
7016                        out.push(Scalar::Float64((c - p) / p));
7017                    }
7018                }
7019                _ => out.push(Scalar::Null(NullKind::NaN)),
7020            }
7021        }
7022        Self::new(DType::Float64, out)
7023    }
7024
7025    /// Percentage change with optional null fill before computation.
7026    ///
7027    /// Matches `pd.Series.pct_change(periods, fill_method=..., limit=...)`.
7028    /// `fill_method=None` preserves pandas 2.2 default behavior (no fill).
7029    /// `"ffill"` / `"pad"` forward-fill missing values first, while
7030    /// `"bfill"` / `"backfill"` backward-fill first. `limit` caps
7031    /// consecutive fills and is ignored when `fill_method` is `None`.
7032    pub fn pct_change_with_fill(
7033        &self,
7034        periods: i64,
7035        fill_method: Option<&str>,
7036        limit: Option<usize>,
7037    ) -> Result<Self, ColumnError> {
7038        let filled = match fill_method {
7039            None => self.clone(),
7040            Some(method) => match method {
7041                "ffill" | "pad" => self.ffill(limit)?,
7042                "bfill" | "backfill" => self.bfill(limit)?,
7043                other => {
7044                    return Err(ColumnError::Type(TypeError::NonNumericValue {
7045                        value: other.to_string(),
7046                        dtype: self.dtype,
7047                    }));
7048                }
7049            },
7050        };
7051        filled.pct_change(periods)
7052    }
7053
7054    /// Summary descriptive statistics.
7055    ///
7056    /// Matches `pd.Series.describe()` for numeric columns: returns the
7057    /// seven-value tuple (count, mean, std, min, q25, q50, q75, max)
7058    /// as a `Vec<(&'static str, Scalar)>` in pandas order. Non-numeric
7059    /// columns return TypeError. Empty or fully-missing columns
7060    /// produce Null(NaN) for the moment-based stats and Int64(0) for
7061    /// count.
7062    pub fn describe(&self) -> Result<Vec<(&'static str, Scalar)>, ColumnError> {
7063        if !matches!(
7064            self.dtype,
7065            DType::Int64 | DType::Float64 | DType::Timedelta64
7066        ) {
7067            return Err(ColumnError::Type(TypeError::NonNumericValue {
7068                value: format!("{:?}", self.dtype),
7069                dtype: self.dtype,
7070            }));
7071        }
7072        let count = Scalar::Int64(self.count() as i64);
7073        let mean = self.mean();
7074        let std = {
7075            let nums: Vec<f64> = self
7076                .values
7077                .iter()
7078                .filter(|v| !v.is_missing())
7079                .filter_map(|v| v.to_f64().ok())
7080                .collect();
7081            if nums.len() < 2 {
7082                Scalar::Null(NullKind::NaN)
7083            } else {
7084                let mu = nums.iter().sum::<f64>() / nums.len() as f64;
7085                let ss: f64 = nums.iter().map(|x| (x - mu).powi(2)).sum();
7086                Scalar::Float64((ss / (nums.len() as f64 - 1.0)).sqrt())
7087            }
7088        };
7089        let q25 = self.quantile(0.25);
7090        let q50 = self.quantile(0.5);
7091        let q75 = self.quantile(0.75);
7092        let min = self.min();
7093        let max = self.max();
7094        Ok(vec![
7095            ("count", count),
7096            ("mean", mean),
7097            ("std", std),
7098            ("min", min),
7099            ("25%", q25),
7100            ("50%", q50),
7101            ("75%", q75),
7102            ("max", max),
7103        ])
7104    }
7105
7106    /// Combine two columns element-wise via `func`, using `fill` where
7107    /// either input is missing.
7108    ///
7109    /// Matches `pd.Series.combine(other, func, fill_value=...)`. Result
7110    /// length is the min of the two inputs (pandas aligns by position
7111    /// when inputs are the same length; longer inputs are truncated).
7112    /// Length mismatch returns `LengthMismatch`.
7113    pub fn combine<F>(
7114        &self,
7115        other: &Self,
7116        mut func: F,
7117        fill: Option<Scalar>,
7118    ) -> Result<Self, ColumnError>
7119    where
7120        F: FnMut(&Scalar, &Scalar) -> Scalar,
7121    {
7122        if self.values.len() != other.values.len() {
7123            return Err(ColumnError::LengthMismatch {
7124                left: self.values.len(),
7125                right: other.values.len(),
7126            });
7127        }
7128        let out: Vec<Scalar> = self
7129            .values
7130            .iter()
7131            .zip(other.values.iter())
7132            .map(|(a, b)| {
7133                let a_miss = a.is_missing();
7134                let b_miss = b.is_missing();
7135                match (a_miss || b_miss, fill.as_ref()) {
7136                    // pandas fill_value=None: propagate null, do not invoke func.
7137                    (true, None) => Scalar::Null(NullKind::NaN),
7138                    (_, fill_opt) => {
7139                        let default = fill_opt.unwrap_or(a);
7140                        let left = if a_miss { default } else { a };
7141                        let right = if b_miss { fill_opt.unwrap_or(b) } else { b };
7142                        func(left, right)
7143                    }
7144                }
7145            })
7146            .collect();
7147        let inferred = infer_dtype(&out).unwrap_or(self.dtype);
7148        Self::new(inferred, out)
7149    }
7150
7151    /// Numeric-only `map` that converts each non-missing value to f64,
7152    /// applies `func`, and collects the result.
7153    ///
7154    /// Matches the common pattern `pd.Series.apply(lambda x: f(x))`
7155    /// for numeric-only transforms. Missing values pass through as
7156    /// Null(NaN) without invoking `func`. Non-numeric inputs return a
7157    /// type error on the first failing element. Result dtype is
7158    /// Float64.
7159    pub fn apply_float<F>(&self, mut func: F) -> Result<Self, ColumnError>
7160    where
7161        F: FnMut(f64) -> f64,
7162    {
7163        let mut out = Vec::with_capacity(self.values.len());
7164        for v in &self.values {
7165            if v.is_missing() {
7166                out.push(Scalar::Null(NullKind::NaN));
7167                continue;
7168            }
7169            match v.to_f64() {
7170                Ok(x) => {
7171                    let y = func(x);
7172                    if y.is_nan() {
7173                        out.push(Scalar::Null(NullKind::NaN));
7174                    } else {
7175                        out.push(Scalar::Float64(y));
7176                    }
7177                }
7178                Err(err) => return Err(ColumnError::Type(err)),
7179            }
7180        }
7181        Self::new(DType::Float64, out)
7182    }
7183
7184    /// Bin non-missing values into `bins` equal-width buckets covering
7185    /// `[min, max]` and return the count per bin.
7186    ///
7187    /// Matches the `bins=n` histogram path behind `pd.Series.hist` (or
7188    /// `numpy.histogram(bins=n)[0]`). Bucket boundaries are inclusive on
7189    /// the low side except for the final bucket, which is inclusive on
7190    /// both sides. Empty columns / bins=0 yield an empty Vec.
7191    #[must_use]
7192    pub fn hist_counts(&self, bins: usize) -> Vec<usize> {
7193        if bins == 0 {
7194            return Vec::new();
7195        }
7196        let nums: Vec<f64> = self
7197            .values
7198            .iter()
7199            .filter(|v| !v.is_missing())
7200            .filter_map(|v| v.to_f64().ok())
7201            .filter(|f| !f.is_nan())
7202            .collect();
7203        if nums.is_empty() {
7204            return vec![0; bins];
7205        }
7206        let (min, max) = nums
7207            .iter()
7208            .fold((f64::INFINITY, f64::NEG_INFINITY), |(lo, hi), &x| {
7209                (lo.min(x), hi.max(x))
7210            });
7211        if (max - min).abs() < f64::EPSILON {
7212            // All values collapse into the first bin.
7213            let mut counts = vec![0; bins];
7214            counts[0] = nums.len();
7215            return counts;
7216        }
7217        let width = (max - min) / bins as f64;
7218        let mut counts = vec![0usize; bins];
7219        for x in &nums {
7220            let mut idx = ((x - min) / width) as usize;
7221            if idx >= bins {
7222                idx = bins - 1;
7223            }
7224            counts[idx] += 1;
7225        }
7226        counts
7227    }
7228
    /// Position of the smallest non-missing value, or `None` when every
    /// value is missing.
    ///
    /// Matches `pd.Series.argmin()` (skipna=True). Ties resolve to the
    /// first position seen. The search itself is delegated to the free
    /// function `nanargmin` over this column's values.
    #[must_use]
    pub fn argmin(&self) -> Option<usize> {
        nanargmin(&self.values)
    }
7238
    /// Alias for [`argmin`](Self::argmin), matching `pd.Series.idxmin()`
    /// for positional indices.
    ///
    /// Returns exactly what `argmin` returns: a position, not an index label.
    #[must_use]
    pub fn idxmin(&self) -> Option<usize> {
        self.argmin()
    }
7245
    /// Position of the largest non-missing value, or `None` when every
    /// value is missing.
    ///
    /// Matches `pd.Series.argmax()`. The search itself is delegated to the
    /// free function `nanargmax` over this column's values.
    #[must_use]
    pub fn argmax(&self) -> Option<usize> {
        nanargmax(&self.values)
    }
7254
    /// Alias for [`argmax`](Self::argmax), matching `pd.Series.idxmax()`
    /// for positional indices.
    ///
    /// Returns exactly what `argmax` returns: a position, not an index label.
    #[must_use]
    pub fn idxmax(&self) -> Option<usize> {
        self.argmax()
    }
7261
    /// Alias for [`argmin`](Self::argmin), named after `np.nanargmin`.
    ///
    /// Returns exactly what `argmin` returns.
    #[must_use]
    pub fn nanargmin(&self) -> Option<usize> {
        self.argmin()
    }
7267
    /// Alias for [`argmax`](Self::argmax), named after `np.nanargmax`.
    ///
    /// Returns exactly what `argmax` returns.
    #[must_use]
    pub fn nanargmax(&self) -> Option<usize> {
        self.argmax()
    }
7273
    /// Whether non-missing values are non-decreasing.
    ///
    /// Matches `pd.Series.is_monotonic_increasing`. An empty column or
    /// a column with a single non-missing value returns true. Missing
    /// values are skipped when comparing neighbors.
    ///
    /// Delegates to `is_monotonic_in_direction` with the direction flag set
    /// to `true`.
    #[must_use]
    pub fn is_monotonic_increasing(&self) -> bool {
        is_monotonic_in_direction(&self.values, true)
    }
7283
    /// Whether non-missing values are non-increasing.
    ///
    /// Matches `pd.Series.is_monotonic_decreasing`. Delegates to
    /// `is_monotonic_in_direction` with the direction flag set to `false`.
    #[must_use]
    pub fn is_monotonic_decreasing(&self) -> bool {
        is_monotonic_in_direction(&self.values, false)
    }
7291
7292    /// Combine two columns, taking `self` where present and `other`
7293    /// otherwise.
7294    ///
7295    /// Matches `pd.Series.combine_first(other)`. For each aligned
7296    /// position, the result is `self` when `self` is non-missing, else
7297    /// `other`. Length mismatch returns `LengthMismatch`. Result
7298    /// dtype follows `self`.
7299    pub fn combine_first(&self, other: &Self) -> Result<Self, ColumnError> {
7300        if self.values.len() != other.values.len() {
7301            return Err(ColumnError::LengthMismatch {
7302                left: self.values.len(),
7303                right: other.values.len(),
7304            });
7305        }
7306        let out: Vec<Scalar> = self
7307            .values
7308            .iter()
7309            .zip(other.values.iter())
7310            .map(|(a, b)| if a.is_missing() { b.clone() } else { a.clone() })
7311            .collect();
7312        Self::new(self.dtype, out)
7313    }
7314
    /// Clip values below `lower`, leaving the upper bound free.
    ///
    /// Matches `pd.Series.clip(lower=...)`. Thin wrapper that forwards to
    /// `clip(Some(lower), None)`; the result and any error come straight
    /// from `clip`.
    pub fn clip_lower(&self, lower: f64) -> Result<Self, ColumnError> {
        self.clip(Some(lower), None)
    }
7323
    /// Clip values above `upper`, leaving the lower bound free.
    ///
    /// Matches `pd.Series.clip(upper=...)`. Thin wrapper that forwards to
    /// `clip(None, Some(upper))`; the result and any error come straight
    /// from `clip`.
    pub fn clip_upper(&self, upper: f64) -> Result<Self, ColumnError> {
        self.clip(None, Some(upper))
    }
7330
    /// Remove duplicated values, keeping the first occurrence.
    ///
    /// Matches `pd.Series.drop_duplicates(keep='first')`. Forwards to
    /// [`drop_duplicates_keep`](Self::drop_duplicates_keep) with the
    /// `"first"` policy.
    pub fn drop_duplicates(&self) -> Result<Self, ColumnError> {
        self.drop_duplicates_keep("first")
    }
7337
7338    /// Remove duplicated values with explicit pandas `keep=` semantics.
7339    ///
7340    /// Supported policies are `"first"`, `"last"`, and `"false"` /
7341    /// `"none"` for pandas `keep=False`.
7342    pub fn drop_duplicates_keep(&self, keep: &str) -> Result<Self, ColumnError> {
7343        let dup = self.duplicated_keep(keep)?;
7344        let mut out = Vec::with_capacity(self.values.len());
7345        for (v, keep_flag) in self.values.iter().zip(dup.values.iter()) {
7346            if matches!(keep_flag, Scalar::Bool(false)) {
7347                out.push(v.clone());
7348            }
7349        }
7350        Self::new(self.dtype, out)
7351    }
7352
7353    /// Element-wise comparison against `other`, emitting a 2-column
7354    /// report of differences.
7355    ///
7356    /// Matches `pd.Series.compare(other)` — returns two Columns
7357    /// `(self_values, other_values)` containing only the positions
7358    /// where the two differ. Missing entries compare equal to each
7359    /// other. Length-mismatched inputs return `LengthMismatch`.
7360    pub fn compare(&self, other: &Self) -> Result<(Self, Self), ColumnError> {
7361        if self.values.len() != other.values.len() {
7362            return Err(ColumnError::LengthMismatch {
7363                left: self.values.len(),
7364                right: other.values.len(),
7365            });
7366        }
7367        let mut left = Vec::new();
7368        let mut right = Vec::new();
7369        for (a, b) in self.values.iter().zip(other.values.iter()) {
7370            let equal = match (a.is_missing(), b.is_missing()) {
7371                (true, true) => true,
7372                (true, false) | (false, true) => false,
7373                (false, false) => a.semantic_eq(b),
7374            };
7375            if !equal {
7376                left.push(a.clone());
7377                right.push(b.clone());
7378            }
7379        }
7380        Ok((Self::new(self.dtype, left)?, Self::new(other.dtype, right)?))
7381    }
7382
7383    /// Apply a unary function over each value.
7384    ///
7385    /// Matches `pd.Series.map(func)`. Missing values are passed to the
7386    /// user function (callers can decide whether to propagate NaN);
7387    /// result dtype is inferred via `infer_dtype` over the outputs,
7388    /// falling back to `self.dtype` when inference fails (e.g. empty
7389    /// or all-null output).
7390    pub fn map<F>(&self, mut func: F) -> Result<Self, ColumnError>
7391    where
7392        F: FnMut(&Scalar) -> Scalar,
7393    {
7394        let out: Vec<Scalar> = self.values.iter().map(&mut func).collect();
7395        let target = infer_dtype(&out).unwrap_or(self.dtype);
7396        Self::new(target, out)
7397    }
7398
7399    /// Linearly interpolate missing numeric values.
7400    ///
7401    /// Matches `pd.Series.interpolate(method='linear')` with the
7402    /// default `limit_direction='forward'`: interior missing runs are
7403    /// linearly interpolated, LEADING nulls stay null (forward fill
7404    /// cannot reach them), and TRAILING nulls are forward-filled with
7405    /// the last valid value (pandas does not extrapolate). Non-numeric
7406    /// columns return a type error. Result dtype is always Float64.
7407    pub fn interpolate_linear(&self) -> Result<Self, ColumnError> {
7408        let len = self.values.len();
7409        // Convert to f64 once; missing → None.
7410        let mut floats: Vec<Option<f64>> = Vec::with_capacity(len);
7411        for v in &self.values {
7412            if v.is_missing() {
7413                floats.push(None);
7414                continue;
7415            }
7416            match v.to_f64() {
7417                Ok(x) if !x.is_nan() => floats.push(Some(x)),
7418                Ok(_) => floats.push(None),
7419                Err(err) => return Err(ColumnError::Type(err)),
7420            }
7421        }
7422
7423        // Walk interior gaps between the first and last non-null.
7424        let first = floats.iter().position(Option::is_some);
7425        let last = floats.iter().rposition(Option::is_some);
7426        if let (Some(start), Some(end)) = (first, last) {
7427            let mut i = start;
7428            while i < end {
7429                if floats[i].is_some() {
7430                    i += 1;
7431                    continue;
7432                }
7433                let gap_start = i;
7434                while i < end && floats[i].is_none() {
7435                    i += 1;
7436                }
7437                let before = floats[gap_start - 1].expect("anchor");
7438                let after = floats[i].expect("anchor");
7439                let span = (i - gap_start + 1) as f64;
7440                for (k, j) in (gap_start..i).enumerate() {
7441                    let step = (k + 1) as f64;
7442                    floats[j] = Some(before + (after - before) * (step / span));
7443                }
7444            }
7445            // Trailing nulls (after the last valid value) are forward-filled
7446            // with that value — pandas' default limit_direction='forward' carries
7447            // it forward rather than extrapolating. Leading nulls (before `start`)
7448            // are intentionally left null. (br-frankenpandas-8ic7c)
7449            let last_valid = floats[end].expect("last valid anchor");
7450            for slot in floats.iter_mut().skip(end + 1) {
7451                *slot = Some(last_valid);
7452            }
7453        }
7454
7455        let out: Vec<Scalar> = floats
7456            .into_iter()
7457            .map(|opt| match opt {
7458                Some(x) => Scalar::Float64(x),
7459                None => Scalar::Null(NullKind::NaN),
7460            })
7461            .collect();
7462        Self::new(DType::Float64, out)
7463    }
7464
    /// Alias for [`interpolate_linear`](Self::interpolate_linear), matching
    /// the default `pd.Series.interpolate()` behavior.
    ///
    /// The result and any error come straight from `interpolate_linear`.
    pub fn interpolate(&self) -> Result<Self, ColumnError> {
        self.interpolate_linear()
    }
7470
    /// Linear-interpolation quantile at `q ∈ [0.0, 1.0]`.
    ///
    /// Matches `pd.Series.quantile(q, interpolation='linear')`.
    /// Missing values are skipped (skipna=True). Returns
    /// `Null(NaN)` for empty columns or `q` outside `[0.0, 1.0]`.
    ///
    /// The computation is delegated to the free function `nanquantile`
    /// over this column's values.
    #[must_use]
    pub fn quantile(&self, q: f64) -> Scalar {
        nanquantile(&self.values, q)
    }
7480
    /// Percentile of non-missing values.
    ///
    /// Matches `np.percentile()`. Takes a percentile `p` in `[0, 100]`,
    /// rescales it to `p / 100.0`, and forwards to
    /// [`quantile`](Self::quantile); out-of-range handling is therefore
    /// whatever `quantile` does for the rescaled value.
    #[must_use]
    pub fn percentile(&self, p: f64) -> Scalar {
        self.quantile(p / 100.0)
    }
7488
    /// Alias for [`quantile`](Self::quantile), named after `np.nanquantile`.
    ///
    /// Returns exactly what `quantile(q)` returns.
    #[must_use]
    pub fn nanquantile(&self, q: f64) -> Scalar {
        self.quantile(q)
    }
7494
    /// Alias for [`percentile`](Self::percentile), named after
    /// `np.nanpercentile`.
    ///
    /// Returns exactly what `percentile(p)` returns.
    #[must_use]
    pub fn nanpercentile(&self, p: f64) -> Scalar {
        self.percentile(p)
    }
7500
    /// Most frequent non-missing values, ascending-sorted.
    ///
    /// Matches `pd.Series.mode()`. Ties are all returned; missing
    /// values are ignored. For empty or all-missing columns the
    /// result is an empty same-dtype column.
    ///
    /// # Errors
    ///
    /// Propagates any error from `Column::new` when building the result.
    pub fn mode(&self) -> Result<Self, ColumnError> {
        // Counting-sort fast path: an all-valid, bounded-range Int64 column
        // tallies in O(n) via a dense direct-address histogram instead of the
        // `FxHashMap` below, and emits the winners with NO sort. Walking
        // the slots in ascending value order (slot s ↔ value `min + s`) yields
        // the most-frequent values already ascending — identical to the
        // hash-map path's `winners.sort_by(compare_scalars_na_last(.., true))`,
        // which orders Int64 by exact `i64::cmp`. `as_i64_slice` is `Some` only
        // for a fully-valid Int64 column, so there are no missing values to skip
        // (matching `key_of`'s `None`-on-missing), and an empty column makes
        // `i64_direct_address_range` return `None` → the hash-map path returns
        // the empty same-dtype column exactly as before.
        if let Some(data) = self.as_i64_slice()
            && let Some((min, range)) = i64_direct_address_range(data)
        {
            let mut count = vec![0i64; range];
            for &v in data {
                // Widen to i128 so `v - min` cannot overflow before indexing.
                count[(v as i128 - min as i128) as usize] += 1;
            }
            let max_count = count.iter().copied().max().unwrap_or(0);
            let mut winners = Vec::new();
            for (s, &c) in count.iter().enumerate() {
                if c == max_count {
                    winners.push(Scalar::Int64(min + s as i64));
                }
            }
            return Self::new(self.dtype, winners);
        }

        // Hashable, borrow-only projection of a non-missing `Scalar`, used as
        // the counting key in the general path.
        #[derive(Hash, PartialEq, Eq)]
        enum Key<'a> {
            Bool(bool),
            Int64(i64),
            FloatBits(u64),
            Utf8(&'a str),
            Timedelta64(i64),
            Datetime64(i64),
            Period(i64),
            Interval(u64, u64, IntervalClosed),
        }
        // Builds the counting key for `v`; missing values yield `None` and are
        // therefore never counted.
        fn key_of(v: &Scalar) -> Option<Key<'_>> {
            if v.is_missing() {
                return None;
            }
            Some(match v {
                Scalar::Bool(b) => Key::Bool(*b),
                Scalar::Int64(i) => Key::Int64(*i),
                Scalar::Float64(f) => {
                    // Fold -0.0 into +0.0 so both zeros share one bit pattern.
                    let norm = if *f == 0.0 { 0.0 } else { *f };
                    Key::FloatBits(norm.to_bits())
                }
                Scalar::Utf8(s) => Key::Utf8(s.as_str()),
                Scalar::Timedelta64(v) => Key::Timedelta64(*v),
                Scalar::Datetime64(v) => Key::Datetime64(*v),
                Scalar::Period(v) => Key::Period(*v),
                Scalar::Interval(v) => {
                    let (left, right, closed) = interval_key(v);
                    Key::Interval(left, right, closed)
                }
                Scalar::Null(_) => return None,
            })
        }

        // Per key: (occurrence count, first scalar seen with that key).
        let mut counts: FxHashMap<Key<'_>, (usize, &Scalar)> = FxHashMap::default();
        for v in &self.values {
            if let Some(k) = key_of(v) {
                counts
                    .entry(k)
                    .and_modify(|entry| entry.0 += 1)
                    .or_insert((1, v));
            }
        }
        if counts.is_empty() {
            return Self::new(self.dtype, Vec::new());
        }
        let max_count = counts.values().map(|(c, _)| *c).max().unwrap_or(0);
        let mut winners: Vec<Scalar> = counts
            .values()
            .filter_map(|(c, v)| {
                if *c == max_count {
                    Some((*v).clone())
                } else {
                    None
                }
            })
            .collect();
        // Hash-map iteration order is arbitrary; sort for a stable result.
        winners.sort_by(|a, b| compare_scalars_na_last(a, b, true));
        Self::new(self.dtype, winners)
    }
7595
7596    /// Approximate memory footprint in bytes.
7597    ///
7598    /// Matches `pd.Series.memory_usage(deep=...)`. When `deep` is true
7599    /// and the column contains Utf8 values, each string's byte length
7600    /// is counted; otherwise a fixed per-element width is used
7601    /// (8 bytes for numeric/timedelta, 1 for Bool, pointer-sized for
7602    /// Utf8, 0 for Null). The ValidityMask is counted separately.
7603    #[must_use]
7604    pub fn memory_usage(&self, deep: bool) -> usize {
7605        let element_bytes = match self.dtype {
7606            DType::Bool => 1,
7607            DType::Int64 | DType::Float64 | DType::Timedelta64 => 8,
7608            DType::Utf8 => std::mem::size_of::<usize>(),
7609            _ => 0,
7610        };
7611        let base = element_bytes * self.values.len();
7612        let deep_extra = if deep && self.dtype == DType::Utf8 {
7613            self.values
7614                .iter()
7615                .map(|v| match v {
7616                    Scalar::Utf8(s) => s.len(),
7617                    _ => 0,
7618                })
7619                .sum::<usize>()
7620        } else {
7621            0
7622        };
7623        // One bit per element, rounded up to whole bytes.
7624        let validity_bytes = self.values.len().div_ceil(8);
7625        base + deep_extra + validity_bytes
7626    }
7627
    /// Approximate shallow footprint in bytes, named after
    /// `pd.Series.nbytes`.
    ///
    /// Forwards to `memory_usage(false)`, so string payloads are not
    /// counted and the figure includes the validity-bitmap bytes that
    /// `memory_usage` adds.
    #[must_use]
    pub fn nbytes(&self) -> usize {
        self.memory_usage(false)
    }
7633
7634    /// Return the size in bytes of a single element.
7635    ///
7636    /// Matches `pd.Series.dtype.itemsize`. Returns 8 for Int64/Float64/Datetime64/Timedelta64,
7637    /// 1 for Bool, and an estimate for variable-length types.
7638    #[must_use]
7639    pub fn itemsize(&self) -> usize {
7640        match self.dtype() {
7641            DType::Bool | DType::BoolNullable => 1,
7642            DType::Int64
7643            | DType::Int64Nullable
7644            | DType::Float64
7645            | DType::Datetime64
7646            | DType::Timedelta64
7647            | DType::Period => 8,
7648            DType::Utf8 => {
7649                if self.values.is_empty() {
7650                    0
7651                } else {
7652                    self.memory_usage(true) / self.values.len()
7653                }
7654            }
7655            DType::Null | DType::Categorical | DType::Interval | DType::Sparse => 8,
7656        }
7657    }
7658
7659    /// Element-wise equality into a Bool column.
7660    ///
7661    /// Matches `pd.Series.eq(other)`. Both inputs must have the same
7662    /// length. Missing-on-either-side positions produce `false`
7663    /// (pandas semantics: NaN != anything, including NaN).
7664    pub fn equals(&self, other: &Self) -> Result<Self, ColumnError> {
7665        if self.values.len() != other.values.len() {
7666            return Err(ColumnError::LengthMismatch {
7667                left: self.values.len(),
7668                right: other.values.len(),
7669            });
7670        }
7671        let out: Vec<Scalar> = self
7672            .values
7673            .iter()
7674            .zip(other.values.iter())
7675            .map(|(a, b)| {
7676                if a.is_missing() || b.is_missing() {
7677                    Scalar::Bool(false)
7678                } else {
7679                    Scalar::Bool(a.semantic_eq(b))
7680                }
7681            })
7682            .collect();
7683        Self::new(DType::Bool, out)
7684    }
7685
7686    /// Scalar dot product against another column.
7687    ///
7688    /// Matches `pd.Series.dot(other)` for numeric columns. Missing
7689    /// entries on either side contribute zero (consistent with
7690    /// fp-types nan-aware sums). Length mismatch returns
7691    /// `LengthMismatch`; non-numeric inputs return a type error on
7692    /// the first offending value.
7693    pub fn dot(&self, other: &Self) -> Result<f64, ColumnError> {
7694        if self.values.len() != other.values.len() {
7695            return Err(ColumnError::LengthMismatch {
7696                left: self.values.len(),
7697                right: other.values.len(),
7698            });
7699        }
7700        let mut sum = 0.0_f64;
7701        for (a, b) in self.values.iter().zip(other.values.iter()) {
7702            if a.is_missing() || b.is_missing() {
7703                continue;
7704            }
7705            let av = a.to_f64().map_err(ColumnError::Type)?;
7706            let bv = b.to_f64().map_err(ColumnError::Type)?;
7707            if av.is_nan() || bv.is_nan() {
7708                continue;
7709            }
7710            sum += av * bv;
7711        }
7712        Ok(sum)
7713    }
7714
7715    /// Discrete linear convolution of two 1D sequences.
7716    ///
7717    /// Matches np.convolve(a, v, mode). Modes:
7718    /// - "full": output length = len(a) + len(v) - 1
7719    /// - "same": output length = max(len(a), len(v))
7720    /// - "valid": output length = max(len(a), len(v)) - min(len(a), len(v)) + 1
7721    pub fn convolve(&self, kernel: &Self, mode: &str) -> Result<Self, ColumnError> {
7722        let a: Vec<f64> = self
7723            .values
7724            .iter()
7725            .map(|v| v.to_f64().unwrap_or(0.0))
7726            .collect();
7727        let v: Vec<f64> = kernel
7728            .values
7729            .iter()
7730            .map(|v| v.to_f64().unwrap_or(0.0))
7731            .collect();
7732
7733        if a.is_empty() || v.is_empty() {
7734            return Self::new(DType::Float64, vec![]);
7735        }
7736
7737        let full_len = a.len() + v.len() - 1;
7738        let mut full: Vec<f64> = vec![0.0; full_len];
7739
7740        for (i, &ai) in a.iter().enumerate() {
7741            for (j, &vj) in v.iter().enumerate() {
7742                full[i + j] += ai * vj;
7743            }
7744        }
7745
7746        let out: Vec<f64> = match mode {
7747            "full" => full,
7748            "same" => {
7749                let target_len = a.len().max(v.len());
7750                let start = (full_len - target_len) / 2;
7751                full[start..start + target_len].to_vec()
7752            }
7753            "valid" => {
7754                let min_len = a.len().min(v.len());
7755                let valid_len = a.len().max(v.len()) - min_len + 1;
7756                let start = min_len - 1;
7757                full[start..start + valid_len].to_vec()
7758            }
7759            _ => {
7760                return Err(ColumnError::Type(TypeError::NonNumericValue {
7761                    value: format!("invalid mode '{mode}', expected 'full', 'same', or 'valid'"),
7762                    dtype: self.dtype,
7763                }));
7764            }
7765        };
7766
7767        let scalars: Vec<Scalar> = out.into_iter().map(Scalar::Float64).collect();
7768        Self::new(DType::Float64, scalars)
7769    }
7770
    /// Cross-correlation of two 1D sequences.
    ///
    /// Matches `np.correlate(a, v, mode)` for real-valued input. Implemented
    /// as `convolve(a, reverse(v), mode)`: `other` is reversed first, then
    /// handed to [`convolve`](Self::convolve), so `mode` accepts the same
    /// values as `convolve`.
    ///
    /// # Errors
    ///
    /// Propagates any error from `reverse` or `convolve`.
    pub fn correlate(&self, other: &Self, mode: &str) -> Result<Self, ColumnError> {
        let reversed = other.reverse()?;
        self.convolve(&reversed, mode)
    }
7778
7779    /// Fill missing values in `self` with aligned values from `other`.
7780    ///
7781    /// Matches `pd.Series.fillna(other)` when `other` is a Series. Only
7782    /// positions missing in `self` are replaced. Length mismatch
7783    /// returns `LengthMismatch`. Values from `other` are cast into
7784    /// `self.dtype`.
7785    pub fn fillna_with_column(&self, other: &Self) -> Result<Self, ColumnError> {
7786        if self.values.len() != other.values.len() {
7787            return Err(ColumnError::LengthMismatch {
7788                left: self.values.len(),
7789                right: other.values.len(),
7790            });
7791        }
7792        let out: Vec<Scalar> = self
7793            .values
7794            .iter()
7795            .zip(other.values.iter())
7796            .map(|(v, o)| {
7797                if v.is_missing() {
7798                    cast_scalar(o, self.dtype)
7799                } else {
7800                    Ok(v.clone())
7801                }
7802            })
7803            .collect::<Result<Vec<_>, _>>()
7804            .map_err(ColumnError::Type)?;
7805        Self::new(self.dtype, out)
7806    }
7807
7808    /// Element-wise quotient and remainder against `divisor`.
7809    ///
7810    /// Matches `pd.Series.divmod(other)`: returns
7811    /// `(self // other, self % other)`. Division by zero, missing
7812    /// inputs, or non-numeric values yield `Null(NaN)` in both
7813    /// outputs at that position. Length mismatch returns
7814    /// `LengthMismatch`. Both result columns are Float64.
7815    pub fn divmod(&self, divisor: &Self) -> Result<(Self, Self), ColumnError> {
7816        if self.values.len() != divisor.values.len() {
7817            return Err(ColumnError::LengthMismatch {
7818                left: self.values.len(),
7819                right: divisor.values.len(),
7820            });
7821        }
7822        let mut quotient = Vec::with_capacity(self.values.len());
7823        let mut remainder = Vec::with_capacity(self.values.len());
7824        for (a, b) in self.values.iter().zip(divisor.values.iter()) {
7825            if a.is_missing() || b.is_missing() {
7826                quotient.push(Scalar::Null(NullKind::NaN));
7827                remainder.push(Scalar::Null(NullKind::NaN));
7828                continue;
7829            }
7830            let num = match a.to_f64() {
7831                Ok(x) if !x.is_nan() => x,
7832                _ => {
7833                    quotient.push(Scalar::Null(NullKind::NaN));
7834                    remainder.push(Scalar::Null(NullKind::NaN));
7835                    continue;
7836                }
7837            };
7838            let den = match b.to_f64() {
7839                Ok(x) if !x.is_nan() => x,
7840                _ => {
7841                    quotient.push(Scalar::Null(NullKind::NaN));
7842                    remainder.push(Scalar::Null(NullKind::NaN));
7843                    continue;
7844                }
7845            };
7846            if den == 0.0 {
7847                quotient.push(Scalar::Null(NullKind::NaN));
7848                remainder.push(Scalar::Null(NullKind::NaN));
7849                continue;
7850            }
7851            // Floor-division and Python-style modulo, including pandas' signed
7852            // zero and infinity behavior.
7853            let q = python_floor_div_f64(num, den);
7854            let r = python_mod_f64(num, den);
7855            quotient.push(Scalar::Float64(q));
7856            remainder.push(Scalar::Float64(r));
7857        }
7858        Ok((
7859            Self::new(DType::Float64, quotient)?,
7860            Self::new(DType::Float64, remainder)?,
7861        ))
7862    }
7863
7864    /// Keep values where `cond` is true; replace false positions with
7865    /// values from an `other` Column (element-wise).
7866    ///
7867    /// Matches `pd.Series.where(cond, other)` when `other` is a Series
7868    /// aligned by position. All three inputs must have the same
7869    /// length. Cond must be Bool. Missing cond entries propagate as
7870    /// Null(NaN). The result dtype is `self.dtype`; if `other`'s dtype
7871    /// differs, values coming from `other` are cast via `cast_scalar`.
7872    pub fn where_cond_series(&self, cond: &Self, other: &Self) -> Result<Self, ColumnError> {
7873        if cond.dtype != DType::Bool {
7874            return Err(ColumnError::InvalidMaskType { dtype: cond.dtype });
7875        }
7876        if self.values.len() != cond.values.len() || self.values.len() != other.values.len() {
7877            return Err(ColumnError::LengthMismatch {
7878                left: self.values.len(),
7879                right: cond.values.len().max(other.values.len()),
7880            });
7881        }
7882        // Typed branchless select: all-valid Bool cond and same-typed all-valid
7883        // numeric self/other compute the result straight over the contiguous
7884        // buffers, with no per-element Scalar dispatch/clone or output Vec<Scalar>.
7885        // Bit-identical — with an all-valid cond there is no missing branch, and
7886        // for matching dtypes cast_scalar(o, self.dtype) is the identity, so each
7887        // slot is cond[i] ? self[i] : other[i]. Mixed/nullable inputs fall back.
7888        if let Some(cb) = cond.as_bool_slice() {
7889            if let (Some(s), Some(o)) = (self.as_f64_slice(), other.as_f64_slice()) {
7890                let out: Vec<f64> = (0..s.len())
7891                    .map(|i| if cb[i] { s[i] } else { o[i] })
7892                    .collect();
7893                return Ok(Self::from_f64_values(out));
7894            }
7895            if let (Some(s), Some(o)) = (self.as_i64_slice(), other.as_i64_slice()) {
7896                let out: Vec<i64> = (0..s.len())
7897                    .map(|i| if cb[i] { s[i] } else { o[i] })
7898                    .collect();
7899                return Ok(Self::from_i64_values(out));
7900            }
7901        }
7902        let out: Vec<Scalar> = self
7903            .values
7904            .iter()
7905            .zip(cond.values.iter().zip(other.values.iter()))
7906            .map(|(v, (c, o))| match c {
7907                Scalar::Bool(true) => Ok(v.clone()),
7908                Scalar::Bool(false) => cast_scalar(o, self.dtype),
7909                _ => Ok(Scalar::Null(NullKind::NaN)),
7910            })
7911            .collect::<Result<Vec<_>, _>>()
7912            .map_err(ColumnError::Type)?;
7913        Self::new(self.dtype, out)
7914    }
7915
7916    /// Replace values where `cond` is true with values from `other`
7917    /// (element-wise); otherwise keep.
7918    ///
7919    /// Matches `pd.Series.mask(cond, other)` when `other` is a Series.
7920    pub fn mask_series(&self, cond: &Self, other: &Self) -> Result<Self, ColumnError> {
7921        if cond.dtype != DType::Bool {
7922            return Err(ColumnError::InvalidMaskType { dtype: cond.dtype });
7923        }
7924        if self.values.len() != cond.values.len() || self.values.len() != other.values.len() {
7925            return Err(ColumnError::LengthMismatch {
7926                left: self.values.len(),
7927                right: cond.values.len().max(other.values.len()),
7928            });
7929        }
7930        // Typed branchless select (inverse of where_cond_series): cond true picks
7931        // other, false picks self. Same isomorphism argument.
7932        if let Some(cb) = cond.as_bool_slice() {
7933            if let (Some(s), Some(o)) = (self.as_f64_slice(), other.as_f64_slice()) {
7934                let out: Vec<f64> = (0..s.len())
7935                    .map(|i| if cb[i] { o[i] } else { s[i] })
7936                    .collect();
7937                return Ok(Self::from_f64_values(out));
7938            }
7939            if let (Some(s), Some(o)) = (self.as_i64_slice(), other.as_i64_slice()) {
7940                let out: Vec<i64> = (0..s.len())
7941                    .map(|i| if cb[i] { o[i] } else { s[i] })
7942                    .collect();
7943                return Ok(Self::from_i64_values(out));
7944            }
7945        }
7946        let out: Vec<Scalar> = self
7947            .values
7948            .iter()
7949            .zip(cond.values.iter().zip(other.values.iter()))
7950            .map(|(v, (c, o))| match c {
7951                Scalar::Bool(true) => cast_scalar(o, self.dtype),
7952                Scalar::Bool(false) => Ok(v.clone()),
7953                _ => Ok(Scalar::Null(NullKind::NaN)),
7954            })
7955            .collect::<Result<Vec<_>, _>>()
7956            .map_err(ColumnError::Type)?;
7957        Self::new(self.dtype, out)
7958    }
7959
7960    /// Pairwise value substitution.
7961    ///
7962    /// Matches `pd.Series.replace(to_replace, value)` when both
7963    /// arguments are scalar lists of equal length. For each value in
7964    /// the column, the first (to_replace, replacement) pair that
7965    /// matches via `Scalar::semantic_eq` is applied. Missing inputs
7966    /// can be replaced by listing `Scalar::Null(...)` in `to_replace`.
7967    /// Length mismatch between `to_replace` and `values` returns
7968    /// `ColumnError::LengthMismatch`.
7969    pub fn replace_values(
7970        &self,
7971        to_replace: &[Scalar],
7972        replacement: &[Scalar],
7973    ) -> Result<Self, ColumnError> {
7974        if to_replace.len() != replacement.len() {
7975            return Err(ColumnError::LengthMismatch {
7976                left: to_replace.len(),
7977                right: replacement.len(),
7978            });
7979        }
7980        let out: Vec<Scalar> = self
7981            .values
7982            .iter()
7983            .map(|v| {
7984                for (target, replacement_val) in to_replace.iter().zip(replacement.iter()) {
7985                    // Treat all missing variants as matching to_replace = Null.
7986                    let matches = if target.is_missing() && v.is_missing() {
7987                        true
7988                    } else if target.is_missing() || v.is_missing() {
7989                        false
7990                    } else {
7991                        v.semantic_eq(target)
7992                    };
7993                    if matches {
7994                        return replacement_val.clone();
7995                    }
7996                }
7997                v.clone()
7998            })
7999            .collect();
8000        let inferred = infer_dtype(&out).unwrap_or(self.dtype);
8001        Self::new(inferred, out)
8002    }
8003
8004    /// Alias for [`replace_values`](Self::replace_values), matching
8005    /// `pd.Series.replace(to_replace, value)` for equal-length scalar
8006    /// list replacements.
8007    pub fn replace(
8008        &self,
8009        to_replace: &[Scalar],
8010        replacement: &[Scalar],
8011    ) -> Result<Self, ColumnError> {
8012        self.replace_values(to_replace, replacement)
8013    }
8014
8015    /// Positions where the value is truthy and non-missing.
8016    ///
8017    /// Matches `np.nonzero` / `pd.Series.to_numpy().nonzero()` style
8018    /// behavior. Useful for turning a Bool mask column into explicit
8019    /// index positions. Non-missing zero-like values (Int64 0,
8020    /// Float64 0.0, Bool false, empty Utf8) are excluded.
8021    #[must_use]
8022    pub fn nonzero(&self) -> Vec<usize> {
8023        let mut out = Vec::new();
8024        for (i, v) in self.values.iter().enumerate() {
8025            if v.is_missing() {
8026                continue;
8027            }
8028            let truthy = match v {
8029                Scalar::Bool(b) => *b,
8030                Scalar::Int64(x) => *x != 0,
8031                Scalar::Float64(x) => *x != 0.0 && !x.is_nan(),
8032                Scalar::Utf8(s) => !s.is_empty(),
8033                Scalar::Timedelta64(x) => *x != 0,
8034                Scalar::Datetime64(x) => *x != Timestamp::NAT,
8035                Scalar::Period(x) => *x != i64::MIN,
8036                Scalar::Interval(_) => true,
8037                Scalar::Null(_) => false,
8038            };
8039            if truthy {
8040                out.push(i);
8041            }
8042        }
8043        out
8044    }
8045
8046    /// Count number of non-zero elements.
8047    ///
8048    /// Matches np.count_nonzero().
8049    #[must_use]
8050    pub fn count_nonzero(&self) -> usize {
8051        self.nonzero().len()
8052    }
8053
8054    /// Indices of non-zero elements as a column.
8055    ///
8056    /// Matches np.flatnonzero(). Returns Int64 column of indices.
8057    pub fn flatnonzero(&self) -> Result<Self, ColumnError> {
8058        let indices: Vec<Scalar> = self
8059            .nonzero()
8060            .into_iter()
8061            .map(|i| Scalar::Int64(i as i64))
8062            .collect();
8063        Self::new(DType::Int64, indices)
8064    }
8065
8066    /// Keep values where `cond` is true; replace false positions with
8067    /// `other`.
8068    ///
8069    /// Matches `pd.Series.where(cond, other)`. `cond` must be a Bool
8070    /// column of the same length (otherwise `LengthMismatch`). Missing
8071    /// positions in `cond` propagate as Null(NaN) in the result.
8072    pub fn where_cond(&self, cond: &Self, other: &Scalar) -> Result<Self, ColumnError> {
8073        if cond.dtype != DType::Bool {
8074            return Err(ColumnError::InvalidMaskType { dtype: cond.dtype });
8075        }
8076        if self.values.len() != cond.values.len() {
8077            return Err(ColumnError::LengthMismatch {
8078                left: self.values.len(),
8079                right: cond.values.len(),
8080            });
8081        }
8082        // Typed branchless select against a scalar `other`. For Float64 self,
8083        // Column::new coerces other to Float64, so other.to_f64() (non-missing)
8084        // is the exact false-branch value; for Int64 self only an Int64 other
8085        // stays lossless, so that path requires Scalar::Int64. All-valid cond =>
8086        // no missing branch. Bit-identical; other cases fall back.
8087        if !other.is_missing()
8088            && let Some(cb) = cond.as_bool_slice()
8089        {
8090            if let Some(s) = self.as_f64_slice()
8091                && let Ok(o) = other.to_f64()
8092            {
8093                let out: Vec<f64> = (0..s.len()).map(|i| if cb[i] { s[i] } else { o }).collect();
8094                return Ok(Self::from_f64_values(out));
8095            }
8096            if let Some(s) = self.as_i64_slice()
8097                && let Scalar::Int64(o) = other
8098            {
8099                let o = *o;
8100                let out: Vec<i64> = (0..s.len()).map(|i| if cb[i] { s[i] } else { o }).collect();
8101                return Ok(Self::from_i64_values(out));
8102            }
8103        }
8104        let out: Vec<Scalar> = self
8105            .values
8106            .iter()
8107            .zip(cond.values.iter())
8108            .map(|(v, c)| match c {
8109                Scalar::Bool(true) => v.clone(),
8110                Scalar::Bool(false) => other.clone(),
8111                _ => Scalar::Null(NullKind::NaN),
8112            })
8113            .collect();
8114        Self::new(self.dtype, out)
8115    }
8116
8117    /// Alias for [`where_cond`](Self::where_cond), matching
8118    /// `pd.Series.where(cond, other)` for scalar `other` values.
8119    pub fn r#where(&self, cond: &Self, other: &Scalar) -> Result<Self, ColumnError> {
8120        self.where_cond(cond, other)
8121    }
8122
8123    /// Rank the values of the column.
8124    ///
8125    /// Matches `pd.Series.rank(method=..., ascending=..., na_option='keep')`.
8126    /// Supported `method` values are `"average"` (pandas default,
8127    /// ties → average of tied ranks), `"min"` (ties → smallest tied
8128    /// rank), `"max"` (ties → largest tied rank), `"first"` (ties →
8129    /// appearance order), and `"dense"` (ties → consecutive integers
8130    /// with no gaps between distinct groups).
8131    ///
8132    /// Missing input positions stay missing in the output (matching
8133    /// pandas `na_option='keep'`). The result dtype is always Float64
8134    /// so `"average"` can produce non-integer ranks.
8135    pub fn rank(&self, method: &str, ascending: bool) -> Result<Self, ColumnError> {
8136        let valid_method = matches!(method, "average" | "min" | "max" | "first" | "dense");
8137        if !valid_method {
8138            return Err(ColumnError::Type(TypeError::NonNumericValue {
8139                value: method.to_string(),
8140                dtype: self.dtype,
8141            }));
8142        }
8143
8144        let len = self.values.len();
8145
8146        // Counting-sort fast path: an all-valid, bounded-range Int64 column ranks
8147        // in O(n) via a value histogram + prefix sums instead of the O(n log n)
8148        // sort below. Bit-identical: compare_scalars_na_last compares Int64 with
8149        // exact `i64::cmp` (no f64 coercion), so grouping by exact value matches
8150        // the sort's tie groups; the stable sort's within-tie order ("first")
8151        // is the original order, reproduced by a per-value occurrence counter
8152        // walked in original order. The rank f64 expressions mirror the sort
8153        // path's exactly (start_rank/end_rank), so every method/direction agrees.
8154        if let Some(data) = self.as_i64_slice()
8155            && let Some((min, range)) = i64_direct_address_range(data)
8156        {
8157            let total = data.len() as i64;
8158            let mut count = vec![0i64; range];
8159            for &v in data {
8160                count[(v as i128 - min as i128) as usize] += 1;
8161            }
8162            // c_less[s] = # values < value-at-slot-s; dense_asc[s] = 1-based
8163            // ascending ordinal among present distinct values.
8164            let mut c_less = vec![0i64; range];
8165            let mut dense_asc = vec![0i64; range];
8166            let mut acc = 0i64;
8167            let mut ord = 0i64;
8168            for s in 0..range {
8169                c_less[s] = acc;
8170                if count[s] > 0 {
8171                    ord += 1;
8172                    dense_asc[s] = ord;
8173                }
8174                acc += count[s];
8175            }
8176            let n_distinct = ord;
8177            let mut occ = vec![0i64; range];
8178            let mut ranks = vec![Scalar::Null(NullKind::NaN); len];
8179            for (i, &v) in data.iter().enumerate() {
8180                let s = (v as i128 - min as i128) as usize;
8181                let c = count[s];
8182                // `before` = sorted-position offset of this value's tie group
8183                // (values that sort before it): `c_less` ascending, the
8184                // complement `total - c_less - c` descending.
8185                let before = if ascending {
8186                    c_less[s]
8187                } else {
8188                    total - c_less[s] - c
8189                };
8190                let start_rank = before as f64 + 1.0;
8191                let end_rank = (before + c) as f64;
8192                let value = match method {
8193                    "average" => (start_rank + end_rank) / 2.0,
8194                    "min" => start_rank,
8195                    "max" => end_rank,
8196                    "first" => {
8197                        let k = occ[s];
8198                        occ[s] += 1;
8199                        (before + k) as f64 + 1.0
8200                    }
8201                    "dense" => {
8202                        let d = if ascending {
8203                            dense_asc[s]
8204                        } else {
8205                            n_distinct - dense_asc[s] + 1
8206                        };
8207                        d as f64
8208                    }
8209                    _ => unreachable!(),
8210                };
8211                ranks[i] = Scalar::Float64(value);
8212            }
8213            return Self::new(DType::Float64, ranks);
8214        }
8215
8216        // Radix fast path: an all-valid, NaN-free Float64 column ranks in O(n)
8217        // via the stable LSD radix permutation (the same one `sort_values`/
8218        // `argsort` use) instead of the O(n log n) `Scalar` comparison sort
8219        // below. Bit-identical: `f64_radix_key` normalizes `-0.0` to `0.0`
8220        // (exactly as `compare_scalars_na_last`'s `partial_cmp` treats `-0.0 ==
8221        // 0.0`), `radix_argsort_u64` is stable (ties keep original order, like
8222        // the stable `sort_by`), and tie groups are detected with f64 `==`
8223        // (which is `Equal` under `partial_cmp` for the same finite values). A
8224        // NaN would diverge — `partial_cmp(NaN, _) -> Equal` collapses ties in
8225        // the comparator path while the radix key sorts NaN to one end, and a
8226        // NaN value is also `is_missing()` so the comparator path drops it — so
8227        // any NaN routes to the unchanged comparator fallback. All-valid +
8228        // NaN-free means every row is ranked (no nulls), so the output is built
8229        // typed via `from_f64_values`.
8230        if let Some(data) = self.as_f64_slice()
8231            && !data.iter().any(|x| x.is_nan())
8232        {
8233            let perm = self
8234                .typed_radix_perm(ascending)
8235                .expect("f64 slice yields radix perm");
8236            let n = perm.len();
8237            let mut ranks = vec![0.0_f64; len];
8238            let mut cursor = 0usize;
8239            let mut dense_rank = 0f64;
8240            while cursor < n {
8241                let mut end = cursor + 1;
8242                while end < n && data[perm[end]] == data[perm[cursor]] {
8243                    end += 1;
8244                }
8245                let start_rank = cursor as f64 + 1.0;
8246                let end_rank = end as f64;
8247                dense_rank += 1.0;
8248                #[allow(clippy::needless_range_loop)] // group_idx is also the "first" rank value
8249                for group_idx in cursor..end {
8250                    let original = perm[group_idx];
8251                    ranks[original] = match method {
8252                        "average" => (start_rank + end_rank) / 2.0,
8253                        "min" => start_rank,
8254                        "max" => end_rank,
8255                        "first" => group_idx as f64 + 1.0,
8256                        "dense" => dense_rank,
8257                        _ => unreachable!(),
8258                    };
8259                }
8260                cursor = end;
8261            }
8262            return Ok(Self::from_f64_values(ranks));
8263        }
8264
8265        let mut non_missing: Vec<(usize, &Scalar)> = Vec::with_capacity(len);
8266        for (i, v) in self.values.iter().enumerate() {
8267            if !v.is_missing() {
8268                non_missing.push((i, v));
8269            }
8270        }
8271        non_missing.sort_by(|a, b| compare_scalars_na_last(a.1, b.1, ascending));
8272
8273        let mut ranks = vec![Scalar::Null(NullKind::NaN); len];
8274        let n = non_missing.len();
8275        let mut cursor = 0usize;
8276        let mut dense_rank = 0f64;
8277        while cursor < n {
8278            let mut end = cursor + 1;
8279            while end < n {
8280                let same =
8281                    compare_scalars_na_last(non_missing[cursor].1, non_missing[end].1, ascending)
8282                        .is_eq();
8283                if !same {
8284                    break;
8285                }
8286                end += 1;
8287            }
8288            let start_rank = cursor as f64 + 1.0;
8289            let end_rank = end as f64;
8290            dense_rank += 1.0;
8291            for (group_idx, entry) in non_missing.iter().enumerate().take(end).skip(cursor) {
8292                let original = entry.0;
8293                let value = match method {
8294                    "average" => (start_rank + end_rank) / 2.0,
8295                    "min" => start_rank,
8296                    "max" => end_rank,
8297                    "first" => group_idx as f64 + 1.0,
8298                    "dense" => dense_rank,
8299                    _ => unreachable!(),
8300                };
8301                ranks[original] = Scalar::Float64(value);
8302            }
8303            cursor = end;
8304        }
8305        Self::new(DType::Float64, ranks)
8306    }
8307
8308    /// Position where `needle` would be inserted to preserve sort order.
8309    ///
8310    /// Matches `pd.Series.searchsorted(value, side)`. `side` is
8311    /// `"left"` (first valid insertion position) or `"right"` (last).
8312    /// The column is assumed sorted ascending with missing values at
8313    /// the end (consistent with `sort_values(true)`). Missing
8314    /// `needle` is rejected with a type error.
8315    pub fn searchsorted(&self, needle: &Scalar, side: &str) -> Result<usize, ColumnError> {
8316        self.searchsorted_position(needle, side, None)
8317    }
8318
8319    /// Position where `needle` would be inserted using an explicit sorter.
8320    ///
8321    /// Matches `pd.Series.searchsorted(value, side, sorter=...)` where
8322    /// `sorter` is a permutation that sorts the column ascending.
8323    pub fn searchsorted_with_sorter(
8324        &self,
8325        needle: &Scalar,
8326        side: &str,
8327        sorter: &[usize],
8328    ) -> Result<usize, ColumnError> {
8329        self.searchsorted_position(needle, side, Some(sorter))
8330    }
8331
8332    /// Positions where `needles` would be inserted to preserve sort order.
8333    ///
8334    /// Matches `pd.Series.searchsorted(values, side)` for array-like
8335    /// inputs. Returns an `Int64` column of insertion positions.
8336    /// Missing needles are rejected with the same error as the scalar path.
8337    pub fn searchsorted_values(&self, needles: &[Scalar], side: &str) -> Result<Self, ColumnError> {
8338        let positions: Vec<Scalar> = needles
8339            .iter()
8340            .map(|needle| self.searchsorted_position(needle, side, None))
8341            .map(|result| result.map(|position| Scalar::Int64(position as i64)))
8342            .collect::<Result<Vec<_>, _>>()?;
8343        Self::new(DType::Int64, positions)
8344    }
8345
8346    /// Positions where `needles` would be inserted using an explicit sorter.
8347    ///
8348    /// Matches `pd.Series.searchsorted(values, side, sorter=...)` for
8349    /// array-like inputs. Returns an `Int64` column of insertion positions.
8350    pub fn searchsorted_values_with_sorter(
8351        &self,
8352        needles: &[Scalar],
8353        side: &str,
8354        sorter: &[usize],
8355    ) -> Result<Self, ColumnError> {
8356        let positions: Vec<Scalar> = needles
8357            .iter()
8358            .map(|needle| self.searchsorted_position(needle, side, Some(sorter)))
8359            .map(|result| result.map(|position| Scalar::Int64(position as i64)))
8360            .collect::<Result<Vec<_>, _>>()?;
8361        Self::new(DType::Int64, positions)
8362    }
8363
8364    fn searchsorted_position(
8365        &self,
8366        needle: &Scalar,
8367        side: &str,
8368        sorter: Option<&[usize]>,
8369    ) -> Result<usize, ColumnError> {
8370        if side != "left" && side != "right" {
8371            return Err(ColumnError::Type(TypeError::NonNumericValue {
8372                value: side.to_string(),
8373                dtype: self.dtype,
8374            }));
8375        }
8376        if needle.is_missing() {
8377            return Err(ColumnError::Type(TypeError::ValueIsMissing {
8378                kind: NullKind::NaN,
8379            }));
8380        }
8381
8382        let sorter = self.validate_searchsorted_sorter(sorter)?;
8383        let len = sorter.map_or(self.values.len(), <[usize]>::len);
8384        let mut lo = 0usize;
8385        let mut hi = len;
8386        while lo < hi {
8387            let mid = lo + (hi - lo) / 2;
8388            let mid_idx = sorter.map_or(mid, |indices| indices[mid]);
8389            let mid_val = &self.values[mid_idx];
8390            // Values that are "missing" sort to the end; treat needle as
8391            // less than any missing slot.
8392            let ord = if mid_val.is_missing() {
8393                std::cmp::Ordering::Greater
8394            } else {
8395                compare_scalars_na_last(mid_val, needle, true)
8396            };
8397            use std::cmp::Ordering;
8398            let go_right = match (ord, side) {
8399                (Ordering::Less, _) => true,
8400                (Ordering::Equal, "left") => false,
8401                (Ordering::Equal, "right") => true,
8402                (Ordering::Greater, _) => false,
8403                _ => unreachable!(),
8404            };
8405            if go_right {
8406                lo = mid + 1;
8407            } else {
8408                hi = mid;
8409            }
8410        }
8411        Ok(lo)
8412    }
8413
8414    fn validate_searchsorted_sorter<'a>(
8415        &self,
8416        sorter: Option<&'a [usize]>,
8417    ) -> Result<Option<&'a [usize]>, ColumnError> {
8418        let Some(sorter) = sorter else {
8419            return Ok(None);
8420        };
8421        let len = self.values.len();
8422        if sorter.len() != len {
8423            return Err(ColumnError::LengthMismatch {
8424                left: len,
8425                right: sorter.len(),
8426            });
8427        }
8428        let mut seen = vec![false; len];
8429        for &idx in sorter {
8430            if idx >= len {
8431                return Err(ColumnError::InvalidSorter {
8432                    len,
8433                    reason: format!("index {idx} out of bounds"),
8434                });
8435            }
8436            if std::mem::replace(&mut seen[idx], true) {
8437                return Err(ColumnError::InvalidSorter {
8438                    len,
8439                    reason: format!("index {idx} appears more than once"),
8440                });
8441            }
8442        }
8443        Ok(Some(sorter))
8444    }
8445
8446    /// Return bin indices for values given sorted bin edges.
8447    ///
8448    /// Matches np.digitize(). Returns indices such that bins[i-1] <= x < bins[i].
8449    pub fn digitize(&self, bins: &Self, right: bool) -> Result<Self, ColumnError> {
8450        let mut out = Vec::with_capacity(self.values.len());
8451        for v in &self.values {
8452            if v.is_missing() {
8453                out.push(Scalar::Int64(0));
8454                continue;
8455            }
8456            let vf = v.to_f64().map_err(ColumnError::Type)?;
8457            let side = if right { "right" } else { "left" };
8458            let pos = bins.searchsorted(&Scalar::Float64(vf), side)?;
8459            out.push(Scalar::Int64(pos as i64));
8460        }
8461        Self::new(DType::Int64, out)
8462    }
8463
8464    /// Count occurrences of each non-negative integer value.
8465    ///
8466    /// Matches np.bincount(). Returns array where output[i] = count of i in input.
8467    /// Requires non-negative Int64 values.
8468    pub fn bincount(&self, minlength: usize) -> Result<Self, ColumnError> {
8469        let mut max_val = 0i64;
8470        for v in &self.values {
8471            if v.is_missing() {
8472                continue;
8473            }
8474            match v {
8475                Scalar::Int64(x) if *x >= 0 => {
8476                    if *x > max_val {
8477                        max_val = *x;
8478                    }
8479                }
8480                Scalar::Int64(x) => {
8481                    return Err(ColumnError::Type(TypeError::NonNumericValue {
8482                        value: format!("negative value {x}"),
8483                        dtype: self.dtype,
8484                    }));
8485                }
8486                _ => {
8487                    return Err(ColumnError::Type(TypeError::NonNumericValue {
8488                        value: format!("{v:?}"),
8489                        dtype: self.dtype,
8490                    }));
8491                }
8492            }
8493        }
8494        let len = (max_val as usize + 1).max(minlength);
8495        let mut counts = vec![0i64; len];
8496        for v in &self.values {
8497            if v.is_missing() {
8498                continue;
8499            }
8500            if let Scalar::Int64(x) = v {
8501                counts[*x as usize] += 1;
8502            }
8503        }
8504        let out: Vec<Scalar> = counts.into_iter().map(Scalar::Int64).collect();
8505        Self::new(DType::Int64, out)
8506    }
8507
8508    /// Compute histogram using provided bin edges.
8509    ///
8510    /// Matches np.histogram(a, bins=edges). Returns counts for each bin.
8511    /// Bins are [edges[i], edges[i+1]) except the last which is [edges[n-1], edges[n]].
8512    pub fn histogram(&self, bin_edges: &[f64]) -> Result<Self, ColumnError> {
8513        if bin_edges.len() < 2 {
8514            return Err(ColumnError::Type(TypeError::NonNumericValue {
8515                value: "histogram requires at least 2 bin edges".to_owned(),
8516                dtype: self.dtype,
8517            }));
8518        }
8519        let n_bins = bin_edges.len() - 1;
8520        let mut counts = vec![0i64; n_bins];
8521
8522        // Fast path: strictly-increasing edges admit an O(log n_bins) binary
8523        // search for each value's bin instead of the O(n_bins) linear scan
8524        // below — O(N·log B) vs O(N·B). Bins are right-open [e_i, e_{i+1}) with
8525        // an inclusive final right edge, so for x in [e_0, e_last] the bin is
8526        // `partition_point(|e| e <= x) - 1` clamped to the last bin (so a value
8527        // exactly at e_last lands in bin n_bins-1); values outside [e_0, e_last]
8528        // are dropped — bit-identical to the linear scan. Non-strict (duplicate)
8529        // edges create zero-width bins where the two scans can disagree, so they
8530        // take the original linear path.
8531        let strict = bin_edges.windows(2).all(|w| w[0] < w[1]);
8532
8533        for v in &self.values {
8534            if v.is_missing() {
8535                continue;
8536            }
8537            let x = match v.to_f64() {
8538                Ok(f) if f.is_finite() => f,
8539                _ => continue,
8540            };
8541            if strict {
8542                if x < bin_edges[0] || x > bin_edges[n_bins] {
8543                    continue;
8544                }
8545                let bin = (bin_edges.partition_point(|&e| e <= x) - 1).min(n_bins - 1);
8546                counts[bin] += 1;
8547                continue;
8548            }
8549            // Linear scan (non-strict edges fallback).
8550            for i in 0..n_bins {
8551                let in_bin = if i == n_bins - 1 {
8552                    // Last bin is inclusive on right
8553                    x >= bin_edges[i] && x <= bin_edges[i + 1]
8554                } else {
8555                    x >= bin_edges[i] && x < bin_edges[i + 1]
8556                };
8557                if in_bin {
8558                    counts[i] += 1;
8559                    break;
8560                }
8561            }
8562            // Values outside all bins are not counted (matches numpy)
8563        }
8564
8565        let out: Vec<Scalar> = counts.into_iter().map(Scalar::Int64).collect();
8566        Self::new(DType::Int64, out)
8567    }
8568
8569    /// Compute histogram with auto-generated bins.
8570    ///
8571    /// Matches np.histogram(a, bins=n_bins). Returns (counts, bin_edges).
8572    /// Bins are evenly spaced between min and max of the data.
8573    pub fn histogram_auto(&self, n_bins: usize) -> Result<(Self, Vec<f64>), ColumnError> {
8574        if n_bins == 0 {
8575            return Err(ColumnError::Type(TypeError::NonNumericValue {
8576                value: "histogram requires at least 1 bin".to_owned(),
8577                dtype: self.dtype,
8578            }));
8579        }
8580
8581        // Find min and max
8582        let mut min_val = f64::INFINITY;
8583        let mut max_val = f64::NEG_INFINITY;
8584        for v in &self.values {
8585            if v.is_missing() {
8586                continue;
8587            }
8588            if let Ok(x) = v.to_f64()
8589                && x.is_finite()
8590            {
8591                min_val = min_val.min(x);
8592                max_val = max_val.max(x);
8593            }
8594        }
8595
8596        if !min_val.is_finite() || !max_val.is_finite() || min_val > max_val {
8597            // No valid data
8598            let counts: Vec<Scalar> = vec![Scalar::Int64(0); n_bins];
8599            let edges = vec![0.0; n_bins + 1];
8600            return Ok((Self::new(DType::Int64, counts)?, edges));
8601        }
8602
8603        // Generate bin edges
8604        let range = max_val - min_val;
8605        let (adj_min, adj_max) = if range == 0.0 {
8606            // All values are the same - numpy extends by 0.5 on each side
8607            (min_val - 0.5, max_val + 0.5)
8608        } else {
8609            (min_val, max_val)
8610        };
8611        let adj_range = adj_max - adj_min;
8612        let step = adj_range / n_bins as f64;
8613        let bin_edges: Vec<f64> = (0..=n_bins).map(|i| adj_min + step * i as f64).collect();
8614
8615        let counts = self.histogram(&bin_edges)?;
8616        Ok((counts, bin_edges))
8617    }
8618
8619    /// Cast the column to a target dtype.
8620    ///
8621    /// Matches `pd.Series.astype(dtype)`. Each value is routed through
8622    /// `fp_types::cast_scalar`, so coercion rules (Int64↔Float64,
8623    /// Bool→Int64, Utf8 parsing, etc.) match the existing cast table.
8624    /// Cast failures on any element return `ColumnError::Type` wrapping
8625    /// the underlying TypeError so the caller can attribute the
8626    /// failing conversion. Missing values pass through as the
8627    /// target dtype's canonical missing representation.
8628    pub fn astype(&self, target: DType) -> Result<Self, ColumnError> {
8629        if self.dtype == target {
8630            return Ok(self.clone());
8631        }
8632        // Typed fast paths for the two ubiquitous all-valid numeric casts:
8633        //   Int64 -> Float64 is exactly `x as f64` (the cast_scalar branch), and
8634        //   Float64 -> Int64 truncates a finite in-range float toward zero via
8635        //   `v as i64`. NaN floats mark the column invalid so as_f64_slice
8636        //   declines; an out-of-range float makes cast_scalar error, so we only
8637        //   take the typed path when every value is in range (otherwise the
8638        //   Scalar path below reproduces that exact error). Bit-identical.
8639        if target == DType::Float64
8640            && let Some(data) = self.as_i64_slice()
8641        {
8642            let out: Vec<f64> = data.iter().map(|&x| x as f64).collect();
8643            return Ok(Self::from_f64_values(out));
8644        }
8645        if target == DType::Int64
8646            && let Some(data) = self.as_f64_slice()
8647            && data
8648                .iter()
8649                .all(|&v| v >= i64::MIN as f64 && v < 9_223_372_036_854_775_808.0)
8650        {
8651            let out: Vec<i64> = data.iter().map(|&v| v as i64).collect();
8652            return Ok(Self::from_i64_values(out));
8653        }
8654        let out: Vec<Scalar> = self
8655            .values
8656            .iter()
8657            .map(|v| cast_scalar(v, target))
8658            .collect::<Result<Vec<_>, _>>()
8659            .map_err(ColumnError::Type)?;
8660        Self::new(target, out)
8661    }
8662
8663    /// Return the `n` smallest values with explicit keep policy for
8664    /// ties.
8665    ///
8666    /// Matches `pd.Series.nsmallest(n, keep=...)`:
8667    /// - `"first"`: take the first `n` rows in ascending order, break
8668    ///   ties by original position (stable).
8669    /// - `"last"`: on ties, prefer later-appearing rows.
8670    /// - `"all"`: include every row tied with the `n`-th smallest, so
8671    ///   the returned column can exceed `n`.
8672    pub fn nsmallest_keep(&self, n: usize, keep: &str) -> Result<Self, ColumnError> {
8673        nkeep_impl(self, n, keep, true)
8674    }
8675
8676    /// Return the `n` largest values with explicit keep policy for
8677    /// ties.
8678    ///
8679    /// Matches `pd.Series.nlargest(n, keep=...)` — see `nsmallest_keep`
8680    /// for the shared semantics.
8681    pub fn nlargest_keep(&self, n: usize, keep: &str) -> Result<Self, ColumnError> {
8682        nkeep_impl(self, n, keep, false)
8683    }
8684
8685    /// Return the `n` largest values.
8686    ///
8687    /// Matches `pd.Series.nlargest(n)` with `keep='first'` — ties are
8688    /// broken by first-seen order via a stable descending sort.
8689    /// Missing values are placed at the end of the sorted view and
8690    /// therefore excluded from the top-n when `n` fits within the
8691    /// non-missing count. `n > len()` clamps to the full column.
8692    pub fn nlargest(&self, n: usize) -> Result<Self, ColumnError> {
8693        let sorted = self.sort_values(false)?;
8694        let take = n.min(sorted.values.len());
8695        let values: Vec<Scalar> = sorted.values[..take].to_vec();
8696        Self::new(self.dtype, values)
8697    }
8698
8699    /// Return the `n` smallest values.
8700    ///
8701    /// Matches `pd.Series.nsmallest(n)` with `keep='first'`.
8702    pub fn nsmallest(&self, n: usize) -> Result<Self, ColumnError> {
8703        let sorted = self.sort_values(true)?;
8704        let take = n.min(sorted.values.len());
8705        let values: Vec<Scalar> = sorted.values[..take].to_vec();
8706        Self::new(self.dtype, values)
8707    }
8708
8709    /// Replace values where `cond` is true with `other`; otherwise keep.
8710    ///
8711    /// Matches `pd.Series.mask(cond, other)` — the logical inverse of
8712    /// `where_cond`. Same validation rules apply.
8713    pub fn mask(&self, cond: &Self, other: &Scalar) -> Result<Self, ColumnError> {
8714        if cond.dtype != DType::Bool {
8715            return Err(ColumnError::InvalidMaskType { dtype: cond.dtype });
8716        }
8717        if self.values.len() != cond.values.len() {
8718            return Err(ColumnError::LengthMismatch {
8719                left: self.values.len(),
8720                right: cond.values.len(),
8721            });
8722        }
8723        // Typed branchless select (inverse of where_cond scalar): cond true picks
8724        // the scalar other, false picks self. Same isomorphism argument.
8725        if !other.is_missing()
8726            && let Some(cb) = cond.as_bool_slice()
8727        {
8728            if let Some(s) = self.as_f64_slice()
8729                && let Ok(o) = other.to_f64()
8730            {
8731                let out: Vec<f64> = (0..s.len()).map(|i| if cb[i] { o } else { s[i] }).collect();
8732                return Ok(Self::from_f64_values(out));
8733            }
8734            if let Some(s) = self.as_i64_slice()
8735                && let Scalar::Int64(o) = other
8736            {
8737                let o = *o;
8738                let out: Vec<i64> = (0..s.len()).map(|i| if cb[i] { o } else { s[i] }).collect();
8739                return Ok(Self::from_i64_values(out));
8740            }
8741        }
8742        let out: Vec<Scalar> = self
8743            .values
8744            .iter()
8745            .zip(cond.values.iter())
8746            .map(|(v, c)| match c {
8747                Scalar::Bool(true) => other.clone(),
8748                Scalar::Bool(false) => v.clone(),
8749                _ => Scalar::Null(NullKind::NaN),
8750            })
8751            .collect();
8752        Self::new(self.dtype, out)
8753    }
8754
    /// Borrowed `&str` view of an all-valid Utf8 column, `None` when the
    /// column has any missing slot or any non-Utf8 scalar (those need the
    /// na-last / mixed-dtype comparator).
    ///
    /// The returned strings are in row order and borrow from the column, so
    /// no string data is copied.
    fn as_all_valid_str_vec(&self) -> Option<Vec<&str>> {
        if self.dtype != DType::Utf8 || !self.validity.all() {
            return None;
        }
        // Contiguous fast path (br-frankenpandas-vecff): when the column
        // carries the LazyContiguousUtf8 backing (output of a string op),
        // slice each row's &str straight from the byte buffer instead of
        // forcing the whole Vec<Scalar> to materialize just to read it.
        // Bit-identical: the same &str values in the same order; argsort /
        // group keys only ever borrow them.
        if let Some((bytes, offsets)) = self.as_utf8_contiguous() {
            // Each adjacent pair of offsets delimits one row's bytes.
            let mut strs = Vec::with_capacity(offsets.len() - 1);
            for w in offsets.windows(2) {
                strs.push(
                    std::str::from_utf8(&bytes[w[0]..w[1]])
                        .expect("contiguous utf8 buffer is valid by construction"),
                );
            }
            return Some(strs);
        }
        // Scalar-backed column: borrow each string, bailing out on the first
        // value that is not a Utf8 scalar.
        let mut strs = Vec::with_capacity(self.len());
        for v in self.values.iter() {
            match v {
                Scalar::Utf8(s) => strs.push(s.as_str()),
                _ => return None,
            }
        }
        Some(strs)
    }

    /// Stable sorting permutation for an all-valid numeric column via radix
    /// sort over the typed buffer, or `None` when the typed fast path does not
    /// apply (non-numeric dtype, or any missing value — those go through the
    /// `Scalar` comparator which alone reasons about na-last placement). The
    /// permutation is bit-identical to the stable comparator path: monotonic
    /// radix keys preserve `<` order and stable counting-sort preserves ties.
    ///
    /// Descending order is obtained by bitwise-negating every key before the
    /// same ascending radix pass.
    fn typed_radix_perm(&self, ascending: bool) -> Option<Vec<usize>> {
        if let Some(data) = self.as_i64_slice() {
            let keys: Vec<u64> = if ascending {
                data.iter().map(|&v| i64_radix_key(v)).collect()
            } else {
                data.iter().map(|&v| !i64_radix_key(v)).collect()
            };
            return Some(radix_argsort_u64(&keys));
        }
        if let Some(data) = self.as_f64_slice() {
            let keys: Vec<u64> = if ascending {
                data.iter().map(|&v| f64_radix_key(v)).collect()
            } else {
                data.iter().map(|&v| !f64_radix_key(v)).collect()
            };
            return Some(radix_argsort_u64(&keys));
        }
        None
    }

    /// Order-preserving `u64` radix keys for this column (per-column ascending/
    /// descending baked in), for the multi-key lexsort
    /// (`radix_argsort_multi_u64`, br-frankenpandas-lnsu6). `Some` only for an
    /// all-valid Int64 or all-valid **no-NaN** Float64 column — the cases where
    /// the radix order is bit-identical to the stable comparator: Int64 `cmp`,
    /// finite-Float64 `partial_cmp` (`-0.0` normalized to `+0.0`). A Float64
    /// column with any NaN returns `None` so the caller keeps the `Scalar`
    /// comparator (which, in the multi-key path, treats `NaN` as compare-Equal —
    /// a semantics the monotonic radix key cannot reproduce).
    #[must_use]
    pub fn typed_radix_keys(&self, ascending: bool) -> Option<Vec<u64>> {
        if let Some(data) = self.as_i64_slice() {
            return Some(if ascending {
                data.iter().map(|&v| i64_radix_key(v)).collect()
            } else {
                data.iter().map(|&v| !i64_radix_key(v)).collect()
            });
        }
        if let Some(data) = self.as_f64_slice() {
            // Unlike `typed_radix_perm`, decline outright when a NaN is present.
            if data.iter().any(|x| x.is_nan()) {
                return None;
            }
            return Some(if ascending {
                data.iter().map(|&v| f64_radix_key(v)).collect()
            } else {
                data.iter().map(|&v| !f64_radix_key(v)).collect()
            });
        }
        None
    }

    /// Sort values in ascending or descending order.
    ///
    /// Matches `pd.Series.sort_values(ascending=...)`. Missing values
    /// are placed at the end (pandas `na_position='last'` default).
    /// Stable sort.
    ///
    /// All-valid Int64/Float64 columns and all-valid Utf8 columns take
    /// radix-based fast paths; everything else is sorted with the
    /// `compare_scalars_na_last` comparator.
    pub fn sort_values(&self, ascending: bool) -> Result<Self, ColumnError> {
        // Typed radix fast path: all-valid Int64/Float64 columns sort their
        // contiguous buffer comparison-free, then re-ingest typed (no 32B
        // Scalar clone or enum-match per comparison).
        if let Some(data) = self.as_i64_slice() {
            let perm = self
                .typed_radix_perm(ascending)
                .expect("i64 slice yields perm");
            let sorted: Vec<i64> = perm.iter().map(|&i| data[i]).collect();
            return Ok(Self::from_i64_values(sorted));
        }
        if let Some(data) = self.as_f64_slice() {
            let perm = self
                .typed_radix_perm(ascending)
                .expect("f64 slice yields perm");
            let sorted: Vec<f64> = perm.iter().map(|&i| data[i]).collect();
            return Ok(Self::from_f64_values(sorted));
        }
        // All-valid Utf8: gather by the stable MSD radix permutation. The
        // fallback below sorts (idx, &Scalar) pairs stably with the same
        // ordering, so cloning in permutation order yields the identical
        // value sequence.
        if let Some(strs) = self.as_all_valid_str_vec() {
            let perm = utf8_msd_argsort(&strs, ascending);
            let sorted: Vec<Scalar> = perm.iter().map(|&i| self.values[i].clone()).collect();
            return Self::new(self.dtype, sorted);
        }
        // General path: stable comparator sort that keeps missing values last.
        let mut indexed: Vec<(usize, &Scalar)> = self.values.iter().enumerate().collect();
        indexed.sort_by(|a, b| compare_scalars_na_last(a.1, b.1, ascending));
        let sorted: Vec<Scalar> = indexed.into_iter().map(|(_, v)| v.clone()).collect();
        Self::new(self.dtype, sorted)
    }
8882
8883    /// Positions that would sort the column ascending.
8884    ///
8885    /// Matches `pd.Series.argsort()`. Returns a `Vec<usize>` such that
8886    /// `take(&argsort)` equals `sort_values(true)`. Missing values
8887    /// sort to the end; stable.
8888    #[must_use]
8889    pub fn argsort(&self) -> Vec<usize> {
8890        self.argsort_with(true)
8891    }
8892
8893    /// Stable sorting permutation in either direction. Uses the typed radix
8894    /// fast path for all-valid Int64/Float64 columns (comparison-free) and the
8895    /// `Scalar` na-last comparator otherwise. `take(&argsort_with(asc))` equals
8896    /// `sort_values(asc)`. Missing values sort to the end regardless of `asc`.
8897    #[must_use]
8898    pub fn argsort_with(&self, ascending: bool) -> Vec<usize> {
8899        if let Some(perm) = self.typed_radix_perm(ascending) {
8900            return perm;
8901        }
8902        // All-valid Utf8: stable MSD byte radix replaces the O(n log n)
8903        // Scalar-comparator sort. Bit-identical — `String::cmp` is exactly
8904        // byte order with shorter-prefix-first, no value is missing (so the
8905        // na-last arms never fire), and both sorts are stable.
8906        if let Some(strs) = self.as_all_valid_str_vec() {
8907            return utf8_msd_argsort(&strs, ascending);
8908        }
8909        let mut indexed: Vec<(usize, &Scalar)> = self.values.iter().enumerate().collect();
8910        indexed.sort_by(|a, b| compare_scalars_na_last(a.1, b.1, ascending));
8911        indexed.into_iter().map(|(i, _)| i).collect()
8912    }
8913
8914    /// Return indices that partition the array around kth element.
8915    ///
8916    /// Matches np.argpartition(). After partition, element at kth position
8917    /// is in its sorted position, elements before are <= kth element,
8918    /// elements after are >= kth element.
8919    pub fn argpartition(&self, kth: usize) -> Result<Vec<usize>, ColumnError> {
8920        if kth >= self.len() {
8921            return Err(ColumnError::InvalidLength {
8922                operation: "argpartition",
8923                expected: kth + 1,
8924                actual: self.len(),
8925            });
8926        }
8927        let mut indexed: Vec<(usize, &Scalar)> = self.values.iter().enumerate().collect();
8928        indexed.select_nth_unstable_by(kth, |a, b| compare_scalars_na_last(a.1, b.1, true));
8929        Ok(indexed.into_iter().map(|(i, _)| i).collect())
8930    }
8931
8932    /// Partition array around kth smallest element.
8933    ///
8934    /// Matches np.partition(). Returns a partially sorted array where
8935    /// element at kth position is in its final sorted position.
8936    pub fn partition(&self, kth: usize) -> Result<Self, ColumnError> {
8937        let indices = self.argpartition(kth)?;
8938        let out: Vec<Scalar> = indices.iter().map(|&i| self.values[i].clone()).collect();
8939        Self::new(self.dtype, out)
8940    }
8941
8942    /// First-order difference: `values[i] - values[i - periods]`.
8943    ///
8944    /// Matches `pd.Series.diff(periods)`. The leading `|periods|`
8945    /// positions are Null(NaN). Negative periods compute
8946    /// `values[i] - values[i + |periods|]`. Non-numeric inputs return
8947    /// a type error. Result dtype is always Float64.
8948    pub fn diff(&self, periods: i64) -> Result<Self, ColumnError> {
8949        let len = self.values.len();
8950        // Per br-frankenpandas-e607u: Timedelta64 diff preserves dtype
8951        // matching pandas, instead of forcing Float64 output and NaN-ing
8952        // via the to_f64-else catch-all.
8953        // Per pandas 2.2.3: Bool.diff() is XOR (cur != prev), yielding a bool
8954        // result with a missing leading element — NOT numeric subtraction
8955        // (older pandas gave [-1, 0, 1]). Timedelta64 keeps its dtype; all other
8956        // numeric types diff as Float64.
8957        let out_dtype = match self.dtype {
8958            DType::Timedelta64 => DType::Timedelta64,
8959            DType::Bool => DType::Bool,
8960            _ => DType::Float64,
8961        };
8962        if len == 0 || periods == 0 {
8963            let null = if out_dtype == DType::Timedelta64 {
8964                Scalar::Null(NullKind::NaT)
8965            } else {
8966                Scalar::Null(NullKind::NaN)
8967            };
8968            return Self::new(out_dtype, vec![null; len]);
8969        }
8970        let abs = periods.unsigned_abs() as usize;
8971        let mut out: Vec<Scalar> = Vec::with_capacity(len);
8972        let null_scalar = if out_dtype == DType::Timedelta64 {
8973            Scalar::Null(NullKind::NaT)
8974        } else {
8975            Scalar::Null(NullKind::NaN)
8976        };
8977        for i in 0..len {
8978            if (periods > 0 && i < abs) || (periods < 0 && i + abs >= len) {
8979                out.push(null_scalar.clone());
8980                continue;
8981            }
8982            let (cur, prev) = if periods > 0 {
8983                (&self.values[i], &self.values[i - abs])
8984            } else {
8985                (&self.values[i], &self.values[i + abs])
8986            };
8987            if cur.is_missing() || prev.is_missing() {
8988                out.push(null_scalar.clone());
8989                continue;
8990            }
8991            if let (Scalar::Timedelta64(cur_ns), Scalar::Timedelta64(prev_ns)) = (cur, prev) {
8992                if *cur_ns == Timedelta::NAT || *prev_ns == Timedelta::NAT {
8993                    out.push(Scalar::Null(NullKind::NaT));
8994                } else {
8995                    out.push(Scalar::Timedelta64(cur_ns.saturating_sub(*prev_ns)));
8996                }
8997                continue;
8998            }
8999            if let (Scalar::Bool(cur_b), Scalar::Bool(prev_b)) = (cur, prev) {
9000                // pandas 2.2.3 Bool.diff() == (cur XOR prev).
9001                out.push(Scalar::Bool(cur_b != prev_b));
9002                continue;
9003            }
9004            match (cur.to_f64(), prev.to_f64()) {
9005                (Ok(a), Ok(b)) => out.push(Scalar::Float64(a - b)),
9006                _ => out.push(Scalar::Null(NullKind::NaN)),
9007            }
9008        }
9009        Self::new(out_dtype, out)
9010    }
9011
9012    /// Consecutive differences with optional prepend/append values.
9013    ///
9014    /// Matches np.ediff1d(). Prepend/append scalars are added at boundaries.
9015    pub fn ediff1d(
9016        &self,
9017        to_begin: Option<Scalar>,
9018        to_end: Option<Scalar>,
9019    ) -> Result<Self, ColumnError> {
9020        let mut out = Vec::new();
9021        if let Some(v) = to_begin {
9022            out.push(v);
9023        }
9024        for i in 1..self.values.len() {
9025            let cur = &self.values[i];
9026            let prev = &self.values[i - 1];
9027            if cur.is_missing() || prev.is_missing() {
9028                out.push(Scalar::Float64(f64::NAN));
9029                continue;
9030            }
9031            let cf = cur.to_f64().map_err(ColumnError::Type)?;
9032            let pf = prev.to_f64().map_err(ColumnError::Type)?;
9033            out.push(Scalar::Float64(cf - pf));
9034        }
9035        if let Some(v) = to_end {
9036            out.push(v);
9037        }
9038        Self::new(DType::Float64, out)
9039    }
9040
9041    /// Numerical gradient using central differences.
9042    ///
9043    /// Matches np.gradient() with uniform spacing.
9044    pub fn gradient(&self) -> Result<Self, ColumnError> {
9045        let n = self.values.len();
9046        if n == 0 {
9047            return Self::new(DType::Float64, Vec::new());
9048        }
9049        if n == 1 {
9050            return Self::new(DType::Float64, vec![Scalar::Float64(0.0)]);
9051        }
9052        let vals: Vec<f64> = self
9053            .values
9054            .iter()
9055            .map(|v| v.to_f64().unwrap_or(f64::NAN))
9056            .collect();
9057        let mut out = Vec::with_capacity(n);
9058        out.push(Scalar::Float64(vals[1] - vals[0]));
9059        for i in 1..n - 1 {
9060            out.push(Scalar::Float64((vals[i + 1] - vals[i - 1]) / 2.0));
9061        }
9062        out.push(Scalar::Float64(vals[n - 1] - vals[n - 2]));
9063        Self::new(DType::Float64, out)
9064    }
9065
9066    /// Trapezoidal numerical integration.
9067    ///
9068    /// Matches np.trapz(). Returns scalar result of integral.
9069    pub fn trapz(&self, dx: f64) -> Result<Scalar, ColumnError> {
9070        let n = self.values.len();
9071        if n < 2 {
9072            return Ok(Scalar::Float64(0.0));
9073        }
9074        let vals: Vec<f64> = self
9075            .values
9076            .iter()
9077            .map(|v| v.to_f64().unwrap_or(0.0))
9078            .collect();
9079        let mut sum = 0.0;
9080        for i in 1..n {
9081            sum += (vals[i - 1] + vals[i]) / 2.0 * dx;
9082        }
9083        Ok(Scalar::Float64(sum))
9084    }
9085
9086    /// Per-row boolean flag for duplicated values (keep='first').
9087    ///
9088    /// Matches `pd.Series.duplicated()` — all but the first occurrence
9089    /// of each value is flagged true. Missing values are treated as a
9090    /// single bucket (pandas equates NaN for this purpose).
9091    pub fn duplicated(&self) -> Result<Self, ColumnError> {
9092        self.duplicated_keep("first")
9093    }
9094
9095    /// Per-row boolean flag for duplicated values with explicit keep policy.
9096    ///
9097    /// Matches `pd.Series.duplicated(keep=...)`. Supported policies
9098    /// are `"first"`, `"last"`, and `"false"` / `"none"` for pandas
9099    /// `keep=False`.
9100    pub fn duplicated_keep(&self, keep: &str) -> Result<Self, ColumnError> {
9101        #[derive(Hash, PartialEq, Eq)]
9102        enum Key<'a> {
9103            Null,
9104            Bool(bool),
9105            Int64(i64),
9106            FloatBits(u64),
9107            Utf8(&'a str),
9108            Timedelta64(i64),
9109            Datetime64(i64),
9110            Period(i64),
9111            Interval(u64, u64, IntervalClosed),
9112        }
9113        fn key_of(v: &Scalar) -> Key<'_> {
9114            if v.is_missing() {
9115                return Key::Null;
9116            }
9117            match v {
9118                Scalar::Bool(b) => Key::Bool(*b),
9119                Scalar::Int64(i) => Key::Int64(*i),
9120                Scalar::Float64(f) => {
9121                    let norm = if *f == 0.0 { 0.0 } else { *f };
9122                    Key::FloatBits(norm.to_bits())
9123                }
9124                Scalar::Utf8(s) => Key::Utf8(s.as_str()),
9125                Scalar::Timedelta64(v) => Key::Timedelta64(*v),
9126                Scalar::Datetime64(v) => Key::Datetime64(*v),
9127                Scalar::Period(v) => Key::Period(*v),
9128                Scalar::Interval(v) => {
9129                    let (left, right, closed) = interval_key(v);
9130                    Key::Interval(left, right, closed)
9131                }
9132                Scalar::Null(_) => Key::Null,
9133            }
9134        }
9135
9136        let policy = match keep {
9137            "first" => DupPolicy::First,
9138            "last" => DupPolicy::Last,
9139            "false" | "False" | "none" => DupPolicy::None,
9140            other => {
9141                return Err(ColumnError::Type(TypeError::NonNumericValue {
9142                    value: other.to_string(),
9143                    dtype: self.dtype,
9144                }));
9145            }
9146        };
9147
9148        // Typed fast paths: all-valid Int64/Float64 hash their contiguous
9149        // buffer directly with FxHash, skipping the per-value `Key` enum and
9150        // SipHash. `as_*_slice` only yields all-valid buffers, so the `Null`
9151        // bucket never arises; Float64 normalizes -0.0→+0.0 before `to_bits`
9152        // exactly as `key_of` does, keeping dedup semantics bit-identical.
9153        if let Some(data) = self.as_i64_slice() {
9154            // Bounded value span → hash-free direct-address table (O(n), no
9155            // probing); otherwise the FxHash typed set.
9156            if let Some((min, range)) = i64_direct_address_range(data) {
9157                return Ok(Self::from_bool_values(duplicated_flags_i64_direct(
9158                    data, min, range, policy,
9159                )));
9160            }
9161            return Ok(Self::from_bool_values(duplicated_flags_typed(data, policy)));
9162        }
9163        if let Some(data) = self.as_f64_slice() {
9164            let keys: Vec<u64> = data
9165                .iter()
9166                .map(|&f| (if f == 0.0 { 0.0 } else { f }).to_bits())
9167                .collect();
9168            return Ok(Self::from_bool_values(duplicated_flags_typed(
9169                &keys, policy,
9170            )));
9171        }
9172
9173        let mut flags = vec![false; self.values.len()];
9174        match policy {
9175            DupPolicy::First => {
9176                let mut seen: FxHashSet<Key<'_>> = FxHashSet::default();
9177                for (idx, value) in self.values.iter().enumerate() {
9178                    flags[idx] = !seen.insert(key_of(value));
9179                }
9180            }
9181            DupPolicy::Last => {
9182                let mut seen: FxHashSet<Key<'_>> = FxHashSet::default();
9183                for (idx, value) in self.values.iter().enumerate().rev() {
9184                    flags[idx] = !seen.insert(key_of(value));
9185                }
9186            }
9187            DupPolicy::None => {
9188                let mut seen_once: FxHashSet<Key<'_>> = FxHashSet::default();
9189                let mut seen_multiple: FxHashSet<Key<'_>> = FxHashSet::default();
9190                for value in &self.values {
9191                    let key = key_of(value);
9192                    if !seen_once.insert(key_of(value)) {
9193                        seen_multiple.insert(key);
9194                    }
9195                }
9196                for (idx, value) in self.values.iter().enumerate() {
9197                    flags[idx] = seen_multiple.contains(&key_of(value));
9198                }
9199            }
9200        }
9201
9202        let out: Vec<Scalar> = flags.into_iter().map(Scalar::Bool).collect();
9203        Self::new(DType::Bool, out)
9204    }
9205
9206    /// Bool column indicating whether each value lies in `[lower, upper]`
9207    /// (or the open interval when `inclusive=false`).
9208    ///
9209    /// Matches `pd.Series.between(left, right, inclusive='both'|'neither')`.
9210    /// Missing values map to false. Non-numeric inputs return a type
9211    /// error.
9212    pub fn between(&self, lower: f64, upper: f64, inclusive: bool) -> Result<Self, ColumnError> {
9213        let policy = if inclusive { "both" } else { "neither" };
9214        self.between_inclusive(lower, upper, policy)
9215    }
9216
9217    /// Bool column indicating whether each value lies between bounds
9218    /// with pandas string-valued side-inclusion semantics.
9219    ///
9220    /// Matches `pd.Series.between(inclusive=...)` for `"both"`,
9221    /// `"left"`, `"right"`, and `"neither"`.
9222    pub fn between_inclusive(
9223        &self,
9224        lower: f64,
9225        upper: f64,
9226        inclusive: &str,
9227    ) -> Result<Self, ColumnError> {
9228        let (include_left, include_right) = match inclusive {
9229            "both" => (true, true),
9230            "left" => (true, false),
9231            "right" => (false, true),
9232            "neither" => (false, false),
9233            other => {
9234                return Err(ColumnError::Type(TypeError::NonNumericValue {
9235                    value: other.to_string(),
9236                    dtype: self.dtype,
9237                }));
9238            }
9239        };
9240
9241        let mut out = Vec::with_capacity(self.values.len());
9242        for v in &self.values {
9243            if v.is_missing() {
9244                out.push(Scalar::Bool(false));
9245                continue;
9246            }
9247            match v.to_f64() {
9248                Ok(x) => {
9249                    let lower_ok = if include_left { x >= lower } else { x > lower };
9250                    let upper_ok = if include_right { x <= upper } else { x < upper };
9251                    out.push(Scalar::Bool(lower_ok && upper_ok));
9252                }
9253                Err(err) => return Err(ColumnError::Type(err)),
9254            }
9255        }
9256        Self::new(DType::Bool, out)
9257    }
9258
9259    /// Encode the column as integer codes plus unique values.
9260    ///
9261    /// Matches `pd.Series.factorize()` default behavior: missing values
9262    /// map to `-1`, and uniques preserve first-seen order.
9263    pub fn factorize(&self) -> Result<(Self, Self), ColumnError> {
9264        self.factorize_with_options(false, true)
9265    }
9266
9267    /// Encode the column as integer codes plus unique values.
9268    ///
9269    /// Matches `pd.Series.factorize(sort=..., use_na_sentinel=...)`.
9270    /// When `sort=true`, uniques are sorted and codes are remapped to the
9271    /// sorted positions. When `use_na_sentinel=false`, missing values are
9272    /// emitted as a regular unique bucket instead of `-1`.
9273    pub fn factorize_with_options(
9274        &self,
9275        sort: bool,
9276        use_na_sentinel: bool,
9277    ) -> Result<(Self, Self), ColumnError> {
9278        // Per br-frankenpandas-9433f: HashMap-based code lookup mirrors
9279        // fp-frame's Series::factorize fix (br-78d0c). fp-columnar can't
9280        // import fp-frame's ScalarKey (cycle), so define a local
9281        // hashable wrapper. missing_position tracker handles the
9282        // use_na_sentinel=false branch separately so multiple null
9283        // kinds collapse to the same code (matches the existing
9284        // is_missing-based check).
9285        #[derive(Hash, PartialEq, Eq, Clone, Copy)]
9286        enum LocalKey<'a> {
9287            Bool(bool),
9288            Int64(i64),
9289            FloatBits(u64),
9290            Utf8(&'a str),
9291            Timedelta64(i64),
9292            Datetime64(i64),
9293            Period(i64),
9294            Interval(u64, u64, IntervalClosed),
9295        }
9296        fn key_of(s: &Scalar) -> Option<LocalKey<'_>> {
9297            match s {
9298                Scalar::Null(_) => None,
9299                Scalar::Bool(b) => Some(LocalKey::Bool(*b)),
9300                Scalar::Int64(i) => Some(LocalKey::Int64(*i)),
9301                Scalar::Float64(f) => {
9302                    if f.is_nan() {
9303                        None
9304                    } else {
9305                        let normalized = if *f == 0.0 { 0.0 } else { *f };
9306                        Some(LocalKey::FloatBits(normalized.to_bits()))
9307                    }
9308                }
9309                Scalar::Utf8(s) => Some(LocalKey::Utf8(s.as_str())),
9310                Scalar::Timedelta64(t) => {
9311                    if *t == Timedelta::NAT {
9312                        None
9313                    } else {
9314                        Some(LocalKey::Timedelta64(*t))
9315                    }
9316                }
9317                Scalar::Datetime64(t) => {
9318                    if *t == Timestamp::NAT {
9319                        None
9320                    } else {
9321                        Some(LocalKey::Datetime64(*t))
9322                    }
9323                }
9324                Scalar::Period(p) => {
9325                    if *p == i64::MIN {
9326                        None
9327                    } else {
9328                        Some(LocalKey::Period(*p))
9329                    }
9330                }
9331                Scalar::Interval(interval) => {
9332                    let (left, right, closed) = interval_key(interval);
9333                    Some(LocalKey::Interval(left, right, closed))
9334                }
9335            }
9336        }
9337
9338        let (mut codes, mut uniques): (Vec<Scalar>, Vec<Scalar>) = if let Some((data, min, range)) =
9339            self.as_i64_slice()
9340                .and_then(|d| i64_direct_address_range(d).map(|(m, r)| (d, m, r)))
9341        {
9342            // Hash-free direct-address factorize for a bounded-range all-valid
9343            // Int64 column: a dense code table indexed by (v-min) assigns
9344            // first-seen codes in O(n) with no hashing. All-valid ⇒ no
9345            // missing/sentinel handling, so this is bit-identical to the
9346            // HashMap path's first-seen code assignment.
9347            let mut code_table = vec![-1i64; range];
9348            let mut uniques: Vec<Scalar> = Vec::new();
9349            let mut codes: Vec<Scalar> = Vec::with_capacity(data.len());
9350            for &v in data {
9351                let slot = (v as i128 - min as i128) as usize;
9352                let existing = code_table[slot];
9353                if existing < 0 {
9354                    let code = uniques.len() as i64;
9355                    code_table[slot] = code;
9356                    uniques.push(Scalar::Int64(v));
9357                    codes.push(Scalar::Int64(code));
9358                } else {
9359                    codes.push(Scalar::Int64(existing));
9360                }
9361            }
9362            (codes, uniques)
9363        } else {
9364            let mut uniques: Vec<Scalar> = Vec::new();
9365            let mut idx_map: FxHashMap<LocalKey<'_>, i64> = FxHashMap::default();
9366            let mut missing_position: Option<i64> = None;
9367            let mut codes: Vec<Scalar> = Vec::with_capacity(self.values.len());
9368
9369            for value in &self.values {
9370                if value.is_missing() {
9371                    if use_na_sentinel {
9372                        codes.push(Scalar::Int64(-1));
9373                    } else if let Some(p) = missing_position {
9374                        codes.push(Scalar::Int64(p));
9375                    } else {
9376                        let code = uniques.len() as i64;
9377                        missing_position = Some(code);
9378                        uniques.push(value.clone());
9379                        codes.push(Scalar::Int64(code));
9380                    }
9381                    continue;
9382                }
9383                let Some(key) = key_of(value) else {
9384                    // Defensive: non-missing value that maps to no key
9385                    // (shouldn't happen for valid Scalar variants).
9386                    codes.push(Scalar::Int64(-1));
9387                    continue;
9388                };
9389                match idx_map.get(&key) {
9390                    Some(&p) => codes.push(Scalar::Int64(p)),
9391                    None => {
9392                        let code = uniques.len() as i64;
9393                        idx_map.insert(key, code);
9394                        uniques.push(value.clone());
9395                        codes.push(Scalar::Int64(code));
9396                    }
9397                }
9398            }
9399            drop(idx_map);
9400            (codes, uniques)
9401        };
9402
9403        if sort && !uniques.is_empty() {
9404            let mut ordering: Vec<usize> = (0..uniques.len()).collect();
9405            ordering.sort_by(|left, right| {
9406                compare_scalars_na_last(&uniques[*left], &uniques[*right], true)
9407            });
9408
9409            let mut remap = vec![0usize; uniques.len()];
9410            let sorted_uniques: Vec<Scalar> = ordering
9411                .into_iter()
9412                .enumerate()
9413                .map(|(sorted_position, original_position)| {
9414                    remap[original_position] = sorted_position;
9415                    uniques[original_position].clone()
9416                })
9417                .collect();
9418
9419            for code in &mut codes {
9420                if let Scalar::Int64(value) = code
9421                    && *value >= 0
9422                {
9423                    *value = remap[*value as usize] as i64;
9424                }
9425            }
9426
9427            uniques = sorted_uniques;
9428        }
9429
9430        let codes_col = Self::new(DType::Int64, codes)?;
9431        let uniques_col = Self::new(self.dtype, uniques)?;
9432        Ok((codes_col, uniques_col))
9433    }
9434
9435    /// Element-wise absolute value.
9436    ///
9437    /// Matches `pd.Series.abs()`. Int/Float/Bool/Timedelta paths preserve
9438    /// dtype; Utf8 inputs return `ColumnError::Type` because pandas raises
9439    /// TypeError on non-numeric .abs().
9440    pub fn abs(&self) -> Result<Self, ColumnError> {
9441        // Typed fast path: all-valid Int64/Float64 take abs over the contiguous
9442        // buffer and re-ingest typed (same dtype preserved), skipping the lazy
9443        // Scalar materialization and the 32B-per-cell Vec<Scalar>. Bit-identical
9444        // to the loop below (Int64 wrapping_abs incl i64::MIN; Float64 .abs()
9445        // incl -0.0→0.0; all-valid ⇒ no missing branch).
9446        if let Some(data) = self.as_i64_slice() {
9447            return Ok(Self::from_i64_values(
9448                data.iter().map(|&x| x.wrapping_abs()).collect(),
9449            ));
9450        }
9451        if let Some(data) = self.as_f64_slice() {
9452            return Ok(Self::from_f64_values(
9453                data.iter().map(|&x| x.abs()).collect(),
9454            ));
9455        }
9456
9457        let mut out = Vec::with_capacity(self.values.len());
9458        for v in &self.values {
9459            if v.is_missing() {
9460                out.push(v.clone());
9461                continue;
9462            }
9463            match v {
9464                Scalar::Bool(x) => out.push(Scalar::Bool(*x)),
9465                Scalar::Int64(x) => out.push(Scalar::Int64(x.wrapping_abs())),
9466                Scalar::Float64(x) => out.push(Scalar::Float64(x.abs())),
9467                Scalar::Timedelta64(x) if *x != Timedelta::NAT => {
9468                    out.push(Scalar::Timedelta64(x.wrapping_abs()))
9469                }
9470                _ => {
9471                    return Err(ColumnError::Type(TypeError::NonNumericValue {
9472                        value: format!("{v:?}"),
9473                        dtype: self.dtype,
9474                    }));
9475                }
9476            }
9477        }
9478        Self::new(self.dtype, out)
9479    }
9480
9481    /// Alias for abs, matching np.fabs.
9482    pub fn fabs(&self) -> Result<Self, ColumnError> {
9483        self.abs()
9484    }
9485
9486    /// Alias for abs, matching np.absolute.
9487    pub fn absolute(&self) -> Result<Self, ColumnError> {
9488        self.abs()
9489    }
9490
9491    /// Negate numeric values. Matches numpy's negative ufunc.
9492    pub fn neg(&self) -> Result<Self, ColumnError> {
9493        // Typed, dtype-preserving fast path (all-valid only): Int64 negates over
9494        // the i64 buffer (wrapping, incl i64::MIN) and stays Int64; Float64
9495        // negates over the f64 buffer. Bit-identical to the scalar loop.
9496        if let Some(data) = self.as_i64_slice() {
9497            return Ok(Self::from_i64_values(
9498                data.iter().map(|&x| x.wrapping_neg()).collect(),
9499            ));
9500        }
9501        if let Some(data) = self.as_f64_slice() {
9502            return Ok(Self::from_f64_values(data.iter().map(|&x| -x).collect()));
9503        }
9504        let mut out = Vec::with_capacity(self.values.len());
9505        for v in &self.values {
9506            if v.is_missing() {
9507                out.push(v.clone());
9508                continue;
9509            }
9510            match v {
9511                Scalar::Int64(x) => out.push(Scalar::Int64(x.wrapping_neg())),
9512                Scalar::Float64(x) => out.push(Scalar::Float64(-x)),
9513                Scalar::Timedelta64(x) if *x != Timedelta::NAT => {
9514                    out.push(Scalar::Timedelta64(x.wrapping_neg()))
9515                }
9516                _ => {
9517                    return Err(ColumnError::Type(TypeError::NonNumericValue {
9518                        value: format!("{v:?}"),
9519                        dtype: self.dtype,
9520                    }));
9521                }
9522            }
9523        }
9524        Self::new(self.dtype, out)
9525    }
9526
9527    /// Unary positive (identity for numeric, error for non-numeric).
9528    pub fn positive(&self) -> Result<Self, ColumnError> {
9529        for v in &self.values {
9530            if v.is_missing() {
9531                continue;
9532            }
9533            match v {
9534                Scalar::Int64(_) | Scalar::Float64(_) | Scalar::Timedelta64(_) => {}
9535                _ => {
9536                    return Err(ColumnError::Type(TypeError::NonNumericValue {
9537                        value: format!("{v:?}"),
9538                        dtype: self.dtype,
9539                    }));
9540                }
9541            }
9542        }
9543        Ok(self.clone())
9544    }
9545
9546    /// Alias for positive.
9547    pub fn negative(&self) -> Result<Self, ColumnError> {
9548        self.neg()
9549    }
9550
9551    /// Square root of numeric values. Matches numpy's sqrt ufunc.
9552    pub fn sqrt(&self) -> Result<Self, ColumnError> {
9553        if let Some(out) = self.typed_float_unary(f64::sqrt) {
9554            return Ok(out);
9555        }
9556        let mut out = Vec::with_capacity(self.values.len());
9557        for v in &self.values {
9558            if v.is_missing() {
9559                out.push(Scalar::Float64(f64::NAN));
9560                continue;
9561            }
9562            match v {
9563                Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).sqrt())),
9564                Scalar::Float64(x) => out.push(Scalar::Float64(x.sqrt())),
9565                _ => {
9566                    return Err(ColumnError::Type(TypeError::NonNumericValue {
9567                        value: format!("{v:?}"),
9568                        dtype: self.dtype,
9569                    }));
9570                }
9571            }
9572        }
9573        Self::new(DType::Float64, out)
9574    }
9575
9576    /// Exponential (e^x) of numeric values. Matches numpy's exp ufunc.
9577    pub fn exp(&self) -> Result<Self, ColumnError> {
9578        if let Some(out) = self.typed_float_unary(f64::exp) {
9579            return Ok(out);
9580        }
9581        let mut out = Vec::with_capacity(self.values.len());
9582        for v in &self.values {
9583            if v.is_missing() {
9584                out.push(Scalar::Float64(f64::NAN));
9585                continue;
9586            }
9587            match v {
9588                Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).exp())),
9589                Scalar::Float64(x) => out.push(Scalar::Float64(x.exp())),
9590                _ => {
9591                    return Err(ColumnError::Type(TypeError::NonNumericValue {
9592                        value: format!("{v:?}"),
9593                        dtype: self.dtype,
9594                    }));
9595                }
9596            }
9597        }
9598        Self::new(DType::Float64, out)
9599    }
9600
9601    /// Natural logarithm of numeric values. Matches numpy's log ufunc.
9602    pub fn log(&self) -> Result<Self, ColumnError> {
9603        if let Some(out) = self.typed_float_unary(f64::ln) {
9604            return Ok(out);
9605        }
9606        let mut out = Vec::with_capacity(self.values.len());
9607        for v in &self.values {
9608            if v.is_missing() {
9609                out.push(Scalar::Float64(f64::NAN));
9610                continue;
9611            }
9612            match v {
9613                Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).ln())),
9614                Scalar::Float64(x) => out.push(Scalar::Float64(x.ln())),
9615                _ => {
9616                    return Err(ColumnError::Type(TypeError::NonNumericValue {
9617                        value: format!("{v:?}"),
9618                        dtype: self.dtype,
9619                    }));
9620                }
9621            }
9622        }
9623        Self::new(DType::Float64, out)
9624    }
9625
9626    /// Base-10 logarithm of numeric values. Matches numpy's log10 ufunc.
9627    pub fn log10(&self) -> Result<Self, ColumnError> {
9628        if let Some(out) = self.typed_float_unary(f64::log10) {
9629            return Ok(out);
9630        }
9631        let mut out = Vec::with_capacity(self.values.len());
9632        for v in &self.values {
9633            if v.is_missing() {
9634                out.push(Scalar::Float64(f64::NAN));
9635                continue;
9636            }
9637            match v {
9638                Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).log10())),
9639                Scalar::Float64(x) => out.push(Scalar::Float64(x.log10())),
9640                _ => {
9641                    return Err(ColumnError::Type(TypeError::NonNumericValue {
9642                        value: format!("{v:?}"),
9643                        dtype: self.dtype,
9644                    }));
9645                }
9646            }
9647        }
9648        Self::new(DType::Float64, out)
9649    }
9650
9651    /// Base-2 logarithm of numeric values. Matches numpy's log2 ufunc.
9652    pub fn log2(&self) -> Result<Self, ColumnError> {
9653        if let Some(out) = self.typed_float_unary(f64::log2) {
9654            return Ok(out);
9655        }
9656        let mut out = Vec::with_capacity(self.values.len());
9657        for v in &self.values {
9658            if v.is_missing() {
9659                out.push(Scalar::Float64(f64::NAN));
9660                continue;
9661            }
9662            match v {
9663                Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).log2())),
9664                Scalar::Float64(x) => out.push(Scalar::Float64(x.log2())),
9665                _ => {
9666                    return Err(ColumnError::Type(TypeError::NonNumericValue {
9667                        value: format!("{v:?}"),
9668                        dtype: self.dtype,
9669                    }));
9670                }
9671            }
9672        }
9673        Self::new(DType::Float64, out)
9674    }
9675
9676    /// Compute element-wise sine.
9677    pub fn sin(&self) -> Result<Self, ColumnError> {
9678        if let Some(out) = self.typed_float_unary(f64::sin) {
9679            return Ok(out);
9680        }
9681        let mut out = Vec::with_capacity(self.values.len());
9682        for v in &self.values {
9683            if v.is_missing() {
9684                out.push(Scalar::Float64(f64::NAN));
9685                continue;
9686            }
9687            match v {
9688                Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).sin())),
9689                Scalar::Float64(x) => out.push(Scalar::Float64(x.sin())),
9690                _ => {
9691                    return Err(ColumnError::Type(TypeError::NonNumericValue {
9692                        value: format!("{v:?}"),
9693                        dtype: self.dtype,
9694                    }));
9695                }
9696            }
9697        }
9698        Self::new(DType::Float64, out)
9699    }
9700
9701    /// Compute element-wise cosine.
9702    pub fn cos(&self) -> Result<Self, ColumnError> {
9703        if let Some(out) = self.typed_float_unary(f64::cos) {
9704            return Ok(out);
9705        }
9706        let mut out = Vec::with_capacity(self.values.len());
9707        for v in &self.values {
9708            if v.is_missing() {
9709                out.push(Scalar::Float64(f64::NAN));
9710                continue;
9711            }
9712            match v {
9713                Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).cos())),
9714                Scalar::Float64(x) => out.push(Scalar::Float64(x.cos())),
9715                _ => {
9716                    return Err(ColumnError::Type(TypeError::NonNumericValue {
9717                        value: format!("{v:?}"),
9718                        dtype: self.dtype,
9719                    }));
9720                }
9721            }
9722        }
9723        Self::new(DType::Float64, out)
9724    }
9725
9726    /// Compute element-wise tangent.
9727    pub fn tan(&self) -> Result<Self, ColumnError> {
9728        if let Some(out) = self.typed_float_unary(f64::tan) {
9729            return Ok(out);
9730        }
9731        let mut out = Vec::with_capacity(self.values.len());
9732        for v in &self.values {
9733            if v.is_missing() {
9734                out.push(Scalar::Float64(f64::NAN));
9735                continue;
9736            }
9737            match v {
9738                Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).tan())),
9739                Scalar::Float64(x) => out.push(Scalar::Float64(x.tan())),
9740                _ => {
9741                    return Err(ColumnError::Type(TypeError::NonNumericValue {
9742                        value: format!("{v:?}"),
9743                        dtype: self.dtype,
9744                    }));
9745                }
9746            }
9747        }
9748        Self::new(DType::Float64, out)
9749    }
9750
9751    /// Compute element-wise arcsine.
9752    pub fn asin(&self) -> Result<Self, ColumnError> {
9753        if let Some(out) = self.typed_float_unary(f64::asin) {
9754            return Ok(out);
9755        }
9756        let mut out = Vec::with_capacity(self.values.len());
9757        for v in &self.values {
9758            if v.is_missing() {
9759                out.push(Scalar::Float64(f64::NAN));
9760                continue;
9761            }
9762            match v {
9763                Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).asin())),
9764                Scalar::Float64(x) => out.push(Scalar::Float64(x.asin())),
9765                _ => {
9766                    return Err(ColumnError::Type(TypeError::NonNumericValue {
9767                        value: format!("{v:?}"),
9768                        dtype: self.dtype,
9769                    }));
9770                }
9771            }
9772        }
9773        Self::new(DType::Float64, out)
9774    }
9775
9776    /// Compute element-wise arccosine.
9777    pub fn acos(&self) -> Result<Self, ColumnError> {
9778        if let Some(out) = self.typed_float_unary(f64::acos) {
9779            return Ok(out);
9780        }
9781        let mut out = Vec::with_capacity(self.values.len());
9782        for v in &self.values {
9783            if v.is_missing() {
9784                out.push(Scalar::Float64(f64::NAN));
9785                continue;
9786            }
9787            match v {
9788                Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).acos())),
9789                Scalar::Float64(x) => out.push(Scalar::Float64(x.acos())),
9790                _ => {
9791                    return Err(ColumnError::Type(TypeError::NonNumericValue {
9792                        value: format!("{v:?}"),
9793                        dtype: self.dtype,
9794                    }));
9795                }
9796            }
9797        }
9798        Self::new(DType::Float64, out)
9799    }
9800
9801    /// Compute element-wise arctangent.
9802    pub fn atan(&self) -> Result<Self, ColumnError> {
9803        if let Some(out) = self.typed_float_unary(f64::atan) {
9804            return Ok(out);
9805        }
9806        let mut out = Vec::with_capacity(self.values.len());
9807        for v in &self.values {
9808            if v.is_missing() {
9809                out.push(Scalar::Float64(f64::NAN));
9810                continue;
9811            }
9812            match v {
9813                Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).atan())),
9814                Scalar::Float64(x) => out.push(Scalar::Float64(x.atan())),
9815                _ => {
9816                    return Err(ColumnError::Type(TypeError::NonNumericValue {
9817                        value: format!("{v:?}"),
9818                        dtype: self.dtype,
9819                    }));
9820                }
9821            }
9822        }
9823        Self::new(DType::Float64, out)
9824    }
9825
9826    /// Compute element-wise hyperbolic sine.
9827    pub fn sinh(&self) -> Result<Self, ColumnError> {
9828        if let Some(out) = self.typed_float_unary(f64::sinh) {
9829            return Ok(out);
9830        }
9831        let mut out = Vec::with_capacity(self.values.len());
9832        for v in &self.values {
9833            if v.is_missing() {
9834                out.push(Scalar::Float64(f64::NAN));
9835                continue;
9836            }
9837            match v {
9838                Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).sinh())),
9839                Scalar::Float64(x) => out.push(Scalar::Float64(x.sinh())),
9840                _ => {
9841                    return Err(ColumnError::Type(TypeError::NonNumericValue {
9842                        value: format!("{v:?}"),
9843                        dtype: self.dtype,
9844                    }));
9845                }
9846            }
9847        }
9848        Self::new(DType::Float64, out)
9849    }
9850
9851    /// Compute element-wise hyperbolic cosine.
9852    pub fn cosh(&self) -> Result<Self, ColumnError> {
9853        if let Some(out) = self.typed_float_unary(f64::cosh) {
9854            return Ok(out);
9855        }
9856        let mut out = Vec::with_capacity(self.values.len());
9857        for v in &self.values {
9858            if v.is_missing() {
9859                out.push(Scalar::Float64(f64::NAN));
9860                continue;
9861            }
9862            match v {
9863                Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).cosh())),
9864                Scalar::Float64(x) => out.push(Scalar::Float64(x.cosh())),
9865                _ => {
9866                    return Err(ColumnError::Type(TypeError::NonNumericValue {
9867                        value: format!("{v:?}"),
9868                        dtype: self.dtype,
9869                    }));
9870                }
9871            }
9872        }
9873        Self::new(DType::Float64, out)
9874    }
9875
9876    /// Compute element-wise hyperbolic tangent.
9877    pub fn tanh(&self) -> Result<Self, ColumnError> {
9878        if let Some(out) = self.typed_float_unary(f64::tanh) {
9879            return Ok(out);
9880        }
9881        let mut out = Vec::with_capacity(self.values.len());
9882        for v in &self.values {
9883            if v.is_missing() {
9884                out.push(Scalar::Float64(f64::NAN));
9885                continue;
9886            }
9887            match v {
9888                Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).tanh())),
9889                Scalar::Float64(x) => out.push(Scalar::Float64(x.tanh())),
9890                _ => {
9891                    return Err(ColumnError::Type(TypeError::NonNumericValue {
9892                        value: format!("{v:?}"),
9893                        dtype: self.dtype,
9894                    }));
9895                }
9896            }
9897        }
9898        Self::new(DType::Float64, out)
9899    }
9900
9901    /// Compute element-wise inverse hyperbolic sine.
9902    pub fn asinh(&self) -> Result<Self, ColumnError> {
9903        if let Some(out) = self.typed_float_unary(f64::asinh) {
9904            return Ok(out);
9905        }
9906        let mut out = Vec::with_capacity(self.values.len());
9907        for v in &self.values {
9908            if v.is_missing() {
9909                out.push(Scalar::Float64(f64::NAN));
9910                continue;
9911            }
9912            match v {
9913                Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).asinh())),
9914                Scalar::Float64(x) => out.push(Scalar::Float64(x.asinh())),
9915                _ => {
9916                    return Err(ColumnError::Type(TypeError::NonNumericValue {
9917                        value: format!("{v:?}"),
9918                        dtype: self.dtype,
9919                    }));
9920                }
9921            }
9922        }
9923        Self::new(DType::Float64, out)
9924    }
9925
9926    /// Compute element-wise inverse hyperbolic cosine.
9927    pub fn acosh(&self) -> Result<Self, ColumnError> {
9928        if let Some(out) = self.typed_float_unary(f64::acosh) {
9929            return Ok(out);
9930        }
9931        let mut out = Vec::with_capacity(self.values.len());
9932        for v in &self.values {
9933            if v.is_missing() {
9934                out.push(Scalar::Float64(f64::NAN));
9935                continue;
9936            }
9937            match v {
9938                Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).acosh())),
9939                Scalar::Float64(x) => out.push(Scalar::Float64(x.acosh())),
9940                _ => {
9941                    return Err(ColumnError::Type(TypeError::NonNumericValue {
9942                        value: format!("{v:?}"),
9943                        dtype: self.dtype,
9944                    }));
9945                }
9946            }
9947        }
9948        Self::new(DType::Float64, out)
9949    }
9950
9951    /// Compute element-wise inverse hyperbolic tangent.
9952    pub fn atanh(&self) -> Result<Self, ColumnError> {
9953        if let Some(out) = self.typed_float_unary(f64::atanh) {
9954            return Ok(out);
9955        }
9956        let mut out = Vec::with_capacity(self.values.len());
9957        for v in &self.values {
9958            if v.is_missing() {
9959                out.push(Scalar::Float64(f64::NAN));
9960                continue;
9961            }
9962            match v {
9963                Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).atanh())),
9964                Scalar::Float64(x) => out.push(Scalar::Float64(x.atanh())),
9965                _ => {
9966                    return Err(ColumnError::Type(TypeError::NonNumericValue {
9967                        value: format!("{v:?}"),
9968                        dtype: self.dtype,
9969                    }));
9970                }
9971            }
9972        }
9973        Self::new(DType::Float64, out)
9974    }
9975
9976    /// Numpy-style alias for asin.
9977    ///
9978    /// Matches np.arcsin(x).
9979    pub fn arcsin(&self) -> Result<Self, ColumnError> {
9980        self.asin()
9981    }
9982
9983    /// Numpy-style alias for acos.
9984    ///
9985    /// Matches np.arccos(x).
9986    pub fn arccos(&self) -> Result<Self, ColumnError> {
9987        self.acos()
9988    }
9989
9990    /// Numpy-style alias for atan.
9991    ///
9992    /// Matches np.arctan(x).
9993    pub fn arctan(&self) -> Result<Self, ColumnError> {
9994        self.atan()
9995    }
9996
9997    /// Numpy-style alias for atan2.
9998    ///
9999    /// Matches np.arctan2(y, x).
10000    pub fn arctan2(&self, other: &Self) -> Result<Self, ColumnError> {
10001        self.atan2(other)
10002    }
10003
10004    /// Numpy-style alias for asinh.
10005    ///
10006    /// Matches np.arcsinh(x).
10007    pub fn arcsinh(&self) -> Result<Self, ColumnError> {
10008        self.asinh()
10009    }
10010
10011    /// Numpy-style alias for acosh.
10012    ///
10013    /// Matches np.arccosh(x).
10014    pub fn arccosh(&self) -> Result<Self, ColumnError> {
10015        self.acosh()
10016    }
10017
10018    /// Numpy-style alias for atanh.
10019    ///
10020    /// Matches np.arctanh(x).
10021    pub fn arctanh(&self) -> Result<Self, ColumnError> {
10022        self.atanh()
10023    }
10024
10025    /// Compute element-wise floor.
10026    /// Typed fast path shared by floor/ceil/trunc: an all-valid Float64 (or
10027    /// Int64) column maps `f` over its contiguous buffer and re-ingests via
10028    /// `from_f64_values`, skipping lazy Scalar materialization + the 32 B/cell
10029    /// Vec<Scalar> + Column::new revalidation. Returns `None` (fall back to the
10030    /// scalar loop) for nullable / non-numeric columns. Bit-identical: all-valid
10031    /// ⇒ the `is_missing → NaN` branch never fires; for Int64 the scalar path
10032    /// casts `x as f64` and `f(x as f64) == x as f64` since the cast is integral;
10033    /// floor/ceil/trunc of finite/Inf inputs never synthesize a NaN, and
10034    /// from_f64_values re-marks any NaN exactly as Self::new would.
10035    fn typed_float_unary(&self, f: fn(f64) -> f64) -> Option<Self> {
10036        if let Some(data) = self.as_f64_slice() {
10037            return Some(Self::from_f64_values(data.iter().map(|&x| f(x)).collect()));
10038        }
10039        if let Some(data) = self.as_i64_slice() {
10040            return Some(Self::from_f64_values(
10041                data.iter().map(|&x| f(x as f64)).collect(),
10042            ));
10043        }
10044        None
10045    }
10046
10047    /// All-valid numeric column → an owned `f64` view (Float64 copied, Int64 cast
10048    /// `x as f64`), exactly as `Scalar::to_f64` would. `None` for nullable /
10049    /// non-numeric columns (so binary ufuncs fall back to the scalar loop).
10050    fn all_valid_as_f64(&self) -> Option<Vec<f64>> {
10051        if let Some(s) = self.as_f64_slice() {
10052            return Some(s.to_vec());
10053        }
10054        if let Some(s) = self.as_i64_slice() {
10055            return Some(s.iter().map(|&x| x as f64).collect());
10056        }
10057        None
10058    }
10059
10060    /// Typed fast path for a Float64-output binary ufunc: when both columns are
10061    /// all-valid numeric, map `f` over the two contiguous buffers and re-ingest
10062    /// via `from_f64_values`, skipping per-element Scalar dispatch/clone on both
10063    /// sides. Caller must have validated equal length. Bit-identical: all-valid
10064    /// ⇒ no missing→NaN branch; `f(a,b)` is the scalar loop's `f(a.to_f64(),
10065    /// b.to_f64())`; from_f64_values re-marks any NaN result missing as Self::new
10066    /// would. Returns `None` to fall back when either side is nullable/non-numeric.
10067    fn typed_float_binary(&self, other: &Self, f: fn(f64, f64) -> f64) -> Option<Self> {
10068        let a = self.all_valid_as_f64()?;
10069        let b = other.all_valid_as_f64()?;
10070        Some(Self::from_f64_values(
10071            a.iter().zip(b.iter()).map(|(&x, &y)| f(x, y)).collect(),
10072        ))
10073    }
10074
10075    pub fn floor(&self) -> Result<Self, ColumnError> {
10076        if let Some(out) = self.typed_float_unary(f64::floor) {
10077            return Ok(out);
10078        }
10079        let mut out = Vec::with_capacity(self.values.len());
10080        for v in &self.values {
10081            if v.is_missing() {
10082                out.push(Scalar::Float64(f64::NAN));
10083                continue;
10084            }
10085            match v {
10086                Scalar::Int64(x) => out.push(Scalar::Float64(*x as f64)),
10087                Scalar::Float64(x) => out.push(Scalar::Float64(x.floor())),
10088                _ => {
10089                    return Err(ColumnError::Type(TypeError::NonNumericValue {
10090                        value: format!("{v:?}"),
10091                        dtype: self.dtype,
10092                    }));
10093                }
10094            }
10095        }
10096        Self::new(DType::Float64, out)
10097    }
10098
10099    /// Compute element-wise ceiling.
10100    pub fn ceil(&self) -> Result<Self, ColumnError> {
10101        if let Some(out) = self.typed_float_unary(f64::ceil) {
10102            return Ok(out);
10103        }
10104        let mut out = Vec::with_capacity(self.values.len());
10105        for v in &self.values {
10106            if v.is_missing() {
10107                out.push(Scalar::Float64(f64::NAN));
10108                continue;
10109            }
10110            match v {
10111                Scalar::Int64(x) => out.push(Scalar::Float64(*x as f64)),
10112                Scalar::Float64(x) => out.push(Scalar::Float64(x.ceil())),
10113                _ => {
10114                    return Err(ColumnError::Type(TypeError::NonNumericValue {
10115                        value: format!("{v:?}"),
10116                        dtype: self.dtype,
10117                    }));
10118                }
10119            }
10120        }
10121        Self::new(DType::Float64, out)
10122    }
10123
10124    /// Compute element-wise truncation toward zero.
10125    pub fn trunc(&self) -> Result<Self, ColumnError> {
10126        if let Some(out) = self.typed_float_unary(f64::trunc) {
10127            return Ok(out);
10128        }
10129        let mut out = Vec::with_capacity(self.values.len());
10130        for v in &self.values {
10131            if v.is_missing() {
10132                out.push(Scalar::Float64(f64::NAN));
10133                continue;
10134            }
10135            match v {
10136                Scalar::Int64(x) => out.push(Scalar::Float64(*x as f64)),
10137                Scalar::Float64(x) => out.push(Scalar::Float64(x.trunc())),
10138                _ => {
10139                    return Err(ColumnError::Type(TypeError::NonNumericValue {
10140                        value: format!("{v:?}"),
10141                        dtype: self.dtype,
10142                    }));
10143                }
10144            }
10145        }
10146        Self::new(DType::Float64, out)
10147    }
10148
10149    /// Replace NaN with zero and infinity with large finite numbers.
10150    ///
10151    /// Matches np.nan_to_num(x). NaN becomes 0, positive infinity becomes
10152    /// a large positive number, negative infinity becomes a large negative number.
10153    pub fn nan_to_num(&self) -> Result<Self, ColumnError> {
10154        self.nan_to_num_with_values(0.0, f64::MAX, f64::MIN)
10155    }
10156
10157    /// Replace NaN and infinity with specified values.
10158    ///
10159    /// Matches np.nan_to_num(x, nan=nan, posinf=posinf, neginf=neginf).
10160    pub fn nan_to_num_with_values(
10161        &self,
10162        nan: f64,
10163        posinf: f64,
10164        neginf: f64,
10165    ) -> Result<Self, ColumnError> {
10166        // Typed fast path (all-valid only, output Float64). all-valid Float64 has
10167        // no NaN (NaN marks a column invalid), so only the ±Inf replacements can
10168        // fire; the Int64 branch is a plain x as f64. Bit-identical to the scalar
10169        // loop; from_f64_values re-marks a NaN replacement (e.g. posinf=NaN)
10170        // missing exactly as Self::new would.
10171        if let Some(data) = self.as_f64_slice() {
10172            return Ok(Self::from_f64_values(
10173                data.iter()
10174                    .map(|&x| {
10175                        if x == f64::INFINITY {
10176                            posinf
10177                        } else if x == f64::NEG_INFINITY {
10178                            neginf
10179                        } else {
10180                            x
10181                        }
10182                    })
10183                    .collect(),
10184            ));
10185        }
10186        if let Some(data) = self.as_i64_slice() {
10187            return Ok(Self::from_f64_values(
10188                data.iter().map(|&x| x as f64).collect(),
10189            ));
10190        }
10191        let mut out = Vec::with_capacity(self.values.len());
10192        for v in &self.values {
10193            let result = match v {
10194                Scalar::Float64(x) => {
10195                    if x.is_nan() {
10196                        nan
10197                    } else if *x == f64::INFINITY {
10198                        posinf
10199                    } else if *x == f64::NEG_INFINITY {
10200                        neginf
10201                    } else {
10202                        *x
10203                    }
10204                }
10205                Scalar::Int64(x) => *x as f64,
10206                Scalar::Null(_) => nan,
10207                _ => {
10208                    return Err(ColumnError::Type(TypeError::NonNumericValue {
10209                        value: format!("{v:?}"),
10210                        dtype: self.dtype,
10211                    }));
10212                }
10213            };
10214            out.push(Scalar::Float64(result));
10215        }
10216        Self::new(DType::Float64, out)
10217    }
10218
10219    /// Round to nearest even integer (banker's rounding).
10220    ///
10221    /// Matches np.rint(x). Values exactly halfway between integers round to
10222    /// the nearest even integer.
10223    pub fn rint(&self) -> Result<Self, ColumnError> {
10224        // round-half-to-even; for Int64 round_ties_even(x as f64) == x as f64
10225        // (integral), matching the scalar Float64(x as f64) branch. Output Float64.
10226        if let Some(out) = self.typed_float_unary(f64::round_ties_even) {
10227            return Ok(out);
10228        }
10229        let mut out = Vec::with_capacity(self.values.len());
10230        for v in &self.values {
10231            if v.is_missing() {
10232                out.push(Scalar::Float64(f64::NAN));
10233                continue;
10234            }
10235            match v {
10236                Scalar::Int64(x) => out.push(Scalar::Float64(*x as f64)),
10237                Scalar::Float64(x) => out.push(Scalar::Float64(x.round_ties_even())),
10238                _ => {
10239                    return Err(ColumnError::Type(TypeError::NonNumericValue {
10240                        value: format!("{v:?}"),
10241                        dtype: self.dtype,
10242                    }));
10243                }
10244            }
10245        }
10246        Self::new(DType::Float64, out)
10247    }
10248
10249    /// Round toward zero (same as trunc).
10250    ///
10251    /// Matches np.fix(x). Alias for trunc().
10252    pub fn fix(&self) -> Result<Self, ColumnError> {
10253        self.trunc()
10254    }
10255
10256    /// Trim leading and/or trailing zeros from a 1-D array.
10257    ///
10258    /// Matches np.trim_zeros(). The `trim` parameter specifies:
10259    /// - "f" or "fb": trim from front (leading zeros)
10260    /// - "b" or "fb": trim from back (trailing zeros)
10261    /// - "fb" (default): trim both
10262    pub fn trim_zeros(&self, trim: &str) -> Result<Self, ColumnError> {
10263        let values = &self.values;
10264        if values.is_empty() {
10265            return Self::new(self.dtype, vec![]);
10266        }
10267
10268        let is_zero = |s: &Scalar| -> bool {
10269            match s {
10270                Scalar::Int64(x) => *x == 0,
10271                Scalar::Float64(x) => *x == 0.0,
10272                Scalar::Bool(b) => !*b,
10273                _ => false,
10274            }
10275        };
10276
10277        let mut start = 0;
10278        let mut end = values.len();
10279
10280        if trim.contains('f') {
10281            while start < end && is_zero(&values[start]) {
10282                start += 1;
10283            }
10284        }
10285
10286        if trim.contains('b') {
10287            while end > start && is_zero(&values[end - 1]) {
10288                end -= 1;
10289            }
10290        }
10291
10292        Self::new(self.dtype, values[start..end].to_vec())
10293    }
10294
10295    /// Round to the given number of decimals.
10296    ///
10297    /// Matches np.around(a, decimals). For negative decimals, rounds to
10298    /// the left of the decimal point (e.g., decimals=-1 rounds to tens).
10299    pub fn around(&self, decimals: i32) -> Result<Self, ColumnError> {
10300        let factor = 10.0_f64.powi(decimals);
10301        let mut out = Vec::with_capacity(self.values.len());
10302        for v in &self.values {
10303            if v.is_missing() {
10304                out.push(Scalar::Float64(f64::NAN));
10305                continue;
10306            }
10307            match v {
10308                Scalar::Int64(x) => {
10309                    if decimals >= 0 {
10310                        out.push(Scalar::Int64(*x));
10311                    } else {
10312                        // np.around uses round-half-to-even (banker's), e.g.
10313                        // around([25], -1) -> 20, not 30.
10314                        let rounded = ((*x as f64) * factor).round_ties_even() / factor;
10315                        out.push(Scalar::Int64(rounded as i64));
10316                    }
10317                }
10318                Scalar::Float64(x) => {
10319                    let rounded = (*x * factor).round_ties_even() / factor;
10320                    out.push(Scalar::Float64(rounded));
10321                }
10322                _ => {
10323                    return Err(ColumnError::Type(TypeError::NonNumericValue {
10324                        value: format!("{v:?}"),
10325                        dtype: self.dtype,
10326                    }));
10327                }
10328            }
10329        }
10330        if decimals >= 0 && self.dtype == DType::Int64 {
10331            Self::new(DType::Int64, out)
10332        } else {
10333            Self::new(DType::Float64, out)
10334        }
10335    }
10336
10337    /// Unwrap by changing deltas between values to their 2*pi complements.
10338    ///
10339    /// Matches np.unwrap(). Unwraps radian phase values by adding multiples
10340    /// of 2*pi when the absolute difference from the previous value exceeds
10341    /// the discontinuity threshold (default: pi).
10342    pub fn unwrap(&self, discont: Option<f64>) -> Result<Self, ColumnError> {
10343        let threshold = discont.unwrap_or(std::f64::consts::PI);
10344        let two_pi = 2.0 * std::f64::consts::PI;
10345
10346        let mut out = Vec::with_capacity(self.values.len());
10347        let mut offset = 0.0;
10348
10349        for (i, v) in self.values.iter().enumerate() {
10350            if v.is_missing() {
10351                out.push(Scalar::Float64(f64::NAN));
10352                continue;
10353            }
10354            let x = match v {
10355                Scalar::Int64(x) => *x as f64,
10356                Scalar::Float64(x) => *x,
10357                _ => {
10358                    return Err(ColumnError::Type(TypeError::NonNumericValue {
10359                        value: format!("{v:?}"),
10360                        dtype: self.dtype,
10361                    }));
10362                }
10363            };
10364
10365            if i == 0 {
10366                out.push(Scalar::Float64(x));
10367            } else {
10368                let prev = match &out[out.len() - 1] {
10369                    Scalar::Float64(p) if !p.is_nan() => *p,
10370                    _ => {
10371                        out.push(Scalar::Float64(x + offset));
10372                        continue;
10373                    }
10374                };
10375
10376                let diff = x + offset - prev;
10377                if diff > threshold {
10378                    offset -= two_pi * ((diff + std::f64::consts::PI) / two_pi).floor();
10379                } else if diff < -threshold {
10380                    offset += two_pi * ((-diff + std::f64::consts::PI) / two_pi).floor();
10381                }
10382                out.push(Scalar::Float64(x + offset));
10383            }
10384        }
10385
10386        Self::new(DType::Float64, out)
10387    }
10388
10389    /// Compute exp(x) - 1 with improved precision for small x.
10390    pub fn expm1(&self) -> Result<Self, ColumnError> {
10391        if let Some(out) = self.typed_float_unary(f64::exp_m1) {
10392            return Ok(out);
10393        }
10394        let mut out = Vec::with_capacity(self.values.len());
10395        for v in &self.values {
10396            if v.is_missing() {
10397                out.push(Scalar::Float64(f64::NAN));
10398                continue;
10399            }
10400            match v {
10401                Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).exp_m1())),
10402                Scalar::Float64(x) => out.push(Scalar::Float64(x.exp_m1())),
10403                _ => {
10404                    return Err(ColumnError::Type(TypeError::NonNumericValue {
10405                        value: format!("{v:?}"),
10406                        dtype: self.dtype,
10407                    }));
10408                }
10409            }
10410        }
10411        Self::new(DType::Float64, out)
10412    }
10413
10414    /// Compute ln(1 + x) with improved precision for small x.
10415    pub fn log1p(&self) -> Result<Self, ColumnError> {
10416        if let Some(out) = self.typed_float_unary(f64::ln_1p) {
10417            return Ok(out);
10418        }
10419        let mut out = Vec::with_capacity(self.values.len());
10420        for v in &self.values {
10421            if v.is_missing() {
10422                out.push(Scalar::Float64(f64::NAN));
10423                continue;
10424            }
10425            match v {
10426                Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).ln_1p())),
10427                Scalar::Float64(x) => out.push(Scalar::Float64(x.ln_1p())),
10428                _ => {
10429                    return Err(ColumnError::Type(TypeError::NonNumericValue {
10430                        value: format!("{v:?}"),
10431                        dtype: self.dtype,
10432                    }));
10433                }
10434            }
10435        }
10436        Self::new(DType::Float64, out)
10437    }
10438
10439    /// Compute element-wise cube root.
10440    pub fn cbrt(&self) -> Result<Self, ColumnError> {
10441        if let Some(out) = self.typed_float_unary(f64::cbrt) {
10442            return Ok(out);
10443        }
10444        let mut out = Vec::with_capacity(self.values.len());
10445        for v in &self.values {
10446            if v.is_missing() {
10447                out.push(Scalar::Float64(f64::NAN));
10448                continue;
10449            }
10450            match v {
10451                Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).cbrt())),
10452                Scalar::Float64(x) => out.push(Scalar::Float64(x.cbrt())),
10453                _ => {
10454                    return Err(ColumnError::Type(TypeError::NonNumericValue {
10455                        value: format!("{v:?}"),
10456                        dtype: self.dtype,
10457                    }));
10458                }
10459            }
10460        }
10461        Self::new(DType::Float64, out)
10462    }
10463
10464    /// Multiply by 2 raised to an integer power.
10465    ///
10466    /// Matches np.ldexp(x, exp). Computes x * 2^exp for each element.
10467    pub fn ldexp(&self, exp: i32) -> Result<Self, ColumnError> {
10468        let multiplier = 2.0_f64.powi(exp);
10469        let mut out = Vec::with_capacity(self.values.len());
10470        for v in &self.values {
10471            if v.is_missing() {
10472                out.push(Scalar::Float64(f64::NAN));
10473                continue;
10474            }
10475            match v {
10476                Scalar::Int64(x) => out.push(Scalar::Float64(*x as f64 * multiplier)),
10477                Scalar::Float64(x) => out.push(Scalar::Float64(x * multiplier)),
10478                _ => {
10479                    return Err(ColumnError::Type(TypeError::NonNumericValue {
10480                        value: format!("{v:?}"),
10481                        dtype: self.dtype,
10482                    }));
10483                }
10484            }
10485        }
10486        Self::new(DType::Float64, out)
10487    }
10488
10489    /// Split into integer and fractional parts.
10490    ///
10491    /// Matches np.modf(x). Returns (fractional_part, integer_part) as two columns.
10492    /// The fractional part has the same sign as the input.
10493    pub fn modf(&self) -> Result<(Self, Self), ColumnError> {
10494        let mut frac = Vec::with_capacity(self.values.len());
10495        let mut int = Vec::with_capacity(self.values.len());
10496        for v in &self.values {
10497            if v.is_missing() {
10498                frac.push(Scalar::Float64(f64::NAN));
10499                int.push(Scalar::Float64(f64::NAN));
10500                continue;
10501            }
10502            match v {
10503                Scalar::Int64(x) => {
10504                    frac.push(Scalar::Float64(0.0));
10505                    int.push(Scalar::Float64(*x as f64));
10506                }
10507                Scalar::Float64(x) => {
10508                    let i = x.trunc();
10509                    let f = x - i;
10510                    frac.push(Scalar::Float64(f));
10511                    int.push(Scalar::Float64(i));
10512                }
10513                _ => {
10514                    return Err(ColumnError::Type(TypeError::NonNumericValue {
10515                        value: format!("{v:?}"),
10516                        dtype: self.dtype,
10517                    }));
10518                }
10519            }
10520        }
10521        Ok((
10522            Self::new(DType::Float64, frac)?,
10523            Self::new(DType::Float64, int)?,
10524        ))
10525    }
10526
10527    /// Decompose float into mantissa and exponent.
10528    ///
10529    /// Matches np.frexp(x). Returns (mantissa, exponent) where:
10530    /// - mantissa is in [0.5, 1.0) or exactly 0.0
10531    /// - exponent is an integer
10532    /// - x = mantissa * 2^exponent
10533    pub fn frexp(&self) -> Result<(Self, Self), ColumnError> {
10534        let mut mantissa = Vec::with_capacity(self.values.len());
10535        let mut exponent = Vec::with_capacity(self.values.len());
10536        for v in &self.values {
10537            if v.is_missing() {
10538                mantissa.push(Scalar::Float64(f64::NAN));
10539                exponent.push(Scalar::Int64(0));
10540                continue;
10541            }
10542            match v {
10543                Scalar::Int64(x) => {
10544                    let f = *x as f64;
10545                    if f == 0.0 {
10546                        mantissa.push(Scalar::Float64(0.0));
10547                        exponent.push(Scalar::Int64(0));
10548                    } else {
10549                        let bits = f.abs().to_bits();
10550                        let exp_bits = ((bits >> 52) & 0x7ff) as i64;
10551                        let exp = exp_bits - 1022; // normalized mantissa is in [0.5, 1.0)
10552                        let mant_bits = (bits & 0x000f_ffff_ffff_ffff) | 0x3fe0_0000_0000_0000;
10553                        let mant = f64::from_bits(mant_bits);
10554                        let mant = if f < 0.0 { -mant } else { mant };
10555                        mantissa.push(Scalar::Float64(mant));
10556                        exponent.push(Scalar::Int64(exp));
10557                    }
10558                }
10559                Scalar::Float64(x) => {
10560                    if x.is_nan() {
10561                        mantissa.push(Scalar::Float64(f64::NAN));
10562                        exponent.push(Scalar::Int64(0));
10563                    } else if x.is_infinite() {
10564                        mantissa.push(Scalar::Float64(*x));
10565                        exponent.push(Scalar::Int64(0));
10566                    } else if *x == 0.0 {
10567                        mantissa.push(Scalar::Float64(*x)); // preserves sign of zero
10568                        exponent.push(Scalar::Int64(0));
10569                    } else {
10570                        let bits = x.abs().to_bits();
10571                        let exp_bits = ((bits >> 52) & 0x7ff) as i64;
10572                        if exp_bits == 0 {
10573                            // denormalized number - scale up and extract
10574                            let scaled = x.abs() * 2.0_f64.powi(64);
10575                            let sbits = scaled.to_bits();
10576                            let sexp_bits = ((sbits >> 52) & 0x7ff) as i64;
10577                            let exp = sexp_bits - 1022 - 64;
10578                            let mant_bits = (sbits & 0x000f_ffff_ffff_ffff) | 0x3fe0_0000_0000_0000;
10579                            let mant = f64::from_bits(mant_bits);
10580                            let mant = if *x < 0.0 { -mant } else { mant };
10581                            mantissa.push(Scalar::Float64(mant));
10582                            exponent.push(Scalar::Int64(exp));
10583                        } else {
10584                            let exp = exp_bits - 1022;
10585                            let mant_bits = (bits & 0x000f_ffff_ffff_ffff) | 0x3fe0_0000_0000_0000;
10586                            let mant = f64::from_bits(mant_bits);
10587                            let mant = if *x < 0.0 { -mant } else { mant };
10588                            mantissa.push(Scalar::Float64(mant));
10589                            exponent.push(Scalar::Int64(exp));
10590                        }
10591                    }
10592                }
10593                _ => {
10594                    return Err(ColumnError::Type(TypeError::NonNumericValue {
10595                        value: format!("{v:?}"),
10596                        dtype: self.dtype,
10597                    }));
10598                }
10599            }
10600        }
10601        Ok((
10602            Self::new(DType::Float64, mantissa)?,
10603            Self::new(DType::Int64, exponent)?,
10604        ))
10605    }
10606
10607    /// Return the next representable floating-point value after x toward y.
10608    ///
10609    /// Matches np.nextafter(x, y). For each pair of elements, returns the
10610    /// next representable float after x in the direction of y.
10611    pub fn nextafter(&self, other: &Self) -> Result<Self, ColumnError> {
10612        if self.len() != other.len() {
10613            return Err(ColumnError::LengthMismatch {
10614                left: self.len(),
10615                right: other.len(),
10616            });
10617        }
10618        let mut out = Vec::with_capacity(self.values.len());
10619        for (v1, v2) in self.values.iter().zip(other.values.iter()) {
10620            if v1.is_missing() || v2.is_missing() {
10621                out.push(Scalar::Float64(f64::NAN));
10622                continue;
10623            }
10624            let x = v1.to_f64().map_err(ColumnError::Type)?;
10625            let y = v2.to_f64().map_err(ColumnError::Type)?;
10626            let result = if x.is_nan() || y.is_nan() {
10627                f64::NAN
10628            } else if x == y {
10629                x
10630            } else if x == 0.0 {
10631                // Smallest positive/negative denormal, not MIN_POSITIVE (normalized)
10632                if y > 0.0 {
10633                    f64::from_bits(1) // smallest positive denormal ≈ 5e-324
10634                } else {
10635                    -f64::from_bits(1) // smallest negative denormal ≈ -5e-324
10636                }
10637            } else {
10638                let bits = x.to_bits() as i64;
10639                let next_bits = if (x > 0.0) == (y > x) {
10640                    bits + 1
10641                } else {
10642                    bits - 1
10643                };
10644                f64::from_bits(next_bits as u64)
10645            };
10646            out.push(Scalar::Float64(result));
10647        }
10648        Self::new(DType::Float64, out)
10649    }
10650
10651    /// Check if values are negative infinity.
10652    ///
10653    /// Matches np.isneginf(x). Returns a Bool column that is True where
10654    /// the value is negative infinity.
10655    pub fn isneginf(&self) -> Result<Self, ColumnError> {
10656        let mut out = Vec::with_capacity(self.values.len());
10657        for v in &self.values {
10658            if v.is_missing() {
10659                out.push(Scalar::Bool(false));
10660                continue;
10661            }
10662            match v {
10663                Scalar::Int64(_) => out.push(Scalar::Bool(false)),
10664                Scalar::Float64(x) => out.push(Scalar::Bool(*x == f64::NEG_INFINITY)),
10665                _ => out.push(Scalar::Bool(false)),
10666            }
10667        }
10668        Self::new(DType::Bool, out)
10669    }
10670
10671    /// Check if values are positive infinity.
10672    ///
10673    /// Matches np.isposinf(x). Returns a Bool column that is True where
10674    /// the value is positive infinity.
10675    pub fn isposinf(&self) -> Result<Self, ColumnError> {
10676        let mut out = Vec::with_capacity(self.values.len());
10677        for v in &self.values {
10678            if v.is_missing() {
10679                out.push(Scalar::Bool(false));
10680                continue;
10681            }
10682            match v {
10683                Scalar::Int64(_) => out.push(Scalar::Bool(false)),
10684                Scalar::Float64(x) => out.push(Scalar::Bool(*x == f64::INFINITY)),
10685                _ => out.push(Scalar::Bool(false)),
10686            }
10687        }
10688        Self::new(DType::Bool, out)
10689    }
10690
10691    /// Compute 2 raised to the power of each element.
10692    ///
10693    /// Matches np.exp2(x). Returns 2^x for each element.
10694    pub fn exp2(&self) -> Result<Self, ColumnError> {
10695        // Typed fast path (all-valid only, output Float64). The Int64 branch must
10696        // keep `2.0.powi(x as i32)` (NOT (x as f64).exp2()) to match the scalar
10697        // loop's exact rounding; Float64 uses x.exp2(). Bit-identical.
10698        if let Some(data) = self.as_f64_slice() {
10699            return Ok(Self::from_f64_values(
10700                data.iter().map(|&x| x.exp2()).collect(),
10701            ));
10702        }
10703        if let Some(data) = self.as_i64_slice() {
10704            return Ok(Self::from_f64_values(
10705                data.iter().map(|&x| 2.0_f64.powi(x as i32)).collect(),
10706            ));
10707        }
10708        let mut out = Vec::with_capacity(self.values.len());
10709        for v in &self.values {
10710            if v.is_missing() {
10711                out.push(Scalar::Float64(f64::NAN));
10712                continue;
10713            }
10714            match v {
10715                Scalar::Int64(x) => out.push(Scalar::Float64(2.0_f64.powi(*x as i32))),
10716                Scalar::Float64(x) => out.push(Scalar::Float64(x.exp2())),
10717                _ => {
10718                    return Err(ColumnError::Type(TypeError::NonNumericValue {
10719                        value: format!("{v:?}"),
10720                        dtype: self.dtype,
10721                    }));
10722                }
10723            }
10724        }
10725        Self::new(DType::Float64, out)
10726    }
10727
10728    /// Compute the sinc function.
10729    ///
10730    /// Matches np.sinc(x). Returns sin(pi*x) / (pi*x), with sinc(0) = 1.
10731    pub fn sinc(&self) -> Result<Self, ColumnError> {
10732        // all-valid ⇒ no NaN, so the scalar formula reduces to 0->1 else
10733        // sin(πx)/(πx) for both Float64 and Int64 (x as f64). Bit-identical.
10734        if let Some(out) = self.typed_float_unary(|x| {
10735            if x == 0.0 {
10736                1.0
10737            } else {
10738                let px = std::f64::consts::PI * x;
10739                px.sin() / px
10740            }
10741        }) {
10742            return Ok(out);
10743        }
10744        let mut out = Vec::with_capacity(self.values.len());
10745        for v in &self.values {
10746            if v.is_missing() {
10747                out.push(Scalar::Float64(f64::NAN));
10748                continue;
10749            }
10750            match v {
10751                Scalar::Int64(x) => {
10752                    if *x == 0 {
10753                        out.push(Scalar::Float64(1.0));
10754                    } else {
10755                        let px = std::f64::consts::PI * (*x as f64);
10756                        out.push(Scalar::Float64(px.sin() / px));
10757                    }
10758                }
10759                Scalar::Float64(x) => {
10760                    if *x == 0.0 {
10761                        out.push(Scalar::Float64(1.0));
10762                    } else if x.is_nan() {
10763                        out.push(Scalar::Float64(f64::NAN));
10764                    } else {
10765                        let px = std::f64::consts::PI * x;
10766                        out.push(Scalar::Float64(px.sin() / px));
10767                    }
10768                }
10769                _ => {
10770                    return Err(ColumnError::Type(TypeError::NonNumericValue {
10771                        value: format!("{v:?}"),
10772                        dtype: self.dtype,
10773                    }));
10774                }
10775            }
10776        }
10777        Self::new(DType::Float64, out)
10778    }
10779
10780    /// Compute log(exp(x) + exp(y)) in a numerically stable way.
10781    ///
10782    /// Matches np.logaddexp(x, y). Useful for log-domain arithmetic
10783    /// where direct computation would overflow/underflow.
10784    pub fn logaddexp(&self, other: &Self) -> Result<Self, ColumnError> {
10785        if self.len() != other.len() {
10786            return Err(ColumnError::LengthMismatch {
10787                left: self.len(),
10788                right: other.len(),
10789            });
10790        }
10791        let mut out = Vec::with_capacity(self.values.len());
10792        for (v1, v2) in self.values.iter().zip(other.values.iter()) {
10793            if v1.is_missing() || v2.is_missing() {
10794                out.push(Scalar::Float64(f64::NAN));
10795                continue;
10796            }
10797            let x = v1.to_f64().map_err(ColumnError::Type)?;
10798            let y = v2.to_f64().map_err(ColumnError::Type)?;
10799            let result = if x.is_nan() || y.is_nan() {
10800                f64::NAN
10801            } else if x == f64::NEG_INFINITY {
10802                y
10803            } else if y == f64::NEG_INFINITY {
10804                x
10805            } else if x == f64::INFINITY || y == f64::INFINITY {
10806                f64::INFINITY
10807            } else if x >= y {
10808                x + (y - x).exp().ln_1p()
10809            } else {
10810                y + (x - y).exp().ln_1p()
10811            };
10812            out.push(Scalar::Float64(result));
10813        }
10814        Self::new(DType::Float64, out)
10815    }
10816
10817    /// Compute log2(2**x + 2**y) in a numerically stable way.
10818    ///
10819    /// Matches np.logaddexp2(x, y). Like logaddexp but using base 2.
10820    pub fn logaddexp2(&self, other: &Self) -> Result<Self, ColumnError> {
10821        if self.len() != other.len() {
10822            return Err(ColumnError::LengthMismatch {
10823                left: self.len(),
10824                right: other.len(),
10825            });
10826        }
10827        let ln2 = std::f64::consts::LN_2;
10828        let mut out = Vec::with_capacity(self.values.len());
10829        for (v1, v2) in self.values.iter().zip(other.values.iter()) {
10830            if v1.is_missing() || v2.is_missing() {
10831                out.push(Scalar::Float64(f64::NAN));
10832                continue;
10833            }
10834            let x = v1.to_f64().map_err(ColumnError::Type)?;
10835            let y = v2.to_f64().map_err(ColumnError::Type)?;
10836            let result = if x.is_nan() || y.is_nan() {
10837                f64::NAN
10838            } else if x == f64::NEG_INFINITY {
10839                y
10840            } else if y == f64::NEG_INFINITY {
10841                x
10842            } else if x == f64::INFINITY || y == f64::INFINITY {
10843                f64::INFINITY
10844            } else if x >= y {
10845                x + ((y - x) * ln2).exp().ln_1p() / ln2
10846            } else {
10847                y + ((x - y) * ln2).exp().ln_1p() / ln2
10848            };
10849            out.push(Scalar::Float64(result));
10850        }
10851        Self::new(DType::Float64, out)
10852    }
10853
10854    /// Compute spacing between this value and the next representable float.
10855    ///
10856    /// Matches np.spacing(x). Returns the ULP (unit in last place) - the
10857    /// distance to the next representable float away from zero.
10858    pub fn spacing(&self) -> Result<Self, ColumnError> {
10859        let mut out = Vec::with_capacity(self.values.len());
10860        for v in &self.values {
10861            if v.is_missing() {
10862                out.push(Scalar::Float64(f64::NAN));
10863                continue;
10864            }
10865            match v {
10866                Scalar::Int64(x) => {
10867                    let f = (*x as f64).abs();
10868                    if f == 0.0 {
10869                        out.push(Scalar::Float64(f64::from_bits(1)));
10870                    } else {
10871                        let bits = f.to_bits();
10872                        let next = f64::from_bits(bits + 1);
10873                        out.push(Scalar::Float64(next - f));
10874                    }
10875                }
10876                Scalar::Float64(x) => {
10877                    if x.is_nan() || x.is_infinite() {
10878                        out.push(Scalar::Float64(f64::NAN));
10879                    } else {
10880                        let f = x.abs();
10881                        if f == 0.0 {
10882                            out.push(Scalar::Float64(f64::from_bits(1)));
10883                        } else {
10884                            let bits = f.to_bits();
10885                            let next = f64::from_bits(bits + 1);
10886                            out.push(Scalar::Float64(next - f));
10887                        }
10888                    }
10889                }
10890                _ => {
10891                    return Err(ColumnError::Type(TypeError::NonNumericValue {
10892                        value: format!("{v:?}"),
10893                        dtype: self.dtype,
10894                    }));
10895                }
10896            }
10897        }
10898        Self::new(DType::Float64, out)
10899    }
10900
10901    /// Convert angles from degrees to radians.
10902    pub fn radians(&self) -> Result<Self, ColumnError> {
10903        if let Some(out) = self.typed_float_unary(f64::to_radians) {
10904            return Ok(out);
10905        }
10906        let mut out = Vec::with_capacity(self.values.len());
10907        for v in &self.values {
10908            if v.is_missing() {
10909                out.push(Scalar::Float64(f64::NAN));
10910                continue;
10911            }
10912            match v {
10913                Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).to_radians())),
10914                Scalar::Float64(x) => out.push(Scalar::Float64(x.to_radians())),
10915                _ => {
10916                    return Err(ColumnError::Type(TypeError::NonNumericValue {
10917                        value: format!("{v:?}"),
10918                        dtype: self.dtype,
10919                    }));
10920                }
10921            }
10922        }
10923        Self::new(DType::Float64, out)
10924    }
10925
    /// Alias for [`Self::radians`]: converts angles from degrees to radians.
    ///
    /// Forwards directly to `radians` and returns its result (including any
    /// error) unchanged.
    pub fn deg2rad(&self) -> Result<Self, ColumnError> {
        self.radians()
    }
10930
10931    /// Convert angles from radians to degrees.
10932    pub fn degrees(&self) -> Result<Self, ColumnError> {
10933        if let Some(out) = self.typed_float_unary(f64::to_degrees) {
10934            return Ok(out);
10935        }
10936        let mut out = Vec::with_capacity(self.values.len());
10937        for v in &self.values {
10938            if v.is_missing() {
10939                out.push(Scalar::Float64(f64::NAN));
10940                continue;
10941            }
10942            match v {
10943                Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).to_degrees())),
10944                Scalar::Float64(x) => out.push(Scalar::Float64(x.to_degrees())),
10945                _ => {
10946                    return Err(ColumnError::Type(TypeError::NonNumericValue {
10947                        value: format!("{v:?}"),
10948                        dtype: self.dtype,
10949                    }));
10950                }
10951            }
10952        }
10953        Self::new(DType::Float64, out)
10954    }
10955
    /// Alias for [`Self::degrees`]: converts angles from radians to degrees.
    ///
    /// Forwards directly to `degrees` and returns its result (including any
    /// error) unchanged.
    pub fn rad2deg(&self) -> Result<Self, ColumnError> {
        self.degrees()
    }
10960
10961    /// Compute element-wise reciprocal (1/x).
10962    pub fn reciprocal(&self) -> Result<Self, ColumnError> {
10963        if let Some(out) = self.typed_float_unary(|x| 1.0 / x) {
10964            return Ok(out);
10965        }
10966        let mut out = Vec::with_capacity(self.values.len());
10967        for v in &self.values {
10968            if v.is_missing() {
10969                out.push(Scalar::Float64(f64::NAN));
10970                continue;
10971            }
10972            match v {
10973                Scalar::Int64(x) => out.push(Scalar::Float64(1.0 / (*x as f64))),
10974                Scalar::Float64(x) => out.push(Scalar::Float64(1.0 / x)),
10975                _ => {
10976                    return Err(ColumnError::Type(TypeError::NonNumericValue {
10977                        value: format!("{v:?}"),
10978                        dtype: self.dtype,
10979                    }));
10980                }
10981            }
10982        }
10983        Self::new(DType::Float64, out)
10984    }
10985
10986    /// Compute element-wise square (x^2).
10987    pub fn square(&self) -> Result<Self, ColumnError> {
10988        // Typed, dtype-preserving fast path (all-valid only): Int64 stays Int64
10989        // (`x * x`, same overflow behavior as the scalar loop); Float64 squares
10990        // over the f64 buffer. Bit-identical.
10991        if let Some(data) = self.as_i64_slice() {
10992            return Ok(Self::from_i64_values(data.iter().map(|&x| x * x).collect()));
10993        }
10994        if let Some(data) = self.as_f64_slice() {
10995            return Ok(Self::from_f64_values(data.iter().map(|&x| x * x).collect()));
10996        }
10997        let mut out = Vec::with_capacity(self.values.len());
10998        for v in &self.values {
10999            if v.is_missing() {
11000                out.push(Scalar::Float64(f64::NAN));
11001                continue;
11002            }
11003            match v {
11004                Scalar::Int64(x) => out.push(Scalar::Int64(x * x)),
11005                Scalar::Float64(x) => out.push(Scalar::Float64(x * x)),
11006                _ => {
11007                    return Err(ColumnError::Type(TypeError::NonNumericValue {
11008                        value: format!("{v:?}"),
11009                        dtype: self.dtype,
11010                    }));
11011                }
11012            }
11013        }
11014        let dtype = match self.dtype {
11015            DType::Int64 => DType::Int64,
11016            _ => DType::Float64,
11017        };
11018        Self::new(dtype, out)
11019    }
11020
11021    /// Shift column values by `periods` positions, filling vacated slots
11022    /// with `fill`.
11023    ///
11024    /// Matches `pd.Series.shift(periods, fill_value)` for the positional
11025    /// form. Positive periods shift right (vacates the head); negative
11026    /// periods shift left (vacates the tail).
11027    pub fn shift(&self, periods: i64, fill: Scalar) -> Result<Self, ColumnError> {
11028        let len = self.values.len();
11029        if len == 0 || periods == 0 {
11030            return Ok(self.clone());
11031        }
11032        let abs = periods.unsigned_abs() as usize;
11033        let mut out: Vec<Scalar> = Vec::with_capacity(len);
11034        if abs >= len {
11035            for _ in 0..len {
11036                out.push(fill.clone());
11037            }
11038        } else if periods > 0 {
11039            for _ in 0..abs {
11040                out.push(fill.clone());
11041            }
11042            out.extend_from_slice(&self.values[..len - abs]);
11043        } else {
11044            out.extend_from_slice(&self.values[abs..]);
11045            for _ in 0..abs {
11046                out.push(fill.clone());
11047            }
11048        }
11049        Self::new(self.dtype, out)
11050    }
11051
11052    /// Clip numeric values to `[lower, upper]`.
11053    ///
11054    /// Matches `pd.Series.clip(lower, upper)`. `None` on either bound
11055    /// disables that side. Non-numeric inputs return a type error.
11056    /// Missing values pass through unchanged. Result dtype is Float64
11057    /// (via `infer_dtype`) to accommodate fractional clipping.
11058    pub fn clip(&self, lower: Option<f64>, upper: Option<f64>) -> Result<Self, ColumnError> {
11059        // Typed fast path: an all-valid numeric column clamps straight over its
11060        // contiguous buffer (output is always Float64), with no per-element
11061        // Scalar dispatch/clone or output Vec<Scalar>. Bit-identical — the scalar
11062        // loop applies the lower bound then the upper bound to v.to_f64(), which
11063        // for an all-valid Float64/Int64 column is exactly data[i] (as f64). NaN
11064        // floats mark the column invalid (validity.all() false), so as_*_slice
11065        // declines and missing values keep the Scalar path.
11066        let clamp = |mut x: f64| {
11067            if let Some(lo) = lower
11068                && x < lo
11069            {
11070                x = lo;
11071            }
11072            if let Some(hi) = upper
11073                && x > hi
11074            {
11075                x = hi;
11076            }
11077            x
11078        };
11079        if let Some(data) = self.as_f64_slice() {
11080            let out: Vec<f64> = data.iter().map(|&x| clamp(x)).collect();
11081            return Ok(Self::from_f64_values(out));
11082        }
11083        if let Some(data) = self.as_i64_slice() {
11084            let out: Vec<f64> = data.iter().map(|&x| clamp(x as f64)).collect();
11085            return Ok(Self::from_f64_values(out));
11086        }
11087
11088        let mut out = Vec::with_capacity(self.values.len());
11089        for v in &self.values {
11090            if v.is_missing() {
11091                out.push(v.clone());
11092                continue;
11093            }
11094            let numeric = match v.to_f64() {
11095                Ok(x) => x,
11096                Err(err) => return Err(ColumnError::Type(err)),
11097            };
11098            let mut clipped = numeric;
11099            if let Some(lo) = lower
11100                && clipped < lo
11101            {
11102                clipped = lo;
11103            }
11104            if let Some(hi) = upper
11105                && clipped > hi
11106            {
11107                clipped = hi;
11108            }
11109            out.push(Scalar::Float64(clipped));
11110        }
11111        Self::new(DType::Float64, out)
11112    }
11113
11114    /// Round numeric values to `decimals` decimal places.
11115    ///
11116    /// Matches `pd.Series.round(decimals)`. Negative `decimals` rounds
11117    /// to the left of the decimal point. Int columns pass through
11118    /// unchanged for decimals >= 0 and retain Int64 dtype for negative
11119    /// decimals. Bool columns pass through unchanged. Missing values are
11120    /// preserved.
11121    pub fn round(&self, decimals: i32) -> Result<Self, ColumnError> {
11122        if matches!(self.dtype, DType::Bool) || (self.dtype == DType::Int64 && decimals >= 0) {
11123            return Ok(self.clone());
11124        }
11125        if self.dtype == DType::Int64 {
11126            let out = self
11127                .values
11128                .iter()
11129                .map(|v| match v {
11130                    Scalar::Int64(value) => {
11131                        Scalar::Int64(round_i64_negative_decimals(*value, decimals))
11132                    }
11133                    Scalar::Null(kind) => Scalar::Null(*kind),
11134                    other => other.clone(),
11135                })
11136                .collect();
11137            return Self::new(DType::Int64, out);
11138        }
11139        let factor = 10f64.powi(decimals);
11140        // Typed fast path (mirror of `abs`): an all-valid Float64 column rounds
11141        // over its contiguous buffer and re-ingests typed, skipping the lazy
11142        // Scalar materialization, the 32 B/cell Vec<Scalar>, and Column::new's
11143        // revalidation passes. Bit-identical to the scalar loop below: the
11144        // formula is the same `(x*factor).round_ties_even()/factor`, and
11145        // from_f64_values re-marks any NaN result as missing exactly as
11146        // `Self::new(Float64, ..)` would (all-valid ⇒ no is_missing branch).
11147        if let Some(data) = self.as_f64_slice() {
11148            return Ok(Self::from_f64_values(
11149                data.iter()
11150                    .map(|&x| (x * factor).round_ties_even() / factor)
11151                    .collect(),
11152            ));
11153        }
11154        let mut out = Vec::with_capacity(self.values.len());
11155        for v in &self.values {
11156            if v.is_missing() {
11157                out.push(v.clone());
11158                continue;
11159            }
11160            match v.to_f64() {
11161                Ok(x) => out.push(Scalar::Float64((x * factor).round_ties_even() / factor)),
11162                Err(err) => return Err(ColumnError::Type(err)),
11163            }
11164        }
11165        Self::new(DType::Float64, out)
11166    }
11167
11168    /// Per-row boolean membership test against `needles`.
11169    ///
11170    /// Matches `pd.Series.isin(values)`. The result is always a Bool
11171    /// column the same length as `self`. Missing input positions map
11172    /// to `false` (pandas convention — NaN is never "in" a set).
11173    pub fn isin(&self, needles: &[Scalar]) -> Result<Self, ColumnError> {
11174        #[derive(Hash, PartialEq, Eq)]
11175        enum Key<'a> {
11176            Bool(bool),
11177            Int64(i64),
11178            FloatBits(u64),
11179            Utf8(&'a str),
11180            Timedelta64(i64),
11181            Datetime64(i64),
11182            Period(i64),
11183            Interval(u64, u64, IntervalClosed),
11184        }
11185        fn key_of(v: &Scalar) -> Option<Key<'_>> {
11186            if v.is_missing() {
11187                return None;
11188            }
11189            Some(match v {
11190                Scalar::Bool(b) => Key::Bool(*b),
11191                Scalar::Int64(i) => Key::Int64(*i),
11192                Scalar::Float64(f) => {
11193                    let norm = if *f == 0.0 { 0.0 } else { *f };
11194                    Key::FloatBits(norm.to_bits())
11195                }
11196                Scalar::Utf8(s) => Key::Utf8(s.as_str()),
11197                Scalar::Timedelta64(v) => Key::Timedelta64(*v),
11198                Scalar::Datetime64(v) => Key::Datetime64(*v),
11199                Scalar::Period(v) => Key::Period(*v),
11200                Scalar::Interval(v) => {
11201                    let (left, right, closed) = interval_key(v);
11202                    Key::Interval(left, right, closed)
11203                }
11204                Scalar::Null(_) => return None,
11205            })
11206        }
11207
11208        // Typed dense-membership fast path: an all-valid Int64 column tested
11209        // against bounded Int64 needles uses a direct-address presence bitset
11210        // (indexed by `needle - min`) scanned over the contiguous i64 buffer,
11211        // instead of a per-element HashSet probe over materialized Scalars.
11212        // Bit-identical: an Int64 value's key is `Key::Int64`, which only ever
11213        // matches an Int64 needle (a Float64 5.0 needle is `Key::FloatBits`, a
11214        // distinct key), so the membership answer is exactly "is this i64 one of
11215        // the Int64 needles". Falls back for non-Int64 self/needle spans.
11216        if let Some(data) = self.as_i64_slice() {
11217            let mut n_min = i64::MAX;
11218            let mut n_max = i64::MIN;
11219            let mut saw_int_needle = false;
11220            for needle in needles {
11221                if let Scalar::Int64(v) = needle {
11222                    saw_int_needle = true;
11223                    n_min = n_min.min(*v);
11224                    n_max = n_max.max(*v);
11225                }
11226            }
11227            if !saw_int_needle {
11228                return Ok(Self::from_bool_values(vec![false; data.len()]));
11229            }
11230            let span = i128::from(n_max) - i128::from(n_min) + 1;
11231            if span > 0 && span <= (1i128 << 24) {
11232                let mut present = vec![false; span as usize];
11233                for needle in needles {
11234                    if let Scalar::Int64(v) = needle {
11235                        present[(v - n_min) as usize] = true;
11236                    }
11237                }
11238                let out: Vec<bool> = data
11239                    .iter()
11240                    .map(|&v| v >= n_min && v <= n_max && present[(v - n_min) as usize])
11241                    .collect();
11242                return Ok(Self::from_bool_values(out));
11243            }
11244        }
11245
11246        let mut lookup: FxHashSet<Key<'_>> = FxHashSet::default();
11247        for n in needles {
11248            if let Some(k) = key_of(n) {
11249                lookup.insert(k);
11250            }
11251        }
11252
11253        // Typed all-valid Bool output — every slot is a definite true/false
11254        // (missing input maps to false, never to a missing output), so this is
11255        // the same column `Self::new(DType::Bool, Vec<Scalar::Bool>)` builds,
11256        // minus the 32 B/elem Scalar wrap and the validity scan (the Int64
11257        // dense path above already emits this way).
11258        let out: Vec<bool> = self
11259            .values
11260            .iter()
11261            .map(|v| match key_of(v) {
11262                Some(k) => lookup.contains(&k),
11263                None => false,
11264            })
11265            .collect();
11266        Ok(Self::from_bool_values(out))
11267    }
11268
11269    /// Unique values in first-seen order, missing values dropped.
11270    ///
11271    /// Matches `pd.Series.unique()` (pandas returns values in order of
11272    /// appearance and drops NaN/NA). Float NaN is deduplicated on bit
11273    /// pattern; +0.0 / -0.0 fold to the same key.
11274    pub fn unique(&self) -> Result<Self, ColumnError> {
11275        // Dense direct-address fast path: an all-valid, bounded-range Int64
11276        // column dedups via a seen-bitset indexed by `v - min` — hash-free, no
11277        // per-element Scalar enum — preserving first-seen order. Bit-identical to
11278        // the HashSet path below (all-valid ⇒ nothing missing to skip; output is
11279        // the same first-seen distinct Int64 values). Same gate as isin/dense
11280        // duplicated (`i64_direct_address_range`).
11281        if let Some(data) = self.as_i64_slice()
11282            && let Some((min, range)) = i64_direct_address_range(data)
11283        {
11284            let mut seen = vec![false; range];
11285            let mut out: Vec<i64> = Vec::new();
11286            for &v in data {
11287                let slot = (v as i128 - min as i128) as usize;
11288                if !seen[slot] {
11289                    seen[slot] = true;
11290                    out.push(v);
11291                }
11292            }
11293            return Ok(Self::from_i64_values(out));
11294        }
11295
11296        #[derive(Hash, PartialEq, Eq)]
11297        enum Key<'a> {
11298            Bool(bool),
11299            Int64(i64),
11300            FloatBits(u64),
11301            Utf8(&'a str),
11302            Timedelta64(i64),
11303            Datetime64(i64),
11304            Period(i64),
11305            Interval(u64, u64, IntervalClosed),
11306        }
11307
11308        let mut seen: FxHashSet<Key<'_>> = FxHashSet::default();
11309        let mut out = Vec::new();
11310        for v in &self.values {
11311            if v.is_missing() {
11312                continue;
11313            }
11314            let key = match v {
11315                Scalar::Bool(b) => Key::Bool(*b),
11316                Scalar::Int64(i) => Key::Int64(*i),
11317                Scalar::Float64(f) => {
11318                    let norm = if *f == 0.0 { 0.0 } else { *f };
11319                    Key::FloatBits(norm.to_bits())
11320                }
11321                Scalar::Utf8(s) => Key::Utf8(s.as_str()),
11322                Scalar::Timedelta64(v) => Key::Timedelta64(*v),
11323                Scalar::Datetime64(v) => Key::Datetime64(*v),
11324                Scalar::Period(v) => Key::Period(*v),
11325                Scalar::Interval(v) => {
11326                    let (left, right, closed) = interval_key(v);
11327                    Key::Interval(left, right, closed)
11328                }
11329                Scalar::Null(_) => continue,
11330            };
11331            if seen.insert(key) {
11332                out.push(v.clone());
11333            }
11334        }
11335        Self::new(self.dtype, out)
11336    }
11337
11338    /// Set difference: values in self that are not in other.
11339    ///
11340    /// Matches np.setdiff1d().
11341    pub fn setdiff1d(&self, other: &Self) -> Result<Self, ColumnError> {
11342        let other_unique = other.unique()?;
11343        // O(N+M): hash-set membership for `other`, plus a `seen` set replacing
11344        // the O(N²) `out.any(...)` first-seen dedup.
11345        let other_set: FxHashSet<SetMemberKey<'_>> = other_unique
11346            .values()
11347            .iter()
11348            .filter_map(set_member_key)
11349            .collect();
11350        let mut seen: FxHashSet<SetMemberKey<'_>> = FxHashSet::default();
11351        let mut out = Vec::new();
11352        for v in &self.values {
11353            let Some(key) = set_member_key(v) else {
11354                continue;
11355            };
11356            if !other_set.contains(&key) && seen.insert(key) {
11357                out.push(v.clone());
11358            }
11359        }
11360        Self::new(self.dtype, out)
11361    }
11362
11363    /// Set intersection: values common to both columns.
11364    ///
11365    /// Matches np.intersect1d().
11366    pub fn intersect1d(&self, other: &Self) -> Result<Self, ColumnError> {
11367        let self_unique = self.unique()?;
11368        let other_unique = other.unique()?;
11369        let other_set: FxHashSet<SetMemberKey<'_>> = other_unique
11370            .values()
11371            .iter()
11372            .filter_map(set_member_key)
11373            .collect();
11374        let mut out = Vec::new();
11375        for v in self_unique.values() {
11376            let Some(key) = set_member_key(v) else {
11377                continue;
11378            };
11379            if other_set.contains(&key) {
11380                out.push(v.clone());
11381            }
11382        }
11383        Self::new(self.dtype, out)
11384    }
11385
11386    /// Set union: unique values from both columns.
11387    ///
11388    /// Matches np.union1d().
11389    pub fn union1d(&self, other: &Self) -> Result<Self, ColumnError> {
11390        let mut combined = self.values.to_vec();
11391        combined.extend(other.values().iter().cloned());
11392        let temp = Self::new(self.dtype, combined)?;
11393        temp.unique()
11394    }
11395
11396    /// Set symmetric difference: unique values in either but not both.
11397    ///
11398    /// Matches np.setxor1d(). Returns unique values that are in exactly
11399    /// one of the input arrays.
11400    pub fn setxor1d(&self, other: &Self) -> Result<Self, ColumnError> {
11401        let a_unique = self.unique()?;
11402        let b_unique = other.unique()?;
11403        let a_set: FxHashSet<SetMemberKey<'_>> = a_unique
11404            .values()
11405            .iter()
11406            .filter_map(set_member_key)
11407            .collect();
11408        let b_set: FxHashSet<SetMemberKey<'_>> = b_unique
11409            .values()
11410            .iter()
11411            .filter_map(set_member_key)
11412            .collect();
11413        let mut out = Vec::new();
11414        // Values in a but not in b
11415        for v in a_unique.values() {
11416            let Some(key) = set_member_key(v) else {
11417                continue;
11418            };
11419            if !b_set.contains(&key) {
11420                out.push(v.clone());
11421            }
11422        }
11423        // Values in b but not in a
11424        for v in b_unique.values() {
11425            let Some(key) = set_member_key(v) else {
11426                continue;
11427            };
11428            if !a_set.contains(&key) {
11429                out.push(v.clone());
11430            }
11431        }
11432        Self::new(self.dtype, out)
11433    }
11434
11435    /// Test whether each element is contained in other.
11436    ///
11437    /// Matches np.in1d(). Returns Bool column.
11438    pub fn in1d(&self, other: &Self) -> Result<Self, ColumnError> {
11439        let other_unique = other.unique()?;
11440        let other_set: FxHashSet<SetMemberKey<'_>> = other_unique
11441            .values()
11442            .iter()
11443            .filter_map(set_member_key)
11444            .collect();
11445        // Typed all-valid Bool output — same equivalence as isin above.
11446        let mut out = Vec::with_capacity(self.values.len());
11447        for v in &self.values {
11448            let found = match set_member_key(v) {
11449                Some(key) => other_set.contains(&key),
11450                None => false, // missing is never "in" the set (matches the scan)
11451            };
11452            out.push(found);
11453        }
11454        Ok(Self::from_bool_values(out))
11455    }
11456
    /// Count occurrences of each distinct value.
    ///
    /// Matches `pd.Series.value_counts()` default behavior at the
    /// columnar level: missing values are dropped, counts are sorted
    /// descending, and first-seen order breaks ties.
    ///
    /// Equivalent to calling [`Self::value_counts_with_options`] with
    /// `normalize = false`, `sort = true`, `ascending = false`,
    /// `dropna = true`. Returns the `(values, counts)` column pair produced
    /// by that call, or its error.
    pub fn value_counts(&self) -> Result<(Self, Self), ColumnError> {
        // Argument order: (normalize, sort, ascending, dropna).
        self.value_counts_with_options(false, true, false, true)
    }
11465
11466    /// Count occurrences of each distinct value with pandas-style options.
11467    ///
11468    /// Returns a pair of columns `(values, counts)`. The `values`
11469    /// column preserves the source dtype; the `counts` column is Int64
11470    /// unless `normalize=true`, in which case it is Float64.
11471    pub fn value_counts_with_options(
11472        &self,
11473        normalize: bool,
11474        sort: bool,
11475        ascending: bool,
11476        dropna: bool,
11477    ) -> Result<(Self, Self), ColumnError> {
11478        // O(N) tally: a `set_member_key`-keyed hash map gives O(1) lookup
11479        // instead of the old O(distinct) linear `counts.iter().find(semantic_eq)`
11480        // per value (O(N·distinct), quadratic for high-cardinality data). The
11481        // `counts` Vec is still built in first-seen order, so the later
11482        // stable count-sort breaks ties identically. Bit-identical: is_missing()
11483        // is tested first (so NaN/NAT sentinels stay in missing_count exactly as
11484        // before), and for the remaining values set_member_key equality matches
11485        // semantic_eq (the same key Column::unique uses; ±0.0 normalized).
11486        let mut counts: Vec<(Scalar, usize)> = Vec::new();
11487        let mut index: rustc_hash::FxHashMap<SetMemberKey<'_>, usize> =
11488            rustc_hash::FxHashMap::default();
11489        let mut missing_count = 0_usize;
11490
11491        for value in &self.values {
11492            if value.is_missing() {
11493                missing_count += 1;
11494                continue;
11495            }
11496            let Some(key) = set_member_key(value) else {
11497                // Unreachable: every non-missing scalar has a key.
11498                counts.push((value.clone(), 1));
11499                continue;
11500            };
11501            if let Some(&i) = index.get(&key) {
11502                counts[i].1 += 1;
11503            } else {
11504                index.insert(key, counts.len());
11505                counts.push((value.clone(), 1));
11506            }
11507        }
11508
11509        if !dropna && missing_count > 0 {
11510            counts.push((Scalar::Null(NullKind::NaN), missing_count));
11511        }
11512
11513        if sort {
11514            if ascending {
11515                counts.sort_by_key(|(_, count)| *count);
11516            } else {
11517                counts.sort_by_key(|(_, count)| std::cmp::Reverse(*count));
11518            }
11519        }
11520
11521        let total = if normalize {
11522            counts.iter().map(|(_, count)| *count).sum::<usize>() as f64
11523        } else {
11524            1.0
11525        };
11526
11527        let mut values_out = Vec::with_capacity(counts.len());
11528        let mut counts_out = Vec::with_capacity(counts.len());
11529        for (value, count) in counts {
11530            values_out.push(value);
11531            if normalize {
11532                let normalized = if total == 0.0 {
11533                    0.0
11534                } else {
11535                    count as f64 / total
11536                };
11537                counts_out.push(Scalar::Float64(normalized));
11538            } else {
11539                counts_out.push(Scalar::Int64(i64::try_from(count).unwrap_or(i64::MAX)));
11540            }
11541        }
11542
11543        let values = Self::new(self.dtype, values_out)?;
11544        let counts = Self::new(
11545            if normalize {
11546                DType::Float64
11547            } else {
11548                DType::Int64
11549            },
11550            counts_out,
11551        )?;
11552        Ok((values, counts))
11553    }
11554
11555    #[must_use]
11556    pub fn semantic_eq(&self, other: &Self) -> bool {
11557        self.dtype == other.dtype
11558            && self.values.len() == other.values.len()
11559            && self
11560                .values
11561                .iter()
11562                .zip(&other.values)
11563                .all(|(left, right)| left.semantic_eq(right))
11564    }
11565
11566    /// Element-wise comparison for approximate equality.
11567    ///
11568    /// Matches np.isclose(). Returns True where |a - b| <= atol + rtol * |b|.
11569    pub fn isclose(&self, other: &Self, rtol: f64, atol: f64) -> Result<Self, ColumnError> {
11570        if self.len() != other.len() {
11571            return Err(ColumnError::LengthMismatch {
11572                left: self.len(),
11573                right: other.len(),
11574            });
11575        }
11576        let mut out = Vec::with_capacity(self.values.len());
11577        for (a, b) in self.values.iter().zip(&other.values) {
11578            if a.is_missing() || b.is_missing() {
11579                out.push(Scalar::Bool(false));
11580                continue;
11581            }
11582            let af = a.to_f64().map_err(ColumnError::Type)?;
11583            let bf = b.to_f64().map_err(ColumnError::Type)?;
11584            let close = (af - bf).abs() <= atol + rtol * bf.abs();
11585            out.push(Scalar::Bool(close));
11586        }
11587        Self::new(DType::Bool, out)
11588    }
11589
11590    /// Check if all elements are approximately equal.
11591    ///
11592    /// Matches np.allclose(). Returns True if all pairs satisfy isclose.
11593    pub fn allclose(&self, other: &Self, rtol: f64, atol: f64) -> Result<bool, ColumnError> {
11594        let close = self.isclose(other, rtol, atol)?;
11595        for v in close.values() {
11596            match v {
11597                Scalar::Bool(true) => continue,
11598                Scalar::Bool(false) => return Ok(false),
11599                _ => return Ok(false),
11600            }
11601        }
11602        Ok(true)
11603    }
11604}
11605
11606// ---------------------------------------------------------------------------
11607// AG-14: Database Cracking — Adaptive Column Sorting
11608// ---------------------------------------------------------------------------
11609
/// Adaptive crack index for progressive column partitioning.
///
/// Maintains a permutation of row indices and a sorted list of crack points.
/// Each filter operation partitions the relevant region of the permutation
/// around the predicate pivot, so repeated queries progressively order the
/// permutation by column value without ever sorting it in full.
///
/// Only works with numeric columns (values convertible to f64).
///
/// The index stores row positions only; the column itself is passed to each
/// filter call.
///
/// # Example
/// ```ignore
/// let mut crack = CrackIndex::new(column.len());
/// let gt5 = crack.filter_gt(&column, 5.0);  // partitions all rows around 5.0
/// let gt3 = crack.filter_gt(&column, 3.0);  // refines: only re-partitions the `<= 5.0` region
/// ```
pub struct CrackIndex {
    /// Permuted row indices. Between consecutive crack points,
    /// elements are unsorted but bounded by the crack values.
    perm: Vec<usize>,
    /// Crack points as `(pivot_value, split_position_in_perm)`, kept sorted
    /// by pivot value.
    /// All perm[..split] map to values <= pivot, perm[split..] map to values > pivot
    /// (within the containing region).
    cracks: Vec<(f64, usize)>,
}
11633
11634impl CrackIndex {
11635    /// Create a new crack index for a column of `len` rows.
11636    #[must_use]
11637    pub fn new(len: usize) -> Self {
11638        Self {
11639            perm: (0..len).collect(),
11640            cracks: Vec::new(),
11641        }
11642    }
11643
11644    /// Number of crack points recorded so far.
11645    #[must_use]
11646    pub fn num_cracks(&self) -> usize {
11647        self.cracks.len()
11648    }
11649
11650    /// Return row indices where `column[row] > value`.
11651    pub fn filter_gt(&mut self, column: &Column, value: f64) -> Vec<usize> {
11652        let split = self.crack_at(column, value);
11653        self.perm[split..].to_vec()
11654    }
11655
11656    /// Return row indices where `column[row] <= value`.
11657    pub fn filter_lte(&mut self, column: &Column, value: f64) -> Vec<usize> {
11658        let split = self.crack_at(column, value);
11659        self.perm[..split]
11660            .iter()
11661            .copied()
11662            .filter(|&idx| {
11663                column
11664                    .value(idx)
11665                    .and_then(|v| v.to_f64().ok())
11666                    .is_some_and(|f| f <= value)
11667            })
11668            .collect()
11669    }
11670
11671    /// Return row indices where `column[row] >= value`.
11672    pub fn filter_gte(&mut self, column: &Column, value: f64) -> Vec<usize> {
11673        // Crack just below value: use value - epsilon conceptually.
11674        // We crack at value, then scan the <= region for exact matches.
11675        let split = self.crack_at(column, value);
11676        // Everything in perm[split..] is > value.
11677        // Also include exact matches from perm[..split].
11678        let mut result: Vec<usize> = self.perm[split..].to_vec();
11679        for &idx in &self.perm[..split] {
11680            if let Some(v) = column.value(idx)
11681                && let Ok(f) = v.to_f64()
11682                && f == value
11683            {
11684                result.push(idx);
11685            }
11686        }
11687        result
11688    }
11689
11690    /// Return row indices where `column[row] < value`.
11691    pub fn filter_lt(&mut self, column: &Column, value: f64) -> Vec<usize> {
11692        let split = self.crack_at(column, value);
11693        // perm[..split] has values <= value. Filter out exact matches.
11694        self.perm[..split]
11695            .iter()
11696            .copied()
11697            .filter(|&idx| {
11698                column
11699                    .value(idx)
11700                    .and_then(|v| v.to_f64().ok())
11701                    .is_some_and(|f| f < value)
11702            })
11703            .collect()
11704    }
11705
11706    /// Return row indices where `column[row] == value`.
11707    pub fn filter_eq(&mut self, column: &Column, value: f64) -> Vec<usize> {
11708        let split = self.crack_at(column, value);
11709        // Exact matches are all in perm[..split] (the <= region).
11710        self.perm[..split]
11711            .iter()
11712            .copied()
11713            .filter(|&idx| {
11714                column
11715                    .value(idx)
11716                    .and_then(|v| v.to_f64().ok())
11717                    .is_some_and(|f| f == value)
11718            })
11719            .collect()
11720    }
11721
11722    /// Ensure a crack point exists at `value`. Returns the split position
11723    /// such that perm[..split] are all <= value and perm[split..] are all > value.
11724    fn crack_at(&mut self, column: &Column, value: f64) -> usize {
11725        // Check if we already have this exact crack point.
11726        if let Ok(pos) = self.cracks.binary_search_by(|probe| {
11727            probe
11728                .0
11729                .partial_cmp(&value)
11730                .unwrap_or(std::cmp::Ordering::Equal)
11731        }) {
11732            return self.cracks[pos].1;
11733        }
11734
11735        // Find the region to partition: between the nearest crack points.
11736        let (region_start, region_end) = self.find_region(value);
11737
11738        // Partition perm[region_start..region_end] around `value`.
11739        // Move indices with column[idx] <= value to the left, > value to the right.
11740        let split = self.partition_region(column, region_start, region_end, value);
11741
11742        // Insert the new crack point, maintaining sorted order.
11743        let insert_pos = self
11744            .cracks
11745            .binary_search_by(|probe| {
11746                probe
11747                    .0
11748                    .partial_cmp(&value)
11749                    .unwrap_or(std::cmp::Ordering::Equal)
11750            })
11751            .unwrap_or_else(|pos| pos);
11752        self.cracks.insert(insert_pos, (value, split));
11753
11754        split
11755    }
11756
11757    /// Find the region [start, end) in `perm` that contains `value`.
11758    fn find_region(&self, value: f64) -> (usize, usize) {
11759        let mut start = 0;
11760        let mut end = self.perm.len();
11761
11762        for &(crack_val, crack_pos) in &self.cracks {
11763            if crack_val < value {
11764                start = start.max(crack_pos);
11765            } else {
11766                end = end.min(crack_pos);
11767                break;
11768            }
11769        }
11770
11771        (start, end)
11772    }
11773
11774    /// Partition perm[start..end] so that indices with column values <= pivot
11775    /// come first. Returns the split position (absolute index in perm).
11776    fn partition_region(&mut self, column: &Column, start: usize, end: usize, pivot: f64) -> usize {
11777        // Simple two-pointer partition (like quicksort partition).
11778        let region = &mut self.perm[start..end];
11779        let mut write = 0;
11780
11781        for read in 0..region.len() {
11782            let idx = region[read];
11783            let val = column
11784                .value(idx)
11785                .and_then(|v| v.to_f64().ok())
11786                .unwrap_or(f64::NEG_INFINITY); // missing values sort to left
11787
11788            if val <= pivot {
11789                region.swap(write, read);
11790                write += 1;
11791            }
11792        }
11793
11794        start + write
11795    }
11796}
11797
11798#[cfg(test)]
11799mod tests {
11800    use fp_types::{DType, Interval, IntervalClosed, NullKind, Scalar, SparseDType};
11801
11802    use super::{
11803        ArithmeticOp, Column, ColumnData, ColumnError, ScalarValues, SparseColumn, ValidityMask,
11804    };
11805
11806    #[test]
11807    fn reindex_injects_missing_values() {
11808        let column = Column::from_values(vec![Scalar::Int64(10), Scalar::Int64(20)])
11809            .expect("column should build");
11810
11811        let out = column
11812            .reindex_by_positions(&[Some(1), None, Some(0)])
11813            .expect("reindex should work");
11814
11815        assert_eq!(
11816            out.values(),
11817            &[
11818                Scalar::Int64(20),
11819                Scalar::Null(NullKind::Null),
11820                Scalar::Int64(10)
11821            ]
11822        );
11823    }
11824
11825    #[test]
11826    fn take_positions_matches_validated_materialization() {
11827        let column = Column::new(
11828            DType::Float64,
11829            vec![
11830                Scalar::Float64(1.5),
11831                Scalar::Null(NullKind::NaN),
11832                Scalar::Float64(3.5),
11833            ],
11834        )
11835        .expect("column should build");
11836
11837        let positions = [2, 1, 0, 2];
11838        let gathered = column.take_positions(&positions);
11839        let expected_values = positions
11840            .iter()
11841            .map(|&position| column.values()[position].clone())
11842            .collect::<Vec<_>>();
11843        let expected =
11844            Column::new(column.dtype(), expected_values).expect("validated materialization");
11845
11846        assert_eq!(gathered.dtype(), expected.dtype());
11847        assert_eq!(gathered.values(), expected.values());
11848        assert_eq!(gathered.validity(), expected.validity());
11849
11850        let empty = column.take_positions(&[]);
11851        assert_eq!(empty.dtype(), column.dtype());
11852        assert!(empty.values().is_empty());
11853        assert_eq!(empty.validity(), &ValidityMask::all_invalid(0));
11854    }
11855
    /// For each primitive dtype listed in `cases`, gathering an all-valid
    /// column with `take_positions` must produce the same dtype, values and
    /// validity as rebuilding the gathered scalars through `Column::new`.
    #[test]
    fn take_positions_all_valid_primitives_match_validated_materialization() {
        // One all-valid three-element fixture per dtype under test.
        let cases = [
            (
                DType::Bool,
                vec![Scalar::Bool(false), Scalar::Bool(true), Scalar::Bool(false)],
            ),
            (
                DType::Int64,
                vec![Scalar::Int64(10), Scalar::Int64(-5), Scalar::Int64(42)],
            ),
            (
                DType::Float64,
                vec![
                    Scalar::Float64(1.25),
                    Scalar::Float64(-0.0),
                    Scalar::Float64(9.5),
                ],
            ),
            (
                DType::Timedelta64,
                vec![
                    Scalar::Timedelta64(10),
                    Scalar::Timedelta64(-5),
                    Scalar::Timedelta64(42),
                ],
            ),
            (
                DType::Datetime64,
                vec![
                    Scalar::Datetime64(10),
                    Scalar::Datetime64(-5),
                    Scalar::Datetime64(42),
                ],
            ),
            (
                DType::Period,
                vec![Scalar::Period(10), Scalar::Period(-5), Scalar::Period(42)],
            ),
        ];

        // The gather repeats position 2 and visits positions out of order.
        let positions = [2, 0, 2, 1];
        for (dtype, values) in cases {
            let column = Column::new(dtype, values).expect("column should build");
            let gathered = column.take_positions(&positions);
            // Reference result built by cloning scalars and re-validating.
            let expected_values = positions
                .iter()
                .map(|&position| column.values()[position].clone())
                .collect::<Vec<_>>();
            let expected =
                Column::new(column.dtype(), expected_values).expect("validated materialization");

            assert_eq!(gathered.dtype(), expected.dtype());
            assert_eq!(gathered.values(), expected.values());
            assert_eq!(gathered.validity(), expected.validity());
        }
    }
11913
    /// Gathered cells must equal the stored source scalars exactly, and the
    /// gathered validity bits must follow the source positions.
    #[test]
    fn take_positions_preserves_exact_null_kind_contract() {
        // Isomorphism contract for the typed-columnar storage epic
        // (br-frankenpandas-typed-columnar-storage-epic): `take_positions`
        // reproduces the EXACT scalar stored at each source position, including
        // the precise `NullKind` at invalid positions — not merely a
        // valid/invalid bit. `normalize_missing_for_dtype` preserves NaN/NaT
        // null kinds regardless of dtype, so a Float64/Int64 column can legally
        // hold `Null(NaT)`. A future migration to typed `ColumnData` +
        // `ValidityMask` that canonicalizes nulls per dtype would silently
        // break `values()` parity for such columns; this test must keep passing
        // through that migration (the typed store must carry per-position null
        // kind, e.g. a 2-bit NaN/NaT/Null code, not just a validity bit).
        for (dtype, real) in [
            (DType::Float64, Scalar::Float64(2.5)),
            (DType::Int64, Scalar::Int64(7)),
        ] {
            // Three different null kinds followed by one real value.
            let source = Column::new(
                dtype,
                vec![
                    Scalar::Null(NullKind::NaN),
                    Scalar::Null(NullKind::NaT),
                    Scalar::Null(NullKind::Null),
                    real,
                ],
            )
            .expect("column builds");
            // Compare against what the column actually stored, so the test is
            // robust to constructor canonicalization yet still pins that the
            // gather is byte-for-byte faithful to the stored representation.
            let stored = source.values().to_vec();
            // Position 1 is gathered twice to cover repeated reads.
            let positions = [3, 2, 1, 0, 1];
            let gathered = source.take_positions(&positions);
            for (out_idx, &pos) in positions.iter().enumerate() {
                assert_eq!(
                    gathered.values()[out_idx],
                    stored[pos],
                    "dtype {dtype:?}: take_positions must reproduce the exact stored scalar \
                     (incl. NullKind) for source position {pos}",
                );
                // Invalid source positions must stay invalid after the gather.
                assert_eq!(
                    gathered.validity().get(out_idx),
                    source.validity().get(pos),
                    "dtype {dtype:?}: validity must follow the gathered position {pos}",
                );
            }
        }
    }
11963
    /// A freshly built all-valid Float64 column carries a
    /// `ColumnData::Float64` typed cache, and gathering from it still matches
    /// the validated scalar materialization.
    #[test]
    fn primitive_columns_cache_typed_data_for_take_positions() {
        let column = Column::new(
            DType::Float64,
            vec![
                Scalar::Float64(1.25),
                Scalar::Float64(-0.0),
                Scalar::Float64(9.5),
            ],
        )
        .expect("column should build");

        // The private `data` field must already hold the typed buffer.
        assert!(matches!(column.data, Some(ColumnData::Float64(_))));
        let positions = [2, 0, 1, 2];
        let gathered = column.take_positions(&positions);
        // Reference result built by cloning scalars and re-validating.
        let expected = Column::new(
            DType::Float64,
            positions
                .iter()
                .map(|&position| column.values()[position].clone())
                .collect(),
        )
        .expect("validated materialization");

        assert_eq!(gathered.values(), expected.values());
        assert_eq!(gathered.validity(), expected.validity());
    }
11991
    /// Gathering an all-valid Float64 column yields the
    /// `ScalarValues::LazyAllValidFloat64` representation: the raw `f64`
    /// data is present immediately, while the scalar cell is only filled
    /// once `values()` has been read.
    ///
    /// The assertions are order-sensitive: the "not yet materialized" checks
    /// must run before the first `values()` call on `gathered`.
    #[test]
    fn float64_take_positions_defers_scalar_materialization() {
        let column = Column::new(
            DType::Float64,
            vec![
                Scalar::Float64(1.25),
                Scalar::Float64(-0.0),
                Scalar::Float64(9.5),
            ],
        )
        .expect("column should build");

        let positions = [2, 0, 1, 2];
        let gathered = column.take_positions(&positions);

        assert!(
            matches!(&gathered.values, ScalarValues::LazyAllValidFloat64 { .. }),
            "Float64 gather should defer scalar materialization"
        );
        if let ScalarValues::LazyAllValidFloat64 { data, values } = &gathered.values {
            // Compare bit patterns so that -0.0 is distinguished from 0.0.
            assert_eq!(
                data.iter().map(|value| value.to_bits()).collect::<Vec<_>>(),
                vec![
                    9.5f64.to_bits(),
                    1.25f64.to_bits(),
                    (-0.0f64).to_bits(),
                    9.5f64.to_bits(),
                ]
            );
            // No scalar materialization has happened yet.
            assert!(values.get().is_none());
        }
        // Length and validity are read before any `values()` call; the lazy
        // state is re-checked only after `values()` further below.
        assert_eq!(gathered.len(), positions.len());
        assert_eq!(
            gathered.validity(),
            &ValidityMask::all_valid(positions.len())
        );

        let expected = Column::new(
            DType::Float64,
            positions
                .iter()
                .map(|&position| column.values()[position].clone())
                .collect(),
        )
        .expect("validated materialization");

        // First read of `gathered.values()`.
        assert_eq!(gathered.values(), expected.values());
        assert!(
            matches!(&gathered.values, ScalarValues::LazyAllValidFloat64 { .. }),
            "Float64 gather should stay lazy after read"
        );
        if let ScalarValues::LazyAllValidFloat64 { values, .. } = &gathered.values {
            // The variant is unchanged, but the scalar cell is now populated.
            assert!(values.get().is_some());
        }
        assert_eq!(gathered.validity(), expected.validity());
    }
12048
    /// A reindex whose positions are all `Some` must equal the validated
    /// scalar materialization and must return the lazy Float64
    /// representation with no scalars materialized up front.
    #[test]
    fn reindex_all_present_matches_materialization_and_keeps_float64_lazy() {
        let column = Column::from_f64_values(vec![1.25, -0.0, f64::INFINITY]);

        let positions = [Some(2), Some(0), Some(1), Some(2)];
        let gathered = column
            .reindex_by_positions(&positions)
            .expect("all-present reindex should gather");

        assert!(
            matches!(&gathered.values, ScalarValues::LazyAllValidFloat64 { .. }),
            "all-present Float64 reindex should defer scalar materialization"
        );
        if let ScalarValues::LazyAllValidFloat64 { data, values } = &gathered.values {
            // Compare bit patterns so that -0.0 is distinguished from 0.0.
            assert_eq!(
                data.iter().map(|value| value.to_bits()).collect::<Vec<_>>(),
                vec![
                    f64::INFINITY.to_bits(),
                    1.25f64.to_bits(),
                    (-0.0f64).to_bits(),
                    f64::INFINITY.to_bits(),
                ]
            );
            // Checked before the first `values()` read on `gathered`.
            assert!(values.get().is_none());
        }

        // Reference result built by cloning scalars and re-validating.
        let expected = Column::new(
            DType::Float64,
            positions
                .iter()
                .map(|&position| column.values()[position.expect("present position")].clone())
                .collect(),
        )
        .expect("validated scalar materialization");

        assert_eq!(gathered.dtype(), expected.dtype());
        assert_eq!(gathered.values(), expected.values());
        assert_eq!(gathered.validity(), expected.validity());
    }
12088
12089    #[test]
12090    fn column_equality_ignores_skipped_typed_cache() {
12091        let column = Column::new(
12092            DType::Int64,
12093            vec![Scalar::Int64(10), Scalar::Int64(20), Scalar::Int64(30)],
12094        )
12095        .expect("column should build");
12096
12097        let json = serde_json::to_string(&column).expect("serialize");
12098        let roundtrip: Column = serde_json::from_str(&json).expect("deserialize");
12099
12100        assert!(column.data.is_some());
12101        assert!(roundtrip.data.is_none());
12102        assert_eq!(column, roundtrip);
12103    }
12104
12105    #[test]
12106    fn column_clone_preserves_values_without_copying_private_cache() {
12107        let column = Column::new(
12108            DType::Int64,
12109            vec![Scalar::Int64(10), Scalar::Int64(20), Scalar::Int64(30)],
12110        )
12111        .expect("column should build");
12112
12113        let cloned = column.clone();
12114
12115        assert!(column.data.is_some());
12116        assert!(cloned.data.is_none());
12117        assert_eq!(column, cloned);
12118    }
12119
    /// Cloning an all-valid Float64 column, either through
    /// `clone_dense_values_from_cache` or through `Column::clone`, yields the
    /// lazy Float64 representation; its scalar cell is only filled once
    /// `values()` has been read.
    ///
    /// The assertions are order-sensitive: the `is_none` checks must run
    /// before the first `values()` call on the clone.
    #[test]
    fn dense_primitive_clone_defers_float64_scalar_materialization_from_typed_cache() {
        let column = Column::new(
            DType::Float64,
            vec![
                Scalar::Float64(1.5),
                Scalar::Float64(-0.0),
                Scalar::Float64(3.25),
            ],
        )
        .expect("column should build");

        let cloned_values = column
            .clone_dense_values_from_cache()
            .expect("all-valid Float64 typed cache should clone");
        assert!(
            matches!(&cloned_values, ScalarValues::LazyAllValidFloat64 { .. }),
            "Float64 clone should defer scalar materialization"
        );
        if let ScalarValues::LazyAllValidFloat64 { data, values } = &cloned_values {
            // Compare bit patterns so that -0.0 is distinguished from 0.0.
            assert_eq!(
                data.iter().map(|value| value.to_bits()).collect::<Vec<_>>(),
                vec![1.5f64.to_bits(), (-0.0f64).to_bits(), 3.25f64.to_bits()]
            );
            assert!(values.get().is_none());
        }

        let cloned = column.clone();
        assert!(
            matches!(&cloned.values, ScalarValues::LazyAllValidFloat64 { .. }),
            "Column::clone should keep all-valid Float64 clone values lazy"
        );
        if let ScalarValues::LazyAllValidFloat64 { values, .. } = &cloned.values {
            // Nothing has read the clone's scalars yet.
            assert!(values.get().is_none());
        }
        // First read of `cloned.values()`.
        assert_eq!(cloned.values(), column.values());
        if let ScalarValues::LazyAllValidFloat64 { values, .. } = &cloned.values {
            // After the read the scalar cell is populated.
            assert!(values.get().is_some());
        }
        assert_eq!(cloned.validity(), column.validity());
        // The clone does not carry the typed cache of the source.
        assert!(cloned.data.is_none());
    }
12162
12163    #[test]
12164    fn dense_primitive_clone_falls_back_for_missing_values() {
12165        let column = Column::new(
12166            DType::Float64,
12167            vec![
12168                Scalar::Float64(1.5),
12169                Scalar::Null(NullKind::NaN),
12170                Scalar::Null(NullKind::Null),
12171            ],
12172        )
12173        .expect("column should build");
12174
12175        assert!(column.clone_dense_values_from_cache().is_none());
12176        let cloned = column.clone();
12177        assert_eq!(cloned.values(), column.values());
12178        assert_eq!(cloned.validity(), column.validity());
12179        assert!(cloned.data.is_none());
12180    }
12181
12182    #[test]
12183    fn numeric_addition_propagates_missing() {
12184        let left = Column::from_values(vec![
12185            Scalar::Int64(1),
12186            Scalar::Null(NullKind::Null),
12187            Scalar::Float64(f64::NAN),
12188        ])
12189        .expect("left");
12190        let right = Column::from_values(vec![Scalar::Int64(2), Scalar::Int64(5), Scalar::Int64(3)])
12191            .expect("right");
12192
12193        let out = left
12194            .binary_numeric(&right, ArithmeticOp::Add)
12195            .expect("add should pass");
12196
12197        assert_eq!(out.values()[0], Scalar::Float64(3.0));
12198        assert_eq!(out.values()[1], Scalar::Null(NullKind::NaN));
12199        assert_eq!(out.values()[2], Scalar::Null(NullKind::NaN));
12200    }
12201
    /// A sparse Int64 column with fill value 0 stores only the non-fill
    /// cells (positions 1 and 3) and converts back to the original dense
    /// values.
    #[test]
    fn sparse_column_omits_fill_values_and_materializes_dense() {
        let dtype = SparseDType::new(DType::Int64, Scalar::Int64(0)).expect("sparse dtype");
        let sparse = SparseColumn::from_dense(
            dtype,
            vec![
                Scalar::Int64(0),
                Scalar::Int64(5),
                Scalar::Int64(0),
                Scalar::Int64(-2),
            ],
        )
        .expect("sparse column");

        assert_eq!(sparse.value_dtype(), DType::Int64);
        assert_eq!(sparse.fill_value(), &Scalar::Int64(0));
        // Logical length counts every cell; npoints counts stored cells only.
        assert_eq!(sparse.len(), 4);
        assert_eq!(sparse.npoints(), 2);
        assert_eq!(sparse.indices(), &[1, 3]);
        assert_eq!(
            sparse.stored_values(),
            &[Scalar::Int64(5), Scalar::Int64(-2)]
        );

        // Densifying restores the fill value at the omitted positions.
        let dense = sparse.to_dense_column().expect("dense column");
        assert_eq!(dense.dtype(), DType::Int64);
        assert_eq!(
            dense.values(),
            &[
                Scalar::Int64(0),
                Scalar::Int64(5),
                Scalar::Int64(0),
                Scalar::Int64(-2),
            ]
        );
    }
12238
    /// When the fill value is a real number (0.0), a missing cell is not
    /// treated as fill: it is stored explicitly and survives the round trip
    /// back to dense values.
    #[test]
    fn sparse_column_preserves_nulls_when_fill_is_not_missing() {
        let dtype = SparseDType::new(DType::Float64, Scalar::Float64(0.0)).expect("sparse dtype");
        let sparse = SparseColumn::from_dense(
            dtype,
            vec![
                Scalar::Float64(0.0),
                Scalar::Null(NullKind::NaN),
                Scalar::Float64(2.5),
            ],
        )
        .expect("sparse column");

        // Only position 0 equals the fill value and is omitted.
        assert_eq!(sparse.indices(), &[1, 2]);
        assert_eq!(sparse.npoints(), 2);
        // Two stored points out of three cells.
        assert!((sparse.density() - (2.0 / 3.0)).abs() < f64::EPSILON);
        assert!(sparse.stored_values()[0].is_missing());
        assert_eq!(sparse.stored_values()[1], Scalar::Float64(2.5));

        let dense = sparse.to_dense_column().expect("dense column");
        assert_eq!(
            dense.values(),
            &[
                Scalar::Float64(0.0),
                Scalar::Null(NullKind::NaN),
                Scalar::Float64(2.5),
            ]
        );
    }
12268
    /// When the fill value is itself missing (`Null(NaN)`), every missing
    /// input cell is omitted from storage and comes back as the fill value
    /// on densification.
    #[test]
    fn sparse_column_missing_fill_omits_missing_values() {
        let dtype =
            SparseDType::new(DType::Float64, Scalar::Null(NullKind::NaN)).expect("sparse dtype");
        // Two differently spelled missing cells surround one real value.
        let sparse = SparseColumn::from_dense(
            dtype,
            vec![
                Scalar::Null(NullKind::Null),
                Scalar::Float64(1.5),
                Scalar::Float64(f64::NAN),
            ],
        )
        .expect("sparse column");

        assert_eq!(sparse.fill_value(), &Scalar::Null(NullKind::NaN));
        // Only the real value at position 1 is stored.
        assert_eq!(sparse.indices(), &[1]);
        assert_eq!(sparse.stored_values(), &[Scalar::Float64(1.5)]);
        // Both missing inputs densify to the fill value.
        assert_eq!(
            sparse.to_dense_values(),
            vec![
                Scalar::Null(NullKind::NaN),
                Scalar::Float64(1.5),
                Scalar::Null(NullKind::NaN),
            ]
        );
    }
12295
12296    // === Packed Bitvec ValidityMask Tests ===
12297
12298    #[test]
12299    fn validity_mask_from_values_packs_correctly() {
12300        let values = vec![
12301            Scalar::Int64(1),
12302            Scalar::Null(NullKind::Null),
12303            Scalar::Int64(3),
12304        ];
12305        let mask = ValidityMask::from_values(&values);
12306        assert_eq!(mask.len(), 3);
12307        assert!(mask.get(0));
12308        assert!(!mask.get(1));
12309        assert!(mask.get(2));
12310        assert_eq!(mask.count_valid(), 2);
12311    }
12312
12313    #[test]
12314    fn validity_mask_all_valid() {
12315        let mask = ValidityMask::all_valid(100);
12316        assert_eq!(mask.len(), 100);
12317        assert_eq!(mask.count_valid(), 100);
12318        assert!(
12319            mask.words.is_empty(),
12320            "all-valid masks store only the logical length"
12321        );
12322        for i in 0..100 {
12323            assert!(mask.get(i), "bit {i} should be valid");
12324        }
12325    }
12326
    /// For lengths around the 64- and 128-bit boundaries, the all-valid
    /// sentinel mask must be indistinguishable from a mask built from
    /// explicitly materialized all-valid words.
    #[test]
    fn validity_mask_all_valid_sentinel_matches_explicit_words() {
        for len in [1, 2, 63, 64, 65, 127, 128, 129] {
            let sentinel = ValidityMask::all_valid(len);
            let explicit =
                ValidityMask::from_words(ValidityMask::materialized_all_valid_words(len), len);

            // Equality and the per-bit iteration must both agree.
            assert_eq!(sentinel, explicit, "len {len}");
            assert_eq!(
                sentinel.bits().collect::<Vec<_>>(),
                explicit.bits().collect::<Vec<_>>(),
                "len {len}"
            );
            assert!(sentinel.all(), "len {len}");
            assert_eq!(sentinel.count_invalid(), 0, "len {len}");
        }
    }
12344
    /// Setting an already-valid bit leaves the all-valid sentinel untouched;
    /// clearing a bit forces the word storage to be materialized with only
    /// that bit invalid.
    ///
    /// The two `set` calls must run in this order for the assertions on
    /// `words` to be meaningful.
    #[test]
    fn validity_mask_all_valid_sentinel_materializes_on_clear() {
        let mut mask = ValidityMask::all_valid(130);
        mask.set(64, true);
        assert!(
            mask.words.is_empty(),
            "setting a valid bit preserves the sentinel"
        );

        mask.set(64, false);
        assert!(!mask.words.is_empty(), "clearing a bit materializes words");
        assert_eq!(mask.len(), 130);
        assert_eq!(mask.count_valid(), 129);
        assert!(!mask.get(64));
        // The neighbours on either side of the cleared bit stay valid.
        assert!(mask.get(63));
        assert!(mask.get(65));
        assert_eq!(mask.bits().filter(|valid| *valid).count(), 129);
    }
12363
/// An all-invalid mask of 100 cells reports zero valid cells.
#[test]
fn validity_mask_all_invalid() {
    let cleared = ValidityMask::all_invalid(100);
    assert_eq!(cleared.len(), 100);
    assert_eq!(cleared.count_valid(), 0);
    (0..100).for_each(|i| assert!(!cleared.get(i), "bit {i} should be invalid"));
}
12373
/// Sets bits at positions 0, 63, 64 and 127 of a 128-cell mask, then clears
/// one again, checking `get` and `count_valid` after each step.
#[test]
fn validity_mask_set_and_get() {
    let mut mask = ValidityMask::all_invalid(128);
    for pos in [0, 63, 64, 127] {
        mask.set(pos, true);
    }
    for pos in [0, 63, 64, 127] {
        assert!(mask.get(pos));
    }
    // A position that was never set stays invalid.
    assert!(!mask.get(1));
    assert_eq!(mask.count_valid(), 4);

    mask.set(63, false);
    assert!(!mask.get(63));
    assert_eq!(mask.count_valid(), 3);
}
12392
/// Checks `and_mask`, `or_mask` and `not_mask` on two 4-cell masks with
/// valid positions {0, 1} and {1, 2}.
#[test]
fn validity_mask_and_or_not() {
    let mut a = ValidityMask::all_invalid(4);
    for pos in [0, 1] {
        a.set(pos, true);
    }

    let mut b = ValidityMask::all_invalid(4);
    for pos in [1, 2] {
        b.set(pos, true);
    }

    // Intersection: only position 1 is valid in both.
    let intersection = a.and_mask(&b);
    assert!(intersection.get(1));
    assert!(!intersection.get(0));
    assert!(!intersection.get(2));
    assert_eq!(intersection.count_valid(), 1);

    // Union: positions 0, 1 and 2.
    let union = a.or_mask(&b);
    assert!(union.get(0));
    assert!(union.get(1));
    assert!(union.get(2));
    assert!(!union.get(3));
    assert_eq!(union.count_valid(), 3);

    // Complement of `a`: positions 2 and 3.
    let complement = a.not_mask();
    assert!(!complement.get(0));
    assert!(!complement.get(1));
    assert!(complement.get(2));
    assert!(complement.get(3));
    assert_eq!(complement.count_valid(), 2);
}
12423
/// Mask algebra (and / or / xor / not / slice / concat) with an `all_valid`
/// operand must produce the same results as an explicit bitmap would.
#[test]
fn validity_mask_sentinel_mask_algebra_matches_explicit_bitmap() {
    let full = ValidityMask::all_valid(5);
    let sparse = ValidityMask::from_values(&[
        Scalar::Int64(1),
        Scalar::Null(NullKind::Null),
        Scalar::Int64(3),
        Scalar::Null(NullKind::NaN),
        Scalar::Int64(5),
    ]);

    // AND with all-valid is the identity; OR with all-valid saturates.
    assert_eq!(full.and_mask(&sparse), sparse);
    assert_eq!(sparse.and_mask(&full), sparse);
    assert_eq!(full.or_mask(&sparse), full);
    assert_eq!(sparse.or_mask(&full), full);

    // XOR with all-valid flips the sparse mask.
    let flipped: Vec<_> = full.xor_mask(&sparse).bits().collect();
    assert_eq!(flipped, vec![false, true, false, true, false]);

    let negated: Vec<_> = full.not_mask().bits().collect();
    assert_eq!(negated, vec![false, false, false, false, false]);

    let window: Vec<_> = full.slice(1, 3).bits().collect();
    assert_eq!(window, vec![true, true, true]);

    let joined = full.concat(&ValidityMask::all_valid(2));
    assert_eq!(joined, ValidityMask::all_valid(7));
}
12456
/// `bits()` yields one bool per cell, `false` for both `Null` and a
/// `Float64` NaN.
#[test]
fn validity_mask_bits_iterator() {
    let cells = vec![
        Scalar::Int64(1),
        Scalar::Null(NullKind::Null),
        Scalar::Int64(3),
        Scalar::Float64(f64::NAN),
    ];
    let observed: Vec<bool> = ValidityMask::from_values(&cells).bits().collect();
    assert_eq!(observed, vec![true, false, true, false]);
}
12469
/// A mask survives a JSON round trip and serializes with a `"bits"` field.
#[test]
fn validity_mask_serde_round_trip() {
    let cells = vec![
        Scalar::Int64(1),
        Scalar::Null(NullKind::Null),
        Scalar::Int64(3),
    ];
    let original = ValidityMask::from_values(&cells);

    let encoded = serde_json::to_string(&original).expect("serialize");
    let decoded: ValidityMask = serde_json::from_str(&encoded).expect("deserialize");
    assert_eq!(original, decoded);

    // The wire format must keep exposing the `bits` key.
    assert!(
        encoded.contains("\"bits\""),
        "should serialize as bits field"
    );
}
12484
/// A mask built from no values is empty in every observable way.
#[test]
fn validity_mask_empty() {
    let empty = ValidityMask::from_values(&[]);
    assert!(empty.is_empty());
    assert_eq!(empty.len(), 0);
    assert_eq!(empty.count_valid(), 0);
    let yielded = empty.bits().count();
    assert_eq!(yielded, 0);
}
12493
/// `count_valid` and `count_invalid` partition the mask length.
#[test]
fn validity_mask_count_invalid_matches_complement() {
    let cells = [
        Scalar::Int64(1),
        Scalar::Null(NullKind::NaN),
        Scalar::Int64(2),
        Scalar::Null(NullKind::Null),
        Scalar::Int64(3),
    ];
    let mask = ValidityMask::from_values(&cells);

    assert_eq!(mask.count_valid(), 3);
    assert_eq!(mask.count_invalid(), 2);
    let total = mask.count_valid() + mask.count_invalid();
    assert_eq!(total, mask.len());
}
12507
/// `any`/`all` on fully valid, fully invalid, mixed and zero-length masks.
#[test]
fn validity_mask_any_and_all() {
    let every = ValidityMask::all_valid(4);
    assert!(every.any());
    assert!(every.all());

    let nothing = ValidityMask::all_invalid(4);
    assert!(!nothing.any());
    assert!(!nothing.all());

    let partial = ValidityMask::from_values(&[Scalar::Int64(1), Scalar::Null(NullKind::NaN)]);
    assert!(partial.any());
    assert!(!partial.all());

    // Zero-length mask: nothing is valid, yet `all` holds vacuously.
    let zero_len = ValidityMask::all_invalid(0);
    assert!(!zero_len.any());
    assert!(zero_len.all());
}
12526
/// `xor_mask` marks exactly the positions where two masks disagree.
#[test]
fn validity_mask_xor_finds_differences() {
    let lhs_cells = [
        Scalar::Int64(1),
        Scalar::Int64(2),
        Scalar::Null(NullKind::NaN),
        Scalar::Int64(4),
    ];
    let rhs_cells = [
        Scalar::Int64(1),
        Scalar::Null(NullKind::NaN),
        Scalar::Null(NullKind::NaN),
        Scalar::Int64(4),
    ];
    let lhs = ValidityMask::from_values(&lhs_cells);
    let rhs = ValidityMask::from_values(&rhs_cells);

    let delta = lhs.xor_mask(&rhs);
    assert_eq!(delta.len(), 4);
    // Only position 1 differs (valid on the left, invalid on the right);
    // positions 0 and 3 are valid in both, position 2 invalid in both.
    assert!(!delta.get(0));
    assert!(delta.get(1));
    assert!(!delta.get(2));
    assert!(!delta.get(3));
}
12552
/// `slice(1, 3)` returns the three bits starting at position 1.
#[test]
fn validity_mask_slice_extracts_range() {
    // Validity pattern: valid, invalid, valid, valid, invalid.
    let cells = [
        Scalar::Int64(1),
        Scalar::Null(NullKind::NaN),
        Scalar::Int64(3),
        Scalar::Int64(4),
        Scalar::Null(NullKind::NaN),
    ];
    let window = ValidityMask::from_values(&cells).slice(1, 3);

    assert_eq!(window.len(), 3);
    assert!(!window.get(0));
    assert!(window.get(1));
    assert!(window.get(2));
}
12568
/// A slice request extending past the end is clamped; one starting past
/// the end yields an empty mask.
#[test]
fn validity_mask_slice_past_end_clamps() {
    let source = ValidityMask::all_valid(3);

    let tail = source.slice(2, 10);
    assert_eq!(tail.len(), 1);
    assert!(tail.get(0));

    let beyond = source.slice(100, 5);
    assert!(beyond.is_empty());
}
12579
/// `concat` appends the right-hand mask's bits after the left-hand ones.
#[test]
fn validity_mask_concat_appends() {
    let head = ValidityMask::from_values(&[Scalar::Int64(1), Scalar::Null(NullKind::NaN)]);
    let tail = ValidityMask::from_values(&[Scalar::Int64(2), Scalar::Int64(3)]);

    let joined = head.concat(&tail);
    assert_eq!(joined.len(), 4);
    assert!(joined.get(0));
    assert!(!joined.get(1));
    assert!(joined.get(2));
    assert!(joined.get(3));
}
12591
/// `first_valid`/`last_valid` return the outermost valid positions, or
/// `None` when no bit is set.
#[test]
fn validity_mask_first_last_valid() {
    let cells = [
        Scalar::Null(NullKind::NaN),
        Scalar::Null(NullKind::NaN),
        Scalar::Int64(1),
        Scalar::Int64(2),
        Scalar::Null(NullKind::NaN),
    ];
    let mask = ValidityMask::from_values(&cells);
    assert_eq!(mask.first_valid(), Some(2));
    assert_eq!(mask.last_valid(), Some(3));

    let blank = ValidityMask::all_invalid(3);
    assert_eq!(blank.first_valid(), None);
    assert_eq!(blank.last_valid(), None);
}
12608
/// A 65-cell mask whose last cell is null: 64 valid cells, with the
/// invalid one at index 64.
#[test]
fn validity_mask_boundary_65_elements() {
    // 64 valid cells followed by one null.
    let mut cells = vec![Scalar::Int64(1); 64];
    cells.push(Scalar::Null(NullKind::Null));

    let mask = ValidityMask::from_values(&cells);
    assert_eq!(mask.len(), 65);
    assert_eq!(mask.count_valid(), 64);
    assert!(mask.get(63));
    assert!(!mask.get(64));
}
12619
/// Masks compare equal when their validity patterns match and unequal
/// when the same bits appear in a different order.
#[test]
fn validity_mask_equality() {
    let valid_then_null = [Scalar::Int64(1), Scalar::Null(NullKind::Null)];
    let null_then_valid = [Scalar::Null(NullKind::Null), Scalar::Int64(1)];

    let a = ValidityMask::from_values(&valid_then_null);
    let b = ValidityMask::from_values(&valid_then_null);
    let c = ValidityMask::from_values(&null_then_valid);

    assert_eq!(a, b);
    assert_ne!(a, c);
}
12628
/// Both a `Float64` NaN payload and an explicit `Null(NaN)` are treated as
/// invalid cells.
#[test]
fn validity_mask_nan_is_invalid() {
    let cells = vec![
        Scalar::Float64(1.0),
        Scalar::Float64(f64::NAN),
        Scalar::Null(NullKind::NaN),
    ];
    let mask = ValidityMask::from_values(&cells);

    assert!(mask.get(0));
    assert!(!mask.get(1), "Float64(NaN) should be invalid");
    assert!(!mask.get(2), "Null(NaN) should be invalid");
    assert_eq!(mask.count_valid(), 1);
}
12642
/// 1000 cells alternating valid / null yield exactly 500 valid bits.
#[test]
fn validity_mask_dense_null_half() {
    // Even indices carry an integer, odd indices are null.
    let cells: Vec<Scalar> = (0..1000)
        .map(|i| match i % 2 {
            0 => Scalar::Int64(i),
            _ => Scalar::Null(NullKind::Null),
        })
        .collect();

    let mask = ValidityMask::from_values(&cells);
    assert_eq!(mask.len(), 1000);
    assert_eq!(mask.count_valid(), 500);
}
12658
12659    // === AG-10: ColumnData and Vectorized Path Tests ===
12660
/// Float64 scalars survive `ColumnData::from_scalars` → `to_scalars`, with
/// the missing cell coming back as NaN.
#[test]
fn column_data_float64_roundtrip() {
    let input = vec![
        Scalar::Float64(1.5),
        Scalar::Null(NullKind::NaN),
        Scalar::Float64(3.0),
    ];
    let mask = ValidityMask::from_values(&input);
    let dense = super::ColumnData::from_scalars(&input, fp_types::DType::Float64);

    let output = dense.to_scalars(fp_types::DType::Float64, &mask);
    assert_eq!(output.len(), 3);
    assert_eq!(output[0], Scalar::Float64(1.5));
    assert!(output[1].is_nan(), "position 1 should be NaN-missing");
    assert_eq!(output[2], Scalar::Float64(3.0));
}
12676
/// Int64 scalars survive `ColumnData::from_scalars` → `to_scalars`, with
/// the null cell coming back as missing.
#[test]
fn column_data_int64_roundtrip() {
    let input = vec![
        Scalar::Int64(10),
        Scalar::Null(NullKind::Null),
        Scalar::Int64(30),
    ];
    let mask = ValidityMask::from_values(&input);
    let dense = super::ColumnData::from_scalars(&input, fp_types::DType::Int64);
    assert_eq!(dense.len(), 3);

    let output = dense.to_scalars(fp_types::DType::Int64, &mask);
    assert_eq!(output[0], Scalar::Int64(10));
    assert!(output[1].is_missing());
    assert_eq!(output[2], Scalar::Int64(30));
}
12692
/// Interval scalars round-trip through `ColumnData`, and an Interval
/// `Column` reports duplicates and yields its unique values.
#[test]
fn column_data_interval_roundtrip_and_column_uniques_5g5uj() {
    let lower = Interval::new(0.0, 1.0, IntervalClosed::Right);
    let upper = Interval::new(1.0, 2.0, IntervalClosed::Right);
    let input = vec![
        Scalar::Interval(lower),
        Scalar::Null(NullKind::Null),
        Scalar::Interval(upper),
        Scalar::Interval(lower),
    ];

    // Dense-buffer round trip.
    let mask = ValidityMask::from_values(&input);
    let dense = super::ColumnData::from_scalars(&input, DType::Interval);
    assert_eq!(dense.len(), 4);
    let output = dense.to_scalars(DType::Interval, &mask);
    assert_eq!(output[0], Scalar::Interval(lower));
    assert!(output[1].is_missing());
    assert_eq!(output[2], Scalar::Interval(upper));
    assert_eq!(output[3], Scalar::Interval(lower));

    // Column-level behaviour: `lower` appears twice, so duplicates exist.
    let col = Column::new(DType::Interval, input).expect("interval column");
    assert_eq!(col.dtype(), DType::Interval);
    assert!(col.has_duplicates());
    let distinct = col.unique().expect("unique intervals");
    assert_eq!(
        distinct.values(),
        &[Scalar::Interval(lower), Scalar::Interval(upper)]
    );
}
12721
/// Element-wise addition of two all-valid Float64 columns.
#[test]
fn vectorized_f64_addition_matches_scalar() {
    let lhs_cells = vec![
        Scalar::Float64(1.0),
        Scalar::Float64(2.0),
        Scalar::Float64(3.0),
    ];
    let rhs_cells = vec![
        Scalar::Float64(10.0),
        Scalar::Float64(20.0),
        Scalar::Float64(30.0),
    ];
    let left = Column::from_values(lhs_cells).expect("left");
    let right = Column::from_values(rhs_cells).expect("right");

    let sum = left.binary_numeric(&right, ArithmeticOp::Add).expect("add");
    let cells = sum.values();
    assert_eq!(cells[0], Scalar::Float64(11.0));
    assert_eq!(cells[1], Scalar::Float64(22.0));
    assert_eq!(cells[2], Scalar::Float64(33.0));
}
12742
/// Element-wise addition of two all-valid Int64 columns stays Int64.
#[test]
fn vectorized_i64_addition_matches_scalar() {
    let lhs_cells = vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)];
    let rhs_cells = vec![Scalar::Int64(10), Scalar::Int64(20), Scalar::Int64(30)];
    let left = Column::from_values(lhs_cells).expect("left");
    let right = Column::from_values(rhs_cells).expect("right");

    let sum = left.binary_numeric(&right, ArithmeticOp::Add).expect("add");
    let cells = sum.values();
    assert_eq!(cells[0], Scalar::Int64(11));
    assert_eq!(cells[1], Scalar::Int64(22));
    assert_eq!(cells[2], Scalar::Int64(33));
}
12759
/// Adding two all-valid typed f64 columns must expose the typed slice
/// while leaving the scalar view of the result unmaterialized.
#[test]
fn vectorized_binary_all_valid_keeps_typed_output_lazy() {
    let lhs = Column::from_f64_values(vec![1.0, 2.0, 3.0]);
    let rhs = Column::from_f64_values(vec![10.0, 20.0, 30.0]);

    let sum = lhs.binary_numeric(&rhs, ArithmeticOp::Add).expect("add");

    assert!(sum.validity().all());
    let want = [11.0, 22.0, 33.0];
    assert_eq!(sum.as_f64_slice(), Some(want.as_slice()));
    // Checked last and without calling `values()`, so the cell is inspected
    // in whatever state the operations above left it.
    let still_lazy = matches!(
        &sum.values,
        ScalarValues::LazyAllValidFloat64 { values, .. } if values.get().is_none()
    );
    assert!(still_lazy);
}
12774
/// `inf - inf` produces NaN, which must be flagged invalid while the
/// materialized cell is still a `Float64` NaN.
#[test]
fn vectorized_binary_operation_nan_matches_scalar_validity() {
    let lhs = Column::from_f64_values(vec![f64::INFINITY]);
    let rhs = Column::from_f64_values(vec![f64::INFINITY]);

    let diff = lhs.binary_numeric(&rhs, ArithmeticOp::Sub).expect("sub");

    assert!(!diff.validity().get(0));
    let first_is_nan = matches!(diff.values()[0], Scalar::Float64(v) if v.is_nan());
    assert!(first_is_nan);
}
12785
/// A missing operand on either side makes the Float64 sum NaN.
#[test]
fn vectorized_f64_with_nulls_propagates_missing() {
    let lhs_cells = vec![
        Scalar::Float64(1.0),
        Scalar::Null(NullKind::NaN),
        Scalar::Float64(3.0),
    ];
    let rhs_cells = vec![
        Scalar::Float64(10.0),
        Scalar::Float64(20.0),
        Scalar::Null(NullKind::NaN),
    ];
    let left = Column::from_values(lhs_cells).expect("left");
    let right = Column::from_values(rhs_cells).expect("right");

    let sum = left.binary_numeric(&right, ArithmeticOp::Add).expect("add");
    let cells = sum.values();
    assert_eq!(cells[0], Scalar::Float64(11.0));
    assert!(cells[1].is_nan(), "null+valid should be NaN");
    assert!(cells[2].is_nan(), "valid+null should be NaN");
}
12806
/// `aligned_binary_f64` must agree with the two-step reference path:
/// reindex both operands by position, then run `binary_numeric`.
#[test]
fn aligned_binary_f64_matches_reindex_then_binary_numeric() {
    let lhs_cells = vec![
        Scalar::Float64(1.0),
        Scalar::Float64(f64::NAN),
        Scalar::Float64(3.5),
    ];
    let rhs_cells = vec![
        Scalar::Float64(10.0),
        Scalar::Float64(20.0),
        Scalar::Null(NullKind::NaN),
    ];
    let left = Column::new(DType::Float64, lhs_cells).expect("left");
    let right = Column::new(DType::Float64, rhs_cells).expect("right");
    let left_positions = [Some(2), None, Some(1), Some(0)];
    let right_positions = [None, Some(0), Some(2), Some(1)];

    // Reference result via explicit reindexing.
    let reindexed_left = left
        .reindex_by_positions(&left_positions)
        .expect("left reindex");
    let reindexed_right = right
        .reindex_by_positions(&right_positions)
        .expect("right reindex");
    let expected = reindexed_left
        .binary_numeric(&reindexed_right, ArithmeticOp::Add)
        .expect("generic add");

    // Result from the aligned path.
    let actual = left
        .aligned_binary_f64(&right, &left_positions, &right_positions, ArithmeticOp::Add)
        .expect("aligned add");

    assert_eq!(actual.dtype(), expected.dtype());
    assert_eq!(actual.values(), expected.values());
    let (got_mask, want_mask) = (actual.validity(), expected.validity());
    assert_eq!(got_mask.len(), want_mask.len());
    (0..actual.len()).for_each(|idx| assert_eq!(got_mask.get(idx), want_mask.get(idx)));
}
12850
/// With identity positions and all-valid inputs, the aligned add exposes
/// the typed slice and leaves the scalar view unmaterialized.
#[test]
fn aligned_binary_f64_all_valid_keeps_typed_output_lazy() {
    let lhs = Column::from_f64_values(vec![1.0, 2.0, 3.0]);
    let rhs = Column::from_f64_values(vec![10.0, 20.0, 30.0]);
    let lhs_positions = [Some(0), Some(1), Some(2)];
    let rhs_positions = [Some(0), Some(1), Some(2)];

    let sum = lhs
        .aligned_binary_f64(&rhs, &lhs_positions, &rhs_positions, ArithmeticOp::Add)
        .expect("aligned add");

    assert!(sum.validity().all());
    let want = [11.0, 22.0, 33.0];
    assert_eq!(sum.as_f64_slice(), Some(want.as_slice()));
    // `values()` is deliberately never called before this check.
    let still_lazy = matches!(
        &sum.values,
        ScalarValues::LazyAllValidFloat64 { values, .. } if values.get().is_none()
    );
    assert!(still_lazy);
}
12869
/// When alignment introduces gaps, the aligned add must match the
/// reindex-then-add reference and keep a lazy nullable representation
/// until `values()` is requested.
#[test]
fn aligned_binary_f64_nullable_gaps_keep_typed_output_lazy() {
    let lhs = Column::from_f64_values(vec![1.0, 2.0, 3.0]);
    let rhs = Column::from_f64_values(vec![10.0, 20.0, 30.0]);
    let lhs_positions = [Some(0), Some(1), Some(2), None];
    let rhs_positions = [None, Some(0), Some(1), Some(2)];

    // Reference path.
    let reindexed_lhs = lhs
        .reindex_by_positions(&lhs_positions)
        .expect("left reindex");
    let reindexed_rhs = rhs
        .reindex_by_positions(&rhs_positions)
        .expect("right reindex");
    let expected = reindexed_lhs
        .binary_numeric(&reindexed_rhs, ArithmeticOp::Add)
        .expect("generic add");

    // Aligned path.
    let actual = lhs
        .aligned_binary_f64(&rhs, &lhs_positions, &rhs_positions, ArithmeticOp::Add)
        .expect("aligned add");

    assert_eq!(actual.dtype(), expected.dtype());
    assert_eq!(actual.validity(), expected.validity());
    // Laziness is asserted before the `values()` call below.
    let still_lazy = matches!(
        &actual.values,
        ScalarValues::LazyNullableFloat64 { values, .. } if values.get().is_none()
    );
    assert!(still_lazy);
    assert_eq!(actual.values(), expected.values());
}
12898
/// The unit-range alignment entry point must produce the same column as
/// the explicit position-vector entry point for the equivalent alignment.
#[test]
fn aligned_binary_f64_int64_unit_ranges_matches_position_alignment() {
    let lhs = Column::from_f64_values(vec![1.0, 2.0, 3.0]);
    let rhs = Column::from_f64_values(vec![10.0, 20.0, 30.0]);
    let lhs_positions = [Some(0), Some(1), Some(2), None];
    let rhs_positions = [None, Some(0), Some(1), Some(2)];

    let expected = lhs
        .aligned_binary_f64(&rhs, &lhs_positions, &rhs_positions, ArithmeticOp::Add)
        .expect("position aligned add");
    let actual = lhs
        .aligned_binary_f64_int64_unit_ranges(&rhs, (0, 2), (1, 3), (0, 3), ArithmeticOp::Add)
        .expect("unit range aligned add");

    assert_eq!(actual.dtype(), expected.dtype());
    assert_eq!(actual.validity(), expected.validity());
    // Laziness is asserted before the `values()` call below.
    let still_lazy = matches!(
        &actual.values,
        ScalarValues::LazyNullableFloat64 { values, .. } if values.get().is_none()
    );
    assert!(still_lazy);
    assert_eq!(actual.values(), expected.values());
}
12921
/// A NaN produced by the operation itself (`inf - inf`) is flagged invalid,
/// stays lazy, and materializes as a `Float64` NaN.
#[test]
fn aligned_binary_f64_operation_nan_keeps_float_nan_materialization() {
    let lhs = Column::from_f64_values(vec![f64::INFINITY]);
    let rhs = Column::from_f64_values(vec![f64::INFINITY]);
    let identity = [Some(0)];

    let diff = lhs
        .aligned_binary_f64(&rhs, &identity, &identity, ArithmeticOp::Sub)
        .expect("aligned sub");

    assert!(!diff.validity().get(0));
    // Laziness is asserted before the `values()` call below.
    let still_lazy = matches!(
        &diff.values,
        ScalarValues::LazyNullableFloat64 { values, .. } if values.get().is_none()
    );
    assert!(still_lazy);
    let first_is_nan = matches!(diff.values()[0], Scalar::Float64(value) if value.is_nan());
    assert!(first_is_nan);
}
12939
/// The slice-level op must agree bit-for-bit with the per-element function
/// returned by `binary_f64_apply`, for every op and every pairing of the
/// awkward operands below (NaN, infinities, signed zero, zero divisor,
/// negative base). Raw-bit comparison means NaN payloads must match too.
#[test]
fn apply_f64_slices_matches_fn_pointer_per_element_f64simd() {
    let operands = [
        0.0_f64,
        -0.0,
        1.0,
        -1.0,
        2.5,
        -3.0,
        4.0,
        0.5,
        f64::NAN,
        f64::INFINITY,
        f64::NEG_INFINITY,
        1e300,
        -1e-300,
    ];
    let n = operands.len();
    let lhs: Vec<f64> = operands.to_vec();
    let ops = [
        ArithmeticOp::Add,
        ArithmeticOp::Sub,
        ArithmeticOp::Mul,
        ArithmeticOp::Div,
        ArithmeticOp::Mod,
        ArithmeticOp::Pow,
        ArithmeticOp::FloorDiv,
    ];

    for op in ops {
        for shift in 0..n {
            // Right operand is the operand list rotated left by `shift`,
            // so every value meets every other value across the shifts.
            let rhs: Vec<f64> = operands
                .iter()
                .copied()
                .cycle()
                .skip(shift)
                .take(n)
                .collect();

            let got = super::apply_f64_slices(op, &lhs, &rhs);
            let apply = super::binary_f64_apply(op);
            let expected: Vec<f64> = lhs.iter().zip(&rhs).map(|(x, y)| apply(*x, *y)).collect();

            // Indexed on purpose: a short `got` must panic, not be skipped.
            for i in 0..lhs.len() {
                assert_eq!(
                    got[i].to_bits(),
                    expected[i].to_bits(),
                    "op={op:?} a={} b={}",
                    lhs[i],
                    rhs[i]
                );
            }
        }
    }
}
12990
/// The same-positions shortcut must match `aligned_binary_f64` called with
/// identity position vectors on both sides.
#[test]
fn aligned_binary_f64_same_positions_matches_identity_alignment() {
    let lhs_cells = vec![
        Scalar::Float64(1.0),
        Scalar::Float64(f64::NAN),
        Scalar::Float64(3.0),
    ];
    let rhs_cells = vec![
        Scalar::Float64(10.0),
        Scalar::Float64(20.0),
        Scalar::Null(NullKind::NaN),
    ];
    let left = Column::new(DType::Float64, lhs_cells).expect("left");
    let right = Column::new(DType::Float64, rhs_cells).expect("right");
    let identity = [Some(0), Some(1), Some(2)];

    let expected = left
        .aligned_binary_f64(&right, &identity, &identity, ArithmeticOp::Add)
        .expect("identity aligned add");
    let actual = left
        .aligned_binary_f64_same_positions(&right, ArithmeticOp::Add)
        .expect("same-position add");

    assert_eq!(actual.dtype(), expected.dtype());
    assert_eq!(actual.values(), expected.values());
    let (got_mask, want_mask) = (actual.validity(), expected.validity());
    (0..actual.len()).for_each(|idx| assert_eq!(got_mask.get(idx), want_mask.get(idx)));
}
13026
/// Cloned typed-f64 columns carry no `data` buffer and an unmaterialized
/// scalar view; running an aligned add over them must not materialize the
/// operands' scalar views.
#[test]
fn aligned_binary_f64_borrows_lazy_float64_clone_data() {
    let left = Column::from_f64_values(vec![1.0, f64::NAN, 4.0]).clone();
    let right = Column::from_f64_values(vec![10.0, 20.0, 30.0]).clone();

    // Preconditions on the cloned operands.
    assert!(left.data.is_none());
    assert!(right.data.is_none());
    let left_lazy_before = matches!(
        &left.values,
        ScalarValues::LazyAllValidFloat64 { values, .. } if values.get().is_none()
    );
    assert!(left_lazy_before);
    let right_lazy_before = matches!(
        &right.values,
        ScalarValues::LazyAllValidFloat64 { values, .. } if values.get().is_none()
    );
    assert!(right_lazy_before);

    let left_positions = [Some(0), Some(1), Some(2), None];
    let right_positions = [Some(2), Some(1), None, Some(0)];
    let sum = left
        .aligned_binary_f64(&right, &left_positions, &right_positions, ArithmeticOp::Add)
        .expect("aligned add");

    // Only row 0 pairs two present values (1.0 + 30.0); every other row has
    // a NaN or a gap on at least one side.
    let want = [
        Scalar::Float64(31.0),
        Scalar::Null(NullKind::NaN),
        Scalar::Null(NullKind::NaN),
        Scalar::Null(NullKind::NaN),
    ];
    assert_eq!(sum.values(), &want);

    // The operands must still be unmaterialized afterwards.
    if let ScalarValues::LazyAllValidFloat64 { values, .. } = &left.values {
        assert!(values.get().is_none());
    }
    if let ScalarValues::LazyAllValidFloat64 { values, .. } = &right.values {
        assert!(values.get().is_none());
    }
}
13065
/// br-frankenpandas-jyhf7: typed f64 ingestion has to treat NaN as missing,
/// exactly as `Column::new(Float64, scalars)` does. If it did not, a column
/// containing NaN would claim to be all-valid and `as_f64_slice` would hand
/// the NaN out as a real value.
#[test]
fn from_f64_values_marks_nan_missing_like_scalar_path() {
    let typed = Column::from_f64_values(vec![1.0, f64::NAN, 3.0, f64::NAN]);
    let scalar_cells = vec![
        Scalar::Float64(1.0),
        Scalar::Float64(f64::NAN),
        Scalar::Float64(3.0),
        Scalar::Float64(f64::NAN),
    ];
    let scalar = Column::new(DType::Float64, scalar_cells).expect("scalar col");

    // Both construction paths must agree on validity at every position.
    (0..typed.len()).for_each(|idx| {
        assert_eq!(
            typed.validity().get(idx),
            scalar.validity().get(idx),
            "validity mismatch at {idx}"
        )
    });
    let typed_mask = typed.validity();
    assert!(typed_mask.get(0));
    assert!(!typed_mask.get(1));
    assert!(typed_mask.get(2));
    assert!(!typed_mask.get(3));
    assert_eq!(typed_mask.count_valid(), 2);

    // The raw f64 slice is only exposed when every value is present, so a
    // NaN-bearing column must refuse it.
    assert!(typed.as_f64_slice().is_none());

    // Without NaN the all-valid fast path and the slice remain available.
    let clean = Column::from_f64_values(vec![1.0, 2.0, 3.0]);
    assert!(clean.validity().all());
    let want = [1.0, 2.0, 3.0];
    assert_eq!(clean.as_f64_slice(), Some(want.as_slice()));
}
13106
/// A null operand on either side makes the Int64 sum missing.
#[test]
fn vectorized_i64_with_nulls_propagates_missing() {
    let lhs_cells = vec![
        Scalar::Int64(1),
        Scalar::Null(NullKind::Null),
        Scalar::Int64(3),
    ];
    let rhs_cells = vec![
        Scalar::Int64(10),
        Scalar::Int64(20),
        Scalar::Null(NullKind::Null),
    ];
    let left = Column::from_values(lhs_cells).expect("left");
    let right = Column::from_values(rhs_cells).expect("right");

    let sum = left.binary_numeric(&right, ArithmeticOp::Add).expect("add");
    let cells = sum.values();
    assert_eq!(cells[0], Scalar::Int64(11));
    assert!(cells[1].is_missing());
    assert!(cells[2].is_missing());
}
13127
/// Mixing a string with an integer yields a `Utf8` column whose cells keep
/// their original scalar variants.
#[test]
fn column_from_values_preserves_mixed_utf8_numeric_scalars() {
    let mixed = vec![Scalar::Utf8("x".into()), Scalar::Int64(1)];
    let column =
        Column::from_values(mixed).expect("mixed object-like constructor should succeed");

    assert_eq!(column.dtype(), DType::Utf8);
    let want = [Scalar::Utf8("x".into()), Scalar::Int64(1)];
    assert_eq!(column.values(), &want);
}
13139
/// Dividing two Int64 columns yields a Float64 column.
#[test]
fn vectorized_division_promotes_to_float64() {
    let numerators = Column::from_values(vec![Scalar::Int64(10), Scalar::Int64(21)]).expect("left");
    let denominators =
        Column::from_values(vec![Scalar::Int64(3), Scalar::Int64(7)]).expect("right");

    let quotient = numerators
        .binary_numeric(&denominators, ArithmeticOp::Div)
        .expect("div");

    // The result dtype is Float64 even though both inputs are integers.
    assert_eq!(quotient.dtype(), fp_types::DType::Float64);
    let first_close =
        matches!(quotient.values()[0], Scalar::Float64(v) if (v - 10.0/3.0).abs() < 1e-10);
    assert!(first_close);
    assert_eq!(quotient.values()[1], Scalar::Float64(3.0));
}
13151
/// Add, Sub, Mul and Div on single-element Float64 columns (10.0 and 3.0).
#[test]
fn vectorized_all_four_ops_f64() {
    let left = Column::from_values(vec![Scalar::Float64(10.0)]).expect("left");
    let right = Column::from_values(vec![Scalar::Float64(3.0)]).expect("right");

    // Compute all four results up front, then verify them in the same order.
    let sum = left.binary_numeric(&right, ArithmeticOp::Add).expect("add");
    let difference = left.binary_numeric(&right, ArithmeticOp::Sub).expect("sub");
    let product = left.binary_numeric(&right, ArithmeticOp::Mul).expect("mul");
    let quotient = left.binary_numeric(&right, ArithmeticOp::Div).expect("div");

    assert_eq!(sum.values()[0], Scalar::Float64(13.0));
    assert_eq!(difference.values()[0], Scalar::Float64(7.0));
    assert_eq!(product.values()[0], Scalar::Float64(30.0));
    let quotient_close =
        matches!(quotient.values()[0], Scalar::Float64(v) if (v - 10.0/3.0).abs() < 1e-10);
    assert!(quotient_close);
}
13167
13168    #[test]
13169    fn pandas_arithmetic_aliases_match_binary_numeric() {
13170        let left = Column::from_values(vec![Scalar::Float64(10.0)]).expect("left");
13171        let right = Column::from_values(vec![Scalar::Float64(3.0)]).expect("right");
13172
13173        assert_eq!(
13174            left.add(&right).expect("add"),
13175            left.binary_numeric(&right, ArithmeticOp::Add).expect("add")
13176        );
13177        assert_eq!(
13178            left.sub(&right).expect("sub"),
13179            left.binary_numeric(&right, ArithmeticOp::Sub).expect("sub")
13180        );
13181        assert_eq!(
13182            left.mul(&right).expect("mul"),
13183            left.binary_numeric(&right, ArithmeticOp::Mul).expect("mul")
13184        );
13185        assert_eq!(
13186            left.div(&right).expect("div"),
13187            left.binary_numeric(&right, ArithmeticOp::Div).expect("div")
13188        );
13189        assert_eq!(
13190            left.divide(&right).expect("divide"),
13191            left.div(&right).expect("div")
13192        );
13193    }
13194
13195    #[test]
13196    fn remaining_pandas_arithmetic_aliases_match_binary_numeric() {
13197        let left = Column::from_values(vec![Scalar::Float64(10.0)]).expect("left");
13198        let right = Column::from_values(vec![Scalar::Float64(3.0)]).expect("right");
13199
13200        assert_eq!(
13201            left.subtract(&right).expect("subtract"),
13202            left.sub(&right).expect("sub")
13203        );
13204        assert_eq!(
13205            left.multiply(&right).expect("multiply"),
13206            left.mul(&right).expect("mul")
13207        );
13208        assert_eq!(
13209            left.truediv(&right).expect("truediv"),
13210            left.div(&right).expect("div")
13211        );
13212        assert_eq!(
13213            left.floordiv(&right).expect("floordiv"),
13214            left.binary_numeric(&right, ArithmeticOp::FloorDiv)
13215                .expect("floordiv")
13216        );
13217        assert_eq!(
13218            left.r#mod(&right).expect("mod"),
13219            left.binary_numeric(&right, ArithmeticOp::Mod).expect("mod")
13220        );
13221        assert_eq!(
13222            left.pow(&right).expect("pow"),
13223            left.binary_numeric(&right, ArithmeticOp::Pow).expect("pow")
13224        );
13225    }
13226
13227    #[test]
13228    fn pandas_reverse_arithmetic_aliases_swap_operands() {
13229        let series = Column::from_values(vec![Scalar::Float64(10.0)]).expect("series");
13230        let other = Column::from_values(vec![Scalar::Float64(3.0)]).expect("other");
13231
13232        assert_eq!(
13233            series.radd(&other).expect("radd"),
13234            other
13235                .binary_numeric(&series, ArithmeticOp::Add)
13236                .expect("add")
13237        );
13238        assert_eq!(
13239            series.rsub(&other).expect("rsub"),
13240            other
13241                .binary_numeric(&series, ArithmeticOp::Sub)
13242                .expect("sub")
13243        );
13244        assert_eq!(
13245            series.rmul(&other).expect("rmul"),
13246            other
13247                .binary_numeric(&series, ArithmeticOp::Mul)
13248                .expect("mul")
13249        );
13250        assert_eq!(
13251            series.rdiv(&other).expect("rdiv"),
13252            other
13253                .binary_numeric(&series, ArithmeticOp::Div)
13254                .expect("div")
13255        );
13256        assert_eq!(
13257            series.rtruediv(&other).expect("rtruediv"),
13258            series.rdiv(&other).expect("rdiv")
13259        );
13260        assert_eq!(
13261            series.rfloordiv(&other).expect("rfloordiv"),
13262            other
13263                .binary_numeric(&series, ArithmeticOp::FloorDiv)
13264                .expect("floordiv")
13265        );
13266        assert_eq!(
13267            series.rmod(&other).expect("rmod"),
13268            other
13269                .binary_numeric(&series, ArithmeticOp::Mod)
13270                .expect("mod")
13271        );
13272        assert_eq!(
13273            series.rpow(&other).expect("rpow"),
13274            other
13275                .binary_numeric(&series, ArithmeticOp::Pow)
13276                .expect("pow")
13277        );
13278    }
13279
13280    #[test]
13281    fn vectorized_f64_mod_pow_floordiv() {
13282        let left = Column::from_values(vec![
13283            Scalar::Float64(10.0),
13284            Scalar::Float64(2.0),
13285            Scalar::Float64(-3.0),
13286        ])
13287        .expect("left");
13288        let right = Column::from_values(vec![
13289            Scalar::Float64(3.0),
13290            Scalar::Float64(3.0),
13291            Scalar::Float64(2.0),
13292        ])
13293        .expect("right");
13294
13295        let modulo = left.binary_numeric(&right, ArithmeticOp::Mod).expect("mod");
13296        assert_eq!(modulo.dtype(), DType::Float64);
13297        assert!(matches!(modulo.values()[0], Scalar::Float64(v) if (v - 1.0).abs() < 1e-10));
13298        assert!(matches!(modulo.values()[1], Scalar::Float64(v) if (v - 2.0).abs() < 1e-10));
13299        assert!(matches!(modulo.values()[2], Scalar::Float64(v) if (v - 1.0).abs() < 1e-10));
13300
13301        let pow = left.binary_numeric(&right, ArithmeticOp::Pow).expect("pow");
13302        assert_eq!(pow.dtype(), DType::Float64);
13303        assert!(matches!(pow.values()[0], Scalar::Float64(v) if (v - 1000.0).abs() < 1e-10));
13304        assert!(matches!(pow.values()[1], Scalar::Float64(v) if (v - 8.0).abs() < 1e-10));
13305        assert!(matches!(pow.values()[2], Scalar::Float64(v) if (v - 9.0).abs() < 1e-10));
13306
13307        let floordiv = left
13308            .binary_numeric(&right, ArithmeticOp::FloorDiv)
13309            .expect("floordiv");
13310        assert_eq!(floordiv.dtype(), DType::Float64);
13311        assert!(matches!(floordiv.values()[0], Scalar::Float64(v) if (v - 3.0).abs() < 1e-10));
13312        assert!(matches!(floordiv.values()[1], Scalar::Float64(v) if (v - 0.0).abs() < 1e-10));
13313        assert!(matches!(floordiv.values()[2], Scalar::Float64(v) if (v - -2.0).abs() < 1e-10));
13314    }
13315
13316    #[test]
13317    fn int_pow_stays_int64_and_negative_exponent_raises_3w0xn() {
13318        // br-frankenpandas-3w0xn: int ** int stays int64 (numpy/pandas: 2 ** 3
13319        // == 8, not 8.0), and a negative integer exponent raises.
13320        let base = Column::from_values(vec![Scalar::Int64(2), Scalar::Int64(3), Scalar::Int64(10)])
13321            .expect("base");
13322        let exp = Column::from_values(vec![Scalar::Int64(3), Scalar::Int64(2), Scalar::Int64(2)])
13323            .expect("exp");
13324        let pow = base
13325            .binary_numeric(&exp, ArithmeticOp::Pow)
13326            .expect("int pow");
13327        assert_eq!(pow.dtype(), DType::Int64);
13328        assert_eq!(pow.values()[0], Scalar::Int64(8));
13329        assert_eq!(pow.values()[1], Scalar::Int64(9));
13330        assert_eq!(pow.values()[2], Scalar::Int64(100));
13331
13332        // Negative integer exponent raises (numpy ValueError analogue).
13333        let neg_exp =
13334            Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(-1), Scalar::Int64(2)])
13335                .expect("neg_exp");
13336        let err = base
13337            .binary_numeric(&neg_exp, ArithmeticOp::Pow)
13338            .expect_err("negative integer power must raise");
13339        assert!(matches!(err, ColumnError::NegativeIntegerPower));
13340
13341        // A float operand promotes the whole op to Float64.
13342        let exp_f = Column::from_values(vec![
13343            Scalar::Float64(3.0),
13344            Scalar::Float64(2.0),
13345            Scalar::Float64(2.0),
13346        ])
13347        .expect("exp_f");
13348        let pow_f = base
13349            .binary_numeric(&exp_f, ArithmeticOp::Pow)
13350            .expect("mixed int/float pow");
13351        assert_eq!(pow_f.dtype(), DType::Float64);
13352        assert!(matches!(pow_f.values()[0], Scalar::Float64(v) if (v - 8.0).abs() < 1e-10));
13353    }
13354
13355    #[test]
13356    fn int64_mod_floordiv_preserves_dtype() {
13357        // Test that int % int and int // int stay Int64 (pandas parity)
13358        let left = Column::from_values(vec![
13359            Scalar::Int64(10),
13360            Scalar::Int64(20),
13361            Scalar::Int64(30),
13362        ])
13363        .expect("left");
13364        let right = Column::from_values(vec![Scalar::Int64(3), Scalar::Int64(7), Scalar::Int64(4)])
13365            .expect("right");
13366
13367        let modulo = left.binary_numeric(&right, ArithmeticOp::Mod).expect("mod");
13368        assert_eq!(modulo.dtype(), DType::Int64, "mod should preserve Int64");
13369        assert_eq!(modulo.values()[0], Scalar::Int64(1));
13370        assert_eq!(modulo.values()[1], Scalar::Int64(6));
13371        assert_eq!(modulo.values()[2], Scalar::Int64(2));
13372
13373        let floordiv = left
13374            .binary_numeric(&right, ArithmeticOp::FloorDiv)
13375            .expect("floordiv");
13376        assert_eq!(
13377            floordiv.dtype(),
13378            DType::Int64,
13379            "floordiv should preserve Int64"
13380        );
13381        assert_eq!(floordiv.values()[0], Scalar::Int64(3));
13382        assert_eq!(floordiv.values()[1], Scalar::Int64(2));
13383        assert_eq!(floordiv.values()[2], Scalar::Int64(7));
13384    }
13385
13386    #[test]
13387    fn int64_mod_floordiv_match_pandas_negative_operand_signs() {
13388        let left = Column::from_values(vec![
13389            Scalar::Int64(7),
13390            Scalar::Int64(-7),
13391            Scalar::Int64(-7),
13392            Scalar::Int64(7),
13393        ])
13394        .expect("left");
13395        let right = Column::from_values(vec![
13396            Scalar::Int64(-3),
13397            Scalar::Int64(3),
13398            Scalar::Int64(-3),
13399            Scalar::Int64(3),
13400        ])
13401        .expect("right");
13402
13403        let modulo = left.binary_numeric(&right, ArithmeticOp::Mod).expect("mod");
13404        assert_eq!(modulo.dtype(), DType::Int64);
13405        assert_eq!(
13406            modulo.values(),
13407            &[
13408                Scalar::Int64(-2),
13409                Scalar::Int64(2),
13410                Scalar::Int64(-1),
13411                Scalar::Int64(1)
13412            ]
13413        );
13414
13415        let floordiv = left
13416            .binary_numeric(&right, ArithmeticOp::FloorDiv)
13417            .expect("floordiv");
13418        assert_eq!(floordiv.dtype(), DType::Int64);
13419        assert_eq!(
13420            floordiv.values(),
13421            &[
13422                Scalar::Int64(-3),
13423                Scalar::Int64(-3),
13424                Scalar::Int64(2),
13425                Scalar::Int64(2)
13426            ]
13427        );
13428    }
13429
13430    #[test]
13431    fn float64_mod_floordiv_match_pandas_negative_operand_signs() {
13432        let left = Column::from_values(vec![
13433            Scalar::Float64(7.0),
13434            Scalar::Float64(-7.0),
13435            Scalar::Float64(-7.0),
13436            Scalar::Float64(7.0),
13437        ])
13438        .expect("left");
13439        let right = Column::from_values(vec![
13440            Scalar::Float64(-3.0),
13441            Scalar::Float64(3.0),
13442            Scalar::Float64(-3.0),
13443            Scalar::Float64(3.0),
13444        ])
13445        .expect("right");
13446
13447        let modulo = left.binary_numeric(&right, ArithmeticOp::Mod).expect("mod");
13448        assert_eq!(modulo.dtype(), DType::Float64);
13449        assert!(matches!(modulo.values()[0], Scalar::Float64(v) if (v + 2.0).abs() < 1e-10));
13450        assert!(matches!(modulo.values()[1], Scalar::Float64(v) if (v - 2.0).abs() < 1e-10));
13451        assert!(matches!(modulo.values()[2], Scalar::Float64(v) if (v + 1.0).abs() < 1e-10));
13452        assert!(matches!(modulo.values()[3], Scalar::Float64(v) if (v - 1.0).abs() < 1e-10));
13453
13454        let floordiv = left
13455            .binary_numeric(&right, ArithmeticOp::FloorDiv)
13456            .expect("floordiv");
13457        assert_eq!(floordiv.dtype(), DType::Float64);
13458        assert!(matches!(floordiv.values()[0], Scalar::Float64(v) if (v + 3.0).abs() < 1e-10));
13459        assert!(matches!(floordiv.values()[1], Scalar::Float64(v) if (v + 3.0).abs() < 1e-10));
13460        assert!(matches!(floordiv.values()[2], Scalar::Float64(v) if (v - 2.0).abs() < 1e-10));
13461        assert!(matches!(floordiv.values()[3], Scalar::Float64(v) if (v - 2.0).abs() < 1e-10));
13462    }
13463
13464    #[test]
13465    fn int64_mod_floordiv_with_zero_promotes_to_float() {
13466        // Test that int % 0 and int // 0 promote to Float64 (pandas parity)
13467        let left = Column::from_values(vec![
13468            Scalar::Int64(10),
13469            Scalar::Int64(20),
13470            Scalar::Int64(30),
13471        ])
13472        .expect("left");
13473        let right = Column::from_values(vec![
13474            Scalar::Int64(3),
13475            Scalar::Int64(0), // Zero divisor
13476            Scalar::Int64(4),
13477        ])
13478        .expect("right");
13479
13480        let modulo = left.binary_numeric(&right, ArithmeticOp::Mod).expect("mod");
13481        assert_eq!(
13482            modulo.dtype(),
13483            DType::Float64,
13484            "mod with zero should promote to Float64"
13485        );
13486        assert!(matches!(modulo.values()[0], Scalar::Float64(v) if (v - 1.0).abs() < 1e-10));
13487        assert!(matches!(modulo.values()[1], Scalar::Float64(v) if v.is_nan()));
13488        assert!(matches!(modulo.values()[2], Scalar::Float64(v) if (v - 2.0).abs() < 1e-10));
13489
13490        let floordiv = left
13491            .binary_numeric(&right, ArithmeticOp::FloorDiv)
13492            .expect("floordiv");
13493        assert_eq!(
13494            floordiv.dtype(),
13495            DType::Float64,
13496            "floordiv with zero should promote to Float64"
13497        );
13498        assert!(matches!(floordiv.values()[0], Scalar::Float64(v) if (v - 3.0).abs() < 1e-10));
13499        assert!(matches!(floordiv.values()[1], Scalar::Float64(v) if v.is_infinite()));
13500        assert!(matches!(floordiv.values()[2], Scalar::Float64(v) if (v - 7.0).abs() < 1e-10));
13501    }
13502
13503    #[test]
13504    fn vectorized_empty_columns() {
13505        let left = Column::from_values(vec![]).expect("left");
13506        let right = Column::from_values(vec![]).expect("right");
13507        let result = left
13508            .binary_numeric(&right, ArithmeticOp::Add)
13509            .expect("add empty");
13510        assert!(result.is_empty());
13511    }
13512
13513    #[test]
13514    fn vectorized_large_column_matches_scalar_semantics() {
13515        // Build large columns to exercise batch processing.
13516        let n = 4096;
13517        let left_values: Vec<Scalar> = (0..n).map(|i| Scalar::Float64(i as f64)).collect();
13518        let right_values: Vec<Scalar> = (0..n).map(|i| Scalar::Float64((n - i) as f64)).collect();
13519
13520        let left = Column::from_values(left_values).expect("left");
13521        let right = Column::from_values(right_values).expect("right");
13522
13523        let result = left.binary_numeric(&right, ArithmeticOp::Add).expect("add");
13524
13525        // Every position should sum to n.
13526        for (i, v) in result.values().iter().enumerate() {
13527            assert_eq!(*v, Scalar::Float64(n as f64), "position {i} should be {n}");
13528        }
13529    }
13530
13531    #[test]
13532    fn vectorized_nan_vs_null_distinction_preserved() {
13533        // Float64 column: NaN is a specific kind of missing.
13534        let left =
13535            Column::from_values(vec![Scalar::Float64(f64::NAN), Scalar::Null(NullKind::NaN)])
13536                .expect("left");
13537        let right =
13538            Column::from_values(vec![Scalar::Float64(1.0), Scalar::Float64(2.0)]).expect("right");
13539
13540        let result = left.binary_numeric(&right, ArithmeticOp::Add).expect("add");
13541        // Both positions should be NaN-missing (not generic Null).
13542        assert!(result.values()[0].is_nan(), "NaN + valid = NaN");
13543        assert!(result.values()[1].is_nan(), "NaN-null + valid = NaN");
13544    }
13545
13546    #[test]
13547    fn vectorized_mixed_type_falls_back_to_scalar() {
13548        // Int64 + Float64 promotes to Float64 — vectorized path handles this.
13549        let left = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("left");
13550        let right =
13551            Column::from_values(vec![Scalar::Float64(0.5), Scalar::Float64(1.5)]).expect("right");
13552
13553        let result = left.binary_numeric(&right, ArithmeticOp::Add).expect("add");
13554        assert_eq!(result.dtype(), fp_types::DType::Float64);
13555        assert_eq!(result.values()[0], Scalar::Float64(1.5));
13556        assert_eq!(result.values()[1], Scalar::Float64(3.5));
13557    }
13558
13559    #[test]
13560    fn vectorized_i64_sub_and_mul() {
13561        let left = Column::from_values(vec![Scalar::Int64(10), Scalar::Int64(20)]).expect("left");
13562        let right = Column::from_values(vec![Scalar::Int64(3), Scalar::Int64(5)]).expect("right");
13563
13564        let sub = left.binary_numeric(&right, ArithmeticOp::Sub).expect("sub");
13565        assert_eq!(sub.values()[0], Scalar::Int64(7));
13566        assert_eq!(sub.values()[1], Scalar::Int64(15));
13567
13568        let mul = left.binary_numeric(&right, ArithmeticOp::Mul).expect("mul");
13569        assert_eq!(mul.values()[0], Scalar::Int64(30));
13570        assert_eq!(mul.values()[1], Scalar::Int64(100));
13571    }
13572
13573    // === AG-14: Database Cracking Tests ===
13574
13575    mod crack_tests {
13576        use fp_types::Scalar;
13577
13578        use super::super::*;
13579
13580        fn make_column(values: &[f64]) -> Column {
13581            Column::from_values(values.iter().map(|&v| Scalar::Float64(v)).collect()).expect("col")
13582        }
13583
13584        #[test]
13585        fn crack_filter_gt_basic() {
13586            let col = make_column(&[1.0, 5.0, 3.0, 7.0, 2.0]);
13587            let mut crack = CrackIndex::new(col.len());
13588
13589            let gt3 = crack.filter_gt(&col, 3.0);
13590            let mut gt3_vals: Vec<f64> = gt3
13591                .iter()
13592                .map(|&i| col.values()[i].to_f64().unwrap())
13593                .collect();
13594            gt3_vals.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
13595            assert_eq!(gt3_vals, vec![5.0, 7.0]);
13596            assert_eq!(crack.num_cracks(), 1);
13597        }
13598
13599        #[test]
13600        fn crack_filter_lte_basic() {
13601            let col = make_column(&[1.0, 5.0, 3.0, 7.0, 2.0]);
13602            let mut crack = CrackIndex::new(col.len());
13603
13604            let lte3 = crack.filter_lte(&col, 3.0);
13605            let mut lte3_vals: Vec<f64> = lte3
13606                .iter()
13607                .map(|&i| col.values()[i].to_f64().unwrap())
13608                .collect();
13609            lte3_vals.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
13610            assert_eq!(lte3_vals, vec![1.0, 2.0, 3.0]);
13611        }
13612
13613        #[test]
13614        fn crack_filter_eq() {
13615            let col = make_column(&[1.0, 3.0, 3.0, 7.0, 3.0]);
13616            let mut crack = CrackIndex::new(col.len());
13617
13618            let eq3 = crack.filter_eq(&col, 3.0);
13619            assert_eq!(eq3.len(), 3, "three values equal to 3.0");
13620            for &idx in &eq3 {
13621                assert_eq!(col.values()[idx].to_f64().unwrap(), 3.0);
13622            }
13623        }
13624
13625        #[test]
13626        fn crack_filter_lt() {
13627            let col = make_column(&[1.0, 5.0, 3.0, 7.0, 2.0]);
13628            let mut crack = CrackIndex::new(col.len());
13629
13630            let lt3 = crack.filter_lt(&col, 3.0);
13631            let mut lt3_vals: Vec<f64> = lt3
13632                .iter()
13633                .map(|&i| col.values()[i].to_f64().unwrap())
13634                .collect();
13635            lt3_vals.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
13636            assert_eq!(lt3_vals, vec![1.0, 2.0]);
13637        }
13638
13639        #[test]
13640        fn crack_filter_gte() {
13641            let col = make_column(&[1.0, 5.0, 3.0, 7.0, 2.0]);
13642            let mut crack = CrackIndex::new(col.len());
13643
13644            let gte3 = crack.filter_gte(&col, 3.0);
13645            let mut gte3_vals: Vec<f64> = gte3
13646                .iter()
13647                .map(|&i| col.values()[i].to_f64().unwrap())
13648                .collect();
13649            gte3_vals.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
13650            assert_eq!(gte3_vals, vec![3.0, 5.0, 7.0]);
13651        }
13652
13653        #[test]
13654        fn crack_progressive_refinement() {
13655            let col = make_column(&[10.0, 2.0, 8.0, 4.0, 6.0, 1.0, 9.0, 3.0, 7.0, 5.0]);
13656            let mut crack = CrackIndex::new(col.len());
13657
13658            // First crack at 5.0
13659            let gt5 = crack.filter_gt(&col, 5.0);
13660            assert_eq!(gt5.len(), 5);
13661            assert_eq!(crack.num_cracks(), 1);
13662
13663            // Second crack at 3.0 — only re-partitions the [<=5.0] region
13664            let gt3 = crack.filter_gt(&col, 3.0);
13665            assert_eq!(gt3.len(), 7); // 4,5,6,7,8,9,10
13666            assert_eq!(crack.num_cracks(), 2);
13667
13668            // Third crack at 7.0 — only re-partitions the [>5.0] region
13669            let gt7 = crack.filter_gt(&col, 7.0);
13670            assert_eq!(gt7.len(), 3); // 8,9,10
13671            assert_eq!(crack.num_cracks(), 3);
13672        }
13673
13674        #[test]
13675        fn crack_duplicate_crack_point_is_idempotent() {
13676            let col = make_column(&[1.0, 5.0, 3.0, 7.0, 2.0]);
13677            let mut crack = CrackIndex::new(col.len());
13678
13679            let gt3_first = crack.filter_gt(&col, 3.0);
13680            let gt3_second = crack.filter_gt(&col, 3.0);
13681
13682            // Same results both times
13683            let mut a: Vec<usize> = gt3_first;
13684            let mut b: Vec<usize> = gt3_second;
13685            a.sort_unstable();
13686            b.sort_unstable();
13687            assert_eq!(a, b);
13688            assert_eq!(crack.num_cracks(), 1, "no duplicate crack point");
13689        }
13690
13691        #[test]
13692        fn crack_empty_column() {
13693            let col = make_column(&[]);
13694            let mut crack = CrackIndex::new(col.len());
13695
13696            assert!(crack.filter_gt(&col, 5.0).is_empty());
13697            assert!(crack.filter_lte(&col, 5.0).is_empty());
13698        }
13699
13700        #[test]
13701        fn crack_single_element() {
13702            let col = make_column(&[42.0]);
13703            let mut crack = CrackIndex::new(col.len());
13704
13705            assert!(crack.filter_gt(&col, 42.0).is_empty());
13706            assert_eq!(crack.filter_lte(&col, 42.0).len(), 1);
13707            assert_eq!(crack.filter_eq(&col, 42.0).len(), 1);
13708        }
13709
13710        #[test]
13711        fn crack_all_same_values() {
13712            let col = make_column(&[5.0, 5.0, 5.0, 5.0]);
13713            let mut crack = CrackIndex::new(col.len());
13714
13715            assert!(crack.filter_gt(&col, 5.0).is_empty());
13716            assert_eq!(crack.filter_lte(&col, 5.0).len(), 4);
13717            assert_eq!(crack.filter_eq(&col, 5.0).len(), 4);
13718        }
13719
13720        #[test]
13721        fn crack_isomorphism_with_full_scan() {
13722            // Cracked filter must return identical results to naive full scan.
13723            let col = make_column(&[10.0, 2.0, 8.0, 4.0, 6.0, 1.0, 9.0, 3.0, 7.0, 5.0]);
13724            let mut crack = CrackIndex::new(col.len());
13725
13726            for pivot in [1.0, 3.0, 5.0, 7.0, 9.0, 0.0, 11.0] {
13727                let mut cracked: Vec<usize> = crack.filter_gt(&col, pivot);
13728                cracked.sort_unstable();
13729
13730                let mut naive: Vec<usize> = (0..col.len())
13731                    .filter(|&i| col.values()[i].to_f64().unwrap() > pivot)
13732                    .collect();
13733                naive.sort_unstable();
13734
13735                assert_eq!(
13736                    cracked, naive,
13737                    "cracked vs naive mismatch for pivot={pivot}"
13738                );
13739            }
13740        }
13741
13742        #[test]
13743        fn crack_int64_column() {
13744            let col = Column::from_values(vec![
13745                Scalar::Int64(10),
13746                Scalar::Int64(5),
13747                Scalar::Int64(3),
13748                Scalar::Int64(8),
13749                Scalar::Int64(1),
13750            ])
13751            .expect("col");
13752            let mut crack = CrackIndex::new(col.len());
13753
13754            let gt5 = crack.filter_gt(&col, 5.0);
13755            let mut gt5_vals: Vec<i64> = gt5
13756                .iter()
13757                .filter_map(|&i| match &col.values()[i] {
13758                    Scalar::Int64(v) => Some(*v),
13759                    _ => None,
13760                })
13761                .collect();
13762            assert_eq!(gt5_vals.len(), gt5.len(), "expected Int64 values");
13763            gt5_vals.sort_unstable();
13764            assert_eq!(gt5_vals, vec![8, 10]);
13765        }
13766
13767        #[test]
13768        fn crack_large_column_correctness() {
13769            let n = 1000;
13770            let values: Vec<f64> = (0..n).map(|i| ((i * 7 + 13) % n) as f64).collect();
13771            let col = make_column(&values);
13772            let mut crack = CrackIndex::new(col.len());
13773
13774            // Multiple cracks at different points
13775            for pivot in [100.0, 500.0, 250.0, 750.0, 50.0, 900.0] {
13776                let mut cracked: Vec<usize> = crack.filter_gt(&col, pivot);
13777                cracked.sort_unstable();
13778
13779                let mut naive: Vec<usize> =
13780                    (0..n as usize).filter(|&i| values[i] > pivot).collect();
13781                naive.sort_unstable();
13782
13783                assert_eq!(cracked, naive, "large column mismatch for pivot={pivot}");
13784            }
13785        }
13786    }
13787
13788    // === Comparison, Filter, and Missing-Data Operation Tests ===
13789
13790    mod comparison_tests {
13791        use fp_types::{NullKind, Scalar};
13792
13793        use super::super::*;
13794
13795        #[test]
13796        fn comparison_gt_int64() {
13797            let left =
13798                Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(5), Scalar::Int64(3)])
13799                    .expect("left");
13800            let right =
13801                Column::from_values(vec![Scalar::Int64(3), Scalar::Int64(3), Scalar::Int64(3)])
13802                    .expect("right");
13803
13804            let result = left
13805                .binary_comparison(&right, ComparisonOp::Gt)
13806                .expect("gt");
13807            assert_eq!(result.dtype(), fp_types::DType::Bool);
13808            assert_eq!(result.values()[0], Scalar::Bool(false));
13809            assert_eq!(result.values()[1], Scalar::Bool(true));
13810            assert_eq!(result.values()[2], Scalar::Bool(false));
13811        }
13812
13813        #[test]
13814        fn comparison_all_ops_numeric() {
13815            let left = Column::from_values(vec![Scalar::Float64(5.0)]).expect("left");
13816            let right = Column::from_values(vec![Scalar::Float64(3.0)]).expect("right");
13817
13818            let gt = left
13819                .binary_comparison(&right, ComparisonOp::Gt)
13820                .expect("gt");
13821            let lt = left
13822                .binary_comparison(&right, ComparisonOp::Lt)
13823                .expect("lt");
13824            let eq = left
13825                .binary_comparison(&right, ComparisonOp::Eq)
13826                .expect("eq");
13827            let ne = left
13828                .binary_comparison(&right, ComparisonOp::Ne)
13829                .expect("ne");
13830            let ge = left
13831                .binary_comparison(&right, ComparisonOp::Ge)
13832                .expect("ge");
13833            let le = left
13834                .binary_comparison(&right, ComparisonOp::Le)
13835                .expect("le");
13836
13837            assert_eq!(gt.values()[0], Scalar::Bool(true));
13838            assert_eq!(lt.values()[0], Scalar::Bool(false));
13839            assert_eq!(eq.values()[0], Scalar::Bool(false));
13840            assert_eq!(ne.values()[0], Scalar::Bool(true));
13841            assert_eq!(ge.values()[0], Scalar::Bool(true));
13842            assert_eq!(le.values()[0], Scalar::Bool(false));
13843        }
13844
13845        #[test]
13846        fn pandas_comparison_aliases_match_binary_comparison() {
13847            let left = Column::from_values(vec![Scalar::Float64(5.0)]).expect("left");
13848            let right = Column::from_values(vec![Scalar::Float64(3.0)]).expect("right");
13849
13850            assert_eq!(
13851                left.eq(&right).expect("eq"),
13852                left.binary_comparison(&right, ComparisonOp::Eq)
13853                    .expect("eq")
13854            );
13855            assert_eq!(
13856                left.ne(&right).expect("ne"),
13857                left.binary_comparison(&right, ComparisonOp::Ne)
13858                    .expect("ne")
13859            );
13860            assert_eq!(
13861                left.lt(&right).expect("lt"),
13862                left.binary_comparison(&right, ComparisonOp::Lt)
13863                    .expect("lt")
13864            );
13865            assert_eq!(
13866                left.le(&right).expect("le"),
13867                left.binary_comparison(&right, ComparisonOp::Le)
13868                    .expect("le")
13869            );
13870            assert_eq!(
13871                left.gt(&right).expect("gt"),
13872                left.binary_comparison(&right, ComparisonOp::Gt)
13873                    .expect("gt")
13874            );
13875            assert_eq!(
13876                left.ge(&right).expect("ge"),
13877                left.binary_comparison(&right, ComparisonOp::Ge)
13878                    .expect("ge")
13879            );
13880        }
13881
13882        #[test]
13883        fn comparison_equality_equal_values() {
13884            let col = Column::from_values(vec![Scalar::Int64(42)]).expect("col");
13885            let result = col.binary_comparison(&col, ComparisonOp::Eq).expect("eq");
13886            assert_eq!(result.values()[0], Scalar::Bool(true));
13887
13888            let ne = col.binary_comparison(&col, ComparisonOp::Ne).expect("ne");
13889            assert_eq!(ne.values()[0], Scalar::Bool(false));
13890        }
13891
13892        #[test]
13893        fn comparison_null_propagation() {
13894            let left = Column::from_values(vec![
13895                Scalar::Int64(1),
13896                Scalar::Null(NullKind::Null),
13897                Scalar::Int64(3),
13898            ])
13899            .expect("left");
13900            let right = Column::from_values(vec![
13901                Scalar::Int64(2),
13902                Scalar::Int64(2),
13903                Scalar::Null(NullKind::Null),
13904            ])
13905            .expect("right");
13906
13907            let result = left
13908                .binary_comparison(&right, ComparisonOp::Gt)
13909                .expect("gt");
13910            assert_eq!(result.values()[0], Scalar::Bool(false));
13911            assert!(result.values()[1].is_missing(), "null op valid = null");
13912            assert!(result.values()[2].is_missing(), "valid op null = null");
13913        }
13914
13915        #[test]
13916        fn comparison_utf8_lexicographic() {
13917            let left = Column::from_values(vec![
13918                Scalar::Utf8("banana".to_string()),
13919                Scalar::Utf8("apple".to_string()),
13920            ])
13921            .expect("left");
13922            let right = Column::from_values(vec![
13923                Scalar::Utf8("apple".to_string()),
13924                Scalar::Utf8("cherry".to_string()),
13925            ])
13926            .expect("right");
13927
13928            let gt = left
13929                .binary_comparison(&right, ComparisonOp::Gt)
13930                .expect("gt");
13931            assert_eq!(gt.values()[0], Scalar::Bool(true));
13932            assert_eq!(gt.values()[1], Scalar::Bool(false));
13933        }
13934
13935        #[test]
13936        fn compare_scalar_gt() {
13937            let col = Column::from_values(vec![
13938                Scalar::Int64(1),
13939                Scalar::Int64(5),
13940                Scalar::Null(NullKind::Null),
13941                Scalar::Int64(3),
13942            ])
13943            .expect("col");
13944
13945            let result = col
13946                .compare_scalar(&Scalar::Int64(3), ComparisonOp::Gt)
13947                .expect("gt");
13948            assert_eq!(result.values()[0], Scalar::Bool(false));
13949            assert_eq!(result.values()[1], Scalar::Bool(true));
13950            assert!(result.values()[2].is_missing());
13951            assert_eq!(result.values()[3], Scalar::Bool(false));
13952        }
13953
13954        #[test]
13955        fn compare_scalar_with_missing_scalar() {
13956            let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
13957
13958            let result = col
13959                .compare_scalar(&Scalar::Null(NullKind::Null), ComparisonOp::Eq)
13960                .expect("eq");
13961            assert!(result.values()[0].is_missing());
13962            assert!(result.values()[1].is_missing());
13963        }
13964
13965        #[test]
13966        fn filter_by_mask_basic() {
13967            let col = Column::from_values(vec![
13968                Scalar::Int64(10),
13969                Scalar::Int64(20),
13970                Scalar::Int64(30),
13971                Scalar::Int64(40),
13972            ])
13973            .expect("col");
13974            let mask = Column::from_values(vec![
13975                Scalar::Bool(true),
13976                Scalar::Bool(false),
13977                Scalar::Bool(true),
13978                Scalar::Bool(false),
13979            ])
13980            .expect("mask");
13981
13982            let result = col.filter_by_mask(&mask).expect("filter");
13983            assert_eq!(result.len(), 2);
13984            assert_eq!(result.values()[0], Scalar::Int64(10));
13985            assert_eq!(result.values()[1], Scalar::Int64(30));
13986        }
13987
13988        #[test]
13989        fn filter_by_mask_float64_typed_path_matches_scalar() {
13990            // The Float64 typed gather (as_f64_slice + from_f64_values) must be
13991            // bit-identical to the Scalar clone path: same selected values, same
13992            // order, all-valid result.
13993            let col = Column::from_f64_values(vec![1.5, -0.0, 2.5, f64::INFINITY, 0.0]);
13994            let mask = Column::from_values(vec![
13995                Scalar::Bool(true),
13996                Scalar::Bool(true),
13997                Scalar::Bool(false),
13998                Scalar::Bool(true),
13999                Scalar::Null(NullKind::Null), // missing -> not selected
14000            ])
14001            .expect("mask");
14002            let result = col.filter_by_mask(&mask).expect("filter");
14003            assert_eq!(result.dtype(), DType::Float64);
14004            assert_eq!(
14005                result.values(),
14006                &[
14007                    Scalar::Float64(1.5),
14008                    Scalar::Float64(-0.0),
14009                    Scalar::Float64(f64::INFINITY),
14010                ]
14011            );
14012        }
14013
14014        #[test]
14015        fn compare_scalar_typed_path_matches_scalar_compare() {
14016            // Isomorphism proof for br-frankenpandas-2kpwa: the typed f64/i64
14017            // compare_scalar fast paths must be bit-identical to the per-element
14018            // scalar_compare reference for every op and operand combination.
14019            let f64_vals = vec![1.5f64, -0.0, 0.0, 2.5, -3.0, f64::INFINITY, 100.0];
14020            let i64_vals = vec![1i64, -2, 0, 5, 100, -7];
14021            let ops = [
14022                ComparisonOp::Gt,
14023                ComparisonOp::Lt,
14024                ComparisonOp::Eq,
14025                ComparisonOp::Ne,
14026                ComparisonOp::Ge,
14027                ComparisonOp::Le,
14028            ];
14029            for op in ops {
14030                // Float64 column vs Float64 scalar.
14031                for &probe in &[0.0f64, 1.5, 2.5, -3.0, f64::INFINITY] {
14032                    let got = Column::from_f64_values(f64_vals.clone())
14033                        .compare_scalar(&Scalar::Float64(probe), op)
14034                        .expect("f64 cmp");
14035                    let expected: Vec<Scalar> = f64_vals
14036                        .iter()
14037                        .map(|&v| {
14038                            Scalar::Bool(
14039                                scalar_compare(&Scalar::Float64(v), &Scalar::Float64(probe), op)
14040                                    .unwrap(),
14041                            )
14042                        })
14043                        .collect();
14044                    assert_eq!(
14045                        got.values(),
14046                        expected.as_slice(),
14047                        "f64 op {op:?} probe {probe}"
14048                    );
14049                }
14050                // Float64 column vs Int64 scalar (f64-promotion branch).
14051                let got = Column::from_f64_values(f64_vals.clone())
14052                    .compare_scalar(&Scalar::Int64(2), op)
14053                    .expect("f64-vs-i64 cmp");
14054                let expected: Vec<Scalar> = f64_vals
14055                    .iter()
14056                    .map(|&v| {
14057                        Scalar::Bool(
14058                            scalar_compare(&Scalar::Float64(v), &Scalar::Int64(2), op).unwrap(),
14059                        )
14060                    })
14061                    .collect();
14062                assert_eq!(got.values(), expected.as_slice(), "f64-vs-i64 op {op:?}");
14063                // Int64 column vs Int64 scalar (both-Int64 branch).
14064                let got = Column::from_i64_values(i64_vals.clone())
14065                    .compare_scalar(&Scalar::Int64(0), op)
14066                    .expect("i64 cmp");
14067                let expected: Vec<Scalar> = i64_vals
14068                    .iter()
14069                    .map(|&v| {
14070                        Scalar::Bool(
14071                            scalar_compare(&Scalar::Int64(v), &Scalar::Int64(0), op).unwrap(),
14072                        )
14073                    })
14074                    .collect();
14075                assert_eq!(got.values(), expected.as_slice(), "i64 op {op:?}");
14076            }
14077        }
14078
14079        #[test]
14080        #[ignore = "perf timing harness, run with --ignored"]
14081        fn compare_scalar_typed_vs_aos_timing() {
14082            use std::time::Instant;
14083            let n = 5_000_000usize;
14084            let raw: Vec<f64> = (0..n).map(|i| (i % 1000) as f64 - 500.0).collect();
14085            let scalars: Vec<Scalar> = raw.iter().map(|&v| Scalar::Float64(v)).collect();
14086            let probe = Scalar::Float64(0.0);
14087            let op = ComparisonOp::Gt;
14088
14089            // AoS reference: per-element scalar_compare + Scalar::Bool alloc.
14090            let t = Instant::now();
14091            let aos: Vec<Scalar> = scalars
14092                .iter()
14093                .map(|v| Scalar::Bool(scalar_compare(v, &probe, op).unwrap()))
14094                .collect();
14095            let aos_ns = t.elapsed().as_nanos();
14096            std::hint::black_box(&aos);
14097
14098            // Typed path through compare_scalar (as_f64_slice -> from_bool_values).
14099            let col = Column::from_f64_values(raw.clone());
14100            let t = Instant::now();
14101            let typed = col.compare_scalar(&probe, op).expect("typed cmp");
14102            let typed_ns = t.elapsed().as_nanos();
14103            std::hint::black_box(&typed);
14104
14105            assert_eq!(typed.values(), aos.as_slice(), "typed must match AoS");
14106            let ratio = aos_ns as f64 / typed_ns as f64;
14107            println!(
14108                "compare_scalar Gt n={n}: AoS {aos_ns}ns  typed {typed_ns}ns  Score={ratio:.2}x"
14109            );
14110        }
14111
14112        #[test]
14113        fn filter_by_mask_null_treated_as_false() {
14114            let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
14115            let mask = Column::from_values(vec![Scalar::Bool(true), Scalar::Null(NullKind::Null)])
14116                .expect("mask");
14117
14118            let result = col.filter_by_mask(&mask).expect("filter");
14119            assert_eq!(result.len(), 1);
14120            assert_eq!(result.values()[0], Scalar::Int64(1));
14121        }
14122
14123        #[test]
14124        fn filter_by_mask_rejects_non_boolean_mask() {
14125            let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
14126            let mask = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(0)]).expect("mask");
14127
14128            let err = col.filter_by_mask(&mask).expect_err("non-bool mask");
14129            assert!(matches!(err, ColumnError::InvalidMaskType { .. }));
14130        }
14131
14132        #[test]
14133        fn filter_by_mask_empty_result() {
14134            let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
14135            let mask =
14136                Column::from_values(vec![Scalar::Bool(false), Scalar::Bool(false)]).expect("mask");
14137
14138            let result = col.filter_by_mask(&mask).expect("filter");
14139            assert!(result.is_empty());
14140        }
14141
14142        #[test]
14143        fn fillna_replaces_missing() {
14144            let col = Column::from_values(vec![
14145                Scalar::Int64(1),
14146                Scalar::Null(NullKind::Null),
14147                Scalar::Int64(3),
14148                Scalar::Null(NullKind::Null),
14149            ])
14150            .expect("col");
14151
14152            let result = col.fillna(&Scalar::Int64(0)).expect("fillna");
14153            assert_eq!(result.values()[0], Scalar::Int64(1));
14154            assert_eq!(result.values()[1], Scalar::Int64(0));
14155            assert_eq!(result.values()[2], Scalar::Int64(3));
14156            assert_eq!(result.values()[3], Scalar::Int64(0));
14157            assert_eq!(result.validity().count_valid(), 4);
14158        }
14159
14160        #[test]
14161        fn dropna_removes_missing() {
14162            let col = Column::from_values(vec![
14163                Scalar::Int64(1),
14164                Scalar::Null(NullKind::Null),
14165                Scalar::Int64(3),
14166                Scalar::Null(NullKind::NaN),
14167            ])
14168            .expect("col");
14169
14170            let result = col.dropna().expect("dropna");
14171            assert_eq!(result.len(), 2);
14172            assert_eq!(result.values()[0], Scalar::Int64(1));
14173            assert_eq!(result.values()[1], Scalar::Int64(3));
14174        }
14175
14176        #[test]
14177        fn comparison_empty_columns() {
14178            let left = Column::from_values(vec![]).expect("left");
14179            let right = Column::from_values(vec![]).expect("right");
14180            let result = left
14181                .binary_comparison(&right, ComparisonOp::Eq)
14182                .expect("eq");
14183            assert!(result.is_empty());
14184        }
14185
14186        #[test]
14187        fn comparison_length_mismatch_error() {
14188            let left = Column::from_values(vec![Scalar::Int64(1)]).expect("left");
14189            let right =
14190                Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("right");
14191            assert!(left.binary_comparison(&right, ComparisonOp::Eq).is_err());
14192        }
14193
14194        #[test]
14195        fn comparison_bool_ordering() {
14196            let left =
14197                Column::from_values(vec![Scalar::Bool(true), Scalar::Bool(false)]).expect("left");
14198            let right =
14199                Column::from_values(vec![Scalar::Bool(false), Scalar::Bool(true)]).expect("right");
14200
14201            let gt = left
14202                .binary_comparison(&right, ComparisonOp::Gt)
14203                .expect("gt");
14204            assert_eq!(gt.values()[0], Scalar::Bool(true));
14205            assert_eq!(gt.values()[1], Scalar::Bool(false));
14206        }
14207    }
14208
14209    mod iter_and_predicates {
14210        use fp_types::NullKind;
14211
14212        use super::*;
14213
14214        #[test]
14215        fn iter_values_preserves_order() {
14216            let col =
14217                Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)])
14218                    .expect("col");
14219            let collected: Vec<_> = col.iter_values().cloned().collect();
14220            assert_eq!(collected, col.values());
14221        }
14222
14223        #[test]
14224        fn to_vec_returns_owned_clone() {
14225            let col = Column::from_values(vec![Scalar::Int64(5), Scalar::Int64(6)]).expect("col");
14226            let v = col.to_vec();
14227            assert_eq!(v, vec![Scalar::Int64(5), Scalar::Int64(6)]);
14228            // Column still owns its values; to_vec was a clone.
14229            assert_eq!(col.len(), 2);
14230        }
14231
14232        #[test]
14233        fn copy_returns_independent_clone() {
14234            let col = Column::from_values(vec![Scalar::Int64(5), Scalar::Int64(6)]).expect("col");
14235            let copied = col.copy();
14236            let viewed = col.view();
14237            let transposed = col.transpose();
14238            assert_eq!(copied, col);
14239            assert_eq!(viewed, col);
14240            assert_eq!(transposed, col);
14241            assert_eq!(col.t(), transposed);
14242            assert_eq!(col.T(), transposed);
14243            assert_ne!(copied.values().as_ptr(), col.values().as_ptr());
14244            assert_ne!(viewed.values().as_ptr(), col.values().as_ptr());
14245            assert_ne!(transposed.values().as_ptr(), col.values().as_ptr());
14246        }
14247
14248        #[test]
14249        fn item_extracts_single_value_and_rejects_other_lengths() {
14250            let single = Column::from_values(vec![Scalar::Int64(5)]).expect("col");
14251            assert_eq!(single.item(), Ok(Scalar::Int64(5)));
14252
14253            let empty = Column::from_values(Vec::<Scalar>::new()).expect("col");
14254            assert_eq!(
14255                empty.item(),
14256                Err(crate::ColumnError::InvalidLength {
14257                    operation: "item()",
14258                    expected: 1,
14259                    actual: 0,
14260                })
14261            );
14262
14263            let multi = Column::from_values(vec![Scalar::Int64(5), Scalar::Int64(6)]).expect("col");
14264            assert_eq!(
14265                multi.item(),
14266                Err(crate::ColumnError::InvalidLength {
14267                    operation: "item()",
14268                    expected: 1,
14269                    actual: 2,
14270                })
14271            );
14272        }
14273
14274        #[test]
14275        fn has_any_missing_detects_null() {
14276            let populated =
14277                Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
14278            assert!(!populated.has_any_missing());
14279            assert_eq!(populated.hasnans(), populated.has_any_missing());
14280            assert_eq!(populated.nbytes(), populated.memory_usage(false));
14281
14282            let with_null =
14283                Column::from_values(vec![Scalar::Int64(1), Scalar::Null(NullKind::NaN)])
14284                    .expect("col");
14285            assert!(with_null.has_any_missing());
14286            assert_eq!(with_null.hasnans(), with_null.has_any_missing());
14287            assert_eq!(with_null.nbytes(), with_null.memory_usage(false));
14288        }
14289
14290        #[test]
14291        fn all_missing_empty_is_true() {
14292            let empty = Column::from_values(Vec::<Scalar>::new()).expect("col");
14293            assert!(empty.all_missing());
14294
14295            let all_null = Column::from_values(vec![
14296                Scalar::Null(NullKind::NaN),
14297                Scalar::Null(NullKind::Null),
14298            ])
14299            .expect("col");
14300            assert!(all_null.all_missing());
14301
14302            let mixed = Column::from_values(vec![Scalar::Int64(1), Scalar::Null(NullKind::NaN)])
14303                .expect("col");
14304            assert!(!mixed.all_missing());
14305        }
14306
14307        #[test]
14308        fn apply_bool_positive_predicate() {
14309            let col = Column::from_values(vec![
14310                Scalar::Int64(1),
14311                Scalar::Int64(2),
14312                Scalar::Int64(3),
14313                Scalar::Int64(4),
14314            ])
14315            .expect("col");
14316            let even = col
14317                .apply_bool(|v| v.to_f64().map(|f| f as i64 % 2 == 0).unwrap_or(false))
14318                .expect("apply_bool");
14319            assert_eq!(even.dtype(), DType::Bool);
14320            assert_eq!(even.values()[0], Scalar::Bool(false));
14321            assert_eq!(even.values()[1], Scalar::Bool(true));
14322            assert_eq!(even.values()[2], Scalar::Bool(false));
14323            assert_eq!(even.values()[3], Scalar::Bool(true));
14324        }
14325
14326        #[test]
14327        fn first_and_last_return_endpoints() {
14328            let col = Column::from_values(vec![
14329                Scalar::Int64(10),
14330                Scalar::Int64(20),
14331                Scalar::Int64(30),
14332            ])
14333            .expect("col");
14334            assert_eq!(col.first(), Some(&Scalar::Int64(10)));
14335            assert_eq!(col.last(), Some(&Scalar::Int64(30)));
14336
14337            let empty = Column::from_values(Vec::<Scalar>::new()).expect("col");
14338            assert_eq!(empty.first(), None);
14339            assert_eq!(empty.last(), None);
14340        }
14341
14342        #[test]
14343        fn count_matching_ignores_missing_and_mismatches() {
14344            let col = Column::from_values(vec![
14345                Scalar::Int64(1),
14346                Scalar::Int64(2),
14347                Scalar::Null(NullKind::NaN),
14348                Scalar::Int64(4),
14349                Scalar::Int64(6),
14350            ])
14351            .expect("col");
14352            let evens =
14353                col.count_matching(|v| v.to_f64().map(|f| f as i64 % 2 == 0).unwrap_or(false));
14354            assert_eq!(evens, 3); // 2, 4, 6 — missing not counted.
14355        }
14356
14357        #[test]
14358        fn zip_with_elementwise_combine() {
14359            let a = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)])
14360                .expect("a");
14361            let b = Column::from_values(vec![
14362                Scalar::Int64(10),
14363                Scalar::Int64(20),
14364                Scalar::Int64(30),
14365            ])
14366            .expect("b");
14367            let sum = a
14368                .zip_with(&b, |l, r| match (l.to_f64(), r.to_f64()) {
14369                    (Ok(lf), Ok(rf)) => Scalar::Float64(lf + rf),
14370                    _ => Scalar::Null(NullKind::NaN),
14371                })
14372                .expect("zip_with");
14373            assert_eq!(sum.values()[0], Scalar::Float64(11.0));
14374            assert_eq!(sum.values()[1], Scalar::Float64(22.0));
14375            assert_eq!(sum.values()[2], Scalar::Float64(33.0));
14376        }
14377
14378        #[test]
14379        fn zip_with_length_mismatch_errors() {
14380            let a = Column::from_values(vec![Scalar::Int64(1)]).expect("a");
14381            let b = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("b");
14382            assert!(a.zip_with(&b, |l, _| l.clone()).is_err());
14383        }
14384
14385        #[test]
14386        fn iter_enumerate_yields_positions() {
14387            let col = Column::from_values(vec![Scalar::Int64(10), Scalar::Int64(20)]).expect("col");
14388            let collected: Vec<_> = col.iter_enumerate().map(|(i, v)| (i, v.clone())).collect();
14389            assert_eq!(
14390                collected,
14391                vec![(0, Scalar::Int64(10)), (1, Scalar::Int64(20))]
14392            );
14393        }
14394
14395        #[test]
14396        fn apply_bool_missing_maps_to_false() {
14397            let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Null(NullKind::NaN)])
14398                .expect("col");
14399            let result = col.apply_bool(|_| true).expect("apply_bool");
14400            assert_eq!(result.values()[0], Scalar::Bool(true));
14401            // Missing input → false (per doc contract).
14402            assert_eq!(result.values()[1], Scalar::Bool(false));
14403        }
14404    }
14405
14406    mod take_slice_concat_repeat {
14407        use super::*;
14408
14409        #[test]
14410        fn take_reorders_rows() {
14411            let col = Column::from_values(vec![
14412                Scalar::Int64(10),
14413                Scalar::Int64(20),
14414                Scalar::Int64(30),
14415            ])
14416            .expect("col");
14417            let picked = col.take(&[2, 0, 1]).expect("take");
14418            assert_eq!(picked.values()[0], Scalar::Int64(30));
14419            assert_eq!(picked.values()[1], Scalar::Int64(10));
14420            assert_eq!(picked.values()[2], Scalar::Int64(20));
14421        }
14422
14423        #[test]
14424        fn take_out_of_bounds_errors() {
14425            let col = Column::from_values(vec![Scalar::Int64(1)]).expect("col");
14426            let err = col.take(&[5]).unwrap_err();
14427            assert!(matches!(err, crate::ColumnError::LengthMismatch { .. }));
14428        }
14429
14430        #[test]
14431        fn slice_returns_contiguous_range() {
14432            let col = Column::from_values(vec![
14433                Scalar::Int64(1),
14434                Scalar::Int64(2),
14435                Scalar::Int64(3),
14436                Scalar::Int64(4),
14437            ])
14438            .expect("col");
14439            let middle = col.slice(1, 2).expect("slice");
14440            assert_eq!(middle.len(), 2);
14441            assert_eq!(middle.values()[0], Scalar::Int64(2));
14442            assert_eq!(middle.values()[1], Scalar::Int64(3));
14443        }
14444
14445        #[test]
14446        fn slice_past_end_yields_empty() {
14447            let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
14448            let empty = col.slice(10, 5).expect("slice");
14449            assert!(empty.is_empty());
14450            assert_eq!(empty.dtype(), DType::Int64);
14451        }
14452
14453        #[test]
14454        fn slice_len_clamps_to_tail() {
14455            let col = Column::from_values(vec![
14456                Scalar::Float64(1.0),
14457                Scalar::Float64(2.0),
14458                Scalar::Float64(3.0),
14459            ])
14460            .expect("col");
14461            let tail = col.slice(2, 100).expect("slice");
14462            assert_eq!(tail.len(), 1);
14463            assert_eq!(tail.values()[0], Scalar::Float64(3.0));
14464        }
14465
14466        #[test]
14467        fn slice_huge_len_clamps_without_overflow() {
14468            let col =
14469                Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)])
14470                    .expect("col");
14471            let tail = col.slice(1, usize::MAX).expect("slice");
14472            assert_eq!(tail.values(), &[Scalar::Int64(2), Scalar::Int64(3)]);
14473        }
14474
14475        #[test]
14476        fn head_returns_first_n_values() {
14477            let col = Column::from_values(vec![
14478                Scalar::Int64(10),
14479                Scalar::Int64(20),
14480                Scalar::Int64(30),
14481                Scalar::Int64(40),
14482            ])
14483            .expect("col");
14484            let out = col.head(2).expect("head");
14485            assert_eq!(out.values(), &[Scalar::Int64(10), Scalar::Int64(20)]);
14486        }
14487
14488        #[test]
14489        fn tail_returns_last_n_values() {
14490            let col = Column::from_values(vec![
14491                Scalar::Int64(10),
14492                Scalar::Int64(20),
14493                Scalar::Int64(30),
14494                Scalar::Int64(40),
14495            ])
14496            .expect("col");
14497            let out = col.tail(2).expect("tail");
14498            assert_eq!(out.values(), &[Scalar::Int64(30), Scalar::Int64(40)]);
14499        }
14500
14501        #[test]
14502        fn head_tail_negative_n_match_pandas_style() {
14503            let col = Column::from_values(vec![
14504                Scalar::Int64(10),
14505                Scalar::Int64(20),
14506                Scalar::Int64(30),
14507                Scalar::Int64(40),
14508                Scalar::Int64(50),
14509            ])
14510            .expect("col");
14511            let head = col.head(-2).expect("head");
14512            let tail = col.tail(-2).expect("tail");
14513            assert_eq!(
14514                head.values(),
14515                &[Scalar::Int64(10), Scalar::Int64(20), Scalar::Int64(30)]
14516            );
14517            assert_eq!(
14518                tail.values(),
14519                &[Scalar::Int64(30), Scalar::Int64(40), Scalar::Int64(50)]
14520            );
14521        }
14522
14523        #[test]
14524        fn head_tail_large_negative_n_saturate_to_empty() {
14525            let col = Column::from_values(vec![
14526                Scalar::Float64(1.0),
14527                Scalar::Float64(2.0),
14528                Scalar::Float64(3.0),
14529            ])
14530            .expect("col");
14531            let head = col.head(-10).expect("head");
14532            let tail = col.tail(-10).expect("tail");
14533            assert!(head.is_empty());
14534            assert!(tail.is_empty());
14535            assert_eq!(head.dtype(), DType::Float64);
14536            assert_eq!(tail.dtype(), DType::Float64);
14537        }
14538
14539        #[test]
14540        fn concat_appends_same_dtype() {
14541            let a = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("a");
14542            let b = Column::from_values(vec![Scalar::Int64(3)]).expect("b");
14543            let combined = a.concat(&b).expect("concat");
14544            assert_eq!(combined.len(), 3);
14545            assert_eq!(combined.values()[2], Scalar::Int64(3));
14546        }
14547
14548        #[test]
14549        fn concat_different_dtypes_errors() {
14550            let a = Column::from_values(vec![Scalar::Int64(1)]).expect("a");
14551            let b = Column::from_values(vec![Scalar::Utf8("x".into())]).expect("b");
14552            let err = a.concat(&b).unwrap_err();
14553            assert!(matches!(err, crate::ColumnError::DTypeMismatch { .. }));
14554        }
14555
14556        #[test]
14557        fn repeat_duplicates_contiguously() {
14558            let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
14559            let out = col.repeat(3).expect("repeat");
14560            assert_eq!(out.len(), 6);
14561            assert_eq!(out.values()[0], Scalar::Int64(1));
14562            assert_eq!(out.values()[1], Scalar::Int64(1));
14563            assert_eq!(out.values()[2], Scalar::Int64(1));
14564            assert_eq!(out.values()[3], Scalar::Int64(2));
14565        }
14566
14567        #[test]
14568        fn repeat_zero_is_empty_same_dtype() {
14569            let col = Column::from_values(vec![Scalar::Int64(1)]).expect("col");
14570            let out = col.repeat(0).expect("repeat");
14571            assert!(out.is_empty());
14572            assert_eq!(out.dtype(), DType::Int64);
14573        }
14574
14575        #[test]
14576        fn repeat_one_is_clone() {
14577            let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
14578            let out = col.repeat(1).expect("repeat");
14579            assert_eq!(out.values(), col.values());
14580        }
14581    }
14582
14583    mod reverse_head_tail_cumulatives_unique {
14584        use fp_types::NullKind;
14585
14586        use super::*;
14587
14588        #[test]
14589        fn reverse_swaps_order_and_preserves_dtype() {
14590            let col =
14591                Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)])
14592                    .expect("col");
14593            let r = col.reverse().expect("reverse");
14594            assert_eq!(r.values()[0], Scalar::Int64(3));
14595            assert_eq!(r.values()[2], Scalar::Int64(1));
14596            assert_eq!(r.dtype(), DType::Int64);
14597        }
14598
14599        #[test]
14600        fn head_positive_takes_first_n() {
14601            let col = Column::from_values(vec![
14602                Scalar::Int64(1),
14603                Scalar::Int64(2),
14604                Scalar::Int64(3),
14605                Scalar::Int64(4),
14606            ])
14607            .expect("col");
14608            let h = col.head(2).expect("head");
14609            assert_eq!(h.len(), 2);
14610            assert_eq!(h.values()[0], Scalar::Int64(1));
14611            assert_eq!(h.values()[1], Scalar::Int64(2));
14612        }
14613
14614        #[test]
14615        fn head_negative_drops_last_n() {
14616            let col =
14617                Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)])
14618                    .expect("col");
14619            let h = col.head(-1).expect("head");
14620            assert_eq!(h.len(), 2);
14621            assert_eq!(h.values()[1], Scalar::Int64(2));
14622        }
14623
14624        #[test]
14625        fn tail_positive_takes_last_n() {
14626            let col = Column::from_values(vec![
14627                Scalar::Int64(1),
14628                Scalar::Int64(2),
14629                Scalar::Int64(3),
14630                Scalar::Int64(4),
14631            ])
14632            .expect("col");
14633            let t = col.tail(2).expect("tail");
14634            assert_eq!(t.len(), 2);
14635            assert_eq!(t.values()[0], Scalar::Int64(3));
14636            assert_eq!(t.values()[1], Scalar::Int64(4));
14637        }
14638
14639        #[test]
14640        fn tail_negative_drops_first_n() {
14641            let col =
14642                Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)])
14643                    .expect("col");
14644            let t = col.tail(-1).expect("tail");
14645            assert_eq!(t.len(), 2);
14646            assert_eq!(t.values()[0], Scalar::Int64(2));
14647        }
14648
14649        #[test]
14650        fn head_tail_out_of_range_clamps() {
14651            let col = Column::from_values(vec![Scalar::Int64(1)]).expect("col");
14652            assert_eq!(col.head(10).expect("head").len(), 1);
14653            assert_eq!(col.tail(10).expect("tail").len(), 1);
14654            assert_eq!(col.head(-10).expect("head").len(), 0);
14655            assert_eq!(col.tail(-10).expect("tail").len(), 0);
14656        }
14657
14658        #[test]
14659        fn cumsum_produces_float64_running_sum() {
14660            let col = Column::from_values(vec![
14661                Scalar::Float64(1.0),
14662                Scalar::Null(NullKind::NaN),
14663                Scalar::Float64(3.0),
14664            ])
14665            .expect("col");
14666            let c = col.cumsum().expect("cumsum");
14667            assert_eq!(c.dtype(), DType::Float64);
14668            assert_eq!(c.values()[0], Scalar::Float64(1.0));
14669            assert!(c.values()[1].is_missing());
14670            assert_eq!(c.values()[2], Scalar::Float64(4.0));
14671        }
14672
14673        #[test]
14674        fn cumprod_running_product() {
14675            let col = Column::from_values(vec![
14676                Scalar::Float64(2.0),
14677                Scalar::Float64(3.0),
14678                Scalar::Float64(4.0),
14679            ])
14680            .expect("col");
14681            let c = col.cumprod().expect("cumprod");
14682            assert_eq!(c.values()[2], Scalar::Float64(24.0));
14683        }
14684
14685        #[test]
14686        fn cummax_cummin_running_extrema() {
14687            let col = Column::from_values(vec![
14688                Scalar::Float64(3.0),
14689                Scalar::Float64(1.0),
14690                Scalar::Float64(4.0),
14691                Scalar::Float64(1.0),
14692                Scalar::Float64(5.0),
14693            ])
14694            .expect("col");
14695            let mx = col.cummax().expect("cummax");
14696            assert_eq!(mx.values()[4], Scalar::Float64(5.0));
14697            let mn = col.cummin().expect("cummin");
14698            assert_eq!(mn.values()[4], Scalar::Float64(1.0));
14699        }
14700
14701        #[test]
14702        fn unique_preserves_first_seen_order() {
14703            let col = Column::from_values(vec![
14704                Scalar::Int64(3),
14705                Scalar::Int64(1),
14706                Scalar::Int64(3),
14707                Scalar::Int64(2),
14708                Scalar::Int64(1),
14709            ])
14710            .expect("col");
14711            let u = col.unique().expect("unique");
14712            assert_eq!(u.len(), 3);
14713            assert_eq!(u.values()[0], Scalar::Int64(3));
14714            assert_eq!(u.values()[1], Scalar::Int64(1));
14715            assert_eq!(u.values()[2], Scalar::Int64(2));
14716        }
14717
14718        #[test]
14719        fn unique_drops_nulls() {
14720            let col = Column::from_values(vec![
14721                Scalar::Int64(1),
14722                Scalar::Null(NullKind::NaN),
14723                Scalar::Int64(1),
14724                Scalar::Null(NullKind::NaN),
14725            ])
14726            .expect("col");
14727            let u = col.unique().expect("unique");
14728            assert_eq!(u.len(), 1);
14729            assert_eq!(u.values()[0], Scalar::Int64(1));
14730        }
14731    }
14732
14733    mod abs_shift_clip_round_isin {
14734        use fp_types::NullKind;
14735
14736        use super::*;
14737
14738        #[test]
14739        fn abs_int_and_float() {
14740            let int_col =
14741                Column::from_values(vec![Scalar::Int64(-3), Scalar::Int64(0), Scalar::Int64(5)])
14742                    .expect("int");
14743            let a = int_col.abs().expect("abs");
14744            assert_eq!(a.values()[0], Scalar::Int64(3));
14745            assert_eq!(a.values()[1], Scalar::Int64(0));
14746
14747            let float_col =
14748                Column::from_values(vec![Scalar::Float64(-1.5), Scalar::Null(NullKind::NaN)])
14749                    .expect("float");
14750            let b = float_col.abs().expect("abs");
14751            assert_eq!(b.values()[0], Scalar::Float64(1.5));
14752            assert!(b.values()[1].is_missing());
14753        }
14754
14755        #[test]
14756        fn abs_bool_preserves_dtype() {
14757            let bool_col =
14758                Column::from_values(vec![Scalar::Bool(true), Scalar::Bool(false)]).expect("bool");
14759            let result = bool_col.abs().expect("abs");
14760            assert_eq!(result.dtype(), DType::Bool);
14761            assert_eq!(result.values(), &[Scalar::Bool(true), Scalar::Bool(false)]);
14762        }
14763
14764        #[test]
14765        fn abs_utf8_errors() {
14766            let col = Column::from_values(vec![Scalar::Utf8("x".into())]).expect("col");
14767            assert!(col.abs().is_err());
14768        }
14769
14770        #[test]
14771        fn shift_positive_pads_left_with_fill() {
14772            let col =
14773                Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)])
14774                    .expect("col");
14775            let s = col.shift(1, Scalar::Null(NullKind::NaN)).expect("shift");
14776            assert!(s.values()[0].is_missing());
14777            assert_eq!(s.values()[1], Scalar::Int64(1));
14778            assert_eq!(s.values()[2], Scalar::Int64(2));
14779        }
14780
14781        #[test]
14782        fn shift_negative_pads_right() {
14783            let col =
14784                Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)])
14785                    .expect("col");
14786            let s = col.shift(-1, Scalar::Int64(0)).expect("shift");
14787            assert_eq!(s.values()[0], Scalar::Int64(2));
14788            assert_eq!(s.values()[1], Scalar::Int64(3));
14789            assert_eq!(s.values()[2], Scalar::Int64(0));
14790        }
14791
14792        #[test]
14793        fn shift_zero_is_clone() {
14794            let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
14795            let s = col.shift(0, Scalar::Int64(-1)).expect("shift");
14796            assert_eq!(s.values(), col.values());
14797        }
14798
14799        #[test]
14800        fn clip_both_bounds() {
14801            let col = Column::from_values(vec![
14802                Scalar::Float64(-5.0),
14803                Scalar::Float64(3.0),
14804                Scalar::Float64(10.0),
14805            ])
14806            .expect("col");
14807            let c = col.clip(Some(0.0), Some(5.0)).expect("clip");
14808            assert_eq!(c.values()[0], Scalar::Float64(0.0));
14809            assert_eq!(c.values()[1], Scalar::Float64(3.0));
14810            assert_eq!(c.values()[2], Scalar::Float64(5.0));
14811        }
14812
14813        #[test]
14814        fn clip_none_bounds_are_noop() {
14815            let col = Column::from_values(vec![Scalar::Float64(-5.0), Scalar::Float64(10.0)])
14816                .expect("col");
14817            let c = col.clip(None, None).expect("clip");
14818            assert_eq!(c.values()[0], Scalar::Float64(-5.0));
14819            assert_eq!(c.values()[1], Scalar::Float64(10.0));
14820        }
14821
14822        #[test]
14823        fn round_rounds_floats() {
14824            let col = Column::from_values(vec![
14825                Scalar::Float64(1.234),
14826                Scalar::Float64(5.678),
14827                Scalar::Null(NullKind::NaN),
14828            ])
14829            .expect("col");
14830            let r = col.round(1).expect("round");
14831            assert_eq!(r.values()[0], Scalar::Float64(1.2));
14832            assert_eq!(r.values()[1], Scalar::Float64(5.7));
14833            assert!(r.values()[2].is_missing());
14834        }
14835
14836        #[test]
14837        fn round_int_nonnegative_decimals_is_noop() {
14838            let col = Column::from_values(vec![Scalar::Int64(12), Scalar::Int64(34)]).expect("col");
14839            let r = col.round(2).expect("round");
14840            assert_eq!(r.values(), col.values());
14841            assert_eq!(r.dtype(), DType::Int64);
14842        }
14843
14844        #[test]
14845        fn round_int_negative_decimals_preserves_dtype() {
14846            let col = Column::from_values(vec![
14847                Scalar::Int64(15),
14848                Scalar::Int64(25),
14849                Scalar::Int64(35),
14850                Scalar::Int64(-15),
14851            ])
14852            .expect("col");
14853            let r = col.round(-1).expect("round");
14854            assert_eq!(r.dtype(), DType::Int64);
14855            assert_eq!(
14856                r.values(),
14857                &[
14858                    Scalar::Int64(20),
14859                    Scalar::Int64(20),
14860                    Scalar::Int64(40),
14861                    Scalar::Int64(-20)
14862                ]
14863            );
14864        }
14865
14866        #[test]
14867        fn round_bool_is_noop() {
14868            let col =
14869                Column::from_values(vec![Scalar::Bool(true), Scalar::Bool(false)]).expect("col");
14870            let r = col.round(-2).expect("round");
14871            assert_eq!(r.dtype(), DType::Bool);
14872            assert_eq!(r.values(), col.values());
14873        }
14874
14875        #[test]
14876        fn round_negative_decimals_rounds_left() {
14877            let col = Column::from_values(vec![Scalar::Float64(1234.0)]).expect("col");
14878            let r = col.round(-2).expect("round");
14879            assert_eq!(r.values()[0], Scalar::Float64(1200.0));
14880        }
14881
14882        #[test]
14883        fn round_uses_pandas_half_even_ties() {
14884            let col = Column::from_values(vec![
14885                Scalar::Float64(1.5),
14886                Scalar::Float64(2.5),
14887                Scalar::Float64(-1.5),
14888                Scalar::Float64(3.5),
14889            ])
14890            .expect("col");
14891            let r = col.round(0).expect("round");
14892            assert_eq!(
14893                r.values(),
14894                &[
14895                    Scalar::Float64(2.0),
14896                    Scalar::Float64(2.0),
14897                    Scalar::Float64(-2.0),
14898                    Scalar::Float64(4.0)
14899                ]
14900            );
14901        }
14902
14903        #[test]
14904        fn round_negative_decimals_uses_half_even_ties() {
14905            let col = Column::from_values(vec![
14906                Scalar::Float64(15.0),
14907                Scalar::Float64(25.0),
14908                Scalar::Float64(35.0),
14909                Scalar::Float64(-15.0),
14910            ])
14911            .expect("col");
14912            let r = col.round(-1).expect("round");
14913            assert_eq!(
14914                r.values(),
14915                &[
14916                    Scalar::Float64(20.0),
14917                    Scalar::Float64(20.0),
14918                    Scalar::Float64(40.0),
14919                    Scalar::Float64(-20.0)
14920                ]
14921            );
14922        }
14923
14924        #[test]
14925        fn isin_returns_bool_column() {
14926            let col = Column::from_values(vec![
14927                Scalar::Int64(1),
14928                Scalar::Int64(2),
14929                Scalar::Int64(3),
14930                Scalar::Null(NullKind::NaN),
14931            ])
14932            .expect("col");
14933            let needles = vec![Scalar::Int64(1), Scalar::Int64(3)];
14934            let r = col.isin(&needles).expect("isin");
14935            assert_eq!(r.dtype(), DType::Bool);
14936            assert_eq!(r.values()[0], Scalar::Bool(true));
14937            assert_eq!(r.values()[1], Scalar::Bool(false));
14938            assert_eq!(r.values()[2], Scalar::Bool(true));
14939            assert_eq!(r.values()[3], Scalar::Bool(false));
14940        }
14941
14942        #[test]
14943        fn isin_empty_needles_yields_all_false() {
14944            let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
14945            let r = col.isin(&[]).expect("isin");
14946            assert_eq!(r.values()[0], Scalar::Bool(false));
14947            assert_eq!(r.values()[1], Scalar::Bool(false));
14948        }
14949    }
14950
14951    mod sort_diff_duplicated_between {
14952        use fp_types::NullKind;
14953
14954        use super::*;
14955
14956        #[test]
14957        fn sort_values_ascending_puts_nulls_last() {
14958            let col = Column::from_values(vec![
14959                Scalar::Int64(3),
14960                Scalar::Null(NullKind::NaN),
14961                Scalar::Int64(1),
14962                Scalar::Int64(2),
14963            ])
14964            .expect("col");
14965            let s = col.sort_values(true).expect("sort");
14966            assert_eq!(s.values()[0], Scalar::Int64(1));
14967            assert_eq!(s.values()[1], Scalar::Int64(2));
14968            assert_eq!(s.values()[2], Scalar::Int64(3));
14969            assert!(s.values()[3].is_missing());
14970        }
14971
14972        #[test]
14973        fn sort_values_descending_keeps_nulls_last() {
14974            let col = Column::from_values(vec![
14975                Scalar::Int64(1),
14976                Scalar::Null(NullKind::NaN),
14977                Scalar::Int64(3),
14978                Scalar::Int64(2),
14979            ])
14980            .expect("col");
14981            let s = col.sort_values(false).expect("sort");
14982            assert_eq!(s.values()[0], Scalar::Int64(3));
14983            assert_eq!(s.values()[1], Scalar::Int64(2));
14984            assert_eq!(s.values()[2], Scalar::Int64(1));
14985            assert!(s.values()[3].is_missing());
14986        }
14987
14988        #[test]
14989        fn argsort_matches_take_sort_values() {
14990            let col =
14991                Column::from_values(vec![Scalar::Int64(3), Scalar::Int64(1), Scalar::Int64(2)])
14992                    .expect("col");
14993            let positions = col.argsort();
14994            assert_eq!(positions, vec![1, 2, 0]);
14995            let via_take = col.take(&positions).expect("take");
14996            let via_sort = col.sort_values(true).expect("sort");
14997            assert_eq!(via_take.values(), via_sort.values());
14998        }
14999
15000        // Naive comparator reference (the pre-radix Scalar path) for isomorphism
15001        // proofs: rebuilds the sorted Scalar vec exactly as the old code did.
15002        fn scalar_sort_reference(values: &[Scalar], ascending: bool) -> Vec<Scalar> {
15003            let mut indexed: Vec<(usize, &Scalar)> = values.iter().enumerate().collect();
15004            indexed.sort_by(|a, b| crate::compare_scalars_na_last(a.1, b.1, ascending));
15005            indexed.into_iter().map(|(_, v)| v.clone()).collect()
15006        }
15007
        // Property test: for pseudo-random all-valid Int64 and Float64
        // columns, `sort_values` (both directions) and `argsort` must agree
        // with `scalar_sort_reference`, the stable Scalar-comparator ordering.
        #[test]
        fn radix_sort_matches_scalar_reference_i64_and_f64() {
            // Deterministic LCG covering negatives, zero, duplicates (tie
            // stability), and large magnitudes — the typed radix path must be
            // BIT-IDENTICAL to the stable Scalar comparator path, both orders.
            let mut state: u64 = 0x9E37_79B9_7F4A_7C15;
            let mut next = || {
                state = state
                    .wrapping_mul(6364136223846793005)
                    .wrapping_add(1442695040888963407);
                state
            };
            for trial in 0..200 {
                // Column length in 1..=400.
                let n = (next() % 400) as usize + 1;
                let i64_vals: Vec<Scalar> = (0..n)
                    .map(|_| {
                        // Narrow range (-5..=5) forces frequent ties; roughly one
                        // draw in seven is a full-width value instead.
                        let r = next();
                        let v = if r % 7 == 0 {
                            r as i64 // full-width incl negatives via wraparound
                        } else {
                            (r % 11) as i64 - 5
                        };
                        Scalar::Int64(v)
                    })
                    .collect();
                // The float column is derived from the integer one so both
                // share the same tie structure.
                let f64_vals: Vec<Scalar> = i64_vals
                    .iter()
                    .map(|s| match s {
                        Scalar::Int64(v) => {
                            // Map into floats incl negatives, zero, fractional ties.
                            // Any zero is written back as the literal `0.0`.
                            let f = (*v as f64) / 4.0;
                            Scalar::Float64(if f == 0.0 { 0.0 } else { f })
                        }
                        _ => unreachable!(),
                    })
                    .collect();
                for (vals, label) in [(&i64_vals, "i64"), (&f64_vals, "f64")] {
                    let col = Column::from_values(vals.clone()).expect("col");
                    // The generator never emits a missing value, so the validity
                    // mask must be fully set; the missing-value ordering is not
                    // exercised by this test.
                    assert!(
                        col.validity.all(),
                        "{label} trial {trial}: unexpected missing"
                    );
                    for ascending in [true, false] {
                        let got = col.sort_values(ascending).expect("sort").values().to_vec();
                        let want = scalar_sort_reference(vals, ascending);
                        assert_eq!(
                            got, want,
                            "{label} trial {trial} asc={ascending} sort mismatch"
                        );
                    }
                    // argsort (ascending) must reproduce the stable permutation.
                    let perm = col.argsort();
                    let via_perm: Vec<Scalar> = perm.iter().map(|&i| vals[i].clone()).collect();
                    assert_eq!(
                        via_perm,
                        scalar_sort_reference(vals, true),
                        "{label} trial {trial} argsort mismatch"
                    );
                }
            }
        }
15071
15072        #[test]
15073        fn contiguous_utf8_argsort_matches_scalar_reference() {
15074            let raw = ["bee", "alpha", "bee", "alphabet", "", "zulu"];
15075            let scalars: Vec<Scalar> = raw
15076                .iter()
15077                .map(|value| Scalar::Utf8((*value).to_owned()))
15078                .collect();
15079            let scalar_col = Column::from_values(scalars.clone()).expect("scalar col");
15080            let mut bytes = Vec::new();
15081            let mut offsets = Vec::with_capacity(raw.len() + 1);
15082            offsets.push(0);
15083            for value in raw {
15084                bytes.extend_from_slice(value.as_bytes());
15085                offsets.push(bytes.len());
15086            }
15087            let contiguous_col = Column::from_utf8_contiguous(bytes, offsets);
15088
15089            assert_eq!(contiguous_col.argsort_with(true), vec![4, 1, 3, 0, 2, 5]);
15090            assert_eq!(contiguous_col.argsort_with(false), vec![5, 0, 2, 3, 1, 4]);
15091            for ascending in [true, false] {
15092                let got = contiguous_col
15093                    .sort_values(ascending)
15094                    .expect("contiguous sort")
15095                    .values()
15096                    .to_vec();
15097                let want = scalar_col
15098                    .sort_values(ascending)
15099                    .expect("scalar sort")
15100                    .values()
15101                    .to_vec();
15102                assert_eq!(got, want, "ascending={ascending}");
15103            }
15104        }
15105
15106        #[test]
15107        fn contiguous_utf8_strict_witness_matches_byte_order_483i5() {
15108            fn contiguous(values: &[&str]) -> Column {
15109                let mut bytes = Vec::new();
15110                let mut offsets = Vec::with_capacity(values.len() + 1);
15111                offsets.push(0);
15112                for value in values {
15113                    bytes.extend_from_slice(value.as_bytes());
15114                    offsets.push(bytes.len());
15115                }
15116                Column::from_utf8_contiguous(bytes, offsets)
15117            }
15118
15119            assert!(
15120                contiguous(&["a", "b", "c"])
15121                    .as_strictly_increasing_utf8_contiguous()
15122                    .is_some()
15123            );
15124            assert!(
15125                contiguous(&[])
15126                    .as_strictly_increasing_utf8_contiguous()
15127                    .is_some()
15128            );
15129            assert!(
15130                contiguous(&["only"])
15131                    .as_strictly_increasing_utf8_contiguous()
15132                    .is_some()
15133            );
15134            assert!(
15135                contiguous(&["a", "a"])
15136                    .as_strictly_increasing_utf8_contiguous()
15137                    .is_none()
15138            );
15139            assert!(
15140                contiguous(&["b", "a"])
15141                    .as_strictly_increasing_utf8_contiguous()
15142                    .is_none()
15143            );
15144
15145            let scalar_backed = Column::from_values(vec![
15146                Scalar::Utf8("a".to_owned()),
15147                Scalar::Utf8("b".to_owned()),
15148            ])
15149            .expect("scalar-backed utf8");
15150            assert!(
15151                scalar_backed
15152                    .as_strictly_increasing_utf8_contiguous()
15153                    .is_none()
15154            );
15155        }
15156
15157        #[test]
15158        fn abs_typed_matches_scalar_reference() {
15159            // The typed abs fast path must be bit-identical to the Scalar loop
15160            // for all-valid Int64/Float64, incl i64::MIN (wrapping_abs) and
15161            // -0.0/large floats.
15162            let mut state: u64 = 0x2545_F491_4F6C_DD1D;
15163            let mut next = || {
15164                state = state
15165                    .wrapping_mul(6364136223846793005)
15166                    .wrapping_add(1442695040888963407);
15167                state
15168            };
15169            for trial in 0..150 {
15170                let n = (next() % 300) as usize + 1;
15171                let i64_vals: Vec<Scalar> = (0..n)
15172                    .map(|_| {
15173                        let r = next();
15174                        if r % 50 == 0 {
15175                            Scalar::Int64(i64::MIN)
15176                        } else {
15177                            Scalar::Int64((r % 2000) as i64 - 1000)
15178                        }
15179                    })
15180                    .collect();
15181                let f64_vals: Vec<Scalar> = i64_vals
15182                    .iter()
15183                    .map(|s| match s {
15184                        Scalar::Int64(v) => {
15185                            let f = if *v == i64::MIN {
15186                                -0.0
15187                            } else {
15188                                *v as f64 / 4.0
15189                            };
15190                            Scalar::Float64(f)
15191                        }
15192                        _ => unreachable!(),
15193                    })
15194                    .collect();
15195                for vals in [&i64_vals, &f64_vals] {
15196                    let col = Column::from_values(vals.clone()).expect("col");
15197                    let got = col.abs().expect("abs").values().to_vec();
15198                    let want: Vec<Scalar> = vals
15199                        .iter()
15200                        .map(|v| match v {
15201                            Scalar::Int64(x) => Scalar::Int64(x.wrapping_abs()),
15202                            Scalar::Float64(x) => Scalar::Float64(x.abs()),
15203                            other => other.clone(),
15204                        })
15205                        .collect();
15206                    // Float abs of -0.0 → 0.0; compare by bits for floats.
15207                    for (g, w) in got.iter().zip(&want) {
15208                        match (g, w) {
15209                            (Scalar::Float64(a), Scalar::Float64(b)) => {
15210                                assert_eq!(a.to_bits(), b.to_bits(), "trial {trial} float abs")
15211                            }
15212                            _ => assert_eq!(g, w, "trial {trial} abs"),
15213                        }
15214                    }
15215                }
15216            }
15217        }
15218
        // Manual timing benchmark (ignored by default): compares `Column::abs`
        // on a 5M-element f64 column against a hand-written per-Scalar map
        // followed by `Column::new`, and prints the ratios to stderr. It
        // asserts nothing about the timings.
        #[test]
        #[ignore = "timing benchmark; run with --ignored --nocapture on the rch VM"]
        fn abs_typed_timing_vs_scalar() {
            use std::time::Instant;
            let n = 5_000_000usize;
            let iters = 10;
            // Deterministic LCG so every run times the same data.
            let mut state: u64 = 0x9E37_79B9_7F4A_7C15;
            let mut next = || {
                state = state
                    .wrapping_mul(6364136223846793005)
                    .wrapping_add(1442695040888963407);
                state
            };
            // Whole-number floats in -1_000_000.0..1_000_000.0.
            let data: Vec<f64> = (0..n)
                .map(|_| (next() % 2_000_000) as f64 - 1_000_000.0)
                .collect();
            // Each measurement builds a fresh column, so all three loops below
            // pay the same clone + construction cost.
            let mk = || Column::from_f64_values(data.clone());

            // Phase 1: column build + `Column::abs`. The result length is folded
            // into `chk`, which is printed at the end so the result is used.
            let t0 = Instant::now();
            let mut chk = 0usize;
            for _ in 0..iters {
                chk ^= mk().abs().unwrap().len();
            }
            let typed = t0.elapsed();

            // Phase 2: column build + per-Scalar abs + rebuild via `Column::new`.
            let t1 = Instant::now();
            let mut chk2 = 0usize;
            for _ in 0..iters {
                let col = mk();
                let out: Vec<Scalar> = col
                    .values()
                    .iter()
                    .map(|v| match v {
                        Scalar::Float64(x) => Scalar::Float64(x.abs()),
                        other => other.clone(),
                    })
                    .collect();
                chk2 ^= Column::new(DType::Float64, out).unwrap().len();
            }
            let scalar = t1.elapsed();
            // Phase 3: column build alone, used as the baseline to subtract.
            let t2 = Instant::now();
            let mut sink = 0usize;
            for _ in 0..iters {
                sink ^= mk().len();
            }
            let build = t2.elapsed();
            // `saturating_sub` clamps at zero when the baseline is the slower
            // measurement; the op-only ratio then prints as inf or NaN.
            let typed_op = typed.saturating_sub(build).as_secs_f64();
            let scalar_op = scalar.saturating_sub(build).as_secs_f64();
            eprintln!(
                "abs 5M f64 x{iters}: typed={typed:?} scalar={scalar:?} build={build:?} \
                 op-only ratio={:.2}x (full {:.2}x, chk {chk}/{chk2}/{sink})",
                scalar_op / typed_op,
                scalar.as_secs_f64() / typed.as_secs_f64()
            );
        }
15274
        // Property test: `factorize_with_options` on small-range, all-valid
        // Int64 columns must return the same codes and uniques as a
        // brute-force first-seen reference, with and without sorting.
        #[test]
        fn factorize_direct_address_matches_reference() {
            // Independent O(n^2) first-seen reference (linear position scan, no
            // hashing/direct-address) for bounded-range all-valid Int64. The
            // direct-address fast path must be bit-identical for codes AND
            // uniques, both sort modes (use_na_sentinel is moot — all valid).
            let mut state: u64 = 0x51A4_3C29_7E10_BB67;
            let mut next = || {
                state = state
                    .wrapping_mul(6364136223846793005)
                    .wrapping_add(1442695040888963407);
                state
            };
            for trial in 0..150 {
                // 1..=300 values drawn from -6..=6, so repeats are frequent.
                let n = (next() % 300) as usize + 1;
                let data: Vec<i64> = (0..n).map(|_| (next() % 13) as i64 - 6).collect();

                for sort in [false, true] {
                    // Reference: first-seen codes via linear scan, then optional
                    // stable sort of uniques + code remap.
                    let mut uniques: Vec<i64> = Vec::new();
                    let mut codes: Vec<i64> = Vec::with_capacity(n);
                    for &v in &data {
                        match uniques.iter().position(|&u| u == v) {
                            Some(p) => codes.push(p as i64),
                            None => {
                                // First sighting: the new code is the next free slot.
                                codes.push(uniques.len() as i64);
                                uniques.push(v);
                            }
                        }
                    }
                    if sort {
                        // `order[new_pos]` is the first-seen slot of the unique
                        // that lands at `new_pos` after sorting.
                        let mut order: Vec<usize> = (0..uniques.len()).collect();
                        order.sort_by(|&a, &b| uniques[a].cmp(&uniques[b]));
                        // `remap[old_code]` is filled in as a side effect of
                        // building the sorted uniques.
                        let mut remap = vec![0i64; uniques.len()];
                        let sorted: Vec<i64> = order
                            .iter()
                            .enumerate()
                            .map(|(new_pos, &orig)| {
                                remap[orig] = new_pos as i64;
                                uniques[orig]
                            })
                            .collect();
                        // Rewrite every first-seen code to its sorted position.
                        for c in &mut codes {
                            *c = remap[*c as usize];
                        }
                        uniques = sorted;
                    }

                    let col = Column::from_values(data.iter().map(|&v| Scalar::Int64(v)).collect())
                        .expect("col");
                    let (code_col, uniq_col) =
                        col.factorize_with_options(sort, true).expect("factorize");
                    // Keep only Int64 cells; the length assertions below then
                    // prove that no cell of any other kind was dropped.
                    let got_codes: Vec<i64> = code_col
                        .values()
                        .iter()
                        .filter_map(|v| match v {
                            Scalar::Int64(c) => Some(*c),
                            _ => None,
                        })
                        .collect();
                    let got_uniques: Vec<i64> = uniq_col
                        .values()
                        .iter()
                        .filter_map(|v| match v {
                            Scalar::Int64(c) => Some(*c),
                            _ => None,
                        })
                        .collect();
                    assert_eq!(got_codes.len(), code_col.len(), "non-int code");
                    assert_eq!(got_uniques.len(), uniq_col.len(), "non-int unique");
                    assert_eq!(got_codes, codes, "trial {trial} sort={sort} codes");
                    assert_eq!(got_uniques, uniques, "trial {trial} sort={sort} uniques");
                }
            }
        }
15351
15352        #[test]
15353        #[ignore = "timing benchmark; run with --ignored --nocapture on the rch VM"]
15354        fn factorize_direct_address_timing_vs_hashmap() {
15355            use std::{collections::HashMap, time::Instant};
15356            let n = 5_000_000usize;
15357            let iters = 10;
15358            for cardinality in [1_000u64, 2_000_000u64] {
15359                let mut state: u64 = 0x2468_ACE0_1357_9BDF ^ cardinality;
15360                let mut next = || {
15361                    state = state
15362                        .wrapping_mul(6364136223846793005)
15363                        .wrapping_add(1442695040888963407);
15364                    state
15365                };
15366                let data: Vec<i64> = (0..n).map(|_| (next() % cardinality) as i64).collect();
15367
15368                let col = Column::from_i64_values(data.clone());
15369                let t0 = Instant::now();
15370                let mut chk = 0i64;
15371                for _ in 0..iters {
15372                    let (codes, _u) = col.factorize_with_options(false, true).expect("da");
15373                    if let Scalar::Int64(c) = &codes.values()[n - 1] {
15374                        chk ^= *c;
15375                    }
15376                }
15377                let direct = t0.elapsed();
15378
15379                // OLD: HashMap<i64,i64> first-seen code assignment over &[Scalar].
15380                let scalar_col =
15381                    Column::from_values(data.iter().map(|&v| Scalar::Int64(v)).collect())
15382                        .expect("col");
15383                let t1 = Instant::now();
15384                let mut chk2 = 0i64;
15385                for _ in 0..iters {
15386                    let mut map: HashMap<i64, i64> = HashMap::new();
15387                    let mut uniques = 0i64;
15388                    let mut last = 0i64;
15389                    for v in scalar_col.values() {
15390                        if let Scalar::Int64(i) = v {
15391                            let code = *map.entry(*i).or_insert_with(|| {
15392                                let c = uniques;
15393                                uniques += 1;
15394                                c
15395                            });
15396                            last = code;
15397                        }
15398                    }
15399                    chk2 ^= last;
15400                }
15401                let scalar = t1.elapsed();
15402                eprintln!(
15403                    "factorize 5M i64 card={cardinality} x{iters}: direct={direct:?} hashmap={scalar:?} ratio={:.2}x (chk {chk}/{chk2})",
15404                    scalar.as_secs_f64() / direct.as_secs_f64()
15405                );
15406            }
15407        }
15408
15409        #[test]
15410        fn duplicated_typed_matches_bruteforce_reference() {
15411            // Independent O(n^2) reference: for keep=first a value is a dup iff
15412            // an equal value occurs earlier; last iff later; none iff any other
15413            // position is equal. Float keys compare on normalized to_bits (so
15414            // -0.0==+0.0) — matching key_of. Proves the typed FxHash fast path
15415            // is bit-identical for all-valid Int64/Float64 columns.
15416            let mut state: u64 = 0xD1B5_4A32_D192_ED03;
15417            let mut next = || {
15418                state = state
15419                    .wrapping_mul(6364136223846793005)
15420                    .wrapping_add(1442695040888963407);
15421                state
15422            };
15423            let fbits = |f: f64| (if f == 0.0 { 0.0 } else { f }).to_bits();
15424            for trial in 0..150 {
15425                let n = (next() % 300) as usize + 1;
15426                // Narrow value range → many duplicates / ties.
15427                let raw: Vec<i64> = (0..n).map(|_| (next() % 9) as i64 - 4).collect();
15428                let i64_vals: Vec<Scalar> = raw.iter().map(|&v| Scalar::Int64(v)).collect();
15429                let f64_vals: Vec<Scalar> = raw
15430                    .iter()
15431                    .map(|&v| Scalar::Float64(v as f64 / 2.0))
15432                    .collect();
15433                let i64_keys: Vec<i64> = raw.clone();
15434                let f64_keys: Vec<u64> = raw.iter().map(|&v| fbits(v as f64 / 2.0)).collect();
15435
15436                for keep in ["first", "last", "false"] {
15437                    // Brute-force references over both key representations.
15438                    let bf = |eq_keys: &dyn Fn(usize, usize) -> bool| -> Vec<bool> {
15439                        (0..n)
15440                            .map(|i| match keep {
15441                                "first" => (0..i).any(|j| eq_keys(i, j)),
15442                                "last" => (i + 1..n).any(|j| eq_keys(i, j)),
15443                                _ => (0..n).any(|j| j != i && eq_keys(i, j)),
15444                            })
15445                            .collect()
15446                    };
15447                    let want_i = bf(&|a, b| i64_keys[a] == i64_keys[b]);
15448                    let want_f = bf(&|a, b| f64_keys[a] == f64_keys[b]);
15449
15450                    let col_i = Column::from_values(i64_vals.clone()).expect("i64 col");
15451                    let got_i: Vec<bool> = col_i
15452                        .duplicated_keep(keep)
15453                        .expect("dup i64")
15454                        .values()
15455                        .iter()
15456                        .map(|v| matches!(v, Scalar::Bool(true)))
15457                        .collect();
15458                    assert_eq!(got_i, want_i, "i64 trial {trial} keep={keep}");
15459
15460                    let col_f = Column::from_values(f64_vals.clone()).expect("f64 col");
15461                    let got_f: Vec<bool> = col_f
15462                        .duplicated_keep(keep)
15463                        .expect("dup f64")
15464                        .values()
15465                        .iter()
15466                        .map(|v| matches!(v, Scalar::Bool(true)))
15467                        .collect();
15468                    assert_eq!(got_f, want_f, "f64 trial {trial} keep={keep}");
15469                }
15470            }
15471        }
15472
15473        #[test]
15474        #[ignore = "timing benchmark; run with --ignored --nocapture on the rch VM"]
15475        fn duplicated_typed_timing_vs_scalar() {
15476            use std::{collections::HashSet, time::Instant};
15477            let n = 5_000_000usize;
15478            let iters = 10;
15479            // Faithful replica of the OLD path: build the per-value Key enum
15480            // over &[Scalar] and insert into a std (SipHash) HashSet.
15481            #[derive(Hash, PartialEq, Eq)]
15482            enum OldKey {
15483                Int64(i64),
15484                Null,
15485            }
15486            for cardinality in [1_000u64, 2_000_000u64] {
15487                let mut state: u64 = 0x0FED_CBA9_8765_4321 ^ cardinality;
15488                let mut next = || {
15489                    state = state
15490                        .wrapping_mul(6364136223846793005)
15491                        .wrapping_add(1442695040888963407);
15492                    state
15493                };
15494                let data: Vec<i64> = (0..n).map(|_| (next() % cardinality) as i64).collect();
15495
15496                let col = Column::from_i64_values(data.clone());
15497                let t0 = Instant::now();
15498                let mut chk = 0usize;
15499                for _ in 0..iters {
15500                    let d = col.duplicated_keep("first").expect("typed");
15501                    chk ^= d
15502                        .values()
15503                        .iter()
15504                        .filter(|v| matches!(v, Scalar::Bool(true)))
15505                        .count();
15506                }
15507                let typed = t0.elapsed();
15508
15509                let scalar_col =
15510                    Column::from_values(data.iter().map(|&v| Scalar::Int64(v)).collect())
15511                        .expect("col");
15512                let t1 = Instant::now();
15513                let mut chk2 = 0usize;
15514                for _ in 0..iters {
15515                    let mut seen: HashSet<OldKey> = HashSet::new();
15516                    let mut count = 0usize;
15517                    for v in scalar_col.values() {
15518                        let key = if v.is_missing() {
15519                            OldKey::Null
15520                        } else if let Scalar::Int64(i) = v {
15521                            OldKey::Int64(*i)
15522                        } else {
15523                            OldKey::Null
15524                        };
15525                        if !seen.insert(key) {
15526                            count += 1;
15527                        }
15528                    }
15529                    chk2 ^= count;
15530                }
15531                let scalar = t1.elapsed();
15532                eprintln!(
15533                    "duplicated 5M i64 card={cardinality} x{iters}: typed={typed:?} old_keyenum_siphash={scalar:?} ratio={:.2}x (chk {chk}/{chk2})",
15534                    scalar.as_secs_f64() / typed.as_secs_f64()
15535                );
15536            }
15537        }
15538
15539        #[test]
15540        #[ignore = "timing benchmark; run with --ignored --nocapture on the rch VM"]
15541        fn radix_sort_timing_vs_scalar() {
15542            use std::time::Instant;
15543            let n = 5_000_000usize;
15544            let mut state: u64 = 0x1234_5678_9ABC_DEF0;
15545            let mut next = || {
15546                state = state
15547                    .wrapping_mul(6364136223846793005)
15548                    .wrapping_add(1442695040888963407);
15549                state
15550            };
15551            let data: Vec<i64> = (0..n).map(|_| next() as i64).collect();
15552            let col = Column::from_i64_values(data.clone());
15553
15554            let iters = 10;
15555            let t0 = Instant::now();
15556            let mut checksum = 0i64;
15557            for _ in 0..iters {
15558                let sorted = col.sort_values(true).expect("radix");
15559                checksum ^= match &sorted.values()[0] {
15560                    Scalar::Int64(v) => *v,
15561                    _ => 0,
15562                };
15563            }
15564            let radix = t0.elapsed();
15565
15566            // Old Scalar comparator path, reproduced inline for the A/B.
15567            let scalar_col = Column::from_values(data.iter().map(|&v| Scalar::Int64(v)).collect())
15568                .expect("scalar col");
15569            let t1 = Instant::now();
15570            let mut checksum2 = 0i64;
15571            for _ in 0..iters {
15572                let mut indexed: Vec<(usize, &Scalar)> =
15573                    scalar_col.values().iter().enumerate().collect();
15574                indexed.sort_by(|a, b| crate::compare_scalars_na_last(a.1, b.1, true));
15575                if let Scalar::Int64(v) = indexed[0].1 {
15576                    checksum2 ^= *v;
15577                }
15578            }
15579            let scalar = t1.elapsed();
15580            eprintln!(
15581                "sort_single 5M i64 x{iters}: radix={radix:?} scalar={scalar:?} ratio={:.2}x (chk {checksum}/{checksum2})",
15582                scalar.as_secs_f64() / radix.as_secs_f64()
15583            );
15584        }
15585
15586        #[test]
15587        fn diff_periods_one_subtracts_prev() {
15588            let col =
15589                Column::from_values(vec![Scalar::Int64(5), Scalar::Int64(8), Scalar::Int64(10)])
15590                    .expect("col");
15591            let d = col.diff(1).expect("diff");
15592            assert!(d.values()[0].is_missing());
15593            assert_eq!(d.values()[1], Scalar::Float64(3.0));
15594            assert_eq!(d.values()[2], Scalar::Float64(2.0));
15595            assert_eq!(d.dtype(), DType::Float64);
15596        }
15597
15598        #[test]
15599        fn diff_negative_period_looks_ahead() {
15600            let col =
15601                Column::from_values(vec![Scalar::Int64(5), Scalar::Int64(8), Scalar::Int64(10)])
15602                    .expect("col");
15603            let d = col.diff(-1).expect("diff");
15604            assert_eq!(d.values()[0], Scalar::Float64(-3.0));
15605            assert_eq!(d.values()[1], Scalar::Float64(-2.0));
15606            assert!(d.values()[2].is_missing());
15607        }
15608
15609        #[test]
15610        fn diff_timedelta64_returns_timedelta_e607u() {
15611            // Per br-frankenpandas-e607u: Column::diff on Timedelta64 preserves
15612            // Timedelta dtype (was forced to Float64 NaN before via to_f64 catch-all).
15613            let one_hour = 3_600 * 1_000_000_000_i64;
15614            let col = Column::from_values(vec![
15615                Scalar::Timedelta64(one_hour),
15616                Scalar::Timedelta64(3 * one_hour),
15617                Scalar::Timedelta64(2 * one_hour),
15618            ])
15619            .expect("col");
15620            let d = col.diff(1).expect("diff");
15621            assert_eq!(d.dtype(), DType::Timedelta64);
15622            assert!(d.values()[0].is_missing()); // first row → NaT
15623            assert_eq!(d.values()[1], Scalar::Timedelta64(2 * one_hour));
15624            assert_eq!(d.values()[2], Scalar::Timedelta64(-one_hour));
15625        }
15626
15627        #[test]
15628        fn diff_timedelta64_nat_propagates_e607u() {
15629            use fp_types::Timedelta;
15630            let one_hour = 3_600 * 1_000_000_000_i64;
15631            let col = Column::from_values(vec![
15632                Scalar::Timedelta64(one_hour),
15633                Scalar::Timedelta64(Timedelta::NAT),
15634                Scalar::Timedelta64(2 * one_hour),
15635            ])
15636            .expect("col");
15637            let d = col.diff(1).expect("diff");
15638            assert_eq!(d.dtype(), DType::Timedelta64);
15639            assert!(d.values()[0].is_missing());
15640            assert!(d.values()[1].is_missing()); // NaT current → NaT
15641            assert!(d.values()[2].is_missing()); // NaT previous → NaT
15642        }
15643
15644        #[test]
15645        fn duplicated_keep_first() {
15646            let col = Column::from_values(vec![
15647                Scalar::Int64(1),
15648                Scalar::Int64(2),
15649                Scalar::Int64(1),
15650                Scalar::Int64(3),
15651                Scalar::Int64(2),
15652            ])
15653            .expect("col");
15654            let d = col.duplicated().expect("duplicated");
15655            assert_eq!(d.values()[0], Scalar::Bool(false));
15656            assert_eq!(d.values()[1], Scalar::Bool(false));
15657            assert_eq!(d.values()[2], Scalar::Bool(true));
15658            assert_eq!(d.values()[3], Scalar::Bool(false));
15659            assert_eq!(d.values()[4], Scalar::Bool(true));
15660        }
15661
15662        #[test]
15663        fn duplicated_treats_nulls_as_one_bucket() {
15664            let col = Column::from_values(vec![
15665                Scalar::Null(NullKind::NaN),
15666                Scalar::Null(NullKind::NaN),
15667                Scalar::Int64(1),
15668            ])
15669            .expect("col");
15670            let d = col.duplicated().expect("duplicated");
15671            assert_eq!(d.values()[0], Scalar::Bool(false));
15672            assert_eq!(d.values()[1], Scalar::Bool(true));
15673            assert_eq!(d.values()[2], Scalar::Bool(false));
15674        }
15675
15676        #[test]
15677        fn duplicated_keep_variants_match_pandas() {
15678            let col = Column::from_values(vec![
15679                Scalar::Int64(1),
15680                Scalar::Int64(2),
15681                Scalar::Int64(1),
15682                Scalar::Int64(1),
15683                Scalar::Int64(3),
15684                Scalar::Int64(2),
15685                Scalar::Null(NullKind::NaN),
15686                Scalar::Null(NullKind::NaN),
15687            ])
15688            .expect("col");
15689
15690            let last = col.duplicated_keep("last").expect("duplicated last");
15691            assert_eq!(
15692                last.values(),
15693                &[
15694                    Scalar::Bool(true),
15695                    Scalar::Bool(true),
15696                    Scalar::Bool(true),
15697                    Scalar::Bool(false),
15698                    Scalar::Bool(false),
15699                    Scalar::Bool(false),
15700                    Scalar::Bool(true),
15701                    Scalar::Bool(false),
15702                ]
15703            );
15704
15705            let none = col.duplicated_keep("false").expect("duplicated none");
15706            assert_eq!(
15707                none.values(),
15708                &[
15709                    Scalar::Bool(true),
15710                    Scalar::Bool(true),
15711                    Scalar::Bool(true),
15712                    Scalar::Bool(true),
15713                    Scalar::Bool(false),
15714                    Scalar::Bool(true),
15715                    Scalar::Bool(true),
15716                    Scalar::Bool(true),
15717                ]
15718            );
15719        }
15720
15721        #[test]
15722        fn between_inclusive_both() {
15723            let col = Column::from_values(vec![
15724                Scalar::Float64(0.5),
15725                Scalar::Float64(1.0),
15726                Scalar::Float64(5.0),
15727                Scalar::Float64(6.0),
15728            ])
15729            .expect("col");
15730            let b = col.between(1.0, 5.0, true).expect("between");
15731            assert_eq!(b.values()[0], Scalar::Bool(false));
15732            assert_eq!(b.values()[1], Scalar::Bool(true));
15733            assert_eq!(b.values()[2], Scalar::Bool(true));
15734            assert_eq!(b.values()[3], Scalar::Bool(false));
15735        }
15736
15737        #[test]
15738        fn between_exclusive() {
15739            let col = Column::from_values(vec![
15740                Scalar::Float64(1.0),
15741                Scalar::Float64(3.0),
15742                Scalar::Float64(5.0),
15743            ])
15744            .expect("col");
15745            let b = col.between(1.0, 5.0, false).expect("between");
15746            assert_eq!(b.values()[0], Scalar::Bool(false));
15747            assert_eq!(b.values()[1], Scalar::Bool(true));
15748            assert_eq!(b.values()[2], Scalar::Bool(false));
15749        }
15750
15751        #[test]
15752        fn between_left_and_right_inclusive_edges() {
15753            let col = Column::from_values(vec![
15754                Scalar::Float64(1.0),
15755                Scalar::Float64(3.0),
15756                Scalar::Float64(5.0),
15757            ])
15758            .expect("col");
15759
15760            let left = col
15761                .between_inclusive(1.0, 5.0, "left")
15762                .expect("between left");
15763            assert_eq!(
15764                left.values(),
15765                &[Scalar::Bool(true), Scalar::Bool(true), Scalar::Bool(false),]
15766            );
15767
15768            let right = col
15769                .between_inclusive(1.0, 5.0, "right")
15770                .expect("between right");
15771            assert_eq!(
15772                right.values(),
15773                &[Scalar::Bool(false), Scalar::Bool(true), Scalar::Bool(true),]
15774            );
15775        }
15776
15777        #[test]
15778        fn between_missing_maps_to_false() {
15779            let col = Column::from_values(vec![Scalar::Null(NullKind::NaN), Scalar::Float64(3.0)])
15780                .expect("col");
15781            let b = col.between(1.0, 5.0, true).expect("between");
15782            assert_eq!(b.values()[0], Scalar::Bool(false));
15783            assert_eq!(b.values()[1], Scalar::Bool(true));
15784        }
15785    }
15786
15787    mod factorize {
15788        use fp_types::NullKind;
15789
15790        use super::*;
15791
15792        #[test]
15793        fn factorize_preserves_first_seen_order() {
15794            let col = Column::from_values(vec![
15795                Scalar::Utf8("b".into()),
15796                Scalar::Utf8("a".into()),
15797                Scalar::Utf8("b".into()),
15798                Scalar::Utf8("c".into()),
15799                Scalar::Utf8("a".into()),
15800            ])
15801            .expect("col");
15802
15803            let (codes, uniques) = col.factorize().expect("factorize");
15804            assert_eq!(codes.dtype(), DType::Int64);
15805            assert_eq!(
15806                codes.values(),
15807                &[
15808                    Scalar::Int64(0),
15809                    Scalar::Int64(1),
15810                    Scalar::Int64(0),
15811                    Scalar::Int64(2),
15812                    Scalar::Int64(1),
15813                ]
15814            );
15815            assert_eq!(
15816                uniques.values(),
15817                &[
15818                    Scalar::Utf8("b".into()),
15819                    Scalar::Utf8("a".into()),
15820                    Scalar::Utf8("c".into()),
15821                ]
15822            );
15823        }
15824
15825        #[test]
15826        fn factorize_missing_values_map_to_negative_one() {
15827            let col = Column::from_values(vec![
15828                Scalar::Float64(1.5),
15829                Scalar::Null(NullKind::NaN),
15830                Scalar::Float64(2.5),
15831                Scalar::Null(NullKind::Null),
15832                Scalar::Float64(1.5),
15833            ])
15834            .expect("col");
15835
15836            let (codes, uniques) = col.factorize().expect("factorize");
15837            assert_eq!(
15838                codes.values(),
15839                &[
15840                    Scalar::Int64(0),
15841                    Scalar::Int64(-1),
15842                    Scalar::Int64(1),
15843                    Scalar::Int64(-1),
15844                    Scalar::Int64(0),
15845                ]
15846            );
15847            assert_eq!(uniques.dtype(), DType::Float64);
15848            assert_eq!(
15849                uniques.values(),
15850                &[Scalar::Float64(1.5), Scalar::Float64(2.5)]
15851            );
15852        }
15853
15854        #[test]
15855        fn factorize_empty_column_returns_empty_outputs() {
15856            let col = Column::new(DType::Int64, Vec::new()).expect("col");
15857            let (codes, uniques) = col.factorize().expect("factorize");
15858            assert!(codes.is_empty());
15859            assert!(uniques.is_empty());
15860            assert_eq!(codes.dtype(), DType::Int64);
15861            assert_eq!(uniques.dtype(), DType::Int64);
15862        }
15863
15864        #[test]
15865        fn factorize_with_sort_sorts_uniques_and_relabels_codes() {
15866            let col = Column::from_values(vec![
15867                Scalar::Utf8("b".into()),
15868                Scalar::Utf8("a".into()),
15869                Scalar::Utf8("b".into()),
15870                Scalar::Utf8("c".into()),
15871                Scalar::Utf8("a".into()),
15872            ])
15873            .expect("col");
15874
15875            let (codes, uniques) = col.factorize_with_options(true, true).expect("factorize");
15876            assert_eq!(
15877                codes.values(),
15878                &[
15879                    Scalar::Int64(1),
15880                    Scalar::Int64(0),
15881                    Scalar::Int64(1),
15882                    Scalar::Int64(2),
15883                    Scalar::Int64(0),
15884                ]
15885            );
15886            assert_eq!(
15887                uniques.values(),
15888                &[
15889                    Scalar::Utf8("a".into()),
15890                    Scalar::Utf8("b".into()),
15891                    Scalar::Utf8("c".into()),
15892                ]
15893            );
15894        }
15895
15896        #[test]
15897        fn factorize_with_use_na_sentinel_false_keeps_missing_in_uniques() {
15898            let col = Column::from_values(vec![
15899                Scalar::Float64(1.5),
15900                Scalar::Null(NullKind::NaN),
15901                Scalar::Float64(2.5),
15902                Scalar::Null(NullKind::Null),
15903                Scalar::Float64(1.5),
15904            ])
15905            .expect("col");
15906
15907            let (codes, uniques) = col.factorize_with_options(false, false).expect("factorize");
15908            assert_eq!(
15909                codes.values(),
15910                &[
15911                    Scalar::Int64(0),
15912                    Scalar::Int64(1),
15913                    Scalar::Int64(2),
15914                    Scalar::Int64(1),
15915                    Scalar::Int64(0),
15916                ]
15917            );
15918            assert_eq!(uniques.dtype(), DType::Float64);
15919            assert_eq!(
15920                uniques.values(),
15921                &[
15922                    Scalar::Float64(1.5),
15923                    Scalar::Null(NullKind::NaN),
15924                    Scalar::Float64(2.5),
15925                ]
15926            );
15927        }
15928
15929        #[test]
15930        fn factorize_with_sort_and_use_na_sentinel_false_sorts_missing_last() {
15931            let col = Column::from_values(vec![
15932                Scalar::Utf8("b".into()),
15933                Scalar::Null(NullKind::Null),
15934                Scalar::Utf8("a".into()),
15935                Scalar::Utf8("b".into()),
15936                Scalar::Null(NullKind::NaN),
15937            ])
15938            .expect("col");
15939
15940            let (codes, uniques) = col.factorize_with_options(true, false).expect("factorize");
15941            assert_eq!(
15942                codes.values(),
15943                &[
15944                    Scalar::Int64(1),
15945                    Scalar::Int64(2),
15946                    Scalar::Int64(0),
15947                    Scalar::Int64(1),
15948                    Scalar::Int64(2),
15949                ]
15950            );
15951            assert_eq!(
15952                uniques.values(),
15953                &[
15954                    Scalar::Utf8("a".into()),
15955                    Scalar::Utf8("b".into()),
15956                    Scalar::Null(NullKind::Null),
15957                ]
15958            );
15959        }
15960    }
15961
15962    mod aggregation_helpers {
15963        use fp_types::{NullKind, Timedelta};
15964
15965        use super::*;
15966
15967        fn assert_float_nan(value: Scalar) {
15968            assert!(
15969                matches!(value, Scalar::Float64(v) if v.is_nan()),
15970                "expected Float64(NaN), got {value:?}"
15971            );
15972        }
15973
15974        #[test]
15975        fn sum_skips_nulls() {
15976            let col = Column::from_values(vec![
15977                Scalar::Float64(1.0),
15978                Scalar::Null(NullKind::NaN),
15979                Scalar::Float64(2.0),
15980                Scalar::Float64(3.0),
15981            ])
15982            .expect("col");
15983            let sum = col.sum();
15984            assert!(matches!(sum, Scalar::Float64(_)), "expected Float64 result");
15985            if let Scalar::Float64(v) = sum {
15986                assert!((v - 6.0).abs() < 1e-9);
15987            }
15988        }
15989
15990        #[test]
15991        fn sum_empty_is_zero() {
15992            let col = Column::from_values(Vec::<Scalar>::new()).expect("col");
15993            assert_eq!(col.sum(), Scalar::Float64(0.0));
15994        }
15995
15996        #[test]
15997        fn mean_matches_sum_over_count() {
15998            let col = Column::from_values(vec![
15999                Scalar::Float64(2.0),
16000                Scalar::Float64(4.0),
16001                Scalar::Float64(6.0),
16002            ])
16003            .expect("col");
16004            let mean = col.mean();
16005            assert!(
16006                matches!(mean, Scalar::Float64(_)),
16007                "expected Float64 result"
16008            );
16009            if let Scalar::Float64(v) = mean {
16010                assert!((v - 4.0).abs() < 1e-9);
16011            }
16012        }
16013
16014        #[test]
16015        fn mean_empty_is_null() {
16016            let col = Column::from_values(Vec::<Scalar>::new()).expect("col");
16017            assert!(col.mean().is_missing());
16018        }
16019
16020        #[test]
16021        fn min_max_extrema_skip_nulls() {
16022            let col = Column::from_values(vec![
16023                Scalar::Int64(3),
16024                Scalar::Null(NullKind::NaN),
16025                Scalar::Int64(1),
16026                Scalar::Int64(5),
16027                Scalar::Int64(2),
16028            ])
16029            .expect("col");
16030            assert_eq!(col.min(), Scalar::Int64(1));
16031            assert_eq!(col.max(), Scalar::Int64(5));
16032        }
16033
16034        #[test]
16035        fn median_of_odd_count() {
16036            let col = Column::from_values(vec![
16037                Scalar::Float64(1.0),
16038                Scalar::Float64(5.0),
16039                Scalar::Float64(3.0),
16040            ])
16041            .expect("col");
16042            let median = col.median();
16043            assert!(
16044                matches!(median, Scalar::Float64(_)),
16045                "expected Float64 result"
16046            );
16047            if let Scalar::Float64(v) = median {
16048                assert!((v - 3.0).abs() < 1e-9);
16049            }
16050        }
16051
16052        #[test]
16053        fn prod_multiplies_non_nulls() {
16054            let col = Column::from_values(vec![
16055                Scalar::Float64(2.0),
16056                Scalar::Null(NullKind::NaN),
16057                Scalar::Float64(3.0),
16058                Scalar::Float64(4.0),
16059            ])
16060            .expect("col");
16061            let prod = col.prod();
16062            assert!(
16063                matches!(prod, Scalar::Float64(_)),
16064                "expected Float64 result"
16065            );
16066            if let Scalar::Float64(v) = prod {
16067                assert!((v - 24.0).abs() < 1e-9);
16068            }
16069        }
16070
16071        #[test]
16072        fn prod_empty_is_one() {
16073            let col = Column::from_values(Vec::<Scalar>::new()).expect("col");
16074            assert_eq!(col.prod(), Scalar::Float64(1.0));
16075        }
16076
16077        #[test]
16078        fn product_alias_matches_prod() {
16079            let col = Column::from_values(vec![
16080                Scalar::Float64(2.0),
16081                Scalar::Null(NullKind::NaN),
16082                Scalar::Float64(3.0),
16083            ])
16084            .expect("col");
16085            assert_eq!(col.product(), col.prod());
16086        }
16087
16088        #[test]
16089        fn skipna_false_aggregate_variants_propagate_nan() {
16090            let col = Column::from_values(vec![
16091                Scalar::Float64(2.0),
16092                Scalar::Null(NullKind::NaN),
16093                Scalar::Float64(4.0),
16094            ])
16095            .expect("col");
16096
16097            assert_eq!(col.sum_skipna(true), col.sum());
16098            assert_float_nan(col.sum_skipna(false));
16099            assert_float_nan(col.mean_skipna(false));
16100            assert_float_nan(col.min_skipna(false));
16101            assert_float_nan(col.max_skipna(false));
16102            assert_float_nan(col.median_skipna(false));
16103            assert_float_nan(col.prod_skipna(false));
16104            assert_float_nan(col.var_skipna(1, false));
16105            assert_float_nan(col.std_skipna(1, false));
16106            assert_float_nan(col.sem_skipna(1, false));
16107        }
16108
16109        #[test]
16110        fn skipna_false_timedelta_aggregate_variants_propagate_nat() {
16111            let col = Column::from_values(vec![
16112                Scalar::Timedelta64(Timedelta::NANOS_PER_SEC),
16113                Scalar::Timedelta64(Timedelta::NAT),
16114            ])
16115            .expect("col");
16116
16117            assert_eq!(col.sum_skipna(false), Scalar::Timedelta64(Timedelta::NAT));
16118            assert_eq!(col.mean_skipna(false), Scalar::Timedelta64(Timedelta::NAT));
16119            assert_eq!(col.min_skipna(false), Scalar::Timedelta64(Timedelta::NAT));
16120            assert_eq!(col.max_skipna(false), Scalar::Timedelta64(Timedelta::NAT));
16121        }
16122
16123        #[test]
16124        fn quantile_median_of_sorted_values() {
16125            let col = Column::from_values(vec![
16126                Scalar::Float64(1.0),
16127                Scalar::Float64(2.0),
16128                Scalar::Float64(3.0),
16129                Scalar::Float64(4.0),
16130                Scalar::Float64(5.0),
16131            ])
16132            .expect("col");
16133            let quantile = col.quantile(0.5);
16134            assert!(
16135                matches!(quantile, Scalar::Float64(v) if (v - 3.0).abs() < 1e-9),
16136                "expected Float64 median, got {quantile:?}"
16137            );
16138        }
16139
16140        #[test]
16141        fn quantile_empty_is_null() {
16142            let col = Column::from_values(Vec::<Scalar>::new()).expect("col");
16143            assert!(col.quantile(0.5).is_missing());
16144        }
16145
16146        #[test]
16147        fn quantile_out_of_range_is_null() {
16148            let col = Column::from_values(vec![Scalar::Float64(1.0)]).expect("col");
16149            assert!(col.quantile(1.5).is_missing());
16150            assert!(col.quantile(-0.1).is_missing());
16151        }
16152
16153        #[test]
16154        fn mode_returns_tied_max_frequency() {
16155            let col = Column::from_values(vec![
16156                Scalar::Int64(1),
16157                Scalar::Int64(2),
16158                Scalar::Int64(2),
16159                Scalar::Int64(3),
16160                Scalar::Int64(3),
16161            ])
16162            .expect("col");
16163            let m = col.mode().expect("mode");
16164            assert_eq!(m.values(), &[Scalar::Int64(2), Scalar::Int64(3)]);
16165        }
16166
16167        #[test]
16168        fn mode_ignores_missing_values() {
16169            let col = Column::from_values(vec![
16170                Scalar::Int64(1),
16171                Scalar::Null(NullKind::NaN),
16172                Scalar::Int64(1),
16173                Scalar::Null(NullKind::NaN),
16174            ])
16175            .expect("col");
16176            let m = col.mode().expect("mode");
16177            assert_eq!(m.values(), &[Scalar::Int64(1)]);
16178        }
16179
16180        #[test]
16181        fn mode_empty_is_empty_same_dtype() {
16182            let col = Column::from_values(Vec::<Scalar>::new()).expect("col");
16183            let m = col.mode().expect("mode");
16184            assert!(m.is_empty());
16185        }
16186
16187        #[test]
16188        fn memory_usage_fixed_width_for_numeric() {
16189            let col =
16190                Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)])
16191                    .expect("col");
16192            let usage = col.memory_usage(false);
16193            // 3 * 8 + ceil(3/8) = 24 + 1
16194            assert_eq!(usage, 25);
16195        }
16196
16197        #[test]
16198        fn memory_usage_deep_counts_utf8_bytes() {
16199            let col = Column::from_values(vec![
16200                Scalar::Utf8("hi".into()),
16201                Scalar::Utf8("world".into()),
16202            ])
16203            .expect("col");
16204            let shallow = col.memory_usage(false);
16205            let deep = col.memory_usage(true);
16206            assert!(deep > shallow);
16207            // deep_extra = "hi".len() + "world".len() = 2 + 5 = 7
16208            assert_eq!(deep - shallow, 7);
16209        }
16210
16211        #[test]
16212        fn interpolate_fills_interior_gaps() {
16213            let col = Column::from_values(vec![
16214                Scalar::Float64(1.0),
16215                Scalar::Null(NullKind::NaN),
16216                Scalar::Null(NullKind::NaN),
16217                Scalar::Float64(4.0),
16218            ])
16219            .expect("col");
16220            let r = col.interpolate_linear().expect("interpolate");
16221            assert_eq!(r.values()[0], Scalar::Float64(1.0));
16222            assert!(
16223                matches!(&r.values()[1], Scalar::Float64(v) if (*v - 2.0).abs() < 1e-9),
16224                "expected Float64, got {:?}",
16225                r.values()[1]
16226            );
16227            assert!(
16228                matches!(&r.values()[2], Scalar::Float64(v) if (*v - 3.0).abs() < 1e-9),
16229                "expected Float64, got {:?}",
16230                r.values()[2]
16231            );
16232            assert_eq!(r.values()[3], Scalar::Float64(4.0));
16233        }
16234
16235        #[test]
16236        fn interpolate_leading_null_stays_null_trailing_forward_fills() {
16237            // pandas Series.interpolate(method='linear') default
16238            // limit_direction='forward': leading NaN stays NaN, interior is
16239            // interpolated, trailing NaN is forward-filled with the last valid
16240            // value (NOT extrapolated). [nan,2,nan,4,nan] -> [nan,2,3,4,4].
16241            // (br-frankenpandas-8ic7c)
16242            let col = Column::from_values(vec![
16243                Scalar::Null(NullKind::NaN),
16244                Scalar::Float64(2.0),
16245                Scalar::Null(NullKind::NaN),
16246                Scalar::Float64(4.0),
16247                Scalar::Null(NullKind::NaN),
16248            ])
16249            .expect("col");
16250            let r = col.interpolate_linear().expect("interpolate");
16251            assert!(r.values()[0].is_missing());
16252            assert_eq!(r.values()[1], Scalar::Float64(2.0));
16253            assert!(
16254                matches!(&r.values()[2], Scalar::Float64(v) if (*v - 3.0).abs() < 1e-9),
16255                "expected Float64, got {:?}",
16256                r.values()[2]
16257            );
16258            assert_eq!(r.values()[3], Scalar::Float64(4.0));
16259            // Trailing NaN forward-filled with the last valid value (4.0).
16260            assert_eq!(r.values()[4], Scalar::Float64(4.0));
16261        }
16262
16263        #[test]
16264        fn interpolate_trailing_run_forward_fills_without_extrapolating() {
16265            // [2,4,nan,nan] -> [2,4,4,4] (ffill), NOT [2,4,6,8] (extrapolation).
16266            let col = Column::from_values(vec![
16267                Scalar::Float64(2.0),
16268                Scalar::Float64(4.0),
16269                Scalar::Null(NullKind::NaN),
16270                Scalar::Null(NullKind::NaN),
16271            ])
16272            .expect("col");
16273            let r = col.interpolate_linear().expect("interpolate");
16274            assert_eq!(
16275                r.values(),
16276                &[
16277                    Scalar::Float64(2.0),
16278                    Scalar::Float64(4.0),
16279                    Scalar::Float64(4.0),
16280                    Scalar::Float64(4.0),
16281                ]
16282            );
16283        }
16284
16285        #[test]
16286        fn interpolate_empty_is_empty_float64() {
16287            let col = Column::from_values(Vec::<Scalar>::new()).expect("col");
16288            let r = col.interpolate_linear().expect("interpolate");
16289            assert!(r.is_empty());
16290            assert_eq!(r.dtype(), DType::Float64);
16291        }
16292
16293        #[test]
16294        fn interpolate_alias_matches_default_linear_interpolation() {
16295            let col = Column::from_values(vec![
16296                Scalar::Float64(1.0),
16297                Scalar::Null(NullKind::NaN),
16298                Scalar::Float64(3.0),
16299            ])
16300            .expect("col");
16301
16302            assert_eq!(
16303                col.interpolate().expect("interpolate"),
16304                col.interpolate_linear().expect("interpolate_linear")
16305            );
16306        }
16307
16308        #[test]
16309        fn drop_duplicates_keeps_first_occurrence() {
16310            let col = Column::from_values(vec![
16311                Scalar::Int64(1),
16312                Scalar::Int64(2),
16313                Scalar::Int64(1),
16314                Scalar::Int64(3),
16315                Scalar::Int64(2),
16316            ])
16317            .expect("col");
16318            let d = col.drop_duplicates().expect("drop_duplicates");
16319            assert_eq!(
16320                d.values(),
16321                &[Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)]
16322            );
16323        }
16324
16325        #[test]
16326        fn drop_duplicates_treats_nulls_as_one_bucket() {
16327            let col = Column::from_values(vec![
16328                Scalar::Null(NullKind::NaN),
16329                Scalar::Int64(1),
16330                Scalar::Null(NullKind::NaN),
16331            ])
16332            .expect("col");
16333            let d = col.drop_duplicates().expect("drop_duplicates");
16334            // First null is kept; subsequent null is dropped.
16335            assert_eq!(d.len(), 2);
16336            assert!(d.values()[0].is_missing());
16337            assert_eq!(d.values()[1], Scalar::Int64(1));
16338        }
16339
16340        #[test]
16341        fn drop_duplicates_keep_variants_match_pandas() {
16342            let col = Column::from_values(vec![
16343                Scalar::Int64(1),
16344                Scalar::Int64(2),
16345                Scalar::Int64(1),
16346                Scalar::Int64(1),
16347                Scalar::Int64(3),
16348                Scalar::Int64(2),
16349                Scalar::Null(NullKind::NaN),
16350                Scalar::Null(NullKind::NaN),
16351            ])
16352            .expect("col");
16353
16354            let last = col.drop_duplicates_keep("last").expect("drop last");
16355            assert_eq!(last.len(), 4);
16356            assert_eq!(last.values()[0], Scalar::Int64(1));
16357            assert_eq!(last.values()[1], Scalar::Int64(3));
16358            assert_eq!(last.values()[2], Scalar::Int64(2));
16359            assert!(last.values()[3].is_missing());
16360
16361            let none = col.drop_duplicates_keep("false").expect("drop none");
16362            assert_eq!(none.values(), &[Scalar::Int64(3)]);
16363        }
16364
16365        #[test]
16366        fn compare_returns_only_differences() {
16367            let a = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)])
16368                .expect("a");
16369            let b =
16370                Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(20), Scalar::Int64(3)])
16371                    .expect("b");
16372            let (left, right) = a.compare(&b).expect("compare");
16373            assert_eq!(left.values(), &[Scalar::Int64(2)]);
16374            assert_eq!(right.values(), &[Scalar::Int64(20)]);
16375        }
16376
16377        #[test]
16378        fn compare_treats_matching_nulls_as_equal() {
16379            let a = Column::from_values(vec![Scalar::Int64(1), Scalar::Null(NullKind::NaN)])
16380                .expect("a");
16381            let b = Column::from_values(vec![Scalar::Int64(1), Scalar::Null(NullKind::NaN)])
16382                .expect("b");
16383            let (left, right) = a.compare(&b).expect("compare");
16384            assert!(left.is_empty());
16385            assert!(right.is_empty());
16386        }
16387
16388        #[test]
16389        fn compare_length_mismatch_errors() {
16390            let a = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("a");
16391            let b = Column::from_values(vec![Scalar::Int64(1)]).expect("b");
16392            let err = a.compare(&b).unwrap_err();
16393            assert!(matches!(err, crate::ColumnError::LengthMismatch { .. }));
16394        }
16395
16396        #[test]
16397        fn map_applies_unary_function() {
16398            let col =
16399                Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)])
16400                    .expect("col");
16401            let doubled = col
16402                .map(|v| match v {
16403                    Scalar::Int64(i) => Scalar::Int64(i * 2),
16404                    other => other.clone(),
16405                })
16406                .expect("map");
16407            assert_eq!(doubled.values()[0], Scalar::Int64(2));
16408            assert_eq!(doubled.values()[1], Scalar::Int64(4));
16409            assert_eq!(doubled.values()[2], Scalar::Int64(6));
16410        }
16411
16412        #[test]
16413        fn map_can_change_dtype() {
16414            let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
16415            let as_str = col
16416                .map(|v| match v {
16417                    Scalar::Int64(i) => Scalar::Utf8(i.to_string()),
16418                    other => other.clone(),
16419                })
16420                .expect("map");
16421            assert_eq!(as_str.dtype(), DType::Utf8);
16422            assert_eq!(as_str.values()[0], Scalar::Utf8("1".into()));
16423        }
16424
16425        #[test]
16426        fn argmin_argmax_skip_missing() {
16427            let col = Column::from_values(vec![
16428                Scalar::Int64(3),
16429                Scalar::Null(NullKind::NaN),
16430                Scalar::Int64(1),
16431                Scalar::Int64(5),
16432                Scalar::Int64(2),
16433            ])
16434            .expect("col");
16435            assert_eq!(col.argmin(), Some(2));
16436            assert_eq!(col.argmax(), Some(3));
16437            assert_eq!(col.idxmin(), Some(2));
16438            assert_eq!(col.idxmax(), Some(3));
16439        }
16440
16441        #[test]
16442        fn argmin_argmax_all_missing_returns_none() {
16443            let col = Column::from_values(vec![
16444                Scalar::Null(NullKind::NaN),
16445                Scalar::Null(NullKind::Null),
16446            ])
16447            .expect("col");
16448            assert!(col.argmin().is_none());
16449            assert!(col.argmax().is_none());
16450            assert!(col.idxmin().is_none());
16451            assert!(col.idxmax().is_none());
16452        }
16453
16454        #[test]
16455        fn is_monotonic_increasing_detects_ascending() {
16456            let col = Column::from_values(vec![
16457                Scalar::Int64(1),
16458                Scalar::Int64(2),
16459                Scalar::Int64(2),
16460                Scalar::Int64(5),
16461            ])
16462            .expect("col");
16463            assert!(col.is_monotonic_increasing());
16464            assert!(!col.is_monotonic_decreasing());
16465        }
16466
16467        #[test]
16468        fn is_monotonic_decreasing_detects_descending() {
16469            let col =
16470                Column::from_values(vec![Scalar::Int64(5), Scalar::Int64(3), Scalar::Int64(1)])
16471                    .expect("col");
16472            assert!(col.is_monotonic_decreasing());
16473            assert!(!col.is_monotonic_increasing());
16474        }
16475
16476        #[test]
16477        fn is_monotonic_skips_missing_values() {
16478            let col = Column::from_values(vec![
16479                Scalar::Int64(1),
16480                Scalar::Null(NullKind::NaN),
16481                Scalar::Int64(3),
16482                Scalar::Int64(5),
16483            ])
16484            .expect("col");
16485            assert!(col.is_monotonic_increasing());
16486        }
16487
16488        #[test]
16489        fn is_monotonic_empty_is_true() {
16490            let col = Column::from_values(Vec::<Scalar>::new()).expect("col");
16491            assert!(col.is_monotonic_increasing());
16492            assert!(col.is_monotonic_decreasing());
16493        }
16494
16495        #[test]
16496        fn combine_first_fills_missing_from_other() {
16497            let a = Column::from_values(vec![
16498                Scalar::Int64(1),
16499                Scalar::Null(NullKind::NaN),
16500                Scalar::Int64(3),
16501            ])
16502            .expect("a");
16503            let b = Column::from_values(vec![
16504                Scalar::Int64(10),
16505                Scalar::Int64(20),
16506                Scalar::Int64(30),
16507            ])
16508            .expect("b");
16509            let c = a.combine_first(&b).expect("combine_first");
16510            assert_eq!(c.values()[0], Scalar::Int64(1));
16511            assert_eq!(c.values()[1], Scalar::Int64(20));
16512            assert_eq!(c.values()[2], Scalar::Int64(3));
16513        }
16514
16515        #[test]
16516        fn combine_first_length_mismatch_errors() {
16517            let a = Column::from_values(vec![Scalar::Int64(1)]).expect("a");
16518            let b = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("b");
16519            let err = a.combine_first(&b).unwrap_err();
16520            assert!(matches!(err, crate::ColumnError::LengthMismatch { .. }));
16521        }
16522
16523        #[test]
16524        fn clip_lower_only() {
16525            let col = Column::from_values(vec![
16526                Scalar::Float64(-2.0),
16527                Scalar::Float64(0.0),
16528                Scalar::Float64(5.0),
16529            ])
16530            .expect("col");
16531            let c = col.clip_lower(0.0).expect("clip_lower");
16532            assert_eq!(c.values()[0], Scalar::Float64(0.0));
16533            assert_eq!(c.values()[1], Scalar::Float64(0.0));
16534            assert_eq!(c.values()[2], Scalar::Float64(5.0));
16535        }
16536
16537        #[test]
16538        fn clip_upper_only() {
16539            let col = Column::from_values(vec![
16540                Scalar::Float64(-2.0),
16541                Scalar::Float64(0.0),
16542                Scalar::Float64(5.0),
16543            ])
16544            .expect("col");
16545            let c = col.clip_upper(1.0).expect("clip_upper");
16546            assert_eq!(c.values()[0], Scalar::Float64(-2.0));
16547            assert_eq!(c.values()[1], Scalar::Float64(0.0));
16548            assert_eq!(c.values()[2], Scalar::Float64(1.0));
16549        }
16550
16551        #[test]
16552        fn describe_returns_pandas_order() {
16553            let col = Column::from_values(vec![
16554                Scalar::Float64(1.0),
16555                Scalar::Float64(2.0),
16556                Scalar::Float64(3.0),
16557                Scalar::Float64(4.0),
16558                Scalar::Float64(5.0),
16559            ])
16560            .expect("col");
16561            let stats = col.describe().expect("describe");
16562            let names: Vec<&str> = stats.iter().map(|(k, _)| *k).collect();
16563            assert_eq!(
16564                names,
16565                vec!["count", "mean", "std", "min", "25%", "50%", "75%", "max"]
16566            );
16567            assert_eq!(stats[0].1, Scalar::Int64(5));
16568            assert!(
16569                matches!(&stats[1].1, Scalar::Float64(v) if (*v - 3.0).abs() < 1e-9),
16570                "expected Float64, got {:?}",
16571                stats[1].1
16572            );
16573            assert_eq!(stats[3].1, Scalar::Float64(1.0));
16574            assert_eq!(stats[7].1, Scalar::Float64(5.0));
16575        }
16576
16577        #[test]
16578        fn describe_rejects_utf8_column() {
16579            let col = Column::from_values(vec![Scalar::Utf8("a".into())]).expect("col");
16580            assert!(col.describe().is_err());
16581        }
16582
16583        #[test]
16584        fn combine_uses_fill_for_missing() {
16585            let a = Column::from_values(vec![
16586                Scalar::Int64(1),
16587                Scalar::Null(NullKind::NaN),
16588                Scalar::Int64(3),
16589            ])
16590            .expect("a");
16591            let b = Column::from_values(vec![
16592                Scalar::Int64(10),
16593                Scalar::Int64(20),
16594                Scalar::Null(NullKind::NaN),
16595            ])
16596            .expect("b");
16597            let out = a
16598                .combine(
16599                    &b,
16600                    |l, r| {
16601                        if let (Ok(lf), Ok(rf)) = (l.to_f64(), r.to_f64()) {
16602                            Scalar::Float64(lf + rf)
16603                        } else {
16604                            Scalar::Null(NullKind::NaN)
16605                        }
16606                    },
16607                    Some(Scalar::Int64(0)),
16608                )
16609                .expect("combine");
16610            assert_eq!(out.values()[0], Scalar::Float64(11.0));
16611            assert_eq!(out.values()[1], Scalar::Float64(20.0));
16612            assert_eq!(out.values()[2], Scalar::Float64(3.0));
16613        }
16614
16615        #[test]
16616        fn combine_length_mismatch_errors() {
16617            let a = Column::from_values(vec![Scalar::Int64(1)]).expect("a");
16618            let b = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("b");
16619            let err = a
16620                .combine(&b, |l, _| l.clone(), Some(Scalar::Int64(0)))
16621                .unwrap_err();
16622            assert!(matches!(err, crate::ColumnError::LengthMismatch { .. }));
16623        }
16624
16625        #[test]
16626        fn combine_fill_none_propagates_nulls_without_invoking_func() {
16627            let a = Column::from_values(vec![
16628                Scalar::Float64(1.0),
16629                Scalar::Null(NullKind::NaN),
16630                Scalar::Float64(3.0),
16631            ])
16632            .expect("a");
16633            let b = Column::from_values(vec![
16634                Scalar::Float64(10.0),
16635                Scalar::Float64(20.0),
16636                Scalar::Null(NullKind::NaN),
16637            ])
16638            .expect("b");
16639            let mut calls = 0usize;
16640            let out = a
16641                .combine(
16642                    &b,
16643                    |l, r| {
16644                        calls += 1;
16645                        Scalar::Float64(l.to_f64().unwrap() + r.to_f64().unwrap())
16646                    },
16647                    None,
16648                )
16649                .expect("combine");
16650            // Only the position with both non-null invokes func.
16651            assert_eq!(calls, 1);
16652            assert_eq!(out.values()[0], Scalar::Float64(11.0));
16653            assert!(out.values()[1].is_missing());
16654            assert!(out.values()[2].is_missing());
16655        }
16656
16657        #[test]
16658        fn combine_fill_none_all_present_matches_elementwise_apply() {
16659            let a = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("a");
16660            let b = Column::from_values(vec![Scalar::Int64(10), Scalar::Int64(20)]).expect("b");
16661            let out = a
16662                .combine(
16663                    &b,
16664                    |l, r| Scalar::Int64(l.to_f64().unwrap() as i64 + r.to_f64().unwrap() as i64),
16665                    None,
16666                )
16667                .expect("combine");
16668            assert_eq!(out.values()[0], Scalar::Int64(11));
16669            assert_eq!(out.values()[1], Scalar::Int64(22));
16670        }
16671
16672        #[test]
16673        fn apply_float_applies_numeric_fn() {
16674            let col = Column::from_values(vec![
16675                Scalar::Float64(1.0),
16676                Scalar::Null(NullKind::NaN),
16677                Scalar::Float64(4.0),
16678            ])
16679            .expect("col");
16680            let out = col.apply_float(|x| x.sqrt()).expect("apply_float");
16681            assert_eq!(out.values()[0], Scalar::Float64(1.0));
16682            assert!(out.values()[1].is_missing());
16683            assert_eq!(out.values()[2], Scalar::Float64(2.0));
16684            assert_eq!(out.dtype(), DType::Float64);
16685        }
16686
16687        #[test]
16688        fn apply_float_rejects_non_numeric() {
16689            let col = Column::from_values(vec![Scalar::Utf8("x".into())]).expect("col");
16690            assert!(col.apply_float(|x| x + 1.0).is_err());
16691        }
16692
16693        #[test]
16694        fn hist_counts_equal_width_bins() {
16695            let col = Column::from_values(vec![
16696                Scalar::Float64(0.0),
16697                Scalar::Float64(1.0),
16698                Scalar::Float64(2.0),
16699                Scalar::Float64(3.0),
16700                Scalar::Float64(9.0),
16701            ])
16702            .expect("col");
16703            let counts = col.hist_counts(3);
16704            // Bin width = 3, buckets [0,3), [3,6), [6,9]
16705            assert_eq!(counts.len(), 3);
16706            assert_eq!(counts[0], 3); // 0,1,2
16707            assert_eq!(counts[1], 1); // 3
16708            assert_eq!(counts[2], 1); // 9 clamps into last bin
16709        }
16710
16711        #[test]
16712        fn hist_counts_zero_bins_is_empty() {
16713            let col = Column::from_values(vec![Scalar::Float64(1.0)]).expect("col");
16714            assert!(col.hist_counts(0).is_empty());
16715        }
16716
16717        #[test]
16718        fn hist_counts_constant_column_puts_all_in_first_bin() {
16719            let col = Column::from_values(vec![
16720                Scalar::Float64(5.0),
16721                Scalar::Float64(5.0),
16722                Scalar::Float64(5.0),
16723            ])
16724            .expect("col");
16725            let counts = col.hist_counts(3);
16726            assert_eq!(counts[0], 3);
16727            assert_eq!(counts[1], 0);
16728            assert_eq!(counts[2], 0);
16729        }
16730
16731        #[test]
16732        fn nunique_drops_nulls() {
16733            let col = Column::from_values(vec![
16734                Scalar::Int64(1),
16735                Scalar::Int64(2),
16736                Scalar::Int64(1),
16737                Scalar::Null(NullKind::NaN),
16738            ])
16739            .expect("col");
16740            assert_eq!(col.nunique(), Scalar::Int64(2));
16741        }
16742
16743        #[test]
16744        fn nunique_with_dropna_false_counts_missing_once() {
16745            let col = Column::from_values(vec![
16746                Scalar::Int64(1),
16747                Scalar::Int64(2),
16748                Scalar::Int64(1),
16749                Scalar::Null(NullKind::NaN),
16750                Scalar::Null(NullKind::Null),
16751            ])
16752            .expect("col");
16753            assert_eq!(col.nunique_with_dropna(false), Scalar::Int64(3));
16754        }
16755
16756        #[test]
16757        fn nunique_with_dropna_false_all_missing_is_one() {
16758            let col = Column::from_values(vec![
16759                Scalar::Null(NullKind::NaN),
16760                Scalar::Null(NullKind::Null),
16761                Scalar::Null(NullKind::NaN),
16762            ])
16763            .expect("col");
16764            assert_eq!(col.nunique(), Scalar::Int64(0));
16765            assert_eq!(col.nunique_with_dropna(false), Scalar::Int64(1));
16766        }
16767
16768        #[test]
16769        fn any_all_reductions() {
16770            let col = Column::from_values(vec![Scalar::Int64(0), Scalar::Int64(0)]).expect("col");
16771            assert_eq!(col.any(), Scalar::Bool(false));
16772            assert_eq!(col.all(), Scalar::Bool(false));
16773
16774            let mixed = Column::from_values(vec![Scalar::Int64(0), Scalar::Int64(1)]).expect("col");
16775            assert_eq!(mixed.any(), Scalar::Bool(true));
16776            assert_eq!(mixed.all(), Scalar::Bool(false));
16777
16778            let all_true =
16779                Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
16780            assert_eq!(all_true.all(), Scalar::Bool(true));
16781        }
16782
16783        #[test]
16784        fn is_unique_true_when_no_repeats() {
16785            let col =
16786                Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)])
16787                    .expect("col");
16788            assert!(col.is_unique());
16789            assert!(!col.has_duplicates());
16790        }
16791
16792        #[test]
16793        fn has_duplicates_true_when_repeats_present() {
16794            let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(1)]).expect("col");
16795            assert!(col.has_duplicates());
16796            assert!(!col.is_unique());
16797        }
16798
16799        #[test]
16800        fn is_unique_ignores_nulls() {
16801            let col = Column::from_values(vec![
16802                Scalar::Int64(1),
16803                Scalar::Null(NullKind::NaN),
16804                Scalar::Null(NullKind::NaN),
16805            ])
16806            .expect("col");
16807            assert!(col.is_unique());
16808        }
16809
16810        #[test]
16811        fn pct_change_periods_one() {
16812            let col = Column::from_values(vec![
16813                Scalar::Float64(10.0),
16814                Scalar::Float64(12.0),
16815                Scalar::Float64(9.0),
16816            ])
16817            .expect("col");
16818            let r = col.pct_change(1).expect("pct_change");
16819            assert!(r.values()[0].is_missing());
16820            assert!(
16821                matches!(&r.values()[1], Scalar::Float64(v) if (*v - 0.2).abs() < 1e-9),
16822                "expected Float64, got {:?}",
16823                r.values()[1]
16824            );
16825            assert!(
16826                matches!(&r.values()[2], Scalar::Float64(v) if (*v + 0.25).abs() < 1e-9),
16827                "expected Float64, got {:?}",
16828                r.values()[2]
16829            );
16830        }
16831
16832        #[test]
16833        fn pct_change_zero_prev_yields_null() {
16834            let col =
16835                Column::from_values(vec![Scalar::Float64(0.0), Scalar::Float64(5.0)]).expect("col");
16836            let r = col.pct_change(1).expect("pct_change");
16837            assert!(r.values()[1].is_missing());
16838        }
16839
16840        #[test]
16841        fn pct_change_timedelta64_matches_pandas_mcu90() {
16842            // Per br-frankenpandas-mcu90: pct_change on Timedelta64 returns
16843            // dimensionless f64 ratios; was silently NaN before via the
16844            // to_f64-else catch-all (Timedelta64.to_f64() errors).
16845            let one_hour = 3_600 * 1_000_000_000_i64;
16846            let col = Column::from_values(vec![
16847                Scalar::Timedelta64(one_hour),
16848                Scalar::Timedelta64(2 * one_hour),
16849                Scalar::Timedelta64(4 * one_hour),
16850            ])
16851            .expect("col");
16852            let r = col.pct_change(1).expect("pct_change");
16853            assert!(r.values()[0].is_missing());
16854            assert!(
16855                matches!(&r.values()[1], Scalar::Float64(v) if (*v - 1.0).abs() < 1e-10),
16856                "expected Float64(1.0), got {:?}",
16857                r.values()[1]
16858            );
16859            assert!(
16860                matches!(&r.values()[2], Scalar::Float64(v) if (*v - 1.0).abs() < 1e-10),
16861                "expected Float64(1.0), got {:?}",
16862                r.values()[2]
16863            );
16864        }
16865
16866        #[test]
16867        fn pct_change_timedelta64_nat_propagates_mcu90() {
16868            use fp_types::Timedelta;
16869            let one_hour = 3_600 * 1_000_000_000_i64;
16870            let col = Column::from_values(vec![
16871                Scalar::Timedelta64(one_hour),
16872                Scalar::Timedelta64(Timedelta::NAT),
16873                Scalar::Timedelta64(2 * one_hour),
16874            ])
16875            .expect("col");
16876            let r = col.pct_change(1).expect("pct_change");
16877            assert!(r.values()[0].is_missing());
16878            assert!(r.values()[1].is_missing()); // NaT current → NaN
16879            assert!(r.values()[2].is_missing()); // NaT previous → NaN
16880        }
16881
16882        #[test]
16883        fn pct_change_with_fill_ffill_uses_filled_previous_value() {
16884            let col = Column::from_values(vec![
16885                Scalar::Float64(10.0),
16886                Scalar::Null(NullKind::NaN),
16887                Scalar::Float64(12.0),
16888            ])
16889            .expect("col");
16890            let r = col
16891                .pct_change_with_fill(1, Some("ffill"), None)
16892                .expect("pct_change_with_fill");
16893            assert!(r.values()[0].is_missing());
16894            assert!(
16895                matches!(&r.values()[1], Scalar::Float64(v) if v.abs() < 1e-9),
16896                "expected Float64, got {:?}",
16897                r.values()[1]
16898            );
16899            assert!(
16900                matches!(&r.values()[2], Scalar::Float64(v) if (*v - 0.2).abs() < 1e-9),
16901                "expected Float64, got {:?}",
16902                r.values()[2]
16903            );
16904        }
16905
16906        #[test]
16907        fn pct_change_with_fill_limit_caps_forward_fill_runs() {
16908            let col = Column::from_values(vec![
16909                Scalar::Float64(10.0),
16910                Scalar::Null(NullKind::NaN),
16911                Scalar::Null(NullKind::NaN),
16912                Scalar::Float64(20.0),
16913            ])
16914            .expect("col");
16915            let r = col
16916                .pct_change_with_fill(1, Some("ffill"), Some(1))
16917                .expect("pct_change_with_fill");
16918            assert!(r.values()[0].is_missing());
16919            assert!(
16920                matches!(&r.values()[1], Scalar::Float64(v) if v.abs() < 1e-9),
16921                "expected Float64, got {:?}",
16922                r.values()[1]
16923            );
16924            assert!(r.values()[2].is_missing());
16925            assert!(r.values()[3].is_missing());
16926        }
16927
16928        #[test]
16929        fn pct_change_with_fill_bfill_aliases_backward_fill() {
16930            let col = Column::from_values(vec![
16931                Scalar::Float64(10.0),
16932                Scalar::Null(NullKind::NaN),
16933                Scalar::Float64(20.0),
16934            ])
16935            .expect("col");
16936            let r = col
16937                .pct_change_with_fill(1, Some("backfill"), None)
16938                .expect("pct_change_with_fill");
16939            assert!(r.values()[0].is_missing());
16940            assert!(
16941                matches!(&r.values()[1], Scalar::Float64(v) if (*v - 1.0).abs() < 1e-9),
16942                "expected Float64, got {:?}",
16943                r.values()[1]
16944            );
16945            assert!(
16946                matches!(&r.values()[2], Scalar::Float64(v) if v.abs() < 1e-9),
16947                "expected Float64, got {:?}",
16948                r.values()[2]
16949            );
16950        }
16951
16952        #[test]
16953        fn pct_change_with_fill_rejects_invalid_method() {
16954            let col =
16955                Column::from_values(vec![Scalar::Float64(1.0), Scalar::Float64(2.0)]).expect("col");
16956            let err = col
16957                .pct_change_with_fill(1, Some("nearest"), None)
16958                .expect_err("invalid fill_method should error");
16959            assert!(matches!(
16960                err,
16961                crate::ColumnError::Type(fp_types::TypeError::NonNumericValue { .. })
16962            ));
16963        }
16964
16965        #[test]
16966        fn ffill_fills_trailing_missing_runs() {
16967            let col = Column::from_values(vec![
16968                Scalar::Null(NullKind::NaN),
16969                Scalar::Int64(1),
16970                Scalar::Null(NullKind::NaN),
16971                Scalar::Null(NullKind::NaN),
16972                Scalar::Int64(5),
16973            ])
16974            .expect("col");
16975            let r = col.ffill(None).expect("ffill");
16976            assert!(r.values()[0].is_missing());
16977            assert_eq!(r.values()[1], Scalar::Int64(1));
16978            assert_eq!(r.values()[2], Scalar::Int64(1));
16979            assert_eq!(r.values()[3], Scalar::Int64(1));
16980            assert_eq!(r.values()[4], Scalar::Int64(5));
16981        }
16982
16983        #[test]
16984        fn ffill_respects_limit_per_run() {
16985            let col = Column::from_values(vec![
16986                Scalar::Int64(1),
16987                Scalar::Null(NullKind::NaN),
16988                Scalar::Null(NullKind::NaN),
16989                Scalar::Null(NullKind::NaN),
16990                Scalar::Int64(9),
16991            ])
16992            .expect("col");
16993            let r = col.ffill(Some(2)).expect("ffill");
16994            assert_eq!(r.values()[0], Scalar::Int64(1));
16995            assert_eq!(r.values()[1], Scalar::Int64(1));
16996            assert_eq!(r.values()[2], Scalar::Int64(1));
16997            assert!(r.values()[3].is_missing());
16998            assert_eq!(r.values()[4], Scalar::Int64(9));
16999            assert_eq!(col.pad(Some(2)), col.ffill(Some(2)));
17000        }
17001
17002        #[test]
17003        fn bfill_fills_leading_missing_runs() {
17004            let col = Column::from_values(vec![
17005                Scalar::Null(NullKind::NaN),
17006                Scalar::Null(NullKind::NaN),
17007                Scalar::Int64(3),
17008                Scalar::Null(NullKind::NaN),
17009            ])
17010            .expect("col");
17011            let r = col.bfill(None).expect("bfill");
17012            assert_eq!(r.values()[0], Scalar::Int64(3));
17013            assert_eq!(r.values()[1], Scalar::Int64(3));
17014            assert_eq!(r.values()[2], Scalar::Int64(3));
17015            // Trailing null stays null (no next value).
17016            assert!(r.values()[3].is_missing());
17017        }
17018
17019        #[test]
17020        fn bfill_respects_limit_per_run() {
17021            let col = Column::from_values(vec![
17022                Scalar::Null(NullKind::NaN),
17023                Scalar::Null(NullKind::NaN),
17024                Scalar::Null(NullKind::NaN),
17025                Scalar::Int64(7),
17026            ])
17027            .expect("col");
17028            let r = col.bfill(Some(1)).expect("bfill");
17029            assert!(r.values()[0].is_missing());
17030            assert!(r.values()[1].is_missing());
17031            assert_eq!(r.values()[2], Scalar::Int64(7));
17032            assert_eq!(r.values()[3], Scalar::Int64(7));
17033            assert_eq!(col.backfill(Some(1)), col.bfill(Some(1)));
17034        }
17035
17036        #[test]
17037        fn ffill_empty_is_empty_same_dtype() {
17038            let col = Column::from_values(Vec::<Scalar>::new()).expect("col");
17039            let r = col.ffill(None).expect("ffill");
17040            assert!(r.is_empty());
17041            assert_eq!(r.dtype(), DType::Null);
17042        }
17043
17044        #[test]
17045        fn pandas_metadata_and_materialization_aliases_match_core_methods()
17046        -> Result<(), crate::ColumnError> {
17047            let col = Column::from_values(vec![
17048                Scalar::Int64(1),
17049                Scalar::Null(NullKind::NaN),
17050                Scalar::Int64(2),
17051            ])?;
17052
17053            assert_eq!(col.size(), col.len());
17054            assert_eq!(col.shape(), (col.len(),));
17055            assert_eq!(col.ndim(), 1);
17056            assert_eq!(col.empty(), col.is_empty());
17057            assert_eq!(col.to_list(), col.to_vec());
17058            assert_eq!(col.tolist(), col.to_vec());
17059            assert_eq!(col.to_numpy(), col.to_vec());
17060            assert_eq!(col.ravel(), col.to_numpy());
17061            assert_eq!(col.array(), col.to_vec());
17062            Ok(())
17063        }
17064
17065        #[test]
17066        fn isnull_notnull_flag_missing_positions() {
17067            let col = Column::from_values(vec![
17068                Scalar::Int64(1),
17069                Scalar::Null(NullKind::NaN),
17070                Scalar::Int64(2),
17071            ])
17072            .expect("col");
17073            let is_null = col.isnull().expect("isnull");
17074            let not_null = col.notnull().expect("notnull");
17075            assert_eq!(is_null.dtype(), DType::Bool);
17076            assert_eq!(
17077                is_null.values(),
17078                &[Scalar::Bool(false), Scalar::Bool(true), Scalar::Bool(false),]
17079            );
17080            assert_eq!(col.isna(), col.isnull());
17081            assert_eq!(
17082                not_null.values(),
17083                &[Scalar::Bool(true), Scalar::Bool(false), Scalar::Bool(true),]
17084            );
17085            assert_eq!(col.notna(), col.notnull());
17086        }
17087
17088        #[test]
17089        fn var_std_sem_ddof_one() {
17090            let col = Column::from_values(vec![
17091                Scalar::Float64(2.0),
17092                Scalar::Float64(4.0),
17093                Scalar::Float64(4.0),
17094                Scalar::Float64(4.0),
17095                Scalar::Float64(5.0),
17096                Scalar::Float64(5.0),
17097                Scalar::Float64(7.0),
17098                Scalar::Float64(9.0),
17099            ])
17100            .expect("col");
17101            match col.var(1) {
17102                Scalar::Float64(v) => assert!((v - 4.571428571428571).abs() < 1e-9),
17103                other => unreachable!("expected Float64, got {other:?}"),
17104            }
17105            match col.std(1) {
17106                Scalar::Float64(v) => assert!((v - 2.138089935299395).abs() < 1e-9),
17107                other => unreachable!("expected Float64, got {other:?}"),
17108            }
17109            match col.sem(1) {
17110                Scalar::Float64(v) => assert!((v - 0.7559289460184544).abs() < 1e-9),
17111                other => unreachable!("expected Float64, got {other:?}"),
17112            }
17113        }
17114
17115        #[test]
17116        fn skew_symmetric_is_zero() {
17117            let col = Column::from_values(vec![
17118                Scalar::Float64(1.0),
17119                Scalar::Float64(2.0),
17120                Scalar::Float64(3.0),
17121                Scalar::Float64(4.0),
17122                Scalar::Float64(5.0),
17123            ])
17124            .expect("col");
17125            match col.skew() {
17126                Scalar::Float64(v) => assert!(v.abs() < 1e-9),
17127                other => unreachable!("expected Float64, got {other:?}"),
17128            }
17129        }
17130
17131        #[test]
17132        fn kurt_uniform_five_values_is_minus_one_point_two() {
17133            let col = Column::from_values(vec![
17134                Scalar::Float64(1.0),
17135                Scalar::Float64(2.0),
17136                Scalar::Float64(3.0),
17137                Scalar::Float64(4.0),
17138                Scalar::Float64(5.0),
17139            ])
17140            .expect("col");
17141            match col.kurt() {
17142                Scalar::Float64(v) => assert!((v + 1.2).abs() < 1e-9),
17143                other => unreachable!("expected Float64, got {other:?}"),
17144            }
17145        }
17146
17147        #[test]
17148        fn kurtosis_alias_matches_kurt() {
17149            let col = Column::from_values(vec![
17150                Scalar::Float64(1.0),
17151                Scalar::Float64(2.0),
17152                Scalar::Float64(3.0),
17153                Scalar::Float64(4.0),
17154                Scalar::Float64(5.0),
17155            ])
17156            .expect("col");
17157            assert_eq!(col.kurtosis(), col.kurt());
17158        }
17159
17160        #[test]
17161        fn ptp_returns_max_minus_min() {
17162            let col = Column::from_values(vec![
17163                Scalar::Float64(3.0),
17164                Scalar::Null(NullKind::NaN),
17165                Scalar::Float64(7.0),
17166                Scalar::Float64(1.0),
17167            ])
17168            .expect("col");
17169            assert_eq!(col.ptp(), Scalar::Float64(6.0));
17170        }
17171
17172        #[test]
17173        fn ptp_empty_is_null() {
17174            let col = Column::from_values(Vec::<Scalar>::new()).expect("col");
17175            assert!(col.ptp().is_missing());
17176        }
17177
17178        #[test]
17179        fn skew_too_few_values_returns_null() {
17180            let col =
17181                Column::from_values(vec![Scalar::Float64(1.0), Scalar::Float64(2.0)]).expect("col");
17182            assert!(col.skew().is_missing());
17183        }
17184
17185        #[test]
17186        fn rolling_window_sum_full_window() {
17187            let col = Column::from_values(vec![
17188                Scalar::Float64(1.0),
17189                Scalar::Float64(2.0),
17190                Scalar::Float64(3.0),
17191                Scalar::Float64(4.0),
17192                Scalar::Float64(5.0),
17193            ])
17194            .expect("col");
17195            // window=3, min_periods=3 -> [NaN, NaN, 6, 9, 12]
17196            let r = col.rolling_window_sum(3, 3).expect("rolling");
17197            assert!(r.values()[0].is_missing());
17198            assert!(r.values()[1].is_missing());
17199            assert_eq!(r.values()[2], Scalar::Float64(6.0));
17200            assert_eq!(r.values()[3], Scalar::Float64(9.0));
17201            assert_eq!(r.values()[4], Scalar::Float64(12.0));
17202        }
17203
17204        #[test]
17205        fn rolling_window_sum_min_periods_relaxed() {
17206            let col = Column::from_values(vec![
17207                Scalar::Float64(1.0),
17208                Scalar::Float64(2.0),
17209                Scalar::Float64(3.0),
17210            ])
17211            .expect("col");
17212            // window=3, min_periods=1 -> [1, 3, 6]
17213            let r = col.rolling_window_sum(3, 1).expect("rolling");
17214            assert_eq!(r.values()[0], Scalar::Float64(1.0));
17215            assert_eq!(r.values()[1], Scalar::Float64(3.0));
17216            assert_eq!(r.values()[2], Scalar::Float64(6.0));
17217        }
17218
17219        #[test]
17220        fn rolling_window_sum_skips_missing() {
17221            let col = Column::from_values(vec![
17222                Scalar::Float64(1.0),
17223                Scalar::Null(NullKind::NaN),
17224                Scalar::Float64(3.0),
17225                Scalar::Float64(4.0),
17226            ])
17227            .expect("col");
17228            // window=3, min_periods=2:
17229            // i=0: {1.0} observed=1 → NaN (below min_periods)
17230            // i=1: {1.0, NaN} observed=1 → NaN
17231            // i=2: {1.0, NaN, 3.0} observed=2 → 4.0
17232            // i=3: {NaN, 3.0, 4.0} observed=2 → 7.0
17233            let r = col.rolling_window_sum(3, 2).expect("rolling");
17234            assert!(r.values()[0].is_missing());
17235            assert!(r.values()[1].is_missing());
17236            assert_eq!(r.values()[2], Scalar::Float64(4.0));
17237            assert_eq!(r.values()[3], Scalar::Float64(7.0));
17238        }
17239
17240        #[test]
17241        fn rolling_window_sum_window_zero_is_all_null() {
17242            let col =
17243                Column::from_values(vec![Scalar::Float64(1.0), Scalar::Float64(2.0)]).expect("col");
17244            let r = col.rolling_window_sum(0, 0).expect("rolling");
17245            assert!(r.values()[0].is_missing());
17246            assert!(r.values()[1].is_missing());
17247            assert_eq!(r.dtype(), DType::Float64);
17248        }
17249
17250        #[test]
17251        fn diff_valid_skips_missing_predecessors() {
17252            let col = Column::from_values(vec![
17253                Scalar::Null(NullKind::NaN),
17254                Scalar::Float64(1.0),
17255                Scalar::Null(NullKind::NaN),
17256                Scalar::Float64(4.0),
17257                Scalar::Float64(7.0),
17258            ])
17259            .expect("col");
17260            let r = col.diff_valid().expect("diff_valid");
17261            assert!(r.values()[0].is_missing()); // null in, null out
17262            assert!(r.values()[1].is_missing()); // first non-missing -> no prev
17263            assert!(r.values()[2].is_missing()); // null in
17264            assert_eq!(r.values()[3], Scalar::Float64(3.0)); // 4 - 1
17265            assert_eq!(r.values()[4], Scalar::Float64(3.0)); // 7 - 4
17266        }
17267
17268        #[test]
17269        fn diff_valid_empty_column() {
17270            let col = Column::from_values(Vec::<Scalar>::new()).expect("col");
17271            let r = col.diff_valid().expect("diff_valid");
17272            assert!(r.is_empty());
17273            assert_eq!(r.dtype(), DType::Float64);
17274        }
17275
17276        #[test]
17277        fn sample_without_replacement_deterministic_by_seed() {
17278            let col =
17279                Column::from_values((0..10).map(Scalar::Int64).collect::<Vec<_>>()).expect("col");
17280            let a = col.sample(3, 42).expect("sample");
17281            let b = col.sample(3, 42).expect("sample");
17282            // Same seed → identical ordering.
17283            assert_eq!(a.values(), b.values());
17284            assert_eq!(a.len(), 3);
17285            // All picks lie within the original range.
17286            for v in a.values() {
17287                match v {
17288                    Scalar::Int64(x) => assert!((0..10).contains(x)),
17289                    other => unreachable!("unexpected value {other:?}"),
17290                }
17291            }
17292        }
17293
17294        #[test]
17295        fn sample_different_seeds_likely_differ() {
17296            let col =
17297                Column::from_values((0..100).map(Scalar::Int64).collect::<Vec<_>>()).expect("col");
17298            let a = col.sample(5, 1).expect("sample");
17299            let b = col.sample(5, 2).expect("sample");
17300            // Two independent seeds on a 100-element population: collision
17301            // probability of the full 5-pick tuple is astronomically low.
17302            assert_ne!(a.values(), b.values());
17303        }
17304
17305        #[test]
17306        fn sample_n_at_or_above_len_returns_clone() {
17307            let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
17308            let r = col.sample(10, 42).expect("sample");
17309            assert_eq!(r.values(), col.values());
17310        }
17311
17312        #[test]
17313        fn first_valid_last_valid_skip_nulls() {
17314            let col = Column::from_values(vec![
17315                Scalar::Null(NullKind::NaN),
17316                Scalar::Null(NullKind::NaN),
17317                Scalar::Int64(5),
17318                Scalar::Int64(7),
17319                Scalar::Null(NullKind::NaN),
17320            ])
17321            .expect("col");
17322            assert_eq!(col.first_valid(), Some(2));
17323            assert_eq!(col.last_valid(), Some(3));
17324            assert_eq!(col.first_valid_index(), Some(2));
17325            assert_eq!(col.last_valid_index(), Some(3));
17326        }
17327
17328        #[test]
17329        fn nsmallest_keep_first_breaks_ties_by_earlier_position() {
17330            let col = Column::from_values(vec![
17331                Scalar::Int64(2), // pos 0
17332                Scalar::Int64(1), // pos 1
17333                Scalar::Int64(1), // pos 2
17334                Scalar::Int64(3), // pos 3
17335                Scalar::Int64(1), // pos 4
17336            ])
17337            .expect("col");
17338            let r = col.nsmallest_keep(2, "first").expect("nsmallest_keep");
17339            // Two 1s: ties broken by earliest position → positions 1,2 → values [1, 1].
17340            assert_eq!(r.len(), 2);
17341            assert_eq!(r.values()[0], Scalar::Int64(1));
17342            assert_eq!(r.values()[1], Scalar::Int64(1));
17343        }
17344
17345        #[test]
17346        fn nsmallest_keep_last_breaks_ties_by_later_position() {
17347            let col = Column::from_values(vec![
17348                Scalar::Int64(1), // pos 0
17349                Scalar::Int64(2),
17350                Scalar::Int64(1), // pos 2
17351                Scalar::Int64(3),
17352                Scalar::Int64(1), // pos 4
17353            ])
17354            .expect("col");
17355            // Three tied 1s; keep=last picks positions 4, 2 (latest two).
17356            let r = col.nsmallest_keep(2, "last").expect("nsmallest_keep");
17357            assert_eq!(r.len(), 2);
17358            assert_eq!(r.values()[0], Scalar::Int64(1));
17359            assert_eq!(r.values()[1], Scalar::Int64(1));
17360        }
17361
17362        #[test]
17363        fn nsmallest_keep_all_expands_beyond_n_on_ties() {
17364            let col = Column::from_values(vec![
17365                Scalar::Int64(1),
17366                Scalar::Int64(1),
17367                Scalar::Int64(1),
17368                Scalar::Int64(2),
17369            ])
17370            .expect("col");
17371            // n=1 but three 1s tied for smallest; keep='all' returns them all.
17372            let r = col.nsmallest_keep(1, "all").expect("nsmallest_keep");
17373            assert_eq!(r.len(), 3);
17374            assert_eq!(r.values()[0], Scalar::Int64(1));
17375            assert_eq!(r.values()[1], Scalar::Int64(1));
17376            assert_eq!(r.values()[2], Scalar::Int64(1));
17377        }
17378
17379        #[test]
17380        fn nlargest_keep_mirror_symmetry() {
17381            let col = Column::from_values(vec![
17382                Scalar::Int64(1),
17383                Scalar::Int64(3),
17384                Scalar::Int64(3),
17385                Scalar::Int64(2),
17386            ])
17387            .expect("col");
17388            let r = col.nlargest_keep(1, "all").expect("nlargest_keep");
17389            assert_eq!(r.len(), 2);
17390            assert_eq!(r.values()[0], Scalar::Int64(3));
17391            assert_eq!(r.values()[1], Scalar::Int64(3));
17392        }
17393
17394        #[test]
17395        fn nkeep_invalid_keep_errors() {
17396            let col = Column::from_values(vec![Scalar::Int64(1)]).expect("col");
17397            assert!(col.nsmallest_keep(1, "middle").is_err());
17398            assert!(col.nlargest_keep(1, "middle").is_err());
17399        }
17400
17401        #[test]
17402        fn nkeep_zero_is_empty_same_dtype() {
17403            let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
17404            let r = col.nsmallest_keep(0, "first").expect("nsmallest_keep");
17405            assert!(r.is_empty());
17406            assert_eq!(r.dtype(), DType::Int64);
17407        }
17408
17409        #[test]
17410        fn first_valid_last_valid_all_missing_is_none() {
17411            let col = Column::from_values(vec![
17412                Scalar::Null(NullKind::NaN),
17413                Scalar::Null(NullKind::Null),
17414            ])
17415            .expect("col");
17416            assert_eq!(col.first_valid(), None);
17417            assert_eq!(col.last_valid(), None);
17418            assert_eq!(col.first_valid_index(), None);
17419            assert_eq!(col.last_valid_index(), None);
17420        }
17421
17422        #[test]
17423        fn rolling_window_sum_empty_column() {
17424            let col = Column::from_values(Vec::<Scalar>::new()).expect("col");
17425            let r = col.rolling_window_sum(3, 1).expect("rolling");
17426            assert!(r.is_empty());
17427            assert_eq!(r.dtype(), DType::Float64);
17428        }
17429
17430        #[test]
17431        fn pct_change_negative_periods() {
17432            let col = Column::from_values(vec![Scalar::Float64(10.0), Scalar::Float64(15.0)])
17433                .expect("col");
17434            let r = col.pct_change(-1).expect("pct_change");
17435            // (10 - 15) / 15 = -1/3
17436            assert!(
17437                matches!(&r.values()[0], Scalar::Float64(v) if (*v + 1.0 / 3.0).abs() < 1e-9),
17438                "expected Float64, got {:?}",
17439                r.values()[0]
17440            );
17441            assert!(r.values()[1].is_missing());
17442        }
17443
17444        #[test]
17445        fn count_excludes_nulls() {
17446            let col = Column::from_values(vec![
17447                Scalar::Int64(1),
17448                Scalar::Null(NullKind::NaN),
17449                Scalar::Int64(2),
17450                Scalar::Null(NullKind::Null),
17451            ])
17452            .expect("col");
17453            assert_eq!(col.count(), 2);
17454        }
17455    }
17456
17457    mod where_mask {
17458        use fp_types::NullKind;
17459
17460        use super::*;
17461
17462        #[test]
17463        fn where_cond_keeps_true_positions() {
17464            let col =
17465                Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)])
17466                    .expect("col");
17467            let cond = Column::from_values(vec![
17468                Scalar::Bool(true),
17469                Scalar::Bool(false),
17470                Scalar::Bool(true),
17471            ])
17472            .expect("cond");
17473            let fill = Scalar::Int64(-1);
17474            let out = col.where_cond(&cond, &fill).expect("where");
17475            assert_eq!(col.r#where(&cond, &fill).expect("where"), out);
17476            assert_eq!(out.values()[0], Scalar::Int64(1));
17477            assert_eq!(out.values()[1], Scalar::Int64(-1));
17478            assert_eq!(out.values()[2], Scalar::Int64(3));
17479        }
17480
17481        #[test]
17482        fn mask_inverts_where_cond() {
17483            let col =
17484                Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)])
17485                    .expect("col");
17486            let cond = Column::from_values(vec![
17487                Scalar::Bool(true),
17488                Scalar::Bool(false),
17489                Scalar::Bool(true),
17490            ])
17491            .expect("cond");
17492            let fill = Scalar::Int64(0);
17493            let out = col.mask(&cond, &fill).expect("mask");
17494            assert_eq!(out.values()[0], Scalar::Int64(0));
17495            assert_eq!(out.values()[1], Scalar::Int64(2));
17496            assert_eq!(out.values()[2], Scalar::Int64(0));
17497        }
17498
17499        #[test]
17500        fn where_missing_cond_propagates_null() {
17501            let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
17502            let cond = Column::from_values(vec![Scalar::Bool(true), Scalar::Null(NullKind::NaN)])
17503                .expect("cond");
17504            let fill = Scalar::Int64(-1);
17505            let out = col.where_cond(&cond, &fill).expect("where");
17506            assert_eq!(out.values()[0], Scalar::Int64(1));
17507            assert!(out.values()[1].is_missing());
17508        }
17509
17510        #[test]
17511        fn where_rejects_non_bool_cond() {
17512            let col = Column::from_values(vec![Scalar::Int64(1)]).expect("col");
17513            let cond = Column::from_values(vec![Scalar::Int64(1)]).expect("cond");
17514            let err = col.where_cond(&cond, &Scalar::Int64(0)).unwrap_err();
17515            assert!(matches!(err, crate::ColumnError::InvalidMaskType { .. }));
17516        }
17517
17518        #[test]
17519        fn equals_elementwise_matches_semantic_eq() {
17520            let a = Column::from_values(vec![
17521                Scalar::Int64(1),
17522                Scalar::Int64(2),
17523                Scalar::Null(NullKind::NaN),
17524            ])
17525            .expect("a");
17526            let b = Column::from_values(vec![
17527                Scalar::Int64(1),
17528                Scalar::Int64(3),
17529                Scalar::Null(NullKind::NaN),
17530            ])
17531            .expect("b");
17532            let r = a.equals(&b).expect("equals");
17533            assert_eq!(r.dtype(), DType::Bool);
17534            assert_eq!(r.values()[0], Scalar::Bool(true));
17535            assert_eq!(r.values()[1], Scalar::Bool(false));
17536            // NaN vs NaN → false (pandas semantics)
17537            assert_eq!(r.values()[2], Scalar::Bool(false));
17538        }
17539
17540        #[test]
17541        fn equals_length_mismatch_errors() {
17542            let a = Column::from_values(vec![Scalar::Int64(1)]).expect("a");
17543            let b = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("b");
17544            assert!(a.equals(&b).is_err());
17545        }
17546
17547        #[test]
17548        fn dot_ignores_missing() {
17549            let a = Column::from_values(vec![
17550                Scalar::Float64(1.0),
17551                Scalar::Null(NullKind::NaN),
17552                Scalar::Float64(3.0),
17553            ])
17554            .expect("a");
17555            let b = Column::from_values(vec![
17556                Scalar::Float64(2.0),
17557                Scalar::Float64(4.0),
17558                Scalar::Float64(5.0),
17559            ])
17560            .expect("b");
17561            let r = a.dot(&b).expect("dot");
17562            // 1*2 + skip + 3*5 = 17
17563            assert!((r - 17.0).abs() < 1e-9);
17564        }
17565
17566        #[test]
17567        fn dot_non_numeric_errors() {
17568            let a = Column::from_values(vec![Scalar::Utf8("x".into())]).expect("a");
17569            let b = Column::from_values(vec![Scalar::Float64(1.0)]).expect("b");
17570            assert!(a.dot(&b).is_err());
17571        }
17572
17573        #[test]
17574        fn fillna_with_column_fills_missing_positions() {
17575            let a = Column::from_values(vec![
17576                Scalar::Int64(1),
17577                Scalar::Null(NullKind::NaN),
17578                Scalar::Int64(3),
17579            ])
17580            .expect("a");
17581            let b = Column::from_values(vec![
17582                Scalar::Int64(10),
17583                Scalar::Int64(20),
17584                Scalar::Int64(30),
17585            ])
17586            .expect("b");
17587            let r = a.fillna_with_column(&b).expect("fillna_with_column");
17588            assert_eq!(r.values()[0], Scalar::Int64(1));
17589            assert_eq!(r.values()[1], Scalar::Int64(20));
17590            assert_eq!(r.values()[2], Scalar::Int64(3));
17591        }
17592
17593        #[test]
17594        fn divmod_returns_quotient_and_remainder() {
17595            let a = Column::from_values(vec![
17596                Scalar::Float64(10.0),
17597                Scalar::Float64(7.0),
17598                Scalar::Float64(-5.0),
17599            ])
17600            .expect("a");
17601            let b = Column::from_values(vec![
17602                Scalar::Float64(3.0),
17603                Scalar::Float64(2.0),
17604                Scalar::Float64(3.0),
17605            ])
17606            .expect("b");
17607            let (q, r) = a.divmod(&b).expect("divmod");
17608            // Python-style: floor(10/3)=3, 10 - 3*3 = 1
17609            match (&q.values()[0], &r.values()[0]) {
17610                (Scalar::Float64(qv), Scalar::Float64(rv)) => {
17611                    assert!((qv - 3.0).abs() < 1e-9);
17612                    assert!((rv - 1.0).abs() < 1e-9);
17613                }
17614                other => unreachable!("unexpected {other:?}"),
17615            }
17616            // 7 / 2 → q=3, r=1
17617            match (&q.values()[1], &r.values()[1]) {
17618                (Scalar::Float64(qv), Scalar::Float64(rv)) => {
17619                    assert!((qv - 3.0).abs() < 1e-9);
17620                    assert!((rv - 1.0).abs() < 1e-9);
17621                }
17622                other => unreachable!("unexpected {other:?}"),
17623            }
17624            // -5 / 3 → q=-2 (floor), r = -5 - (-2*3) = 1
17625            match (&q.values()[2], &r.values()[2]) {
17626                (Scalar::Float64(qv), Scalar::Float64(rv)) => {
17627                    assert!((qv + 2.0).abs() < 1e-9);
17628                    assert!((rv - 1.0).abs() < 1e-9);
17629                }
17630                other => unreachable!("unexpected {other:?}"),
17631            }
17632        }
17633
17634        #[test]
17635        fn divmod_zero_divisor_yields_null() {
17636            let a = Column::from_values(vec![Scalar::Float64(10.0)]).expect("a");
17637            let b = Column::from_values(vec![Scalar::Float64(0.0)]).expect("b");
17638            let (q, r) = a.divmod(&b).expect("divmod");
17639            assert!(q.values()[0].is_missing());
17640            assert!(r.values()[0].is_missing());
17641        }
17642
17643        #[test]
17644        fn divmod_infinite_operands_match_pandas_float_semantics() {
17645            let a = Column::from_values(vec![
17646                Scalar::Float64(f64::INFINITY),
17647                Scalar::Float64(f64::NEG_INFINITY),
17648                Scalar::Float64(5.0),
17649                Scalar::Float64(-5.0),
17650                Scalar::Float64(f64::INFINITY),
17651            ])
17652            .expect("a");
17653            let b = Column::from_values(vec![
17654                Scalar::Float64(2.0),
17655                Scalar::Float64(-2.0),
17656                Scalar::Float64(f64::INFINITY),
17657                Scalar::Float64(f64::INFINITY),
17658                Scalar::Float64(f64::INFINITY),
17659            ])
17660            .expect("b");
17661
17662            let (q, r) = a.divmod(&b).expect("divmod");
17663            assert!(matches!(q.values()[0], Scalar::Float64(v) if v.is_nan()));
17664            assert!(matches!(r.values()[0], Scalar::Float64(v) if v.is_nan()));
17665            assert!(matches!(q.values()[1], Scalar::Float64(v) if v.is_nan()));
17666            assert!(matches!(r.values()[1], Scalar::Float64(v) if v.is_nan()));
17667            assert_eq!(q.values()[2], Scalar::Float64(0.0));
17668            assert_eq!(r.values()[2], Scalar::Float64(5.0));
17669            assert_eq!(q.values()[3], Scalar::Float64(-1.0));
17670            assert_eq!(r.values()[3], Scalar::Float64(f64::INFINITY));
17671            assert!(matches!(q.values()[4], Scalar::Float64(v) if v.is_nan()));
17672            assert!(matches!(r.values()[4], Scalar::Float64(v) if v.is_nan()));
17673        }
17674
17675        #[test]
17676        fn where_cond_series_fills_from_other_column() {
17677            let col =
17678                Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)])
17679                    .expect("col");
17680            let cond = Column::from_values(vec![
17681                Scalar::Bool(true),
17682                Scalar::Bool(false),
17683                Scalar::Bool(true),
17684            ])
17685            .expect("cond");
17686            let other = Column::from_values(vec![
17687                Scalar::Int64(10),
17688                Scalar::Int64(20),
17689                Scalar::Int64(30),
17690            ])
17691            .expect("other");
17692            let out = col.where_cond_series(&cond, &other).expect("where_series");
17693            assert_eq!(out.values()[0], Scalar::Int64(1));
17694            assert_eq!(out.values()[1], Scalar::Int64(20));
17695            assert_eq!(out.values()[2], Scalar::Int64(3));
17696        }
17697
17698        #[test]
17699        fn mask_series_fills_from_other_column() {
17700            let col =
17701                Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)])
17702                    .expect("col");
17703            let cond = Column::from_values(vec![
17704                Scalar::Bool(true),
17705                Scalar::Bool(false),
17706                Scalar::Bool(true),
17707            ])
17708            .expect("cond");
17709            let other =
17710                Column::from_values(vec![Scalar::Int64(0), Scalar::Int64(0), Scalar::Int64(0)])
17711                    .expect("other");
17712            let out = col.mask_series(&cond, &other).expect("mask_series");
17713            assert_eq!(out.values()[0], Scalar::Int64(0));
17714            assert_eq!(out.values()[1], Scalar::Int64(2));
17715            assert_eq!(out.values()[2], Scalar::Int64(0));
17716        }
17717
17718        #[test]
17719        fn where_cond_series_rejects_non_bool_cond() {
17720            let col = Column::from_values(vec![Scalar::Int64(1)]).expect("col");
17721            let cond = Column::from_values(vec![Scalar::Int64(1)]).expect("cond");
17722            let other = Column::from_values(vec![Scalar::Int64(0)]).expect("other");
17723            let err = col.where_cond_series(&cond, &other).unwrap_err();
17724            assert!(matches!(err, crate::ColumnError::InvalidMaskType { .. }));
17725        }
17726
17727        #[test]
17728        fn replace_values_applies_first_match() {
17729            let col = Column::from_values(vec![
17730                Scalar::Int64(1),
17731                Scalar::Int64(2),
17732                Scalar::Int64(3),
17733                Scalar::Int64(2),
17734            ])
17735            .expect("col");
17736            let to_replace = vec![Scalar::Int64(2), Scalar::Int64(3)];
17737            let replacement = vec![Scalar::Int64(20), Scalar::Int64(30)];
17738            let out = col
17739                .replace_values(&to_replace, &replacement)
17740                .expect("replace");
17741            let alias = col
17742                .replace(&to_replace, &replacement)
17743                .expect("replace alias");
17744            assert_eq!(alias, out);
17745            assert_eq!(out.values()[0], Scalar::Int64(1));
17746            assert_eq!(out.values()[1], Scalar::Int64(20));
17747            assert_eq!(out.values()[2], Scalar::Int64(30));
17748            assert_eq!(out.values()[3], Scalar::Int64(20));
17749        }
17750
17751        #[test]
17752        fn replace_values_can_replace_nulls() {
17753            let col = Column::from_values(vec![
17754                Scalar::Int64(1),
17755                Scalar::Null(NullKind::NaN),
17756                Scalar::Int64(2),
17757            ])
17758            .expect("col");
17759            let to_replace = vec![Scalar::Null(NullKind::NaN)];
17760            let replacement = vec![Scalar::Int64(-1)];
17761            let out = col
17762                .replace_values(&to_replace, &replacement)
17763                .expect("replace");
17764            assert_eq!(out.values()[0], Scalar::Int64(1));
17765            assert_eq!(out.values()[1], Scalar::Int64(-1));
17766            assert_eq!(out.values()[2], Scalar::Int64(2));
17767        }
17768
17769        #[test]
17770        fn replace_values_length_mismatch_errors() {
17771            let col = Column::from_values(vec![Scalar::Int64(1)]).expect("col");
17772            let err = col
17773                .replace_values(&[Scalar::Int64(1)], &[Scalar::Int64(2), Scalar::Int64(3)])
17774                .unwrap_err();
17775            assert!(matches!(err, crate::ColumnError::LengthMismatch { .. }));
17776        }
17777
17778        #[test]
17779        fn nonzero_returns_truthy_positions() {
17780            let col = Column::from_values(vec![
17781                Scalar::Int64(0),
17782                Scalar::Int64(5),
17783                Scalar::Null(NullKind::NaN),
17784                Scalar::Int64(-3),
17785                Scalar::Int64(0),
17786            ])
17787            .expect("col");
17788            assert_eq!(col.nonzero(), vec![1, 3]);
17789        }
17790
17791        #[test]
17792        fn nonzero_empty_column_is_empty() {
17793            let col = Column::from_values(Vec::<Scalar>::new()).expect("col");
17794            assert!(col.nonzero().is_empty());
17795        }
17796
17797        #[test]
17798        fn where_rejects_length_mismatch() {
17799            let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
17800            let cond = Column::from_values(vec![Scalar::Bool(true)]).expect("cond");
17801            let err = col.where_cond(&cond, &Scalar::Int64(0)).unwrap_err();
17802            assert!(matches!(err, crate::ColumnError::LengthMismatch { .. }));
17803        }
17804    }
17805
17806    mod nlargest_nsmallest {
17807        use fp_types::NullKind;
17808
17809        use super::*;
17810
17811        #[test]
17812        fn nlargest_returns_top_n_descending() {
17813            let col = Column::from_values(vec![
17814                Scalar::Int64(3),
17815                Scalar::Int64(1),
17816                Scalar::Int64(5),
17817                Scalar::Int64(2),
17818                Scalar::Int64(4),
17819            ])
17820            .expect("col");
17821            let top = col.nlargest(3).expect("nlargest");
17822            assert_eq!(top.len(), 3);
17823            assert_eq!(top.values()[0], Scalar::Int64(5));
17824            assert_eq!(top.values()[1], Scalar::Int64(4));
17825            assert_eq!(top.values()[2], Scalar::Int64(3));
17826        }
17827
17828        #[test]
17829        fn nsmallest_returns_bottom_n_ascending() {
17830            let col = Column::from_values(vec![
17831                Scalar::Int64(3),
17832                Scalar::Int64(1),
17833                Scalar::Int64(5),
17834                Scalar::Int64(2),
17835                Scalar::Int64(4),
17836            ])
17837            .expect("col");
17838            let bot = col.nsmallest(2).expect("nsmallest");
17839            assert_eq!(bot.len(), 2);
17840            assert_eq!(bot.values()[0], Scalar::Int64(1));
17841            assert_eq!(bot.values()[1], Scalar::Int64(2));
17842        }
17843
17844        #[test]
17845        fn nlargest_excludes_missing_when_n_fits() {
17846            let col = Column::from_values(vec![
17847                Scalar::Int64(5),
17848                Scalar::Null(NullKind::NaN),
17849                Scalar::Int64(3),
17850                Scalar::Int64(7),
17851            ])
17852            .expect("col");
17853            let top = col.nlargest(2).expect("nlargest");
17854            assert_eq!(top.len(), 2);
17855            assert_eq!(top.values()[0], Scalar::Int64(7));
17856            assert_eq!(top.values()[1], Scalar::Int64(5));
17857        }
17858
17859        #[test]
17860        fn nlargest_n_larger_than_length_clamps() {
17861            let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
17862            let top = col.nlargest(100).expect("nlargest");
17863            assert_eq!(top.len(), 2);
17864        }
17865
17866        #[test]
17867        fn nlargest_zero_is_empty_same_dtype() {
17868            let col = Column::from_values(vec![Scalar::Int64(1)]).expect("col");
17869            let top = col.nlargest(0).expect("nlargest");
17870            assert!(top.is_empty());
17871            assert_eq!(top.dtype(), DType::Int64);
17872        }
17873    }
17874
17875    mod astype {
17876        use fp_types::NullKind;
17877
17878        use super::*;
17879
17880        #[test]
17881        fn astype_int_to_float_preserves_values() {
17882            let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
17883            let out = col.astype(DType::Float64).expect("astype");
17884            assert_eq!(out.dtype(), DType::Float64);
17885            assert_eq!(out.values()[0], Scalar::Float64(1.0));
17886            assert_eq!(out.values()[1], Scalar::Float64(2.0));
17887        }
17888
17889        #[test]
17890        fn astype_same_dtype_is_noop_clone() {
17891            let col = Column::from_values(vec![Scalar::Int64(1)]).expect("col");
17892            let out = col.astype(DType::Int64).expect("astype");
17893            assert_eq!(out.values(), col.values());
17894        }
17895
17896        #[test]
17897        fn astype_bool_to_int() {
17898            let col =
17899                Column::from_values(vec![Scalar::Bool(true), Scalar::Bool(false)]).expect("col");
17900            let out = col.astype(DType::Int64).expect("astype");
17901            assert_eq!(out.dtype(), DType::Int64);
17902            assert_eq!(out.values()[0], Scalar::Int64(1));
17903            assert_eq!(out.values()[1], Scalar::Int64(0));
17904        }
17905
17906        #[test]
17907        fn astype_to_utf8_uses_pandas_string_spellings() {
17908            let bool_col = Column::new(DType::Bool, vec![Scalar::Bool(true), Scalar::Bool(false)])
17909                .expect("bool col");
17910            let int_col = Column::new(DType::Int64, vec![Scalar::Int64(-7)]).expect("int col");
17911            let float_col = Column::new(
17912                DType::Float64,
17913                vec![Scalar::Float64(1.0), Scalar::Null(NullKind::NaN)],
17914            )
17915            .expect("float col");
17916
17917            let bool_out = bool_col.astype(DType::Utf8).expect("astype bool");
17918            let int_out = int_col.astype(DType::Utf8).expect("astype int");
17919            let float_out = float_col.astype(DType::Utf8).expect("astype float");
17920
17921            assert_eq!(bool_out.dtype(), DType::Utf8);
17922            assert_eq!(
17923                bool_out.values(),
17924                &[
17925                    Scalar::Utf8("True".to_owned()),
17926                    Scalar::Utf8("False".to_owned()),
17927                ]
17928            );
17929            assert_eq!(int_out.values(), &[Scalar::Utf8("-7".to_owned())]);
17930            assert_eq!(
17931                float_out.values(),
17932                &[
17933                    Scalar::Utf8("1.0".to_owned()),
17934                    Scalar::Utf8("nan".to_owned()),
17935                ]
17936            );
17937        }
17938
17939        #[test]
17940        fn astype_propagates_missing() {
17941            let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Null(NullKind::NaN)])
17942                .expect("col");
17943            let out = col.astype(DType::Float64).expect("astype");
17944            assert_eq!(out.values()[0], Scalar::Float64(1.0));
17945            assert!(out.values()[1].is_missing());
17946        }
17947
17948        #[test]
17949        fn astype_finite_float_to_int_truncates_toward_zero() {
17950            // pandas astype(int64) truncates finite floats toward zero
17951            // (br-frankenpandas-qcutc); only non-finite values raise.
17952            let col = Column::from_values(vec![
17953                Scalar::Float64(1.5),
17954                Scalar::Float64(2.9),
17955                Scalar::Float64(-1.5),
17956                Scalar::Float64(-2.9),
17957                Scalar::Float64(0.4),
17958            ])
17959            .expect("col");
17960            let out = col.astype(DType::Int64).expect("truncating cast");
17961            assert_eq!(
17962                out.values(),
17963                &[
17964                    Scalar::Int64(1),
17965                    Scalar::Int64(2),
17966                    Scalar::Int64(-1),
17967                    Scalar::Int64(-2),
17968                    Scalar::Int64(0),
17969                ]
17970            );
17971            // Non-finite still raises.
17972            let inf = Column::from_values(vec![Scalar::Float64(f64::INFINITY)]).expect("col");
17973            assert!(matches!(
17974                inf.astype(DType::Int64).unwrap_err(),
17975                crate::ColumnError::Type(_)
17976            ));
17977        }
17978
17979        #[test]
17980        fn new_int64_from_lossy_float_errors_unlike_astype() {
17981            // The typed constructor with an explicit dtype is STRICT, matching
17982            // pandas DataFrame(dtype='int64') which raises on a non-integer
17983            // float — unlike astype which truncates. (br-frankenpandas-8nupg)
17984            let err = Column::new(DType::Int64, vec![Scalar::Float64(1.5)]).unwrap_err();
17985            assert!(matches!(
17986                err,
17987                crate::ColumnError::Type(fp_types::TypeError::LossyFloatToInt { .. })
17988            ));
17989            // Integer-valued floats still coerce fine (1.0 -> 1).
17990            let ok = Column::new(DType::Int64, vec![Scalar::Float64(2.0)]).expect("integer float");
17991            assert_eq!(ok.values(), &[Scalar::Int64(2)]);
17992        }
17993    }
17994
17995    mod rank_searchsorted {
17996        use fp_types::NullKind;
17997
17998        use super::*;
17999
18000        #[test]
18001        fn rank_average_ties_get_midpoint() {
18002            let col = Column::from_values(vec![
18003                Scalar::Float64(10.0),
18004                Scalar::Float64(20.0),
18005                Scalar::Float64(20.0),
18006                Scalar::Float64(30.0),
18007            ])
18008            .expect("col");
18009            let r = col.rank("average", true).expect("rank");
18010            assert_eq!(r.values()[0], Scalar::Float64(1.0));
18011            // Two tied values occupy positions 2 and 3 → avg = 2.5
18012            assert_eq!(r.values()[1], Scalar::Float64(2.5));
18013            assert_eq!(r.values()[2], Scalar::Float64(2.5));
18014            assert_eq!(r.values()[3], Scalar::Float64(4.0));
18015        }
18016
18017        #[test]
18018        fn rank_min_assigns_lowest_tied_rank() {
18019            let col = Column::from_values(vec![
18020                Scalar::Int64(1),
18021                Scalar::Int64(2),
18022                Scalar::Int64(2),
18023                Scalar::Int64(3),
18024            ])
18025            .expect("col");
18026            let r = col.rank("min", true).expect("rank");
18027            assert_eq!(r.values()[1], Scalar::Float64(2.0));
18028            assert_eq!(r.values()[2], Scalar::Float64(2.0));
18029            assert_eq!(r.values()[3], Scalar::Float64(4.0));
18030        }
18031
18032        #[test]
18033        fn rank_max_assigns_highest_tied_rank() {
18034            let col = Column::from_values(vec![
18035                Scalar::Int64(1),
18036                Scalar::Int64(2),
18037                Scalar::Int64(2),
18038                Scalar::Int64(3),
18039            ])
18040            .expect("col");
18041            let r = col.rank("max", true).expect("rank");
18042            assert_eq!(r.values()[1], Scalar::Float64(3.0));
18043            assert_eq!(r.values()[2], Scalar::Float64(3.0));
18044            assert_eq!(r.values()[3], Scalar::Float64(4.0));
18045        }
18046
18047        #[test]
18048        fn rank_first_breaks_ties_by_appearance_order() {
18049            let col =
18050                Column::from_values(vec![Scalar::Int64(5), Scalar::Int64(3), Scalar::Int64(3)])
18051                    .expect("col");
18052            let r = col.rank("first", true).expect("rank");
18053            // Sorted positions: (1,3), (2,3), (0,5) → ranks 1,2,3
18054            assert_eq!(r.values()[0], Scalar::Float64(3.0));
18055            assert_eq!(r.values()[1], Scalar::Float64(1.0));
18056            assert_eq!(r.values()[2], Scalar::Float64(2.0));
18057        }
18058
18059        #[test]
18060        fn rank_dense_has_no_gaps() {
18061            let col = Column::from_values(vec![
18062                Scalar::Int64(1),
18063                Scalar::Int64(2),
18064                Scalar::Int64(2),
18065                Scalar::Int64(3),
18066            ])
18067            .expect("col");
18068            let r = col.rank("dense", true).expect("rank");
18069            assert_eq!(r.values()[0], Scalar::Float64(1.0));
18070            assert_eq!(r.values()[1], Scalar::Float64(2.0));
18071            assert_eq!(r.values()[2], Scalar::Float64(2.0));
18072            assert_eq!(r.values()[3], Scalar::Float64(3.0));
18073        }
18074
18075        #[test]
18076        fn rank_null_inputs_stay_null() {
18077            let col = Column::from_values(vec![
18078                Scalar::Float64(1.0),
18079                Scalar::Null(NullKind::NaN),
18080                Scalar::Float64(2.0),
18081            ])
18082            .expect("col");
18083            let r = col.rank("average", true).expect("rank");
18084            assert_eq!(r.values()[0], Scalar::Float64(1.0));
18085            assert!(r.values()[1].is_missing());
18086            assert_eq!(r.values()[2], Scalar::Float64(2.0));
18087        }
18088
18089        #[test]
18090        fn rank_descending_reverses_assignment() {
18091            let col =
18092                Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)])
18093                    .expect("col");
18094            let r = col.rank("min", false).expect("rank");
18095            assert_eq!(r.values()[0], Scalar::Float64(3.0));
18096            assert_eq!(r.values()[1], Scalar::Float64(2.0));
18097            assert_eq!(r.values()[2], Scalar::Float64(1.0));
18098        }
18099
18100        #[test]
18101        fn rank_invalid_method_errors() {
18102            let col = Column::from_values(vec![Scalar::Int64(1)]).expect("col");
18103            let err = col.rank("bogus", true).unwrap_err();
18104            assert!(matches!(err, crate::ColumnError::Type(_)));
18105        }
18106
18107        #[test]
18108        fn searchsorted_left_finds_first_insertion() {
18109            let col = Column::from_values(vec![
18110                Scalar::Int64(1),
18111                Scalar::Int64(2),
18112                Scalar::Int64(2),
18113                Scalar::Int64(5),
18114            ])
18115            .expect("col");
18116            assert_eq!(col.searchsorted(&Scalar::Int64(2), "left").unwrap(), 1);
18117            assert_eq!(col.searchsorted(&Scalar::Int64(0), "left").unwrap(), 0);
18118            assert_eq!(col.searchsorted(&Scalar::Int64(6), "left").unwrap(), 4);
18119        }
18120
18121        #[test]
18122        fn searchsorted_right_finds_last_insertion() {
18123            let col = Column::from_values(vec![
18124                Scalar::Int64(1),
18125                Scalar::Int64(2),
18126                Scalar::Int64(2),
18127                Scalar::Int64(5),
18128            ])
18129            .expect("col");
18130            assert_eq!(col.searchsorted(&Scalar::Int64(2), "right").unwrap(), 3);
18131        }
18132
18133        #[test]
18134        fn searchsorted_rejects_invalid_side() {
18135            let col = Column::from_values(vec![Scalar::Int64(1)]).expect("col");
18136            let err = col.searchsorted(&Scalar::Int64(0), "middle").unwrap_err();
18137            assert!(matches!(err, crate::ColumnError::Type(_)));
18138        }
18139
18140        #[test]
18141        fn searchsorted_rejects_missing_needle() {
18142            let col = Column::from_values(vec![Scalar::Int64(1)]).expect("col");
18143            let err = col
18144                .searchsorted(&Scalar::Null(NullKind::NaN), "left")
18145                .unwrap_err();
18146            assert!(matches!(err, crate::ColumnError::Type(_)));
18147        }
18148
18149        #[test]
18150        fn searchsorted_treats_trailing_nulls_as_greater() {
18151            let col = Column::from_values(vec![
18152                Scalar::Int64(1),
18153                Scalar::Int64(2),
18154                Scalar::Null(NullKind::NaN),
18155            ])
18156            .expect("col");
18157            // needle=3 should land at position 2 (before trailing null).
18158            assert_eq!(col.searchsorted(&Scalar::Int64(3), "left").unwrap(), 2);
18159        }
18160
18161        #[test]
18162        fn searchsorted_values_left_returns_positions_column() {
18163            let col = Column::from_values(vec![
18164                Scalar::Int64(1),
18165                Scalar::Int64(2),
18166                Scalar::Int64(2),
18167                Scalar::Int64(5),
18168            ])
18169            .expect("col");
18170            let positions = col
18171                .searchsorted_values(
18172                    &[Scalar::Int64(0), Scalar::Int64(2), Scalar::Int64(6)],
18173                    "left",
18174                )
18175                .expect("searchsorted");
18176            assert_eq!(positions.dtype(), DType::Int64);
18177            assert_eq!(
18178                positions.values(),
18179                &[Scalar::Int64(0), Scalar::Int64(1), Scalar::Int64(4)]
18180            );
18181        }
18182
18183        #[test]
18184        fn searchsorted_values_right_returns_positions_column() {
18185            let col = Column::from_values(vec![
18186                Scalar::Int64(1),
18187                Scalar::Int64(2),
18188                Scalar::Int64(2),
18189                Scalar::Int64(5),
18190            ])
18191            .expect("col");
18192            let positions = col
18193                .searchsorted_values(
18194                    &[Scalar::Int64(0), Scalar::Int64(2), Scalar::Int64(6)],
18195                    "right",
18196                )
18197                .expect("searchsorted");
18198            assert_eq!(
18199                positions.values(),
18200                &[Scalar::Int64(0), Scalar::Int64(3), Scalar::Int64(4)]
18201            );
18202        }
18203
18204        #[test]
18205        fn searchsorted_values_rejects_invalid_side() {
18206            let col = Column::from_values(vec![Scalar::Int64(1)]).expect("col");
18207            let err = col
18208                .searchsorted_values(&[Scalar::Int64(0)], "middle")
18209                .unwrap_err();
18210            assert!(matches!(err, crate::ColumnError::Type(_)));
18211        }
18212
18213        #[test]
18214        fn searchsorted_values_rejects_missing_needles() {
18215            let col = Column::from_values(vec![Scalar::Int64(1)]).expect("col");
18216            let err = col
18217                .searchsorted_values(&[Scalar::Null(NullKind::NaN)], "left")
18218                .unwrap_err();
18219            assert!(matches!(err, crate::ColumnError::Type(_)));
18220        }
18221
18222        #[test]
18223        fn searchsorted_with_sorter_uses_argsort_permutation() {
18224            let col = Column::from_values(vec![
18225                Scalar::Int64(5),
18226                Scalar::Int64(1),
18227                Scalar::Int64(2),
18228                Scalar::Int64(2),
18229            ])
18230            .expect("col");
18231            let sorter = col.argsort();
18232            assert_eq!(
18233                col.searchsorted_with_sorter(&Scalar::Int64(2), "left", &sorter)
18234                    .unwrap(),
18235                1
18236            );
18237            assert_eq!(
18238                col.searchsorted_with_sorter(&Scalar::Int64(2), "right", &sorter)
18239                    .unwrap(),
18240                3
18241            );
18242            assert_eq!(
18243                col.searchsorted_with_sorter(&Scalar::Int64(6), "left", &sorter)
18244                    .unwrap(),
18245                4
18246            );
18247        }
18248
18249        #[test]
18250        fn searchsorted_values_with_sorter_returns_positions_column() {
18251            let col = Column::from_values(vec![
18252                Scalar::Int64(5),
18253                Scalar::Int64(1),
18254                Scalar::Int64(2),
18255                Scalar::Int64(2),
18256            ])
18257            .expect("col");
18258            let sorter = col.argsort();
18259            let positions = col
18260                .searchsorted_values_with_sorter(
18261                    &[Scalar::Int64(0), Scalar::Int64(2), Scalar::Int64(6)],
18262                    "left",
18263                    &sorter,
18264                )
18265                .expect("searchsorted");
18266            assert_eq!(
18267                positions.values(),
18268                &[Scalar::Int64(0), Scalar::Int64(1), Scalar::Int64(4)]
18269            );
18270        }
18271
18272        #[test]
18273        fn searchsorted_with_sorter_rejects_length_mismatch() {
18274            let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
18275            let err = col
18276                .searchsorted_with_sorter(&Scalar::Int64(1), "left", &[0])
18277                .unwrap_err();
18278            assert!(matches!(
18279                err,
18280                crate::ColumnError::LengthMismatch { left: 2, right: 1 }
18281            ));
18282        }
18283
18284        #[test]
18285        fn searchsorted_with_sorter_rejects_duplicate_or_oob_indices() {
18286            let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
18287            let duplicate = col
18288                .searchsorted_with_sorter(&Scalar::Int64(1), "left", &[0, 0])
18289                .unwrap_err();
18290            assert!(matches!(
18291                duplicate,
18292                crate::ColumnError::InvalidSorter { .. }
18293            ));
18294
18295            let out_of_bounds = col
18296                .searchsorted_with_sorter(&Scalar::Int64(1), "left", &[0, 2])
18297                .unwrap_err();
18298            assert!(matches!(
18299                out_of_bounds,
18300                crate::ColumnError::InvalidSorter { .. }
18301            ));
18302        }
18303    }
18304
18305    mod value_counts {
18306        use fp_types::NullKind;
18307
18308        use super::*;
18309
18310        #[test]
18311        fn value_counts_default_drops_missing_and_sorts_descending() {
18312            let col = Column::from_values(vec![
18313                Scalar::Int64(3),
18314                Scalar::Int64(1),
18315                Scalar::Null(NullKind::NaN),
18316                Scalar::Int64(3),
18317                Scalar::Int64(2),
18318                Scalar::Int64(1),
18319                Scalar::Int64(3),
18320            ])
18321            .expect("col");
18322
18323            let (values, counts) = col.value_counts().expect("value_counts");
18324            assert_eq!(
18325                values.values(),
18326                &[Scalar::Int64(3), Scalar::Int64(1), Scalar::Int64(2)]
18327            );
18328            assert_eq!(
18329                counts.values(),
18330                &[Scalar::Int64(3), Scalar::Int64(2), Scalar::Int64(1)]
18331            );
18332        }
18333
18334        #[test]
18335        fn value_counts_sort_false_preserves_first_seen_order() {
18336            let col = Column::from_values(vec![
18337                Scalar::Int64(2),
18338                Scalar::Int64(1),
18339                Scalar::Int64(2),
18340                Scalar::Int64(3),
18341                Scalar::Int64(1),
18342            ])
18343            .expect("col");
18344
18345            let (values, counts) = col
18346                .value_counts_with_options(false, false, false, true)
18347                .expect("value_counts");
18348            assert_eq!(
18349                values.values(),
18350                &[Scalar::Int64(2), Scalar::Int64(1), Scalar::Int64(3)]
18351            );
18352            assert_eq!(
18353                counts.values(),
18354                &[Scalar::Int64(2), Scalar::Int64(2), Scalar::Int64(1)]
18355            );
18356        }
18357
18358        #[test]
18359        fn value_counts_dropna_false_includes_missing_bucket() {
18360            let col = Column::from_values(vec![
18361                Scalar::Utf8("a".into()),
18362                Scalar::Null(NullKind::NaN),
18363                Scalar::Utf8("a".into()),
18364                Scalar::Null(NullKind::Null),
18365            ])
18366            .expect("col");
18367
18368            let (values, counts) = col
18369                .value_counts_with_options(false, true, false, false)
18370                .expect("value_counts");
18371            assert_eq!(values.values()[0], Scalar::Utf8("a".into()));
18372            assert!(values.values()[1].is_missing());
18373            assert_eq!(counts.values(), &[Scalar::Int64(2), Scalar::Int64(2)]);
18374        }
18375
18376        #[test]
18377        fn value_counts_normalize_uses_returned_total() {
18378            let col = Column::from_values(vec![
18379                Scalar::Float64(1.0),
18380                Scalar::Float64(2.0),
18381                Scalar::Float64(1.0),
18382                Scalar::Null(NullKind::NaN),
18383            ])
18384            .expect("col");
18385
18386            let (values, counts) = col
18387                .value_counts_with_options(true, true, false, true)
18388                .expect("value_counts");
18389            assert_eq!(
18390                values.values(),
18391                &[Scalar::Float64(1.0), Scalar::Float64(2.0)]
18392            );
18393            assert_eq!(counts.dtype(), DType::Float64);
18394            assert_eq!(
18395                counts.values(),
18396                &[Scalar::Float64(2.0 / 3.0), Scalar::Float64(1.0 / 3.0)]
18397            );
18398        }
18399
18400        #[test]
18401        fn python_mod_f64_handles_infinity_divisor() {
18402            use crate::python_mod_f64;
18403
18404            assert_eq!(python_mod_f64(5.0, f64::INFINITY), 5.0);
18405            assert_eq!(python_mod_f64(-5.0, f64::INFINITY), f64::INFINITY);
18406            assert_eq!(python_mod_f64(5.0, f64::NEG_INFINITY), f64::NEG_INFINITY);
18407            assert_eq!(python_mod_f64(-5.0, f64::NEG_INFINITY), -5.0);
18408            assert_eq!(python_mod_f64(0.0, f64::INFINITY), 0.0);
18409            assert!(python_mod_f64(0.0, f64::NEG_INFINITY).is_sign_negative());
18410            assert_eq!(python_mod_f64(-0.0, f64::INFINITY), 0.0);
18411            assert_eq!(python_mod_f64(-0.0, f64::NEG_INFINITY), -0.0);
18412            assert!(python_mod_f64(f64::NAN, f64::INFINITY).is_nan());
18413            assert!(python_mod_f64(f64::NAN, f64::NEG_INFINITY).is_nan());
18414            assert!(python_mod_f64(f64::INFINITY, f64::INFINITY).is_nan());
18415            assert!(python_mod_f64(f64::NEG_INFINITY, f64::NEG_INFINITY).is_nan());
18416        }
18417
18418        #[test]
18419        fn python_floor_div_f64_handles_infinite_operands() {
18420            use crate::python_floor_div_f64;
18421
18422            assert_eq!(python_floor_div_f64(5.0, f64::INFINITY), 0.0);
18423            assert_eq!(python_floor_div_f64(-5.0, f64::INFINITY), -1.0);
18424            assert_eq!(python_floor_div_f64(5.0, f64::NEG_INFINITY), -1.0);
18425            assert_eq!(python_floor_div_f64(-5.0, f64::NEG_INFINITY), 0.0);
18426            assert_eq!(python_floor_div_f64(0.0, f64::INFINITY), 0.0);
18427            assert!(python_floor_div_f64(-0.0, f64::INFINITY).is_sign_negative());
18428            assert!(python_floor_div_f64(0.0, f64::NEG_INFINITY).is_sign_negative());
18429            assert_eq!(python_floor_div_f64(-0.0, f64::NEG_INFINITY), 0.0);
18430            assert!(python_floor_div_f64(f64::INFINITY, 2.0).is_nan());
18431            assert!(python_floor_div_f64(f64::NEG_INFINITY, -2.0).is_nan());
18432            assert!(python_floor_div_f64(f64::INFINITY, f64::INFINITY).is_nan());
18433        }
18434
18435        #[test]
18436        fn histogram_counts_values_in_bins() {
18437            let col = Column::from_values(vec![
18438                Scalar::Float64(0.5),
18439                Scalar::Float64(1.5),
18440                Scalar::Float64(2.5),
18441                Scalar::Float64(1.2),
18442                Scalar::Float64(2.8),
18443            ])
18444            .unwrap();
18445            let edges = vec![0.0, 1.0, 2.0, 3.0];
18446            let counts = col.histogram(&edges).unwrap();
18447            assert_eq!(
18448                counts.values(),
18449                &[
18450                    Scalar::Int64(1), // [0, 1): 0.5
18451                    Scalar::Int64(2), // [1, 2): 1.5, 1.2
18452                    Scalar::Int64(2), // [2, 3]: 2.5, 2.8
18453                ]
18454            );
18455        }
18456
18457        #[test]
18458        fn histogram_auto_creates_bins() {
18459            let col = Column::from_values(vec![
18460                Scalar::Float64(1.0),
18461                Scalar::Float64(2.0),
18462                Scalar::Float64(3.0),
18463                Scalar::Float64(4.0),
18464            ])
18465            .unwrap();
18466            let (counts, edges) = col.histogram_auto(3).unwrap();
18467            assert_eq!(counts.len(), 3);
18468            assert_eq!(edges.len(), 4);
18469            assert!((edges[0] - 1.0).abs() < 1e-10);
18470            assert!((edges[3] - 4.0).abs() < 1e-10);
18471        }
18472
18473        #[test]
18474        fn histogram_auto_constant_values_extends_range() {
18475            let col = Column::from_values(vec![
18476                Scalar::Float64(5.0),
18477                Scalar::Float64(5.0),
18478                Scalar::Float64(5.0),
18479            ])
18480            .unwrap();
18481            let (counts, edges) = col.histogram_auto(2).unwrap();
18482            assert_eq!(counts.len(), 2);
18483            assert!(edges[0] < 5.0);
18484            assert!(edges[2] > 5.0);
18485        }
18486
18487        #[test]
18488        fn hanning_window_shape() {
18489            let win = Column::hanning(5).unwrap();
18490            assert_eq!(win.len(), 5);
18491            // Endpoints should be 0
18492            assert!((win.values()[0].to_f64().unwrap()).abs() < 1e-10);
18493            assert!((win.values()[4].to_f64().unwrap()).abs() < 1e-10);
18494            // Center should be 1
18495            assert!((win.values()[2].to_f64().unwrap() - 1.0).abs() < 1e-10);
18496        }
18497
18498        #[test]
18499        fn hamming_window_shape() {
18500            let win = Column::hamming(5).unwrap();
18501            assert_eq!(win.len(), 5);
18502            // Hamming endpoints are ~0.08, not 0
18503            let v0 = win.values()[0].to_f64().unwrap();
18504            assert!(v0 > 0.07 && v0 < 0.09);
18505        }
18506
18507        #[test]
18508        fn bartlett_window_triangular() {
18509            let win = Column::bartlett(5).unwrap();
18510            assert_eq!(win.len(), 5);
18511            // Endpoints should be 0
18512            assert!((win.values()[0].to_f64().unwrap()).abs() < 1e-10);
18513            assert!((win.values()[4].to_f64().unwrap()).abs() < 1e-10);
18514            // Center should be 1
18515            assert!((win.values()[2].to_f64().unwrap() - 1.0).abs() < 1e-10);
18516        }
18517
18518        #[test]
18519        fn convolve_full_mode() {
18520            let a = Column::from_values(vec![
18521                Scalar::Float64(1.0),
18522                Scalar::Float64(2.0),
18523                Scalar::Float64(3.0),
18524            ])
18525            .unwrap();
18526            let v = Column::from_values(vec![Scalar::Float64(1.0), Scalar::Float64(1.0)]).unwrap();
18527            let result = a.convolve(&v, "full").unwrap();
18528            // Full convolution: [1*1, 1*1+2*1, 2*1+3*1, 3*1] = [1, 3, 5, 3]
18529            assert_eq!(result.len(), 4);
18530            assert!((result.values()[0].to_f64().unwrap() - 1.0).abs() < 1e-10);
18531            assert!((result.values()[1].to_f64().unwrap() - 3.0).abs() < 1e-10);
18532            assert!((result.values()[2].to_f64().unwrap() - 5.0).abs() < 1e-10);
18533            assert!((result.values()[3].to_f64().unwrap() - 3.0).abs() < 1e-10);
18534        }
18535
18536        #[test]
18537        fn geomspace_creates_geometric_progression() {
18538            let col = Column::geomspace(1.0, 1000.0, 4).unwrap();
18539            assert_eq!(col.len(), 4);
18540            assert!((col.values()[0].to_f64().unwrap() - 1.0).abs() < 1e-10);
18541            assert!((col.values()[1].to_f64().unwrap() - 10.0).abs() < 1e-10);
18542            assert!((col.values()[2].to_f64().unwrap() - 100.0).abs() < 1e-10);
18543            assert!((col.values()[3].to_f64().unwrap() - 1000.0).abs() < 1e-10);
18544        }
18545
18546        #[test]
18547        fn nan_to_num_replaces_special_values() {
18548            let col = Column::from_values(vec![
18549                Scalar::Float64(1.0),
18550                Scalar::Float64(f64::NAN),
18551                Scalar::Float64(f64::INFINITY),
18552                Scalar::Float64(f64::NEG_INFINITY),
18553            ])
18554            .unwrap();
18555            let result = col.nan_to_num().unwrap();
18556            assert!((result.values()[0].to_f64().unwrap() - 1.0).abs() < 1e-10);
18557            assert!((result.values()[1].to_f64().unwrap() - 0.0).abs() < 1e-10);
18558            assert_eq!(result.values()[2].to_f64().unwrap(), f64::MAX);
18559            assert_eq!(result.values()[3].to_f64().unwrap(), f64::MIN);
18560        }
18561
18562        #[test]
18563        fn rint_rounds_to_nearest_even() {
18564            let col = Column::from_values(vec![
18565                Scalar::Float64(0.5),
18566                Scalar::Float64(1.5),
18567                Scalar::Float64(2.5),
18568                Scalar::Float64(3.5),
18569            ])
18570            .unwrap();
18571            let result = col.rint().unwrap();
18572            // Banker's rounding: 0.5->0, 1.5->2, 2.5->2, 3.5->4
18573            assert!((result.values()[0].to_f64().unwrap() - 0.0).abs() < 1e-10);
18574            assert!((result.values()[1].to_f64().unwrap() - 2.0).abs() < 1e-10);
18575            assert!((result.values()[2].to_f64().unwrap() - 2.0).abs() < 1e-10);
18576            assert!((result.values()[3].to_f64().unwrap() - 4.0).abs() < 1e-10);
18577        }
18578
18579        #[test]
18580        fn ldexp_multiplies_by_power_of_two() {
18581            let col = Column::from_values(vec![
18582                Scalar::Float64(1.0),
18583                Scalar::Float64(2.0),
18584                Scalar::Float64(0.5),
18585            ])
18586            .unwrap();
18587            let result = col.ldexp(3).unwrap(); // multiply by 2^3 = 8
18588            assert!((result.values()[0].to_f64().unwrap() - 8.0).abs() < 1e-10);
18589            assert!((result.values()[1].to_f64().unwrap() - 16.0).abs() < 1e-10);
18590            assert!((result.values()[2].to_f64().unwrap() - 4.0).abs() < 1e-10);
18591        }
18592
18593        #[test]
18594        fn modf_splits_integer_and_fraction() {
18595            let col = Column::from_values(vec![
18596                Scalar::Float64(3.5),
18597                Scalar::Float64(-2.25),
18598                Scalar::Float64(1.0),
18599            ])
18600            .unwrap();
18601            let (frac, int) = col.modf().unwrap();
18602            assert!((frac.values()[0].to_f64().unwrap() - 0.5).abs() < 1e-10);
18603            assert!((int.values()[0].to_f64().unwrap() - 3.0).abs() < 1e-10);
18604            assert!((frac.values()[1].to_f64().unwrap() - (-0.25)).abs() < 1e-10);
18605            assert!((int.values()[1].to_f64().unwrap() - (-2.0)).abs() < 1e-10);
18606            assert!((frac.values()[2].to_f64().unwrap() - 0.0).abs() < 1e-10);
18607            assert!((int.values()[2].to_f64().unwrap() - 1.0).abs() < 1e-10);
18608        }
18609
18610        #[test]
18611        fn spacing_returns_ulp() {
18612            let col = Column::from_values(vec![
18613                Scalar::Float64(1.0),
18614                Scalar::Float64(-1.0),
18615                Scalar::Float64(0.0),
18616            ])
18617            .unwrap();
18618            let result = col.spacing().unwrap();
18619            // Spacing at 1.0 is about 2.2e-16
18620            let s1 = result.values()[0].to_f64().unwrap();
18621            assert!(s1 > 0.0 && s1 < 1e-15);
18622            // Spacing is symmetric for negative numbers
18623            let s_neg1 = result.values()[1].to_f64().unwrap();
18624            assert!((s1 - s_neg1).abs() < 1e-20);
18625            // Spacing at 0 is smallest denormal (not MIN_POSITIVE which is normalized)
18626            assert_eq!(result.values()[2].to_f64().unwrap(), f64::from_bits(1));
18627        }
18628
18629        #[test]
18630        fn frexp_decomposes_floats() {
18631            let col = Column::from_values(vec![
18632                Scalar::Float64(4.0),
18633                Scalar::Float64(0.5),
18634                Scalar::Float64(-8.0),
18635                Scalar::Float64(0.0),
18636            ])
18637            .unwrap();
18638            let (mant, exp) = col.frexp().unwrap();
18639            // 4.0 = 0.5 * 2^3
18640            assert!((mant.values()[0].to_f64().unwrap() - 0.5).abs() < 1e-10);
18641            assert_eq!(exp.values()[0].to_i64().unwrap(), 3);
18642            // 0.5 = 0.5 * 2^0
18643            assert!((mant.values()[1].to_f64().unwrap() - 0.5).abs() < 1e-10);
18644            assert_eq!(exp.values()[1].to_i64().unwrap(), 0);
18645            // -8.0 = -0.5 * 2^4
18646            assert!((mant.values()[2].to_f64().unwrap() - (-0.5)).abs() < 1e-10);
18647            assert_eq!(exp.values()[2].to_i64().unwrap(), 4);
18648            // 0.0 = 0.0 * 2^0
18649            assert!((mant.values()[3].to_f64().unwrap() - 0.0).abs() < 1e-10);
18650            assert_eq!(exp.values()[3].to_i64().unwrap(), 0);
18651        }
18652
18653        #[test]
18654        fn nextafter_returns_adjacent_floats() {
18655            let col = Column::from_values(vec![
18656                Scalar::Float64(0.0),
18657                Scalar::Float64(1.0),
18658                Scalar::Float64(1.0),
18659            ])
18660            .unwrap();
18661            let toward = Column::from_values(vec![
18662                Scalar::Float64(1.0),
18663                Scalar::Float64(2.0),
18664                Scalar::Float64(0.0),
18665            ])
18666            .unwrap();
18667            let result = col.nextafter(&toward).unwrap();
18668            // nextafter(0, 1) = smallest positive denormal (not MIN_POSITIVE which is normalized)
18669            assert_eq!(result.values()[0].to_f64().unwrap(), f64::from_bits(1));
18670            // nextafter(1, 2) > 1
18671            let r1 = result.values()[1].to_f64().unwrap();
18672            assert!(r1 > 1.0 && r1 < 1.0 + 1e-15);
18673            // nextafter(1, 0) < 1
18674            let r2 = result.values()[2].to_f64().unwrap();
18675            assert!(r2 < 1.0 && r2 > 1.0 - 1e-15);
18676        }
18677
18678        #[test]
18679        fn isneginf_isposinf_detect_infinities() {
18680            let col = Column::from_values(vec![
18681                Scalar::Float64(f64::NEG_INFINITY),
18682                Scalar::Float64(f64::INFINITY),
18683                Scalar::Float64(1.0),
18684                Scalar::Float64(f64::NAN),
18685            ])
18686            .unwrap();
18687            let neginf = col.isneginf().unwrap();
18688            let posinf = col.isposinf().unwrap();
18689            assert_eq!(neginf.values()[0], Scalar::Bool(true));
18690            assert_eq!(neginf.values()[1], Scalar::Bool(false));
18691            assert_eq!(neginf.values()[2], Scalar::Bool(false));
18692            assert_eq!(neginf.values()[3], Scalar::Bool(false));
18693            assert_eq!(posinf.values()[0], Scalar::Bool(false));
18694            assert_eq!(posinf.values()[1], Scalar::Bool(true));
18695            assert_eq!(posinf.values()[2], Scalar::Bool(false));
18696            assert_eq!(posinf.values()[3], Scalar::Bool(false));
18697        }
18698
18699        #[test]
18700        fn exp2_computes_power_of_two() {
18701            let col = Column::from_values(vec![
18702                Scalar::Float64(0.0),
18703                Scalar::Float64(1.0),
18704                Scalar::Float64(3.0),
18705                Scalar::Float64(-1.0),
18706            ])
18707            .unwrap();
18708            let result = col.exp2().unwrap();
18709            assert!((result.values()[0].to_f64().unwrap() - 1.0).abs() < 1e-10);
18710            assert!((result.values()[1].to_f64().unwrap() - 2.0).abs() < 1e-10);
18711            assert!((result.values()[2].to_f64().unwrap() - 8.0).abs() < 1e-10);
18712            assert!((result.values()[3].to_f64().unwrap() - 0.5).abs() < 1e-10);
18713        }
18714
18715        #[test]
18716        fn sinc_computes_sinc_function() {
18717            let col = Column::from_values(vec![
18718                Scalar::Float64(0.0),
18719                Scalar::Float64(1.0),
18720                Scalar::Float64(0.5),
18721            ])
18722            .unwrap();
18723            let result = col.sinc().unwrap();
18724            // sinc(0) = 1
18725            assert!((result.values()[0].to_f64().unwrap() - 1.0).abs() < 1e-10);
18726            // sinc(1) = sin(pi)/pi = 0
18727            assert!(result.values()[1].to_f64().unwrap().abs() < 1e-10);
18728            // sinc(0.5) = sin(pi/2)/(pi/2) = 2/pi ≈ 0.6366
18729            let expected = 2.0 / std::f64::consts::PI;
18730            assert!((result.values()[2].to_f64().unwrap() - expected).abs() < 1e-10);
18731        }
18732
18733        #[test]
18734        fn logaddexp_computes_stable_log_sum() {
18735            let x = Column::from_values(vec![
18736                Scalar::Float64(0.0),
18737                Scalar::Float64(1.0),
18738                Scalar::Float64(-1000.0),
18739            ])
18740            .unwrap();
18741            let y = Column::from_values(vec![
18742                Scalar::Float64(0.0),
18743                Scalar::Float64(2.0),
18744                Scalar::Float64(-1000.0),
18745            ])
18746            .unwrap();
18747            let result = x.logaddexp(&y).unwrap();
18748            // log(exp(0) + exp(0)) = log(2) ≈ 0.693
18749            assert!((result.values()[0].to_f64().unwrap() - std::f64::consts::LN_2).abs() < 1e-10);
18750            // log(exp(1) + exp(2)) ≈ 2.313
18751            let expected1 = (1.0_f64.exp() + 2.0_f64.exp()).ln();
18752            assert!((result.values()[1].to_f64().unwrap() - expected1).abs() < 1e-10);
18753            // log(exp(-1000) + exp(-1000)) = -1000 + log(2)
18754            let expected2 = -1000.0 + std::f64::consts::LN_2;
18755            assert!((result.values()[2].to_f64().unwrap() - expected2).abs() < 1e-8);
18756        }
18757
18758        #[test]
18759        fn logaddexp2_computes_stable_log2_sum() {
18760            let x = Column::from_values(vec![Scalar::Float64(0.0), Scalar::Float64(1.0)]).unwrap();
18761            let y = Column::from_values(vec![Scalar::Float64(0.0), Scalar::Float64(1.0)]).unwrap();
18762            let result = x.logaddexp2(&y).unwrap();
18763            // log2(2^0 + 2^0) = log2(2) = 1
18764            assert!((result.values()[0].to_f64().unwrap() - 1.0).abs() < 1e-10);
18765            // log2(2^1 + 2^1) = log2(4) = 2
18766            assert!((result.values()[1].to_f64().unwrap() - 2.0).abs() < 1e-10);
18767        }
18768
18769        #[test]
18770        fn roll_shifts_elements_circularly() {
18771            let col = Column::from_values(vec![
18772                Scalar::Int64(1),
18773                Scalar::Int64(2),
18774                Scalar::Int64(3),
18775                Scalar::Int64(4),
18776                Scalar::Int64(5),
18777            ])
18778            .unwrap();
18779            // Roll right by 2: [4, 5, 1, 2, 3]
18780            let r1 = col.roll(2).unwrap();
18781            assert_eq!(r1.values()[0].to_i64().unwrap(), 4);
18782            assert_eq!(r1.values()[1].to_i64().unwrap(), 5);
18783            assert_eq!(r1.values()[2].to_i64().unwrap(), 1);
18784            // Roll left by 2: [3, 4, 5, 1, 2]
18785            let r2 = col.roll(-2).unwrap();
18786            assert_eq!(r2.values()[0].to_i64().unwrap(), 3);
18787            assert_eq!(r2.values()[1].to_i64().unwrap(), 4);
18788            assert_eq!(r2.values()[2].to_i64().unwrap(), 5);
18789            // Roll by 0 or length is no-op
18790            let r3 = col.roll(0).unwrap();
18791            assert_eq!(r3.values()[0].to_i64().unwrap(), 1);
18792            let r4 = col.roll(5).unwrap();
18793            assert_eq!(r4.values()[0].to_i64().unwrap(), 1);
18794        }
18795
18796        #[test]
18797        fn trim_zeros_removes_leading_trailing() {
18798            let col = Column::from_values(vec![
18799                Scalar::Int64(0),
18800                Scalar::Int64(0),
18801                Scalar::Int64(1),
18802                Scalar::Int64(2),
18803                Scalar::Int64(0),
18804            ])
18805            .unwrap();
18806            // Trim both
18807            let r1 = col.trim_zeros("fb").unwrap();
18808            assert_eq!(r1.len(), 2);
18809            assert_eq!(r1.values()[0].to_i64().unwrap(), 1);
18810            assert_eq!(r1.values()[1].to_i64().unwrap(), 2);
18811            // Trim front only
18812            let r2 = col.trim_zeros("f").unwrap();
18813            assert_eq!(r2.len(), 3);
18814            assert_eq!(r2.values()[0].to_i64().unwrap(), 1);
18815            // Trim back only
18816            let r3 = col.trim_zeros("b").unwrap();
18817            assert_eq!(r3.len(), 4);
18818            assert_eq!(r3.values()[3].to_i64().unwrap(), 2);
18819        }
18820
18821        #[test]
18822        fn around_rounds_to_decimals() {
18823            let col = Column::from_values(vec![
18824                Scalar::Float64(1.234),
18825                Scalar::Float64(5.678),
18826                Scalar::Float64(3.5),
18827            ])
18828            .unwrap();
18829            // Round to 2 decimals
18830            let r1 = col.around(2).unwrap();
18831            assert!((r1.values()[0].to_f64().unwrap() - 1.23).abs() < 1e-10);
18832            assert!((r1.values()[1].to_f64().unwrap() - 5.68).abs() < 1e-10);
18833            assert!((r1.values()[2].to_f64().unwrap() - 3.5).abs() < 1e-10);
18834            // Round to 0 decimals
18835            let r2 = col.around(0).unwrap();
18836            assert!((r2.values()[0].to_f64().unwrap() - 1.0).abs() < 1e-10);
18837            assert!((r2.values()[1].to_f64().unwrap() - 6.0).abs() < 1e-10);
18838            // Round to -1 (tens) - np.around uses round-half-to-even (banker's)
18839            let col2 = Column::from_values(vec![
18840                Scalar::Float64(15.0),
18841                Scalar::Float64(24.0),
18842                Scalar::Float64(35.0),
18843            ])
18844            .unwrap();
18845            let r3 = col2.around(-1).unwrap();
18846            assert!((r3.values()[0].to_f64().unwrap() - 20.0).abs() < 1e-10);
18847            assert!((r3.values()[1].to_f64().unwrap() - 20.0).abs() < 1e-10);
18848            assert!((r3.values()[2].to_f64().unwrap() - 40.0).abs() < 1e-10);
18849        }
18850
18851        #[test]
18852        fn around_uses_numpy_half_even_ties() {
18853            // np.around is round-half-to-EVEN, matching pd.Series.round. The old
18854            // implementation used f64::round (half away from zero), diverging on
18855            // exact .5 ties: np.around([0.5,1.5,2.5,3.5]) == [0,2,2,4].
18856            let col = Column::from_values(vec![
18857                Scalar::Float64(0.5),
18858                Scalar::Float64(1.5),
18859                Scalar::Float64(2.5),
18860                Scalar::Float64(3.5),
18861                Scalar::Float64(-2.5),
18862            ])
18863            .unwrap();
18864            let r = col.around(0).unwrap();
18865            let got: Vec<f64> = r.values().iter().map(|v| v.to_f64().unwrap()).collect();
18866            assert_eq!(got, vec![0.0, 2.0, 2.0, 4.0, -2.0]);
18867
18868            // Negative decimals: np.around([15,25,35], -1) == [20,20,40] (25->20).
18869            let tens = Column::from_values(vec![
18870                Scalar::Float64(15.0),
18871                Scalar::Float64(25.0),
18872                Scalar::Float64(35.0),
18873            ])
18874            .unwrap();
18875            let rt = tens.around(-1).unwrap();
18876            let gott: Vec<f64> = rt.values().iter().map(|v| v.to_f64().unwrap()).collect();
18877            assert_eq!(gott, vec![20.0, 20.0, 40.0]);
18878
18879            // around must agree with round (both banker's).
18880            assert_eq!(
18881                col.around(0).unwrap().values(),
18882                col.round(0).unwrap().values()
18883            );
18884        }
18885
18886        #[test]
18887        fn unwrap_removes_phase_discontinuities() {
18888            use std::f64::consts::PI;
18889            let col = Column::from_values(vec![
18890                Scalar::Float64(0.0),
18891                Scalar::Float64(PI * 0.9),
18892                Scalar::Float64(-PI * 0.9), // jump > PI
18893                Scalar::Float64(0.0),
18894            ])
18895            .unwrap();
18896            let result = col.unwrap(None).unwrap();
18897            // After unwrap, the sequence should be continuous
18898            assert!((result.values()[0].to_f64().unwrap() - 0.0).abs() < 1e-10);
18899            // Second value unchanged
18900            assert!((result.values()[1].to_f64().unwrap() - PI * 0.9).abs() < 1e-10);
18901            // Third value should be unwrapped (added 2*PI)
18902            let v2 = result.values()[2].to_f64().unwrap();
18903            let v1 = result.values()[1].to_f64().unwrap();
18904            assert!((v2 - v1).abs() < PI); // difference should now be < PI
18905        }
18906    }
18907
18908    // ── Nullable Int64/Bool column tests (br-frankenpandas-rg8ys.6.4) ────
18909
18910    #[test]
18911    fn column_has_nulls_detects_missing_values() {
18912        let col_with_null = Column::from_values(vec![
18913            Scalar::Int64(1),
18914            Scalar::Null(NullKind::Null),
18915            Scalar::Int64(3),
18916        ])
18917        .unwrap();
18918        assert!(col_with_null.has_nulls());
18919
18920        let col_no_null =
18921            Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)])
18922                .unwrap();
18923        assert!(!col_no_null.has_nulls());
18924    }
18925
18926    #[test]
18927    fn column_promote_to_nullable_upgrades_dtype() {
18928        let col = Column::new(
18929            DType::Int64,
18930            vec![
18931                Scalar::Int64(1),
18932                Scalar::Null(NullKind::Null),
18933                Scalar::Int64(3),
18934            ],
18935        )
18936        .unwrap();
18937        assert_eq!(col.dtype(), DType::Int64);
18938        assert!(col.has_nulls());
18939
18940        let promoted = col.promote_to_nullable();
18941        assert_eq!(promoted.dtype(), DType::Int64Nullable);
18942        assert_eq!(promoted.len(), 3);
18943    }
18944
18945    #[test]
18946    fn column_promote_to_nullable_noop_without_nulls() {
18947        let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)])
18948            .unwrap();
18949        let promoted = col.promote_to_nullable();
18950        // No nulls, so dtype stays Int64
18951        assert_eq!(promoted.dtype(), DType::Int64);
18952    }
18953
18954    #[test]
18955    fn column_with_dtype_changes_metadata() {
18956        let col = Column::new(DType::Int64, vec![Scalar::Int64(42)]).unwrap();
18957        let changed = col.with_dtype(DType::Int64Nullable);
18958        assert_eq!(changed.dtype(), DType::Int64Nullable);
18959        assert_eq!(changed.values()[0], Scalar::Int64(42));
18960    }
18961
18962    #[test]
18963    fn nullable_int64_from_scalars_preserves_storage() {
18964        use super::ColumnData;
18965        let values = vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)];
18966        let data = ColumnData::from_scalars(&values, DType::Int64Nullable);
18967        assert!(matches!(&data, ColumnData::Int64(_)));
18968        if let ColumnData::Int64(arr) = data {
18969            assert_eq!(arr, vec![1, 2, 3]);
18970        }
18971    }
18972
18973    #[test]
18974    fn typed_all_valid_constructors_keep_single_typed_backing() {
18975        let ints = Column::from_i64_values(vec![1, 2, 3]);
18976        assert_eq!(ints.dtype(), DType::Int64);
18977        assert!(ints.validity.all());
18978        assert!(ints.data.is_none());
18979        assert_eq!(ints.as_i64_slice(), Some([1, 2, 3].as_slice()));
18980        assert_eq!(
18981            ints.values(),
18982            &[Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)]
18983        );
18984
18985        let floats = Column::from_f64_values(vec![1.5, -0.0, f64::INFINITY]);
18986        assert_eq!(floats.dtype(), DType::Float64);
18987        assert!(floats.validity.all());
18988        assert!(floats.data.is_none());
18989        assert_eq!(
18990            floats.as_f64_slice().map(|values| {
18991                values
18992                    .iter()
18993                    .map(|value| value.to_bits())
18994                    .collect::<Vec<_>>()
18995            }),
18996            Some(vec![
18997                1.5f64.to_bits(),
18998                (-0.0f64).to_bits(),
18999                f64::INFINITY.to_bits()
19000            ])
19001        );
19002        assert_eq!(
19003            floats.values(),
19004            &[
19005                Scalar::Float64(1.5),
19006                Scalar::Float64(-0.0),
19007                Scalar::Float64(f64::INFINITY)
19008            ]
19009        );
19010    }
19011
19012    #[test]
19013    fn repeated_slice_int64_column_matches_eager_materialization() {
19014        let lazy = Column::from_i64_repeated_slices(
19015            vec![10, 11, 12, 20, 21],
19016            vec![(0, 3), (3, 2), (0, 3)],
19017        );
19018        let eager = Column::from_i64_values(vec![10, 11, 12, 20, 21, 10, 11, 12]);
19019
19020        assert_eq!(lazy.dtype(), DType::Int64);
19021        assert!(lazy.validity.all());
19022        assert_eq!(lazy.len(), eager.len());
19023        assert_eq!(lazy.as_i64_slice(), eager.as_i64_slice());
19024        assert_eq!(lazy.values(), eager.values());
19025        assert_eq!(lazy, eager);
19026    }
19027}
fp_columnar/lib.rs

fp_columnar/
lib.rs