fp_types/
lib.rs

1#![forbid(unsafe_code)]
2#![warn(rustdoc::broken_intra_doc_links)]
3
4//! Foundational value-type abstractions for **frankenpandas** — the
5//! enums, structs, and free functions that every other crate
6//! (fp-columnar, fp-index, fp-frame, fp-io, ...) consumes when
7//! representing scalar data, dtypes, missing values, and time deltas.
8//!
9//! The types here intentionally stay tiny and dependency-light
10//! (`serde`, `thiserror`) so they can sit at the bottom of the
11//! workspace dep graph.
12//!
13//! ## Core value types
14//!
//! - [`DType`]: the dtype enum — `Null`, `Bool`, `BoolNullable`, `Int64`,
//!   `Int64Nullable`, `Float64`, `Utf8`, `Categorical`, `Timedelta64`,
//!   `Datetime64`, `Period`, `Interval`, `Sparse`. Drives column / series
//!   storage decisions across the workspace.
//! - [`Scalar`]: the per-cell value enum. Each variant holds the actual
//!   data (`Int64(i64)`, `Float64(f64)`, `Utf8(String)`, ...) plus the
//!   `Null(NullKind)` variant for missing values; [`Scalar::dtype`]
//!   reports the matching [`DType`].
23//! - [`NullKind`]: distinguishes the three pandas missing-value
24//!   "flavors" — `Null` (Python `None` / SQL NULL), `NaN`
25//!   (floating-point not-a-number), `NaT` (timedelta / datetime
26//!   not-a-time). `Scalar::Null(...)` carries the kind so downstream
27//!   code can preserve pandas semantics.
28//! - [`SparseDType`]: descriptor for sparse-encoded dtypes (paired
29//!   value dtype + fill value).
30//!
31//! ## Time / duration types
32//!
33//! - [`Timedelta`]: nanosecond-precision duration with arithmetic
34//!   helpers ([`Timedelta::add`], [`Timedelta::sub`],
35//!   [`Timedelta::mul_scalar`], [`Timedelta::div_scalar`],
36//!   [`Timedelta::div_timedelta`]) that propagate `NaT` per pandas
37//!   semantics. [`TimedeltaComponents`] breaks a timedelta into
38//!   days/hours/minutes/seconds/nanos for display.
39//! - [`Timestamp`]: nanosecond-precision wall-clock timestamp with
40//!   optional timezone. Includes floor / ceil / round helpers and
41//!   `NaT` propagation.
42//!
43//! ## Dtype inference + casting
44//!
45//! - [`infer_dtype`]: derive a [`DType`] from a slice of scalars
46//!   (used during DataFrame construction).
47//! - [`common_dtype`]: pandas-style dtype promotion for binary ops.
48//! - [`cast_scalar`] / [`cast_scalar_owned`]: convert a scalar to a
49//!   target dtype with explicit error reporting on impossible casts.
50//!
51//! ## Missing-value helpers
52//!
53//! Free fns matching `pd.isna` / `pd.notna` / `pd.fillna` / `pd.dropna`
54//! plus the `nan*` aggregations ([`nansum`], [`nanmean`], [`nancount`],
55//! [`nanmin`], [`nanmax`], [`nanmedian`], [`nanvar`], [`nanstd`])
56//! that mirror pandas' missing-aware reductions.
57//!
58//! ## Error reporting
59//!
60//! Errors are explicit enums via `thiserror`: [`TypeError`] for
61//! dtype-related failures (incompatible-cast, no-common-dtype) and
62//! [`TimedeltaError`] for parse failures.
63
64use serde::{Deserialize, Serialize};
65use thiserror::Error;
66
/// Logical dtype of a column, series, or scalar.
///
/// Variants serialize in snake_case unless an explicit `rename` attribute
/// overrides the name. The derived `PartialOrd` / `Ord` follow declaration
/// order, so reordering variants changes comparison results.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum DType {
    /// Dtype reported for `Scalar::Null`. `common_dtype` treats it as neutral:
    /// pairing it with any other dtype yields that other dtype.
    Null,
    /// Plain (non-nullable) boolean. `name()` reports `bool`.
    Bool,
    /// Nullable boolean extension dtype. Matches pandas `BooleanDtype()`.
    #[serde(rename = "boolean")]
    BoolNullable,
    /// Plain (non-nullable) 64-bit integer. `name()` reports `int64`.
    Int64,
    /// Nullable Int64 extension dtype. Matches pandas `Int64Dtype()` / `pd.NA`.
    #[serde(rename = "Int64")]
    Int64Nullable,
    /// 64-bit float. `name()` reports `float64`.
    Float64,
    /// String dtype. `name()` reports it as `object`; deserialization also
    /// accepts the aliases `string` and `str`.
    #[serde(alias = "string", alias = "str")]
    Utf8,
    /// Categorical dtype. `name()` reports `category`.
    Categorical,
    /// Timedelta dtype. `name()` reports `timedelta64[ns]`.
    Timedelta64,
    /// Nanosecond-precision datetime since Unix epoch. Matches pandas `datetime64[ns]`.
    Datetime64,
    /// Period ordinal. Matches pandas `period[freq]`. Stores ordinal + frequency code.
    Period,
    /// Numeric interval value. Matches pandas `interval[float64]`.
    Interval,
    /// Sparse-encoded dtype. The value dtype and fill value are described
    /// separately by `SparseDType`.
    Sparse,
}
92
93impl DType {
94    /// Returns true if the dtype is numeric (integer or floating point).
95    #[must_use]
96    pub const fn is_numeric(&self) -> bool {
97        matches!(self, Self::Int64 | Self::Int64Nullable | Self::Float64)
98    }
99
100    /// Returns true if the dtype is an integer type.
101    #[must_use]
102    pub const fn is_integer(&self) -> bool {
103        matches!(self, Self::Int64 | Self::Int64Nullable)
104    }
105
106    /// Returns true if the dtype is a floating point type.
107    #[must_use]
108    pub const fn is_floating(&self) -> bool {
109        matches!(self, Self::Float64)
110    }
111
112    /// Returns true if the dtype is boolean.
113    #[must_use]
114    pub const fn is_bool(&self) -> bool {
115        matches!(self, Self::Bool | Self::BoolNullable)
116    }
117
118    /// Returns true if the dtype is object/string type.
119    #[must_use]
120    pub const fn is_object(&self) -> bool {
121        matches!(self, Self::Utf8)
122    }
123
124    /// Returns true if the dtype is datetime.
125    #[must_use]
126    pub const fn is_datetime(&self) -> bool {
127        matches!(self, Self::Datetime64)
128    }
129
130    /// Returns true if the dtype is timedelta.
131    #[must_use]
132    pub const fn is_timedelta(&self) -> bool {
133        matches!(self, Self::Timedelta64)
134    }
135
136    /// Returns true if the dtype is categorical.
137    #[must_use]
138    pub const fn is_categorical(&self) -> bool {
139        matches!(self, Self::Categorical)
140    }
141
142    /// Returns true if the dtype is sparse.
143    #[must_use]
144    pub const fn is_sparse(&self) -> bool {
145        matches!(self, Self::Sparse)
146    }
147
148    /// Returns true if the dtype is period.
149    #[must_use]
150    pub const fn is_period(&self) -> bool {
151        matches!(self, Self::Period)
152    }
153
154    /// Returns true if the dtype is interval.
155    #[must_use]
156    pub const fn is_interval(&self) -> bool {
157        matches!(self, Self::Interval)
158    }
159
160    /// Return the dtype name as a string.
161    ///
162    /// Matches numpy dtype.name property.
163    #[must_use]
164    pub const fn name(&self) -> &'static str {
165        match self {
166            Self::Bool => "bool",
167            Self::BoolNullable => "boolean",
168            Self::Int64 => "int64",
169            Self::Int64Nullable => "Int64",
170            Self::Float64 => "float64",
171            Self::Utf8 => "object",
172            Self::Datetime64 => "datetime64[ns]",
173            Self::Timedelta64 => "timedelta64[ns]",
174            Self::Categorical => "category",
175            Self::Period => "period",
176            Self::Interval => "interval",
177            Self::Sparse => "Sparse",
178            Self::Null => "object",
179        }
180    }
181
182    /// Return the dtype kind character.
183    ///
184    /// Matches numpy dtype.kind property.
185    #[must_use]
186    pub const fn kind(&self) -> char {
187        match self {
188            Self::Bool | Self::BoolNullable => 'b',
189            Self::Int64 | Self::Int64Nullable => 'i',
190            Self::Float64 => 'f',
191            Self::Utf8 => 'O',
192            Self::Datetime64 => 'M',
193            Self::Timedelta64 => 'm',
194            Self::Categorical => 'O',
195            Self::Period => 'O',
196            Self::Interval => 'O',
197            Self::Sparse => 'O',
198            Self::Null => 'O',
199        }
200    }
201
202    /// Return the dtype itemsize in bytes.
203    ///
204    /// Matches numpy dtype.itemsize property.
205    #[must_use]
206    pub const fn itemsize(&self) -> usize {
207        match self {
208            Self::Bool | Self::BoolNullable => 1,
209            Self::Int64
210            | Self::Int64Nullable
211            | Self::Float64
212            | Self::Datetime64
213            | Self::Timedelta64
214            | Self::Period => 8,
215            Self::Utf8 | Self::Categorical | Self::Interval | Self::Sparse | Self::Null => 8,
216        }
217    }
218
219    /// Returns true if this is an extension dtype (categorical, sparse, period, interval, nullable).
220    ///
221    /// Matches `pd.api.types.is_extension_array_dtype()`.
222    #[must_use]
223    pub const fn is_extension(&self) -> bool {
224        matches!(
225            self,
226            Self::Categorical
227                | Self::Sparse
228                | Self::Period
229                | Self::Interval
230                | Self::Int64Nullable
231                | Self::BoolNullable
232        )
233    }
234
235    /// Returns true if this is a nullable extension dtype (Int64, boolean).
236    ///
237    /// Nullable extension dtypes preserve their dtype when nulls are introduced,
238    /// unlike numpy dtypes which promote to float64.
239    #[must_use]
240    pub const fn is_nullable(&self) -> bool {
241        matches!(self, Self::Int64Nullable | Self::BoolNullable)
242    }
243
244    /// Returns the non-nullable equivalent dtype.
245    ///
246    /// For nullable extension dtypes, returns the numpy equivalent.
247    /// For non-nullable dtypes, returns self.
248    #[must_use]
249    pub const fn to_non_nullable(&self) -> Self {
250        match self {
251            Self::Int64Nullable => Self::Int64,
252            Self::BoolNullable => Self::Bool,
253            other => *other,
254        }
255    }
256
257    /// Returns the nullable equivalent dtype.
258    ///
259    /// For numpy int64/bool, returns the nullable extension dtype.
260    /// For already-nullable or other dtypes, returns self.
261    #[must_use]
262    pub const fn to_nullable(&self) -> Self {
263        match self {
264            Self::Int64 => Self::Int64Nullable,
265            Self::Bool => Self::BoolNullable,
266            other => *other,
267        }
268    }
269
270    /// Returns true if this is a signed integer type.
271    ///
272    /// Matches `pd.api.types.is_signed_integer_dtype()`.
273    #[must_use]
274    pub const fn is_signed_integer(&self) -> bool {
275        matches!(self, Self::Int64 | Self::Int64Nullable)
276    }
277
278    /// Returns true if this is a string/object dtype.
279    ///
280    /// Matches `pd.api.types.is_string_dtype()`.
281    #[must_use]
282    pub const fn is_string_dtype(&self) -> bool {
283        matches!(self, Self::Utf8)
284    }
285
286    /// Returns true for any real numeric dtype (integer or float).
287    ///
288    /// Matches `pd.api.types.is_any_real_numeric_dtype()`.
289    #[must_use]
290    pub const fn is_any_real_numeric(&self) -> bool {
291        self.is_numeric()
292    }
293
294    /// Returns true for datetime-like dtypes (datetime, timedelta, period).
295    ///
296    /// Matches `pd.api.types.is_datetime64_any_dtype()` family.
297    #[must_use]
298    pub const fn is_datetime_like(&self) -> bool {
299        matches!(self, Self::Datetime64 | Self::Timedelta64 | Self::Period)
300    }
301
302    /// Return the numpy dtype character code.
303    ///
304    /// Matches numpy dtype.char property.
305    #[must_use]
306    pub const fn char(&self) -> char {
307        match self {
308            Self::Bool | Self::BoolNullable => '?',
309            Self::Int64 | Self::Int64Nullable => 'l',
310            Self::Float64 => 'd',
311            Self::Utf8 => 'O',
312            Self::Datetime64 => 'M',
313            Self::Timedelta64 => 'm',
314            Self::Categorical | Self::Period | Self::Interval | Self::Sparse | Self::Null => 'O',
315        }
316    }
317
318    /// Return the numpy type number.
319    ///
320    /// Matches numpy dtype.num property.
321    #[must_use]
322    pub const fn num(&self) -> i32 {
323        match self {
324            Self::Bool | Self::BoolNullable => 0,
325            Self::Int64 | Self::Int64Nullable => 7,
326            Self::Float64 => 12,
327            Self::Utf8 => 17,
328            Self::Datetime64 => 21,
329            Self::Timedelta64 => 22,
330            Self::Categorical | Self::Period | Self::Interval | Self::Sparse | Self::Null => 17,
331        }
332    }
333
334    /// Return the byte order character.
335    ///
336    /// Matches numpy dtype.byteorder property. Returns '=' (native) for all types.
337    #[must_use]
338    pub const fn byteorder(&self) -> char {
339        '='
340    }
341
342    /// Return the numpy dtype string representation.
343    ///
344    /// Matches numpy dtype.str property.
345    #[must_use]
346    pub const fn str_repr(&self) -> &'static str {
347        match self {
348            Self::Bool | Self::BoolNullable => "|b1",
349            Self::Int64 | Self::Int64Nullable => "<i8",
350            Self::Float64 => "<f8",
351            Self::Utf8 => "|O8",
352            Self::Datetime64 => "<M8[ns]",
353            Self::Timedelta64 => "<m8[ns]",
354            Self::Categorical | Self::Period | Self::Interval | Self::Sparse | Self::Null => "|O8",
355        }
356    }
357}
358
/// Descriptor of a sparse dtype: the dense value dtype paired with the fill
/// value that sparse storage leaves out.
///
/// Prefer [`SparseDType::new`], which validates the value dtype and
/// normalizes the fill value; the public fields allow direct construction
/// without those checks.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct SparseDType {
    /// Logical dtype of the values. [`SparseDType::new`] rejects
    /// `DType::Null` and `DType::Sparse` here.
    pub value_dtype: DType,
    /// Scalar that is elided from storage. [`SparseDType::new`] casts it to
    /// `value_dtype`, or replaces a missing value with that dtype's missing
    /// marker.
    pub fill_value: Scalar,
}
364
365impl SparseDType {
366    /// Construct a pandas-style sparse dtype descriptor.
367    ///
368    /// This records the logical dense value dtype plus the scalar value that is
369    /// elided from storage. The concrete sparse column representation lives in
370    /// fp-columnar; this descriptor is the shared public contract.
371    pub fn new(value_dtype: DType, fill_value: Scalar) -> Result<Self, TypeError> {
372        if matches!(value_dtype, DType::Null | DType::Sparse) {
373            return Err(TypeError::InvalidSparseValueDType { dtype: value_dtype });
374        }
375
376        let fill_value = if fill_value.is_missing() {
377            Scalar::missing_for_dtype(value_dtype)
378        } else {
379            cast_scalar_owned(fill_value, value_dtype)?
380        };
381
382        Ok(Self {
383            value_dtype,
384            fill_value,
385        })
386    }
387}
388
/// Flavor of a missing value carried by `Scalar::Null`.
///
/// The derived `PartialOrd` / `Ord` follow declaration order
/// (`Null < NaN < NaT`); `Scalar::semantic_cmp` uses that ordering when both
/// operands are `Scalar::Null`.
// NOTE(review): with `rename_all = "snake_case"` serde presumably emits
// `na_n` / `na_t` for `NaN` / `NaT` — confirm against the serialized fixtures.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum NullKind {
    /// Generic missing marker; `Scalar`'s `Display` renders it as `None`.
    Null,
    /// Not-a-number marker; `Scalar`'s `Display` renders it as `NaN`.
    NaN,
    /// Not-a-time marker; `Scalar`'s `Display` renders it as `NaT`.
    NaT,
}
396
/// A single cell value.
///
/// Serialized with serde's adjacent tagging: the variant name goes under
/// `"kind"` (snake_case) and the payload under `"value"`.
///
/// The derived `PartialEq` is structural; use `Scalar::semantic_eq` for the
/// missing-value-aware comparison.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(tag = "kind", content = "value", rename_all = "snake_case")]
pub enum Scalar {
    /// Missing value, tagged with its flavor.
    Null(NullKind),
    /// Boolean value.
    Bool(bool),
    /// 64-bit signed integer value.
    Int64(i64),
    /// 64-bit float value. A NaN payload counts as missing
    /// (see `Scalar::is_missing`).
    Float64(f64),
    /// Owned string value. Deserialization also accepts the tags `string`
    /// and `str`.
    #[serde(alias = "string", alias = "str")]
    Utf8(String),
    /// Timedelta payload. `Timedelta::NAT` marks a missing value.
    Timedelta64(i64),
    /// Nanoseconds since Unix epoch. Matches pandas `datetime64[ns]`.
    /// Uses `Timestamp::NAT` (i64::MIN) for missing values.
    Datetime64(i64),
    /// Period value (ordinal + frequency). A `Period` whose `ordinal` is
    /// `i64::MIN` is NaT (missing). The frequency is carried so writers can
    /// render the pandas calendar string (`2024Q1`, `2024-03`, ...) — the
    /// calendar string is not recoverable from the ordinal alone, since
    /// different frequencies share overlapping ordinal axes.
    Period(Period),
    /// Numeric interval value. Missing values remain `Scalar::Null`.
    Interval(Interval),
}
419
420impl std::fmt::Display for Scalar {
421    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
422        match self {
423            Self::Null(NullKind::NaN) => write!(f, "NaN"),
424            Self::Null(NullKind::NaT) => write!(f, "NaT"),
425            Self::Null(NullKind::Null) => write!(f, "None"),
426            Self::Bool(b) => write!(f, "{}", if *b { "True" } else { "False" }),
427            Self::Int64(v) => write!(f, "{v}"),
428            Self::Float64(v) => write!(f, "{v}"),
429            Self::Utf8(s) => write!(f, "{s}"),
430            Self::Timedelta64(nanos) => write!(f, "{}", Timedelta::format(*nanos)),
431            Self::Datetime64(nanos) => {
432                if *nanos == Timestamp::NAT {
433                    write!(f, "NaT")
434                } else {
435                    write!(f, "Timestamp[{nanos}]")
436                }
437            }
438            Self::Period(p) => {
439                if p.ordinal == i64::MIN {
440                    write!(f, "NaT")
441                } else {
442                    write!(f, "{}", p.calendar_string())
443                }
444            }
445            Self::Interval(interval) => write!(f, "{interval}"),
446        }
447    }
448}
449
450// Ergonomic From impls (br-frankenpandas-esjjy / fd90.182). Mirrors
451// IndexLabel's From<i64>/From<&str>/From<String> so users can write
452//   let v: Vec<Scalar> = vec![1i64.into(), 2.0.into(), "three".into()];
453// instead of the explicit Scalar::Int64(...)/Scalar::Float64(...) form.
454//
455// i64 maps to Int64 (more common than Timedelta64 in pandas-style code).
456// Users wanting Timedelta64 should construct it explicitly with
457// Scalar::Timedelta64(nanos) or via Timedelta::parse / to_timedelta.
458
459impl From<bool> for Scalar {
460    fn from(value: bool) -> Self {
461        Self::Bool(value)
462    }
463}
464
465impl From<i64> for Scalar {
466    fn from(value: i64) -> Self {
467        Self::Int64(value)
468    }
469}
470
471impl From<f64> for Scalar {
472    fn from(value: f64) -> Self {
473        Self::Float64(value)
474    }
475}
476
477impl From<&str> for Scalar {
478    fn from(value: &str) -> Self {
479        Self::Utf8(value.to_owned())
480    }
481}
482
483impl From<String> for Scalar {
484    fn from(value: String) -> Self {
485        Self::Utf8(value)
486    }
487}
488
489impl Scalar {
490    #[must_use]
491    pub fn dtype(&self) -> DType {
492        match self {
493            Self::Null(_) => DType::Null,
494            Self::Bool(_) => DType::Bool,
495            Self::Int64(_) => DType::Int64,
496            Self::Float64(_) => DType::Float64,
497            Self::Utf8(_) => DType::Utf8,
498            Self::Timedelta64(_) => DType::Timedelta64,
499            Self::Datetime64(_) => DType::Datetime64,
500            Self::Period(_) => DType::Period,
501            Self::Interval(_) => DType::Interval,
502        }
503    }
504
505    #[must_use]
506    pub fn is_missing(&self) -> bool {
507        match self {
508            Self::Null(_) => true,
509            Self::Float64(v) => v.is_nan(),
510            Self::Timedelta64(v) => *v == Timedelta::NAT,
511            Self::Datetime64(v) => *v == Timestamp::NAT,
512            Self::Period(p) => p.ordinal == i64::MIN,
513            _ => false,
514        }
515    }
516
517    #[must_use]
518    pub fn is_nan(&self) -> bool {
519        matches!(self, Self::Null(NullKind::NaN)) || matches!(self, Self::Float64(v) if v.is_nan())
520    }
521
522    /// Returns true if this is a Bool scalar.
523    #[must_use]
524    pub const fn is_bool(&self) -> bool {
525        matches!(self, Self::Bool(_))
526    }
527
528    /// Returns true if this is an Int64 scalar.
529    #[must_use]
530    pub const fn is_integer(&self) -> bool {
531        matches!(self, Self::Int64(_))
532    }
533
534    /// Returns true if this is a Float64 scalar.
535    #[must_use]
536    pub const fn is_float(&self) -> bool {
537        matches!(self, Self::Float64(_))
538    }
539
540    /// Returns true if this is a numeric scalar (Int64 or Float64).
541    #[must_use]
542    pub const fn is_numeric(&self) -> bool {
543        matches!(self, Self::Int64(_) | Self::Float64(_))
544    }
545
546    /// Returns true if this is a Utf8 (string) scalar.
547    #[must_use]
548    pub const fn is_string(&self) -> bool {
549        matches!(self, Self::Utf8(_))
550    }
551
552    /// Returns true if this is a Datetime64 scalar.
553    #[must_use]
554    pub const fn is_datetime(&self) -> bool {
555        matches!(self, Self::Datetime64(_))
556    }
557
558    /// Returns true if this is a Timedelta64 scalar.
559    #[must_use]
560    pub const fn is_timedelta(&self) -> bool {
561        matches!(self, Self::Timedelta64(_))
562    }
563
564    /// Returns true if this is a Period scalar.
565    #[must_use]
566    pub const fn is_period(&self) -> bool {
567        matches!(self, Self::Period(_))
568    }
569
570    /// Returns true if this is an Interval scalar.
571    #[must_use]
572    pub const fn is_interval(&self) -> bool {
573        matches!(self, Self::Interval(_))
574    }
575
576    #[must_use]
577    pub fn missing_for_dtype(dtype: DType) -> Self {
578        match dtype {
579            DType::Float64 => Self::Null(NullKind::NaN),
580            DType::Timedelta64 => Self::Timedelta64(Timedelta::NAT),
581            DType::Datetime64 => Self::Datetime64(Timestamp::NAT),
582            DType::Period => Self::Period(Period::new(i64::MIN, PeriodFreq::Daily)),
583            DType::Null => Self::Null(NullKind::Null),
584            DType::Bool
585            | DType::BoolNullable
586            | DType::Int64
587            | DType::Int64Nullable
588            | DType::Utf8
589            | DType::Categorical
590            | DType::Interval
591            | DType::Sparse => Self::Null(NullKind::Null),
592        }
593    }
594
595    #[must_use]
596    pub fn semantic_eq(&self, other: &Self) -> bool {
597        match (self, other) {
598            (Self::Float64(a), Self::Float64(b)) => {
599                if a.is_nan() && b.is_nan() {
600                    return true;
601                }
602                if *a == *b {
603                    return true;
604                }
605                let diff = (*a - *b).abs();
606                let max_abs = a.abs().max(b.abs());
607                if max_abs == 0.0 {
608                    diff < f64::EPSILON
609                } else {
610                    diff / max_abs < 1e-14
611                }
612            }
613            (Self::Null(_), Self::Float64(v)) | (Self::Float64(v), Self::Null(_)) => v.is_nan(),
614            // All Null kinds (Null / NaN / NaT) mark missingness; they are
615            // semantically indistinguishable for oracle-parity checks even
616            // though derived PartialEq would reject a cross-kind pair.
617            // fp-frame normalizes Float64 column missing cells to
618            // Null(NaN) at Column::new time, while fixture oracles encode
619            // the canonical missing marker as Null(Null).
620            (Self::Null(_), Self::Null(_)) => true,
621            _ => self == other,
622        }
623    }
624
625    #[must_use]
626    pub fn semantic_le(&self, other: &Self) -> bool {
627        match self.semantic_cmp(other) {
628            std::cmp::Ordering::Less | std::cmp::Ordering::Equal => true,
629            std::cmp::Ordering::Greater => false,
630        }
631    }
632
633    #[must_use]
634    pub fn semantic_ge(&self, other: &Self) -> bool {
635        match self.semantic_cmp(other) {
636            std::cmp::Ordering::Greater | std::cmp::Ordering::Equal => true,
637            std::cmp::Ordering::Less => false,
638        }
639    }
640
641    #[must_use]
642    pub fn is_null(&self) -> bool {
643        matches!(self, Self::Null(_))
644    }
645
646    #[must_use]
647    pub fn is_na(&self) -> bool {
648        self.is_missing()
649    }
650
651    #[must_use]
652    pub fn coalesce(&self, other: &Self) -> Self {
653        if self.is_missing() {
654            other.clone()
655        } else {
656            self.clone()
657        }
658    }
659
660    #[must_use]
661    pub fn semantic_cmp(&self, other: &Self) -> std::cmp::Ordering {
662        match (self, other) {
663            (Self::Int64(a), Self::Int64(b)) => a.cmp(b),
664            (Self::Float64(a), Self::Float64(b)) => {
665                a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)
666            }
667            (Self::Utf8(a), Self::Utf8(b)) => a.cmp(b),
668            (Self::Bool(a), Self::Bool(b)) => a.cmp(b),
669            (Self::Null(a), Self::Null(b)) => a.cmp(b),
670            (Self::Timedelta64(a), Self::Timedelta64(b)) => {
671                if *a == Timedelta::NAT || *b == Timedelta::NAT {
672                    std::cmp::Ordering::Equal
673                } else {
674                    a.cmp(b)
675                }
676            }
677            (Self::Datetime64(a), Self::Datetime64(b)) => {
678                if *a == Timestamp::NAT || *b == Timestamp::NAT {
679                    std::cmp::Ordering::Equal
680                } else {
681                    a.cmp(b)
682                }
683            }
684            (Self::Period(a), Self::Period(b)) => {
685                if a.ordinal == i64::MIN || b.ordinal == i64::MIN {
686                    std::cmp::Ordering::Equal
687                } else {
688                    a.ordinal.cmp(&b.ordinal)
689                }
690            }
691            (Self::Interval(a), Self::Interval(b)) => a
692                .left
693                .partial_cmp(&b.left)
694                .unwrap_or(std::cmp::Ordering::Equal)
695                .then_with(|| {
696                    a.right
697                        .partial_cmp(&b.right)
698                        .unwrap_or(std::cmp::Ordering::Equal)
699                })
700                .then_with(|| a.closed.cmp(&b.closed)),
701            // Cross-numeric comparison
702            (Self::Int64(a), Self::Float64(b)) => (*a as f64)
703                .partial_cmp(b)
704                .unwrap_or(std::cmp::Ordering::Equal),
705            (Self::Float64(a), Self::Int64(b)) => a
706                .partial_cmp(&(*b as f64))
707                .unwrap_or(std::cmp::Ordering::Equal),
708            // Fallback to debug representation for inconsistent types
709            (a, b) => format!("{a:?}").cmp(&format!("{b:?}")),
710        }
711    }
712
713    pub fn to_f64(&self) -> Result<f64, TypeError> {
714        match self {
715            Self::Bool(v) => Ok(if *v { 1.0 } else { 0.0 }),
716            Self::Int64(v) => Ok(*v as f64),
717            Self::Float64(v) => Ok(*v),
718            Self::Null(kind) => Err(TypeError::ValueIsMissing { kind: *kind }),
719            Self::Utf8(v) => Err(TypeError::NonNumericValue {
720                value: v.clone(),
721                dtype: DType::Utf8,
722            }),
723            Self::Timedelta64(v) if *v == Timedelta::NAT => Err(TypeError::ValueIsMissing {
724                kind: NullKind::NaT,
725            }),
726            Self::Timedelta64(v) => Err(TypeError::NonNumericValue {
727                value: Timedelta::format(*v),
728                dtype: DType::Timedelta64,
729            }),
730            Self::Datetime64(v) if *v == Timestamp::NAT => Err(TypeError::ValueIsMissing {
731                kind: NullKind::NaT,
732            }),
733            Self::Datetime64(v) => Err(TypeError::NonNumericValue {
734                value: format!("Timestamp[{v}]"),
735                dtype: DType::Datetime64,
736            }),
737            Self::Period(p) if p.ordinal == i64::MIN => Err(TypeError::ValueIsMissing {
738                kind: NullKind::NaT,
739            }),
740            Self::Period(p) => Err(TypeError::NonNumericValue {
741                value: p.calendar_string(),
742                dtype: DType::Period,
743            }),
744            Self::Interval(v) => Err(TypeError::NonNumericValue {
745                value: v.to_string(),
746                dtype: DType::Interval,
747            }),
748        }
749    }
750
751    /// Try to convert to i64. Returns error for missing or non-numeric values.
752    pub fn to_i64(&self) -> Result<i64, TypeError> {
753        match self {
754            Self::Bool(v) => Ok(if *v { 1 } else { 0 }),
755            Self::Int64(v) => Ok(*v),
756            Self::Float64(v) => Ok(*v as i64),
757            Self::Null(kind) => Err(TypeError::ValueIsMissing { kind: *kind }),
758            Self::Utf8(v) => Err(TypeError::NonNumericValue {
759                value: v.clone(),
760                dtype: DType::Utf8,
761            }),
762            Self::Timedelta64(v) if *v == Timedelta::NAT => Err(TypeError::ValueIsMissing {
763                kind: NullKind::NaT,
764            }),
765            Self::Timedelta64(v) => Ok(*v),
766            Self::Datetime64(v) if *v == Timestamp::NAT => Err(TypeError::ValueIsMissing {
767                kind: NullKind::NaT,
768            }),
769            Self::Datetime64(v) => Ok(*v),
770            Self::Period(p) if p.ordinal == i64::MIN => Err(TypeError::ValueIsMissing {
771                kind: NullKind::NaT,
772            }),
773            Self::Period(p) => Ok(p.ordinal),
774            Self::Interval(v) => Err(TypeError::NonNumericValue {
775                value: v.to_string(),
776                dtype: DType::Interval,
777            }),
778        }
779    }
780
781    /// Try to convert to bool. Returns error for missing values.
782    pub fn to_bool(&self) -> Result<bool, TypeError> {
783        match self {
784            Self::Bool(v) => Ok(*v),
785            Self::Int64(v) => Ok(*v != 0),
786            Self::Float64(v) => Ok(*v != 0.0 && !v.is_nan()),
787            Self::Null(kind) => Err(TypeError::ValueIsMissing { kind: *kind }),
788            Self::Utf8(v) => Ok(!v.is_empty()),
789            Self::Timedelta64(v) if *v == Timedelta::NAT => Err(TypeError::ValueIsMissing {
790                kind: NullKind::NaT,
791            }),
792            Self::Timedelta64(v) => Ok(*v != 0),
793            Self::Datetime64(v) if *v == Timestamp::NAT => Err(TypeError::ValueIsMissing {
794                kind: NullKind::NaT,
795            }),
796            Self::Datetime64(v) => Ok(*v != 0),
797            Self::Period(p) if p.ordinal == i64::MIN => Err(TypeError::ValueIsMissing {
798                kind: NullKind::NaT,
799            }),
800            Self::Period(p) => Ok(p.ordinal != 0),
801            Self::Interval(_) => Ok(true),
802        }
803    }
804
805    /// Try to convert to string representation.
806    pub fn to_str(&self) -> String {
807        match self {
808            Self::Bool(v) => if *v { "True" } else { "False" }.to_string(),
809            Self::Int64(v) => v.to_string(),
810            Self::Float64(v) => {
811                if v.is_nan() {
812                    "nan".to_string()
813                } else if v.is_infinite() {
814                    if *v > 0.0 { "inf" } else { "-inf" }.to_string()
815                } else {
816                    v.to_string()
817                }
818            }
819            Self::Null(_) => "NaN".to_string(),
820            Self::Utf8(v) => v.clone(),
821            Self::Timedelta64(v) => Timedelta::format(*v),
822            Self::Datetime64(v) if *v == Timestamp::NAT => "NaT".to_string(),
823            Self::Datetime64(v) => Timestamp::from_nanos(*v).isoformat(),
824            Self::Period(p) if p.ordinal == i64::MIN => "NaT".to_string(),
825            Self::Period(p) => p.calendar_string(),
826            Self::Interval(v) => v.to_string(),
827        }
828    }
829}
830
/// Errors reported by dtype promotion, scalar casts, and scalar conversions.
///
/// The `#[error(...)]` strings are the `Display` messages of each variant.
#[derive(Debug, Error, Clone, PartialEq)]
pub enum TypeError {
    /// No common dtype exists for the pair `left` / `right`; returned by
    /// `common_dtype`.
    #[error("dtype coercion from {left:?} to {right:?} has no compatible common type")]
    IncompatibleDtypes { left: DType, right: DType },
    /// A scalar of dtype `from` cannot be cast to dtype `to`.
    #[error("cannot cast scalar of dtype {from:?} to {to:?}")]
    InvalidCast { from: DType, to: DType },
    /// The float `value` cannot be represented as an int64 without loss.
    #[error("cannot cast float {value} to int64 without loss")]
    LossyFloatToInt { value: f64 },
    /// An int64 other than 0 or 1 was offered for a bool cast.
    #[error("expected 0/1 for bool cast from int64 but found {value}")]
    InvalidBoolInt { value: i64 },
    /// A float64 other than 0.0 or 1.0 was offered for a bool cast.
    #[error("expected 0.0/1.0 for bool cast from float64 but found {value}")]
    InvalidBoolFloat { value: f64 },
    /// A numeric conversion was requested for a non-numeric value; `value`
    /// is its rendered form and `dtype` its dtype.
    #[error("value {value:?} has non-numeric dtype {dtype:?}")]
    NonNumericValue { value: String, dtype: DType },
    /// A conversion was requested for a missing value of the given kind.
    #[error("value is missing ({kind:?})")]
    ValueIsMissing { kind: NullKind },
    /// The requested sparse value dtype is not allowed; returned by
    /// `SparseDType::new` for `Null` and `Sparse`.
    #[error("sparse value dtype cannot be {dtype:?}")]
    InvalidSparseValueDType { dtype: DType },
    /// An `interval_range` step was not finite and positive.
    #[error("interval_range step must be finite, positive, and not NaN (got {step})")]
    InvalidIntervalStep { step: f64 },
    /// An `interval_range` step does not evenly divide the span `end - start`.
    #[error("interval_range step {step} does not evenly divide range end-start={span}")]
    IntervalStepDoesNotDivide { step: f64, span: f64 },
    /// The text `value` could not be parsed as `target`.
    #[error("cannot parse '{value}' as {target}")]
    ValueNotParseable { value: String, target: String },
}
856
857pub fn common_dtype(left: DType, right: DType) -> Result<DType, TypeError> {
858    use DType::{
859        Bool, BoolNullable, Categorical, Datetime64, Float64, Int64, Int64Nullable, Null, Sparse,
860        Timedelta64,
861    };
862
863    let out = match (left, right) {
864        (a, b) if a == b => a,
865        (Null, other) | (other, Null) => other,
866        (Categorical, Categorical) => Categorical,
867
868        // Bool promotions (nullable absorbs non-nullable)
869        (Bool, Int64) | (Int64, Bool) => Int64,
870        (Bool, Int64Nullable) | (Int64Nullable, Bool) => Int64Nullable,
871        (BoolNullable, Int64) | (Int64, BoolNullable) => Int64Nullable,
872        (BoolNullable, Int64Nullable) | (Int64Nullable, BoolNullable) => Int64Nullable,
873        (Bool, BoolNullable) | (BoolNullable, Bool) => BoolNullable,
874        (Bool, Float64) | (Float64, Bool) => Float64,
875        (BoolNullable, Float64) | (Float64, BoolNullable) => Float64,
876
877        // Int64 promotions (nullable absorbs non-nullable)
878        (Int64, Float64) | (Float64, Int64) => Float64,
879        (Int64Nullable, Float64) | (Float64, Int64Nullable) => Float64,
880        (Int64, Int64Nullable) | (Int64Nullable, Int64) => Int64Nullable,
881
882        // Datetime/Timedelta
883        (Timedelta64, Timedelta64) => Timedelta64,
884        (Datetime64, Datetime64) => Datetime64,
885
886        (Sparse, _) | (_, Sparse) => return Err(TypeError::IncompatibleDtypes { left, right }),
887        _ => return Err(TypeError::IncompatibleDtypes { left, right }),
888    };
889
890    Ok(out)
891}
892
893pub fn infer_dtype(values: &[Scalar]) -> Result<DType, TypeError> {
894    let mut current = DType::Null;
895    let mut saw_utf8 = false;
896    let mut saw_timedelta = false;
897    let mut saw_datetime = false;
898    let mut saw_non_utf8_non_null = false;
899
900    for value in values {
901        match value.dtype() {
902            DType::Null => {}
903            DType::Utf8 => saw_utf8 = true,
904            DType::Timedelta64 => {
905                saw_timedelta = true;
906                if current == DType::Null {
907                    current = DType::Timedelta64;
908                } else if current != DType::Timedelta64 {
909                    return Err(TypeError::IncompatibleDtypes {
910                        left: current,
911                        right: DType::Timedelta64,
912                    });
913                }
914            }
915            DType::Datetime64 => {
916                saw_datetime = true;
917                if current == DType::Null {
918                    current = DType::Datetime64;
919                } else if current != DType::Datetime64 {
920                    return Err(TypeError::IncompatibleDtypes {
921                        left: current,
922                        right: DType::Datetime64,
923                    });
924                }
925            }
926            other => {
927                saw_non_utf8_non_null = true;
928                current = common_dtype(current, other)?;
929            }
930        }
931
932        if saw_utf8 && saw_non_utf8_non_null {
933            // Constructor inference follows pandas object-dtype behavior for
934            // heterogeneous string/scalar payloads while arithmetic coercion
935            // remains governed by the stricter common_dtype lattice.
936            return Ok(DType::Utf8);
937        }
938        if saw_timedelta && saw_non_utf8_non_null {
939            return Err(TypeError::IncompatibleDtypes {
940                left: DType::Timedelta64,
941                right: current,
942            });
943        }
944        if saw_datetime && saw_non_utf8_non_null {
945            return Err(TypeError::IncompatibleDtypes {
946                left: DType::Datetime64,
947                right: current,
948            });
949        }
950    }
951
952    if saw_utf8 {
953        Ok(DType::Utf8)
954    } else {
955        Ok(current)
956    }
957}
958
/// Cast a scalar to a target dtype, taking ownership to avoid redundant clones
/// when the value already has the correct type (AG-03: identity-cast skip).
///
/// Resolution order (earlier rules win):
/// 1. Identity casts, and swaps between `Int64`/`Int64Nullable` or
///    `Bool`/`BoolNullable`, return the value untouched.
/// 2. Any cast to `Utf8` renders the value as text, missing values included.
/// 3. A `Float64` NaN cast to `Bool` yields `true`.
/// 4. Any other missing value becomes `Scalar::missing_for_dtype(target)`.
/// 5. Everything else goes through the per-target conversion table below.
///
/// # Errors
///
/// - [`TypeError::InvalidCast`] when no conversion exists for the source/target
///   pair, or when a string fails to parse as the target type.
/// - [`TypeError::LossyFloatToInt`] for infinite or out-of-`i64`-range floats
///   cast to an integer dtype.
/// - [`TypeError::InvalidBoolInt`] / [`TypeError::InvalidBoolFloat`] when a
///   value other than 0/1 is cast to `BoolNullable`.
pub fn cast_scalar_owned(value: Scalar, target: DType) -> Result<Scalar, TypeError> {
    let from = value.dtype();
    if from == target {
        return Ok(value);
    }
    // Int64 <-> Int64Nullable: same representation, just different dtype tracking
    if (from == DType::Int64 && target == DType::Int64Nullable)
        || (from == DType::Int64Nullable && target == DType::Int64)
    {
        return Ok(value);
    }
    // Bool <-> BoolNullable: same representation
    if (from == DType::Bool && target == DType::BoolNullable)
        || (from == DType::BoolNullable && target == DType::Bool)
    {
        return Ok(value);
    }
    // Text rendering runs before the missing-value shortcut so that missing
    // values are spelled out ("None" / "nan" / "NaT") rather than kept missing.
    if target == DType::Utf8 {
        return Ok(Scalar::Utf8(scalar_to_string_for_astype(value)));
    }
    // Per br-frankenpandas-cyi4h: pandas astype(bool) (the numpy bool dtype)
    // treats a float NaN as truthy -> True (bool(nan) is True), unlike the
    // nullable 'boolean' dtype which keeps NA. FP's NaN=missing model would
    // otherwise fall through to the missing branch below and yield a null.
    // Verified vs live pandas 2.2.3.
    if target == DType::Bool
        && let Scalar::Float64(v) = &value
        && v.is_nan()
    {
        return Ok(Scalar::Bool(true));
    }
    if value.is_missing() {
        return Ok(Scalar::missing_for_dtype(target));
    }

    // Note: identity casts (from == target) are handled above, so same-type
    // arms are omitted from the match below.
    match target {
        DType::Null => Ok(Scalar::Null(NullKind::Null)),
        DType::Bool => match &value {
            // numpy bool: zero -> False, ANY nonzero -> True (it does not
            // restrict to 0/1). e.g. bool of -3 / 2.5 is True.
            Scalar::Int64(v) => Ok(Scalar::Bool(*v != 0)),
            // 0.0 and -0.0 -> False; every other value, INCLUDING NaN, -> True
            // (NaN != 0.0 is true), matching numpy/pandas truthiness.
            Scalar::Float64(v) => Ok(Scalar::Bool(*v != 0.0)),
            _ => Err(TypeError::InvalidCast { from, to: target }),
        },
        DType::BoolNullable => match &value {
            // pandas nullable Boolean ('boolean') is STRICT: only 0/1 (or
            // True/False) are accepted — any other value raises "Need to pass
            // bool-like values", UNLIKE numpy bool which is nonzero-truthy.
            // (br-frankenpandas-tjomg)
            Scalar::Bool(b) => Ok(Scalar::Bool(*b)),
            Scalar::Int64(0) => Ok(Scalar::Bool(false)),
            Scalar::Int64(1) => Ok(Scalar::Bool(true)),
            Scalar::Int64(v) => Err(TypeError::InvalidBoolInt { value: *v }),
            Scalar::Float64(v) if *v == 0.0 => Ok(Scalar::Bool(false)),
            Scalar::Float64(v) if *v == 1.0 => Ok(Scalar::Bool(true)),
            Scalar::Float64(v) => Err(TypeError::InvalidBoolFloat { value: *v }),
            _ => Err(TypeError::InvalidCast { from, to: target }),
        },
        DType::Int64 | DType::Int64Nullable => match &value {
            Scalar::Bool(v) => Ok(Scalar::Int64(i64::from(*v))),
            Scalar::Float64(v) => {
                // pandas astype(int64) truncates a finite float toward zero
                // (1.9 -> 1, -1.9 -> -1, 2.5 -> 2); only non-finite (NaN/±inf)
                // or out-of-i64-range values raise. Verified vs pandas 2.2.3
                // (br-frankenpandas-qcutc). NaN is handled as missing above, so
                // here non-finite means ±inf. `as i64` performs the toward-zero
                // truncation for in-range finite values.
                if !v.is_finite() {
                    return Err(TypeError::LossyFloatToInt { value: *v });
                }
                // 9223372036854775808.0 is 2^63, the first float above i64::MAX.
                if *v < i64::MIN as f64 || *v >= 9223372036854775808.0 {
                    return Err(TypeError::LossyFloatToInt { value: *v });
                }
                Ok(Scalar::Int64(*v as i64))
            }
            Scalar::Utf8(s) => {
                // Try direct int parse first, then try float parse + truncate
                // (pandas accepts "1.0" as valid int via float intermediate)
                if let Ok(v) = s.parse::<i64>() {
                    return Ok(Scalar::Int64(v));
                }
                // Only whole-number floats inside the i64 range are accepted.
                if let Ok(f) = s.parse::<f64>()
                    && f.is_finite()
                    && f.fract() == 0.0
                    && f >= i64::MIN as f64
                    && f < 9223372036854775808.0
                {
                    return Ok(Scalar::Int64(f as i64));
                }
                Err(TypeError::InvalidCast { from, to: target })
            }
            _ => Err(TypeError::InvalidCast { from, to: target }),
        },
        DType::Float64 => match &value {
            Scalar::Bool(v) => Ok(Scalar::Float64(if *v { 1.0 } else { 0.0 })),
            Scalar::Int64(v) => Ok(Scalar::Float64(*v as f64)),
            Scalar::Utf8(s) => s
                .parse::<f64>()
                .map(Scalar::Float64)
                .map_err(|_| TypeError::InvalidCast { from, to: target }),
            _ => Err(TypeError::InvalidCast { from, to: target }),
        },
        // Unreachable in practice (handled by the early return above); kept so
        // the match stays exhaustive.
        DType::Utf8 => Ok(Scalar::Utf8(scalar_to_string_for_astype(value))),
        DType::Categorical => Err(TypeError::InvalidCast { from, to: target }),
        DType::Timedelta64 => match &value {
            // The integer is reused as-is as the timedelta payload.
            Scalar::Int64(v) => Ok(Scalar::Timedelta64(*v)),
            Scalar::Utf8(s) => Timedelta::parse(s)
                .map(Scalar::Timedelta64)
                .map_err(|_| TypeError::InvalidCast { from, to: target }),
            _ => Err(TypeError::InvalidCast { from, to: target }),
        },
        DType::Datetime64 => match &value {
            // The integer is reused as-is as the datetime payload.
            Scalar::Int64(v) => Ok(Scalar::Datetime64(*v)),
            Scalar::Utf8(s) => Timestamp::parse(s)
                .map(|timestamp| Scalar::Datetime64(timestamp.nanos))
                .map_err(|_| TypeError::InvalidCast { from, to: target }),
            _ => Err(TypeError::InvalidCast { from, to: target }),
        },
        DType::Period => match &value {
            // Int cast to a freq-less DType::Period: default to Daily (pandas
            // requires an explicit freq in the dtype; ours is freq-less).
            Scalar::Int64(v) => Ok(Scalar::Period(Period::new(*v, PeriodFreq::Daily))),
            Scalar::Utf8(s) => Period::parse(s)
                .map(Scalar::Period)
                .map_err(|_| TypeError::InvalidCast { from, to: target }),
            _ => Err(TypeError::InvalidCast { from, to: target }),
        },
        DType::Interval => match &value {
            Scalar::Utf8(s) => Interval::parse(s)
                .map(Scalar::Interval)
                .map_err(|_| TypeError::InvalidCast { from, to: target }),
            _ => Err(TypeError::InvalidCast { from, to: target }),
        },
        DType::Sparse => Err(TypeError::InvalidCast { from, to: target }),
    }
}
1101
1102fn scalar_to_string_for_astype(value: Scalar) -> String {
1103    match value {
1104        Scalar::Null(NullKind::Null) => "None".to_owned(),
1105        Scalar::Null(NullKind::NaN) => "nan".to_owned(),
1106        Scalar::Null(NullKind::NaT) => "NaT".to_owned(),
1107        Scalar::Bool(true) => "True".to_owned(),
1108        Scalar::Bool(false) => "False".to_owned(),
1109        Scalar::Int64(v) => v.to_string(),
1110        Scalar::Float64(v) => float_to_string_for_astype(v),
1111        Scalar::Utf8(s) => s,
1112        Scalar::Timedelta64(v) if v == Timedelta::NAT => "NaT".to_owned(),
1113        Scalar::Timedelta64(v) => Timedelta::format(v),
1114        Scalar::Datetime64(v) if v == Timestamp::NAT => "NaT".to_owned(),
1115        Scalar::Datetime64(v) => format!("Timestamp[{v}]"),
1116        Scalar::Period(p) if p.ordinal == i64::MIN => "NaT".to_owned(),
1117        Scalar::Period(p) => p.calendar_string(),
1118        Scalar::Interval(v) => v.to_string(),
1119    }
1120}
1121
/// Format a float the way Python's `str(float)` does, which is what pandas
/// `astype(str)` produces.
///
/// NaN renders as `"nan"` and infinities as `"inf"` / `"-inf"`. Finite values
/// start from Rust's `Debug` output (shortest round-trip digits, `.0` on whole
/// numbers, scientific notation for very large or very small magnitudes) and
/// only the exponent is respelled: Python always writes a sign and at least two
/// exponent digits (`1e+16`, `1e-05`) where Rust writes `1e16`, `1e-5`.
fn float_to_string_for_astype(value: f64) -> String {
    if value.is_nan() {
        return String::from("nan");
    }
    if value.is_infinite() {
        // Display already yields "inf" / "-inf".
        return value.to_string();
    }

    let rendered = format!("{value:?}");
    // No exponent part: the Debug rendering is already the Python spelling.
    let Some((mantissa, exponent)) = rendered.split_once('e') else {
        return rendered;
    };

    let (sign, digits) = if let Some(magnitude) = exponent.strip_prefix('-') {
        ('-', magnitude)
    } else {
        ('+', exponent.strip_prefix('+').unwrap_or(exponent))
    };
    // Zero-pad the exponent to two digits, as Python does.
    format!("{mantissa}e{sign}{digits:0>2}")
}
1148
1149/// Cast a scalar reference to a target dtype (clones only when conversion is needed).
1150pub fn cast_scalar(value: &Scalar, target: DType) -> Result<Scalar, TypeError> {
1151    cast_scalar_owned(value.clone(), target)
1152}
1153
1154// ── Timedelta support ──────────────────────────────────────────────────
1155
/// Errors raised while parsing or constructing timedelta values.
#[derive(Debug, Error, Clone, PartialEq)]
pub enum TimedeltaError {
    /// The input text (or unit alias) is not a recognized timedelta spelling;
    /// the payload is the offending text.
    #[error("invalid timedelta string: {0}")]
    InvalidFormat(String),
    /// The result does not fit in the `i64` nanosecond representation.
    #[error("overflow in timedelta computation")]
    Overflow,
}
1163
/// Field-wise breakdown of a timedelta, as produced by `Timedelta::components`.
///
/// `Timedelta::components` floor-normalizes the value: `days` may be negative,
/// while every other field is the non-negative remainder within the next
/// larger unit. The `Default` value (all zeros) is what `Timedelta::components`
/// returns for NaT.
#[derive(Debug, Clone, Copy, Default)]
pub struct TimedeltaComponents {
    /// Whole days (floor division; negative for negative timedeltas).
    pub days: i64,
    /// Hours within the day.
    pub hours: i64,
    /// Minutes within the hour.
    pub minutes: i64,
    /// Seconds within the minute.
    pub seconds: i64,
    /// Milliseconds within the second.
    pub milliseconds: i64,
    /// Microseconds within the millisecond.
    pub microseconds: i64,
    /// Nanoseconds within the microsecond.
    pub nanoseconds: i64,
}
1174
/// Namespace type for timedelta helpers. It stores no data: timedeltas are
/// passed around as raw `i64` nanosecond counts, with `Timedelta::NAT` as the
/// missing-value sentinel.
pub struct Timedelta;
1176
1177impl Timedelta {
    /// Nanoseconds in one microsecond.
    pub const NANOS_PER_MICRO: i64 = 1_000;
    /// Nanoseconds in one millisecond.
    pub const NANOS_PER_MILLI: i64 = 1_000_000;
    /// Nanoseconds in one second.
    pub const NANOS_PER_SEC: i64 = 1_000_000_000;
    /// Nanoseconds in one minute.
    pub const NANOS_PER_MIN: i64 = 60 * Self::NANOS_PER_SEC;
    /// Nanoseconds in one hour.
    pub const NANOS_PER_HOUR: i64 = 60 * Self::NANOS_PER_MIN;
    /// Nanoseconds in one 24-hour day.
    pub const NANOS_PER_DAY: i64 = 24 * Self::NANOS_PER_HOUR;
    /// Nanoseconds in one 7-day week.
    pub const NANOS_PER_WEEK: i64 = 7 * Self::NANOS_PER_DAY;

    /// Sentinel nanosecond value standing for NaT (not-a-time); it occupies
    /// `i64::MIN`, so that value is never a real duration.
    pub const NAT: i64 = i64::MIN;
1187
1188    pub fn parse(s: &str) -> Result<i64, TimedeltaError> {
1189        let s = s.trim();
1190
1191        if s.eq_ignore_ascii_case("nat") {
1192            return Ok(Self::NAT);
1193        }
1194
1195        let (negative, s) = if let Some(rest) = s.strip_prefix('-') {
1196            (true, rest.trim())
1197        } else {
1198            (false, s)
1199        };
1200
1201        if let Some(nanos) = Self::try_parse_time_format(s) {
1202            return Ok(if negative { -nanos } else { nanos });
1203        }
1204
1205        if let Some(nanos) = Self::try_parse_iso8601_duration(s) {
1206            return Ok(if negative { -nanos } else { nanos });
1207        }
1208
1209        let nanos = Self::parse_compound(s)?;
1210        Ok(if negative { -nanos } else { nanos })
1211    }
1212
1213    /// Parse an ISO-8601 duration the way pandas `Timedelta` accepts it:
1214    /// a leading uppercase `P`, an optional `T` separator that is otherwise
1215    /// ignored, and unit letters `W`/`D`/`H`/`M`/`S` in any position. `M` is
1216    /// always MINUTES (never months), and only seconds may be fractional —
1217    /// years/months and lowercase units are rejected, matching pandas.
1218    /// (pandas mis-handles fractional non-second components; those are rejected
1219    /// here rather than reproducing the buggy value.) (br-frankenpandas-c3p8b)
1220    fn try_parse_iso8601_duration(s: &str) -> Option<i64> {
1221        let mut rest = s.strip_prefix('P')?;
1222        if rest.is_empty() {
1223            return None;
1224        }
1225        let mut total: i64 = 0;
1226        let mut saw_component = false;
1227        while !rest.is_empty() {
1228            if let Some(after_t) = rest.strip_prefix('T') {
1229                rest = after_t;
1230                continue;
1231            }
1232            let num_end = rest.find(|c: char| !c.is_ascii_digit() && c != '.')?;
1233            if num_end == 0 {
1234                return None;
1235            }
1236            let num_str = &rest[..num_end];
1237            let unit = rest.as_bytes()[num_end];
1238            let is_fractional = num_str.contains('.');
1239            rest = &rest[num_end + 1..];
1240
1241            let (multiplier, frac_ok) = match unit {
1242                b'W' => (Self::NANOS_PER_WEEK, false),
1243                b'D' => (Self::NANOS_PER_DAY, false),
1244                b'H' => (Self::NANOS_PER_HOUR, false),
1245                b'M' => (Self::NANOS_PER_MIN, false),
1246                b'S' => (Self::NANOS_PER_SEC, true),
1247                _ => return None,
1248            };
1249            if is_fractional {
1250                if !frac_ok {
1251                    return None;
1252                }
1253                let value: f64 = num_str.parse().ok()?;
1254                let product = value * multiplier as f64;
1255                if !product.is_finite() || product.abs() >= 9223372036854775808.0 {
1256                    return None;
1257                }
1258                total = total.checked_add(product.round() as i64)?;
1259            } else {
1260                let value: i64 = num_str.parse().ok()?;
1261                total = total.checked_add(value.checked_mul(multiplier)?)?;
1262            }
1263            saw_component = true;
1264        }
1265        saw_component.then_some(total)
1266    }
1267
1268    fn try_parse_time_format(s: &str) -> Option<i64> {
1269        let parts: Vec<&str> = s.split(':').collect();
1270        if parts.len() < 2 || parts.len() > 3 {
1271            return None;
1272        }
1273
1274        let hours: i64 = parts[0].parse().ok()?;
1275        let minutes: i64 = parts[1].parse().ok()?;
1276
1277        let (seconds, frac_nanos) = if parts.len() == 3 {
1278            if let Some((sec_str, frac_str)) = parts[2].split_once('.') {
1279                let sec: i64 = sec_str.parse().ok()?;
1280                if !frac_str.bytes().all(|byte| byte.is_ascii_digit()) {
1281                    return None;
1282                }
1283                let mut frac = 0_i64;
1284                let taken = frac_str.len().min(9);
1285                for byte in frac_str.bytes().take(9) {
1286                    frac = frac * 10 + i64::from(byte - b'0');
1287                }
1288                for _ in taken..9 {
1289                    frac *= 10;
1290                }
1291                (sec, frac)
1292            } else {
1293                let sec: i64 = parts[2].parse().ok()?;
1294                (sec, 0)
1295            }
1296        } else {
1297            (0, 0)
1298        };
1299
1300        hours
1301            .checked_mul(Self::NANOS_PER_HOUR)?
1302            .checked_add(minutes.checked_mul(Self::NANOS_PER_MIN)?)?
1303            .checked_add(seconds.checked_mul(Self::NANOS_PER_SEC)?)?
1304            .checked_add(frac_nanos)
1305    }
1306
1307    fn parse_compound(s: &str) -> Result<i64, TimedeltaError> {
1308        let mut total: i64 = 0;
1309        let mut remaining = s;
1310
1311        while !remaining.is_empty() {
1312            remaining = remaining.trim_start();
1313            if remaining.is_empty() {
1314                break;
1315            }
1316
1317            // Per br-frankenpandas-i9bah: check if remaining is a time format
1318            // (HH:MM:SS) which can appear after "N days " in pandas timedelta strings.
1319            if remaining.contains(':')
1320                && let Some(time_nanos) = Self::try_parse_time_format(remaining)
1321            {
1322                total = total
1323                    .checked_add(time_nanos)
1324                    .ok_or(TimedeltaError::Overflow)?;
1325                break;
1326            }
1327
1328            let num_end = remaining
1329                .find(|c: char| !c.is_ascii_digit() && c != '.' && c != '-')
1330                .unwrap_or(remaining.len());
1331
1332            if num_end == 0 {
1333                return Err(TimedeltaError::InvalidFormat(s.to_string()));
1334            }
1335
1336            let num_str = &remaining[..num_end];
1337            let num: f64 = num_str
1338                .parse()
1339                .map_err(|_| TimedeltaError::InvalidFormat(s.to_string()))?;
1340
1341            remaining = remaining[num_end..].trim_start();
1342
1343            let unit_end = remaining
1344                .find(|c: char| c.is_ascii_digit() || c.is_whitespace())
1345                .unwrap_or(remaining.len());
1346
1347            let unit = &remaining[..unit_end];
1348            remaining = &remaining[unit_end..];
1349
1350            let multiplier = Self::unit_to_nanos(unit)
1351                .ok_or_else(|| TimedeltaError::InvalidFormat(s.to_string()))?;
1352
1353            // Per br-frankenpandas-zw3mg: pandas raises OverflowError on
1354            // huge scientific-notation Timedeltas like "1e100 days". The
1355            // raw `as i64` cast silently saturated to i64::MAX, masking
1356            // overflow before checked_add could catch it.
1357            let product = num * multiplier as f64;
1358            if !product.is_finite() || product.abs() >= 9223372036854775808.0 {
1359                return Err(TimedeltaError::Overflow);
1360            }
1361            let nanos = product.round() as i64;
1362            total = total.checked_add(nanos).ok_or(TimedeltaError::Overflow)?;
1363        }
1364
1365        if total == 0 && !s.trim().is_empty() && s.trim() != "0" {
1366            return Err(TimedeltaError::InvalidFormat(s.to_string()));
1367        }
1368
1369        Ok(total)
1370    }
1371
1372    /// Map a pandas-style frequency-alias string to a nanosecond-count.
1373    ///
1374    /// Recognizes pandas's offset alias core set plus common word forms:
1375    /// W/week(s), D/day(s), H/hr/hour(s), m/T/min/minute(s), s/sec/second(s),
1376    /// ms/milli/millisecond(s)/L, us/µs/micro/microsecond(s)/U, ns/nano/
1377    /// nanosecond(s)/N. Empty string maps to days (matches pandas default).
1378    /// Returns `None` for unrecognized aliases — callers can choose to map
1379    /// that to NaT (consistent with the rest of fp-types) or surface as a
1380    /// typed error.
1381    ///
1382    /// Per br-frankenpandas-lbsx (9p0u Phase 2.6): public surface so
1383    /// downstream crates can consume the same alias map fp-types uses for
1384    /// `Timedelta::from_unit` / `Timestamp::*_to_unit`.
1385    #[must_use]
1386    pub fn unit_to_nanos(unit: &str) -> Option<i64> {
1387        match unit.to_lowercase().as_str() {
1388            "w" | "week" | "weeks" => Some(Self::NANOS_PER_WEEK),
1389            "d" | "day" | "days" => Some(Self::NANOS_PER_DAY),
1390            "h" | "hr" | "hour" | "hours" => Some(Self::NANOS_PER_HOUR),
1391            "m" | "min" | "minute" | "minutes" | "t" => Some(Self::NANOS_PER_MIN),
1392            "s" | "sec" | "second" | "seconds" => Some(Self::NANOS_PER_SEC),
1393            "ms" | "milli" | "millis" | "millisecond" | "milliseconds" | "l" => {
1394                Some(Self::NANOS_PER_MILLI)
1395            }
1396            "us" | "µs" | "micro" | "micros" | "microsecond" | "microseconds" | "u" => {
1397                Some(Self::NANOS_PER_MICRO)
1398            }
1399            "ns" | "nano" | "nanos" | "nanosecond" | "nanoseconds" | "n" => Some(1),
1400            "" => Some(Self::NANOS_PER_DAY),
1401            _ => None,
1402        }
1403    }
1404
1405    pub fn components(nanos: i64) -> TimedeltaComponents {
1406        if nanos == Self::NAT {
1407            return TimedeltaComponents::default();
1408        }
1409
1410        // pandas/Python normalize via FLOOR division (like format()): the days
1411        // component can be negative while the time-of-day remainder is always in
1412        // [0, 1 day). So pd.Timedelta(-1,'s').components == (-1, 23, 59, 59, 0, 0, 0),
1413        // NOT the abs-based (0, 0, 0, 1, 0, 0, 0).
1414        let days = nanos.div_euclid(Self::NANOS_PER_DAY);
1415        let rem = nanos.rem_euclid(Self::NANOS_PER_DAY);
1416
1417        let hours = rem / Self::NANOS_PER_HOUR;
1418        let rem = rem % Self::NANOS_PER_HOUR;
1419
1420        let minutes = rem / Self::NANOS_PER_MIN;
1421        let rem = rem % Self::NANOS_PER_MIN;
1422
1423        let seconds = rem / Self::NANOS_PER_SEC;
1424        let rem = rem % Self::NANOS_PER_SEC;
1425
1426        let milliseconds = rem / Self::NANOS_PER_MILLI;
1427        let rem = rem % Self::NANOS_PER_MILLI;
1428
1429        let microseconds = rem / Self::NANOS_PER_MICRO;
1430        let nanoseconds = rem % Self::NANOS_PER_MICRO;
1431
1432        TimedeltaComponents {
1433            days,
1434            hours,
1435            minutes,
1436            seconds,
1437            milliseconds,
1438            microseconds,
1439            nanoseconds,
1440        }
1441    }
1442
1443    pub fn total_seconds(nanos: i64) -> f64 {
1444        if nanos == Self::NAT {
1445            f64::NAN
1446        } else {
1447            nanos as f64 / Self::NANOS_PER_SEC as f64
1448        }
1449    }
1450
1451    /// Convert to specified time unit.
1452    ///
1453    /// Matches pd.Timedelta.as_unit(). Supported units: ns, us, ms, s, m, h, D.
1454    #[must_use]
1455    pub fn as_unit(nanos: i64, unit: &str) -> f64 {
1456        if nanos == Self::NAT {
1457            return f64::NAN;
1458        }
1459        let nanos_f = nanos as f64;
1460        match unit {
1461            "ns" | "nanoseconds" => nanos_f,
1462            "us" | "microseconds" => nanos_f / Self::NANOS_PER_MICRO as f64,
1463            "ms" | "milliseconds" => nanos_f / Self::NANOS_PER_MILLI as f64,
1464            "s" | "seconds" => nanos_f / Self::NANOS_PER_SEC as f64,
1465            "m" | "minutes" => nanos_f / Self::NANOS_PER_MIN as f64,
1466            "h" | "hours" => nanos_f / Self::NANOS_PER_HOUR as f64,
1467            "D" | "days" => nanos_f / Self::NANOS_PER_DAY as f64,
1468            _ => f64::NAN,
1469        }
1470    }
1471
1472    /// Return the days component. Matches `pd.Timedelta.days`.
1473    #[must_use]
1474    pub fn days(nanos: i64) -> i64 {
1475        if nanos == Self::NAT {
1476            return 0; // pandas returns 0 for NaT.days (no error)
1477        }
1478        // FLOOR division like pandas: pd.Timedelta(-1,'s').days == -1, not 0.
1479        nanos.div_euclid(Self::NANOS_PER_DAY)
1480    }
1481
1482    /// Return the seconds component (0-86399). Matches `pd.Timedelta.seconds`.
1483    #[must_use]
1484    pub fn seconds(nanos: i64) -> i64 {
1485        if nanos == Self::NAT {
1486            return 0;
1487        }
1488        // Floor-normalized time-of-day remainder: pd.Timedelta(-1,'s').seconds == 86399.
1489        nanos.rem_euclid(Self::NANOS_PER_DAY) / Self::NANOS_PER_SEC
1490    }
1491
1492    /// Return the microseconds component (0-999999). Matches `pd.Timedelta.microseconds`.
1493    #[must_use]
1494    pub fn microseconds(nanos: i64) -> i64 {
1495        if nanos == Self::NAT {
1496            return 0;
1497        }
1498        nanos.rem_euclid(Self::NANOS_PER_SEC) / Self::NANOS_PER_MICRO
1499    }
1500
1501    /// Return the nanoseconds component (0-999). Matches `pd.Timedelta.nanoseconds`.
1502    #[must_use]
1503    pub fn nanoseconds(nanos: i64) -> i64 {
1504        if nanos == Self::NAT {
1505            return 0;
1506        }
1507        nanos.rem_euclid(Self::NANOS_PER_MICRO)
1508    }
1509
1510    pub fn format(nanos: i64) -> String {
1511        if nanos == Self::NAT {
1512            return "NaT".to_string();
1513        }
1514
1515        // pandas / Python timedelta normalize via FLOOR division: the days
1516        // component can be negative while the time-of-day remainder is always
1517        // non-negative, and a negative-days value prints a '+' before the time
1518        // (e.g. -1s -> "-1 days +23:59:59", not "-0 days 00:00:01"). Compute the
1519        // components with Euclidean div/rem so the remainder is in [0, 1 day).
1520        let days = nanos.div_euclid(Self::NANOS_PER_DAY);
1521        let rem = nanos.rem_euclid(Self::NANOS_PER_DAY);
1522        let hours = rem / Self::NANOS_PER_HOUR;
1523        let minutes = (rem % Self::NANOS_PER_HOUR) / Self::NANOS_PER_MIN;
1524        let seconds = (rem % Self::NANOS_PER_MIN) / Self::NANOS_PER_SEC;
1525        let frac = rem % Self::NANOS_PER_SEC;
1526
1527        let time_part = format!("{hours:02}:{minutes:02}:{seconds:02}");
1528        // '+' joins the negative day count to the positive time remainder.
1529        let sep = if days < 0 { "+" } else { "" };
1530
1531        if frac > 0 {
1532            // pandas renders the sub-second part with microsecond precision
1533            // (6 digits) unless a sub-microsecond (nanosecond) component is
1534            // present, in which case it widens to 9 digits.
1535            if frac % 1_000 == 0 {
1536                format!("{days} days {sep}{time_part}.{:06}", frac / 1_000)
1537            } else {
1538                format!("{days} days {sep}{time_part}.{frac:09}")
1539            }
1540        } else {
1541            format!("{days} days {sep}{time_part}")
1542        }
1543    }
1544
1545    pub fn from_unit(value: f64, unit: &str) -> Result<i64, TimedeltaError> {
1546        let multiplier = Self::unit_to_nanos(unit)
1547            .ok_or_else(|| TimedeltaError::InvalidFormat(unit.to_string()))?;
1548        Ok((value * multiplier as f64).round() as i64)
1549    }
1550
1551    // ── Arithmetic (br-frankenpandas-4r56 Phase 1) ──────────────────────
1552    //
1553    // NaT propagation: any arithmetic with `NAT` returns `NAT`. Matches
1554    // pandas `pd.NaT + anything == NaT`, `pd.NaT - anything == NaT`, etc.
1555    // Saturation: i64 overflow clamps to i64::MAX/MIN (never wraps). Matches
1556    // pandas's OverflowError surface at the type-system boundary.
1557
1558    /// Add two Timedelta nanosecond values. NaT propagates; saturates on overflow.
1559    #[must_use]
1560    pub fn add(a: i64, b: i64) -> i64 {
1561        if a == Self::NAT || b == Self::NAT {
1562            return Self::NAT;
1563        }
1564        a.saturating_add(b)
1565    }
1566
1567    /// Subtract two Timedelta nanosecond values. NaT propagates; saturates on overflow.
1568    #[must_use]
1569    pub fn sub(a: i64, b: i64) -> i64 {
1570        if a == Self::NAT || b == Self::NAT {
1571            return Self::NAT;
1572        }
1573        a.saturating_sub(b)
1574    }
1575
1576    /// Negate a Timedelta value. NaT stays NaT. Saturates on overflow
1577    /// (pandas: `-pd.Timedelta.min` is NaT since min == -max - 1 cannot be negated).
1578    #[must_use]
1579    pub fn neg(a: i64) -> i64 {
1580        if a == Self::NAT {
1581            return Self::NAT;
1582        }
1583        a.saturating_neg()
1584    }
1585
1586    /// Absolute value of a Timedelta. NaT stays NaT. Saturates on overflow.
1587    #[must_use]
1588    pub fn abs(a: i64) -> i64 {
1589        if a == Self::NAT {
1590            return Self::NAT;
1591        }
1592        a.saturating_abs()
1593    }
1594
1595    /// Multiply a Timedelta value by an integer factor. NaT propagates;
1596    /// saturates on overflow.
1597    ///
1598    /// Matches pandas `pd.Timedelta(...) * int`.
1599    #[must_use]
1600    pub fn mul_scalar(a: i64, factor: i64) -> i64 {
1601        if a == Self::NAT {
1602            return Self::NAT;
1603        }
1604        a.saturating_mul(factor)
1605    }
1606
1607    /// Floor-divide a Timedelta value by an integer divisor. NaT propagates.
1608    /// Returns NaT on divide-by-zero (matches pandas, which raises, but we
1609    /// surface as NaT to avoid panics at the type-system boundary).
1610    ///
1611    /// Matches pandas / Python `pd.Timedelta(...) // int`: floor division,
1612    /// not truncation toward zero. `-100 // 3 == -34`, and `100 // -3 ==
1613    /// -34`. Rust's `/` truncates toward zero and `div_euclid` keeps the
1614    /// remainder non-negative — neither matches pandas when the divisor is
1615    /// negative. This helper adjusts trunc-toward-zero into floor.
1616    #[must_use]
1617    pub fn div_scalar(a: i64, divisor: i64) -> i64 {
1618        if a == Self::NAT || divisor == 0 {
1619            return Self::NAT;
1620        }
1621        // NAT == i64::MIN so the classic `i64::MIN / -1` overflow path is
1622        // already handled by the NAT check above. `(i64::MIN + 1) / -1`
1623        // equals `i64::MAX` with no overflow, so we never need a
1624        // saturation branch here.
1625        let q = a / divisor;
1626        let r = a % divisor;
1627        // If remainder is non-zero and has opposite sign from divisor,
1628        // Rust's trunc-toward-zero `/` is one step above the floor. Adjust
1629        // down by 1 to match Python/pandas floor division.
1630        if r != 0 && (r < 0) != (divisor < 0) {
1631            q - 1
1632        } else {
1633            q
1634        }
1635    }
1636
1637    /// Divide two Timedelta values, returning the ratio as f64.
1638    /// Matches pandas `pd.Timedelta(...) / pd.Timedelta(...)` → float.
1639    /// NaT in either operand → NaN. Zero divisor → ±Inf (per IEEE 754).
1640    #[must_use]
1641    pub fn div_timedelta(a: i64, b: i64) -> f64 {
1642        if a == Self::NAT || b == Self::NAT {
1643            return f64::NAN;
1644        }
1645        (a as f64) / (b as f64)
1646    }
1647
1648    /// Returns ISO 8601 duration format string.
1649    ///
1650    /// Matches pandas `pd.Timedelta.isoformat()`. Returns format like
1651    /// "P1DT2H3M4.567890123S" for 1 day, 2 hours, 3 minutes, 4.567890123 seconds.
1652    /// NaT returns "NaT".
1653    #[must_use]
1654    pub fn isoformat(nanos: i64) -> String {
1655        if nanos == Self::NAT {
1656            return "NaT".to_string();
1657        }
1658
1659        let negative = nanos < 0;
1660        let abs_nanos = nanos.saturating_abs();
1661
1662        let days = abs_nanos / Self::NANOS_PER_DAY;
1663        let remaining = abs_nanos % Self::NANOS_PER_DAY;
1664
1665        let hours = remaining / Self::NANOS_PER_HOUR;
1666        let remaining = remaining % Self::NANOS_PER_HOUR;
1667
1668        let minutes = remaining / Self::NANOS_PER_MIN;
1669        let remaining = remaining % Self::NANOS_PER_MIN;
1670
1671        let seconds = remaining / Self::NANOS_PER_SEC;
1672        let sub_sec_nanos = remaining % Self::NANOS_PER_SEC;
1673
1674        let mut result = String::new();
1675        if negative {
1676            result.push('-');
1677        }
1678
1679        result.push_str(&format!("P{days}DT{hours}H{minutes}M"));
1680
1681        if sub_sec_nanos == 0 {
1682            result.push_str(&format!("{seconds}S"));
1683        } else {
1684            let frac = format!("{:09}", sub_sec_nanos);
1685            let trimmed = frac.trim_end_matches('0');
1686            result.push_str(&format!("{seconds}.{trimmed}S"));
1687        }
1688
1689        result
1690    }
1691
1692    /// Rounds down to the nearest frequency unit.
1693    ///
1694    /// Matches pandas `pd.Timedelta.floor(freq)`. NaT is preserved.
1695    #[must_use]
1696    pub fn floor(nanos: i64, freq: &str) -> i64 {
1697        if nanos == Self::NAT {
1698            return Self::NAT;
1699        }
1700        let Some(unit_nanos) = Self::unit_to_nanos(freq) else {
1701            return Self::NAT;
1702        };
1703        if unit_nanos == 0 {
1704            return Self::NAT;
1705        }
1706        nanos.div_euclid(unit_nanos).saturating_mul(unit_nanos)
1707    }
1708
1709    /// Rounds up to the nearest frequency unit.
1710    ///
1711    /// Matches pandas `pd.Timedelta.ceil(freq)`. NaT is preserved.
1712    #[must_use]
1713    pub fn ceil(nanos: i64, freq: &str) -> i64 {
1714        if nanos == Self::NAT {
1715            return Self::NAT;
1716        }
1717        let Some(unit_nanos) = Self::unit_to_nanos(freq) else {
1718            return Self::NAT;
1719        };
1720        if unit_nanos == 0 {
1721            return Self::NAT;
1722        }
1723        let remainder = nanos.rem_euclid(unit_nanos);
1724        if remainder == 0 {
1725            nanos
1726        } else {
1727            nanos.saturating_add(unit_nanos - remainder)
1728        }
1729    }
1730
1731    /// Rounds to the nearest frequency unit.
1732    ///
1733    /// Matches pandas `pd.Timedelta.round(freq)`. Uses banker's rounding
1734    /// (round half to even). NaT is preserved.
1735    #[must_use]
1736    pub fn round(nanos: i64, freq: &str) -> i64 {
1737        if nanos == Self::NAT {
1738            return Self::NAT;
1739        }
1740        let Some(unit_nanos) = Self::unit_to_nanos(freq) else {
1741            return Self::NAT;
1742        };
1743        if unit_nanos == 0 {
1744            return Self::NAT;
1745        }
1746        let negative = nanos < 0;
1747        let abs_nanos = nanos.saturating_abs();
1748
1749        let quotient = abs_nanos / unit_nanos;
1750        let remainder = abs_nanos % unit_nanos;
1751        let half = unit_nanos / 2;
1752
1753        let rounded = if remainder > half {
1754            (quotient + 1) * unit_nanos
1755        } else if remainder < half {
1756            quotient * unit_nanos
1757        } else {
1758            // Exactly half: round to even
1759            if quotient % 2 == 0 {
1760                quotient * unit_nanos
1761            } else {
1762                (quotient + 1) * unit_nanos
1763            }
1764        };
1765
1766        if negative { -rounded } else { rounded }
1767    }
1768}
1769
1770// ── Timestamp types (br-frankenpandas-9p0u — 4r56 Phase 2) ─────────────
1771//
1772// Nanosecond-precision i64 since Unix epoch + optional IANA tz name.
1773// TZ-dependent arithmetic (DST transitions, tz conversion) is deferred
1774// to Phase 3 which pulls chrono_tz into fp-types; Phase 2 stores the
1775// tz name as opaque metadata and performs arithmetic on the absolute
1776// nanos axis only.
1777
/// Length in days of `month` (1 = January … 12 = December) in `year`.
///
/// February is 29 days in Gregorian leap years (divisible by 4 and not by
/// 100, or divisible by 400) and 28 otherwise. Returns `None` when `month`
/// is outside `1..=12`.
fn days_in_month(year: i64, month: u32) -> Option<u32> {
    let leap = (year % 4 == 0 && year % 100 != 0) || year % 400 == 0;
    match month {
        1 | 3 | 5 | 7 | 8 | 10 | 12 => Some(31),
        4 | 6 | 9 | 11 => Some(30),
        2 => Some(if leap { 29 } else { 28 }),
        _ => None,
    }
}
1800
/// How many ISO-8601 weeks `year` contains: 52 or 53.
///
/// A year is a 53-week year iff it starts on a Thursday, or it is a leap
/// year that starts on a Wednesday. That is expressed through the closed
/// form `p(year) == 4 || p(year - 1) == 3`, where
/// `p(y) = (y + ⌊y/4⌋ − ⌊y/100⌋ + ⌊y/400⌋) mod 7` gives the weekday of
/// Dec 31 of year `y`. Floor division and a non-negative modulus are used so
/// the formula also holds for non-positive years.
fn iso_weeks_in_year(year: i64) -> i64 {
    let dec31_weekday = |y: i64| {
        (y + y.div_euclid(4) - y.div_euclid(100) + y.div_euclid(400)).rem_euclid(7)
    };
    let is_long_year = dec31_weekday(year) == 4 || dec31_weekday(year - 1) == 3;
    if is_long_year { 53 } else { 52 }
}
1817
1818/// A nanosecond-precision point in time, Unix-epoch anchored.
1819///
1820/// Phase 2 scope: construction, arithmetic, equality, ordering, serde.
1821/// TZ semantics (IANA tz lookup, DST-aware shift) are deferred to Phase
1822/// 3 — see br-frankenpandas-4r56.
1823#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
1824pub struct Timestamp {
1825    /// Nanoseconds since Unix epoch. `i64::MIN` is NaT.
1826    pub nanos: i64,
1827    /// Optional IANA time-zone name (e.g. `"US/Eastern"`). `None` means
1828    /// naive / UTC-anchored. Phase 2 treats this as opaque metadata;
1829    /// Phase 3 wires chrono_tz interpretation.
1830    #[serde(default, skip_serializing_if = "Option::is_none")]
1831    pub tz: Option<String>,
1832}
1833
1834impl Timestamp {
    /// Raw `nanos` value that marks a missing timestamp (NaT). It is
    /// `i64::MIN`, parallel to `Timedelta::NAT`.
    pub const NAT: i64 = i64::MIN;
1837
1838    /// Construct a UTC-anchored (tz=None) Timestamp from nanoseconds
1839    /// since Unix epoch.
1840    #[must_use]
1841    pub const fn from_nanos(nanos: i64) -> Self {
1842        Self { nanos, tz: None }
1843    }
1844
1845    /// Construct a Timestamp tagged with an IANA tz name.
1846    ///
1847    /// Phase 2 doesn't interpret the tz — it only carries the name
1848    /// through arithmetic + serde. Phase 3 wires chrono_tz conversion.
1849    #[must_use]
1850    pub fn from_nanos_tz(nanos: i64, tz_name: impl Into<String>) -> Self {
1851        Self {
1852            nanos,
1853            tz: Some(tz_name.into()),
1854        }
1855    }
1856
1857    /// Returns the current UTC timestamp.
1858    ///
1859    /// Matches `pd.Timestamp.now()` / `pd.Timestamp.utcnow()`.
1860    #[must_use]
1861    pub fn now() -> Self {
1862        use std::time::{SystemTime, UNIX_EPOCH};
1863        let duration = SystemTime::now()
1864            .duration_since(UNIX_EPOCH)
1865            .unwrap_or_default();
1866        let nanos = duration.as_nanos() as i64;
1867        Self { nanos, tz: None }
1868    }
1869
1870    /// Alias for `now()`. Matches `pd.Timestamp.utcnow()`.
1871    #[must_use]
1872    pub fn utcnow() -> Self {
1873        Self::now()
1874    }
1875
1876    /// Returns today's date at midnight UTC.
1877    ///
1878    /// Matches `pd.Timestamp.today()`.
1879    #[must_use]
1880    pub fn today() -> Self {
1881        let now = Self::now();
1882        now.normalize()
1883    }
1884
1885    /// The NaT sentinel value for a Timestamp.
1886    #[must_use]
1887    pub const fn nat() -> Self {
1888        Self {
1889            nanos: Self::NAT,
1890            tz: None,
1891        }
1892    }
1893
1894    /// True iff this Timestamp is NaT.
1895    #[must_use]
1896    pub const fn is_nat(&self) -> bool {
1897        self.nanos == Self::NAT
1898    }
1899
1900    /// Nanoseconds since Unix epoch, matching `pd.Timestamp.value`.
1901    #[must_use]
1902    pub const fn value(&self) -> i64 {
1903        self.nanos
1904    }
1905
1906    /// Stored resolution unit, matching `pd.Timestamp.unit`.
1907    ///
1908    /// FrankenPandas `Timestamp` stores nanoseconds internally, so non-NaT
1909    /// values report `ns`. `NaT` has no unit.
1910    #[must_use]
1911    pub const fn unit(&self) -> Option<&'static str> {
1912        if self.is_nat() { None } else { Some("ns") }
1913    }
1914
1915    /// Return the resolution of the timestamp (always "ns" for nanoseconds).
1916    ///
1917    /// Matches `pd.Timestamp.resolution`. Returns None for NaT.
1918    #[must_use]
1919    pub const fn resolution(&self) -> Option<&'static str> {
1920        if self.is_nat() { None } else { Some("ns") }
1921    }
1922
1923    /// Numpy datetime64 scalar payload, matching `pd.Timestamp.asm8`.
1924    #[must_use]
1925    pub const fn asm8(&self) -> i64 {
1926        self.value()
1927    }
1928
1929    /// Convert to a datetime64 payload, matching `pd.Timestamp.to_datetime64()`.
1930    #[must_use]
1931    pub const fn to_datetime64(&self) -> i64 {
1932        self.value()
1933    }
1934
1935    /// Convert to a numpy scalar payload, matching `pd.Timestamp.to_numpy()`.
1936    #[must_use]
1937    pub const fn to_numpy(&self) -> i64 {
1938        self.value()
1939    }
1940
1941    /// POSIX timestamp in seconds, matching `pd.Timestamp.timestamp()`.
1942    ///
1943    /// Pandas exposes this through Python's datetime surface, so sub-microsecond
1944    /// nanoseconds are rounded to six decimal places. `NaT` raises in pandas;
1945    /// fp-types surfaces the same condition as a missing-value error.
1946    pub fn timestamp(&self) -> Result<f64, TypeError> {
1947        if self.is_nat() {
1948            return Err(TypeError::ValueIsMissing {
1949                kind: NullKind::NaT,
1950            });
1951        }
1952        let seconds = self.nanos as f64 / 1_000_000_000.0;
1953        let rounded = format!("{seconds:.6}").parse().unwrap_or(seconds);
1954        Ok(rounded)
1955    }
1956
1957    /// Add a Timedelta. NaT in either operand → NaT; saturates on overflow.
1958    /// TZ is preserved from `self`.
1959    #[must_use]
1960    pub fn add_timedelta(&self, td_nanos: i64) -> Self {
1961        if self.is_nat() || td_nanos == Timedelta::NAT {
1962            return Self::nat();
1963        }
1964        Self {
1965            nanos: self.nanos.saturating_add(td_nanos),
1966            tz: self.tz.clone(),
1967        }
1968    }
1969
1970    /// Subtract a Timedelta. NaT propagation + saturation; TZ preserved.
1971    #[must_use]
1972    pub fn sub_timedelta(&self, td_nanos: i64) -> Self {
1973        if self.is_nat() || td_nanos == Timedelta::NAT {
1974            return Self::nat();
1975        }
1976        Self {
1977            nanos: self.nanos.saturating_sub(td_nanos),
1978            tz: self.tz.clone(),
1979        }
1980    }
1981
1982    /// Subtract another Timestamp. Returns a Timedelta (i64 nanos).
1983    /// NaT in either → `Timedelta::NAT`; saturates on overflow.
1984    #[must_use]
1985    pub fn sub_timestamp(&self, other: &Self) -> i64 {
1986        if self.is_nat() || other.is_nat() {
1987            return Timedelta::NAT;
1988        }
1989        self.nanos.saturating_sub(other.nanos)
1990    }
1991
1992    /// NaT-aware semantic equality: two NaT Timestamps are equal to each
1993    /// other (matches pandas `pd.NaT == pd.NaT` under `equals()`, though
1994    /// pandas's `==` operator returns False for NaT==NaT — we follow the
1995    /// `semantic_eq` convention used elsewhere in fp-types).
1996    #[must_use]
1997    pub fn semantic_eq(&self, other: &Self) -> bool {
1998        if self.is_nat() && other.is_nat() {
1999            return true;
2000        }
2001        if self.is_nat() || other.is_nat() {
2002            return false;
2003        }
2004        self.nanos == other.nanos && self.tz == other.tz
2005    }
2006
2007    // ── Rounding to a Timedelta unit (br-frankenpandas-5h6n) ────────────
2008    //
2009    // Pure i64 arithmetic on the nanos axis. tz is preserved. Phase 3
2010    // chrono_tz integration will add a tz-aware variant that handles DST
2011    // boundaries correctly; these methods operate on the absolute time
2012    // axis, matching pandas's tz-naive `.floor` / `.ceil` / `.round`
2013    // semantics for unit values smaller than a day.
2014
2015    /// Round down to the nearest multiple of `unit_nanos`.
2016    ///
2017    /// Matches `pd.Timestamp(...).floor(unit)`. NaT in `self` or a
2018    /// non-positive `unit_nanos` returns NaT.
2019    #[must_use]
2020    pub fn floor_to(&self, unit_nanos: i64) -> Self {
2021        if self.is_nat() || unit_nanos <= 0 {
2022            return Self::nat();
2023        }
2024        let Some(nanos) = self.nanos.div_euclid(unit_nanos).checked_mul(unit_nanos) else {
2025            return Self::nat();
2026        };
2027        Self {
2028            nanos,
2029            tz: self.tz.clone(),
2030        }
2031    }
2032
2033    /// Round up to the nearest multiple of `unit_nanos`.
2034    ///
2035    /// Matches `pd.Timestamp(...).ceil(unit)`. NaT or non-positive
2036    /// `unit_nanos` returns NaT. Already-multiple inputs return self.
2037    #[must_use]
2038    pub fn ceil_to(&self, unit_nanos: i64) -> Self {
2039        if self.is_nat() || unit_nanos <= 0 {
2040            return Self::nat();
2041        }
2042        let rem = self.nanos.rem_euclid(unit_nanos);
2043        let nanos = if rem == 0 {
2044            self.nanos
2045        } else {
2046            self.nanos.saturating_add(unit_nanos - rem)
2047        };
2048        Self {
2049            nanos,
2050            tz: self.tz.clone(),
2051        }
2052    }
2053
2054    /// Round to the nearest multiple of `unit_nanos`, banker's rounding
2055    /// (half-to-even) on ties.
2056    ///
2057    /// Matches `pd.Timestamp(...).round(unit)`. NaT or non-positive
2058    /// `unit_nanos` returns NaT.
2059    #[must_use]
2060    pub fn round_to(&self, unit_nanos: i64) -> Self {
2061        if self.is_nat() || unit_nanos <= 0 {
2062            return Self::nat();
2063        }
2064        let floor = self.nanos.div_euclid(unit_nanos);
2065        let rem = self.nanos.rem_euclid(unit_nanos);
2066        let half = unit_nanos / 2;
2067        let chosen_floor = if rem < half {
2068            floor
2069        } else if rem > half {
2070            floor + 1
2071        } else if unit_nanos % 2 != 0 {
2072            // Odd unit can't have a true half; treat as round-up.
2073            floor + 1
2074        } else {
2075            // Tie: pick the even multiple.
2076            if floor % 2 == 0 { floor } else { floor + 1 }
2077        };
2078        Self {
2079            nanos: chosen_floor.saturating_mul(unit_nanos),
2080            tz: self.tz.clone(),
2081        }
2082    }
2083
2084    // ── String-unit rounding (br-frankenpandas-lbsx) ────────────────────
2085    //
2086    // Pandas convenience: `.floor('H')` / `.ceil('1D')` / `.round('s')`.
2087    // These delegate to `Timedelta::unit_to_nanos` for unit lookup, then to
2088    // the nanos-based `floor_to`/`ceil_to`/`round_to`. Unknown unit strings
2089    // return NaT, matching the rest of fp-types' "missing-input → missing-
2090    // output" convention.
2091
2092    /// Round down to the nearest multiple of the named unit.
2093    ///
2094    /// Matches `pd.Timestamp(...).floor(unit)`. Unknown unit → NaT.
2095    #[must_use]
2096    pub fn floor_to_unit(&self, unit: &str) -> Self {
2097        match Timedelta::unit_to_nanos(unit) {
2098            Some(unit_nanos) => self.floor_to(unit_nanos),
2099            None => Self::nat(),
2100        }
2101    }
2102
2103    /// Round up to the nearest multiple of the named unit.
2104    ///
2105    /// Matches `pd.Timestamp(...).ceil(unit)`. Unknown unit → NaT.
2106    #[must_use]
2107    pub fn ceil_to_unit(&self, unit: &str) -> Self {
2108        match Timedelta::unit_to_nanos(unit) {
2109            Some(unit_nanos) => self.ceil_to(unit_nanos),
2110            None => Self::nat(),
2111        }
2112    }
2113
2114    /// Round to the nearest multiple of the named unit, banker's rounding.
2115    ///
2116    /// Matches `pd.Timestamp(...).round(unit)`. Unknown unit → NaT.
2117    #[must_use]
2118    pub fn round_to_unit(&self, unit: &str) -> Self {
2119        match Timedelta::unit_to_nanos(unit) {
2120            Some(unit_nanos) => self.round_to(unit_nanos),
2121            None => Self::nat(),
2122        }
2123    }
2124
2125    /// Pandas-named alias for [`floor_to_unit`](Self::floor_to_unit).
2126    #[must_use]
2127    pub fn floor(&self, freq: &str) -> Self {
2128        self.floor_to_unit(freq)
2129    }
2130
2131    /// Pandas-named alias for [`ceil_to_unit`](Self::ceil_to_unit).
2132    #[must_use]
2133    pub fn ceil(&self, freq: &str) -> Self {
2134        self.ceil_to_unit(freq)
2135    }
2136
2137    /// Pandas-named alias for [`round_to_unit`](Self::round_to_unit).
2138    #[must_use]
2139    pub fn round(&self, freq: &str) -> Self {
2140        self.round_to_unit(freq)
2141    }
2142
2143    /// Extract the year component from the timestamp.
2144    ///
2145    /// Matches `pd.Timestamp.year`. Returns None for NaT.
2146    #[must_use]
2147    pub fn year(&self) -> Option<i64> {
2148        if self.is_nat() {
2149            return None;
2150        }
2151        // Floor (not truncate) so pre-1970 instants with a sub-day part map to
2152        // the correct calendar day (br-frankenpandas-wkjtw); div_euclid == `/`
2153        // for the post-1970 positive case.
2154        let days_since_epoch = self.nanos.div_euclid(Timedelta::NANOS_PER_DAY);
2155        let days = days_since_epoch + 719_468;
2156        let era = if days >= 0 { days } else { days - 146_096 } / 146_097;
2157        let doe = days - era * 146_097;
2158        let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146_096) / 365;
2159        let y = yoe + era * 400;
2160        let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
2161        let mp = (5 * doy + 2) / 153;
2162        let m = if mp < 10 { mp + 3 } else { mp - 9 };
2163        Some(if m <= 2 { y + 1 } else { y })
2164    }
2165
2166    /// Extract the month component (1-12) from the timestamp.
2167    ///
2168    /// Matches `pd.Timestamp.month`. Returns None for NaT.
2169    #[must_use]
2170    pub fn month(&self) -> Option<i64> {
2171        if self.is_nat() {
2172            return None;
2173        }
2174        // Floor (not truncate) so pre-1970 instants with a sub-day part map to
2175        // the correct calendar day (br-frankenpandas-wkjtw); div_euclid == `/`
2176        // for the post-1970 positive case.
2177        let days_since_epoch = self.nanos.div_euclid(Timedelta::NANOS_PER_DAY);
2178        let days = days_since_epoch + 719_468;
2179        let era = if days >= 0 { days } else { days - 146_096 } / 146_097;
2180        let doe = days - era * 146_097;
2181        let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146_096) / 365;
2182        let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
2183        let mp = (5 * doy + 2) / 153;
2184        Some(if mp < 10 { mp + 3 } else { mp - 9 })
2185    }
2186
2187    /// Extract the day component (1-31) from the timestamp.
2188    ///
2189    /// Matches `pd.Timestamp.day`. Returns None for NaT.
2190    #[must_use]
2191    pub fn day(&self) -> Option<i64> {
2192        if self.is_nat() {
2193            return None;
2194        }
2195        // Floor (not truncate) so pre-1970 instants with a sub-day part map to
2196        // the correct calendar day (br-frankenpandas-wkjtw); div_euclid == `/`
2197        // for the post-1970 positive case.
2198        let days_since_epoch = self.nanos.div_euclid(Timedelta::NANOS_PER_DAY);
2199        let days = days_since_epoch + 719_468;
2200        let era = if days >= 0 { days } else { days - 146_096 } / 146_097;
2201        let doe = days - era * 146_097;
2202        let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146_096) / 365;
2203        let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
2204        let mp = (5 * doy + 2) / 153;
2205        Some(doy - (153 * mp + 2) / 5 + 1)
2206    }
2207
2208    /// Extract the hour component (0-23) from the timestamp.
2209    ///
2210    /// Matches `pd.Timestamp.hour`. Returns None for NaT.
2211    #[must_use]
2212    pub fn hour(&self) -> Option<i64> {
2213        if self.is_nat() {
2214            return None;
2215        }
2216        // rem_euclid keeps the seconds-of-day in [0, 86400) even for negative
2217        // (pre-1970) nanos with a sub-second part (br-frankenpandas-wkjtw);
2218        // rem_euclid == `%` for the post-1970 positive case.
2219        let secs_of_day =
2220            self.nanos.rem_euclid(Timedelta::NANOS_PER_DAY) / Timedelta::NANOS_PER_SEC;
2221        Some(secs_of_day / 3600)
2222    }
2223
2224    /// Extract the minute component (0-59) from the timestamp.
2225    ///
2226    /// Matches `pd.Timestamp.minute`. Returns None for NaT.
2227    #[must_use]
2228    pub fn minute(&self) -> Option<i64> {
2229        if self.is_nat() {
2230            return None;
2231        }
2232        // rem_euclid keeps the seconds-of-day in [0, 86400) even for negative
2233        // (pre-1970) nanos with a sub-second part (br-frankenpandas-wkjtw);
2234        // rem_euclid == `%` for the post-1970 positive case.
2235        let secs_of_day =
2236            self.nanos.rem_euclid(Timedelta::NANOS_PER_DAY) / Timedelta::NANOS_PER_SEC;
2237        Some((secs_of_day % 3600) / 60)
2238    }
2239
2240    /// Extract the second component (0-59) from the timestamp.
2241    ///
2242    /// Matches `pd.Timestamp.second`. Returns None for NaT.
2243    #[must_use]
2244    pub fn second(&self) -> Option<i64> {
2245        if self.is_nat() {
2246            return None;
2247        }
2248        // rem_euclid keeps the seconds-of-day in [0, 86400) even for negative
2249        // (pre-1970) nanos with a sub-second part (br-frankenpandas-wkjtw);
2250        // rem_euclid == `%` for the post-1970 positive case.
2251        let secs_of_day =
2252            self.nanos.rem_euclid(Timedelta::NANOS_PER_DAY) / Timedelta::NANOS_PER_SEC;
2253        Some(secs_of_day % 60)
2254    }
2255
2256    /// Extract the microsecond component (0-999999) from the timestamp.
2257    ///
2258    /// Matches `pd.Timestamp.microsecond`. Returns None for NaT.
2259    #[must_use]
2260    pub fn microsecond(&self) -> Option<i64> {
2261        if self.is_nat() {
2262            return None;
2263        }
2264        // rem_euclid keeps the sub-second part in [0, 1e9) for negative nanos
2265        // (br-frankenpandas-wkjtw); == `%` for the post-1970 positive case.
2266        let sub_nanos = self.nanos.rem_euclid(Timedelta::NANOS_PER_SEC) as u64;
2267        Some((sub_nanos / 1000) as i64)
2268    }
2269
2270    /// Extract the nanosecond component (0-999) from the timestamp.
2271    ///
2272    /// Matches `pd.Timestamp.nanosecond`. Returns None for NaT.
2273    #[must_use]
2274    pub fn nanosecond(&self) -> Option<i64> {
2275        if self.is_nat() {
2276            return None;
2277        }
2278        // rem_euclid keeps the sub-second part in [0, 1e9) for negative nanos
2279        // (br-frankenpandas-wkjtw); == `%` for the post-1970 positive case.
2280        let sub_nanos = self.nanos.rem_euclid(Timedelta::NANOS_PER_SEC) as u64;
2281        Some((sub_nanos % 1000) as i64)
2282    }
2283
2284    /// Return the day of the week (Monday=0, Sunday=6).
2285    ///
2286    /// Matches `pd.Timestamp.dayofweek`. Returns None for NaT.
2287    #[must_use]
2288    pub fn dayofweek(&self) -> Option<i64> {
2289        if self.is_nat() {
2290            return None;
2291        }
2292        // Floor days for pre-1970 (br-frankenpandas-wkjtw); == `/` for positive.
2293        let days_since_epoch = self.nanos.div_euclid(Timedelta::NANOS_PER_DAY);
2294        let dow = ((days_since_epoch + 3) % 7 + 7) % 7;
2295        Some(dow)
2296    }
2297
2298    /// Alias for dayofweek(). Matches `pd.Timestamp.weekday`.
2299    #[must_use]
2300    pub fn weekday(&self) -> Option<i64> {
2301        self.dayofweek()
2302    }
2303
2304    /// Alias for dayofweek(). Matches `pd.Timestamp.day_of_week`.
2305    #[must_use]
2306    pub fn day_of_week(&self) -> Option<i64> {
2307        self.dayofweek()
2308    }
2309
2310    /// Return the day of the year (1-366).
2311    ///
2312    /// Matches `pd.Timestamp.dayofyear`. Returns None for NaT.
2313    #[must_use]
2314    pub fn dayofyear(&self) -> Option<i64> {
2315        if self.is_nat() {
2316            return None;
2317        }
2318        let m = self.month()?;
2319        let d = self.day()?;
2320        let y = self.year()?;
2321        let is_leap = (y % 4 == 0 && y % 100 != 0) || y % 400 == 0;
2322        let days_before: [i64; 12] = [0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334];
2323        let base = days_before[(m - 1) as usize] + d;
2324        if is_leap && m > 2 {
2325            Some(base + 1)
2326        } else {
2327            Some(base)
2328        }
2329    }
2330
2331    /// Alias for dayofyear(). Matches `pd.Timestamp.day_of_year`.
2332    #[must_use]
2333    pub fn day_of_year(&self) -> Option<i64> {
2334        self.dayofyear()
2335    }
2336
2337    /// Return the proleptic Gregorian ordinal (number of days since Jan 1, year 1).
2338    ///
2339    /// Matches `pd.Timestamp.toordinal()`. Returns None for NaT.
2340    #[must_use]
2341    pub fn toordinal(&self) -> Option<i64> {
2342        if self.is_nat() {
2343            return None;
2344        }
2345        let y = self.year()?;
2346        let m = self.month()?;
2347        let d = self.day()?;
2348        // Algorithm: count days from year 1 to the start of the given year,
2349        // add days in the months before the given month, add the day of month.
2350        // Account for leap years.
2351        let y_minus_1 = y - 1;
2352        let mut ordinal = y_minus_1 * 365 + y_minus_1 / 4 - y_minus_1 / 100 + y_minus_1 / 400;
2353        let is_leap = (y % 4 == 0 && y % 100 != 0) || y % 400 == 0;
2354        let days_before: [i64; 12] = [0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334];
2355        ordinal += days_before[(m - 1) as usize];
2356        if is_leap && m > 2 {
2357            ordinal += 1;
2358        }
2359        ordinal += d;
2360        Some(ordinal)
2361    }
2362
2363    /// Construct a Timestamp from a proleptic Gregorian ordinal.
2364    ///
2365    /// Matches `pd.Timestamp.fromordinal(ordinal)`. Returns NaT for invalid ordinals.
2366    #[must_use]
2367    pub fn fromordinal(ordinal: i64) -> Self {
2368        if ordinal <= 0 {
2369            return Self {
2370                nanos: Self::NAT,
2371                tz: None,
2372            };
2373        }
2374        // Convert y/m/d to days since Unix epoch, then to nanos
2375        // Unix epoch is 1970-01-01, which is ordinal 719163
2376        let days_since_epoch = ordinal - 719163;
2377        match days_since_epoch.checked_mul(Timedelta::NANOS_PER_DAY) {
2378            Some(nanos) => Self { nanos, tz: None },
2379            None => Self::nat(),
2380        }
2381    }
2382
2383    /// Return the Julian Date (astronomical day number).
2384    ///
2385    /// Matches `pd.Timestamp.to_julian_date()`. Returns NaN for NaT.
2386    /// The Julian Date is the continuous count of days since the beginning
2387    /// of the Julian Period (January 1, 4713 BC in the proleptic Julian calendar).
2388    #[must_use]
2389    pub fn to_julian_date(&self) -> f64 {
2390        if self.is_nat() {
2391            return f64::NAN;
2392        }
2393        // Gregorian ordinal 1 (Jan 1, year 1) corresponds to Julian Day 1721425.5
2394        // (at noon, since JD starts at noon)
2395        // For a timestamp at midnight, we subtract 0.5
2396        let ordinal = match self.toordinal() {
2397            Some(o) => o,
2398            None => return f64::NAN,
2399        };
2400        // Fractional day from time components
2401        let h = self.hour().unwrap_or(0) as f64;
2402        let m = self.minute().unwrap_or(0) as f64;
2403        let s = self.second().unwrap_or(0) as f64;
2404        let us = self.microsecond().unwrap_or(0) as f64;
2405        let ns = self.nanosecond().unwrap_or(0) as f64;
2406        let frac_day =
2407            (h + m / 60.0 + s / 3600.0 + us / 3_600_000_000.0 + ns / 3_600_000_000_000.0) / 24.0;
2408        // Julian day at midnight of ordinal 1 is 1721424.5
2409        1721424.5 + ordinal as f64 + frac_day
2410    }
2411
2412    /// Return the quarter (1-4) of the year.
2413    ///
2414    /// Matches `pd.Timestamp.quarter`. Returns None for NaT.
2415    #[must_use]
2416    pub fn quarter(&self) -> Option<i64> {
2417        self.month().map(|m| (m - 1) / 3 + 1)
2418    }
2419
2420    /// Return the ISO week number (1-53).
2421    ///
2422    /// Matches `pd.Timestamp.week`. Returns None for NaT.
2423    #[must_use]
2424    pub fn weekofyear(&self) -> Option<i64> {
2425        if self.is_nat() {
2426            return None;
2427        }
2428        let doy = self.dayofyear()?;
2429        let dow = self.dayofweek()?;
2430        let year = self.year()?;
2431        let iso_dow = if dow == 6 { 7 } else { dow + 1 };
2432        let week = (doy - iso_dow + 10) / 7;
2433        // ISO-8601 has 53-week years, so the clamps must consult the actual
2434        // week count, not hardcode 52/1: a week<1 belongs to the LAST week of
2435        // the previous year (52 OR 53), and a week beyond this year's count
2436        // wraps to week 1 of the next year. pandas isocalendar().week agrees:
2437        // 2021-01-01 -> 53 (2020 is a 53-week year), 2026-12-31 -> 53.
2438        if week < 1 {
2439            Some(iso_weeks_in_year(year - 1))
2440        } else if week > iso_weeks_in_year(year) {
2441            Some(1)
2442        } else {
2443            Some(week)
2444        }
2445    }
2446
2447    /// Alias for weekofyear(). Matches `pd.Timestamp.week`.
2448    #[must_use]
2449    pub fn week(&self) -> Option<i64> {
2450        self.weekofyear()
2451    }
2452
2453    /// Return the timestamp value in the specified unit.
2454    ///
2455    /// Matches `pd.Timestamp.value` when unit is nanoseconds.
2456    /// Supported units: "ns", "us", "ms", "s".
2457    #[must_use]
2458    pub fn to_unit(&self, unit: &str) -> Option<i64> {
2459        if self.is_nat() {
2460            return None;
2461        }
2462        match unit {
2463            "ns" | "nanosecond" | "nanoseconds" => Some(self.nanos),
2464            "us" | "microsecond" | "microseconds" => Some(self.nanos / 1_000),
2465            "ms" | "millisecond" | "milliseconds" => Some(self.nanos / 1_000_000),
2466            "s" | "second" | "seconds" => Some(self.nanos / 1_000_000_000),
2467            _ => None,
2468        }
2469    }
2470
2471    /// Whether the year is a leap year.
2472    ///
2473    /// Matches `pd.Timestamp.is_leap_year`. Returns None for NaT.
2474    #[must_use]
2475    pub fn is_leap_year(&self) -> Option<bool> {
2476        self.year()
2477            .map(|y| (y % 4 == 0 && y % 100 != 0) || y % 400 == 0)
2478    }
2479
2480    /// Whether the day is the first day of the month.
2481    ///
2482    /// Matches `pd.Timestamp.is_month_start`. Returns None for NaT.
2483    #[must_use]
2484    pub fn is_month_start(&self) -> Option<bool> {
2485        self.day().map(|d| d == 1)
2486    }
2487
2488    /// Whether the day is the last day of the month.
2489    ///
2490    /// Matches `pd.Timestamp.is_month_end`. Returns None for NaT.
2491    #[must_use]
2492    pub fn is_month_end(&self) -> Option<bool> {
2493        let y = self.year()?;
2494        let m = self.month()?;
2495        let d = self.day()?;
2496        let is_leap = (y % 4 == 0 && y % 100 != 0) || y % 400 == 0;
2497        let days_in_month: [i64; 12] = [
2498            31,
2499            if is_leap { 29 } else { 28 },
2500            31,
2501            30,
2502            31,
2503            30,
2504            31,
2505            31,
2506            30,
2507            31,
2508            30,
2509            31,
2510        ];
2511        Some(d == days_in_month[(m - 1) as usize])
2512    }
2513
2514    /// Whether the day is the first day of a quarter.
2515    ///
2516    /// Matches `pd.Timestamp.is_quarter_start`. Returns None for NaT.
2517    #[must_use]
2518    pub fn is_quarter_start(&self) -> Option<bool> {
2519        let m = self.month()?;
2520        let d = self.day()?;
2521        Some(d == 1 && (m == 1 || m == 4 || m == 7 || m == 10))
2522    }
2523
2524    /// Whether the day is the last day of a quarter.
2525    ///
2526    /// Matches `pd.Timestamp.is_quarter_end`. Returns None for NaT.
2527    #[must_use]
2528    pub fn is_quarter_end(&self) -> Option<bool> {
2529        let m = self.month()?;
2530        let d = self.day()?;
2531        Some(
2532            (m == 3 && d == 31)
2533                || (m == 6 && d == 30)
2534                || (m == 9 && d == 30)
2535                || (m == 12 && d == 31),
2536        )
2537    }
2538
2539    /// Whether the day is the first day of the year (Jan 1).
2540    ///
2541    /// Matches `pd.Timestamp.is_year_start`. Returns None for NaT.
2542    #[must_use]
2543    pub fn is_year_start(&self) -> Option<bool> {
2544        let m = self.month()?;
2545        let d = self.day()?;
2546        Some(m == 1 && d == 1)
2547    }
2548
2549    /// Whether the day is the last day of the year (Dec 31).
2550    ///
2551    /// Matches `pd.Timestamp.is_year_end`. Returns None for NaT.
2552    #[must_use]
2553    pub fn is_year_end(&self) -> Option<bool> {
2554        let m = self.month()?;
2555        let d = self.day()?;
2556        Some(m == 12 && d == 31)
2557    }
2558
2559    /// Return the number of days in the month of this timestamp.
2560    ///
2561    /// Matches `pd.Timestamp.days_in_month`. Returns None for NaT.
2562    #[must_use]
2563    pub fn days_in_month(&self) -> Option<i64> {
2564        let y = self.year()?;
2565        let m = self.month()?;
2566        let is_leap = (y % 4 == 0 && y % 100 != 0) || y % 400 == 0;
2567        let days: [i64; 12] = [
2568            31,
2569            if is_leap { 29 } else { 28 },
2570            31,
2571            30,
2572            31,
2573            30,
2574            31,
2575            31,
2576            30,
2577            31,
2578            30,
2579            31,
2580        ];
2581        Some(days[(m - 1) as usize])
2582    }
2583
2584    /// Alias for days_in_month(). Matches `pd.Timestamp.daysinmonth`.
2585    #[must_use]
2586    pub fn daysinmonth(&self) -> Option<i64> {
2587        self.days_in_month()
2588    }
2589
2590    /// Normalize to midnight/day boundary, matching `pd.Timestamp.normalize()`.
2591    #[must_use]
2592    pub fn normalize(&self) -> Self {
2593        self.floor_to_unit("D")
2594    }
2595
2596    /// Replace timestamp components with new values.
2597    ///
2598    /// Matches pd.Timestamp.replace(). None values keep the existing component.
2599    #[must_use]
2600    #[allow(clippy::too_many_arguments)]
2601    pub fn replace(
2602        &self,
2603        year: Option<i64>,
2604        month: Option<i64>,
2605        day: Option<i64>,
2606        hour: Option<i64>,
2607        minute: Option<i64>,
2608        second: Option<i64>,
2609        microsecond: Option<i64>,
2610        nanosecond: Option<i64>,
2611    ) -> Self {
2612        if self.is_nat() {
2613            return self.clone();
2614        }
2615        let cur_year = self.year().unwrap_or(1970);
2616        let cur_month = self.month().unwrap_or(1);
2617        let cur_day = self.day().unwrap_or(1);
2618        let cur_hour = self.hour().unwrap_or(0);
2619        let cur_minute = self.minute().unwrap_or(0);
2620        let cur_second = self.second().unwrap_or(0);
2621        let cur_micro = self.microsecond().unwrap_or(0);
2622        let cur_nano = self.nanosecond().unwrap_or(0);
2623
2624        let y = year.unwrap_or(cur_year);
2625        let mo = month.unwrap_or(cur_month);
2626        let d = day.unwrap_or(cur_day);
2627        let h = hour.unwrap_or(cur_hour);
2628        let mi = minute.unwrap_or(cur_minute);
2629        let s = second.unwrap_or(cur_second);
2630        let us = microsecond.unwrap_or(cur_micro);
2631        let ns = nanosecond.unwrap_or(cur_nano);
2632
2633        if !(1..=12).contains(&mo)
2634            || !(1..=days_in_month(y, mo as u32).unwrap_or(0) as i64).contains(&d)
2635            || !(0..=23).contains(&h)
2636            || !(0..=59).contains(&mi)
2637            || !(0..=59).contains(&s)
2638            || !(0..=999_999).contains(&us)
2639            || !(0..=999).contains(&ns)
2640        {
2641            return Self::nat();
2642        }
2643
2644        let days_from_epoch = Self::days_from_ymd(y, mo, d);
2645        let secs = h * 3600 + mi * 60 + s;
2646        let total_nanos = days_from_epoch * Timedelta::NANOS_PER_DAY
2647            + secs * Timedelta::NANOS_PER_SEC
2648            + us * Timedelta::NANOS_PER_MICRO
2649            + ns;
2650
2651        Self {
2652            nanos: total_nanos,
2653            tz: self.tz.clone(),
2654        }
2655    }
2656
2657    fn days_from_ymd(year: i64, month: i64, day: i64) -> i64 {
2658        let y = if month <= 2 { year - 1 } else { year };
2659        let era = if y >= 0 { y } else { y - 399 } / 400;
2660        let yoe = y - era * 400;
2661        let doy = (153 * (if month > 2 { month - 3 } else { month + 9 }) + 2) / 5 + day - 1;
2662        let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy;
2663        era * 146097 + doe - 719468
2664    }
2665
2666    /// Return an ISO 8601 string representation of the timestamp.
2667    ///
2668    /// Matches `pd.Timestamp.isoformat()`. NaT returns "NaT".
2669    #[must_use]
2670    pub fn isoformat(&self) -> String {
2671        if self.is_nat() {
2672            return "NaT".to_string();
2673        }
2674        // rem_euclid keeps the sub-second part in [0, 1e9) for negative nanos
2675        // (br-frankenpandas-wkjtw); == `%` for the post-1970 positive case.
2676        let days_since_epoch = self.nanos.div_euclid(Timedelta::NANOS_PER_DAY);
2677        let nanos_of_day = self.nanos.rem_euclid(Timedelta::NANOS_PER_DAY);
2678        let secs_of_day = nanos_of_day / Timedelta::NANOS_PER_SEC;
2679        let sub_nanos = nanos_of_day.rem_euclid(Timedelta::NANOS_PER_SEC) as u64;
2680
2681        let days = days_since_epoch + 719_468;
2682        let era = if days >= 0 { days } else { days - 146_096 } / 146_097;
2683        let doe = days - era * 146_097;
2684        let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146_096) / 365;
2685        let y = yoe + era * 400;
2686        let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
2687        let mp = (5 * doy + 2) / 153;
2688        let d = doy - (153 * mp + 2) / 5 + 1;
2689        let m = if mp < 10 { mp + 3 } else { mp - 9 };
2690        let year = if m <= 2 { y + 1 } else { y };
2691
2692        let hour = secs_of_day / 3600;
2693        let minute = (secs_of_day % 3600) / 60;
2694        let second = secs_of_day % 60;
2695
2696        let base = if sub_nanos == 0 {
2697            format!("{year:04}-{m:02}-{d:02}T{hour:02}:{minute:02}:{second:02}")
2698        } else if sub_nanos.is_multiple_of(1_000) {
2699            format!(
2700                "{year:04}-{m:02}-{d:02}T{hour:02}:{minute:02}:{second:02}.{:06}",
2701                sub_nanos / 1_000
2702            )
2703        } else {
2704            format!("{year:04}-{m:02}-{d:02}T{hour:02}:{minute:02}:{second:02}.{sub_nanos:09}")
2705        };
2706        match &self.tz {
2707            Some(tz) if tz == "UTC" => format!("{base}+00:00"),
2708            Some(tz) => format!("{base}[{tz}]"),
2709            None => base,
2710        }
2711    }
2712
2713    /// Alias for isoformat.
2714    #[must_use]
2715    pub fn to_iso8601(&self) -> String {
2716        self.isoformat()
2717    }
2718
2719    /// Parse a datetime string into a Timestamp.
2720    ///
2721    /// Supports ISO 8601 formats:
2722    /// - "2024-01-15" (date only, time defaults to 00:00:00)
2723    /// - "2024-01-15T10:30:00" (datetime)
2724    /// - "2024-01-15 10:30:00" (space separator)
2725    /// - "2024-01-15T10:30:00.123456" (with fractional seconds)
2726    /// - "2024-01-15T10:30:00Z" (UTC timezone)
2727    /// - "2024-01-15T10:30:00+05:30" (offset timezone)
2728    /// - "NaT" (Not a Timestamp)
2729    ///
2730    /// Matches `pd.Timestamp()` constructor behavior.
2731    pub fn parse(s: &str) -> Result<Self, TypeError> {
2732        let s = s.trim();
2733
2734        if s.eq_ignore_ascii_case("nat") {
2735            return Ok(Self::nat());
2736        }
2737
2738        let Some((datetime_part, tz)) = Self::split_timezone(s) else {
2739            return Err(TypeError::ValueNotParseable {
2740                value: s.to_string(),
2741                target: "Timestamp".to_string(),
2742            });
2743        };
2744
2745        let (date_part, time_part) = if datetime_part.contains('T') {
2746            datetime_part
2747                .split_once('T')
2748                .ok_or_else(|| TypeError::ValueNotParseable {
2749                    value: s.to_string(),
2750                    target: "Timestamp".to_string(),
2751                })?
2752        } else if datetime_part.contains(' ')
2753            && datetime_part.chars().filter(|&c| c == ' ').count() == 1
2754        {
2755            datetime_part
2756                .split_once(' ')
2757                .ok_or_else(|| TypeError::ValueNotParseable {
2758                    value: s.to_string(),
2759                    target: "Timestamp".to_string(),
2760                })?
2761        } else {
2762            (datetime_part, "00:00:00")
2763        };
2764
2765        let (year, month, day) =
2766            Self::parse_date(date_part).ok_or_else(|| TypeError::ValueNotParseable {
2767                value: s.to_string(),
2768                target: "Timestamp".to_string(),
2769            })?;
2770
2771        let (hour, minute, second, nanos) =
2772            Self::parse_time(time_part).ok_or_else(|| TypeError::ValueNotParseable {
2773                value: s.to_string(),
2774                target: "Timestamp".to_string(),
2775            })?;
2776
2777        let total_nanos = Self::ymd_hms_to_nanos(year, month, day, hour, minute, second, nanos);
2778
2779        Ok(if let Some(tz_name) = tz {
2780            Self::from_nanos_tz(total_nanos, tz_name)
2781        } else {
2782            Self::from_nanos(total_nanos)
2783        })
2784    }
2785
2786    fn split_timezone(s: &str) -> Option<(&str, Option<String>)> {
2787        if let Some(stripped) = s.strip_suffix('Z') {
2788            Some((stripped, Some("UTC".to_string())))
2789        } else if let Some(idx) = s.rfind('+') {
2790            if idx > 10 && Self::is_timezone_offset(&s[idx..]) {
2791                Some((&s[..idx], Some(s[idx..].to_string())))
2792            } else if idx > 10 {
2793                None
2794            } else {
2795                Some((s, None))
2796            }
2797        } else if let Some(idx) = s.rfind('-') {
2798            if idx > 10 && Self::is_timezone_offset(&s[idx..]) {
2799                Some((&s[..idx], Some(s[idx..].to_string())))
2800            } else if idx > 10 {
2801                None
2802            } else {
2803                Some((s, None))
2804            }
2805        } else {
2806            Some((s, None))
2807        }
2808    }
2809
2810    fn is_timezone_offset(s: &str) -> bool {
2811        let bytes = s.as_bytes();
2812        if bytes.len() != 6 || !matches!(bytes[0], b'+' | b'-') || bytes[3] != b':' {
2813            return false;
2814        }
2815        if !bytes[1..3].iter().all(u8::is_ascii_digit)
2816            || !bytes[4..6].iter().all(u8::is_ascii_digit)
2817        {
2818            return false;
2819        }
2820        let hour = u32::from(bytes[1] - b'0') * 10 + u32::from(bytes[2] - b'0');
2821        let minute = u32::from(bytes[4] - b'0') * 10 + u32::from(bytes[5] - b'0');
2822        hour <= 23 && minute <= 59
2823    }
2824
2825    fn parse_date(s: &str) -> Option<(i64, u32, u32)> {
2826        let parts: Vec<&str> = s.split('-').collect();
2827        if parts.len() != 3 {
2828            return None;
2829        }
2830        let year: i64 = parts[0].parse().ok()?;
2831        let month: u32 = parts[1].parse().ok()?;
2832        let day: u32 = parts[2].parse().ok()?;
2833        if !(1..=days_in_month(year, month)?).contains(&day) {
2834            return None;
2835        }
2836        Some((year, month, day))
2837    }
2838
2839    fn parse_time(s: &str) -> Option<(u32, u32, u32, u64)> {
2840        let (time_str, frac_str) = match s.split_once('.') {
2841            Some((time, frac)) => (time, Some(frac)),
2842            None => (s, None),
2843        };
2844        let parts: Vec<&str> = time_str.split(':').collect();
2845        if parts.is_empty() || parts.len() > 3 {
2846            return None;
2847        }
2848        let hour: u32 = parts.first().and_then(|p| p.parse().ok())?;
2849        let minute: u32 = parts.get(1).and_then(|p| p.parse().ok()).unwrap_or(0);
2850        let second: u32 = parts.get(2).and_then(|p| p.parse().ok()).unwrap_or(0);
2851
2852        if hour > 23 || minute > 59 || second > 59 {
2853            return None;
2854        }
2855
2856        let nanos = match frac_str {
2857            None => 0,
2858            Some(frac) => {
2859                if frac.is_empty() || !frac.bytes().all(|b| b.is_ascii_digit()) {
2860                    return None;
2861                }
2862                let truncated = &frac[..frac.len().min(9)];
2863                let padded = format!("{truncated:0<9}");
2864                padded.parse::<u64>().ok()?
2865            }
2866        };
2867
2868        Some((hour, minute, second, nanos))
2869    }
2870
2871    fn ymd_hms_to_nanos(
2872        year: i64,
2873        month: u32,
2874        day: u32,
2875        hour: u32,
2876        minute: u32,
2877        second: u32,
2878        sub_nanos: u64,
2879    ) -> i64 {
2880        let m = month as i64;
2881        let d = day as i64;
2882
2883        let y = if m <= 2 { year - 1 } else { year };
2884        let era = if y >= 0 { y } else { y - 399 } / 400;
2885        let yoe = y - era * 400;
2886        let doy = (153 * (if m > 2 { m - 3 } else { m + 9 }) + 2) / 5 + d - 1;
2887        let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy;
2888        let days_since_epoch = era * 146_097 + doe - 719_468;
2889
2890        let total_seconds = days_since_epoch * 86400
2891            + (hour as i64) * 3600
2892            + (minute as i64) * 60
2893            + (second as i64);
2894        total_seconds * Timedelta::NANOS_PER_SEC + sub_nanos as i64
2895    }
2896
2897    /// Format timestamp using strftime directives.
2898    ///
2899    /// Matches `pd.Timestamp.strftime(format)`. Supports: %Y (year), %m (month),
2900    /// %d (day), %H (hour), %M (minute), %S (second), %f (microsecond).
2901    /// NaT returns "NaT".
2902    #[must_use]
2903    pub fn strftime(&self, format: &str) -> String {
2904        if self.is_nat() {
2905            return "NaT".to_string();
2906        }
2907        let total_secs = self.nanos / Timedelta::NANOS_PER_SEC;
2908        // rem_euclid keeps the sub-second part in [0, 1e9) for negative nanos
2909        // (br-frankenpandas-wkjtw); == `%` for the post-1970 positive case.
2910        let sub_nanos = self.nanos.rem_euclid(Timedelta::NANOS_PER_SEC) as u64;
2911
2912        let days_since_epoch = total_secs / 86400;
2913        let secs_of_day = (total_secs % 86400 + 86400) % 86400;
2914
2915        let days = days_since_epoch + 719_468;
2916        let era = if days >= 0 { days } else { days - 146_096 } / 146_097;
2917        let doe = days - era * 146_097;
2918        let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146_096) / 365;
2919        let y = yoe + era * 400;
2920        let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
2921        let mp = (5 * doy + 2) / 153;
2922        let d = doy - (153 * mp + 2) / 5 + 1;
2923        let m = if mp < 10 { mp + 3 } else { mp - 9 };
2924        let year = if m <= 2 { y + 1 } else { y };
2925
2926        let hour = secs_of_day / 3600;
2927        let minute = (secs_of_day % 3600) / 60;
2928        let second = secs_of_day % 60;
2929        let micros = sub_nanos / 1000;
2930
2931        format
2932            .replace("%Y", &format!("{year:04}"))
2933            .replace("%m", &format!("{m:02}"))
2934            .replace("%d", &format!("{d:02}"))
2935            .replace("%H", &format!("{hour:02}"))
2936            .replace("%M", &format!("{minute:02}"))
2937            .replace("%S", &format!("{second:02}"))
2938            .replace("%f", &format!("{micros:06}"))
2939    }
2940
2941    /// Return the day of the week as a string (e.g., "Monday").
2942    ///
2943    /// Matches `pd.Timestamp.day_name()`. NaT returns "NaT".
2944    #[must_use]
2945    pub fn day_name(&self) -> String {
2946        const NAMES: [&str; 7] = [
2947            "Thursday",
2948            "Friday",
2949            "Saturday",
2950            "Sunday",
2951            "Monday",
2952            "Tuesday",
2953            "Wednesday",
2954        ];
2955        if self.is_nat() {
2956            return "NaT".to_string();
2957        }
2958        let days_since_epoch = self.nanos / Timedelta::NANOS_PER_DAY;
2959        let dow = ((days_since_epoch % 7) + 7) % 7;
2960        NAMES[dow as usize].to_string()
2961    }
2962
2963    /// Return the month name as a string (e.g., "January").
2964    ///
2965    /// Matches `pd.Timestamp.month_name()`. NaT returns "NaT".
2966    #[must_use]
2967    pub fn month_name(&self) -> String {
2968        const NAMES: [&str; 12] = [
2969            "January",
2970            "February",
2971            "March",
2972            "April",
2973            "May",
2974            "June",
2975            "July",
2976            "August",
2977            "September",
2978            "October",
2979            "November",
2980            "December",
2981        ];
2982        if self.is_nat() {
2983            return "NaT".to_string();
2984        }
2985        // Floor (not truncate) so pre-1970 instants with a sub-day part map to
2986        // the correct calendar day (br-frankenpandas-wkjtw); div_euclid == `/`
2987        // for the post-1970 positive case.
2988        let days_since_epoch = self.nanos.div_euclid(Timedelta::NANOS_PER_DAY);
2989        let days = days_since_epoch + 719_468;
2990        let era = if days >= 0 { days } else { days - 146_096 } / 146_097;
2991        let doe = days - era * 146_097;
2992        let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146_096) / 365;
2993        let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
2994        let mp = (5 * doy + 2) / 153;
2995        let m = if mp < 10 { mp + 3 } else { mp - 9 };
2996        NAMES[(m - 1) as usize].to_string()
2997    }
2998
2999    /// Localize a naive timestamp to a timezone.
3000    ///
3001    /// Matches `pd.Timestamp.tz_localize(tz)`. If `tz` is None, removes the
3002    /// timezone (makes timestamp naive). NaT propagates.
3003    #[must_use]
3004    pub fn tz_localize(&self, tz: Option<&str>) -> Self {
3005        if self.is_nat() {
3006            return Self::nat();
3007        }
3008        Self {
3009            nanos: self.nanos,
3010            tz: tz.map(String::from),
3011        }
3012    }
3013
3014    /// Convert timezone-aware timestamp to another timezone.
3015    ///
3016    /// Matches `pd.Timestamp.tz_convert(tz)`. If timestamp is naive (no tz),
3017    /// the timezone is simply attached without conversion. NaT propagates.
3018    /// Note: actual UTC offset conversion requires chrono-tz (Phase 3).
3019    #[must_use]
3020    pub fn tz_convert(&self, tz: &str) -> Self {
3021        if self.is_nat() {
3022            return Self::nat();
3023        }
3024        Self {
3025            nanos: self.nanos,
3026            tz: Some(tz.to_string()),
3027        }
3028    }
3029
3030    /// Create a Timestamp from a Unix timestamp (seconds since epoch).
3031    ///
3032    /// Matches `pd.Timestamp.fromtimestamp(ts)`. The optional `tz` parameter
3033    /// specifies the timezone to localize to.
3034    #[must_use]
3035    pub fn fromtimestamp(ts: f64, tz: Option<&str>) -> Self {
3036        if ts.is_nan() || ts.is_infinite() {
3037            return Self::nat();
3038        }
3039        let nanos_f64 = ts * 1_000_000_000.0;
3040        // Check for overflow before casting - i64 range is roughly ±9.2e18
3041        const MAX_NANOS: f64 = i64::MAX as f64;
3042        const MIN_NANOS: f64 = i64::MIN as f64;
3043        if !(MIN_NANOS..=MAX_NANOS).contains(&nanos_f64) {
3044            return Self::nat();
3045        }
3046        Self {
3047            nanos: nanos_f64 as i64,
3048            tz: tz.map(String::from),
3049        }
3050    }
3051
3052    /// Create a Timestamp from milliseconds since epoch.
3053    ///
3054    /// Convenience constructor complementing fromtimestamp.
3055    #[must_use]
3056    pub fn from_millis(ms: i64, tz: Option<&str>) -> Self {
3057        Self {
3058            nanos: ms.saturating_mul(1_000_000),
3059            tz: tz.map(String::from),
3060        }
3061    }
3062
3063    /// Create a Timestamp from microseconds since epoch.
3064    ///
3065    /// Convenience constructor complementing fromtimestamp.
3066    #[must_use]
3067    pub fn from_micros(us: i64, tz: Option<&str>) -> Self {
3068        Self {
3069            nanos: us.saturating_mul(1_000),
3070            tz: tz.map(String::from),
3071        }
3072    }
3073
3074    /// Return the timezone string, or None if naive.
3075    #[must_use]
3076    pub fn tzinfo(&self) -> Option<&str> {
3077        self.tz.as_deref()
3078    }
3079
3080    /// Return the timezone name, or None if naive.
3081    ///
3082    /// Alias for tzinfo() matching pandas Timestamp.tzname().
3083    #[must_use]
3084    pub fn tzname(&self) -> Option<&str> {
3085        self.tzinfo()
3086    }
3087}
3088
3089impl std::fmt::Display for Timestamp {
3090    /// Phase 2 debug-style format; Phase 3 replaces with pandas ISO-8601
3091    /// notation once chrono interpretation lands.
3092    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
3093        if self.is_nat() {
3094            return f.write_str("NaT");
3095        }
3096        match &self.tz {
3097            Some(tz) => write!(f, "Timestamp[{}, {}]", self.nanos, tz),
3098            None => write!(f, "Timestamp[{}, UTC]", self.nanos),
3099        }
3100    }
3101}
3102
3103impl PartialOrd for Timestamp {
3104    /// Orders by nanos axis; NaT is incomparable (`None`). Tz difference
3105    /// does not affect ordering — two Timestamps at the same absolute
3106    /// nanos compare equal regardless of tz label (Phase 3 will revisit
3107    /// whether tz affects ordering semantics).
3108    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
3109        if self.is_nat() || other.is_nat() {
3110            return None;
3111        }
3112        Some(self.nanos.cmp(&other.nanos))
3113    }
3114}
3115
3116// ── Missingness utilities ──────────────────────────────────────────────
3117
3118pub fn isna(values: &[Scalar]) -> Vec<bool> {
3119    values.iter().map(Scalar::is_missing).collect()
3120}
3121
3122pub fn isnull(values: &[Scalar]) -> Vec<bool> {
3123    isna(values)
3124}
3125
3126pub fn notna(values: &[Scalar]) -> Vec<bool> {
3127    values.iter().map(|v| !v.is_missing()).collect()
3128}
3129
3130pub fn notnull(values: &[Scalar]) -> Vec<bool> {
3131    notna(values)
3132}
3133
3134pub fn count_na(values: &[Scalar]) -> usize {
3135    values.iter().filter(|v| v.is_missing()).count()
3136}
3137
3138pub fn fill_na(values: &[Scalar], fill: &Scalar) -> Vec<Scalar> {
3139    values
3140        .iter()
3141        .map(|v| {
3142            if v.is_missing() {
3143                fill.clone()
3144            } else {
3145                v.clone()
3146            }
3147        })
3148        .collect()
3149}
3150
3151pub fn dropna(values: &[Scalar]) -> Vec<Scalar> {
3152    values.iter().filter(|v| !v.is_missing()).cloned().collect()
3153}
3154
3155// ── Nanops: null-skipping numeric reductions ───────────────────────────
3156
3157fn collect_finite(values: &[Scalar]) -> Vec<f64> {
3158    values
3159        .iter()
3160        .filter(|v| !v.is_missing())
3161        .filter_map(|v| v.to_f64().ok())
3162        .collect()
3163}
3164
3165/// Per br-frankenpandas-620mj: if a column is uniformly Timedelta64
3166/// (with optional NAT/Null missing), sum/mean preserve Timedelta dtype
3167/// matching pandas — instead of silently coercing to Float64(0.0) via
3168/// the collect_finite path (which drops Timedelta64 because to_f64
3169/// errors). Returns Some(sum_in_ns, observed_count) when applicable.
3170fn collect_timedelta_ns(values: &[Scalar]) -> Option<(i128, usize)> {
3171    let mut sum: i128 = 0;
3172    let mut count: usize = 0;
3173    let mut saw_timedelta = false;
3174    for v in values {
3175        if v.is_missing() {
3176            continue;
3177        }
3178        match v {
3179            Scalar::Timedelta64(ns) => {
3180                saw_timedelta = true;
3181                sum += i128::from(*ns);
3182                count += 1;
3183            }
3184            // Any non-Timedelta non-missing value bails out to the
3185            // existing Float64 path, preserving cross-type behavior.
3186            _ => return None,
3187        }
3188    }
3189    if saw_timedelta {
3190        Some((sum, count))
3191    } else {
3192        None
3193    }
3194}
3195
3196pub fn nansum(values: &[Scalar]) -> Scalar {
3197    if let Some((sum, _)) = collect_timedelta_ns(values) {
3198        let clamped = sum.clamp(i128::from(i64::MIN), i128::from(i64::MAX));
3199        return Scalar::Timedelta64(clamped as i64);
3200    }
3201    // Fused single-pass fold: filter missing / non-f64-coercible and accumulate
3202    // in one scan, avoiding the intermediate `collect_finite` Vec<f64> and its
3203    // second pass. Bit-identical to `collect_finite(..).iter().sum()`: same
3204    // finite values in the same order, same left-fold f64 `+` (empty -> 0.0).
3205    let mut sum = 0.0_f64;
3206    for v in values {
3207        if v.is_missing() {
3208            continue;
3209        }
3210        if let Ok(x) = v.to_f64() {
3211            sum += x;
3212        }
3213    }
3214    Scalar::Float64(sum)
3215}
3216
3217pub fn nanmean(values: &[Scalar]) -> Scalar {
3218    if let Some((sum, count)) = collect_timedelta_ns(values) {
3219        if count == 0 {
3220            return Scalar::Timedelta64(Timedelta::NAT);
3221        }
3222        let mean = sum / count as i128;
3223        let clamped = mean.clamp(i128::from(i64::MIN), i128::from(i64::MAX));
3224        return Scalar::Timedelta64(clamped as i64);
3225    }
3226    // Fused single-pass fold (see `nansum`): accumulate sum + count of finite
3227    // values in one scan. Bit-identical to the prior `collect_finite` two-pass:
3228    // count == nums.len(), sum folds the same values in the same order.
3229    let mut sum = 0.0_f64;
3230    let mut count = 0usize;
3231    for v in values {
3232        if v.is_missing() {
3233            continue;
3234        }
3235        if let Ok(x) = v.to_f64() {
3236            sum += x;
3237            count += 1;
3238        }
3239    }
3240    if count == 0 {
3241        return Scalar::Null(NullKind::NaN);
3242    }
3243    Scalar::Float64(sum / count as f64)
3244}
3245
3246pub fn nanany(values: &[Scalar]) -> Scalar {
3247    for v in values {
3248        if v.is_missing() {
3249            continue;
3250        }
3251        match v {
3252            Scalar::Bool(flag) if *flag => return Scalar::Bool(true),
3253            Scalar::Int64(val) if *val != 0 => return Scalar::Bool(true),
3254            Scalar::Float64(val) if !val.is_nan() && *val != 0.0 => return Scalar::Bool(true),
3255            Scalar::Utf8(val) if !val.is_empty() => return Scalar::Bool(true),
3256            // pandas Series([td]).any() returns True for any non-zero
3257            // Timedelta. NaT is already filtered by is_missing() above.
3258            Scalar::Timedelta64(ns) if *ns != 0 => return Scalar::Bool(true),
3259            _ => continue,
3260        }
3261    }
3262    Scalar::Bool(false)
3263}
3264
3265pub fn nanall(values: &[Scalar]) -> Scalar {
3266    for v in values {
3267        if v.is_missing() {
3268            continue;
3269        }
3270        match v {
3271            Scalar::Bool(flag) if !*flag => return Scalar::Bool(false),
3272            Scalar::Int64(val) if *val == 0 => return Scalar::Bool(false),
3273            Scalar::Float64(val) if val.is_nan() || *val == 0.0 => return Scalar::Bool(false),
3274            Scalar::Utf8(val) if val.is_empty() => return Scalar::Bool(false),
3275            // pandas Series([td(0)]).all() returns False; any non-zero
3276            // Timedelta is truthy. NaT is already filtered by is_missing.
3277            Scalar::Timedelta64(ns) if *ns == 0 => return Scalar::Bool(false),
3278            _ => continue,
3279        }
3280    }
3281    Scalar::Bool(true)
3282}
3283
3284pub fn nancount(values: &[Scalar]) -> Scalar {
3285    let n = values.iter().filter(|v| !v.is_missing()).count();
3286    Scalar::Int64(n as i64)
3287}
3288
3289pub fn nanmin(values: &[Scalar]) -> Scalar {
3290    let mut min: Option<&Scalar> = None;
3291    for v in values {
3292        if v.is_missing() {
3293            continue;
3294        }
3295        match (min, v) {
3296            (None, _) => min = Some(v),
3297            (Some(Scalar::Int64(a)), Scalar::Int64(b)) => {
3298                if b < a {
3299                    min = Some(v)
3300                }
3301            }
3302            (Some(Scalar::Float64(a)), Scalar::Float64(b)) => {
3303                if *b < *a {
3304                    min = Some(v)
3305                }
3306            }
3307            (Some(Scalar::Utf8(a)), Scalar::Utf8(b)) => {
3308                if b < a {
3309                    min = Some(v)
3310                }
3311            }
3312            (Some(Scalar::Bool(a)), Scalar::Bool(b)) => {
3313                if b < a {
3314                    min = Some(v)
3315                }
3316            }
3317            // Per br-frankenpandas-yic5m: Timedelta64.to_f64() errors, so
3318            // the catch-all below would silently return NaN. Compare ns
3319            // representations directly; NAT is already filtered by
3320            // is_missing() above.
3321            (Some(Scalar::Timedelta64(a)), Scalar::Timedelta64(b)) => {
3322                if b < a {
3323                    min = Some(v)
3324                }
3325            }
3326            (Some(a), b) => match (a.to_f64(), b.to_f64()) {
3327                (Ok(af), Ok(bf)) if bf < af => min = Some(v),
3328                (Ok(_), Ok(_)) => {}
3329                _ => return Scalar::Null(NullKind::NaN),
3330            },
3331        }
3332    }
3333    match min {
3334        Some(v) => v.clone(),
3335        None => Scalar::Null(NullKind::NaN),
3336    }
3337}
3338
3339pub fn nanmax(values: &[Scalar]) -> Scalar {
3340    let mut max: Option<&Scalar> = None;
3341    for v in values {
3342        if v.is_missing() {
3343            continue;
3344        }
3345        match (max, v) {
3346            (None, _) => max = Some(v),
3347            (Some(Scalar::Int64(a)), Scalar::Int64(b)) => {
3348                if b > a {
3349                    max = Some(v)
3350                }
3351            }
3352            (Some(Scalar::Float64(a)), Scalar::Float64(b)) => {
3353                if *b > *a {
3354                    max = Some(v)
3355                }
3356            }
3357            (Some(Scalar::Utf8(a)), Scalar::Utf8(b)) => {
3358                if b > a {
3359                    max = Some(v)
3360                }
3361            }
3362            (Some(Scalar::Bool(a)), Scalar::Bool(b)) => {
3363                if b > a {
3364                    max = Some(v)
3365                }
3366            }
3367            // Per br-frankenpandas-yic5m: Timedelta64.to_f64() errors, so
3368            // the catch-all below would silently return NaN. Compare ns
3369            // representations directly; NAT is already filtered above.
3370            (Some(Scalar::Timedelta64(a)), Scalar::Timedelta64(b)) => {
3371                if b > a {
3372                    max = Some(v)
3373                }
3374            }
3375            (Some(a), b) => match (a.to_f64(), b.to_f64()) {
3376                (Ok(af), Ok(bf)) if bf > af => max = Some(v),
3377                (Ok(_), Ok(_)) => {}
3378                _ => return Scalar::Null(NullKind::NaN),
3379            },
3380        }
3381    }
3382    match max {
3383        Some(v) => v.clone(),
3384        None => Scalar::Null(NullKind::NaN),
3385    }
3386}
3387
3388/// Per br-frankenpandas-j8ntk: harvest ns values from a uniformly-Timedelta64
3389/// input as f64 (the f64 representation has 53 bits of mantissa, sufficient
3390/// for ns spans up to ~104 days exactly; beyond that pandas itself loses
3391/// precision the same way). Returns None if any non-missing value is not
3392/// Timedelta64.
3393fn collect_timedelta_ns_f64(values: &[Scalar]) -> Option<Vec<f64>> {
3394    let mut out = Vec::with_capacity(values.len());
3395    let mut saw_td = false;
3396    for v in values {
3397        if v.is_missing() {
3398            continue;
3399        }
3400        match v {
3401            Scalar::Timedelta64(ns) => {
3402                saw_td = true;
3403                out.push(*ns as f64);
3404            }
3405            _ => return None,
3406        }
3407    }
3408    if saw_td { Some(out) } else { None }
3409}
3410
3411/// Clamp an f64 result into i64 range and wrap as Scalar::Timedelta64.
3412fn float_ns_to_timedelta(value: f64) -> Scalar {
3413    if !value.is_finite() {
3414        return Scalar::Timedelta64(Timedelta::NAT);
3415    }
3416    let clamped = value.clamp(i64::MIN as f64, i64::MAX as f64);
3417    Scalar::Timedelta64(clamped as i64)
3418}
3419
3420pub fn nanmedian(values: &[Scalar]) -> Scalar {
3421    // Per br-frankenpandas-j8ntk: Timedelta64 median preserves dtype.
3422    if let Some(mut td) = collect_timedelta_ns_f64(values) {
3423        if td.is_empty() {
3424            return Scalar::Timedelta64(Timedelta::NAT);
3425        }
3426        // O(n) selection instead of a full sort (see the numeric arm below):
3427        // collect_timedelta_ns_f64 yields finite ns (NaT excluded), so the
3428        // comparator is a total order; order statistics depend only on values,
3429        // so the unstable partition yields the same td[mid-1]/td[mid].
3430        let n = td.len();
3431        let mid = n / 2;
3432        let cmp = |a: &f64, b: &f64| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal);
3433        let (left, mid_ref, _right) = td.select_nth_unstable_by(mid, cmp);
3434        let mid_val = *mid_ref;
3435        let median_ns = if n.is_multiple_of(2) {
3436            let lower = left.iter().copied().fold(f64::NEG_INFINITY, f64::max);
3437            (lower + mid_val) / 2.0
3438        } else {
3439            mid_val
3440        };
3441        return float_ns_to_timedelta(median_ns);
3442    }
3443    let mut nums = collect_finite(values);
3444    if nums.is_empty() {
3445        return Scalar::Null(NullKind::NaN);
3446    }
3447    // O(n) selection instead of an O(n log n) full sort: select_nth_unstable_by
3448    // places the `mid`-th smallest at index `mid` with all smaller elements
3449    // (unordered) in the left partition. For even n the (mid-1)-th smallest is
3450    // the MAX of that left partition. Bit-identical to the sort path: order
3451    // statistics depend only on VALUES, and ties share a value, so the
3452    // unstable partition yields the same nums[mid-1]/nums[mid] the sort did.
3453    let n = nums.len();
3454    let mid = n / 2;
3455    let cmp = |a: &f64, b: &f64| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal);
3456    let (left, mid_ref, _right) = nums.select_nth_unstable_by(mid, cmp);
3457    let mid_val = *mid_ref;
3458    if n.is_multiple_of(2) {
3459        let lower = left.iter().copied().fold(f64::NEG_INFINITY, f64::max);
3460        Scalar::Float64((lower + mid_val) / 2.0)
3461    } else {
3462        Scalar::Float64(mid_val)
3463    }
3464}
3465
3466pub fn nanvar(values: &[Scalar], ddof: usize) -> Scalar {
3467    // Per br-frankenpandas-j8ntk: Timedelta64 var preserves dtype — pandas
3468    // returns Timedelta even though variance is ns² conceptually; matching.
3469    if let Some(td) = collect_timedelta_ns_f64(values) {
3470        if td.len() <= ddof {
3471            return Scalar::Timedelta64(Timedelta::NAT);
3472        }
3473        let mean: f64 = td.iter().sum::<f64>() / td.len() as f64;
3474        let sum_sq: f64 = td.iter().map(|x| (x - mean).powi(2)).sum();
3475        return float_ns_to_timedelta(sum_sq / (td.len() - ddof) as f64);
3476    }
3477    let nums = collect_finite(values);
3478    if nums.len() <= ddof {
3479        return Scalar::Null(NullKind::NaN);
3480    }
3481    let mean: f64 = nums.iter().sum::<f64>() / nums.len() as f64;
3482    let sum_sq: f64 = nums.iter().map(|x| (x - mean).powi(2)).sum();
3483    Scalar::Float64(sum_sq / (nums.len() - ddof) as f64)
3484}
3485
3486pub fn nanstd(values: &[Scalar], ddof: usize) -> Scalar {
3487    // Per br-frankenpandas-j8ntk: Timedelta64 std preserves dtype.
3488    if let Some(td) = collect_timedelta_ns_f64(values) {
3489        if td.len() <= ddof {
3490            return Scalar::Timedelta64(Timedelta::NAT);
3491        }
3492        let mean: f64 = td.iter().sum::<f64>() / td.len() as f64;
3493        let sum_sq: f64 = td.iter().map(|x| (x - mean).powi(2)).sum();
3494        let var = sum_sq / (td.len() - ddof) as f64;
3495        return float_ns_to_timedelta(var.sqrt());
3496    }
3497    match nanvar(values, ddof) {
3498        Scalar::Float64(v) => Scalar::Float64(v.sqrt()),
3499        other => other,
3500    }
3501}
3502
3503/// Standard error of the mean over non-missing values.
3504///
3505/// Matches `pd.Series.sem(ddof=1)` / `scipy.stats.sem`. Computed as
3506/// `std(values, ddof) / sqrt(n)` where `n` is the non-missing count.
3507/// Returns `Null(NaN)` when `n <= ddof`.
3508pub fn nansem(values: &[Scalar], ddof: usize) -> Scalar {
3509    // Per br-frankenpandas-j8ntk: Timedelta64 sem preserves dtype.
3510    if let Some(td) = collect_timedelta_ns_f64(values) {
3511        if td.len() <= ddof {
3512            return Scalar::Timedelta64(Timedelta::NAT);
3513        }
3514        let mean: f64 = td.iter().sum::<f64>() / td.len() as f64;
3515        let sum_sq: f64 = td.iter().map(|x| (x - mean).powi(2)).sum();
3516        let var = sum_sq / (td.len() - ddof) as f64;
3517        let std = var.sqrt();
3518        return float_ns_to_timedelta(std / (td.len() as f64).sqrt());
3519    }
3520    let nums = collect_finite(values);
3521    if nums.len() <= ddof {
3522        return Scalar::Null(NullKind::NaN);
3523    }
3524    match nanstd(values, ddof) {
3525        Scalar::Float64(s) => Scalar::Float64(s / (nums.len() as f64).sqrt()),
3526        other => other,
3527    }
3528}
3529
3530/// Peak-to-peak range of non-missing values (max − min).
3531///
3532/// Matches `np.ptp` behavior on nan-safe inputs. Returns `Null(NaN)`
3533/// for empty or all-missing inputs.
3534pub fn nanptp(values: &[Scalar]) -> Scalar {
3535    // Per br-frankenpandas-u2g0r: Timedelta64 peak-to-peak returns
3536    // Timedelta64 (max - min in ns). collect_timedelta_ns_f64 is defined
3537    // in the cumulative-aggregations section below.
3538    if let Some(td) = collect_timedelta_ns_f64(values) {
3539        if td.is_empty() {
3540            return Scalar::Timedelta64(Timedelta::NAT);
3541        }
3542        let (mut lo, mut hi) = (f64::INFINITY, f64::NEG_INFINITY);
3543        for x in &td {
3544            if *x < lo {
3545                lo = *x;
3546            }
3547            if *x > hi {
3548                hi = *x;
3549            }
3550        }
3551        return float_ns_to_timedelta(hi - lo);
3552    }
3553    // Fused single-pass min/max (see `nansum`): track lo/hi while filtering, no
3554    // intermediate Vec<f64>. Bit-identical to the prior collect_finite two-pass:
3555    // `seen` is true exactly when collect_finite would be non-empty, and the
3556    // lo/hi comparisons fold the same finite values in the same order.
3557    let (mut lo, mut hi) = (f64::INFINITY, f64::NEG_INFINITY);
3558    let mut seen = false;
3559    for v in values {
3560        if v.is_missing() {
3561            continue;
3562        }
3563        if let Ok(x) = v.to_f64() {
3564            seen = true;
3565            if x < lo {
3566                lo = x;
3567            }
3568            if x > hi {
3569                hi = x;
3570            }
3571        }
3572    }
3573    if !seen {
3574        return Scalar::Null(NullKind::NaN);
3575    }
3576    Scalar::Float64(hi - lo)
3577}
3578
3579/// Sample skewness (bias-corrected, Fisher-Pearson) over non-missing values.
3580///
3581/// Matches `pd.Series.skew()`. Requires at least 3 non-missing values;
3582/// returns `Null(NaN)` otherwise, and when the sample standard deviation
3583/// is zero.
3584pub fn nanskew(values: &[Scalar]) -> Scalar {
3585    let nums = collect_finite(values);
3586    let n = nums.len() as f64;
3587    if n < 3.0 {
3588        return Scalar::Null(NullKind::NaN);
3589    }
3590    let mean = nums.iter().sum::<f64>() / n;
3591    let m2: f64 = nums.iter().map(|x| (x - mean).powi(2)).sum();
3592    let m3: f64 = nums.iter().map(|x| (x - mean).powi(3)).sum();
3593    let s2 = m2 / (n - 1.0);
3594    if s2 == 0.0 {
3595        return Scalar::Float64(0.0);
3596    }
3597    let s3 = s2.powf(1.5);
3598    Scalar::Float64((n / ((n - 1.0) * (n - 2.0))) * (m3 / s3))
3599}
3600
3601/// Excess sample kurtosis (Fisher's definition, bias-corrected) over
3602/// non-missing values.
3603///
3604/// Matches `pd.Series.kurt()`. Requires at least 4 non-missing values;
3605/// returns `Null(NaN)` otherwise, and when the sample standard deviation
3606/// is zero.
3607pub fn nankurt(values: &[Scalar]) -> Scalar {
3608    let nums = collect_finite(values);
3609    let n = nums.len() as f64;
3610    if n < 4.0 {
3611        return Scalar::Null(NullKind::NaN);
3612    }
3613    let mean = nums.iter().sum::<f64>() / n;
3614    let m2: f64 = nums.iter().map(|x| (x - mean).powi(2)).sum();
3615    let m4: f64 = nums.iter().map(|x| (x - mean).powi(4)).sum();
3616    let s2 = m2 / (n - 1.0);
3617    if s2 == 0.0 {
3618        return Scalar::Float64(0.0);
3619    }
3620    let adj = (n * (n + 1.0)) / ((n - 1.0) * (n - 2.0) * (n - 3.0));
3621    let sub = (3.0 * (n - 1.0).powi(2)) / ((n - 2.0) * (n - 3.0));
3622    Scalar::Float64(adj * (m4 / (s2 * s2)) - sub)
3623}
3624
3625/// Product of non-missing values. Returns 1.0 for empty input (matching pandas).
3626pub fn nanprod(values: &[Scalar]) -> Scalar {
3627    // Per br-frankenpandas-szq6a: pandas raises TypeError on
3628    // td_series.prod() because Timedelta² has no dimension. Returning the
3629    // misleading Float64(1.0) (empty-iterator default after collect_finite
3630    // drops every Timedelta64) is worse than surfacing missing. NaT
3631    // propagates the "type-incompatible" signal in lieu of a Result-level
3632    // error.
3633    if is_timedelta_input(values) {
3634        return Scalar::Null(NullKind::NaN);
3635    }
3636    // Fused single-pass fold (see `nansum`): filter missing / non-coercible and
3637    // multiply in one scan, no intermediate Vec<f64>. Bit-identical to
3638    // `collect_finite(..).iter().product()`: same finite values, same order,
3639    // same f64 `*` (Product for f64 == fold(1.0, *)); empty -> 1.0.
3640    let mut prod = 1.0_f64;
3641    for v in values {
3642        if v.is_missing() {
3643            continue;
3644        }
3645        if let Ok(x) = v.to_f64() {
3646            prod *= x;
3647        }
3648    }
3649    Scalar::Float64(prod)
3650}
3651
3652/// Cumulative sum respecting null propagation.
3653///
3654/// Per br-frankenpandas-x0x91: detect uniformly-Timedelta64 input
3655/// (allowing Null/NAT missing markers). Returns true when at least one
3656/// non-missing value is Timedelta64 and no other dtype appears.
3657fn is_timedelta_input(values: &[Scalar]) -> bool {
3658    let mut saw_td = false;
3659    for v in values {
3660        if v.is_missing() {
3661            continue;
3662        }
3663        match v {
3664            Scalar::Timedelta64(_) => saw_td = true,
3665            _ => return false,
3666        }
3667    }
3668    saw_td
3669}
3670
3671/// Per br-frankenpandas-x0x91: cumulative running aggregation over a
3672/// uniformly-Timedelta64 input. NaT/Null positions emit NaT and skip
3673/// the accumulator. Saturating i128 keeps overflow contained at i64
3674/// bounds when emitting.
3675fn timedelta_cumulative<F>(values: &[Scalar], init: i128, mut step: F) -> Vec<Scalar>
3676where
3677    F: FnMut(i128, i128) -> i128,
3678{
3679    let mut out = Vec::with_capacity(values.len());
3680    let mut running: i128 = init;
3681    for v in values {
3682        if v.is_missing() {
3683            out.push(Scalar::Null(NullKind::NaT));
3684            continue;
3685        }
3686        if let Scalar::Timedelta64(ns) = v {
3687            running = step(running, i128::from(*ns));
3688            let clamped = running.clamp(i128::from(i64::MIN), i128::from(i64::MAX));
3689            out.push(Scalar::Timedelta64(clamped as i64));
3690        } else {
3691            out.push(Scalar::Null(NullKind::NaT));
3692        }
3693    }
3694    out
3695}
3696
3697/// Per br-frankenpandas-x0x91: running extrema (min/max) over a
3698/// uniformly-Timedelta64 input. `sentinel` is the identity element
3699/// (i64::MAX for min, i64::MIN for max) used until the first
3700/// non-missing value initializes the accumulator.
3701fn timedelta_cumulative_extrema<F>(values: &[Scalar], sentinel: i64, mut step: F) -> Vec<Scalar>
3702where
3703    F: FnMut(i64, i64) -> i64,
3704{
3705    let mut out = Vec::with_capacity(values.len());
3706    let mut running: Option<i64> = None;
3707    for v in values {
3708        if v.is_missing() {
3709            out.push(Scalar::Null(NullKind::NaT));
3710            continue;
3711        }
3712        if let Scalar::Timedelta64(ns) = v {
3713            let new_val = match running {
3714                Some(prev) => step(prev, *ns),
3715                None => *ns,
3716            };
3717            running = Some(new_val);
3718            out.push(Scalar::Timedelta64(new_val));
3719        } else {
3720            out.push(Scalar::Null(NullKind::NaT));
3721        }
3722    }
3723    let _ = sentinel; // silence unused warning if closure ignores it
3724    out
3725}
3726
3727/// Matches `np.nancumsum` / `pd.Series.cumsum()`. Missing input positions
3728/// pass through as `Null(NaN)` in the output; the running sum ignores
3729/// those positions when accumulating.
3730pub fn nancumsum(values: &[Scalar]) -> Vec<Scalar> {
3731    // Per br-frankenpandas-x0x91: when input is uniformly Timedelta64 (with
3732    // optional NaT/Null missing markers), preserve Timedelta dtype to match
3733    // pandas td_series.cumsum() returning Timedelta64.
3734    if is_timedelta_input(values) {
3735        return timedelta_cumulative(values, 0_i128, |acc, x| acc.saturating_add(x));
3736    }
3737    let mut out = Vec::with_capacity(values.len());
3738    let mut running = 0.0_f64;
3739    for v in values {
3740        if v.is_missing() {
3741            out.push(Scalar::Null(NullKind::NaN));
3742            continue;
3743        }
3744        match v.to_f64() {
3745            Ok(x) if !x.is_nan() => {
3746                running += x;
3747                out.push(Scalar::Float64(running));
3748            }
3749            _ => out.push(Scalar::Null(NullKind::NaN)),
3750        }
3751    }
3752    out
3753}
3754
3755/// Cumulative product respecting null propagation.
3756///
3757/// Matches `np.nancumprod` / `pd.Series.cumprod()`. Missing positions
3758/// pass through as `Null(NaN)` without advancing the running product.
3759pub fn nancumprod(values: &[Scalar]) -> Vec<Scalar> {
3760    let mut out = Vec::with_capacity(values.len());
3761    let mut running = 1.0_f64;
3762    for v in values {
3763        if v.is_missing() {
3764            out.push(Scalar::Null(NullKind::NaN));
3765            continue;
3766        }
3767        match v.to_f64() {
3768            Ok(x) if !x.is_nan() => {
3769                running *= x;
3770                out.push(Scalar::Float64(running));
3771            }
3772            _ => out.push(Scalar::Null(NullKind::NaN)),
3773        }
3774    }
3775    out
3776}
3777
3778/// Cumulative maximum respecting null propagation.
3779///
3780/// Matches `pd.Series.cummax()`. Missing positions pass through as
3781/// `Null(NaN)` without updating the running maximum. The first
3782/// non-missing value initializes the running maximum.
3783pub fn nancummax(values: &[Scalar]) -> Vec<Scalar> {
3784    // Per br-frankenpandas-x0x91: Timedelta64 preserves dtype.
3785    if is_timedelta_input(values) {
3786        return timedelta_cumulative_extrema(values, i64::MAX, |acc, x| acc.max(x));
3787    }
3788    let mut out = Vec::with_capacity(values.len());
3789    let mut running: Option<f64> = None;
3790    for v in values {
3791        if v.is_missing() {
3792            out.push(Scalar::Null(NullKind::NaN));
3793            continue;
3794        }
3795        match v.to_f64() {
3796            Ok(x) if !x.is_nan() => {
3797                let new_val = match running {
3798                    Some(prev) => prev.max(x),
3799                    None => x,
3800                };
3801                running = Some(new_val);
3802                out.push(Scalar::Float64(new_val));
3803            }
3804            _ => out.push(Scalar::Null(NullKind::NaN)),
3805        }
3806    }
3807    out
3808}
3809
3810/// Cumulative minimum respecting null propagation.
3811///
3812/// Matches `pd.Series.cummin()`. Symmetric to `nancummax`.
3813pub fn nancummin(values: &[Scalar]) -> Vec<Scalar> {
3814    // Per br-frankenpandas-x0x91: Timedelta64 preserves dtype.
3815    if is_timedelta_input(values) {
3816        return timedelta_cumulative_extrema(values, i64::MIN, |acc, x| acc.min(x));
3817    }
3818    let mut out = Vec::with_capacity(values.len());
3819    let mut running: Option<f64> = None;
3820    for v in values {
3821        if v.is_missing() {
3822            out.push(Scalar::Null(NullKind::NaN));
3823            continue;
3824        }
3825        match v.to_f64() {
3826            Ok(x) if !x.is_nan() => {
3827                let new_val = match running {
3828                    Some(prev) => prev.min(x),
3829                    None => x,
3830                };
3831                running = Some(new_val);
3832                out.push(Scalar::Float64(new_val));
3833            }
3834            _ => out.push(Scalar::Null(NullKind::NaN)),
3835        }
3836    }
3837    out
3838}
3839
3840/// Linear-interpolation quantile over non-missing numeric values.
3841///
3842/// Matches `np.nanquantile(values, q)` with `interpolation='linear'`.
3843/// Returns `Null(NaN)` for empty inputs or when `q` is outside
3844/// `[0.0, 1.0]`.
3845pub fn nanquantile(values: &[Scalar], q: f64) -> Scalar {
3846    if !(0.0..=1.0).contains(&q) {
3847        return Scalar::Null(NullKind::NaN);
3848    }
3849    // Per br-frankenpandas-5djk7: pandas td_series.quantile(q) returns
3850    // Timedelta64 with linear-interpolated ns. Was silently NaN before.
3851    if let Some(mut td) = collect_timedelta_ns_f64(values) {
3852        if td.is_empty() {
3853            return Scalar::Timedelta64(Timedelta::NAT);
3854        }
3855        let n = td.len();
3856        if n == 1 {
3857            return float_ns_to_timedelta(td[0]);
3858        }
3859        let pos = q * (n - 1) as f64;
3860        let lo = pos.floor() as usize;
3861        let hi = pos.ceil() as usize;
3862        // O(n) selection instead of a full sort (see the numeric arm below):
3863        // select the lo-th order statistic; the (lo+1)-th is the MIN of the
3864        // right partition. Bit-identical (finite ns, values-only order stats).
3865        let cmp = |a: &f64, b: &f64| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal);
3866        let (_left, lo_ref, right) = td.select_nth_unstable_by(lo, cmp);
3867        let lo_val = *lo_ref;
3868        let ns = if lo == hi {
3869            lo_val
3870        } else {
3871            let hi_val = right.iter().copied().fold(f64::INFINITY, f64::min);
3872            let weight = pos - lo as f64;
3873            lo_val + (hi_val - lo_val) * weight
3874        };
3875        return float_ns_to_timedelta(ns);
3876    }
3877    let mut nums = collect_finite(values);
3878    if nums.is_empty() {
3879        return Scalar::Null(NullKind::NaN);
3880    }
3881    let n = nums.len();
3882    if n == 1 {
3883        return Scalar::Float64(nums[0]);
3884    }
3885    let pos = q * (n - 1) as f64;
3886    let lo = pos.floor() as usize;
3887    let hi = pos.ceil() as usize;
3888    // O(n) selection instead of a full sort: select the `lo`-th order statistic;
3889    // when interpolation is needed (hi == lo+1) the (lo+1)-th smallest is the
3890    // MIN of the right partition. Bit-identical to the sort path (same
3891    // nums[lo]/nums[hi] values, since order statistics depend only on values).
3892    let cmp = |a: &f64, b: &f64| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal);
3893    let (_left, lo_ref, right) = nums.select_nth_unstable_by(lo, cmp);
3894    let lo_val = *lo_ref;
3895    if lo == hi {
3896        return Scalar::Float64(lo_val);
3897    }
3898    let hi_val = right.iter().copied().fold(f64::INFINITY, f64::min);
3899    let weight = pos - lo as f64;
3900    Scalar::Float64(lo_val + (hi_val - lo_val) * weight)
3901}
3902
3903/// Position (in the original slice) of the non-missing maximum.
3904///
3905/// Matches `np.nanargmax`. Returns `None` if every value is missing.
3906/// Ties resolve to the first position seen (matching numpy).
3907pub fn nanargmax(values: &[Scalar]) -> Option<usize> {
3908    // Per br-frankenpandas-ql1t5: Timedelta64.to_f64() errors, so the
3909    // generic path would silently skip every Timedelta64 value and
3910    // return None. Pandas td_series.argmax() returns the position of
3911    // the largest Timedelta — compare i64 ns directly.
3912    if is_timedelta_input(values) {
3913        let mut best: Option<(usize, i64)> = None;
3914        for (i, v) in values.iter().enumerate() {
3915            if v.is_missing() {
3916                continue;
3917            }
3918            if let Scalar::Timedelta64(ns) = v {
3919                match best {
3920                    None => best = Some((i, *ns)),
3921                    Some((_, cur)) if *ns > cur => best = Some((i, *ns)),
3922                    _ => {}
3923                }
3924            }
3925        }
3926        return best.map(|(i, _)| i);
3927    }
3928    let mut best: Option<(usize, f64)> = None;
3929    for (i, v) in values.iter().enumerate() {
3930        if v.is_missing() {
3931            continue;
3932        }
3933        if let Ok(x) = v.to_f64() {
3934            if x.is_nan() {
3935                continue;
3936            }
3937            match best {
3938                None => best = Some((i, x)),
3939                Some((_, cur)) if x > cur => best = Some((i, x)),
3940                _ => {}
3941            }
3942        }
3943    }
3944    best.map(|(i, _)| i)
3945}
3946
3947/// Position (in the original slice) of the non-missing minimum.
3948///
3949/// Matches `np.nanargmin`. Returns `None` if every value is missing.
3950pub fn nanargmin(values: &[Scalar]) -> Option<usize> {
3951    // Per br-frankenpandas-ql1t5: Timedelta64 argmin via i64 ns compare.
3952    if is_timedelta_input(values) {
3953        let mut best: Option<(usize, i64)> = None;
3954        for (i, v) in values.iter().enumerate() {
3955            if v.is_missing() {
3956                continue;
3957            }
3958            if let Scalar::Timedelta64(ns) = v {
3959                match best {
3960                    None => best = Some((i, *ns)),
3961                    Some((_, cur)) if *ns < cur => best = Some((i, *ns)),
3962                    _ => {}
3963                }
3964            }
3965        }
3966        return best.map(|(i, _)| i);
3967    }
3968    let mut best: Option<(usize, f64)> = None;
3969    for (i, v) in values.iter().enumerate() {
3970        if v.is_missing() {
3971            continue;
3972        }
3973        if let Ok(x) = v.to_f64() {
3974            if x.is_nan() {
3975                continue;
3976            }
3977            match best {
3978                None => best = Some((i, x)),
3979                Some((_, cur)) if x < cur => best = Some((i, x)),
3980                _ => {}
3981            }
3982        }
3983    }
3984    best.map(|(i, _)| i)
3985}
3986
3987/// Count of unique non-missing values.
3988pub fn nannunique(values: &[Scalar]) -> Scalar {
3989    use rustc_hash::FxHashSet;
3990    #[derive(Hash, PartialEq, Eq)]
3991    enum ScalarKey<'a> {
3992        Bool(bool),
3993        Int64(i64),
3994        FloatBits(u64),
3995        Utf8(&'a str),
3996        Timedelta64(i64),
3997        Datetime64(i64),
3998        Period(i64, PeriodFreq),
3999        Interval(u64, u64, IntervalClosed),
4000    }
4001
4002    let mut seen = FxHashSet::default();
4003    for val in values {
4004        if val.is_missing() {
4005            continue;
4006        }
4007        let key = match val {
4008            Scalar::Bool(v) => ScalarKey::Bool(*v),
4009            Scalar::Int64(v) => ScalarKey::Int64(*v),
4010            Scalar::Float64(v) => {
4011                let normalized = if *v == 0.0 { 0.0 } else { *v };
4012                ScalarKey::FloatBits(normalized.to_bits())
4013            }
4014            Scalar::Utf8(v) => ScalarKey::Utf8(v.as_str()),
4015            Scalar::Timedelta64(v) => ScalarKey::Timedelta64(*v),
4016            Scalar::Datetime64(v) => ScalarKey::Datetime64(*v),
4017            Scalar::Period(p) => ScalarKey::Period(p.ordinal, p.freq),
4018            Scalar::Interval(v) => ScalarKey::Interval(
4019                normalized_float_bits(v.left),
4020                normalized_float_bits(v.right),
4021                v.closed,
4022            ),
4023            Scalar::Null(_) => continue,
4024        };
4025        seen.insert(key);
4026    }
4027    Scalar::Int64(seen.len() as i64)
4028}
4029
/// Bit pattern of `value`, with negative zero collapsed onto positive zero.
///
/// `-0.0 == 0.0` under float comparison, so both zeros take the first branch
/// and share the bits of `+0.0`; every other value (NaN included) keeps its
/// own bit pattern.
fn normalized_float_bits(value: f64) -> u64 {
    if value == 0.0 {
        0.0_f64.to_bits()
    } else {
        value.to_bits()
    }
}
4034
4035// ── Interval types (br-frankenpandas-j8k4 Phase 1) ──────────────────────
4036//
4037// Scaffolding for pandas `pd.Interval` / `pd.IntervalIndex` / `pd.IntervalDtype`.
4038//
4039// Phase 1 ships float-valued intervals only (matches `cut`/`qcut` output on
4040// numeric bins — the dominant pandas use case). Generic-subtype intervals
4041// over Int64 / Timestamp are deferred to Phase 2 alongside the DType::Interval
4042// enum-variant wiring. See br-j8k4 for the phased roadmap.
4043//
4044// Semantics mirror pandas: closed tells which endpoints are included.
4045//   Left    → [left, right)
4046//   Right   → (left, right]       ← pandas default
4047//   Both    → [left, right]
4048//   Neither → (left, right)
4049
/// Endpoint-inclusion policy for an `Interval`.
///
/// Matches pandas `pd.Interval.closed` / `pd.IntervalDtype.closed` string
/// values ("left" / "right" / "both" / "neither").
///
/// The serde representation uses the snake_case variant names, and
/// `Default` yields [`IntervalClosed::Right`]. The derived `PartialOrd` /
/// `Ord` follow the declaration order of the variants below, so reordering
/// them changes comparison results.
#[derive(
    Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default, Serialize, Deserialize,
)]
#[serde(rename_all = "snake_case")]
#[non_exhaustive]
pub enum IntervalClosed {
    /// `[left, right)` — left-inclusive, right-exclusive.
    Left,
    /// `(left, right]` — left-exclusive, right-inclusive. Pandas default,
    /// and the `Default` value of this enum.
    #[default]
    Right,
    /// `[left, right]` — both endpoints included.
    Both,
    /// `(left, right)` — neither endpoint included.
    Neither,
}
4070
4071impl IntervalClosed {
4072    /// Left endpoint included?
4073    #[must_use]
4074    pub fn left_closed(self) -> bool {
4075        matches!(self, Self::Left | Self::Both)
4076    }
4077
4078    /// Right endpoint included?
4079    #[must_use]
4080    pub fn right_closed(self) -> bool {
4081        matches!(self, Self::Right | Self::Both)
4082    }
4083}
4084
4085impl std::fmt::Display for IntervalClosed {
4086    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
4087        match self {
4088            Self::Left => write!(f, "left"),
4089            Self::Right => write!(f, "right"),
4090            Self::Both => write!(f, "both"),
4091            Self::Neither => write!(f, "neither"),
4092        }
4093    }
4094}
4095
4096/// A bounded numeric interval between two `f64` endpoints.
4097///
4098/// Matches `pd.Interval(left, right, closed)` on the numeric-subtype path.
4099/// Accessors match pandas: `.left`, `.right`, `.closed`, `.length`, `.mid`,
4100/// `.contains`, `.is_empty`, `.overlaps`.
4101#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
4102pub struct Interval {
4103    pub left: f64,
4104    pub right: f64,
4105    #[serde(default)]
4106    pub closed: IntervalClosed,
4107}
4108
4109impl Interval {
4110    /// Construct an interval. No validation on `left <= right` — pandas also
4111    /// accepts reversed intervals (they're non-empty only if empty-by-design).
4112    #[must_use]
4113    pub const fn new(left: f64, right: f64, closed: IntervalClosed) -> Self {
4114        Self {
4115            left,
4116            right,
4117            closed,
4118        }
4119    }
4120
4121    /// `right - left` (pandas `.length`). Negative for reversed intervals.
4122    #[must_use]
4123    pub fn length(&self) -> f64 {
4124        self.right - self.left
4125    }
4126
4127    /// Midpoint `(left + right) / 2` (pandas `.mid`).
4128    #[must_use]
4129    pub fn mid(&self) -> f64 {
4130        (self.left + self.right) / 2.0
4131    }
4132
4133    /// Empty iff endpoints coincide AND at least one side is open.
4134    /// Pandas semantics: `pd.Interval(3, 3, 'right').is_empty → True`.
4135    #[must_use]
4136    pub fn is_empty(&self) -> bool {
4137        self.left == self.right && !matches!(self.closed, IntervalClosed::Both)
4138    }
4139
4140    /// Does `value` fall inside this interval?
4141    ///
4142    /// NaN always returns false, matching pandas `pd.Interval.__contains__`
4143    /// behavior (NaN doesn't compare equal to anything).
4144    #[must_use]
4145    pub fn contains(&self, value: f64) -> bool {
4146        if value.is_nan() {
4147            return false;
4148        }
4149        let left_ok = if self.closed.left_closed() {
4150            value >= self.left
4151        } else {
4152            value > self.left
4153        };
4154        let right_ok = if self.closed.right_closed() {
4155            value <= self.right
4156        } else {
4157            value < self.right
4158        };
4159        left_ok && right_ok
4160    }
4161
4162    /// Do `self` and `other` share any point?
4163    ///
4164    /// Matches `pd.Interval.overlaps(other)`. Two intervals overlap iff the
4165    /// max of their lefts is less than the min of their rights, with
4166    /// endpoint-inclusion determining the strictness of the comparison when
4167    /// they touch exactly.
4168    #[must_use]
4169    pub fn overlaps(&self, other: &Self) -> bool {
4170        if self.left > other.right || other.left > self.right {
4171            return false;
4172        }
4173        // Touching-at-a-point: overlap iff both sides at that touchpoint are closed.
4174        if self.right == other.left {
4175            return self.closed.right_closed() && other.closed.left_closed();
4176        }
4177        if other.right == self.left {
4178            return other.closed.right_closed() && self.closed.left_closed();
4179        }
4180        true
4181    }
4182
4183    /// Parse an interval string. Supports bracket notation:
4184    /// - '[0, 1]' -> closed on both ends
4185    /// - '(0, 1)' -> open on both ends
4186    /// - '[0, 1)' -> closed left, open right (pandas default)
4187    /// - '(0, 1]' -> open left, closed right
4188    pub fn parse(s: &str) -> Result<Self, TypeError> {
4189        let s = s.trim();
4190        if s.len() < 5 {
4191            return Err(TypeError::ValueNotParseable {
4192                value: s.to_string(),
4193                target: "Interval".to_string(),
4194            });
4195        }
4196
4197        let first_char = s.chars().next().unwrap();
4198        let last_char = s.chars().last().unwrap();
4199
4200        let left_closed = match first_char {
4201            '[' => true,
4202            '(' => false,
4203            _ => {
4204                return Err(TypeError::ValueNotParseable {
4205                    value: s.to_string(),
4206                    target: "Interval".to_string(),
4207                });
4208            }
4209        };
4210
4211        let right_closed = match last_char {
4212            ']' => true,
4213            ')' => false,
4214            _ => {
4215                return Err(TypeError::ValueNotParseable {
4216                    value: s.to_string(),
4217                    target: "Interval".to_string(),
4218                });
4219            }
4220        };
4221
4222        let closed = match (left_closed, right_closed) {
4223            (true, true) => IntervalClosed::Both,
4224            (true, false) => IntervalClosed::Left,
4225            (false, true) => IntervalClosed::Right,
4226            (false, false) => IntervalClosed::Neither,
4227        };
4228
4229        let inner = &s[1..s.len() - 1];
4230        let parts: Vec<&str> = inner.split(',').collect();
4231        if parts.len() != 2 {
4232            return Err(TypeError::ValueNotParseable {
4233                value: s.to_string(),
4234                target: "Interval".to_string(),
4235            });
4236        }
4237
4238        let left: f64 = parts[0]
4239            .trim()
4240            .parse()
4241            .map_err(|_| TypeError::ValueNotParseable {
4242                value: s.to_string(),
4243                target: "Interval".to_string(),
4244            })?;
4245
4246        let right: f64 = parts[1]
4247            .trim()
4248            .parse()
4249            .map_err(|_| TypeError::ValueNotParseable {
4250                value: s.to_string(),
4251                target: "Interval".to_string(),
4252            })?;
4253
4254        Ok(Self::new(left, right, closed))
4255    }
4256}
4257
4258impl std::fmt::Display for Interval {
4259    /// Matches `str(pd.Interval(...))` for the `interval[float64]` subtype, which
4260    /// is the only subtype FrankenPandas stores (f64 endpoints): the endpoints
4261    /// render with Python `str(float)` semantics, so whole numbers KEEP ".0"
4262    /// (`str(pd.Interval(0.0, 5.0, 'right'))` is `"(0.0, 5.0]"`, not `"(0, 5]"`).
4263    /// Verified vs pandas 2.2.3 across whole/fractional/negative/scientific
4264    /// endpoints. (br-frankenpandas-5xw1b)
4265    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
4266        let left_bracket = if self.closed.left_closed() { '[' } else { '(' };
4267        let right_bracket = if self.closed.right_closed() { ']' } else { ')' };
4268        write!(
4269            f,
4270            "{left_bracket}{}, {}{right_bracket}",
4271            float_to_string_for_astype(self.left),
4272            float_to_string_for_astype(self.right)
4273        )
4274    }
4275}
4276
4277// ── interval_range builders (br-frankenpandas-xaom — Phase 2 of j8k4) ────
4278
4279/// Build `periods` equal-width intervals spanning `[start, end]`.
4280///
4281/// Matches `pd.interval_range(start, end, periods=N, closed=...)` for the
4282/// numeric-subtype case. Returns exactly `periods` intervals; when
4283/// `periods == 0` or `start >= end`, returns an empty vector (matches
4284/// pandas's empty IntervalIndex).
4285///
4286/// ```
4287/// use fp_types::{interval_range_by_periods, IntervalClosed};
4288/// let bins = interval_range_by_periods(0.0, 10.0, 5, IntervalClosed::Right);
4289/// assert_eq!(bins.len(), 5);
4290/// assert_eq!(bins[0].left, 0.0);
4291/// assert_eq!(bins[0].right, 2.0);
4292/// assert_eq!(bins[4].right, 10.0);
4293/// ```
4294#[must_use]
4295pub fn interval_range_by_periods(
4296    start: f64,
4297    end: f64,
4298    periods: usize,
4299    closed: IntervalClosed,
4300) -> Vec<Interval> {
4301    if periods == 0 || !start.is_finite() || !end.is_finite() || start >= end {
4302        return Vec::new();
4303    }
4304    let step = (end - start) / (periods as f64);
4305    let mut out = Vec::with_capacity(periods);
4306    for i in 0..periods {
4307        let left = start + step * (i as f64);
4308        // Use end exactly for the final right edge to avoid float drift.
4309        let right = if i + 1 == periods {
4310            end
4311        } else {
4312            start + step * ((i + 1) as f64)
4313        };
4314        out.push(Interval::new(left, right, closed));
4315    }
4316    out
4317}
4318
4319/// Build equal-`step`-width intervals spanning `[start, end]`.
4320///
4321/// Matches `pd.interval_range(start, end, freq=step, closed=...)` for the
4322/// numeric-subtype case. `step` must be finite and positive; `(end - start)`
4323/// must be an integer multiple of `step` (within float tolerance) — pandas
4324/// raises `ValueError` otherwise; this fn returns `Err(TypeError::IntervalStepDoesNotDivide)`.
4325///
4326/// Returns an empty vector when `start == end` (matches pandas' zero-bin
4327/// IntervalIndex); returns an empty vector when `start > end` (pandas also
4328/// returns empty rather than erroring in this case).
4329pub fn interval_range_by_step(
4330    start: f64,
4331    end: f64,
4332    step: f64,
4333    closed: IntervalClosed,
4334) -> Result<Vec<Interval>, TypeError> {
4335    if !step.is_finite() || !step.is_sign_positive() || step == 0.0 {
4336        return Err(TypeError::InvalidIntervalStep { step });
4337    }
4338    if !start.is_finite() || !end.is_finite() || start >= end {
4339        return Ok(Vec::new());
4340    }
4341    let span = end - start;
4342    let periods_f = span / step;
4343    let periods = periods_f.round() as i64;
4344    if periods <= 0 {
4345        return Ok(Vec::new());
4346    }
4347    let reconstructed = step * (periods as f64);
4348    // Relative tolerance: allow float-rounding noise proportional to span.
4349    if (span - reconstructed).abs() > span.abs() * 1e-9 + 1e-12 {
4350        return Err(TypeError::IntervalStepDoesNotDivide { step, span });
4351    }
4352    let periods = periods as usize;
4353    let mut out = Vec::with_capacity(periods);
4354    for i in 0..periods {
4355        let left = start + step * (i as f64);
4356        let right = if i + 1 == periods {
4357            end
4358        } else {
4359            start + step * ((i + 1) as f64)
4360        };
4361        out.push(Interval::new(left, right, closed));
4362    }
4363    Ok(out)
4364}
4365
4366// ── Period types (br-frankenpandas-epoj Phase 1) ────────────────────────
4367//
4368// Scaffolding for pandas `pd.Period` / `pd.PeriodIndex` / `pd.PeriodDtype`.
4369//
4370// A Period is a calendar *span* (Q1 2024, Jan 2024, 2024-03-15), distinct
4371// from a Timestamp (an instant). Phase 1 ships the PeriodFreq enum +
4372// Period struct with ordinal-based arithmetic (Period + n, Period - Period),
4373// Display in pandas notation, and parse from standard strings. Calendar-
4374// conversion (ordinal ↔ ymd) and DType::Period wiring land in Phase 2.
4375
4376/// Period frequency code. Matches pandas offset alias core set.
4377///
4378/// The ordinal axis is frequency-specific: for Monthly, ordinal 0 is a
4379/// fixed anchor (pandas uses months since 1970-01). Phase 1 doesn't
4380/// commit to a specific epoch yet — the ordinal scheme is opaque until
4381/// Phase 2 wires calendar arithmetic. What Phase 1 DOES nail down is:
4382/// same-freq Periods compare + subtract; Period + i64 shifts by `n`
4383/// periods of the declared frequency.
4384#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
4385#[serde(rename_all = "SCREAMING-KEBAB-CASE")]
4386#[non_exhaustive]
4387pub enum PeriodFreq {
4388    /// `Y-DEC` / `A` / `Y` — annual periods.
4389    Annual,
4390    /// `Q-DEC` / `Q` — quarterly periods.
4391    Quarterly,
4392    /// `M` — monthly periods.
4393    Monthly,
4394    /// `W-SUN` / `W` — weekly periods.
4395    Weekly,
4396    /// `D` — daily periods.
4397    Daily,
4398    /// `B` — business-day periods.
4399    Business,
4400    /// `h` / `H` — hourly periods.
4401    Hourly,
4402    /// `min` / `T` — minutely periods.
4403    Minutely,
4404    /// `s` / `S` — secondly periods.
4405    Secondly,
4406}
4407
4408impl PeriodFreq {
4409    /// Parse a pandas-style frequency alias. Recognizes the common subset
4410    /// (Y-DEC/A/Y, Q-DEC/Q, M, W-SUN/W, D, B, h/H, min/T, s/S).
4411    /// Case-insensitive.
4412    pub fn parse(alias: &str) -> Option<Self> {
4413        match alias.to_ascii_uppercase().as_str() {
4414            "A" | "Y" | "A-DEC" | "Y-DEC" | "ANNUAL" | "YEARLY" => Some(Self::Annual),
4415            "Q" | "Q-DEC" | "QUARTERLY" => Some(Self::Quarterly),
4416            "M" | "MONTHLY" => Some(Self::Monthly),
4417            "W" | "W-SUN" | "WEEKLY" => Some(Self::Weekly),
4418            "D" | "DAILY" => Some(Self::Daily),
4419            "B" | "BUSINESS" => Some(Self::Business),
4420            "H" | "HOURLY" => Some(Self::Hourly),
4421            "T" | "MIN" | "MINUTELY" => Some(Self::Minutely),
4422            "S" | "SECONDLY" => Some(Self::Secondly),
4423            _ => None,
4424        }
4425    }
4426
4427    /// Canonical pandas alias string.
4428    #[must_use]
4429    pub const fn alias(self) -> &'static str {
4430        match self {
4431            Self::Annual => "Y-DEC",
4432            Self::Quarterly => "Q-DEC",
4433            Self::Monthly => "M",
4434            Self::Weekly => "W-SUN",
4435            Self::Daily => "D",
4436            Self::Business => "B",
4437            Self::Hourly => "h",
4438            Self::Minutely => "min",
4439            Self::Secondly => "s",
4440        }
4441    }
4442
4443    /// Per br-frankenpandas-qigpe: resolution string for PeriodIndex.resolution.
4444    #[must_use]
4445    pub const fn resolution(self) -> &'static str {
4446        match self {
4447            Self::Annual => "A-DEC",
4448            Self::Quarterly => "Q-DEC",
4449            Self::Monthly => "M",
4450            Self::Weekly => "W-SUN",
4451            Self::Daily => "D",
4452            Self::Business => "B",
4453            Self::Hourly => "H",
4454            Self::Minutely => "T",
4455            Self::Secondly => "S",
4456        }
4457    }
4458}
4459
4460impl std::fmt::Display for PeriodFreq {
4461    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
4462        f.write_str(self.alias())
4463    }
4464}
4465
4466/// A single pandas-style Period value.
4467///
4468/// Stored as an integer ordinal on a frequency-specific axis plus the
4469/// frequency code. Two Periods with different `freq` are incompatible —
4470/// arithmetic and comparison require same-freq operands.
4471#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
4472pub struct Period {
4473    pub ordinal: i64,
4474    pub freq: PeriodFreq,
4475}
4476
4477impl Period {
4478    #[must_use]
4479    pub const fn new(ordinal: i64, freq: PeriodFreq) -> Self {
4480        Self { ordinal, freq }
4481    }
4482
4483    /// Integer position on this period's frequency axis, matching
4484    /// `pd.Period.ordinal`.
4485    #[must_use]
4486    pub const fn ordinal(&self) -> i64 {
4487        self.ordinal
4488    }
4489
4490    /// Frequency code for this period, matching `pd.Period.freq`.
4491    #[must_use]
4492    pub const fn freq(&self) -> PeriodFreq {
4493        self.freq
4494    }
4495
4496    /// Canonical pandas frequency alias, matching `pd.Period.freqstr`.
4497    #[must_use]
4498    pub const fn freqstr(&self) -> &'static str {
4499        self.freq.alias()
4500    }
4501
4502    /// Same-freq ordinal comparison. Returns `None` if `freq` differs —
4503    /// caller decides whether that's an error or a panic site.
4504    #[must_use]
4505    pub fn cmp_same_freq(&self, other: &Self) -> Option<std::cmp::Ordering> {
4506        if self.freq != other.freq {
4507            return None;
4508        }
4509        Some(self.ordinal.cmp(&other.ordinal))
4510    }
4511
4512    /// Shift by `n` periods of the current frequency.
4513    /// Matches `pd.Period + n` and `pd.Period - n`.
4514    #[must_use]
4515    pub fn shift(&self, n: i64) -> Self {
4516        Self {
4517            ordinal: self.ordinal.saturating_add(n),
4518            freq: self.freq,
4519        }
4520    }
4521
4522    /// Period-difference in units of the shared frequency.
4523    /// Returns `None` if `freq` differs (pandas raises IncompatibleFrequency).
4524    #[must_use]
4525    pub fn diff(&self, other: &Self) -> Option<i64> {
4526        if self.freq != other.freq {
4527            return None;
4528        }
4529        Some(self.ordinal.saturating_sub(other.ordinal))
4530    }
4531
4532    /// Parse common pandas `Period(...)` strings and infer the frequency.
4533    ///
4534    /// Supported forms mirror pandas' unambiguous scalar constructor cases:
4535    /// annual (`"2024"`), quarterly (`"2024Q1"`), monthly (`"2024-01"`),
4536    /// and daily (`"2024-01-15"`). The ordinal axes match pandas:
4537    /// 1970, 1970Q1, 1970-01, and 1970-01-01 all have ordinal 0.
4538    pub fn parse(s: &str) -> Result<Self, TypeError> {
4539        let trimmed = s.trim();
4540        if trimmed.eq_ignore_ascii_case("nat") {
4541            return Ok(Self::new(i64::MIN, PeriodFreq::Daily));
4542        }
4543
4544        if let Some((year, quarter)) = parse_quarter_period(trimmed) {
4545            let ordinal = year
4546                .checked_sub(1970)
4547                .and_then(|offset| offset.checked_mul(4))
4548                .and_then(|base| base.checked_add(i64::from(quarter) - 1))
4549                .ok_or_else(|| TypeError::ValueNotParseable {
4550                    value: s.to_owned(),
4551                    target: "Period".to_owned(),
4552                })?;
4553            return Ok(Self::new(ordinal, PeriodFreq::Quarterly));
4554        }
4555
4556        if let Some((year, month, day)) = parse_ymd_period(trimmed) {
4557            let ordinal = Timestamp::days_from_ymd(year, i64::from(month), i64::from(day));
4558            return Ok(Self::new(ordinal, PeriodFreq::Daily));
4559        }
4560
4561        if let Some((year, month)) = parse_year_month_period(trimmed) {
4562            let ordinal = year
4563                .checked_sub(1970)
4564                .and_then(|offset| offset.checked_mul(12))
4565                .and_then(|base| base.checked_add(i64::from(month) - 1))
4566                .ok_or_else(|| TypeError::ValueNotParseable {
4567                    value: s.to_owned(),
4568                    target: "Period".to_owned(),
4569                })?;
4570            return Ok(Self::new(ordinal, PeriodFreq::Monthly));
4571        }
4572
4573        if let Some(year) = parse_annual_period(trimmed) {
4574            let ordinal = year
4575                .checked_sub(1970)
4576                .ok_or_else(|| TypeError::ValueNotParseable {
4577                    value: s.to_owned(),
4578                    target: "Period".to_owned(),
4579                })?;
4580            return Ok(Self::new(ordinal, PeriodFreq::Annual));
4581        }
4582
4583        Err(TypeError::ValueNotParseable {
4584            value: s.to_owned(),
4585            target: "Period".to_owned(),
4586        })
4587    }
4588
4589    /// Pandas calendar string for this period, matching `str(pd.Period)`.
4590    ///
4591    /// Inverts the frequency-specific ordinal axes anchored at 1970:
4592    /// `1970`/`1970Q1`/`1970-01`/`1970-01-01`/`1970-01-01 00:00` all have
4593    /// ordinal 0. Returns `"NaT"` for the missing sentinel (`i64::MIN`).
4594    ///
4595    /// Annual/Quarterly/Monthly/Daily and the sub-daily clocks
4596    /// (Hourly/Minutely/Secondly) are exact. Weekly and Business use a
4597    /// best-effort `YYYY-MM-DD` rendering (their pandas axes — a Sunday-ended
4598    /// week range and a business-day count — are not yet wired; neither is
4599    /// reachable through the current parse/cast paths).
4600    #[must_use]
4601    pub fn calendar_string(&self) -> String {
4602        if self.ordinal == i64::MIN {
4603            return "NaT".to_owned();
4604        }
4605        let ord = self.ordinal;
4606        match self.freq {
4607            PeriodFreq::Annual => {
4608                let year = 1970 + ord;
4609                format!("{year}")
4610            }
4611            PeriodFreq::Quarterly => {
4612                let year = 1970 + ord.div_euclid(4);
4613                let quarter = ord.rem_euclid(4) + 1;
4614                format!("{year}Q{quarter}")
4615            }
4616            PeriodFreq::Monthly => {
4617                let year = 1970 + ord.div_euclid(12);
4618                let month = ord.rem_euclid(12) + 1;
4619                format!("{year:04}-{month:02}")
4620            }
4621            PeriodFreq::Daily | PeriodFreq::Business | PeriodFreq::Weekly => {
4622                let (y, m, d) = civil_from_days(ord);
4623                format!("{y:04}-{m:02}-{d:02}")
4624            }
4625            PeriodFreq::Hourly => {
4626                let (y, m, d) = civil_from_days(ord.div_euclid(24));
4627                let hour = ord.rem_euclid(24);
4628                format!("{y:04}-{m:02}-{d:02} {hour:02}:00")
4629            }
4630            PeriodFreq::Minutely => {
4631                let day = ord.div_euclid(1440);
4632                let mins = ord.rem_euclid(1440);
4633                let (y, m, d) = civil_from_days(day);
4634                format!("{y:04}-{m:02}-{d:02} {:02}:{:02}", mins / 60, mins % 60)
4635            }
4636            PeriodFreq::Secondly => {
4637                let day = ord.div_euclid(86_400);
4638                let secs = ord.rem_euclid(86_400);
4639                let (y, m, d) = civil_from_days(day);
4640                format!(
4641                    "{y:04}-{m:02}-{d:02} {:02}:{:02}:{:02}",
4642                    secs / 3600,
4643                    (secs % 3600) / 60,
4644                    secs % 60
4645                )
4646            }
4647        }
4648    }
4649}
4650
/// Convert a day count (days since 1970-01-01) to a proleptic-Gregorian
/// `(year, month, day)`, using Howard Hinnant's civil-from-days algorithm.
///
/// The arithmetic runs in `i128`, so every `i64` day count is handled
/// without overflow; shifting the epoch by 719 468 days would overflow `i64`
/// for inputs near `i64::MAX`.
///
/// Returns `month` in `1..=12` and `day` in `1..=31`.
fn civil_from_days(days_since_epoch: i64) -> (i64, u32, u32) {
    // Re-base onto 0000-03-01 so that leap days fall at the end of a "year".
    let days = i128::from(days_since_epoch) + 719_468;
    // 400-year eras of 146 097 days each; Euclidean division floors for
    // negative day counts.
    let era = days.div_euclid(146_097);
    let doe = days.rem_euclid(146_097); // day of era, 0..=146_096
    let yoe = (doe - doe / 1460 + doe / 36_524 - doe / 146_096) / 365; // 0..=399
    let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); // day of March-based year
    let mp = (5 * doy + 2) / 153; // March-based month index, 0..=11
    let day = doy - (153 * mp + 2) / 5 + 1;
    let month = if mp < 10 { mp + 3 } else { mp - 9 };
    // January and February belong to the following civil year.
    let year = yoe + era * 400 + i128::from(month <= 2);
    // |year| is at most about 2.6e16 for any i64 input, so the narrowing
    // casts below cannot truncate.
    (year as i64, month as u32, day as u32)
}
4667
/// Parses an annual period string: exactly four ASCII digits (`"2024"`).
///
/// Returns the year, or `None` for any other shape (wrong length, signs,
/// non-digit characters).
fn parse_annual_period(value: &str) -> Option<i64> {
    let four_digits = value.len() == 4 && value.bytes().all(|byte| byte.is_ascii_digit());
    if !four_digits {
        return None;
    }
    value.parse().ok()
}
4673
/// Parses a monthly period string of the exact shape `YYYY-MM`.
///
/// Both fields must consist solely of ASCII digits (four for the year, two
/// for the month) and the month must be in `1..=12`. The explicit digit
/// check matters because Rust's integer `FromStr` accepts a leading `+`, so
/// a length check alone would let `"2024-+1"` or `"+123-01"` through.
///
/// Returns `(year, month)`, or `None` when the input does not match.
fn parse_year_month_period(value: &str) -> Option<(i64, u32)> {
    let (year, month) = value.split_once('-')?;
    let digits_only = |field: &str| field.bytes().all(|byte| byte.is_ascii_digit());
    if year.len() != 4 || month.len() != 2 || !digits_only(year) || !digits_only(month) {
        return None;
    }
    let year = year.parse::<i64>().ok()?;
    let month = month.parse::<u32>().ok()?;
    (1..=12).contains(&month).then_some((year, month))
}
4683
4684fn parse_ymd_period(value: &str) -> Option<(i64, u32, u32)> {
4685    let mut parts = value.split('-');
4686    let year = parts.next()?;
4687    let month = parts.next()?;
4688    let day = parts.next()?;
4689    if parts.next().is_some() || year.len() != 4 || month.len() != 2 || day.len() != 2 {
4690        return None;
4691    }
4692    let year = year.parse::<i64>().ok()?;
4693    let month = month.parse::<u32>().ok()?;
4694    let day = day.parse::<u32>().ok()?;
4695    (1..=days_in_month(year, month)?)
4696        .contains(&day)
4697        .then_some((year, month, day))
4698}
4699
/// Parses a quarterly period string of the exact shape `YYYYQn`
/// (`Q` or `q`), with `n` in `1..=4`.
///
/// The year must be four ASCII digits. The explicit digit check matters
/// because Rust's integer `FromStr` accepts a leading sign, so a length
/// check alone would let `"+123Q1"` or `"-123Q1"` through.
///
/// Returns `(year, quarter)`, or `None` when the input does not match.
fn parse_quarter_period(value: &str) -> Option<(i64, u32)> {
    let (year, quarter) = value.split_once('Q').or_else(|| value.split_once('q'))?;
    if year.len() != 4 || quarter.len() != 1 {
        return None;
    }
    if !year.bytes().all(|byte| byte.is_ascii_digit()) {
        return None;
    }
    let year = year.parse::<i64>().ok()?;
    // A single character can only parse as u32 when it is a digit.
    let quarter = quarter.parse::<u32>().ok()?;
    (1..=4).contains(&quarter).then_some((year, quarter))
}
4709
4710impl std::fmt::Display for Period {
4711    /// Pandas `str(Period)` form: the calendar string (`2024`, `2024Q1`,
4712    /// `2024-03`, `2024-01-15`, ...). NaT (ordinal `i64::MIN`) renders `NaT`.
4713    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
4714        f.write_str(&self.calendar_string())
4715    }
4716}
4717
4718/// Build `periods` consecutive Periods starting at `start`.
4719///
4720/// Matches `pd.period_range(start, periods=N, freq=start.freq)` for the
4721/// count-based form. The frequency is taken from `start` — pandas requires
4722/// `freq` to match when both are passed; mismatches are an error in
4723/// pandas, but here we sidestep ambiguity by deriving from `start.freq`.
4724///
4725/// Per br-frankenpandas-2jef (epoj Phase 2). Pure ordinal arithmetic — no
4726/// calendar conversion (Phase 3 wires chrono). `periods=0` returns empty.
4727///
4728/// ```
4729/// use fp_types::{period_range, Period, PeriodFreq};
4730/// let q1 = Period::new(216, PeriodFreq::Quarterly);
4731/// let year = period_range(q1, 4);
4732/// assert_eq!(year.len(), 4);
4733/// assert_eq!(year[0].ordinal, 216);
4734/// assert_eq!(year[3].ordinal, 219);
4735/// ```
4736#[must_use]
4737pub fn period_range(start: Period, periods: usize) -> Vec<Period> {
4738    (0..periods).map(|i| start.shift(i as i64)).collect()
4739}
4740
4741#[cfg(test)]
4742mod tests {
4743    use super::{
4744        DType, Interval, IntervalClosed, NullKind, Period, PeriodFreq, Scalar, SparseDType,
4745        cast_scalar, common_dtype, infer_dtype,
4746    };
4747
    // Notes on the `semantic_cmp` property tests in this module.
    //
    // br-frankenpandas-ay8o9 (`semantic_cmp_total_order_axioms_ay8o9`):
    // Scalar::semantic_cmp underpins ALL ordering in the library (sort,
    // min/max, is_monotonic, searchsorted, groupby key order) and is the
    // reference the differential harnesses rely on. That test is a property
    // test of its total-order axioms over finite/non-NaT same-dtype scalars,
    // plus the intentional NaN degeneracy. Deterministic seeded LCG — no rand
    // crate, no mocks.
    //
    // br-frankenpandas-767ak (`semantic_cmp_nat_degeneracy_temporal_767ak`):
    // extends ay8o9's NaN pinning to the temporal NAT sentinels (i64::MIN).
    // semantic_cmp treats NAT as degenerate (Equal to all same-dtype values),
    // which is why temporal ordering ops must treat NAT as missing.
4757    #[test]
4758    fn semantic_cmp_cross_numeric_int_float_cdpai() {
4759        // Property (br-frankenpandas-cdpai): semantic_cmp compares Int64 vs Float64
4760        // as f64, antisymmetric across the operand order. Seeded LCG, no mocks.
4761        use std::cmp::Ordering;
4762        let mut st: u64 = 0xc205_a1b2_c3d4_e5f6;
4763        let mut next = || {
4764            st = st
4765                .wrapping_mul(6_364_136_223_846_793_005)
4766                .wrapping_add(1_442_695_040_888_963_407);
4767            (st >> 33) as u32
4768        };
4769        for _ in 0..3000u32 {
4770            let i = (next() % 21) as i64 - 10;
4771            let f = (next() % 400) as f64 / 20.0 - 10.0; // finite
4772            let exp = (i as f64).partial_cmp(&f).unwrap();
4773            assert_eq!(Scalar::Int64(i).semantic_cmp(&Scalar::Float64(f)), exp);
4774            assert_eq!(
4775                Scalar::Float64(f).semantic_cmp(&Scalar::Int64(i)),
4776                exp.reverse()
4777            );
4778        }
4779        // Exact int/float equality compares Equal in both directions.
4780        assert_eq!(
4781            Scalar::Int64(5).semantic_cmp(&Scalar::Float64(5.0)),
4782            Ordering::Equal
4783        );
4784        assert_eq!(
4785            Scalar::Float64(5.0).semantic_cmp(&Scalar::Int64(5)),
4786            Ordering::Equal
4787        );
4788        assert_eq!(
4789            Scalar::Int64(3).semantic_cmp(&Scalar::Float64(3.5)),
4790            Ordering::Less
4791        );
4792        assert_eq!(
4793            Scalar::Int64(4).semantic_cmp(&Scalar::Float64(3.5)),
4794            Ordering::Greater
4795        );
4796    }
4797
4798    #[test]
4799    fn semantic_cmp_nat_degeneracy_temporal_767ak() {
4800        use std::cmp::Ordering;
4801        const NAT: i64 = i64::MIN;
4802
4803        // Timedelta64 NAT is degenerate (Equal to finite and to itself).
4804        let td_nat = Scalar::Timedelta64(NAT);
4805        for v in [-3i64, 0, 5, 99] {
4806            let td = Scalar::Timedelta64(v);
4807            assert_eq!(td_nat.semantic_cmp(&td), Ordering::Equal, "td NAT vs {v}");
4808            assert_eq!(td.semantic_cmp(&td_nat), Ordering::Equal, "td {v} vs NAT");
4809        }
4810        assert_eq!(td_nat.semantic_cmp(&td_nat), Ordering::Equal);
4811
4812        // Datetime64 NAT likewise.
4813        let dt_nat = Scalar::Datetime64(NAT);
4814        for v in [-3i64, 0, 5, 99] {
4815            let dt = Scalar::Datetime64(v);
4816            assert_eq!(dt_nat.semantic_cmp(&dt), Ordering::Equal, "dt NAT vs {v}");
4817            assert_eq!(dt.semantic_cmp(&dt_nat), Ordering::Equal, "dt {v} vs NAT");
4818        }
4819        assert_eq!(dt_nat.semantic_cmp(&dt_nat), Ordering::Equal);
4820
4821        // Non-NAT temporal values order normally (reflexive, antisymmetric, lt).
4822        for (a, b) in [(1i64, 2i64), (5, 5), (9, -1)] {
4823            let (ta, tb) = (Scalar::Timedelta64(a), Scalar::Timedelta64(b));
4824            assert_eq!(ta.semantic_cmp(&ta), Ordering::Equal);
4825            assert_eq!(ta.semantic_cmp(&tb), b.cmp(&a).reverse()); // a.cmp(b)
4826            assert_eq!(ta.semantic_cmp(&tb), a.cmp(&b));
4827            let (da, db) = (Scalar::Datetime64(a), Scalar::Datetime64(b));
4828            assert_eq!(da.semantic_cmp(&db), a.cmp(&b));
4829        }
4830    }
4831
4832    #[test]
4833    fn semantic_cmp_total_order_axioms_ay8o9() {
4834        use std::cmp::Ordering;
4835
4836        let mut state: u64 = 0xc0ff_eeba_df00_d123;
4837        let mut next = || {
4838            state = state
4839                .wrapping_mul(6_364_136_223_846_793_005)
4840                .wrapping_add(1_442_695_040_888_963_407);
4841            (state >> 33) as u32
4842        };
4843
4844        for iter in 0..6000u32 {
4845            let dt = next() % 4;
4846            // Build a same-dtype, finite, non-NaT scalar from a random u32.
4847            let mk = |r: u32| match dt {
4848                0 => Scalar::Int64((r % 11) as i64 - 5),
4849                // finite, in -5.0..=5.0 by 0.5 — no NaN/inf.
4850                1 => Scalar::Float64(f64::from((r % 21) as i32 - 10) / 2.0),
4851                2 => Scalar::Utf8(format!("s{}", r % 5)),
4852                _ => Scalar::Bool(r.is_multiple_of(2)),
4853            };
4854            let a = mk(next());
4855            let b = mk(next());
4856            let c = mk(next());
4857            let ctx = format!("iter={iter} a={a:?} b={b:?} c={c:?}");
4858
4859            // Reflexivity.
4860            assert_eq!(a.semantic_cmp(&a), Ordering::Equal, "reflexive {ctx}");
4861            // Antisymmetry.
4862            assert_eq!(
4863                a.semantic_cmp(&b),
4864                b.semantic_cmp(&a).reverse(),
4865                "antisymmetric {ctx}"
4866            );
4867            // le / ge / eq consistency with the ordering.
4868            let ab = a.semantic_cmp(&b);
4869            assert_eq!(
4870                a.semantic_le(&b),
4871                ab != Ordering::Greater,
4872                "le-consistent {ctx}"
4873            );
4874            assert_eq!(
4875                a.semantic_ge(&b),
4876                ab != Ordering::Less,
4877                "ge-consistent {ctx}"
4878            );
4879            assert_eq!(
4880                a.semantic_le(&b) && a.semantic_ge(&b),
4881                ab == Ordering::Equal,
4882                "le&ge<=>eq {ctx}"
4883            );
4884            // Transitivity: a<=b && b<=c  =>  a<=c.
4885            if a.semantic_cmp(&b) != Ordering::Greater && b.semantic_cmp(&c) != Ordering::Greater {
4886                assert_ne!(a.semantic_cmp(&c), Ordering::Greater, "transitivity {ctx}");
4887            }
4888        }
4889
4890        // Pin the intentional NaN degeneracy: a Float64 NaN compares Equal to
4891        // every finite Float64 (and to itself) — this is why ordering ops must
4892        // treat NaN as missing rather than relying on semantic_cmp to order it.
4893        let nan = Scalar::Float64(f64::NAN);
4894        for v in [
4895            Scalar::Float64(-3.5),
4896            Scalar::Float64(0.0),
4897            Scalar::Float64(7.25),
4898        ] {
4899            assert_eq!(nan.semantic_cmp(&v), Ordering::Equal, "NaN cmp finite");
4900            assert_eq!(v.semantic_cmp(&nan), Ordering::Equal, "finite cmp NaN");
4901        }
4902        assert_eq!(nan.semantic_cmp(&nan), Ordering::Equal, "NaN cmp NaN");
4903    }
4904
4905    /// br-frankenpandas-be314: common_dtype is the dtype-promotion lattice
4906    /// underpinning every binary op, alignment, and concat (dtype coercion is a
4907    /// crown-jewel correctness area). Exhaustively (all 13x13 DType pairs) assert
4908    /// its lattice axioms — an asymmetric arm would make df1+df2 and df2+df1
4909    /// disagree on dtype.
4910    #[test]
4911    fn common_dtype_lattice_axioms_be314() {
4912        const ALL: [DType; 13] = [
4913            DType::Null,
4914            DType::Bool,
4915            DType::BoolNullable,
4916            DType::Int64,
4917            DType::Int64Nullable,
4918            DType::Float64,
4919            DType::Utf8,
4920            DType::Categorical,
4921            DType::Timedelta64,
4922            DType::Datetime64,
4923            DType::Period,
4924            DType::Interval,
4925            DType::Sparse,
4926        ];
4927
4928        for &a in &ALL {
4929            // Idempotence: a promoted with itself is itself.
4930            assert_eq!(common_dtype(a, a), Ok(a), "idempotent {a:?}");
4931            // Null is the identity element of the promotion lattice.
4932            assert_eq!(
4933                common_dtype(DType::Null, a),
4934                Ok(a),
4935                "null-left identity {a:?}"
4936            );
4937            assert_eq!(
4938                common_dtype(a, DType::Null),
4939                Ok(a),
4940                "null-right identity {a:?}"
4941            );
4942
4943            for &b in &ALL {
4944                // Commutativity: same Ok value AND same ok-ness. An asymmetric
4945                // match arm would make binary-op output dtype order-dependent.
4946                assert_eq!(
4947                    common_dtype(a, b).ok(),
4948                    common_dtype(b, a).ok(),
4949                    "commutative value {a:?},{b:?}"
4950                );
4951                assert_eq!(
4952                    common_dtype(a, b).is_ok(),
4953                    common_dtype(b, a).is_ok(),
4954                    "commutative ok-ness {a:?},{b:?}"
4955                );
4956            }
4957        }
4958
4959        // Associativity over the Ok-closed subset: when both nestings succeed,
4960        // promotion order must not change the result.
4961        for &a in &ALL {
4962            for &b in &ALL {
4963                for &c in &ALL {
4964                    if let (Ok(ab), Ok(bc)) = (common_dtype(a, b), common_dtype(b, c))
4965                        && let (Ok(left), Ok(right)) = (common_dtype(ab, c), common_dtype(a, bc))
4966                    {
4967                        assert_eq!(left, right, "associative {a:?},{b:?},{c:?}");
4968                    }
4969                }
4970            }
4971        }
4972    }
4973
4974    /// br-frankenpandas-e3sfq: infer_dtype drives Column/Series construction
4975    /// dtype inference. Assert its homogeneous + mixed-coercion rules.
4976    #[test]
4977    fn infer_dtype_coercion_rules_e3sfq() {
4978        use DType::{Bool, Float64, Int64, Null, Utf8};
4979
4980        // Empty and all-null infer to Null.
4981        assert_eq!(infer_dtype(&[]), Ok(Null));
4982        assert_eq!(
4983            infer_dtype(&[Scalar::Null(NullKind::Null), Scalar::Null(NullKind::NaN)]),
4984            Ok(Null)
4985        );
4986
4987        // Homogeneous slices infer to their own dtype (random, seeded LCG).
4988        let mut s: u64 = 0x132d_a7e0_0e3f_c0de;
4989        let mut next = || {
4990            s = s
4991                .wrapping_mul(6_364_136_223_846_793_005)
4992                .wrapping_add(1_442_695_040_888_963_407);
4993            (s >> 33) as u32
4994        };
4995        for _ in 0..600u32 {
4996            let n = (next() % 6) as usize + 1;
4997            let ints: Vec<Scalar> = (0..n)
4998                .map(|_| Scalar::Int64((next() % 9) as i64 - 4))
4999                .collect();
5000            assert_eq!(infer_dtype(&ints), Ok(Int64));
5001            let floats: Vec<Scalar> = (0..n)
5002                .map(|_| Scalar::Float64(f64::from((next() % 7) as i32)))
5003                .collect();
5004            assert_eq!(infer_dtype(&floats), Ok(Float64));
5005            let bools: Vec<Scalar> = (0..n).map(|_| Scalar::Bool(next() % 2 == 0)).collect();
5006            assert_eq!(infer_dtype(&bools), Ok(Bool));
5007            let strs: Vec<Scalar> = (0..n)
5008                .map(|_| Scalar::Utf8(format!("s{}", next() % 4)))
5009                .collect();
5010            assert_eq!(infer_dtype(&strs), Ok(Utf8));
5011        }
5012
5013        // Mixed-coercion rules.
5014        assert_eq!(
5015            infer_dtype(&[
5016                Scalar::Int64(1),
5017                Scalar::Null(NullKind::Null),
5018                Scalar::Int64(2)
5019            ]),
5020            Ok(Int64),
5021            "Int64 + nulls -> Int64"
5022        );
5023        assert_eq!(
5024            infer_dtype(&[Scalar::Int64(1), Scalar::Float64(2.5)]),
5025            Ok(Float64),
5026            "Int64 + Float64 -> Float64"
5027        );
5028        assert_eq!(
5029            infer_dtype(&[Scalar::Bool(true), Scalar::Int64(3)]),
5030            Ok(Int64),
5031            "Bool + Int64 -> Int64"
5032        );
5033        assert_eq!(
5034            infer_dtype(&[Scalar::Utf8("a".into()), Scalar::Int64(3)]),
5035            Ok(Utf8),
5036            "Utf8 + Int64 -> Utf8 (object fallback)"
5037        );
5038    }
5039
5040    /// br-frankenpandas-1ews0: missing_for_dtype is the canonical per-dtype
5041    /// missing sentinel (used by null-fill / with_validity / cast). Exhaustively
5042    /// assert it is always missing, and that casting any missing to any dtype
5043    /// stays missing.
5044    #[test]
5045    fn missing_for_dtype_always_missing_1ews0() {
5046        const ALL: [DType; 13] = [
5047            DType::Null,
5048            DType::Bool,
5049            DType::BoolNullable,
5050            DType::Int64,
5051            DType::Int64Nullable,
5052            DType::Float64,
5053            DType::Utf8,
5054            DType::Categorical,
5055            DType::Timedelta64,
5056            DType::Datetime64,
5057            DType::Period,
5058            DType::Interval,
5059            DType::Sparse,
5060        ];
5061        for &dt in &ALL {
5062            let m = Scalar::missing_for_dtype(dt);
5063            assert!(m.is_missing(), "missing_for_dtype({dt:?}) must be missing");
5064            for &target in &ALL {
5065                let cast = cast_scalar(&m, target).expect("cast of missing");
5066                if target == DType::Utf8 {
5067                    // Casting a missing value to string follows pandas astype(str):
5068                    // it yields a string ("None"/"NaN"/"NaT"), NOT a missing value.
5069                    assert!(
5070                        matches!(cast, Scalar::Utf8(_)),
5071                        "cast(missing {dt:?} -> Utf8) yields a string, got {cast:?}"
5072                    );
5073                } else {
5074                    // Every other target preserves missingness via cast_scalar's
5075                    // value.is_missing() -> missing_for_dtype(target) branch.
5076                    assert!(
5077                        cast.is_missing(),
5078                        "cast(missing {dt:?} -> {target:?}) must stay missing, got {cast:?}"
5079                    );
5080                }
5081            }
5082        }
5083    }
5084
5085    #[test]
5086    fn cast_scalar_bool_int_roundtrip_6w07b() {
5087        use super::cast_scalar;
5088        // Property (br-frankenpandas-6w07b): Bool<->Int64 cast. Seeded LCG, no mocks.
5089        assert_eq!(
5090            cast_scalar(&Scalar::Bool(true), DType::Int64).unwrap(),
5091            Scalar::Int64(1)
5092        );
5093        assert_eq!(
5094            cast_scalar(&Scalar::Bool(false), DType::Int64).unwrap(),
5095            Scalar::Int64(0)
5096        );
5097        assert_eq!(
5098            cast_scalar(&Scalar::Int64(0), DType::Bool).unwrap(),
5099            Scalar::Bool(false)
5100        );
5101        let mut st: u64 = 0x4b07_0b1c_2d3e_4f50;
5102        let mut next = || {
5103            st = st
5104                .wrapping_mul(6_364_136_223_846_793_005)
5105                .wrapping_add(1_442_695_040_888_963_407);
5106            (st >> 33) as u32
5107        };
5108        for _ in 0..2000u32 {
5109            let v = (next() % 21) as i64 - 10;
5110            assert_eq!(
5111                cast_scalar(&Scalar::Int64(v), DType::Bool).unwrap(),
5112                Scalar::Bool(v != 0),
5113                "int->bool v={v}"
5114            );
5115        }
5116    }
5117
5118    /// br-frankenpandas-6a83t: cast_scalar is the scalar dtype-coercion path
5119    #[test]
5120    fn cast_scalar_float_to_int_truncates_toward_zero_u9lec() {
5121        use super::cast_scalar;
5122        // Property (br-frankenpandas-u9lec): Float64->Int64 truncates toward zero, not
5123        // floor (-3.7 -> -3). Seeded LCG, no mocks.
5124        let mut st: u64 = 0x4ca5_0b1c_2d3e_4f50;
5125        let mut next = || {
5126            st = st
5127                .wrapping_mul(6_364_136_223_846_793_005)
5128                .wrapping_add(1_442_695_040_888_963_407);
5129            (st >> 33) as u32
5130        };
5131        // Explicit negative-direction checks (the gotcha).
5132        for (f, exp) in [
5133            (-3.7, -3i64),
5134            (3.7, 3),
5135            (-3.2, -3),
5136            (3.2, 3),
5137            (-0.9, 0),
5138            (0.9, 0),
5139            (-5.0, -5),
5140            (5.0, 5),
5141        ] {
5142            assert_eq!(
5143                cast_scalar(&Scalar::Float64(f), DType::Int64).unwrap(),
5144                Scalar::Int64(exp),
5145                "cast {f}"
5146            );
5147        }
5148        // Property over random signed fractional values.
5149        for _ in 0..3000u32 {
5150            let v = (next() % 2_000_001) as f64 / 1000.0 - 1000.0; // [-1000, 1000]
5151            let got = cast_scalar(&Scalar::Float64(v), DType::Int64).unwrap();
5152            assert_eq!(
5153                got,
5154                Scalar::Int64(v.trunc() as i64),
5155                "trunc-toward-zero v={v}"
5156            );
5157        }
5158    }
5159
5160    #[test]
5161    fn nancount_nunique_prod_any_all_mx60x() {
5162        use super::{nanall, nanany, nancount, nannunique, nanprod};
5163        // br-frankenpandas-mx60x: nancount/nannunique/nanprod/nanany/nanall skip NaN
5164        // and match finite-only oracles. Seeded LCG, no mocks.
5165        let mut s: u64 = 0x4e2a_0b1c_2d3e_4f50;
5166        let mut next = || {
5167            s = s
5168                .wrapping_mul(6_364_136_223_846_793_005)
5169                .wrapping_add(1_442_695_040_888_963_407);
5170            (s >> 33) as u32
5171        };
5172        let asf = |sc: Scalar| -> f64 { sc.to_f64().unwrap_or(f64::NAN) };
5173        let asb = |sc: Scalar| -> bool { matches!(sc, Scalar::Bool(true)) };
5174        for iter in 0..1000u32 {
5175            let n = (next() % 10) as usize + 1;
5176            let raw: Vec<f64> = (0..n)
5177                .map(|_| {
5178                    if next() % 4 == 0 {
5179                        f64::NAN
5180                    } else {
5181                        (next() % 5) as f64
5182                    }
5183                })
5184                .collect();
5185            let finite: Vec<f64> = raw.iter().copied().filter(|x| !x.is_nan()).collect();
5186            if finite.is_empty() {
5187                continue;
5188            }
5189            let scalars: Vec<Scalar> = raw.iter().map(|&x| Scalar::Float64(x)).collect();
5190            let distinct: std::collections::HashSet<u64> =
5191                finite.iter().map(|x| x.to_bits()).collect();
5192            let prod: f64 = finite.iter().product();
5193            assert!(
5194                (asf(nancount(&scalars)) - finite.len() as f64).abs() < 1e-9,
5195                "nancount iter={iter}"
5196            );
5197            assert!(
5198                (asf(nannunique(&scalars)) - distinct.len() as f64).abs() < 1e-9,
5199                "nannunique iter={iter}"
5200            );
5201            assert!(
5202                (asf(nanprod(&scalars)) - prod).abs() < 1e-7,
5203                "nanprod iter={iter}"
5204            );
5205            assert_eq!(
5206                asb(nanany(&scalars)),
5207                finite.iter().any(|&x| x != 0.0),
5208                "nanany iter={iter}"
5209            );
5210            assert_eq!(
5211                asb(nanall(&scalars)),
5212                finite.iter().all(|&x| x != 0.0),
5213                "nanall iter={iter}"
5214            );
5215        }
5216    }
5217
5218    #[test]
5219    fn nan_reduction_kernels_skip_correctness_1uagc() {
5220        use super::{nanmax, nanmedian, nanmin, nansum};
5221        // br-frankenpandas-1uagc: nansum/nanmin/nanmax/nanmedian skip NaN and match
5222        // finite-only oracles. Seeded LCG, no mocks.
5223        let mut s: u64 = 0x4e1a_0b2c_2d3e_4f50;
5224        let mut next = || {
5225            s = s
5226                .wrapping_mul(6_364_136_223_846_793_005)
5227                .wrapping_add(1_442_695_040_888_963_407);
5228            (s >> 33) as u32
5229        };
5230        let val = |sc: Scalar| -> f64 { sc.to_f64().unwrap_or(f64::NAN) };
5231        for iter in 0..1000u32 {
5232            let n = (next() % 12) as usize + 1;
5233            let raw: Vec<f64> = (0..n)
5234                .map(|_| {
5235                    if next() % 4 == 0 {
5236                        f64::NAN
5237                    } else {
5238                        (next() % 200) as f64 - 100.0
5239                    }
5240                })
5241                .collect();
5242            let mut finite: Vec<f64> = raw.iter().copied().filter(|x| !x.is_nan()).collect();
5243            if finite.is_empty() {
5244                continue;
5245            }
5246            let scalars: Vec<Scalar> = raw.iter().map(|&x| Scalar::Float64(x)).collect();
5247            let sum: f64 = finite.iter().sum();
5248            let mn = finite.iter().copied().fold(f64::INFINITY, f64::min);
5249            let mx = finite.iter().copied().fold(f64::NEG_INFINITY, f64::max);
5250            finite.sort_by(|a, b| a.partial_cmp(b).unwrap());
5251            let m = finite.len();
5252            let med = if m % 2 == 1 {
5253                finite[m / 2]
5254            } else {
5255                (finite[m / 2 - 1] + finite[m / 2]) / 2.0
5256            };
5257            assert!(
5258                (val(nansum(&scalars)) - sum).abs() < 1e-7,
5259                "nansum iter={iter}"
5260            );
5261            assert!(
5262                (val(nanmin(&scalars)) - mn).abs() < 1e-9,
5263                "nanmin iter={iter}"
5264            );
5265            assert!(
5266                (val(nanmax(&scalars)) - mx).abs() < 1e-9,
5267                "nanmax iter={iter}"
5268            );
5269            assert!(
5270                (val(nanmedian(&scalars)) - med).abs() < 1e-9,
5271                "nanmedian iter={iter}"
5272            );
5273        }
5274    }
5275
5276    #[test]
5277    fn nanvar_ddof_nanstd_nan_skip_p00ag() {
5278        use super::{nanmean, nanstd, nanvar};
5279        // br-frankenpandas-p00ag: nanmean/nanvar/nanstd skip NaN; ddof picks the
5280        // denominator; nanstd==sqrt(nanvar). Seeded LCG, no mocks.
5281        let mut s: u64 = 0x4e0a_0b1c_2d3e_4f50;
5282        let mut next = || {
5283            s = s
5284                .wrapping_mul(6_364_136_223_846_793_005)
5285                .wrapping_add(1_442_695_040_888_963_407);
5286            (s >> 33) as u32
5287        };
5288        let val = |sc: Scalar| -> f64 { sc.to_f64().unwrap_or(f64::NAN) };
5289        for iter in 0..1000u32 {
5290            let n = (next() % 10) as usize + 2;
5291            let raw: Vec<f64> = (0..n)
5292                .map(|_| {
5293                    if next() % 4 == 0 {
5294                        f64::NAN
5295                    } else {
5296                        (next() % 200) as f64 / 7.0
5297                    }
5298                })
5299                .collect();
5300            let finite: Vec<f64> = raw.iter().copied().filter(|x| !x.is_nan()).collect();
5301            if finite.len() < 2 {
5302                continue;
5303            }
5304            let scalars: Vec<Scalar> = raw.iter().map(|&x| Scalar::Float64(x)).collect();
5305            let nf = finite.len() as f64;
5306            let mean = finite.iter().sum::<f64>() / nf;
5307            let ss = finite.iter().map(|x| (x - mean).powi(2)).sum::<f64>();
5308            assert!(
5309                (val(nanmean(&scalars)) - mean).abs() < 1e-7,
5310                "nanmean iter={iter}"
5311            );
5312            assert!(
5313                (val(nanvar(&scalars, 0)) - ss / nf).abs() < 1e-7,
5314                "nanvar ddof0 iter={iter}"
5315            );
5316            assert!(
5317                (val(nanvar(&scalars, 1)) - ss / (nf - 1.0)).abs() < 1e-7,
5318                "nanvar ddof1 iter={iter}"
5319            );
5320            assert!(
5321                (val(nanstd(&scalars, 1)) - (ss / (nf - 1.0)).sqrt()).abs() < 1e-7,
5322                "nanstd ddof1 iter={iter}"
5323            );
5324        }
5325    }
5326
5327    #[test]
5328    fn nanskew_nankurt_min_sample_and_known_xybnq() {
5329        // br-frankenpandas-xybnq: guard nanskew/nankurt min-sample-size (NaN below
5330        // threshold) + known pandas G1/G2 values (the f4dc5540 inline-copy bug area).
5331        use super::{nankurt, nanskew};
5332        let f = |xs: &[f64]| -> Vec<Scalar> { xs.iter().map(|&x| Scalar::Float64(x)).collect() };
5333        let val = |s: Scalar| -> Option<f64> {
5334            if s.is_missing() {
5335                None
5336            } else {
5337                s.to_f64().ok()
5338            }
5339        };
5340
5341        // skew needs >= 3 observations.
5342        assert_eq!(val(nanskew(&f(&[1.0, 2.0]))), None, "skew n=2 -> NaN");
5343        let sym = val(nanskew(&f(&[1.0, 2.0, 3.0]))).expect("skew n=3");
5344        assert!(sym.abs() < 1e-9, "symmetric skew ~0, got {sym}");
5345        let right = val(nanskew(&f(&[1.0, 1.0, 1.0, 5.0]))).expect("skew n=4");
5346        assert!(right > 0.0, "right-skewed -> positive skew, got {right}");
5347
5348        // kurt needs >= 4 observations.
5349        assert_eq!(val(nankurt(&f(&[1.0, 2.0, 3.0]))), None, "kurt n=3 -> NaN");
5350        let k = val(nankurt(&f(&[1.0, 2.0, 3.0, 4.0, 5.0]))).expect("kurt n=5");
5351        assert!(
5352            (k - (-1.2)).abs() < 1e-6,
5353            "pandas kurt([1..5]) == -1.2, got {k}"
5354        );
5355    }
5356
5357    /// behind astype/promotion. Property test of its confirmed identity +
5358    /// numeric/bool coercion rules over random scalars. Deterministic seeded LCG.
5359    #[test]
5360    fn cast_scalar_to_utf8_formatting_yes7i() {
5361        // Property (br-frankenpandas-yes7i): cast to Utf8 formats per pandas
5362        // astype(str): Int64 -> decimal, Bool -> True/False. Seeded LCG, no mocks.
5363        let mut st: u64 = 0x4e57_0b1c_2d3e_4f50;
5364        let mut next = || {
5365            st = st
5366                .wrapping_mul(6_364_136_223_846_793_005)
5367                .wrapping_add(1_442_695_040_888_963_407);
5368            (st >> 33) as u32
5369        };
5370        for _ in 0..3000u32 {
5371            let n = (next() % 4_000_001) as i64 - 2_000_000;
5372            assert_eq!(
5373                cast_scalar(&Scalar::Int64(n), DType::Utf8).unwrap(),
5374                Scalar::Utf8(n.to_string())
5375            );
5376        }
5377        assert_eq!(
5378            cast_scalar(&Scalar::Bool(true), DType::Utf8).unwrap(),
5379            Scalar::Utf8("True".to_string())
5380        );
5381        assert_eq!(
5382            cast_scalar(&Scalar::Bool(false), DType::Utf8).unwrap(),
5383            Scalar::Utf8("False".to_string())
5384        );
5385    }
5386
5387    #[test]
5388    fn cast_scalar_coercion_rules_6a83t() {
5389        let mut state: u64 = 0x5a17_c0de_1234_abcd;
5390        let mut next = || {
5391            state = state
5392                .wrapping_mul(6_364_136_223_846_793_005)
5393                .wrapping_add(1_442_695_040_888_963_407);
5394            (state >> 33) as u32
5395        };
5396
5397        for _ in 0..4000u32 {
5398            let n = (next() % 21) as i64 - 10; // -10..=10, incl 0
5399            let b = next() % 2 == 0;
5400            let f = f64::from((next() % 41) as i32 - 20) / 4.0; // finite, incl 0.0
5401
5402            let i = Scalar::Int64(n);
5403            let bo = Scalar::Bool(b);
5404            let fl = Scalar::Float64(f);
5405
5406            // Identity casts.
5407            assert_eq!(cast_scalar(&i, DType::Int64), Ok(i.clone()));
5408            assert_eq!(cast_scalar(&bo, DType::Bool), Ok(bo.clone()));
5409            assert_eq!(cast_scalar(&fl, DType::Float64), Ok(fl.clone()));
5410
5411            // Representation-preserving nullable identities.
5412            assert_eq!(cast_scalar(&i, DType::Int64Nullable), Ok(i.clone()));
5413            assert_eq!(cast_scalar(&bo, DType::BoolNullable), Ok(bo.clone()));
5414
5415            // Int64 coercions.
5416            assert_eq!(cast_scalar(&i, DType::Bool), Ok(Scalar::Bool(n != 0)));
5417            assert_eq!(
5418                cast_scalar(&i, DType::Float64),
5419                Ok(Scalar::Float64(n as f64))
5420            );
5421
5422            // Bool coercions.
5423            assert_eq!(
5424                cast_scalar(&bo, DType::Int64),
5425                Ok(Scalar::Int64(i64::from(b)))
5426            );
5427            assert_eq!(
5428                cast_scalar(&bo, DType::Float64),
5429                Ok(Scalar::Float64(if b { 1.0 } else { 0.0 }))
5430            );
5431
5432            // Finite Float64 -> Int64 truncates toward zero (x as i64).
5433            assert_eq!(cast_scalar(&fl, DType::Int64), Ok(Scalar::Int64(f as i64)));
5434        }
5435    }
5436
5437    /// br-frankenpandas-esjjy / fd90.182: ergonomic From impls for Scalar.
5438    #[test]
5439    fn scalar_from_primitive_types() {
5440        // Each primitive maps to its canonical Scalar variant.
5441        assert_eq!(Scalar::from(true), Scalar::Bool(true));
5442        assert_eq!(Scalar::from(42i64), Scalar::Int64(42));
5443        assert_eq!(Scalar::from(1.5f64), Scalar::Float64(1.5));
5444        assert_eq!(Scalar::from("hi"), Scalar::Utf8("hi".to_owned()));
5445        assert_eq!(
5446            Scalar::from(String::from("world")),
5447            Scalar::Utf8("world".to_owned())
5448        );
5449
5450        // .into() works in mixed-type Vec<Scalar> contexts (the README's
5451        // case_when example pattern, and what fd90.181 needed for apply_row
5452        // closures).
5453        let mixed: Vec<Scalar> = vec![1i64.into(), 2.0f64.into(), "three".into()];
5454        assert_eq!(mixed.len(), 3);
5455        assert_eq!(mixed[0], Scalar::Int64(1));
5456        assert_eq!(mixed[1], Scalar::Float64(2.0));
5457        assert_eq!(mixed[2], Scalar::Utf8("three".to_owned()));
5458    }
5459
5460    #[test]
5461    fn dtype_inference_coerces_numeric_values() {
5462        let values = vec![Scalar::Bool(true), Scalar::Int64(7), Scalar::Float64(3.5)];
5463        assert_eq!(
5464            infer_dtype(&values).expect("dtype should infer"),
5465            DType::Float64
5466        );
5467    }
5468
5469    #[test]
5470    fn interval_scalar_has_dtype_storage_and_unique_semantics_5g5uj() {
5471        let left = Scalar::Interval(Interval::new(0.0, 1.0, IntervalClosed::Right));
5472        let right = Scalar::Interval(Interval::new(1.0, 2.0, IntervalClosed::Right));
5473        assert_eq!(left.dtype(), DType::Interval);
5474        assert!(!left.is_missing());
5475        assert_eq!(
5476            infer_dtype(&[left.clone(), right.clone()]).expect("interval dtype"),
5477            DType::Interval
5478        );
5479        assert_eq!(
5480            common_dtype(DType::Interval, DType::Interval).expect("same interval dtype"),
5481            DType::Interval
5482        );
5483        assert_eq!(
5484            cast_scalar(&Scalar::Null(NullKind::Null), DType::Interval).expect("missing casts"),
5485            Scalar::Null(NullKind::Null)
5486        );
5487        assert_eq!(
5488            cast_scalar(&left, DType::Utf8).expect("interval string cast"),
5489            Scalar::Utf8("(0.0, 1.0]".to_owned())
5490        );
5491        assert_eq!(
5492            super::nannunique(&[left.clone(), right, left, Scalar::Null(NullKind::Null)]),
5493            Scalar::Int64(2)
5494        );
5495    }
5496
5497    #[test]
5498    fn cast_scalar_parses_temporal_extension_strings_avm08() {
5499        let expected_nanos = super::Timestamp::parse("2024-01-15T10:30:45")
5500            .expect("timestamp parse")
5501            .nanos;
5502        assert_eq!(
5503            cast_scalar(
5504                &Scalar::Utf8("2024-01-15T10:30:45".to_owned()),
5505                DType::Datetime64
5506            )
5507            .expect("datetime cast"),
5508            Scalar::Datetime64(expected_nanos)
5509        );
5510        assert_eq!(
5511            cast_scalar(&Scalar::Utf8("2024Q1".to_owned()), DType::Period).expect("period cast"),
5512            Scalar::Period(Period::new(216, PeriodFreq::Quarterly))
5513        );
5514        assert_eq!(
5515            cast_scalar(&Scalar::Utf8("(0, 1]".to_owned()), DType::Interval)
5516                .expect("interval cast"),
5517            Scalar::Interval(Interval::new(0.0, 1.0, IntervalClosed::Right))
5518        );
5519    }
5520
5521    #[test]
5522    fn missing_values_get_target_missing_marker() {
5523        let missing = Scalar::Null(NullKind::Null);
5524        let cast = cast_scalar(&missing, DType::Float64).expect("missing casts");
5525        assert_eq!(cast, Scalar::Null(NullKind::NaN));
5526    }
5527
5528    #[test]
5529    fn cast_scalar_to_utf8_uses_pandas_string_spellings() {
5530        let cases = [
5531            (Scalar::Bool(true), "True"),
5532            (Scalar::Bool(false), "False"),
5533            (Scalar::Int64(-7), "-7"),
5534            (Scalar::Float64(1.0), "1.0"),
5535            (Scalar::Float64(1.5), "1.5"),
5536            (Scalar::Float64(f64::NAN), "nan"),
5537            (Scalar::Null(NullKind::Null), "None"),
5538            (Scalar::Null(NullKind::NaN), "nan"),
5539            (Scalar::Null(NullKind::NaT), "NaT"),
5540        ];
5541
5542        for (value, expected) in cases {
5543            assert_eq!(
5544                cast_scalar(&value, DType::Utf8).expect("cast"),
5545                Scalar::Utf8(expected.to_owned())
5546            );
5547        }
5548    }
5549
5550    #[test]
5551    fn semantic_eq_treats_nan_as_equal() {
5552        let left = Scalar::Float64(f64::NAN);
5553        let right = Scalar::Null(NullKind::NaN);
5554        assert!(left.semantic_eq(&right));
5555    }
5556
5557    #[test]
5558    fn semantic_eq_treats_nan_as_missing_null() {
5559        let left = Scalar::Float64(f64::NAN);
5560        let right = Scalar::Null(NullKind::Null);
5561        assert!(left.semantic_eq(&right));
5562    }
5563
5564    #[test]
5565    fn common_dtype_rejects_string_numeric_mix() {
5566        let err = common_dtype(DType::Utf8, DType::Int64).expect_err("must fail");
5567        assert_eq!(
5568            err.to_string(),
5569            "dtype coercion from Utf8 to Int64 has no compatible common type"
5570        );
5571        let err = common_dtype(DType::Float64, DType::Utf8).expect_err("must fail");
5572        assert_eq!(
5573            err.to_string(),
5574            "dtype coercion from Float64 to Utf8 has no compatible common type"
5575        );
5576    }
5577
5578    #[test]
5579    fn sparse_dtype_normalizes_fill_value_to_value_dtype() {
5580        let dtype = SparseDType::new(DType::Float64, Scalar::Int64(0)).expect("fill should cast");
5581
5582        assert_eq!(dtype.value_dtype, DType::Float64);
5583        assert_eq!(dtype.fill_value, Scalar::Float64(0.0));
5584    }
5585
5586    #[test]
5587    fn sparse_dtype_rejects_sparse_value_dtype() {
5588        let err = SparseDType::new(DType::Sparse, Scalar::Int64(0)).expect_err("must reject");
5589
5590        assert_eq!(err.to_string(), "sparse value dtype cannot be Sparse");
5591    }
5592
5593    #[test]
5594    fn common_dtype_rejects_sparse_dense_mix() {
5595        let err = common_dtype(DType::Sparse, DType::Int64).expect_err("must fail");
5596
5597        assert_eq!(
5598            err.to_string(),
5599            "dtype coercion from Sparse to Int64 has no compatible common type"
5600        );
5601    }
5602
5603    // ── Nullable Int64/Bool dtype tests (br-frankenpandas-rg8ys.6.4) ────
5604
5605    #[test]
5606    fn nullable_int64_promotion_matrix() {
5607        // Int64 + Int64Nullable -> Int64Nullable (nullable absorbs)
5608        assert_eq!(
5609            common_dtype(DType::Int64, DType::Int64Nullable).unwrap(),
5610            DType::Int64Nullable
5611        );
5612        assert_eq!(
5613            common_dtype(DType::Int64Nullable, DType::Int64).unwrap(),
5614            DType::Int64Nullable
5615        );
5616
5617        // Int64Nullable + Float64 -> Float64 (float always wins)
5618        assert_eq!(
5619            common_dtype(DType::Int64Nullable, DType::Float64).unwrap(),
5620            DType::Float64
5621        );
5622        assert_eq!(
5623            common_dtype(DType::Float64, DType::Int64Nullable).unwrap(),
5624            DType::Float64
5625        );
5626
5627        // Int64Nullable + Int64Nullable -> Int64Nullable
5628        assert_eq!(
5629            common_dtype(DType::Int64Nullable, DType::Int64Nullable).unwrap(),
5630            DType::Int64Nullable
5631        );
5632
5633        // Bool + Int64Nullable -> Int64Nullable
5634        assert_eq!(
5635            common_dtype(DType::Bool, DType::Int64Nullable).unwrap(),
5636            DType::Int64Nullable
5637        );
5638
5639        // BoolNullable + Int64 -> Int64Nullable
5640        assert_eq!(
5641            common_dtype(DType::BoolNullable, DType::Int64).unwrap(),
5642            DType::Int64Nullable
5643        );
5644    }
5645
5646    #[test]
5647    fn nullable_bool_promotion_matrix() {
5648        // Bool + BoolNullable -> BoolNullable
5649        assert_eq!(
5650            common_dtype(DType::Bool, DType::BoolNullable).unwrap(),
5651            DType::BoolNullable
5652        );
5653        assert_eq!(
5654            common_dtype(DType::BoolNullable, DType::Bool).unwrap(),
5655            DType::BoolNullable
5656        );
5657
5658        // BoolNullable + Float64 -> Float64
5659        assert_eq!(
5660            common_dtype(DType::BoolNullable, DType::Float64).unwrap(),
5661            DType::Float64
5662        );
5663    }
5664
5665    #[test]
5666    fn dtype_is_nullable_helper() {
5667        assert!(DType::Int64Nullable.is_nullable());
5668        assert!(DType::BoolNullable.is_nullable());
5669        assert!(!DType::Int64.is_nullable());
5670        assert!(!DType::Bool.is_nullable());
5671        assert!(!DType::Float64.is_nullable());
5672        assert!(!DType::Utf8.is_nullable());
5673    }
5674
5675    #[test]
5676    fn dtype_to_nullable_conversions() {
5677        assert_eq!(DType::Int64.to_nullable(), DType::Int64Nullable);
5678        assert_eq!(DType::Bool.to_nullable(), DType::BoolNullable);
5679        assert_eq!(DType::Float64.to_nullable(), DType::Float64); // unchanged
5680        assert_eq!(DType::Int64Nullable.to_nullable(), DType::Int64Nullable);
5681    }
5682
5683    #[test]
5684    fn dtype_to_non_nullable_conversions() {
5685        assert_eq!(DType::Int64Nullable.to_non_nullable(), DType::Int64);
5686        assert_eq!(DType::BoolNullable.to_non_nullable(), DType::Bool);
5687        assert_eq!(DType::Int64.to_non_nullable(), DType::Int64); // unchanged
5688        assert_eq!(DType::Float64.to_non_nullable(), DType::Float64);
5689    }
5690
5691    #[test]
5692    fn nullable_dtype_name_reports_pandas_style() {
5693        assert_eq!(DType::Int64.name(), "int64");
5694        assert_eq!(DType::Int64Nullable.name(), "Int64"); // capital I
5695        assert_eq!(DType::Bool.name(), "bool");
5696        assert_eq!(DType::BoolNullable.name(), "boolean");
5697    }
5698
5699    #[test]
5700    fn cast_scalar_int64_nullable_identity() {
5701        let val = Scalar::Int64(42);
5702        // Int64 -> Int64Nullable is identity (no actual conversion)
5703        let result = cast_scalar(&val, DType::Int64Nullable).unwrap();
5704        assert_eq!(result, Scalar::Int64(42));
5705
5706        // Int64Nullable -> Int64 is also identity
5707        let result2 = cast_scalar(&val, DType::Int64).unwrap();
5708        assert_eq!(result2, Scalar::Int64(42));
5709    }
5710
5711    #[test]
5712    fn cast_float_to_utf8_uses_pandas_str_float_with_scientific() {
5713        // pandas astype(str) of floats == Python str(float): whole -> ".0",
5714        // shortest round-trip decimals, scientific (e+NN/e-NN) for large/small,
5715        // inf -> "inf", NaN -> "nan". Verified vs live pandas 2.2.3. (Previously
5716        // large/small lost scientific notation, e.g. 1e16 -> "10000000000000000.0".)
5717        let cases: &[(f64, &str)] = &[
5718            (1.0, "1.0"),
5719            (2.5, "2.5"),
5720            (100.0, "100.0"),
5721            (0.1, "0.1"),
5722            (0.0001, "0.0001"),
5723            (1e16, "1e+16"),
5724            (1e20, "1e+20"),
5725            (1e-5, "1e-05"),
5726            (1e-7, "1e-07"),
5727            (f64::INFINITY, "inf"),
5728            (f64::NEG_INFINITY, "-inf"),
5729        ];
5730        for (v, expected) in cases {
5731            assert_eq!(
5732                cast_scalar(&Scalar::Float64(*v), DType::Utf8).unwrap(),
5733                Scalar::Utf8((*expected).to_owned()),
5734                "float {v} -> str"
5735            );
5736        }
5737    }
5738
5739    #[test]
5740    fn cast_float_to_utf8_threshold_boundaries_match_python() {
5741        // Python str(float) switches to scientific notation only at |x| >= 1e16
5742        // or |x| < 1e-4. Values JUST INSIDE those bounds must stay decimal — a
5743        // formatter that switches to sci early (or late) diverges. All expected
5744        // values verified against Python 3 str()/repr (== pandas astype(str)).
5745        let cases: &[(f64, &str)] = &[
5746            (1e15, "1000000000000000.0"),
5747            (9_999_999_999_999_998.0, "9999999999999998.0"),
5748            (1_234_567_890_123_456.0, "1234567890123456.0"),
5749            (123_456_789_012_345.0, "123456789012345.0"),
5750            (12_345_678_901_234_567.0, "1.2345678901234568e+16"),
5751            (1e16, "1e+16"),
5752            (1.5e16, "1.5e+16"),
5753            (1e17, "1e+17"),
5754            (1e-4, "0.0001"),
5755            (5e-5, "5e-05"),
5756            (-1e15, "-1000000000000000.0"),
5757            (-1e16, "-1e+16"),
5758            (-1e-5, "-1e-05"),
5759        ];
5760        for (v, expected) in cases {
5761            assert_eq!(
5762                cast_scalar(&Scalar::Float64(*v), DType::Utf8).unwrap(),
5763                Scalar::Utf8((*expected).to_owned()),
5764                "float {v} -> str"
5765            );
5766        }
5767    }
5768
5769    #[test]
5770    fn cast_to_bool_uses_pandas_nonzero_truthiness() {
5771        // pandas astype(bool): zero -> False, any nonzero -> True (not just 0/1),
5772        // -0.0 -> False, and NaN -> True (numpy bool(nan), br-cyi4h). Verified vs
5773        // live pandas 2.2.3.
5774        let cases_int: &[(i64, bool)] = &[(0, false), (1, true), (-3, true), (2, true)];
5775        for (v, expected) in cases_int {
5776            assert_eq!(
5777                cast_scalar(&Scalar::Int64(*v), DType::Bool).unwrap(),
5778                Scalar::Bool(*expected),
5779                "int {v} -> bool"
5780            );
5781        }
5782        let cases_float: &[(f64, bool)] = &[
5783            (0.0, false),
5784            (-0.0, false),
5785            (0.1, true),
5786            (2.5, true),
5787            (1.0, true),
5788            // pandas astype(bool): NaN is truthy -> True (numpy bool). br-cyi4h.
5789            (f64::NAN, true),
5790        ];
5791        for (v, expected) in cases_float {
5792            assert_eq!(
5793                cast_scalar(&Scalar::Float64(*v), DType::Bool).unwrap(),
5794                Scalar::Bool(*expected),
5795                "float {v} -> bool"
5796            );
5797        }
5798    }
5799
5800    #[test]
5801    fn nullable_dtype_is_extension() {
5802        assert!(DType::Int64Nullable.is_extension());
5803        assert!(DType::BoolNullable.is_extension());
5804        assert!(!DType::Int64.is_extension());
5805        assert!(!DType::Bool.is_extension());
5806    }
5807
5808    #[test]
5809    fn infer_dtype_preserves_string_numeric_mix_as_utf8_bucket() {
5810        let values = vec![Scalar::Utf8("x".into()), Scalar::Int64(7)];
5811        assert_eq!(
5812            infer_dtype(&values).expect("dtype should infer"),
5813            DType::Utf8
5814        );
5815    }
5816
5817    // ── Scalar missingness methods ─────────────────────────────────────
5818
5819    #[test]
5820    fn is_null_detects_explicit_nulls() {
5821        assert!(Scalar::Null(NullKind::Null).is_null());
5822        assert!(Scalar::Null(NullKind::NaN).is_null());
5823        assert!(!Scalar::Int64(42).is_null());
5824        assert!(!Scalar::Float64(f64::NAN).is_null());
5825    }
5826
5827    #[test]
5828    fn is_na_matches_is_missing() {
5829        let vals = vec![
5830            Scalar::Null(NullKind::Null),
5831            Scalar::Float64(f64::NAN),
5832            Scalar::Int64(0),
5833            Scalar::Bool(false),
5834        ];
5835        for v in &vals {
5836            assert_eq!(v.is_na(), v.is_missing());
5837        }
5838    }
5839
5840    #[test]
5841    fn coalesce_picks_first_non_missing() {
5842        let null = Scalar::Null(NullKind::Null);
5843        let fill = Scalar::Int64(99);
5844        assert_eq!(null.coalesce(&fill), fill);
5845        assert_eq!(fill.coalesce(&null), fill);
5846    }
5847
5848    // ── Missingness utilities ──────────────────────────────────────────
5849
5850    #[test]
5851    fn isna_notna_complement() {
5852        let vals = vec![
5853            Scalar::Int64(1),
5854            Scalar::Null(NullKind::Null),
5855            Scalar::Float64(f64::NAN),
5856            Scalar::Float64(3.0),
5857        ];
5858        let na = super::isna(&vals);
5859        let not = super::notna(&vals);
5860        assert_eq!(na, vec![false, true, true, false]);
5861        for (a, b) in na.iter().zip(not.iter()) {
5862            assert_ne!(a, b);
5863        }
5864    }
5865
5866    #[test]
5867    fn count_na_counts_missing() {
5868        let vals = vec![
5869            Scalar::Int64(1),
5870            Scalar::Null(NullKind::Null),
5871            Scalar::Float64(f64::NAN),
5872        ];
5873        assert_eq!(super::count_na(&vals), 2);
5874    }
5875
5876    #[test]
5877    fn fill_na_replaces_missing() {
5878        let vals = vec![
5879            Scalar::Int64(1),
5880            Scalar::Null(NullKind::Null),
5881            Scalar::Float64(f64::NAN),
5882            Scalar::Int64(4),
5883        ];
5884        let filled = super::fill_na(&vals, &Scalar::Int64(0));
5885        assert_eq!(filled[0], Scalar::Int64(1));
5886        assert_eq!(filled[1], Scalar::Int64(0));
5887        assert_eq!(filled[2], Scalar::Int64(0));
5888        assert_eq!(filled[3], Scalar::Int64(4));
5889    }
5890
5891    #[test]
5892    fn dropna_removes_missing() {
5893        let vals = vec![
5894            Scalar::Int64(1),
5895            Scalar::Null(NullKind::Null),
5896            Scalar::Int64(3),
5897            Scalar::Float64(f64::NAN),
5898        ];
5899        let kept = super::dropna(&vals);
5900        assert_eq!(kept.len(), 2);
5901        assert_eq!(kept[0], Scalar::Int64(1));
5902        assert_eq!(kept[1], Scalar::Int64(3));
5903    }
5904
5905    #[test]
5906    fn null_helpers_match_scalar_oracle_imt0c() {
5907        // Differential vs independent scalar null-helper oracle
5908        // (br-frankenpandas-imt0c). Seeded LCG, no mocks.
5909        fn next(seed: &mut u64) -> u64 {
5910            *seed = seed
5911                .wrapping_mul(3202034522624059733)
5912                .wrapping_add(4354685564936845319);
5913            *seed
5914        }
5915
5916        fn assert_null_helpers(case: usize, values: &[Scalar], fill: &Scalar) {
5917            let expected_missing = values.iter().filter(|value| value.is_missing()).count();
5918            let expected_dropped = values
5919                .iter()
5920                .filter(|value| !value.is_missing())
5921                .cloned()
5922                .collect::<Vec<_>>();
5923            let expected_filled = values
5924                .iter()
5925                .map(|value| {
5926                    if value.is_missing() {
5927                        fill.clone()
5928                    } else {
5929                        value.clone()
5930                    }
5931                })
5932                .collect::<Vec<_>>();
5933
5934            assert_eq!(
5935                super::count_na(values),
5936                expected_missing,
5937                "case={case}: count_na mismatch for {values:?}"
5938            );
5939
5940            let dropped = super::dropna(values);
5941            assert_eq!(
5942                dropped.len(),
5943                expected_dropped.len(),
5944                "case={case}: dropna length mismatch for {values:?}"
5945            );
5946            for (pos, (actual, expected)) in dropped.iter().zip(expected_dropped.iter()).enumerate()
5947            {
5948                assert!(
5949                    actual.semantic_eq(expected),
5950                    "case={case} pos={pos}: dropna expected {expected:?}, got {actual:?}"
5951                );
5952            }
5953
5954            let filled = super::fill_na(values, fill);
5955            assert_eq!(
5956                filled.len(),
5957                expected_filled.len(),
5958                "case={case}: fill_na length mismatch for {values:?}"
5959            );
5960            for (pos, (actual, expected)) in filled.iter().zip(expected_filled.iter()).enumerate() {
5961                assert!(
5962                    actual.semantic_eq(expected),
5963                    "case={case} pos={pos}: fill_na expected {expected:?}, got {actual:?}"
5964                );
5965            }
5966        }
5967
5968        let all_missing = [
5969            Scalar::Null(NullKind::Null),
5970            Scalar::Null(NullKind::NaN),
5971            Scalar::Float64(f64::NAN),
5972            Scalar::Timedelta64(i64::MIN),
5973        ];
5974        assert_null_helpers(usize::MAX, &all_missing, &Scalar::Utf8("filled".into()));
5975
5976        let mut seed = 0xc011_a7ed_0b5e_1a55_u64;
5977        for case in 0..260 {
5978            let len = (next(&mut seed) % 83 + 1) as usize;
5979            let mut values = Vec::with_capacity(len);
5980            for pos in 0..len {
5981                let raw = (next(&mut seed) % 20_001) as i64 - 10_000;
5982                values.push(match next(&mut seed) % 11 {
5983                    0 => Scalar::Null(NullKind::Null),
5984                    1 => Scalar::Null(NullKind::NaN),
5985                    2 => Scalar::Float64(f64::NAN),
5986                    3 => Scalar::Timedelta64(i64::MIN),
5987                    4 => Scalar::Bool(raw & 1 == 0),
5988                    5 => Scalar::Int64(raw),
5989                    6 => Scalar::Float64(raw as f64 / 37.0),
5990                    7 => Scalar::Float64(if raw & 1 == 0 { 0.0 } else { -0.0 }),
5991                    8 => Scalar::Utf8(format!("null_helper_{case}_{pos}")),
5992                    9 => Scalar::Utf8(String::new()),
5993                    _ => Scalar::Timedelta64(raw),
5994                });
5995            }
5996
5997            let fill = match case % 5 {
5998                0 => Scalar::Bool(true),
5999                1 => Scalar::Int64(-777),
6000                2 => Scalar::Float64(12.5),
6001                3 => Scalar::Utf8("filled".into()),
6002                _ => Scalar::Timedelta64(123_456),
6003            };
6004            assert_null_helpers(case, &values, &fill);
6005        }
6006    }
6007
6008    // ── Nanops ─────────────────────────────────────────────────────────
6009
6010    #[test]
6011    fn nansum_skips_nulls() {
6012        let vals = vec![
6013            Scalar::Float64(1.0),
6014            Scalar::Null(NullKind::Null),
6015            Scalar::Float64(2.0),
6016            Scalar::Float64(f64::NAN),
6017            Scalar::Int64(7),
6018        ];
6019        assert_eq!(super::nansum(&vals), Scalar::Float64(10.0));
6020    }
6021
6022    #[test]
6023    fn nansum_empty_returns_zero() {
6024        assert_eq!(super::nansum(&[]), Scalar::Float64(0.0));
6025    }
6026
6027    #[test]
6028    fn nansum_nanmean_match_numeric_and_timedelta_oracle_1xmi7() {
6029        // Differential vs independent sum/mean oracles
6030        // (br-frankenpandas-1xmi7). Seeded LCG, no mocks.
6031        fn next(seed: &mut u64) -> u64 {
6032            *seed = seed
6033                .wrapping_mul(6364136223846793005)
6034                .wrapping_add(1442695040888963407);
6035            *seed
6036        }
6037
6038        fn expected_numeric(values: &[Scalar]) -> (Scalar, Scalar) {
6039            let mut sum = 0.0;
6040            let mut count = 0usize;
6041            for value in values {
6042                if value.is_missing() {
6043                    continue;
6044                }
6045                if let Ok(value) = value.to_f64() {
6046                    sum += value;
6047                    count += 1;
6048                }
6049            }
6050            let mean = if count == 0 {
6051                Scalar::Null(NullKind::NaN)
6052            } else {
6053                Scalar::Float64(sum / count as f64)
6054            };
6055            (Scalar::Float64(sum), mean)
6056        }
6057
6058        fn expected_timedelta(values: &[Scalar]) -> (Scalar, Scalar) {
6059            let mut sum = 0_i128;
6060            let mut count = 0_i128;
6061            for value in values {
6062                if let Scalar::Timedelta64(ns) = value
6063                    && !value.is_missing()
6064                {
6065                    sum += i128::from(*ns);
6066                    count += 1;
6067                }
6068            }
6069            if count == 0 {
6070                return (Scalar::Float64(0.0), Scalar::Null(NullKind::NaN));
6071            }
6072            let sum = sum.clamp(i128::from(i64::MIN), i128::from(i64::MAX));
6073            let mean = (sum / count).clamp(i128::from(i64::MIN), i128::from(i64::MAX));
6074            (
6075                Scalar::Timedelta64(sum as i64),
6076                Scalar::Timedelta64(mean as i64),
6077            )
6078        }
6079
6080        fn assert_sum_mean(
6081            case: usize,
6082            family: &str,
6083            values: &[Scalar],
6084            expected_sum: Scalar,
6085            expected_mean: Scalar,
6086        ) {
6087            let actual_sum = super::nansum(values);
6088            let actual_mean = super::nanmean(values);
6089            assert!(
6090                actual_sum.semantic_eq(&expected_sum),
6091                "case={case} family={family}: expected sum {expected_sum:?}, got {actual_sum:?} for {values:?}"
6092            );
6093            assert!(
6094                actual_mean.semantic_eq(&expected_mean),
6095                "case={case} family={family}: expected mean {expected_mean:?}, got {actual_mean:?} for {values:?}"
6096            );
6097        }
6098
6099        let all_missing = [Scalar::Null(NullKind::Null), Scalar::Float64(f64::NAN)];
6100        let (sum, mean) = expected_numeric(&all_missing);
6101        assert_sum_mean(usize::MAX, "numeric_all_missing", &all_missing, sum, mean);
6102
6103        let td_all_missing = [Scalar::Timedelta64(i64::MIN), Scalar::Null(NullKind::NaN)];
6104        let (sum, mean) = expected_timedelta(&td_all_missing);
6105        assert_sum_mean(
6106            usize::MAX - 1,
6107            "timedelta_all_missing",
6108            &td_all_missing,
6109            sum,
6110            mean,
6111        );
6112
6113        let mut seed = 0x511d_ed5a_7a11_1a55_u64;
6114        for case in 0..260 {
6115            let len = (next(&mut seed) % 89 + 1) as usize;
6116
6117            let mut numeric = Vec::with_capacity(len);
6118            numeric.push(Scalar::Int64(case as i64 - 130));
6119            for _ in 1..len {
6120                let raw = (next(&mut seed) % 20_001) as i64 - 10_000;
6121                numeric.push(match next(&mut seed) % 8 {
6122                    0 => Scalar::Null(NullKind::Null),
6123                    1 => Scalar::Null(NullKind::NaN),
6124                    2 => Scalar::Float64(f64::NAN),
6125                    3 => Scalar::Bool(raw & 1 == 0),
6126                    4 => Scalar::Int64(raw % 257),
6127                    5 => Scalar::Float64(raw as f64 / 67.0),
6128                    6 => Scalar::Float64(0.0),
6129                    _ => Scalar::Float64(-0.0),
6130                });
6131            }
6132            let (sum, mean) = expected_numeric(&numeric);
6133            assert_sum_mean(case, "numeric", &numeric, sum, mean);
6134
6135            let mut timedeltas = Vec::with_capacity(len);
6136            timedeltas.push(Scalar::Timedelta64(case as i64 - 130));
6137            for _ in 1..len {
6138                let raw = (next(&mut seed) % 20_001) as i64 - 10_000;
6139                timedeltas.push(match next(&mut seed) % 7 {
6140                    0 => Scalar::Null(NullKind::Null),
6141                    1 => Scalar::Timedelta64(i64::MIN),
6142                    _ => Scalar::Timedelta64(raw),
6143                });
6144            }
6145            let (sum, mean) = expected_timedelta(&timedeltas);
6146            assert_sum_mean(case, "timedelta", &timedeltas, sum, mean);
6147        }
6148    }
6149
6150    #[test]
6151    fn nannunique_merges_negative_zero_and_zero() {
6152        let vals = vec![
6153            Scalar::Float64(-0.0),
6154            Scalar::Float64(0.0),
6155            Scalar::Float64(1.0),
6156        ];
6157        assert_eq!(super::nannunique(&vals), Scalar::Int64(2));
6158    }
6159
6160    #[test]
6161    fn nannunique_matches_scalar_bucket_oracle_elvbg() {
6162        // Differential vs independent scalar unique-bucket oracle
6163        // (br-frankenpandas-elvbg). Seeded LCG, no mocks.
6164        fn next(seed: &mut u64) -> u64 {
6165            *seed = seed
6166                .wrapping_mul(2862933555777941757)
6167                .wrapping_add(3037000493);
6168            *seed
6169        }
6170
6171        fn same_bucket(left: &Scalar, right: &Scalar) -> bool {
6172            match (left, right) {
6173                (Scalar::Float64(left), Scalar::Float64(right)) => {
6174                    let left = if *left == 0.0 { 0.0 } else { *left };
6175                    let right = if *right == 0.0 { 0.0 } else { *right };
6176                    left.to_bits() == right.to_bits()
6177                }
6178                _ => left == right,
6179            }
6180        }
6181
6182        fn expected_nunique(values: &[Scalar]) -> i64 {
6183            let mut seen = Vec::<Scalar>::new();
6184            for value in values {
6185                if value.is_missing() {
6186                    continue;
6187                }
6188                if !seen.iter().any(|existing| same_bucket(existing, value)) {
6189                    seen.push(value.clone());
6190                }
6191            }
6192            seen.len() as i64
6193        }
6194
6195        fn assert_nannunique(case: usize, values: &[Scalar]) {
6196            assert_eq!(
6197                super::nannunique(values),
6198                Scalar::Int64(expected_nunique(values)),
6199                "case={case}: nannunique mismatch for {values:?}"
6200            );
6201        }
6202
6203        assert_nannunique(
6204            usize::MAX,
6205            &[
6206                Scalar::Float64(-0.0),
6207                Scalar::Float64(0.0),
6208                Scalar::Float64(f64::NAN),
6209                Scalar::Null(NullKind::Null),
6210                Scalar::Timedelta64(i64::MIN),
6211            ],
6212        );
6213
6214        let mut seed = 0x0e1b_60d0_b5e7_u64;
6215        for case in 0..320 {
6216            let len = (next(&mut seed) % 97 + 1) as usize;
6217            let mut values = Vec::with_capacity(len);
6218            for pos in 0..len {
6219                let raw = (next(&mut seed) % 1_001) as i64 - 500;
6220                values.push(match next(&mut seed) % 11 {
6221                    0 => Scalar::Null(NullKind::Null),
6222                    1 => Scalar::Null(NullKind::NaN),
6223                    2 => Scalar::Float64(f64::NAN),
6224                    3 => Scalar::Bool(raw & 1 == 0),
6225                    4 => Scalar::Int64(raw % 37),
6226                    5 => Scalar::Float64(raw as f64 / 19.0),
6227                    6 => Scalar::Float64(0.0),
6228                    7 => Scalar::Float64(-0.0),
6229                    8 => Scalar::Utf8(format!("uniq_{}", pos % 13)),
6230                    9 => Scalar::Utf8(String::new()),
6231                    _ => Scalar::Timedelta64(raw % 41),
6232                });
6233            }
6234            assert_nannunique(case, &values);
6235        }
6236    }
6237
6238    #[test]
6239    fn nanmean_basic() {
6240        let vals = vec![
6241            Scalar::Float64(2.0),
6242            Scalar::Null(NullKind::Null),
6243            Scalar::Float64(4.0),
6244        ];
6245        assert_eq!(super::nanmean(&vals), Scalar::Float64(3.0));
6246    }
6247
6248    #[test]
6249    fn nanmean_all_null_returns_nan() {
6250        let vals = vec![Scalar::Null(NullKind::Null), Scalar::Float64(f64::NAN)];
6251        assert!(super::nanmean(&vals).is_missing());
6252    }
6253
6254    #[test]
6255    fn nansum_nanmean_timedelta64_preserves_dtype_620mj() {
6256        // Per br-frankenpandas-620mj: pandas td_series.sum()/mean() return
6257        // Timedelta64, not Float64(0.0). Was silently zero before because
6258        // collect_finite drops Timedelta64 (to_f64 errors).
6259        let one_hour = 3_600 * 1_000_000_000_i64;
6260        let vals = vec![
6261            Scalar::Timedelta64(one_hour),
6262            Scalar::Timedelta64(2 * one_hour),
6263            Scalar::Timedelta64(3 * one_hour),
6264        ];
6265        assert_eq!(super::nansum(&vals), Scalar::Timedelta64(6 * one_hour));
6266        assert_eq!(super::nanmean(&vals), Scalar::Timedelta64(2 * one_hour));
6267    }
6268
6269    #[test]
6270    fn nansum_nanmean_timedelta64_skips_nat_620mj() {
6271        let one_hour = 3_600 * 1_000_000_000_i64;
6272        let vals = vec![
6273            Scalar::Timedelta64(Timedelta::NAT),
6274            Scalar::Timedelta64(one_hour),
6275            Scalar::Timedelta64(3 * one_hour),
6276            Scalar::Timedelta64(Timedelta::NAT),
6277        ];
6278        // NAT is missing → skipped. Sum: 1h+3h=4h; mean: 2h.
6279        assert_eq!(super::nansum(&vals), Scalar::Timedelta64(4 * one_hour));
6280        assert_eq!(super::nanmean(&vals), Scalar::Timedelta64(2 * one_hour));
6281    }
6282
6283    #[test]
6284    fn nansum_nanmean_mixed_timedelta_other_falls_back_620mj() {
6285        // Mixed Timedelta64 + other type bails out of the Timedelta path
6286        // and uses Float64 collect_finite (which drops Timedelta).
6287        // Preserves existing cross-type behavior (effectively ignores TD).
6288        let vals = vec![Scalar::Timedelta64(3600 * 1_000_000_000), Scalar::Int64(5)];
6289        // Int64(5) makes it through to_f64 → 5.0; Timedelta is dropped.
6290        assert_eq!(super::nansum(&vals), Scalar::Float64(5.0));
6291    }
6292
6293    #[test]
6294    fn nancount_counts_non_missing() {
6295        let vals = vec![
6296            Scalar::Int64(1),
6297            Scalar::Null(NullKind::Null),
6298            Scalar::Float64(3.0),
6299        ];
6300        assert_eq!(super::nancount(&vals), Scalar::Int64(2));
6301    }
6302
6303    #[test]
6304    fn nanany_nanall_nancount_match_scalar_oracle_zr2qg() {
6305        // Differential vs scalar truthiness/count oracle
6306        // (br-frankenpandas-zr2qg). Seeded LCG, no mocks.
6307        fn next(seed: &mut u64) -> u64 {
6308            *seed = seed
6309                .wrapping_mul(6364136223846793005)
6310                .wrapping_add(1442695040888963407);
6311            *seed
6312        }
6313
6314        fn truthy(value: &Scalar) -> Option<bool> {
6315            if value.is_missing() {
6316                return None;
6317            }
6318            match value {
6319                Scalar::Bool(value) => Some(*value),
6320                Scalar::Int64(value) => Some(*value != 0),
6321                Scalar::Float64(value) => Some(*value != 0.0),
6322                Scalar::Utf8(value) => Some(!value.is_empty()),
6323                Scalar::Timedelta64(value) => Some(*value != 0),
6324                _ => None,
6325            }
6326        }
6327
6328        fn assert_nanops(case: usize, values: &[Scalar]) {
6329            let truth_values = values.iter().filter_map(truthy).collect::<Vec<_>>();
6330            let expected_any = truth_values.iter().any(|value| *value);
6331            let expected_all = !truth_values.iter().any(|value| !*value);
6332            let expected_count = values.iter().filter(|value| !value.is_missing()).count() as i64;
6333
6334            assert_eq!(
6335                super::nanany(values),
6336                Scalar::Bool(expected_any),
6337                "case={case}: nanany mismatch for {values:?}"
6338            );
6339            assert_eq!(
6340                super::nanall(values),
6341                Scalar::Bool(expected_all),
6342                "case={case}: nanall mismatch for {values:?}"
6343            );
6344            assert_eq!(
6345                super::nancount(values),
6346                Scalar::Int64(expected_count),
6347                "case={case}: nancount mismatch for {values:?}"
6348            );
6349        }
6350
6351        assert_nanops(
6352            usize::MAX,
6353            &[
6354                Scalar::Null(NullKind::Null),
6355                Scalar::Null(NullKind::NaN),
6356                Scalar::Float64(f64::NAN),
6357                Scalar::Timedelta64(i64::MIN),
6358            ],
6359        );
6360
6361        let mut seed = 0x7a20_2f7e_5ca1_ab1e_u64;
6362        for case in 0..320 {
6363            let len = (next(&mut seed) % 89 + 1) as usize;
6364            let mut values = Vec::with_capacity(len);
6365            for pos in 0..len {
6366                let raw = (next(&mut seed) % 10_001) as i64 - 5_000;
6367                let value = match next(&mut seed) % 12 {
6368                    0 => Scalar::Null(NullKind::Null),
6369                    1 => Scalar::Null(NullKind::NaN),
6370                    2 => Scalar::Float64(f64::NAN),
6371                    3 => Scalar::Bool(raw & 1 == 0),
6372                    4 => Scalar::Bool(false),
6373                    5 => Scalar::Int64(raw % 17),
6374                    6 => Scalar::Int64(0),
6375                    7 => Scalar::Float64(raw as f64 / 23.0),
6376                    8 => Scalar::Float64(0.0),
6377                    9 => Scalar::Utf8(if raw & 1 == 0 {
6378                        String::new()
6379                    } else {
6380                        format!("nanops_{case}_{pos}")
6381                    }),
6382                    10 => Scalar::Timedelta64(raw),
6383                    _ => Scalar::Timedelta64(0),
6384                };
6385                values.push(value);
6386            }
6387            assert_nanops(case, &values);
6388        }
6389    }
6390
6391    #[test]
6392    fn nanmin_basic() {
6393        let vals = vec![
6394            Scalar::Float64(5.0),
6395            Scalar::Null(NullKind::Null),
6396            Scalar::Float64(2.0),
6397            Scalar::Float64(8.0),
6398        ];
6399        assert_eq!(super::nanmin(&vals), Scalar::Float64(2.0));
6400    }
6401
6402    #[test]
6403    fn nanmax_basic() {
6404        let vals = vec![
6405            Scalar::Float64(5.0),
6406            Scalar::Float64(f64::NAN),
6407            Scalar::Float64(8.0),
6408        ];
6409        assert_eq!(super::nanmax(&vals), Scalar::Float64(8.0));
6410    }
6411
6412    #[test]
6413    fn nanmin_nanmax_empty_returns_nan() {
6414        assert!(super::nanmin(&[]).is_missing());
6415        assert!(super::nanmax(&[]).is_missing());
6416    }
6417
6418    #[test]
6419    fn nanmin_nanmax_match_same_family_oracle_vj7ds() {
6420        // Differential vs independent same-family comparator oracle
6421        // (br-frankenpandas-vj7ds). Seeded LCG, no mocks.
6422        fn next(seed: &mut u64) -> u64 {
6423            *seed = seed
6424                .wrapping_mul(6364136223846793005)
6425                .wrapping_add(1442695040888963407);
6426            *seed
6427        }
6428
6429        fn family_cmp(left: &Scalar, right: &Scalar) -> std::cmp::Ordering {
6430            match (left, right) {
6431                (Scalar::Bool(left), Scalar::Bool(right)) => left.cmp(right),
6432                (Scalar::Int64(left), Scalar::Int64(right)) => left.cmp(right),
6433                (Scalar::Float64(left), Scalar::Float64(right)) => {
6434                    left.partial_cmp(right).expect("finite floats")
6435                }
6436                (Scalar::Utf8(left), Scalar::Utf8(right)) => left.cmp(right),
6437                (Scalar::Timedelta64(left), Scalar::Timedelta64(right)) => left.cmp(right),
6438                _ => panic!("mixed family in nanmin/nanmax oracle"),
6439            }
6440        }
6441
6442        fn assert_minmax(case: usize, family: &str, values: &[Scalar]) {
6443            let present = values
6444                .iter()
6445                .filter(|value| !value.is_missing())
6446                .cloned()
6447                .collect::<Vec<_>>();
6448            let actual_min = super::nanmin(values);
6449            let actual_max = super::nanmax(values);
6450            if present.is_empty() {
6451                assert!(
6452                    actual_min.is_missing(),
6453                    "case={case} family={family}: expected missing min for {values:?}, got {actual_min:?}"
6454                );
6455                assert!(
6456                    actual_max.is_missing(),
6457                    "case={case} family={family}: expected missing max for {values:?}, got {actual_max:?}"
6458                );
6459                return;
6460            }
6461
6462            let expected_min = present.iter().min_by(|left, right| family_cmp(left, right));
6463            let expected_max = present.iter().max_by(|left, right| family_cmp(left, right));
6464            assert!(
6465                actual_min.semantic_eq(expected_min.expect("min")),
6466                "case={case} family={family}: expected min {:?}, got {actual_min:?} for {values:?}",
6467                expected_min.expect("min")
6468            );
6469            assert!(
6470                actual_max.semantic_eq(expected_max.expect("max")),
6471                "case={case} family={family}: expected max {:?}, got {actual_max:?} for {values:?}",
6472                expected_max.expect("max")
6473            );
6474        }
6475
6476        let all_missing = [
6477            Scalar::Null(NullKind::Null),
6478            Scalar::Null(NullKind::NaN),
6479            Scalar::Float64(f64::NAN),
6480            Scalar::Timedelta64(i64::MIN),
6481        ];
6482        assert_minmax(usize::MAX, "all_missing", &all_missing);
6483
6484        let mut seed = 0xa11c_0aba_2e7d_f00d_u64;
6485        for case in 0..240 {
6486            let len = (next(&mut seed) % 73 + 1) as usize;
6487
6488            let mut ints = Vec::with_capacity(len);
6489            ints.push(Scalar::Int64(case as i64 - 120));
6490            for _ in 1..len {
6491                let raw = (next(&mut seed) % 1_001) as i64 - 500;
6492                ints.push(match next(&mut seed) % 6 {
6493                    0 => Scalar::Null(NullKind::Null),
6494                    1 => Scalar::Null(NullKind::NaN),
6495                    _ => Scalar::Int64(raw),
6496                });
6497            }
6498            assert_minmax(case, "int", &ints);
6499
6500            let mut floats = Vec::with_capacity(len);
6501            floats.push(Scalar::Float64(case as f64 / 11.0));
6502            for _ in 1..len {
6503                let raw = (next(&mut seed) % 20_001) as i64 - 10_000;
6504                floats.push(match next(&mut seed) % 8 {
6505                    0 | 1 => Scalar::Float64(f64::NAN),
6506                    2 => Scalar::Float64(f64::INFINITY),
6507                    3 => Scalar::Float64(f64::NEG_INFINITY),
6508                    4 => Scalar::Float64(0.0),
6509                    5 => Scalar::Float64(-0.0),
6510                    _ => Scalar::Float64(raw as f64 / 41.0),
6511                });
6512            }
6513            assert_minmax(case, "float", &floats);
6514
6515            let mut bools = Vec::with_capacity(len);
6516            bools.push(Scalar::Bool(case & 1 == 0));
6517            for _ in 1..len {
6518                bools.push(match next(&mut seed) % 5 {
6519                    0 => Scalar::Null(NullKind::Null),
6520                    1 => Scalar::Null(NullKind::NaN),
6521                    raw => Scalar::Bool(raw & 1 == 0),
6522                });
6523            }
6524            assert_minmax(case, "bool", &bools);
6525
6526            let mut utf8 = Vec::with_capacity(len);
6527            utf8.push(Scalar::Utf8(format!("minmax_{}", case % 17)));
6528            for pos in 1..len {
6529                utf8.push(match next(&mut seed) % 7 {
6530                    0 => Scalar::Null(NullKind::Null),
6531                    1 => Scalar::Null(NullKind::NaN),
6532                    raw => Scalar::Utf8(format!("minmax_{}_{}", raw, pos % 11)),
6533                });
6534            }
6535            assert_minmax(case, "utf8", &utf8);
6536
6537            let mut timedeltas = Vec::with_capacity(len);
6538            timedeltas.push(Scalar::Timedelta64(case as i64 - 120));
6539            for _ in 1..len {
6540                let raw = (next(&mut seed) % 1_003) as i64 - 501;
6541                timedeltas.push(match next(&mut seed) % 7 {
6542                    0 => Scalar::Null(NullKind::Null),
6543                    1 => Scalar::Timedelta64(i64::MIN),
6544                    _ => Scalar::Timedelta64(raw),
6545                });
6546            }
6547            assert_minmax(case, "timedelta", &timedeltas);
6548        }
6549    }
6550
6551    #[test]
6552    fn nanmin_nanmax_mixed_incompatible_types_returns_nan() {
6553        let vals = vec![Scalar::Int64(5), Scalar::Utf8("hello".into())];
6554        assert!(super::nanmin(&vals).is_missing());
6555        assert!(super::nanmax(&vals).is_missing());
6556
6557        let vals2 = vec![Scalar::Utf8("a".into()), Scalar::Float64(3.0)];
6558        assert!(super::nanmin(&vals2).is_missing());
6559        assert!(super::nanmax(&vals2).is_missing());
6560    }
6561
6562    #[test]
6563    fn nanmin_nanmax_compatible_numeric_types_ok() {
6564        let vals = vec![Scalar::Int64(5), Scalar::Float64(3.0), Scalar::Bool(true)];
6565        assert_eq!(super::nanmin(&vals), Scalar::Bool(true));
6566        assert_eq!(super::nanmax(&vals), Scalar::Int64(5));
6567    }
6568
6569    #[test]
6570    fn nanmin_nanmax_timedelta64_returns_timedelta_yic5m() {
6571        // Per br-frankenpandas-yic5m: nanmin/nanmax on Timedelta64 returns
6572        // the smallest/largest Timedelta64 — was silently NaN before
6573        // because Timedelta64.to_f64() errors and the catch-all swallowed it.
6574        let one_hour = 3_600 * 1_000_000_000_i64;
6575        let vals = vec![
6576            Scalar::Timedelta64(3 * one_hour),
6577            Scalar::Timedelta64(one_hour),
6578            Scalar::Timedelta64(2 * one_hour),
6579        ];
6580        assert_eq!(super::nanmin(&vals), Scalar::Timedelta64(one_hour));
6581        assert_eq!(super::nanmax(&vals), Scalar::Timedelta64(3 * one_hour));
6582    }
6583
6584    #[test]
6585    fn nanmin_nanmax_timedelta64_skips_nat_yic5m() {
6586        let one_hour = 3_600 * 1_000_000_000_i64;
6587        let vals = vec![
6588            Scalar::Timedelta64(Timedelta::NAT),
6589            Scalar::Timedelta64(one_hour),
6590            Scalar::Timedelta64(2 * one_hour),
6591            Scalar::Timedelta64(Timedelta::NAT),
6592        ];
6593        assert_eq!(super::nanmin(&vals), Scalar::Timedelta64(one_hour));
6594        assert_eq!(super::nanmax(&vals), Scalar::Timedelta64(2 * one_hour));
6595    }
6596
6597    #[test]
6598    fn nanmedian_odd_count() {
6599        let vals = vec![
6600            Scalar::Float64(3.0),
6601            Scalar::Null(NullKind::Null),
6602            Scalar::Float64(1.0),
6603            Scalar::Float64(2.0),
6604        ];
6605        assert_eq!(super::nanmedian(&vals), Scalar::Float64(2.0));
6606    }
6607
6608    #[test]
6609    fn nanmedian_even_count() {
6610        let vals = vec![
6611            Scalar::Float64(1.0),
6612            Scalar::Float64(3.0),
6613            Scalar::Float64(2.0),
6614            Scalar::Float64(4.0),
6615        ];
6616        assert_eq!(super::nanmedian(&vals), Scalar::Float64(2.5));
6617    }
6618
6619    #[test]
6620    fn nanmedian_matches_numeric_and_timedelta_oracle_oabhi() {
6621        // Differential vs independent sort-based median oracles
6622        // (br-frankenpandas-oabhi). Seeded LCG, no mocks.
6623        fn next(seed: &mut u64) -> u64 {
6624            *seed = seed
6625                .wrapping_mul(6364136223846793005)
6626                .wrapping_add(1442695040888963407);
6627            *seed
6628        }
6629
6630        fn expected_numeric(values: &[Scalar]) -> Scalar {
6631            let mut finite = values
6632                .iter()
6633                .filter(|value| !value.is_missing())
6634                .filter_map(|value| value.to_f64().ok())
6635                .filter(|value| !value.is_nan())
6636                .collect::<Vec<_>>();
6637            if finite.is_empty() {
6638                return Scalar::Null(NullKind::NaN);
6639            }
6640            finite.sort_by(|left, right| left.partial_cmp(right).expect("finite values"));
6641            let mid = finite.len() / 2;
6642            if finite.len().is_multiple_of(2) {
6643                Scalar::Float64((finite[mid - 1] + finite[mid]) / 2.0)
6644            } else {
6645                Scalar::Float64(finite[mid])
6646            }
6647        }
6648
6649        fn expected_timedelta(values: &[Scalar]) -> Scalar {
6650            let mut finite = values
6651                .iter()
6652                .filter_map(|value| match value {
6653                    Scalar::Timedelta64(ns) if !value.is_missing() => Some(*ns as f64),
6654                    _ => None,
6655                })
6656                .collect::<Vec<_>>();
6657            if finite.is_empty() {
6658                return Scalar::Null(NullKind::NaN);
6659            }
6660            finite.sort_by(|left, right| left.partial_cmp(right).expect("finite values"));
6661            let mid = finite.len() / 2;
6662            let median = if finite.len().is_multiple_of(2) {
6663                (finite[mid - 1] + finite[mid]) / 2.0
6664            } else {
6665                finite[mid]
6666            };
6667            Scalar::Timedelta64(median as i64)
6668        }
6669
6670        fn assert_median(case: usize, family: &str, values: &[Scalar], expected: Scalar) {
6671            let actual = super::nanmedian(values);
6672            assert!(
6673                actual.semantic_eq(&expected),
6674                "case={case} family={family}: expected {expected:?}, got {actual:?} for {values:?}"
6675            );
6676        }
6677
6678        assert_median(
6679            usize::MAX,
6680            "numeric_all_missing",
6681            &[Scalar::Null(NullKind::Null), Scalar::Float64(f64::NAN)],
6682            Scalar::Null(NullKind::NaN),
6683        );
6684        assert_median(
6685            usize::MAX - 1,
6686            "timedelta_all_missing",
6687            &[Scalar::Timedelta64(i64::MIN), Scalar::Null(NullKind::NaN)],
6688            Scalar::Null(NullKind::NaN),
6689        );
6690
6691        let mut seed = 0x0ab1_1eda_57a7_15e5_u64;
6692        for case in 0..220 {
6693            let len = (next(&mut seed) % 79 + 1) as usize;
6694
6695            let mut numeric = Vec::with_capacity(len);
6696            numeric.push(Scalar::Int64(case as i64 - 110));
6697            for _ in 1..len {
6698                let raw = (next(&mut seed) % 20_001) as i64 - 10_000;
6699                numeric.push(match next(&mut seed) % 8 {
6700                    0 => Scalar::Null(NullKind::Null),
6701                    1 => Scalar::Null(NullKind::NaN),
6702                    2 => Scalar::Float64(f64::NAN),
6703                    3 => Scalar::Bool(raw & 1 == 0),
6704                    4 => Scalar::Int64(raw % 251),
6705                    5 => Scalar::Float64(raw as f64 / 61.0),
6706                    6 => Scalar::Float64(0.0),
6707                    _ => Scalar::Float64(-0.0),
6708                });
6709            }
6710            assert_median(case, "numeric", &numeric, expected_numeric(&numeric));
6711
6712            let mut timedeltas = Vec::with_capacity(len);
6713            timedeltas.push(Scalar::Timedelta64(case as i64 - 110));
6714            for _ in 1..len {
6715                let raw = (next(&mut seed) % 20_001) as i64 - 10_000;
6716                timedeltas.push(match next(&mut seed) % 7 {
6717                    0 => Scalar::Null(NullKind::Null),
6718                    1 => Scalar::Timedelta64(i64::MIN),
6719                    _ => Scalar::Timedelta64(raw),
6720                });
6721            }
6722            assert_median(
6723                case,
6724                "timedelta",
6725                &timedeltas,
6726                expected_timedelta(&timedeltas),
6727            );
6728        }
6729    }
6730
6731    #[test]
6732    fn nanvar_nanstd_nansem_match_numeric_and_timedelta_oracle_k7apg() {
6733        // Differential vs independent variance/std/sem oracles
6734        // (br-frankenpandas-k7apg). Seeded LCG, no mocks.
6735        fn next(seed: &mut u64) -> u64 {
6736            *seed = seed
6737                .wrapping_mul(2862933555777941757)
6738                .wrapping_add(3037000493);
6739            *seed
6740        }
6741
6742        fn numeric_samples(values: &[Scalar]) -> Vec<f64> {
6743            values
6744                .iter()
6745                .filter(|value| !value.is_missing())
6746                .filter_map(|value| value.to_f64().ok())
6747                .collect()
6748        }
6749
6750        fn timedelta_samples(values: &[Scalar]) -> Vec<f64> {
6751            values
6752                .iter()
6753                .filter_map(|value| match value {
6754                    Scalar::Timedelta64(ns) if !value.is_missing() => Some(*ns as f64),
6755                    _ => None,
6756                })
6757                .collect()
6758        }
6759
6760        fn reductions_from_samples(samples: &[f64], ddof: usize) -> Option<(f64, f64, f64)> {
6761            if samples.len() <= ddof {
6762                return None;
6763            }
6764            let mean = samples.iter().sum::<f64>() / samples.len() as f64;
6765            let sum_sq = samples
6766                .iter()
6767                .map(|value| (value - mean).powi(2))
6768                .sum::<f64>();
6769            let var = sum_sq / (samples.len() - ddof) as f64;
6770            let std = var.sqrt();
6771            let sem = std / (samples.len() as f64).sqrt();
6772            Some((var, std, sem))
6773        }
6774
6775        fn expected_numeric(values: &[Scalar], ddof: usize) -> (Scalar, Scalar, Scalar) {
6776            let samples = numeric_samples(values);
6777            let Some((var, std, sem)) = reductions_from_samples(&samples, ddof) else {
6778                let missing = Scalar::Null(NullKind::NaN);
6779                return (missing.clone(), missing.clone(), missing);
6780            };
6781            (
6782                Scalar::Float64(var),
6783                Scalar::Float64(std),
6784                Scalar::Float64(sem),
6785            )
6786        }
6787
6788        fn expected_timedelta(values: &[Scalar], ddof: usize) -> (Scalar, Scalar, Scalar) {
6789            let samples = timedelta_samples(values);
6790            if samples.is_empty() {
6791                let missing = Scalar::Null(NullKind::NaN);
6792                return (missing.clone(), missing.clone(), missing);
6793            }
6794            let Some((var, std, sem)) = reductions_from_samples(&samples, ddof) else {
6795                let missing = Scalar::Timedelta64(i64::MIN);
6796                return (missing.clone(), missing.clone(), missing);
6797            };
6798            (
6799                Scalar::Timedelta64(var as i64),
6800                Scalar::Timedelta64(std as i64),
6801                Scalar::Timedelta64(sem as i64),
6802            )
6803        }
6804
6805        fn assert_reductions(
6806            case: usize,
6807            family: &str,
6808            values: &[Scalar],
6809            ddof: usize,
6810            expected: (Scalar, Scalar, Scalar),
6811        ) {
6812            let (expected_var, expected_std, expected_sem) = expected;
6813            let actual_var = super::nanvar(values, ddof);
6814            let actual_std = super::nanstd(values, ddof);
6815            let actual_sem = super::nansem(values, ddof);
6816            assert!(
6817                actual_var.semantic_eq(&expected_var),
6818                "case={case} family={family} ddof={ddof}: expected var {expected_var:?}, got {actual_var:?} for {values:?}"
6819            );
6820            assert!(
6821                actual_std.semantic_eq(&expected_std),
6822                "case={case} family={family} ddof={ddof}: expected std {expected_std:?}, got {actual_std:?} for {values:?}"
6823            );
6824            assert!(
6825                actual_sem.semantic_eq(&expected_sem),
6826                "case={case} family={family} ddof={ddof}: expected sem {expected_sem:?}, got {actual_sem:?} for {values:?}"
6827            );
6828        }
6829
6830        let numeric_all_missing = [Scalar::Null(NullKind::Null), Scalar::Float64(f64::NAN)];
6831        assert_reductions(
6832            usize::MAX,
6833            "numeric_all_missing",
6834            &numeric_all_missing,
6835            0,
6836            expected_numeric(&numeric_all_missing, 0),
6837        );
6838
6839        let td_all_missing = [Scalar::Timedelta64(i64::MIN), Scalar::Null(NullKind::NaN)];
6840        assert_reductions(
6841            usize::MAX - 1,
6842            "timedelta_all_missing",
6843            &td_all_missing,
6844            0,
6845            expected_timedelta(&td_all_missing, 0),
6846        );
6847
6848        let mut seed = 0x7a11_c0de_5eed_0421_u64;
6849        for case in 0..240 {
6850            let len = (next(&mut seed) % 83 + 1) as usize;
6851            let ddof = (next(&mut seed) % 4) as usize;
6852
6853            let mut numeric = Vec::with_capacity(len);
6854            numeric.push(Scalar::Float64(case as f64 / 13.0));
6855            for _ in 1..len {
6856                let raw = (next(&mut seed) % 20_001) as i64 - 10_000;
6857                numeric.push(match next(&mut seed) % 8 {
6858                    0 => Scalar::Null(NullKind::Null),
6859                    1 => Scalar::Null(NullKind::NaN),
6860                    2 => Scalar::Float64(f64::NAN),
6861                    3 => Scalar::Bool(raw & 1 == 0),
6862                    4 => Scalar::Int64(raw % 251),
6863                    5 => Scalar::Float64(raw as f64 / 73.0),
6864                    6 => Scalar::Float64(0.0),
6865                    _ => Scalar::Float64(-0.0),
6866                });
6867            }
6868            assert_reductions(
6869                case,
6870                "numeric",
6871                &numeric,
6872                ddof,
6873                expected_numeric(&numeric, ddof),
6874            );
6875
6876            let mut timedeltas = Vec::with_capacity(len);
6877            timedeltas.push(Scalar::Timedelta64(case as i64 - 120));
6878            for _ in 1..len {
6879                let raw = (next(&mut seed) % 20_001) as i64 - 10_000;
6880                timedeltas.push(match next(&mut seed) % 7 {
6881                    0 => Scalar::Null(NullKind::Null),
6882                    1 => Scalar::Timedelta64(i64::MIN),
6883                    _ => Scalar::Timedelta64(raw),
6884                });
6885            }
6886            assert_reductions(
6887                case,
6888                "timedelta",
6889                &timedeltas,
6890                ddof,
6891                expected_timedelta(&timedeltas, ddof),
6892            );
6893        }
6894    }
6895
6896    #[test]
6897    fn nanvar_population() {
6898        let vals = vec![
6899            Scalar::Float64(2.0),
6900            Scalar::Float64(4.0),
6901            Scalar::Float64(4.0),
6902            Scalar::Float64(4.0),
6903            Scalar::Float64(5.0),
6904            Scalar::Float64(5.0),
6905            Scalar::Float64(7.0),
6906            Scalar::Float64(9.0),
6907        ];
6908        let var = super::nanvar(&vals, 0);
6909        assert!(matches!(var, Scalar::Float64(_)), "expected Float64");
6910        if let Scalar::Float64(v) = var {
6911            assert!((v - 4.0).abs() < 1e-10);
6912        }
6913    }
6914
6915    #[test]
6916    fn nanvar_sample_ddof1() {
6917        let vals = vec![
6918            Scalar::Float64(2.0),
6919            Scalar::Float64(4.0),
6920            Scalar::Float64(4.0),
6921            Scalar::Float64(4.0),
6922            Scalar::Float64(5.0),
6923            Scalar::Float64(5.0),
6924            Scalar::Float64(7.0),
6925            Scalar::Float64(9.0),
6926        ];
6927        let var = super::nanvar(&vals, 1);
6928        assert!(matches!(var, Scalar::Float64(_)), "expected Float64");
6929        if let Scalar::Float64(v) = var {
6930            assert!((v - 32.0 / 7.0).abs() < 1e-10);
6931        }
6932    }
6933
6934    #[test]
6935    fn nanvar_insufficient_values_returns_nan() {
6936        let vals = vec![Scalar::Float64(5.0)];
6937        assert!(super::nanvar(&vals, 1).is_missing());
6938    }
6939
6940    #[test]
6941    fn nanstd_is_sqrt_of_var() {
6942        let vals = vec![
6943            Scalar::Float64(2.0),
6944            Scalar::Float64(4.0),
6945            Scalar::Float64(4.0),
6946            Scalar::Float64(4.0),
6947            Scalar::Float64(5.0),
6948            Scalar::Float64(5.0),
6949            Scalar::Float64(7.0),
6950            Scalar::Float64(9.0),
6951        ];
6952        let std = super::nanstd(&vals, 0);
6953        assert!(matches!(std, Scalar::Float64(_)), "expected Float64");
6954        if let Scalar::Float64(v) = std {
6955            assert!((v - 2.0).abs() < 1e-10);
6956        }
6957    }
6958
6959    #[test]
6960    fn nanmedian_timedelta64_preserves_dtype_j8ntk() {
6961        // Per br-frankenpandas-j8ntk: pandas td_series.median() returns
6962        // Timedelta64; was silently NaN before via collect_finite.
6963        let one_hour = 3_600 * 1_000_000_000_i64;
6964        let vals = vec![
6965            Scalar::Timedelta64(one_hour),
6966            Scalar::Timedelta64(2 * one_hour),
6967            Scalar::Timedelta64(3 * one_hour),
6968        ];
6969        assert_eq!(super::nanmedian(&vals), Scalar::Timedelta64(2 * one_hour));
6970    }
6971
6972    #[test]
6973    fn nanstd_timedelta64_preserves_dtype_j8ntk() {
6974        // Per br-frankenpandas-j8ntk: pandas td_series.std() returns
6975        // Timedelta64. Check Timedelta64 output and reasonable magnitude
6976        // for population std of [1h, 2h, 3h] = sqrt(2/3) * 1h.
6977        let one_hour: i64 = 3_600 * 1_000_000_000;
6978        let vals = vec![
6979            Scalar::Timedelta64(one_hour),
6980            Scalar::Timedelta64(2 * one_hour),
6981            Scalar::Timedelta64(3 * one_hour),
6982        ];
6983        let std = super::nanstd(&vals, 0);
6984        match std {
6985            Scalar::Timedelta64(ns) => {
6986                let expected = (2.0_f64 / 3.0).sqrt() * one_hour as f64;
6987                assert!(
6988                    (ns as f64 - expected).abs() < 1e6,
6989                    "expected ~{expected} ns, got {ns}"
6990                );
6991            }
6992            other => panic!("expected Timedelta64, got {other:?}"),
6993        }
6994    }
6995
6996    #[test]
6997    fn nanstd_nansem_timedelta64_insufficient_returns_nat_j8ntk() {
6998        let one_hour = 3_600 * 1_000_000_000_i64;
6999        let vals = vec![Scalar::Timedelta64(one_hour)];
7000        // ddof=1 with n=1 → underflow, returns NaT
7001        match super::nanstd(&vals, 1) {
7002            Scalar::Timedelta64(v) => assert_eq!(v, Timedelta::NAT),
7003            other => panic!("expected Timedelta64 NAT, got {other:?}"),
7004        }
7005        match super::nansem(&vals, 1) {
7006            Scalar::Timedelta64(v) => assert_eq!(v, Timedelta::NAT),
7007            other => panic!("expected Timedelta64 NAT, got {other:?}"),
7008        }
7009    }
7010
7011    #[test]
7012    fn nanops_with_mixed_types() {
7013        let vals = vec![
7014            Scalar::Bool(true),
7015            Scalar::Int64(3),
7016            Scalar::Float64(6.0),
7017            Scalar::Null(NullKind::Null),
7018        ];
7019        assert_eq!(super::nansum(&vals), Scalar::Float64(10.0));
7020        assert_eq!(super::nancount(&vals), Scalar::Int64(3));
7021    }
7022
7023    #[test]
7024    fn nanops_all_missing_returns_identity() {
7025        let vals = vec![Scalar::Null(NullKind::Null), Scalar::Float64(f64::NAN)];
7026        assert_eq!(super::nansum(&vals), Scalar::Float64(0.0));
7027        assert!(super::nanmean(&vals).is_missing());
7028        assert!(super::nanmedian(&vals).is_missing());
7029        assert!(super::nanvar(&vals, 0).is_missing());
7030        assert!(super::nanstd(&vals, 0).is_missing());
7031    }
7032
7033    // ── Timedelta tests ────────────────────────────────────────────────
7034
7035    #[test]
7036    fn timedelta_parse_simple_units() {
7037        use super::Timedelta;
7038        assert_eq!(Timedelta::parse("1d").unwrap(), Timedelta::NANOS_PER_DAY);
7039        assert_eq!(
7040            Timedelta::parse("2h").unwrap(),
7041            2 * Timedelta::NANOS_PER_HOUR
7042        );
7043        assert_eq!(
7044            Timedelta::parse("30m").unwrap(),
7045            30 * Timedelta::NANOS_PER_MIN
7046        );
7047        assert_eq!(
7048            Timedelta::parse("45s").unwrap(),
7049            45 * Timedelta::NANOS_PER_SEC
7050        );
7051        assert_eq!(
7052            Timedelta::parse("100ms").unwrap(),
7053            100 * Timedelta::NANOS_PER_MILLI
7054        );
7055        assert_eq!(
7056            Timedelta::parse("500us").unwrap(),
7057            500 * Timedelta::NANOS_PER_MICRO
7058        );
7059        assert_eq!(Timedelta::parse("1000ns").unwrap(), 1000);
7060    }
7061
7062    #[test]
7063    fn timedelta_parse_compound() {
7064        use super::Timedelta;
7065        let expected = Timedelta::NANOS_PER_DAY
7066            + 2 * Timedelta::NANOS_PER_HOUR
7067            + 30 * Timedelta::NANOS_PER_MIN;
7068        assert_eq!(Timedelta::parse("1d 2h 30m").unwrap(), expected);
7069        assert_eq!(Timedelta::parse("1d2h30m").unwrap(), expected);
7070    }
7071
7072    #[test]
7073    fn timedelta_parse_iso8601_matches_pandas_tdiso() {
7074        use super::Timedelta;
7075        // Verified vs pandas 2.2.3 Timedelta(...).value.
7076        assert_eq!(Timedelta::parse("P1DT2H3M4S").unwrap(), 93_784_000_000_000);
7077        assert_eq!(Timedelta::parse("PT1H").unwrap(), 3_600_000_000_000);
7078        assert_eq!(Timedelta::parse("PT1H30M").unwrap(), 5_400_000_000_000);
7079        assert_eq!(Timedelta::parse("P1D").unwrap(), 86_400_000_000_000);
7080        assert_eq!(Timedelta::parse("P2W").unwrap(), 1_209_600_000_000_000);
7081        assert_eq!(Timedelta::parse("PT0.5S").unwrap(), 500_000_000);
7082        // pandas quirks: T ignored, M is minutes everywhere, units in any order.
7083        assert_eq!(Timedelta::parse("P1M").unwrap(), 60_000_000_000);
7084        assert_eq!(Timedelta::parse("P1H").unwrap(), 3_600_000_000_000);
7085        assert_eq!(Timedelta::parse("PT1D").unwrap(), 86_400_000_000_000);
7086        assert_eq!(Timedelta::parse("P1D1H").unwrap(), 90_000_000_000_000);
7087        assert_eq!(Timedelta::parse("-P1DT2H").unwrap(), -93_600_000_000_000);
7088        // Rejected like pandas: years, lowercase, bare P/PT.
7089        assert!(Timedelta::parse("P1Y").is_err());
7090        assert!(Timedelta::parse("p1d").is_err());
7091        assert!(Timedelta::parse("P").is_err());
7092        assert!(Timedelta::parse("PT").is_err());
7093    }
7094
7095    #[test]
7096    fn timedelta_parse_time_format() {
7097        use super::Timedelta;
7098        let expected = Timedelta::NANOS_PER_HOUR
7099            + 30 * Timedelta::NANOS_PER_MIN
7100            + 45 * Timedelta::NANOS_PER_SEC;
7101        assert_eq!(Timedelta::parse("01:30:45").unwrap(), expected);
7102    }
7103
7104    #[test]
7105    fn timedelta_parse_time_fraction_rejects_unicode_without_panic() {
7106        use super::{Timedelta, TimedeltaError};
7107        let err = Timedelta::parse("00:00:00.\u{00e9}\u{00e9}\u{00e9}\u{00e9}\u{00e9}")
7108            .expect_err("non-ASCII fractional seconds must reject");
7109        assert!(matches!(err, TimedeltaError::InvalidFormat(_)));
7110    }
7111
7112    #[test]
7113    fn timedelta_parse_time_format_rejects_overflow_without_panic() {
7114        use super::{Timedelta, TimedeltaError};
7115        let err = Timedelta::parse("9223372036854775807:00")
7116            .expect_err("oversized hour component must reject");
7117        assert!(matches!(err, TimedeltaError::InvalidFormat(_)));
7118    }
7119
7120    #[test]
7121    fn timedelta_parse_rejects_huge_value_overflow_zw3mg() {
7122        // Per br-frankenpandas-zw3mg: the compound parser used a raw
7123        // `as i64` cast that silently saturated to i64::MAX when the
7124        // product of (decimal-digit f64) × unit multiplier overflows.
7125        // Use a large literal (no scientific notation — the lexer only
7126        // accepts digits, '.', '-'). 1e18 days × NANOS_PER_DAY (~8.64e13)
7127        // overflows i64.
7128        use super::{Timedelta, TimedeltaError};
7129        let huge = format!("{} days", "9".repeat(18));
7130        assert!(matches!(
7131            Timedelta::parse(&huge).expect_err("9...(18 9s) days must overflow"),
7132            TimedeltaError::Overflow
7133        ));
7134    }
7135
7136    #[test]
7137    fn timedelta_parse_nat() {
7138        use super::Timedelta;
7139        assert_eq!(Timedelta::parse("NaT").unwrap(), Timedelta::NAT);
7140        assert_eq!(Timedelta::parse("nat").unwrap(), Timedelta::NAT);
7141    }
7142
7143    #[test]
7144    fn timedelta_parse_negative() {
7145        use super::Timedelta;
7146        assert_eq!(Timedelta::parse("-1d").unwrap(), -Timedelta::NANOS_PER_DAY);
7147    }
7148
7149    #[test]
7150    fn timedelta_components() {
7151        use super::Timedelta;
7152        let nanos = Timedelta::NANOS_PER_DAY
7153            + Timedelta::NANOS_PER_HOUR
7154            + Timedelta::NANOS_PER_MIN
7155            + Timedelta::NANOS_PER_SEC
7156            + Timedelta::NANOS_PER_MILLI
7157            + 2 * Timedelta::NANOS_PER_MICRO
7158            + 3;
7159        let comp = Timedelta::components(nanos);
7160        assert_eq!(comp.days, 1);
7161        assert_eq!(comp.hours, 1);
7162        assert_eq!(comp.minutes, 1);
7163        assert_eq!(comp.seconds, 1);
7164        assert_eq!(comp.milliseconds, 1);
7165        assert_eq!(comp.microseconds, 2);
7166        assert_eq!(comp.nanoseconds, 3);
7167    }
7168
7169    #[test]
7170    fn timedelta_negative_components_floor_div() {
7171        use super::Timedelta;
7172        // pandas floor-normalizes negative timedeltas: pd.Timedelta(-1,'s') has
7173        // days=-1, seconds=86399, components=(-1, 23, 59, 59, 0, 0, 0).
7174        let neg_1s = -Timedelta::NANOS_PER_SEC;
7175        assert_eq!(Timedelta::days(neg_1s), -1);
7176        assert_eq!(Timedelta::seconds(neg_1s), 86399);
7177        assert_eq!(Timedelta::microseconds(neg_1s), 0);
7178        assert_eq!(Timedelta::nanoseconds(neg_1s), 0);
7179        let comp = Timedelta::components(neg_1s);
7180        assert_eq!(
7181            (
7182                comp.days,
7183                comp.hours,
7184                comp.minutes,
7185                comp.seconds,
7186                comp.milliseconds,
7187                comp.microseconds,
7188                comp.nanoseconds
7189            ),
7190            (-1, 23, 59, 59, 0, 0, 0)
7191        );
7192
7193        // pd.Timedelta(-86401,'s'): days=-2, seconds=86399.
7194        let neg = -86_401 * Timedelta::NANOS_PER_SEC;
7195        assert_eq!(Timedelta::days(neg), -2);
7196        assert_eq!(Timedelta::seconds(neg), 86399);
7197    }
7198
7199    #[test]
7200    fn timedelta_total_seconds() {
7201        use super::Timedelta;
7202        let nanos = 90_000_000_000i64; // 90 seconds
7203        assert!((Timedelta::total_seconds(nanos) - 90.0).abs() < 1e-9);
7204        assert!(Timedelta::total_seconds(Timedelta::NAT).is_nan());
7205    }
7206
7207    #[test]
7208    fn timedelta_format_basic() {
7209        use super::Timedelta;
7210        assert_eq!(Timedelta::format(Timedelta::NAT), "NaT");
7211        assert_eq!(
7212            Timedelta::format(Timedelta::NANOS_PER_DAY),
7213            "1 days 00:00:00"
7214        );
7215        assert_eq!(
7216            Timedelta::format(Timedelta::NANOS_PER_DAY + 2 * Timedelta::NANOS_PER_HOUR),
7217            "1 days 02:00:00"
7218        );
7219    }
7220
7221    #[test]
7222    fn timedelta_format_subsecond_matches_pandas() {
7223        use super::Timedelta;
7224        // pandas str(Timedelta) uses 6 fractional digits (microseconds) unless a
7225        // sub-microsecond (nanosecond) component is present, then 9 digits.
7226        // Verified vs live pandas 2.2.3.
7227        assert_eq!(
7228            Timedelta::format(1_500_000_000), // 1.5s
7229            "0 days 00:00:01.500000"
7230        );
7231        assert_eq!(
7232            Timedelta::format(1_000_000), // 1ms
7233            "0 days 00:00:00.001000"
7234        );
7235        assert_eq!(
7236            Timedelta::format(123_456_000), // 123456us
7237            "0 days 00:00:00.123456"
7238        );
7239        // Nanosecond component -> 9 digits.
7240        assert_eq!(
7241            Timedelta::format(500), // 500ns
7242            "0 days 00:00:00.000000500"
7243        );
7244        assert_eq!(Timedelta::format(123_456_789), "0 days 00:00:00.123456789");
7245    }
7246
7247    #[test]
7248    fn timedelta_format_negative_uses_python_borrow_form() {
7249        use super::Timedelta;
7250        // pandas/Python normalize negatives via floor division: the days count
7251        // goes negative, the time remainder stays non-negative, and a '+' joins
7252        // them. Verified vs live pandas 2.2.3.
7253        assert_eq!(Timedelta::format(-1_000_000_000), "-1 days +23:59:59");
7254        assert_eq!(
7255            Timedelta::format(-Timedelta::NANOS_PER_DAY),
7256            "-1 days +00:00:00"
7257        );
7258        assert_eq!(
7259            Timedelta::format(-25 * Timedelta::NANOS_PER_HOUR),
7260            "-2 days +23:00:00"
7261        );
7262        assert_eq!(
7263            Timedelta::format(-1_500_000_000),
7264            "-1 days +23:59:58.500000"
7265        );
7266        assert_eq!(Timedelta::format(-500), "-1 days +23:59:59.999999500");
7267        assert_eq!(Timedelta::format(-1), "-1 days +23:59:59.999999999");
7268    }
7269
7270    #[test]
7271    fn timedelta_isoformat_basic() {
7272        use super::Timedelta;
7273        assert_eq!(Timedelta::isoformat(Timedelta::NAT), "NaT");
7274        assert_eq!(Timedelta::isoformat(0), "P0DT0H0M0S");
7275        assert_eq!(Timedelta::isoformat(Timedelta::NANOS_PER_DAY), "P1DT0H0M0S");
7276        assert_eq!(
7277            Timedelta::isoformat(
7278                Timedelta::NANOS_PER_DAY
7279                    + 2 * Timedelta::NANOS_PER_HOUR
7280                    + 30 * Timedelta::NANOS_PER_MIN
7281                    + 45 * Timedelta::NANOS_PER_SEC
7282            ),
7283            "P1DT2H30M45S"
7284        );
7285        assert_eq!(
7286            Timedelta::isoformat(Timedelta::NANOS_PER_SEC + 500_000_000),
7287            "P0DT0H0M1.5S"
7288        );
7289        assert_eq!(
7290            Timedelta::isoformat(-(Timedelta::NANOS_PER_DAY + Timedelta::NANOS_PER_HOUR)),
7291            "-P1DT1H0M0S"
7292        );
7293    }
7294
7295    #[test]
7296    fn timedelta_floor_ceil_round() {
7297        use super::Timedelta;
7298        let nanos = Timedelta::NANOS_PER_HOUR + 30 * Timedelta::NANOS_PER_MIN;
7299
7300        // floor: rounds down
7301        assert_eq!(Timedelta::floor(nanos, "h"), Timedelta::NANOS_PER_HOUR);
7302        assert_eq!(Timedelta::floor(nanos, "d"), 0);
7303
7304        // ceil: rounds up
7305        assert_eq!(Timedelta::ceil(nanos, "h"), 2 * Timedelta::NANOS_PER_HOUR);
7306        assert_eq!(Timedelta::ceil(nanos, "d"), Timedelta::NANOS_PER_DAY);
7307
7308        // round: rounds to nearest (banker's rounding on tie)
7309        assert_eq!(Timedelta::round(nanos, "h"), 2 * Timedelta::NANOS_PER_HOUR);
7310
7311        // NaT preserved
7312        assert_eq!(Timedelta::floor(Timedelta::NAT, "h"), Timedelta::NAT);
7313        assert_eq!(Timedelta::ceil(Timedelta::NAT, "h"), Timedelta::NAT);
7314        assert_eq!(Timedelta::round(Timedelta::NAT, "h"), Timedelta::NAT);
7315
7316        // Invalid freq returns NAT
7317        assert_eq!(Timedelta::floor(nanos, "invalid"), Timedelta::NAT);
7318    }
7319
7320    #[test]
7321    fn timedelta_floor_ceil_negative_use_euclidean_rounding_t79yh() {
7322        use super::Timedelta;
7323
7324        assert_eq!(
7325            Timedelta::floor(-1, "s"),
7326            -Timedelta::NANOS_PER_SEC,
7327            "floor(-1ns, 1s)"
7328        );
7329        assert_eq!(Timedelta::ceil(-1, "s"), 0, "ceil(-1ns, 1s)");
7330        assert_eq!(
7331            Timedelta::floor(-1_500_000_000, "s"),
7332            -2 * Timedelta::NANOS_PER_SEC
7333        );
7334        assert_eq!(
7335            Timedelta::ceil(-1_500_000_000, "s"),
7336            -Timedelta::NANOS_PER_SEC
7337        );
7338        assert_eq!(
7339            Timedelta::floor(-Timedelta::NANOS_PER_SEC, "s"),
7340            -Timedelta::NANOS_PER_SEC
7341        );
7342        assert_eq!(
7343            Timedelta::ceil(-Timedelta::NANOS_PER_SEC, "s"),
7344            -Timedelta::NANOS_PER_SEC
7345        );
7346        assert_eq!(
7347            Timedelta::floor(1_500_000_000, "s"),
7348            Timedelta::NANOS_PER_SEC
7349        );
7350        assert_eq!(
7351            Timedelta::ceil(1_500_000_000, "s"),
7352            2 * Timedelta::NANOS_PER_SEC
7353        );
7354    }
7355
7356    #[test]
7357    fn timedelta_scalar_dtype() {
7358        let td = Scalar::Timedelta64(86_400_000_000_000);
7359        assert_eq!(td.dtype(), DType::Timedelta64);
7360    }
7361
7362    #[test]
7363    fn timedelta_scalar_is_missing() {
7364        use super::Timedelta;
7365        let valid = Scalar::Timedelta64(1000);
7366        let nat = Scalar::Timedelta64(Timedelta::NAT);
7367        assert!(!valid.is_missing());
7368        assert!(nat.is_missing());
7369    }
7370
7371    #[test]
7372    fn dtype_utf8_deserializes_legacy_aliases() {
7373        let dtype: DType = serde_json::from_str("\"str\"").unwrap();
7374        assert_eq!(dtype, DType::Utf8);
7375
7376        let dtype: DType = serde_json::from_str("\"string\"").unwrap();
7377        assert_eq!(dtype, DType::Utf8);
7378    }
7379
7380    #[test]
7381    fn scalar_utf8_deserializes_legacy_aliases() {
7382        let scalar: Scalar = serde_json::from_str(r#"{"kind":"str","value":"x"}"#).unwrap();
7383        assert_eq!(scalar, Scalar::Utf8("x".to_owned()));
7384
7385        let scalar: Scalar = serde_json::from_str(r#"{"kind":"string","value":"y"}"#).unwrap();
7386        assert_eq!(scalar, Scalar::Utf8("y".to_owned()));
7387    }
7388
7389    #[test]
7390    fn nancumsum_skips_nulls_and_accumulates() {
7391        let values = vec![
7392            Scalar::Float64(1.0),
7393            Scalar::Null(NullKind::NaN),
7394            Scalar::Float64(2.0),
7395            Scalar::Float64(3.0),
7396        ];
7397        let out = super::nancumsum(&values);
7398        assert!(matches!(out[0], Scalar::Float64(v) if (v - 1.0).abs() < 1e-9));
7399        assert!(out[1].is_missing());
7400        assert!(matches!(out[2], Scalar::Float64(v) if (v - 3.0).abs() < 1e-9));
7401        assert!(matches!(out[3], Scalar::Float64(v) if (v - 6.0).abs() < 1e-9));
7402    }
7403
7404    #[test]
7405    fn nancumprod_skips_nulls_and_multiplies() {
7406        let values = vec![
7407            Scalar::Float64(2.0),
7408            Scalar::Null(NullKind::NaN),
7409            Scalar::Float64(3.0),
7410            Scalar::Float64(4.0),
7411        ];
7412        let out = super::nancumprod(&values);
7413        assert!(matches!(out[0], Scalar::Float64(v) if (v - 2.0).abs() < 1e-9));
7414        assert!(out[1].is_missing());
7415        assert!(matches!(out[2], Scalar::Float64(v) if (v - 6.0).abs() < 1e-9));
7416        assert!(matches!(out[3], Scalar::Float64(v) if (v - 24.0).abs() < 1e-9));
7417    }
7418
7419    #[test]
7420    fn nancummax_tracks_running_max() {
7421        let values = vec![
7422            Scalar::Float64(1.0),
7423            Scalar::Float64(3.0),
7424            Scalar::Null(NullKind::NaN),
7425            Scalar::Float64(2.0),
7426            Scalar::Float64(5.0),
7427        ];
7428        let out = super::nancummax(&values);
7429        assert_eq!(out[0], Scalar::Float64(1.0));
7430        assert_eq!(out[1], Scalar::Float64(3.0));
7431        assert!(out[2].is_missing());
7432        assert_eq!(out[3], Scalar::Float64(3.0));
7433        assert_eq!(out[4], Scalar::Float64(5.0));
7434    }
7435
7436    #[test]
7437    fn nancummin_tracks_running_min() {
7438        let values = vec![
7439            Scalar::Float64(5.0),
7440            Scalar::Float64(3.0),
7441            Scalar::Null(NullKind::NaN),
7442            Scalar::Float64(4.0),
7443            Scalar::Float64(1.0),
7444        ];
7445        let out = super::nancummin(&values);
7446        assert_eq!(out[0], Scalar::Float64(5.0));
7447        assert_eq!(out[1], Scalar::Float64(3.0));
7448        assert!(out[2].is_missing());
7449        assert_eq!(out[3], Scalar::Float64(3.0));
7450        assert_eq!(out[4], Scalar::Float64(1.0));
7451    }
7452
7453    #[test]
7454    fn nancumsum_timedelta64_preserves_dtype_x0x91() {
7455        // Per br-frankenpandas-x0x91: pandas td_series.cumsum() returns
7456        // Timedelta64 running sums. Was silently NaN before.
7457        let one_hour = 3_600 * 1_000_000_000_i64;
7458        let values = vec![
7459            Scalar::Timedelta64(one_hour),
7460            Scalar::Timedelta64(2 * one_hour),
7461            Scalar::Timedelta64(3 * one_hour),
7462        ];
7463        let out = super::nancumsum(&values);
7464        assert_eq!(out[0], Scalar::Timedelta64(one_hour));
7465        assert_eq!(out[1], Scalar::Timedelta64(3 * one_hour));
7466        assert_eq!(out[2], Scalar::Timedelta64(6 * one_hour));
7467    }
7468
7469    #[test]
7470    fn nancummax_nancummin_timedelta64_preserves_dtype_x0x91() {
7471        let one_hour = 3_600 * 1_000_000_000_i64;
7472        let values = vec![
7473            Scalar::Timedelta64(2 * one_hour),
7474            Scalar::Timedelta64(5 * one_hour),
7475            Scalar::Timedelta64(one_hour),
7476            Scalar::Timedelta64(3 * one_hour),
7477        ];
7478        let mx = super::nancummax(&values);
7479        assert_eq!(mx[0], Scalar::Timedelta64(2 * one_hour));
7480        assert_eq!(mx[1], Scalar::Timedelta64(5 * one_hour));
7481        assert_eq!(mx[2], Scalar::Timedelta64(5 * one_hour));
7482        assert_eq!(mx[3], Scalar::Timedelta64(5 * one_hour));
7483
7484        let mn = super::nancummin(&values);
7485        assert_eq!(mn[0], Scalar::Timedelta64(2 * one_hour));
7486        assert_eq!(mn[1], Scalar::Timedelta64(2 * one_hour));
7487        assert_eq!(mn[2], Scalar::Timedelta64(one_hour));
7488        assert_eq!(mn[3], Scalar::Timedelta64(one_hour));
7489    }
7490
7491    #[test]
7492    fn nancumulative_timedelta64_skips_nat_x0x91() {
7493        let one_hour = 3_600 * 1_000_000_000_i64;
7494        let values = vec![
7495            Scalar::Timedelta64(one_hour),
7496            Scalar::Timedelta64(Timedelta::NAT),
7497            Scalar::Timedelta64(2 * one_hour),
7498        ];
7499        let cs = super::nancumsum(&values);
7500        assert_eq!(cs[0], Scalar::Timedelta64(one_hour));
7501        assert!(cs[1].is_missing());
7502        assert_eq!(cs[2], Scalar::Timedelta64(3 * one_hour));
7503    }
7504
7505    #[test]
7506    fn nancumulative_matches_numeric_and_timedelta_oracle_k63oz() {
7507        // Differential vs independent cumulative nanops oracles
7508        // (br-frankenpandas-k63oz). Seeded LCG, no mocks.
7509        fn next(seed: &mut u64) -> u64 {
7510            *seed = seed
7511                .wrapping_mul(3202034522624059733)
7512                .wrapping_add(4354685564936845319);
7513            *seed
7514        }
7515
7516        fn assert_vec(case: usize, family: &str, op: &str, actual: &[Scalar], expected: &[Scalar]) {
7517            assert_eq!(
7518                actual.len(),
7519                expected.len(),
7520                "case={case} family={family} op={op}: length mismatch"
7521            );
7522            for (pos, (actual, expected)) in actual.iter().zip(expected.iter()).enumerate() {
7523                assert!(
7524                    actual.semantic_eq(expected),
7525                    "case={case} family={family} op={op} pos={pos}: expected {expected:?}, got {actual:?}"
7526                );
7527            }
7528        }
7529
7530        fn expected_numeric(
7531            values: &[Scalar],
7532        ) -> (Vec<Scalar>, Vec<Scalar>, Vec<Scalar>, Vec<Scalar>) {
7533            let mut sum = Vec::with_capacity(values.len());
7534            let mut prod = Vec::with_capacity(values.len());
7535            let mut max = Vec::with_capacity(values.len());
7536            let mut min = Vec::with_capacity(values.len());
7537            let mut running_sum = 0.0_f64;
7538            let mut running_prod = 1.0_f64;
7539            let mut running_max: Option<f64> = None;
7540            let mut running_min: Option<f64> = None;
7541
7542            for value in values {
7543                if value.is_missing() {
7544                    sum.push(Scalar::Null(NullKind::NaN));
7545                    prod.push(Scalar::Null(NullKind::NaN));
7546                    max.push(Scalar::Null(NullKind::NaN));
7547                    min.push(Scalar::Null(NullKind::NaN));
7548                    continue;
7549                }
7550                let Ok(value) = value.to_f64() else {
7551                    sum.push(Scalar::Null(NullKind::NaN));
7552                    prod.push(Scalar::Null(NullKind::NaN));
7553                    max.push(Scalar::Null(NullKind::NaN));
7554                    min.push(Scalar::Null(NullKind::NaN));
7555                    continue;
7556                };
7557                if value.is_nan() {
7558                    sum.push(Scalar::Null(NullKind::NaN));
7559                    prod.push(Scalar::Null(NullKind::NaN));
7560                    max.push(Scalar::Null(NullKind::NaN));
7561                    min.push(Scalar::Null(NullKind::NaN));
7562                    continue;
7563                }
7564                running_sum += value;
7565                running_prod *= value;
7566                running_max = Some(running_max.map_or(value, |current| current.max(value)));
7567                running_min = Some(running_min.map_or(value, |current| current.min(value)));
7568                sum.push(Scalar::Float64(running_sum));
7569                prod.push(Scalar::Float64(running_prod));
7570                max.push(Scalar::Float64(running_max.expect("initialized")));
7571                min.push(Scalar::Float64(running_min.expect("initialized")));
7572            }
7573
7574            (sum, prod, max, min)
7575        }
7576
7577        fn expected_timedelta(values: &[Scalar]) -> (Vec<Scalar>, Vec<Scalar>, Vec<Scalar>) {
7578            let mut sum = Vec::with_capacity(values.len());
7579            let mut max = Vec::with_capacity(values.len());
7580            let mut min = Vec::with_capacity(values.len());
7581            let mut running_sum = 0_i128;
7582            let mut running_max: Option<i64> = None;
7583            let mut running_min: Option<i64> = None;
7584
7585            for value in values {
7586                if value.is_missing() {
7587                    sum.push(Scalar::Null(NullKind::NaT));
7588                    max.push(Scalar::Null(NullKind::NaT));
7589                    min.push(Scalar::Null(NullKind::NaT));
7590                    continue;
7591                }
7592                let Scalar::Timedelta64(ns) = value else {
7593                    sum.push(Scalar::Null(NullKind::NaT));
7594                    max.push(Scalar::Null(NullKind::NaT));
7595                    min.push(Scalar::Null(NullKind::NaT));
7596                    continue;
7597                };
7598                running_sum = running_sum.saturating_add(i128::from(*ns));
7599                let clamped = running_sum.clamp(i128::from(i64::MIN), i128::from(i64::MAX));
7600                running_max = Some(running_max.map_or(*ns, |current| current.max(*ns)));
7601                running_min = Some(running_min.map_or(*ns, |current| current.min(*ns)));
7602                sum.push(Scalar::Timedelta64(clamped as i64));
7603                max.push(Scalar::Timedelta64(running_max.expect("initialized")));
7604                min.push(Scalar::Timedelta64(running_min.expect("initialized")));
7605            }
7606
7607            (sum, max, min)
7608        }
7609
7610        let mut seed = 0xc0de_c63a_5eed_0421_u64;
7611        for case in 0..260 {
7612            let len = (next(&mut seed) % 89 + 1) as usize;
7613
7614            let mut numeric = Vec::with_capacity(len);
7615            numeric.push(Scalar::Int64(case as i64 - 130));
7616            for _ in 1..len {
7617                let raw = (next(&mut seed) % 20_001) as i64 - 10_000;
7618                numeric.push(match next(&mut seed) % 8 {
7619                    0 => Scalar::Null(NullKind::Null),
7620                    1 => Scalar::Null(NullKind::NaN),
7621                    2 => Scalar::Float64(f64::NAN),
7622                    3 => Scalar::Bool(raw & 1 == 0),
7623                    4 => Scalar::Int64(raw % 251),
7624                    5 => Scalar::Float64(raw as f64 / 79.0),
7625                    6 => Scalar::Float64(0.0),
7626                    _ => Scalar::Float64(-0.0),
7627                });
7628            }
7629            let (sum, prod, max, min) = expected_numeric(&numeric);
7630            assert_vec(case, "numeric", "cumsum", &super::nancumsum(&numeric), &sum);
7631            assert_vec(
7632                case,
7633                "numeric",
7634                "cumprod",
7635                &super::nancumprod(&numeric),
7636                &prod,
7637            );
7638            assert_vec(case, "numeric", "cummax", &super::nancummax(&numeric), &max);
7639            assert_vec(case, "numeric", "cummin", &super::nancummin(&numeric), &min);
7640
7641            let mut timedeltas = Vec::with_capacity(len);
7642            timedeltas.push(Scalar::Timedelta64(case as i64 - 130));
7643            for _ in 1..len {
7644                let raw = (next(&mut seed) % 20_001) as i64 - 10_000;
7645                timedeltas.push(match next(&mut seed) % 7 {
7646                    0 => Scalar::Null(NullKind::Null),
7647                    1 => Scalar::Timedelta64(i64::MIN),
7648                    _ => Scalar::Timedelta64(raw),
7649                });
7650            }
7651            let (td_sum, td_max, td_min) = expected_timedelta(&timedeltas);
7652            assert_vec(
7653                case,
7654                "timedelta",
7655                "cumsum",
7656                &super::nancumsum(&timedeltas),
7657                &td_sum,
7658            );
7659            assert_vec(
7660                case,
7661                "timedelta",
7662                "cummax",
7663                &super::nancummax(&timedeltas),
7664                &td_max,
7665            );
7666            assert_vec(
7667                case,
7668                "timedelta",
7669                "cummin",
7670                &super::nancummin(&timedeltas),
7671                &td_min,
7672            );
7673        }
7674    }
7675
7676    #[test]
7677    fn nanquantile_linear_interpolation_matches_numpy() {
7678        let values = vec![
7679            Scalar::Float64(1.0),
7680            Scalar::Float64(2.0),
7681            Scalar::Float64(3.0),
7682            Scalar::Float64(4.0),
7683            Scalar::Float64(5.0),
7684        ];
7685        // median
7686        let q = super::nanquantile(&values, 0.5);
7687        assert!(matches!(q, Scalar::Float64(v) if (v - 3.0).abs() < 1e-9));
7688        // 25th percentile: interpolate between 2.0 and 3.0 at pos 1.0 → 2.0
7689        let q25 = super::nanquantile(&values, 0.25);
7690        assert!(matches!(q25, Scalar::Float64(v) if (v - 2.0).abs() < 1e-9));
7691    }
7692
7693    #[test]
7694    fn nanquantile_ignores_nulls() {
7695        let values = vec![
7696            Scalar::Float64(1.0),
7697            Scalar::Null(NullKind::NaN),
7698            Scalar::Float64(3.0),
7699        ];
7700        let q = super::nanquantile(&values, 0.5);
7701        assert!(matches!(q, Scalar::Float64(v) if (v - 2.0).abs() < 1e-9));
7702    }
7703
7704    #[test]
7705    fn nanquantile_empty_and_out_of_range_yield_null() {
7706        assert!(super::nanquantile(&[], 0.5).is_missing());
7707        assert!(super::nanquantile(&[Scalar::Float64(1.0)], 1.5).is_missing());
7708        assert!(super::nanquantile(&[Scalar::Float64(1.0)], -0.1).is_missing());
7709    }
7710
7711    #[test]
7712    fn nanquantile_timedelta64_preserves_dtype_5djk7() {
7713        // Per br-frankenpandas-5djk7: pandas td_series.quantile(q) returns
7714        // Timedelta64 — was silently NaN before via collect_finite.
7715        let one_hour: i64 = 3_600 * 1_000_000_000;
7716        let vals = vec![
7717            Scalar::Timedelta64(one_hour),
7718            Scalar::Timedelta64(2 * one_hour),
7719            Scalar::Timedelta64(3 * one_hour),
7720            Scalar::Timedelta64(4 * one_hour),
7721            Scalar::Timedelta64(5 * one_hour),
7722        ];
7723        assert_eq!(
7724            super::nanquantile(&vals, 0.5),
7725            Scalar::Timedelta64(3 * one_hour)
7726        );
7727        assert_eq!(
7728            super::nanquantile(&vals, 0.0),
7729            Scalar::Timedelta64(one_hour)
7730        );
7731        assert_eq!(
7732            super::nanquantile(&vals, 1.0),
7733            Scalar::Timedelta64(5 * one_hour)
7734        );
7735    }
7736
7737    #[test]
7738    fn nanquantile_timedelta64_linear_interpolation_5djk7() {
7739        let one_hour: i64 = 3_600 * 1_000_000_000;
7740        let vals = vec![
7741            Scalar::Timedelta64(one_hour),
7742            Scalar::Timedelta64(3 * one_hour),
7743        ];
7744        // Linear interpolation: at q=0.5, midpoint = 2h
7745        assert_eq!(
7746            super::nanquantile(&vals, 0.5),
7747            Scalar::Timedelta64(2 * one_hour)
7748        );
7749    }
7750
7751    #[test]
7752    fn nanquantile_matches_numeric_and_timedelta_oracle_ecb7r() {
7753        // Differential vs independent sort-based quantile oracles
7754        // (br-frankenpandas-ecb7r). Seeded LCG, no mocks.
7755        fn next(seed: &mut u64) -> u64 {
7756            *seed = seed
7757                .wrapping_mul(3202034522624059733)
7758                .wrapping_add(4354685564936845319);
7759            *seed
7760        }
7761
7762        fn interpolated(sorted: &[f64], q: f64) -> f64 {
7763            if sorted.len() == 1 {
7764                return sorted[0];
7765            }
7766            let pos = q * (sorted.len() - 1) as f64;
7767            let lo = pos.floor() as usize;
7768            let hi = pos.ceil() as usize;
7769            if lo == hi {
7770                sorted[lo]
7771            } else {
7772                let weight = pos - lo as f64;
7773                sorted[lo] + (sorted[hi] - sorted[lo]) * weight
7774            }
7775        }
7776
7777        fn expected_numeric(values: &[Scalar], q: f64) -> Scalar {
7778            if !(0.0..=1.0).contains(&q) {
7779                return Scalar::Null(NullKind::NaN);
7780            }
7781            let mut samples = values
7782                .iter()
7783                .filter(|value| !value.is_missing())
7784                .filter_map(|value| value.to_f64().ok())
7785                .collect::<Vec<_>>();
7786            if samples.is_empty() {
7787                return Scalar::Null(NullKind::NaN);
7788            }
7789            samples.sort_by(|left, right| left.partial_cmp(right).expect("finite values"));
7790            Scalar::Float64(interpolated(&samples, q))
7791        }
7792
7793        fn expected_timedelta(values: &[Scalar], q: f64) -> Scalar {
7794            if !(0.0..=1.0).contains(&q) {
7795                return Scalar::Null(NullKind::NaN);
7796            }
7797            let mut samples = values
7798                .iter()
7799                .filter_map(|value| match value {
7800                    Scalar::Timedelta64(ns) if !value.is_missing() => Some(*ns as f64),
7801                    _ => None,
7802                })
7803                .collect::<Vec<_>>();
7804            if samples.is_empty() {
7805                return Scalar::Null(NullKind::NaN);
7806            }
7807            samples.sort_by(|left, right| left.partial_cmp(right).expect("finite values"));
7808            Scalar::Timedelta64(interpolated(&samples, q) as i64)
7809        }
7810
7811        fn assert_quantile(case: usize, family: &str, values: &[Scalar], q: f64, expected: Scalar) {
7812            let actual = super::nanquantile(values, q);
7813            assert!(
7814                actual.semantic_eq(&expected),
7815                "case={case} family={family} q={q}: expected {expected:?}, got {actual:?} for {values:?}"
7816            );
7817        }
7818
7819        let numeric_all_missing = [Scalar::Null(NullKind::Null), Scalar::Float64(f64::NAN)];
7820        assert_quantile(
7821            usize::MAX,
7822            "numeric_all_missing",
7823            &numeric_all_missing,
7824            0.5,
7825            expected_numeric(&numeric_all_missing, 0.5),
7826        );
7827        assert_quantile(
7828            usize::MAX - 1,
7829            "numeric_out_of_range",
7830            &[Scalar::Float64(1.0), Scalar::Float64(2.0)],
7831            1.25,
7832            Scalar::Null(NullKind::NaN),
7833        );
7834
7835        let td_all_missing = [Scalar::Timedelta64(i64::MIN), Scalar::Null(NullKind::NaN)];
7836        assert_quantile(
7837            usize::MAX - 2,
7838            "timedelta_all_missing",
7839            &td_all_missing,
7840            0.5,
7841            expected_timedelta(&td_all_missing, 0.5),
7842        );
7843        assert_quantile(
7844            usize::MAX - 3,
7845            "timedelta_out_of_range",
7846            &[Scalar::Timedelta64(1), Scalar::Timedelta64(2)],
7847            -0.25,
7848            Scalar::Null(NullKind::NaN),
7849        );
7850
7851        let mut seed = 0x4a17_1e5e_0b5e_a11d_u64;
7852        for case in 0..260 {
7853            let len = (next(&mut seed) % 83 + 1) as usize;
7854            let q = match next(&mut seed) % 8 {
7855                0 => 0.0,
7856                1 => 0.25,
7857                2 => 0.5,
7858                3 => 0.75,
7859                4 => 1.0,
7860                _ => (next(&mut seed) % 1_001) as f64 / 1_000.0,
7861            };
7862
7863            let mut numeric = Vec::with_capacity(len);
7864            numeric.push(Scalar::Int64(case as i64 - 130));
7865            for _ in 1..len {
7866                let raw = (next(&mut seed) % 20_001) as i64 - 10_000;
7867                numeric.push(match next(&mut seed) % 8 {
7868                    0 => Scalar::Null(NullKind::Null),
7869                    1 => Scalar::Null(NullKind::NaN),
7870                    2 => Scalar::Float64(f64::NAN),
7871                    3 => Scalar::Bool(raw & 1 == 0),
7872                    4 => Scalar::Int64(raw % 251),
7873                    5 => Scalar::Float64(raw as f64 / 67.0),
7874                    6 => Scalar::Float64(0.0),
7875                    _ => Scalar::Float64(-0.0),
7876                });
7877            }
7878            assert_quantile(case, "numeric", &numeric, q, expected_numeric(&numeric, q));
7879
7880            let mut timedeltas = Vec::with_capacity(len);
7881            timedeltas.push(Scalar::Timedelta64(case as i64 - 130));
7882            for _ in 1..len {
7883                let raw = (next(&mut seed) % 20_001) as i64 - 10_000;
7884                timedeltas.push(match next(&mut seed) % 7 {
7885                    0 => Scalar::Null(NullKind::Null),
7886                    1 => Scalar::Timedelta64(i64::MIN),
7887                    _ => Scalar::Timedelta64(raw),
7888                });
7889            }
7890            assert_quantile(
7891                case,
7892                "timedelta",
7893                &timedeltas,
7894                q,
7895                expected_timedelta(&timedeltas, q),
7896            );
7897        }
7898    }
7899
7900    #[test]
7901    fn nanargmax_returns_first_position() {
7902        let values = vec![
7903            Scalar::Float64(1.0),
7904            Scalar::Null(NullKind::NaN),
7905            Scalar::Float64(4.0),
7906            Scalar::Float64(4.0),
7907            Scalar::Float64(2.0),
7908        ];
7909        assert_eq!(super::nanargmax(&values), Some(2));
7910    }
7911
7912    #[test]
7913    fn nanargmin_returns_first_position() {
7914        let values = vec![
7915            Scalar::Float64(3.0),
7916            Scalar::Null(NullKind::NaN),
7917            Scalar::Float64(1.0),
7918            Scalar::Float64(1.0),
7919        ];
7920        assert_eq!(super::nanargmin(&values), Some(2));
7921    }
7922
7923    #[test]
7924    fn nanargmax_all_missing_returns_none() {
7925        let values = vec![Scalar::Null(NullKind::NaN), Scalar::Null(NullKind::Null)];
7926        assert_eq!(super::nanargmax(&values), None);
7927        assert_eq!(super::nanargmin(&values), None);
7928    }
7929
7930    #[test]
7931    fn nanargmax_nanargmin_match_numeric_and_timedelta_oracle_unkj6() {
7932        // Differential vs independent first-tie arg oracles
7933        // (br-frankenpandas-unkj6). Seeded LCG, no mocks.
7934        fn next(seed: &mut u64) -> u64 {
7935            *seed = seed
7936                .wrapping_mul(2862933555777941757)
7937                .wrapping_add(3037000493);
7938            *seed
7939        }
7940
7941        fn expected_numeric(values: &[Scalar], find_max: bool) -> Option<usize> {
7942            let mut best: Option<(usize, f64)> = None;
7943            for (idx, value) in values.iter().enumerate() {
7944                if value.is_missing() {
7945                    continue;
7946                }
7947                let Ok(value) = value.to_f64() else {
7948                    continue;
7949                };
7950                if value.is_nan() {
7951                    continue;
7952                }
7953                match best {
7954                    None => best = Some((idx, value)),
7955                    Some((_, current))
7956                        if (find_max && value > current) || (!find_max && value < current) =>
7957                    {
7958                        best = Some((idx, value));
7959                    }
7960                    _ => {}
7961                }
7962            }
7963            best.map(|(idx, _)| idx)
7964        }
7965
7966        fn expected_timedelta(values: &[Scalar], find_max: bool) -> Option<usize> {
7967            let mut best: Option<(usize, i64)> = None;
7968            for (idx, value) in values.iter().enumerate() {
7969                if value.is_missing() {
7970                    continue;
7971                }
7972                let Scalar::Timedelta64(ns) = value else {
7973                    continue;
7974                };
7975                match best {
7976                    None => best = Some((idx, *ns)),
7977                    Some((_, current))
7978                        if (find_max && *ns > current) || (!find_max && *ns < current) =>
7979                    {
7980                        best = Some((idx, *ns));
7981                    }
7982                    _ => {}
7983                }
7984            }
7985            best.map(|(idx, _)| idx)
7986        }
7987
7988        fn assert_args(
7989            case: usize,
7990            family: &str,
7991            values: &[Scalar],
7992            expected_min: Option<usize>,
7993            expected_max: Option<usize>,
7994        ) {
7995            assert_eq!(
7996                super::nanargmin(values),
7997                expected_min,
7998                "case={case} family={family}: nanargmin mismatch for {values:?}"
7999            );
8000            assert_eq!(
8001                super::nanargmax(values),
8002                expected_max,
8003                "case={case} family={family}: nanargmax mismatch for {values:?}"
8004            );
8005        }
8006
8007        let all_missing = [Scalar::Null(NullKind::Null), Scalar::Float64(f64::NAN)];
8008        assert_args(usize::MAX, "numeric_all_missing", &all_missing, None, None);
8009        let td_all_missing = [Scalar::Timedelta64(i64::MIN), Scalar::Null(NullKind::NaN)];
8010        assert_args(
8011            usize::MAX - 1,
8012            "timedelta_all_missing",
8013            &td_all_missing,
8014            None,
8015            None,
8016        );
8017
8018        let mut seed = 0xa126_5eed_ed9e_u64;
8019        for case in 0..260 {
8020            let len = (next(&mut seed) % 83 + 1) as usize;
8021
8022            let mut numeric = Vec::with_capacity(len);
8023            numeric.push(Scalar::Int64(case as i64 - 130));
8024            if len > 1 {
8025                numeric.push(Scalar::Int64(case as i64 - 130));
8026            }
8027            for _ in numeric.len()..len {
8028                let raw = (next(&mut seed) % 20_001) as i64 - 10_000;
8029                numeric.push(match next(&mut seed) % 9 {
8030                    0 => Scalar::Null(NullKind::Null),
8031                    1 => Scalar::Null(NullKind::NaN),
8032                    2 => Scalar::Float64(f64::NAN),
8033                    3 => Scalar::Bool(raw & 1 == 0),
8034                    4 => Scalar::Int64(raw % 211),
8035                    5 => Scalar::Float64(raw as f64 / 47.0),
8036                    6 => Scalar::Float64(0.0),
8037                    7 => Scalar::Float64(-0.0),
8038                    _ => Scalar::Float64(raw.signum() as f64 * f64::INFINITY),
8039                });
8040            }
8041            assert_args(
8042                case,
8043                "numeric",
8044                &numeric,
8045                expected_numeric(&numeric, false),
8046                expected_numeric(&numeric, true),
8047            );
8048
8049            let mut timedeltas = Vec::with_capacity(len);
8050            timedeltas.push(Scalar::Timedelta64(case as i64 - 130));
8051            if len > 1 {
8052                timedeltas.push(Scalar::Timedelta64(case as i64 - 130));
8053            }
8054            for _ in timedeltas.len()..len {
8055                let raw = (next(&mut seed) % 20_001) as i64 - 10_000;
8056                timedeltas.push(match next(&mut seed) % 7 {
8057                    0 => Scalar::Null(NullKind::Null),
8058                    1 => Scalar::Timedelta64(i64::MIN),
8059                    _ => Scalar::Timedelta64(raw),
8060                });
8061            }
8062            assert_args(
8063                case,
8064                "timedelta",
8065                &timedeltas,
8066                expected_timedelta(&timedeltas, false),
8067                expected_timedelta(&timedeltas, true),
8068            );
8069        }
8070    }
8071
8072    #[test]
8073    fn nansem_matches_std_over_sqrt_n() {
8074        let values = vec![
8075            Scalar::Float64(2.0),
8076            Scalar::Float64(4.0),
8077            Scalar::Float64(4.0),
8078            Scalar::Float64(4.0),
8079            Scalar::Float64(5.0),
8080            Scalar::Float64(5.0),
8081            Scalar::Float64(7.0),
8082            Scalar::Float64(9.0),
8083        ];
8084        // numpy/scipy: std(ddof=1) = 2.138089935299395; sem = std/sqrt(8) = 0.7559
8085        let sem = super::nansem(&values, 1);
8086        assert!(matches!(sem, Scalar::Float64(_)));
8087        let Scalar::Float64(v) = sem else {
8088            return;
8089        };
8090        assert!((v - 0.7559289460184544).abs() < 1e-9);
8091    }
8092
8093    #[test]
8094    fn nansem_empty_returns_null() {
8095        assert!(super::nansem(&[], 1).is_missing());
8096        assert!(super::nansem(&[Scalar::Float64(1.0)], 1).is_missing());
8097    }
8098
8099    #[test]
8100    fn nanptp_returns_max_minus_min() {
8101        let values = vec![
8102            Scalar::Float64(3.0),
8103            Scalar::Null(NullKind::NaN),
8104            Scalar::Float64(7.0),
8105            Scalar::Float64(1.0),
8106        ];
8107        assert_eq!(super::nanptp(&values), Scalar::Float64(6.0));
8108    }
8109
8110    #[test]
8111    fn nanptp_empty_returns_null() {
8112        assert!(super::nanptp(&[]).is_missing());
8113        assert!(super::nanptp(&[Scalar::Null(NullKind::NaN)]).is_missing());
8114    }
8115
8116    #[test]
8117    fn nanptp_timedelta64_preserves_dtype_u2g0r() {
8118        // Per br-frankenpandas-u2g0r: ptp on Timedelta64 returns Timedelta64.
8119        let one_hour: i64 = 3_600 * 1_000_000_000;
8120        let values = vec![
8121            Scalar::Timedelta64(one_hour),
8122            Scalar::Timedelta64(5 * one_hour),
8123            Scalar::Timedelta64(2 * one_hour),
8124        ];
8125        assert_eq!(super::nanptp(&values), Scalar::Timedelta64(4 * one_hour));
8126    }
8127
8128    #[test]
8129    fn nanptp_matches_numeric_and_timedelta_oracle_affjt() {
8130        // Differential vs independent max-min oracles
8131        // (br-frankenpandas-affjt). Seeded LCG, no mocks.
8132        fn next(seed: &mut u64) -> u64 {
8133            *seed = seed
8134                .wrapping_mul(3202034522624059733)
8135                .wrapping_add(4354685564936845319);
8136            *seed
8137        }
8138
8139        fn expected_numeric(values: &[Scalar]) -> Scalar {
8140            let mut lo = f64::INFINITY;
8141            let mut hi = f64::NEG_INFINITY;
8142            let mut seen = false;
8143            for value in values {
8144                if value.is_missing() {
8145                    continue;
8146                }
8147                if let Ok(value) = value.to_f64() {
8148                    seen = true;
8149                    lo = lo.min(value);
8150                    hi = hi.max(value);
8151                }
8152            }
8153            if seen {
8154                Scalar::Float64(hi - lo)
8155            } else {
8156                Scalar::Null(NullKind::NaN)
8157            }
8158        }
8159
8160        fn expected_timedelta(values: &[Scalar]) -> Scalar {
8161            let mut lo = i64::MAX;
8162            let mut hi = i64::MIN;
8163            let mut seen = false;
8164            for value in values {
8165                if let Scalar::Timedelta64(ns) = value
8166                    && !value.is_missing()
8167                {
8168                    seen = true;
8169                    lo = lo.min(*ns);
8170                    hi = hi.max(*ns);
8171                }
8172            }
8173            if seen {
8174                Scalar::Timedelta64(hi - lo)
8175            } else {
8176                Scalar::Null(NullKind::NaN)
8177            }
8178        }
8179
8180        fn assert_ptp(case: usize, family: &str, values: &[Scalar], expected: Scalar) {
8181            let actual = super::nanptp(values);
8182            assert!(
8183                actual.semantic_eq(&expected),
8184                "case={case} family={family}: expected {expected:?}, got {actual:?} for {values:?}"
8185            );
8186        }
8187
8188        assert_ptp(
8189            usize::MAX,
8190            "numeric_all_missing",
8191            &[Scalar::Null(NullKind::Null), Scalar::Float64(f64::NAN)],
8192            Scalar::Null(NullKind::NaN),
8193        );
8194        assert_ptp(
8195            usize::MAX - 1,
8196            "timedelta_all_missing",
8197            &[Scalar::Timedelta64(i64::MIN), Scalar::Null(NullKind::NaN)],
8198            Scalar::Null(NullKind::NaN),
8199        );
8200
8201        let mut seed = 0xa22f_17ed_57a7_15e5_u64;
8202        for case in 0..260 {
8203            let len = (next(&mut seed) % 83 + 1) as usize;
8204
8205            let mut numeric = Vec::with_capacity(len);
8206            numeric.push(Scalar::Int64(case as i64 - 130));
8207            for _ in 1..len {
8208                let raw = (next(&mut seed) % 20_001) as i64 - 10_000;
8209                numeric.push(match next(&mut seed) % 9 {
8210                    0 => Scalar::Null(NullKind::Null),
8211                    1 => Scalar::Null(NullKind::NaN),
8212                    2 => Scalar::Float64(f64::NAN),
8213                    3 => Scalar::Bool(raw & 1 == 0),
8214                    4 => Scalar::Int64(raw),
8215                    5 => Scalar::Float64(raw as f64 / 53.0),
8216                    6 => Scalar::Float64(0.0),
8217                    7 => Scalar::Float64(-0.0),
8218                    _ => Scalar::Float64(raw.signum() as f64 * f64::INFINITY),
8219                });
8220            }
8221            assert_ptp(case, "numeric", &numeric, expected_numeric(&numeric));
8222
8223            let mut timedeltas = Vec::with_capacity(len);
8224            timedeltas.push(Scalar::Timedelta64(case as i64 - 130));
8225            for _ in 1..len {
8226                let raw = (next(&mut seed) % 10_001) as i64 - 5_000;
8227                timedeltas.push(match next(&mut seed) % 7 {
8228                    0 => Scalar::Null(NullKind::Null),
8229                    1 => Scalar::Timedelta64(i64::MIN),
8230                    _ => Scalar::Timedelta64(raw),
8231                });
8232            }
8233            assert_ptp(
8234                case,
8235                "timedelta",
8236                &timedeltas,
8237                expected_timedelta(&timedeltas),
8238            );
8239        }
8240    }
8241
8242    #[test]
8243    fn nanargmax_nanargmin_timedelta64_compare_by_ns_ql1t5() {
8244        // Per br-frankenpandas-ql1t5: argmax/argmin on Timedelta64 compare
8245        // i64 ns directly instead of silently skipping via to_f64.
8246        let one_hour: i64 = 3_600 * 1_000_000_000;
8247        let values = vec![
8248            Scalar::Timedelta64(2 * one_hour),
8249            Scalar::Timedelta64(5 * one_hour),
8250            Scalar::Timedelta64(one_hour),
8251            Scalar::Timedelta64(3 * one_hour),
8252        ];
8253        assert_eq!(super::nanargmax(&values), Some(1));
8254        assert_eq!(super::nanargmin(&values), Some(2));
8255    }
8256
8257    #[test]
8258    fn nanprod_timedelta64_returns_null_szq6a() {
8259        // Per br-frankenpandas-szq6a: pandas raises on Timedelta prod
8260        // (dimensionally undefined). We surface Null instead of the
8261        // misleading Float64(1.0) the old empty-iterator default emitted.
8262        let one_hour: i64 = 3_600 * 1_000_000_000;
8263        let values = vec![
8264            Scalar::Timedelta64(2 * one_hour),
8265            Scalar::Timedelta64(3 * one_hour),
8266        ];
8267        assert!(super::nanprod(&values).is_missing());
8268    }
8269
8270    #[test]
8271    fn nanprod_matches_numeric_and_timedelta_oracle_9938h() {
8272        // Differential vs independent product oracles
8273        // (br-frankenpandas-9938h). Seeded LCG, no mocks.
8274        fn next(seed: &mut u64) -> u64 {
8275            *seed = seed
8276                .wrapping_mul(6364136223846793005)
8277                .wrapping_add(1442695040888963407);
8278            *seed
8279        }
8280
8281        fn expected_numeric(values: &[Scalar]) -> Scalar {
8282            let mut product = 1.0_f64;
8283            for value in values {
8284                if value.is_missing() {
8285                    continue;
8286                }
8287                if let Ok(value) = value.to_f64() {
8288                    product *= value;
8289                }
8290            }
8291            Scalar::Float64(product)
8292        }
8293
8294        fn expected_timedelta(values: &[Scalar]) -> Scalar {
8295            if values
8296                .iter()
8297                .any(|value| matches!(value, Scalar::Timedelta64(_)) && !value.is_missing())
8298            {
8299                Scalar::Null(NullKind::NaN)
8300            } else {
8301                Scalar::Float64(1.0)
8302            }
8303        }
8304
8305        fn assert_prod(case: usize, family: &str, values: &[Scalar], expected: Scalar) {
8306            let actual = super::nanprod(values);
8307            assert!(
8308                actual.semantic_eq(&expected),
8309                "case={case} family={family}: expected {expected:?}, got {actual:?} for {values:?}"
8310            );
8311        }
8312
8313        let all_missing = [Scalar::Null(NullKind::Null), Scalar::Float64(f64::NAN)];
8314        assert_prod(
8315            usize::MAX,
8316            "numeric_all_missing",
8317            &all_missing,
8318            Scalar::Float64(1.0),
8319        );
8320
8321        let td_all_missing = [Scalar::Timedelta64(i64::MIN), Scalar::Null(NullKind::NaN)];
8322        assert_prod(
8323            usize::MAX - 1,
8324            "timedelta_all_missing",
8325            &td_all_missing,
8326            expected_timedelta(&td_all_missing),
8327        );
8328
8329        let mut seed = 0x6e0d_9938_a11c_0de5_u64;
8330        for case in 0..280 {
8331            let len = (next(&mut seed) % 89 + 1) as usize;
8332
8333            let mut numeric = Vec::with_capacity(len);
8334            numeric.push(Scalar::Int64((case % 17) as i64 - 8));
8335            for _ in 1..len {
8336                let raw = (next(&mut seed) % 2_001) as i64 - 1_000;
8337                numeric.push(match next(&mut seed) % 9 {
8338                    0 => Scalar::Null(NullKind::Null),
8339                    1 => Scalar::Null(NullKind::NaN),
8340                    2 => Scalar::Float64(f64::NAN),
8341                    3 => Scalar::Bool(raw & 1 == 0),
8342                    4 => Scalar::Int64(raw % 19),
8343                    5 => Scalar::Float64(raw as f64 / 97.0),
8344                    6 => Scalar::Float64(0.0),
8345                    7 => Scalar::Float64(-0.0),
8346                    _ => Scalar::Float64(1.0),
8347                });
8348            }
8349            assert_prod(case, "numeric", &numeric, expected_numeric(&numeric));
8350
8351            let mut timedeltas = Vec::with_capacity(len);
8352            timedeltas.push(Scalar::Timedelta64(case as i64 - 140));
8353            for _ in 1..len {
8354                let raw = (next(&mut seed) % 10_001) as i64 - 5_000;
8355                timedeltas.push(match next(&mut seed) % 7 {
8356                    0 => Scalar::Null(NullKind::Null),
8357                    1 => Scalar::Timedelta64(i64::MIN),
8358                    _ => Scalar::Timedelta64(raw),
8359                });
8360            }
8361            assert_prod(
8362                case,
8363                "timedelta",
8364                &timedeltas,
8365                expected_timedelta(&timedeltas),
8366            );
8367        }
8368    }
8369
8370    #[test]
8371    fn nanskew_symmetric_distribution_near_zero() {
8372        let values = vec![
8373            Scalar::Float64(1.0),
8374            Scalar::Float64(2.0),
8375            Scalar::Float64(3.0),
8376            Scalar::Float64(4.0),
8377            Scalar::Float64(5.0),
8378        ];
8379        // Perfectly symmetric -> skew = 0
8380        let skew = super::nanskew(&values);
8381        assert!(matches!(skew, Scalar::Float64(_)));
8382        let Scalar::Float64(v) = skew else {
8383            return;
8384        };
8385        assert!(v.abs() < 1e-9);
8386    }
8387
8388    #[test]
8389    fn nanskew_too_few_values_returns_null() {
8390        assert!(super::nanskew(&[]).is_missing());
8391        assert!(super::nanskew(&[Scalar::Float64(1.0), Scalar::Float64(2.0)]).is_missing());
8392    }
8393
8394    #[test]
8395    fn nankurt_symmetric_uniform_distribution() {
8396        let values = vec![
8397            Scalar::Float64(1.0),
8398            Scalar::Float64(2.0),
8399            Scalar::Float64(3.0),
8400            Scalar::Float64(4.0),
8401            Scalar::Float64(5.0),
8402        ];
8403        // pandas kurt([1,2,3,4,5]) = -1.2
8404        let kurt = super::nankurt(&values);
8405        assert!(matches!(kurt, Scalar::Float64(_)));
8406        let Scalar::Float64(v) = kurt else {
8407            return;
8408        };
8409        assert!((v + 1.2).abs() < 1e-9);
8410    }
8411
8412    #[test]
8413    fn nankurt_too_few_values_returns_null() {
8414        let vals: Vec<Scalar> = (0..3).map(|i| Scalar::Float64(i as f64)).collect();
8415        assert!(super::nankurt(&vals).is_missing());
8416    }
8417
8418    #[test]
8419    fn nanskew_constant_series_returns_zero() {
8420        let values = vec![
8421            Scalar::Float64(5.0),
8422            Scalar::Float64(5.0),
8423            Scalar::Float64(5.0),
8424        ];
8425        assert_eq!(super::nanskew(&values), Scalar::Float64(0.0));
8426        assert_eq!(
8427            super::nankurt(&[
8428                Scalar::Float64(5.0),
8429                Scalar::Float64(5.0),
8430                Scalar::Float64(5.0),
8431                Scalar::Float64(5.0),
8432            ]),
8433            Scalar::Float64(0.0)
8434        );
8435    }
8436
8437    #[test]
8438    fn nanskew_nankurt_match_numeric_oracle_jr7zk() {
8439        // Differential vs independent bias-corrected moment oracles
8440        // (br-frankenpandas-jr7zk). Seeded LCG, no mocks.
8441        fn next(seed: &mut u64) -> u64 {
8442            *seed = seed
8443                .wrapping_mul(2862933555777941757)
8444                .wrapping_add(3037000493);
8445            *seed
8446        }
8447
8448        fn samples(values: &[Scalar]) -> Vec<f64> {
8449            values
8450                .iter()
8451                .filter(|value| !value.is_missing())
8452                .filter_map(|value| value.to_f64().ok())
8453                .collect()
8454        }
8455
8456        fn expected_skew(values: &[Scalar]) -> Scalar {
8457            let samples = samples(values);
8458            let n = samples.len() as f64;
8459            if n < 3.0 {
8460                return Scalar::Null(NullKind::NaN);
8461            }
8462            let mean = samples.iter().sum::<f64>() / n;
8463            let m2 = samples
8464                .iter()
8465                .map(|value| (value - mean).powi(2))
8466                .sum::<f64>();
8467            let m3 = samples
8468                .iter()
8469                .map(|value| (value - mean).powi(3))
8470                .sum::<f64>();
8471            let s2 = m2 / (n - 1.0);
8472            if s2 == 0.0 {
8473                return Scalar::Float64(0.0);
8474            }
8475            Scalar::Float64((n / ((n - 1.0) * (n - 2.0))) * (m3 / s2.powf(1.5)))
8476        }
8477
8478        fn expected_kurt(values: &[Scalar]) -> Scalar {
8479            let samples = samples(values);
8480            let n = samples.len() as f64;
8481            if n < 4.0 {
8482                return Scalar::Null(NullKind::NaN);
8483            }
8484            let mean = samples.iter().sum::<f64>() / n;
8485            let m2 = samples
8486                .iter()
8487                .map(|value| (value - mean).powi(2))
8488                .sum::<f64>();
8489            let m4 = samples
8490                .iter()
8491                .map(|value| (value - mean).powi(4))
8492                .sum::<f64>();
8493            let s2 = m2 / (n - 1.0);
8494            if s2 == 0.0 {
8495                return Scalar::Float64(0.0);
8496            }
8497            let adj = (n * (n + 1.0)) / ((n - 1.0) * (n - 2.0) * (n - 3.0));
8498            let sub = (3.0 * (n - 1.0).powi(2)) / ((n - 2.0) * (n - 3.0));
8499            Scalar::Float64(adj * (m4 / (s2 * s2)) - sub)
8500        }
8501
8502        fn assert_moments(case: usize, values: &[Scalar]) {
8503            let expected_skew = expected_skew(values);
8504            let expected_kurt = expected_kurt(values);
8505            let actual_skew = super::nanskew(values);
8506            let actual_kurt = super::nankurt(values);
8507            assert!(
8508                actual_skew.semantic_eq(&expected_skew),
8509                "case={case}: expected skew {expected_skew:?}, got {actual_skew:?} for {values:?}"
8510            );
8511            assert!(
8512                actual_kurt.semantic_eq(&expected_kurt),
8513                "case={case}: expected kurt {expected_kurt:?}, got {actual_kurt:?} for {values:?}"
8514            );
8515        }
8516
8517        assert_moments(
8518            usize::MAX,
8519            &[Scalar::Null(NullKind::Null), Scalar::Float64(f64::NAN)],
8520        );
8521        assert_moments(
8522            usize::MAX - 1,
8523            &[
8524                Scalar::Float64(7.0),
8525                Scalar::Float64(7.0),
8526                Scalar::Float64(7.0),
8527                Scalar::Float64(7.0),
8528            ],
8529        );
8530
8531        let mut seed = 0x5ce7_9a55_a11c_0de5_u64;
8532        for case in 0..260 {
8533            let len = (next(&mut seed) % 89 + 1) as usize;
8534            let mut values = Vec::with_capacity(len);
8535            if case % 11 == 0 {
8536                values.extend((0..len).map(|_| Scalar::Float64(3.0)));
8537            } else {
8538                values.push(Scalar::Float64(case as f64 / 19.0));
8539                for _ in 1..len {
8540                    let raw = (next(&mut seed) % 20_001) as i64 - 10_000;
8541                    values.push(match next(&mut seed) % 8 {
8542                        0 => Scalar::Null(NullKind::Null),
8543                        1 => Scalar::Null(NullKind::NaN),
8544                        2 => Scalar::Float64(f64::NAN),
8545                        3 => Scalar::Bool(raw & 1 == 0),
8546                        4 => Scalar::Int64(raw % 251),
8547                        5 => Scalar::Float64(raw as f64 / 83.0),
8548                        6 => Scalar::Float64(0.0),
8549                        _ => Scalar::Float64(-0.0),
8550                    });
8551                }
8552            }
8553            assert_moments(case, &values);
8554        }
8555    }
8556
8557    // ── Interval tests (br-frankenpandas-j8k4) ──────────────────────────
8558
8559    #[test]
8560    fn interval_default_closed_is_right() {
8561        assert_eq!(IntervalClosed::default(), IntervalClosed::Right);
8562    }
8563
8564    #[test]
8565    fn interval_left_and_right_closed_helpers() {
8566        assert!(IntervalClosed::Left.left_closed());
8567        assert!(!IntervalClosed::Left.right_closed());
8568        assert!(!IntervalClosed::Right.left_closed());
8569        assert!(IntervalClosed::Right.right_closed());
8570        assert!(IntervalClosed::Both.left_closed());
8571        assert!(IntervalClosed::Both.right_closed());
8572        assert!(!IntervalClosed::Neither.left_closed());
8573        assert!(!IntervalClosed::Neither.right_closed());
8574    }
8575
8576    #[test]
8577    fn interval_display_matches_pandas_notation() {
8578        assert_eq!(
8579            Interval::new(0.0, 5.0, IntervalClosed::Right).to_string(),
8580            "(0.0, 5.0]"
8581        );
8582        assert_eq!(
8583            Interval::new(0.0, 5.0, IntervalClosed::Left).to_string(),
8584            "[0.0, 5.0)"
8585        );
8586        assert_eq!(
8587            Interval::new(0.0, 5.0, IntervalClosed::Both).to_string(),
8588            "[0.0, 5.0]"
8589        );
8590        assert_eq!(
8591            Interval::new(0.0, 5.0, IntervalClosed::Neither).to_string(),
8592            "(0.0, 5.0)"
8593        );
8594        assert_eq!(
8595            Interval::new(2.5, 3.5, IntervalClosed::Right).to_string(),
8596            "(2.5, 3.5]"
8597        );
8598        assert_eq!(
8599            Interval::new(-1.0, 0.0, IntervalClosed::Right).to_string(),
8600            "(-1.0, 0.0]"
8601        );
8602        assert_eq!(
8603            Interval::new(1e20, 2e20, IntervalClosed::Right).to_string(),
8604            "(1e+20, 2e+20]"
8605        );
8606    }
8607
8608    #[test]
8609    fn interval_length_and_mid() {
8610        let i = Interval::new(2.0, 10.0, IntervalClosed::Right);
8611        assert_eq!(i.length(), 8.0);
8612        assert_eq!(i.mid(), 6.0);
8613    }
8614
8615    #[test]
8616    fn interval_contains_matches_closed_policy() {
8617        let right = Interval::new(0.0, 5.0, IntervalClosed::Right);
8618        assert!(!right.contains(0.0));
8619        assert!(right.contains(2.5));
8620        assert!(right.contains(5.0));
8621
8622        let left = Interval::new(0.0, 5.0, IntervalClosed::Left);
8623        assert!(left.contains(0.0));
8624        assert!(left.contains(2.5));
8625        assert!(!left.contains(5.0));
8626
8627        let both = Interval::new(0.0, 5.0, IntervalClosed::Both);
8628        assert!(both.contains(0.0));
8629        assert!(both.contains(5.0));
8630
8631        let neither = Interval::new(0.0, 5.0, IntervalClosed::Neither);
8632        assert!(!neither.contains(0.0));
8633        assert!(!neither.contains(5.0));
8634        assert!(neither.contains(2.5));
8635    }
8636
8637    #[test]
8638    fn interval_contains_nan_returns_false() {
8639        let i = Interval::new(0.0, 10.0, IntervalClosed::Both);
8640        assert!(!i.contains(f64::NAN));
8641    }
8642
8643    #[test]
8644    fn interval_is_empty_matches_pandas() {
8645        // pd.Interval(3, 3, 'right').is_empty → True
8646        assert!(Interval::new(3.0, 3.0, IntervalClosed::Right).is_empty());
8647        assert!(Interval::new(3.0, 3.0, IntervalClosed::Left).is_empty());
8648        assert!(Interval::new(3.0, 3.0, IntervalClosed::Neither).is_empty());
8649        // pd.Interval(3, 3, 'both').is_empty → False (single point)
8650        assert!(!Interval::new(3.0, 3.0, IntervalClosed::Both).is_empty());
8651        // Non-degenerate intervals are never empty.
8652        assert!(!Interval::new(0.0, 5.0, IntervalClosed::Right).is_empty());
8653    }
8654
8655    #[test]
8656    fn interval_overlaps_disjoint_returns_false() {
8657        let a = Interval::new(0.0, 1.0, IntervalClosed::Right);
8658        let b = Interval::new(2.0, 3.0, IntervalClosed::Right);
8659        assert!(!a.overlaps(&b));
8660        assert!(!b.overlaps(&a));
8661    }
8662
8663    #[test]
8664    fn interval_overlaps_nested_returns_true() {
8665        let outer = Interval::new(0.0, 10.0, IntervalClosed::Right);
8666        let inner = Interval::new(3.0, 7.0, IntervalClosed::Right);
8667        assert!(outer.overlaps(&inner));
8668        assert!(inner.overlaps(&outer));
8669    }
8670
8671    #[test]
8672    fn interval_overlaps_touching_respects_closed_policy() {
8673        // (0, 1] touching (1, 2] at point 1.
8674        let right_right = (
8675            Interval::new(0.0, 1.0, IntervalClosed::Right),
8676            Interval::new(1.0, 2.0, IntervalClosed::Right),
8677        );
8678        // right_right.0 is closed at 1; right_right.1 is open at 1 → no overlap.
8679        assert!(!right_right.0.overlaps(&right_right.1));
8680
8681        // [0, 1] touching [1, 2] — both closed at 1 → overlap.
8682        let both_both = (
8683            Interval::new(0.0, 1.0, IntervalClosed::Both),
8684            Interval::new(1.0, 2.0, IntervalClosed::Both),
8685        );
8686        assert!(both_both.0.overlaps(&both_both.1));
8687    }
8688
8689    #[test]
8690    fn interval_roundtrips_through_serde_json() {
8691        let i = Interval::new(1.5, 3.25, IntervalClosed::Both);
8692        let json = serde_json::to_string(&i).expect("serialize");
8693        let back: Interval = serde_json::from_str(&json).expect("deserialize");
8694        assert_eq!(i, back);
8695    }
8696
8697    #[test]
8698    fn interval_serde_default_closed_is_right_when_missing() {
8699        // JSON payloads that omit `closed` deserialize with the pandas default.
8700        let back: Interval =
8701            serde_json::from_str(r#"{"left":0.0,"right":5.0}"#).expect("deserialize");
8702        assert_eq!(back.closed, IntervalClosed::Right);
8703    }
8704
8705    // ── Period tests (br-frankenpandas-epoj) ────────────────────────────
8706
8707    #[test]
8708    fn period_freq_parses_canonical_aliases() {
8709        assert_eq!(PeriodFreq::parse("A"), Some(PeriodFreq::Annual));
8710        assert_eq!(PeriodFreq::parse("Y"), Some(PeriodFreq::Annual));
8711        assert_eq!(PeriodFreq::parse("Q"), Some(PeriodFreq::Quarterly));
8712        assert_eq!(PeriodFreq::parse("M"), Some(PeriodFreq::Monthly));
8713        assert_eq!(PeriodFreq::parse("W"), Some(PeriodFreq::Weekly));
8714        assert_eq!(PeriodFreq::parse("D"), Some(PeriodFreq::Daily));
8715        assert_eq!(PeriodFreq::parse("B"), Some(PeriodFreq::Business));
8716        assert_eq!(PeriodFreq::parse("H"), Some(PeriodFreq::Hourly));
8717        assert_eq!(PeriodFreq::parse("T"), Some(PeriodFreq::Minutely));
8718        assert_eq!(PeriodFreq::parse("min"), Some(PeriodFreq::Minutely));
8719        assert_eq!(PeriodFreq::parse("S"), Some(PeriodFreq::Secondly));
8720    }
8721
8722    #[test]
8723    fn period_freq_parse_is_case_insensitive() {
8724        assert_eq!(PeriodFreq::parse("quarterly"), Some(PeriodFreq::Quarterly));
8725        assert_eq!(PeriodFreq::parse("MONTHLY"), Some(PeriodFreq::Monthly));
8726    }
8727
8728    #[test]
8729    fn period_freq_rejects_unknown_aliases() {
8730        assert_eq!(PeriodFreq::parse("nanosec"), None);
8731        assert_eq!(PeriodFreq::parse(""), None);
8732        assert_eq!(PeriodFreq::parse("xyz"), None);
8733    }
8734
8735    #[test]
8736    fn period_freq_alias_roundtrip() {
8737        for freq in [
8738            PeriodFreq::Annual,
8739            PeriodFreq::Quarterly,
8740            PeriodFreq::Monthly,
8741            PeriodFreq::Weekly,
8742            PeriodFreq::Daily,
8743            PeriodFreq::Business,
8744            PeriodFreq::Hourly,
8745            PeriodFreq::Minutely,
8746            PeriodFreq::Secondly,
8747        ] {
8748            assert_eq!(PeriodFreq::parse(freq.alias()), Some(freq));
8749        }
8750    }
8751
8752    #[test]
8753    fn period_freq_anchored_aliases_are_pandas_canonical_h2wiv() {
8754        assert_eq!(PeriodFreq::Annual.alias(), "Y-DEC");
8755        assert_eq!(PeriodFreq::Quarterly.alias(), "Q-DEC");
8756        assert_eq!(PeriodFreq::Weekly.alias(), "W-SUN");
8757
8758        assert_eq!(PeriodFreq::parse("A"), Some(PeriodFreq::Annual));
8759        assert_eq!(PeriodFreq::parse("Y"), Some(PeriodFreq::Annual));
8760        assert_eq!(PeriodFreq::parse("Y-DEC"), Some(PeriodFreq::Annual));
8761        assert_eq!(PeriodFreq::parse("Q"), Some(PeriodFreq::Quarterly));
8762        assert_eq!(PeriodFreq::parse("Q-DEC"), Some(PeriodFreq::Quarterly));
8763        assert_eq!(PeriodFreq::parse("W"), Some(PeriodFreq::Weekly));
8764        assert_eq!(PeriodFreq::parse("W-SUN"), Some(PeriodFreq::Weekly));
8765    }
8766
8767    #[test]
8768    fn period_freq_intraday_aliases_are_pandas_canonical_8kfdo() {
8769        assert_eq!(PeriodFreq::Hourly.alias(), "h");
8770        assert_eq!(PeriodFreq::Minutely.alias(), "min");
8771        assert_eq!(PeriodFreq::Secondly.alias(), "s");
8772
8773        assert_eq!(PeriodFreq::parse("H"), Some(PeriodFreq::Hourly));
8774        assert_eq!(PeriodFreq::parse("T"), Some(PeriodFreq::Minutely));
8775        assert_eq!(PeriodFreq::parse("S"), Some(PeriodFreq::Secondly));
8776    }
8777
8778    #[test]
8779    fn period_scalar_accessors_match_pandas_star8() {
8780        let period = Period::new(600, PeriodFreq::Monthly);
8781
8782        assert_eq!(period.ordinal(), 600);
8783        assert_eq!(period.freq(), PeriodFreq::Monthly);
8784        assert_eq!(period.freqstr(), "M");
8785    }
8786
8787    #[test]
8788    fn period_parse_common_pandas_ordinals_avm08() {
8789        assert_eq!(
8790            Period::parse("2024").unwrap(),
8791            Period::new(54, PeriodFreq::Annual)
8792        );
8793        assert_eq!(
8794            Period::parse("2024Q1").unwrap(),
8795            Period::new(216, PeriodFreq::Quarterly)
8796        );
8797        assert_eq!(
8798            Period::parse("2024-01").unwrap(),
8799            Period::new(648, PeriodFreq::Monthly)
8800        );
8801        assert_eq!(
8802            Period::parse("2024-01-15").unwrap(),
8803            Period::new(19_737, PeriodFreq::Daily)
8804        );
8805        assert!(Period::parse("216").is_err());
8806    }
8807
8808    #[test]
8809    fn period_shift_advances_ordinal() {
8810        let q1 = Period::new(216, PeriodFreq::Quarterly);
8811        let q2 = q1.shift(1);
8812        assert_eq!(q2.ordinal, 217);
8813        assert_eq!(q2.freq, PeriodFreq::Quarterly);
8814        let q0 = q1.shift(-1);
8815        assert_eq!(q0.ordinal, 215);
8816    }
8817
8818    #[test]
8819    fn period_shift_saturates_on_overflow() {
8820        let p = Period::new(i64::MAX - 2, PeriodFreq::Daily);
8821        assert_eq!(p.shift(100).ordinal, i64::MAX);
8822        let p = Period::new(i64::MIN + 2, PeriodFreq::Daily);
8823        assert_eq!(p.shift(-100).ordinal, i64::MIN);
8824    }
8825
8826    #[test]
8827    fn period_diff_returns_period_count() {
8828        let a = Period::new(216, PeriodFreq::Quarterly);
8829        let b = Period::new(220, PeriodFreq::Quarterly);
8830        assert_eq!(b.diff(&a), Some(4));
8831        assert_eq!(a.diff(&b), Some(-4));
8832    }
8833
8834    #[test]
8835    fn period_diff_rejects_mismatched_freq() {
8836        let monthly = Period::new(100, PeriodFreq::Monthly);
8837        let quarterly = Period::new(100, PeriodFreq::Quarterly);
8838        assert_eq!(monthly.diff(&quarterly), None);
8839        assert_eq!(quarterly.diff(&monthly), None);
8840    }
8841
8842    #[test]
8843    fn period_cmp_same_freq_respects_ordinal_order() {
8844        use std::cmp::Ordering;
8845        let a = Period::new(10, PeriodFreq::Monthly);
8846        let b = Period::new(20, PeriodFreq::Monthly);
8847        assert_eq!(a.cmp_same_freq(&b), Some(Ordering::Less));
8848        assert_eq!(b.cmp_same_freq(&a), Some(Ordering::Greater));
8849        assert_eq!(a.cmp_same_freq(&a), Some(Ordering::Equal));
8850    }
8851
8852    #[test]
8853    fn period_cmp_cross_freq_returns_none() {
8854        let m = Period::new(1, PeriodFreq::Monthly);
8855        let q = Period::new(1, PeriodFreq::Quarterly);
8856        assert_eq!(m.cmp_same_freq(&q), None);
8857    }
8858
8859    #[test]
8860    fn period_arithmetic_matches_seeded_oracles_bac28() {
8861        use std::cmp::Ordering;
8862
8863        fn next(seed: &mut u64) -> u64 {
8864            *seed = seed
8865                .wrapping_mul(2862933555777941757)
8866                .wrapping_add(3037000493);
8867            *seed
8868        }
8869
8870        fn freq_for(raw: u64) -> PeriodFreq {
8871            match raw % 9 {
8872                0 => PeriodFreq::Annual,
8873                1 => PeriodFreq::Quarterly,
8874                2 => PeriodFreq::Monthly,
8875                3 => PeriodFreq::Weekly,
8876                4 => PeriodFreq::Daily,
8877                5 => PeriodFreq::Business,
8878                6 => PeriodFreq::Hourly,
8879                7 => PeriodFreq::Minutely,
8880                _ => PeriodFreq::Secondly,
8881            }
8882        }
8883
8884        fn different_freq(freq: PeriodFreq) -> PeriodFreq {
8885            match freq {
8886                PeriodFreq::Annual => PeriodFreq::Quarterly,
8887                PeriodFreq::Quarterly => PeriodFreq::Monthly,
8888                PeriodFreq::Monthly => PeriodFreq::Weekly,
8889                PeriodFreq::Weekly => PeriodFreq::Daily,
8890                PeriodFreq::Daily => PeriodFreq::Business,
8891                PeriodFreq::Business => PeriodFreq::Hourly,
8892                PeriodFreq::Hourly => PeriodFreq::Minutely,
8893                PeriodFreq::Minutely => PeriodFreq::Secondly,
8894                PeriodFreq::Secondly => PeriodFreq::Annual,
8895            }
8896        }
8897
8898        fn assert_period_case(
8899            case: usize,
8900            freq: PeriodFreq,
8901            ordinal: i64,
8902            shift_by: i64,
8903            other_ordinal: i64,
8904        ) {
8905            let period = Period::new(ordinal, freq);
8906            let shifted = period.shift(shift_by);
8907            assert_eq!(
8908                shifted.ordinal,
8909                ordinal.saturating_add(shift_by),
8910                "case {case}: shift ordinal"
8911            );
8912            assert_eq!(shifted.freq, freq, "case {case}: shift freq");
8913
8914            let same_freq_other = Period::new(other_ordinal, freq);
8915            assert_eq!(
8916                period.diff(&same_freq_other),
8917                Some(ordinal.saturating_sub(other_ordinal)),
8918                "case {case}: same-freq diff"
8919            );
8920            assert_eq!(
8921                period.cmp_same_freq(&same_freq_other),
8922                Some(ordinal.cmp(&other_ordinal)),
8923                "case {case}: same-freq cmp"
8924            );
8925
8926            let cross_freq_other = Period::new(other_ordinal, different_freq(freq));
8927            assert_eq!(
8928                period.diff(&cross_freq_other),
8929                None,
8930                "case {case}: cross-freq diff"
8931            );
8932            assert_eq!(
8933                period.cmp_same_freq(&cross_freq_other),
8934                None,
8935                "case {case}: cross-freq cmp"
8936            );
8937        }
8938
8939        assert_period_case(usize::MAX, PeriodFreq::Daily, i64::MAX - 2, 10, i64::MIN);
8940        assert_period_case(
8941            usize::MAX - 1,
8942            PeriodFreq::Daily,
8943            i64::MIN + 2,
8944            -10,
8945            i64::MAX,
8946        );
8947        assert_eq!(
8948            Period::new(10, PeriodFreq::Monthly)
8949                .cmp_same_freq(&Period::new(10, PeriodFreq::Monthly)),
8950            Some(Ordering::Equal)
8951        );
8952
8953        let mut seed = 0xbac2_8d1f_0d1c_5eed_u64;
8954        for case in 0..260 {
8955            let freq = freq_for(next(&mut seed));
8956            let ordinal = match case % 53 {
8957                0 => i64::MAX - (next(&mut seed) % 8) as i64,
8958                1 => i64::MIN + (next(&mut seed) % 8) as i64,
8959                _ => (next(&mut seed) % 200_001) as i64 - 100_000,
8960            };
8961            let shift_by = match case % 47 {
8962                0 => 512,
8963                1 => -512,
8964                _ => (next(&mut seed) % 4097) as i64 - 2048,
8965            };
8966            let other_ordinal = match case % 41 {
8967                0 => i64::MAX,
8968                1 => i64::MIN,
8969                _ => (next(&mut seed) % 200_001) as i64 - 100_000,
8970            };
8971            assert_period_case(case, freq, ordinal, shift_by, other_ordinal);
8972        }
8973    }
8974
8975    #[test]
8976    fn period_display_is_pandas_calendar_string() {
8977        // Ordinal 216 on the quarterly axis (1970Q1 == 0) is 1970 + 54y = 2024Q1.
8978        assert_eq!(
8979            Period::new(216, PeriodFreq::Quarterly).to_string(),
8980            "2024Q1"
8981        );
8982        // 1970 + 54 == 2024 on the annual axis.
8983        assert_eq!(Period::new(54, PeriodFreq::Annual).to_string(), "2024");
8984        // 1970-01 == 0 -> 2024-03 is 54*12 + 2 == 650 months.
8985        assert_eq!(Period::new(650, PeriodFreq::Monthly).to_string(), "2024-03");
8986        // Day 0 == 1970-01-01; 2024-01-15.
8987        assert_eq!(
8988            Period::new(fp_days("2024-01-15"), PeriodFreq::Daily).to_string(),
8989            "2024-01-15"
8990        );
8991        assert_eq!(
8992            Scalar::Period(Period::new(i64::MIN, PeriodFreq::Daily)).to_string(),
8993            "NaT"
8994        );
8995    }
8996
8997    #[cfg(test)]
8998    fn fp_days(ymd: &str) -> i64 {
8999        Period::parse(ymd).expect("daily period").ordinal
9000    }
9001
9002    #[test]
9003    fn period_roundtrips_through_serde_json() {
9004        let p = Period::new(42, PeriodFreq::Weekly);
9005        let json = serde_json::to_string(&p).expect("serialize");
9006        let back: Period = serde_json::from_str(&json).expect("deserialize");
9007        assert_eq!(p, back);
9008    }
9009
9010    // ── period_range tests (br-frankenpandas-2jef — epoj Phase 2) ───────
9011
9012    use super::period_range;
9013
9014    #[test]
9015    fn period_range_zero_periods_is_empty() {
9016        let start = Period::new(216, PeriodFreq::Quarterly);
9017        assert!(period_range(start, 0).is_empty());
9018    }
9019
9020    #[test]
9021    fn period_range_single_period_returns_start_only() {
9022        let start = Period::new(216, PeriodFreq::Quarterly);
9023        let r = period_range(start, 1);
9024        assert_eq!(r.len(), 1);
9025        assert_eq!(r[0], start);
9026    }
9027
9028    #[test]
9029    fn period_range_increments_ordinal_by_one_per_step() {
9030        let start = Period::new(216, PeriodFreq::Quarterly);
9031        let r = period_range(start, 4);
9032        assert_eq!(r.len(), 4);
9033        assert_eq!(r[0].ordinal, 216);
9034        assert_eq!(r[1].ordinal, 217);
9035        assert_eq!(r[2].ordinal, 218);
9036        assert_eq!(r[3].ordinal, 219);
9037    }
9038
9039    #[test]
9040    fn period_range_preserves_frequency() {
9041        let start = Period::new(0, PeriodFreq::Monthly);
9042        let r = period_range(start, 12);
9043        assert!(r.iter().all(|p| p.freq == PeriodFreq::Monthly));
9044    }
9045
9046    #[test]
9047    fn period_range_negative_starting_ordinal_works() {
9048        // Ordinal axis is signed — pre-epoch periods are valid.
9049        let start = Period::new(-3, PeriodFreq::Annual);
9050        let r = period_range(start, 5);
9051        assert_eq!(
9052            r.iter().map(|p| p.ordinal).collect::<Vec<_>>(),
9053            vec![-3, -2, -1, 0, 1]
9054        );
9055    }
9056
9057    #[test]
9058    fn period_range_large_n_does_not_panic() {
9059        // 1024 monthly periods — large enough to catch any allocation bug.
9060        let start = Period::new(0, PeriodFreq::Monthly);
9061        let r = period_range(start, 1024);
9062        assert_eq!(r.len(), 1024);
9063        assert_eq!(r[1023].ordinal, 1023);
9064    }
9065
9066    #[test]
9067    fn period_range_matches_seeded_ordinal_oracle_z3zh2() {
9068        fn next(seed: &mut u64) -> u64 {
9069            *seed = seed
9070                .wrapping_mul(6364136223846793005)
9071                .wrapping_add(1442695040888963407);
9072            *seed
9073        }
9074
9075        fn freq_for(raw: u64) -> PeriodFreq {
9076            match raw % 9 {
9077                0 => PeriodFreq::Annual,
9078                1 => PeriodFreq::Quarterly,
9079                2 => PeriodFreq::Monthly,
9080                3 => PeriodFreq::Weekly,
9081                4 => PeriodFreq::Daily,
9082                5 => PeriodFreq::Business,
9083                6 => PeriodFreq::Hourly,
9084                7 => PeriodFreq::Minutely,
9085                _ => PeriodFreq::Secondly,
9086            }
9087        }
9088
9089        fn assert_oracle_case(case: usize, start: Period, periods: usize) {
9090            let got = period_range(start, periods);
9091            assert_eq!(got.len(), periods, "case {case}: length");
9092
9093            for (position, period) in got.iter().enumerate() {
9094                let expected_ordinal = start.ordinal.saturating_add(position as i64);
9095                assert_eq!(
9096                    period.ordinal, expected_ordinal,
9097                    "case {case}: ordinal at {position}"
9098                );
9099                assert_eq!(period.freq, start.freq, "case {case}: freq at {position}");
9100            }
9101        }
9102
9103        assert_oracle_case(usize::MAX, Period::new(42, PeriodFreq::Monthly), 0);
9104        assert_oracle_case(
9105            usize::MAX - 1,
9106            Period::new(i64::MAX - 3, PeriodFreq::Daily),
9107            8,
9108        );
9109
9110        let mut seed = 0x9e21_0d1c_5eed_0421_u64;
9111        for case in 0..260 {
9112            let freq = freq_for(next(&mut seed));
9113            let periods = (next(&mut seed) % 80) as usize;
9114            let start_ordinal = if case % 37 == 0 {
9115                i64::MAX - 7
9116            } else {
9117                (next(&mut seed) % 20_001) as i64 - 10_000
9118            };
9119            assert_oracle_case(case, Period::new(start_ordinal, freq), periods);
9120        }
9121    }
9122
9123    // ── interval_range tests (br-frankenpandas-xaom) ────────────────────
9124
9125    use super::{TypeError, interval_range_by_periods, interval_range_by_step};
9126
9127    #[test]
9128    fn interval_range_by_periods_matches_pandas_default_case() {
9129        // pd.interval_range(0, 10, periods=5) → [(0,2],(2,4],(4,6],(6,8],(8,10]]
9130        let bins = interval_range_by_periods(0.0, 10.0, 5, IntervalClosed::Right);
9131        assert_eq!(bins.len(), 5);
9132        for (i, bin) in bins.iter().enumerate() {
9133            assert_eq!(bin.left, (i as f64) * 2.0);
9134            assert_eq!(bin.right, ((i + 1) as f64) * 2.0);
9135            assert_eq!(bin.closed, IntervalClosed::Right);
9136        }
9137    }
9138
9139    #[test]
9140    fn interval_range_by_periods_final_edge_is_exact_end() {
9141        // Guards against accumulated float drift on the last right edge.
9142        let bins = interval_range_by_periods(0.0, 1.0, 7, IntervalClosed::Right);
9143        assert_eq!(bins.last().unwrap().right, 1.0);
9144    }
9145
9146    #[test]
9147    fn interval_range_by_periods_zero_periods_is_empty() {
9148        assert!(interval_range_by_periods(0.0, 10.0, 0, IntervalClosed::Right).is_empty());
9149    }
9150
9151    #[test]
9152    fn interval_range_by_periods_reversed_range_is_empty() {
9153        // pandas: pd.interval_range(10, 0, periods=5) → IntervalIndex([]).
9154        assert!(interval_range_by_periods(10.0, 0.0, 5, IntervalClosed::Right).is_empty());
9155    }
9156
9157    #[test]
9158    fn interval_range_by_periods_preserves_closed_policy() {
9159        for closed in [
9160            IntervalClosed::Left,
9161            IntervalClosed::Right,
9162            IntervalClosed::Both,
9163            IntervalClosed::Neither,
9164        ] {
9165            let bins = interval_range_by_periods(0.0, 4.0, 2, closed);
9166            assert!(bins.iter().all(|b| b.closed == closed));
9167        }
9168    }
9169
9170    #[test]
9171    fn interval_range_by_step_matches_pandas_default_case() {
9172        // pd.interval_range(0, 10, freq=2) → [(0,2],(2,4],(4,6],(6,8],(8,10]]
9173        let bins = interval_range_by_step(0.0, 10.0, 2.0, IntervalClosed::Right).expect("ok");
9174        assert_eq!(bins.len(), 5);
9175        assert_eq!(bins[0].left, 0.0);
9176        assert_eq!(bins[4].right, 10.0);
9177    }
9178
9179    #[test]
9180    fn interval_range_by_step_rejects_non_positive_step() {
9181        assert!(matches!(
9182            interval_range_by_step(0.0, 10.0, 0.0, IntervalClosed::Right),
9183            Err(TypeError::InvalidIntervalStep { .. })
9184        ));
9185        assert!(matches!(
9186            interval_range_by_step(0.0, 10.0, -2.0, IntervalClosed::Right),
9187            Err(TypeError::InvalidIntervalStep { .. })
9188        ));
9189        assert!(matches!(
9190            interval_range_by_step(0.0, 10.0, f64::NAN, IntervalClosed::Right),
9191            Err(TypeError::InvalidIntervalStep { .. })
9192        ));
9193        assert!(matches!(
9194            interval_range_by_step(0.0, 10.0, f64::INFINITY, IntervalClosed::Right),
9195            Err(TypeError::InvalidIntervalStep { .. })
9196        ));
9197    }
9198
9199    #[test]
9200    fn interval_range_by_step_rejects_non_dividing_step() {
9201        // pandas: pd.interval_range(0, 10, freq=3) → ValueError
9202        // (span=10 not divisible by step=3). Reject with IntervalStepDoesNotDivide.
9203        assert!(matches!(
9204            interval_range_by_step(0.0, 10.0, 3.0, IntervalClosed::Right),
9205            Err(TypeError::IntervalStepDoesNotDivide { .. })
9206        ));
9207    }
9208
9209    #[test]
9210    fn interval_range_by_step_reversed_range_is_empty() {
9211        let bins = interval_range_by_step(10.0, 0.0, 2.0, IntervalClosed::Right).expect("ok");
9212        assert!(bins.is_empty());
9213    }
9214
9215    #[test]
9216    fn interval_range_by_step_degenerate_zero_span_is_empty() {
9217        let bins = interval_range_by_step(5.0, 5.0, 1.0, IntervalClosed::Right).expect("ok");
9218        assert!(bins.is_empty());
9219    }
9220
9221    #[test]
9222    fn interval_range_by_step_accepts_float_step_within_tolerance() {
9223        // step=0.1 ten times == 1.0 but float arithmetic produces 0.9999...
9224        let bins = interval_range_by_step(0.0, 1.0, 0.1, IntervalClosed::Right).expect("ok");
9225        assert_eq!(bins.len(), 10);
9226        assert_eq!(bins.last().unwrap().right, 1.0);
9227    }
9228
9229    #[test]
9230    fn interval_range_matches_seeded_arithmetic_oracle_t9ozf() {
9231        // Differential vs independent interval edge oracles
9232        // (br-frankenpandas-t9ozf). Seeded LCG, no mocks.
9233        fn next(seed: &mut u64) -> u64 {
9234            *seed = seed
9235                .wrapping_mul(2862933555777941757)
9236                .wrapping_add(3037000493);
9237            *seed
9238        }
9239
9240        fn closed_for(raw: u64) -> IntervalClosed {
9241            match raw % 4 {
9242                0 => IntervalClosed::Left,
9243                1 => IntervalClosed::Right,
9244                2 => IntervalClosed::Both,
9245                _ => IntervalClosed::Neither,
9246            }
9247        }
9248
9249        fn assert_interval(
9250            case: usize,
9251            kind: &str,
9252            pos: usize,
9253            actual: &Interval,
9254            expected: &Interval,
9255        ) {
9256            assert!(
9257                (actual.left - expected.left).abs() < 1e-12,
9258                "case={case} kind={kind} pos={pos}: expected left {}, got {}",
9259                expected.left,
9260                actual.left
9261            );
9262            assert!(
9263                (actual.right - expected.right).abs() < 1e-12,
9264                "case={case} kind={kind} pos={pos}: expected right {}, got {}",
9265                expected.right,
9266                actual.right
9267            );
9268            assert_eq!(
9269                actual.closed, expected.closed,
9270                "case={case} kind={kind} pos={pos}: closed mismatch"
9271            );
9272        }
9273
9274        fn expected_by_periods(
9275            start: f64,
9276            end: f64,
9277            periods: usize,
9278            closed: IntervalClosed,
9279        ) -> Vec<Interval> {
9280            if periods == 0 || !start.is_finite() || !end.is_finite() || start >= end {
9281                return Vec::new();
9282            }
9283            let step = (end - start) / periods as f64;
9284            (0..periods)
9285                .map(|pos| {
9286                    let left = start + step * pos as f64;
9287                    let right = if pos + 1 == periods {
9288                        end
9289                    } else {
9290                        start + step * (pos + 1) as f64
9291                    };
9292                    Interval::new(left, right, closed)
9293                })
9294                .collect()
9295        }
9296
9297        fn expected_by_step(
9298            start: f64,
9299            end: f64,
9300            step: f64,
9301            closed: IntervalClosed,
9302        ) -> Vec<Interval> {
9303            if start >= end {
9304                return Vec::new();
9305            }
9306            let count = ((end - start) / step).round() as usize;
9307            (0..count)
9308                .map(|pos| {
9309                    let left = start + step * pos as f64;
9310                    let right = if pos + 1 == count {
9311                        end
9312                    } else {
9313                        start + step * (pos + 1) as f64
9314                    };
9315                    Interval::new(left, right, closed)
9316                })
9317                .collect()
9318        }
9319
9320        assert!(interval_range_by_periods(5.0, 5.0, 4, IntervalClosed::Right).is_empty());
9321        assert!(interval_range_by_periods(5.0, 4.0, 4, IntervalClosed::Right).is_empty());
9322        assert!(
9323            interval_range_by_step(5.0, 5.0, 1.0, IntervalClosed::Right)
9324                .expect("zero span")
9325                .is_empty()
9326        );
9327
9328        let mut seed = 0x171e_7a11_c0de_5eed_u64;
9329        for case in 0..220 {
9330            let start = (next(&mut seed) % 2_001) as f64 / 10.0 - 100.0;
9331            let periods = (next(&mut seed) % 24 + 1) as usize;
9332            let width = (next(&mut seed) % 1_000 + 1) as f64 / 4.0;
9333            let end = start + width;
9334            let closed = closed_for(next(&mut seed));
9335
9336            let actual = interval_range_by_periods(start, end, periods, closed);
9337            let expected = expected_by_periods(start, end, periods, closed);
9338            assert_eq!(
9339                actual.len(),
9340                expected.len(),
9341                "case={case} periods: length mismatch"
9342            );
9343            for (pos, (actual, expected)) in actual.iter().zip(expected.iter()).enumerate() {
9344                assert_interval(case, "periods", pos, actual, expected);
9345            }
9346
9347            let step_count = (next(&mut seed) % 20 + 1) as usize;
9348            let step = (next(&mut seed) % 25 + 1) as f64;
9349            let step_end = start + step * step_count as f64;
9350            let actual = interval_range_by_step(start, step_end, step, closed).expect("divides");
9351            let expected = expected_by_step(start, step_end, step, closed);
9352            assert_eq!(
9353                actual.len(),
9354                expected.len(),
9355                "case={case} step: length mismatch"
9356            );
9357            for (pos, (actual, expected)) in actual.iter().zip(expected.iter()).enumerate() {
9358                assert_interval(case, "step", pos, actual, expected);
9359            }
9360        }
9361    }
9362
9363    // ── Timedelta arithmetic tests (br-frankenpandas-4r56 Phase 1) ──────
9364
9365    use super::Timedelta;
9366
9367    #[test]
9368    fn timedelta_add_sums_non_nat() {
9369        let one_hour = Timedelta::NANOS_PER_HOUR;
9370        let one_day = Timedelta::NANOS_PER_DAY;
9371        assert_eq!(Timedelta::add(one_hour, one_day), one_hour + one_day);
9372    }
9373
9374    #[test]
9375    fn timedelta_add_propagates_nat() {
9376        assert_eq!(Timedelta::add(Timedelta::NAT, 100), Timedelta::NAT);
9377        assert_eq!(Timedelta::add(100, Timedelta::NAT), Timedelta::NAT);
9378        assert_eq!(
9379            Timedelta::add(Timedelta::NAT, Timedelta::NAT),
9380            Timedelta::NAT
9381        );
9382    }
9383
9384    #[test]
9385    fn timedelta_add_saturates_on_overflow() {
9386        assert_eq!(Timedelta::add(i64::MAX - 10, 100), i64::MAX);
9387        // Note: i64::MIN is NaT; use MIN+1 to test saturation on the negative side.
9388        assert_eq!(Timedelta::add(i64::MIN + 10, -100), i64::MIN);
9389    }
9390
9391    #[test]
9392    fn timedelta_sub_subtracts_non_nat() {
9393        let one_hour = Timedelta::NANOS_PER_HOUR;
9394        assert_eq!(
9395            Timedelta::sub(one_hour, Timedelta::NANOS_PER_MIN),
9396            one_hour - Timedelta::NANOS_PER_MIN
9397        );
9398    }
9399
9400    #[test]
9401    fn timedelta_sub_propagates_nat() {
9402        assert_eq!(Timedelta::sub(Timedelta::NAT, 100), Timedelta::NAT);
9403        assert_eq!(Timedelta::sub(100, Timedelta::NAT), Timedelta::NAT);
9404    }
9405
9406    #[test]
9407    fn timedelta_neg_flips_sign_non_nat() {
9408        assert_eq!(Timedelta::neg(5), -5);
9409        assert_eq!(Timedelta::neg(-5), 5);
9410        assert_eq!(Timedelta::neg(0), 0);
9411    }
9412
9413    #[test]
9414    fn timedelta_neg_preserves_nat() {
9415        assert_eq!(Timedelta::neg(Timedelta::NAT), Timedelta::NAT);
9416    }
9417
9418    #[test]
9419    fn timedelta_abs_returns_magnitude() {
9420        assert_eq!(Timedelta::abs(-5), 5);
9421        assert_eq!(Timedelta::abs(5), 5);
9422        assert_eq!(Timedelta::abs(0), 0);
9423        assert_eq!(Timedelta::abs(Timedelta::NAT), Timedelta::NAT);
9424    }
9425
9426    #[test]
9427    fn timedelta_mul_scalar_scales() {
9428        let three_hours = Timedelta::NANOS_PER_HOUR * 3;
9429        assert_eq!(
9430            Timedelta::mul_scalar(Timedelta::NANOS_PER_HOUR, 3),
9431            three_hours
9432        );
9433        assert_eq!(Timedelta::mul_scalar(100, 0), 0);
9434        assert_eq!(Timedelta::mul_scalar(100, -2), -200);
9435    }
9436
9437    #[test]
9438    fn timedelta_mul_scalar_saturates() {
9439        assert_eq!(Timedelta::mul_scalar(i64::MAX, 2), i64::MAX);
9440        // (i64::MIN + 1) * 2 saturates to i64::MIN (magnitude too large).
9441        assert_eq!(Timedelta::mul_scalar(i64::MIN + 1, 2), i64::MIN);
9442    }
9443
9444    #[test]
9445    fn timedelta_mul_scalar_propagates_nat() {
9446        assert_eq!(Timedelta::mul_scalar(Timedelta::NAT, 5), Timedelta::NAT);
9447    }
9448
9449    #[test]
9450    fn timedelta_div_scalar_floor_divides() {
9451        // Floor division (matches Python / pandas): -100 // 3 == -34, not -33.
9452        assert_eq!(Timedelta::div_scalar(100, 3), 33);
9453        assert_eq!(Timedelta::div_scalar(-100, 3), -34);
9454        assert_eq!(Timedelta::div_scalar(100, -3), -34);
9455        assert_eq!(Timedelta::div_scalar(-100, -3), 33);
9456    }
9457
9458    #[test]
9459    fn timedelta_div_scalar_zero_divisor_returns_nat() {
9460        assert_eq!(Timedelta::div_scalar(100, 0), Timedelta::NAT);
9461    }
9462
9463    #[test]
9464    fn timedelta_div_scalar_min_neg_one_propagates_nat() {
9465        // i64::MIN aliases NaT, so `div_scalar(i64::MIN, _)` propagates NaT
9466        // — the `i64::MIN / -1` arithmetic-overflow case is subsumed.
9467        assert_eq!(Timedelta::div_scalar(i64::MIN, -1), Timedelta::NAT);
9468        // (i64::MIN + 1) is a real timedelta; `/ -1` does not overflow.
9469        assert_eq!(Timedelta::div_scalar(i64::MIN + 1, -1), i64::MAX);
9470    }
9471
9472    #[test]
9473    fn timedelta_div_scalar_propagates_nat() {
9474        assert_eq!(Timedelta::div_scalar(Timedelta::NAT, 10), Timedelta::NAT);
9475    }
9476
9477    #[test]
9478    fn timedelta_div_timedelta_returns_float_ratio() {
9479        let two_hours = Timedelta::NANOS_PER_HOUR * 2;
9480        let one_hour = Timedelta::NANOS_PER_HOUR;
9481        assert!((Timedelta::div_timedelta(two_hours, one_hour) - 2.0).abs() < 1e-12);
9482        assert!((Timedelta::div_timedelta(one_hour, two_hours) - 0.5).abs() < 1e-12);
9483    }
9484
9485    #[test]
9486    fn timedelta_div_timedelta_nat_returns_nan() {
9487        assert!(Timedelta::div_timedelta(Timedelta::NAT, 100).is_nan());
9488        assert!(Timedelta::div_timedelta(100, Timedelta::NAT).is_nan());
9489    }
9490
9491    // ── Timestamp tests (br-frankenpandas-9p0u — 4r56 Phase 2) ──────────
9492
9493    use super::Timestamp;
9494
9495    #[test]
9496    fn timestamp_from_nanos_is_naive_utc() {
9497        let ts = Timestamp::from_nanos(1_700_000_000_000_000_000);
9498        assert_eq!(ts.nanos, 1_700_000_000_000_000_000);
9499        assert_eq!(ts.tz, None);
9500        assert!(!ts.is_nat());
9501    }
9502
9503    #[test]
9504    fn timestamp_from_nanos_tz_carries_tz_name() {
9505        let ts = Timestamp::from_nanos_tz(1_700_000_000_000_000_000, "US/Eastern");
9506        assert_eq!(ts.tz.as_deref(), Some("US/Eastern"));
9507    }
9508
9509    #[test]
9510    fn timestamp_now_returns_current_time() {
9511        let before = std::time::SystemTime::now()
9512            .duration_since(std::time::UNIX_EPOCH)
9513            .unwrap()
9514            .as_nanos() as i64;
9515        let ts = Timestamp::now();
9516        let after = std::time::SystemTime::now()
9517            .duration_since(std::time::UNIX_EPOCH)
9518            .unwrap()
9519            .as_nanos() as i64;
9520        assert!(ts.nanos >= before);
9521        assert!(ts.nanos <= after);
9522        assert!(!ts.is_nat());
9523    }
9524
9525    #[test]
9526    fn timestamp_today_returns_midnight() {
9527        let ts = Timestamp::today();
9528        assert!(!ts.is_nat());
9529        // Today should be normalized (midnight), so hour/min/sec should be 0
9530        assert_eq!(ts.hour(), Some(0));
9531        assert_eq!(ts.minute(), Some(0));
9532        assert_eq!(ts.second(), Some(0));
9533    }
9534
9535    #[test]
9536    fn timestamp_add_timedelta_shifts_nanos_and_preserves_tz() {
9537        let ts = Timestamp::from_nanos_tz(0, "US/Eastern");
9538        let one_day = Timedelta::NANOS_PER_DAY;
9539        let shifted = ts.add_timedelta(one_day);
9540        assert_eq!(shifted.nanos, one_day);
9541        assert_eq!(shifted.tz.as_deref(), Some("US/Eastern"));
9542    }
9543
9544    #[test]
9545    fn timestamp_add_timedelta_saturates_on_overflow() {
9546        let ts = Timestamp::from_nanos(i64::MAX - 10);
9547        let shifted = ts.add_timedelta(100);
9548        assert_eq!(shifted.nanos, i64::MAX);
9549    }
9550
9551    #[test]
9552    fn timestamp_add_timedelta_propagates_nat() {
9553        // NaT Timestamp + anything = NaT.
9554        assert!(Timestamp::nat().add_timedelta(100).is_nat());
9555        // Timestamp + NaT Timedelta = NaT.
9556        assert!(
9557            Timestamp::from_nanos(0)
9558                .add_timedelta(Timedelta::NAT)
9559                .is_nat()
9560        );
9561    }
9562
9563    #[test]
9564    fn timestamp_sub_timedelta_shifts_backward() {
9565        let ts = Timestamp::from_nanos(1_000);
9566        let shifted = ts.sub_timedelta(Timedelta::NANOS_PER_MICRO);
9567        assert_eq!(shifted.nanos, 0);
9568    }
9569
9570    #[test]
9571    fn timestamp_sub_timestamp_returns_timedelta_nanos() {
9572        let t0 = Timestamp::from_nanos(0);
9573        let t1 = Timestamp::from_nanos(Timedelta::NANOS_PER_HOUR);
9574        assert_eq!(t1.sub_timestamp(&t0), Timedelta::NANOS_PER_HOUR);
9575        assert_eq!(t0.sub_timestamp(&t1), -Timedelta::NANOS_PER_HOUR);
9576    }
9577
9578    #[test]
9579    fn timestamp_sub_timestamp_nat_propagates() {
9580        let ts = Timestamp::from_nanos(1_000);
9581        assert_eq!(Timestamp::nat().sub_timestamp(&ts), Timedelta::NAT);
9582        assert_eq!(ts.sub_timestamp(&Timestamp::nat()), Timedelta::NAT);
9583    }
9584
9585    #[test]
9586    fn timestamp_semantic_eq_treats_two_nat_as_equal() {
9587        assert!(Timestamp::nat().semantic_eq(&Timestamp::nat()));
9588        assert!(!Timestamp::nat().semantic_eq(&Timestamp::from_nanos(0)));
9589        assert!(!Timestamp::from_nanos(0).semantic_eq(&Timestamp::nat()));
9590    }
9591
9592    #[test]
9593    fn timestamp_partial_cmp_orders_by_nanos_nat_is_incomparable() {
9594        use std::cmp::Ordering;
9595        let a = Timestamp::from_nanos(0);
9596        let b = Timestamp::from_nanos(100);
9597        assert_eq!(a.partial_cmp(&b), Some(Ordering::Less));
9598        assert_eq!(b.partial_cmp(&a), Some(Ordering::Greater));
9599        assert_eq!(a.partial_cmp(&a), Some(Ordering::Equal));
9600        assert_eq!(a.partial_cmp(&Timestamp::nat()), None);
9601        assert_eq!(Timestamp::nat().partial_cmp(&Timestamp::nat()), None);
9602    }
9603
9604    #[test]
9605    fn timestamp_display_matches_phase2_debug_format() {
9606        assert_eq!(Timestamp::from_nanos(42).to_string(), "Timestamp[42, UTC]");
9607        assert_eq!(
9608            Timestamp::from_nanos_tz(42, "US/Eastern").to_string(),
9609            "Timestamp[42, US/Eastern]"
9610        );
9611        assert_eq!(Timestamp::nat().to_string(), "NaT");
9612    }
9613
9614    #[test]
9615    fn timestamp_value_and_unit_match_pandas_l0edr() {
9616        let ts = Timestamp::from_nanos(1_000_000_123);
9617        assert_eq!(ts.value(), 1_000_000_123);
9618        assert_eq!(ts.unit(), Some("ns"));
9619
9620        let nat = Timestamp::nat();
9621        assert_eq!(nat.value(), Timestamp::NAT);
9622        assert_eq!(nat.unit(), None);
9623    }
9624
9625    #[test]
9626    fn timestamp_numpy_datetime64_materializers_match_value_twksi() {
9627        let ts = Timestamp::from_nanos(1_000_000_123);
9628        assert_eq!(ts.asm8(), ts.value());
9629        assert_eq!(ts.to_datetime64(), ts.value());
9630        assert_eq!(ts.to_numpy(), ts.value());
9631
9632        let nat = Timestamp::nat();
9633        assert_eq!(nat.asm8(), Timestamp::NAT);
9634        assert_eq!(nat.to_datetime64(), Timestamp::NAT);
9635        assert_eq!(nat.to_numpy(), Timestamp::NAT);
9636    }
9637
9638    #[test]
9639    fn timestamp_timestamp_accessor_matches_pandas_microsecond_rounding_py0h3() {
9640        assert_eq!(Timestamp::from_nanos(0).timestamp(), Ok(0.0));
9641        assert_eq!(Timestamp::from_nanos(1_500_000_000).timestamp(), Ok(1.5));
9642        assert_eq!(Timestamp::from_nanos(500).timestamp(), Ok(0.0));
9643        assert_eq!(Timestamp::from_nanos(501).timestamp(), Ok(0.000001));
9644        assert_eq!(Timestamp::from_nanos(2_500).timestamp(), Ok(0.000003));
9645
9646        assert!(matches!(
9647            Timestamp::from_nanos(-500).timestamp(),
9648            Ok(value) if value == -0.0 && value.is_sign_negative()
9649        ));
9650        assert_eq!(Timestamp::from_nanos(-2_500).timestamp(), Ok(-0.000003));
9651        assert_eq!(
9652            Timestamp::nat().timestamp(),
9653            Err(TypeError::ValueIsMissing {
9654                kind: NullKind::NaT,
9655            })
9656        );
9657    }
9658
9659    #[test]
9660    fn timestamp_roundtrips_through_serde_json() {
9661        let naive = Timestamp::from_nanos(1_700_000_000_000_000_000);
9662        let json = serde_json::to_string(&naive).expect("serialize");
9663        let back: Timestamp = serde_json::from_str(&json).expect("deserialize");
9664        assert_eq!(naive, back);
9665
9666        let tz_aware = Timestamp::from_nanos_tz(1_700_000_000_000_000_000, "US/Eastern");
9667        let json = serde_json::to_string(&tz_aware).expect("serialize");
9668        let back: Timestamp = serde_json::from_str(&json).expect("deserialize");
9669        assert_eq!(tz_aware, back);
9670    }
9671
9672    #[test]
9673    fn timestamp_is_send_and_sync() {
9674        fn assert_send_sync<T: Send + Sync>() {}
9675        assert_send_sync::<Timestamp>();
9676    }
9677
9678    // ── Timestamp rounding tests (br-frankenpandas-5h6n) ────────────────
9679
9680    #[test]
9681    fn timestamp_pre_epoch_accessors_floor_not_truncate() {
9682        // br-frankenpandas-wkjtw: pre-1970 instants with a sub-day part used to
9683        // truncate toward zero, landing on the wrong calendar day. -1 ns is
9684        // 1969-12-31 23:59:59.999999999 UTC.
9685        let ts = Timestamp::from_nanos(-1);
9686        assert_eq!(ts.year(), Some(1969));
9687        assert_eq!(ts.month(), Some(12));
9688        assert_eq!(ts.day(), Some(31));
9689        assert_eq!(ts.hour(), Some(23));
9690        assert_eq!(ts.minute(), Some(59));
9691        assert_eq!(ts.second(), Some(59));
9692        assert_eq!(ts.microsecond(), Some(999_999));
9693        assert_eq!(ts.nanosecond(), Some(999));
9694        // 1969-12-31 is a Wednesday (pandas Monday=0 -> 2).
9695        assert_eq!(ts.dayofweek(), Some(2));
9696
9697        // 1969-12-31 12:00:00 — the classic truncation case (-43200 s).
9698        let noon = Timestamp::from_nanos(-43200 * Timedelta::NANOS_PER_SEC);
9699        assert_eq!(noon.year(), Some(1969));
9700        assert_eq!(noon.month(), Some(12));
9701        assert_eq!(noon.day(), Some(31));
9702        assert_eq!(noon.hour(), Some(12));
9703
9704        // Exact midnight pre-epoch was already correct; keep it green.
9705        let midnight = Timestamp::from_nanos(-Timedelta::NANOS_PER_DAY);
9706        assert_eq!(midnight.year(), Some(1969));
9707        assert_eq!(midnight.day(), Some(31));
9708        assert_eq!(midnight.hour(), Some(0));
9709    }
9710
9711    #[test]
9712    fn timestamp_floor_to_rounds_down() {
9713        // 12:34:56 → floor by 1H → 12:00:00
9714        let h = Timedelta::NANOS_PER_HOUR;
9715        let twelve_h = h * 12;
9716        let twelve_thirty_four =
9717            twelve_h + Timedelta::NANOS_PER_MIN * 34 + Timedelta::NANOS_PER_SEC * 56;
9718        let ts = Timestamp::from_nanos(twelve_thirty_four);
9719        let floored = ts.floor_to(h);
9720        assert_eq!(floored.nanos, twelve_h);
9721    }
9722
9723    #[test]
9724    fn timestamp_floor_to_handles_already_aligned() {
9725        // 12:00:00 floored by 1H → 12:00:00 (no change).
9726        let h = Timedelta::NANOS_PER_HOUR;
9727        let twelve_h = h * 12;
9728        let ts = Timestamp::from_nanos(twelve_h);
9729        assert_eq!(ts.floor_to(h).nanos, twelve_h);
9730    }
9731
9732    #[test]
9733    fn timestamp_floor_to_handles_negative_nanos() {
9734        // -100 ns floored by 60 ns:
9735        //   div_euclid(-100, 60) = -2 (since -2*60=-120, rem=20 ≥ 0).
9736        //   result = -2 * 60 = -120.
9737        let ts = Timestamp::from_nanos(-100);
9738        assert_eq!(ts.floor_to(60).nanos, -120);
9739    }
9740
9741    #[test]
9742    fn timestamp_floor_to_returns_nat_on_axis_underflow_30hdi() {
9743        let ts = Timestamp::from_nanos(i64::MIN + 1);
9744        assert!(ts.floor_to(10).is_nat());
9745
9746        let safe = Timestamp::from_nanos(i64::MIN + 10);
9747        assert_eq!(safe.floor_to(10).nanos, i64::MIN + 8);
9748
9749        let tz = Timestamp::from_nanos_tz(-100, "UTC").floor_to(60);
9750        assert_eq!(tz.nanos, -120);
9751        assert_eq!(tz.tz.as_deref(), Some("UTC"));
9752    }
9753
9754    #[test]
9755    fn timestamp_ceil_to_rounds_up() {
9756        // 12:34:56 → ceil by 1H → 13:00:00.
9757        let h = Timedelta::NANOS_PER_HOUR;
9758        let twelve_h = h * 12;
9759        let thirteen_h = h * 13;
9760        let twelve_thirty_four =
9761            twelve_h + Timedelta::NANOS_PER_MIN * 34 + Timedelta::NANOS_PER_SEC * 56;
9762        let ts = Timestamp::from_nanos(twelve_thirty_four);
9763        assert_eq!(ts.ceil_to(h).nanos, thirteen_h);
9764    }
9765
9766    #[test]
9767    fn timestamp_ceil_to_no_op_on_aligned() {
9768        let h = Timedelta::NANOS_PER_HOUR;
9769        let twelve_h = h * 12;
9770        let ts = Timestamp::from_nanos(twelve_h);
9771        assert_eq!(ts.ceil_to(h).nanos, twelve_h);
9772    }
9773
9774    #[test]
9775    fn timestamp_round_to_rounds_to_nearest() {
9776        // 12:30:01 (one second past the half-hour): round to 1H → 13:00:00.
9777        let h = Timedelta::NANOS_PER_HOUR;
9778        let twelve_h = h * 12;
9779        let twelve_thirty_one_sec =
9780            twelve_h + Timedelta::NANOS_PER_MIN * 30 + Timedelta::NANOS_PER_SEC;
9781        let ts = Timestamp::from_nanos(twelve_thirty_one_sec);
9782        assert_eq!(ts.round_to(h).nanos, h * 13);
9783
9784        // 12:29:59 (one second before half): round to 1H → 12:00:00.
9785        let twelve_twenty_nine_sec =
9786            twelve_h + Timedelta::NANOS_PER_MIN * 29 + Timedelta::NANOS_PER_SEC * 59;
9787        let ts = Timestamp::from_nanos(twelve_twenty_nine_sec);
9788        assert_eq!(ts.round_to(h).nanos, twelve_h);
9789    }
9790
9791    #[test]
9792    fn timestamp_round_to_bankers_tie_to_even() {
9793        // Tie cases: rem == unit/2 exactly. Pick even-multiple floor.
9794        // unit=10, so half=5. nanos=5: floor=0 (even), so → 0.
9795        // nanos=15: floor=1 (odd), so → 20.
9796        // nanos=25: floor=2 (even), so → 20.
9797        // nanos=35: floor=3 (odd), so → 40.
9798        assert_eq!(Timestamp::from_nanos(5).round_to(10).nanos, 0);
9799        assert_eq!(Timestamp::from_nanos(15).round_to(10).nanos, 20);
9800        assert_eq!(Timestamp::from_nanos(25).round_to(10).nanos, 20);
9801        assert_eq!(Timestamp::from_nanos(35).round_to(10).nanos, 40);
9802    }
9803
9804    #[test]
9805    fn timestamp_round_to_zero_unit_returns_nat() {
9806        let ts = Timestamp::from_nanos(100);
9807        assert!(ts.round_to(0).is_nat());
9808        assert!(ts.floor_to(0).is_nat());
9809        assert!(ts.ceil_to(0).is_nat());
9810    }
9811
9812    #[test]
9813    fn timestamp_round_to_negative_unit_returns_nat() {
9814        let ts = Timestamp::from_nanos(100);
9815        assert!(ts.round_to(-10).is_nat());
9816        assert!(ts.floor_to(-10).is_nat());
9817        assert!(ts.ceil_to(-10).is_nat());
9818    }
9819
9820    #[test]
9821    fn timestamp_rounding_propagates_nat() {
9822        let nat = Timestamp::nat();
9823        assert!(nat.floor_to(60).is_nat());
9824        assert!(nat.ceil_to(60).is_nat());
9825        assert!(nat.round_to(60).is_nat());
9826    }
9827
9828    #[test]
9829    fn timestamp_rounding_preserves_tz() {
9830        let ts = Timestamp::from_nanos_tz(100, "US/Eastern");
9831        assert_eq!(ts.floor_to(60).tz.as_deref(), Some("US/Eastern"));
9832        assert_eq!(ts.ceil_to(60).tz.as_deref(), Some("US/Eastern"));
9833        assert_eq!(ts.round_to(60).tz.as_deref(), Some("US/Eastern"));
9834    }
9835
9836    // ── Timestamp string-unit rounding tests (br-frankenpandas-lbsx) ────
9837
9838    #[test]
9839    fn timestamp_floor_to_unit_h_rounds_to_hour() {
9840        let h = Timedelta::NANOS_PER_HOUR;
9841        let twelve_h = h * 12;
9842        let twelve_thirty_four =
9843            twelve_h + Timedelta::NANOS_PER_MIN * 34 + Timedelta::NANOS_PER_SEC * 56;
9844        let ts = Timestamp::from_nanos(twelve_thirty_four);
9845        assert_eq!(ts.floor_to_unit("H").nanos, twelve_h);
9846        assert_eq!(ts.floor_to_unit("h").nanos, twelve_h);
9847        assert_eq!(ts.floor_to_unit("hour").nanos, twelve_h);
9848        assert_eq!(ts.floor_to_unit("hours").nanos, twelve_h);
9849        assert_eq!(ts.floor_to_unit("hr").nanos, twelve_h);
9850    }
9851
9852    #[test]
9853    fn timestamp_ceil_to_unit_d_rounds_to_day() {
9854        // 12:34:56 → ceil to 1 day → 24:00:00 (next day).
9855        let h = Timedelta::NANOS_PER_HOUR;
9856        let d = Timedelta::NANOS_PER_DAY;
9857        let twelve_thirty_four = h * 12 + Timedelta::NANOS_PER_MIN * 34;
9858        let ts = Timestamp::from_nanos(twelve_thirty_four);
9859        assert_eq!(ts.ceil_to_unit("D").nanos, d);
9860        assert_eq!(ts.ceil_to_unit("day").nanos, d);
9861        assert_eq!(ts.ceil_to_unit("days").nanos, d);
9862    }
9863
9864    #[test]
9865    fn timestamp_round_to_unit_min_rounds_to_minute() {
9866        // 12:34:31 → round to 1 minute → 12:35:00.
9867        let m = Timedelta::NANOS_PER_MIN;
9868        let twelve_thirty_four_thirty_one =
9869            Timedelta::NANOS_PER_HOUR * 12 + m * 34 + Timedelta::NANOS_PER_SEC * 31;
9870        let ts = Timestamp::from_nanos(twelve_thirty_four_thirty_one);
9871        let expected = Timedelta::NANOS_PER_HOUR * 12 + m * 35;
9872        assert_eq!(ts.round_to_unit("min").nanos, expected);
9873        assert_eq!(ts.round_to_unit("T").nanos, expected); // pandas pre-2.2 alias
9874        assert_eq!(ts.round_to_unit("minute").nanos, expected);
9875    }
9876
9877    #[test]
9878    fn timestamp_floor_ceil_round_aliases_match_unit_methods_li897() {
9879        let ts = Timestamp::from_nanos(
9880            Timedelta::NANOS_PER_HOUR * 12
9881                + Timedelta::NANOS_PER_MIN * 34
9882                + Timedelta::NANOS_PER_SEC * 31,
9883        );
9884
9885        assert_eq!(ts.floor("H"), ts.floor_to_unit("H"));
9886        assert_eq!(ts.ceil("D"), ts.ceil_to_unit("D"));
9887        assert_eq!(ts.round("min"), ts.round_to_unit("min"));
9888    }
9889
9890    #[test]
9891    fn timestamp_normalize_floors_to_day_and_preserves_tz_455op() {
9892        let ts = Timestamp::from_nanos_tz(
9893            Timedelta::NANOS_PER_DAY * 3
9894                + Timedelta::NANOS_PER_HOUR * 12
9895                + Timedelta::NANOS_PER_MIN * 34,
9896            "US/Eastern",
9897        );
9898        let normalized = ts.normalize();
9899
9900        assert_eq!(normalized.nanos, Timedelta::NANOS_PER_DAY * 3);
9901        assert_eq!(normalized.tz.as_deref(), Some("US/Eastern"));
9902        assert!(Timestamp::nat().normalize().is_nat());
9903    }
9904
9905    #[test]
9906    fn timestamp_unit_rounding_unknown_unit_returns_nat() {
9907        let ts = Timestamp::from_nanos(100);
9908        assert!(ts.floor_to_unit("fortnight").is_nat());
9909        assert!(ts.ceil_to_unit("century").is_nat());
9910        assert!(ts.round_to_unit("xyz").is_nat());
9911    }
9912
9913    #[test]
9914    fn timestamp_unit_rounding_propagates_nat() {
9915        let nat = Timestamp::nat();
9916        assert!(nat.floor_to_unit("H").is_nat());
9917        assert!(nat.ceil_to_unit("H").is_nat());
9918        assert!(nat.round_to_unit("H").is_nat());
9919    }
9920
9921    #[test]
9922    fn timestamp_unit_rounding_preserves_tz() {
9923        let ts = Timestamp::from_nanos_tz(Timedelta::NANOS_PER_HOUR * 12 + 100, "US/Eastern");
9924        assert_eq!(ts.floor_to_unit("H").tz.as_deref(), Some("US/Eastern"));
9925        assert_eq!(ts.ceil_to_unit("H").tz.as_deref(), Some("US/Eastern"));
9926        assert_eq!(ts.round_to_unit("H").tz.as_deref(), Some("US/Eastern"));
9927    }
9928
9929    #[test]
9930    fn timedelta_unit_to_nanos_is_now_public_and_matches_pandas_aliases() {
9931        // Public surface check: pandas alias core set.
9932        assert_eq!(
9933            Timedelta::unit_to_nanos("W"),
9934            Some(Timedelta::NANOS_PER_WEEK)
9935        );
9936        assert_eq!(
9937            Timedelta::unit_to_nanos("D"),
9938            Some(Timedelta::NANOS_PER_DAY)
9939        );
9940        assert_eq!(
9941            Timedelta::unit_to_nanos("H"),
9942            Some(Timedelta::NANOS_PER_HOUR)
9943        );
9944        assert_eq!(
9945            Timedelta::unit_to_nanos("min"),
9946            Some(Timedelta::NANOS_PER_MIN)
9947        );
9948        assert_eq!(
9949            Timedelta::unit_to_nanos("s"),
9950            Some(Timedelta::NANOS_PER_SEC)
9951        );
9952        assert_eq!(
9953            Timedelta::unit_to_nanos("ms"),
9954            Some(Timedelta::NANOS_PER_MILLI)
9955        );
9956        assert_eq!(
9957            Timedelta::unit_to_nanos("us"),
9958            Some(Timedelta::NANOS_PER_MICRO)
9959        );
9960        assert_eq!(Timedelta::unit_to_nanos("ns"), Some(1));
9961        // Empty string → days (pandas default).
9962        assert_eq!(Timedelta::unit_to_nanos(""), Some(Timedelta::NANOS_PER_DAY));
9963        // Unknown alias → None.
9964        assert_eq!(Timedelta::unit_to_nanos("century"), None);
9965    }
9966
9967    #[test]
9968    fn timestamp_isoformat_basic() {
9969        let ts = Timestamp::from_nanos(0);
9970        assert_eq!(ts.isoformat(), "1970-01-01T00:00:00");
9971
9972        let ts_utc = Timestamp::from_nanos_tz(0, "UTC");
9973        assert_eq!(ts_utc.isoformat(), "1970-01-01T00:00:00+00:00");
9974
9975        let ts_tz = Timestamp::from_nanos_tz(
9976            Timedelta::NANOS_PER_DAY
9977                + Timedelta::NANOS_PER_HOUR * 14
9978                + Timedelta::NANOS_PER_MIN * 30,
9979            "America/New_York",
9980        );
9981        assert!(ts_tz.isoformat().contains("1970-01-02T14:30:00"));
9982        assert!(ts_tz.isoformat().contains("[America/New_York]"));
9983
9984        assert_eq!(Timestamp::nat().isoformat(), "NaT");
9985    }
9986
9987    #[test]
9988    fn timestamp_isoformat_pre_epoch_subsecond_uses_floor_day_263m5() {
9989        assert_eq!(
9990            Timestamp::from_nanos(-1).isoformat(),
9991            "1969-12-31T23:59:59.999999999"
9992        );
9993        assert_eq!(
9994            Timestamp::from_nanos(-Timedelta::NANOS_PER_SEC).isoformat(),
9995            "1969-12-31T23:59:59"
9996        );
9997        assert_eq!(
9998            Timestamp::from_nanos(-Timedelta::NANOS_PER_DAY).isoformat(),
9999            "1969-12-31T00:00:00"
10000        );
10001        assert_eq!(
10002            Timestamp::from_nanos_tz(-1, "UTC").isoformat(),
10003            "1969-12-31T23:59:59.999999999+00:00"
10004        );
10005    }
10006
10007    #[test]
10008    fn timestamp_isoformat_preserves_nanosecond_fraction_4r99y() {
10009        assert_eq!(
10010            Timestamp::from_nanos(123_456_789).isoformat(),
10011            "1970-01-01T00:00:00.123456789"
10012        );
10013        assert_eq!(
10014            Timestamp::from_nanos(123_456_000).isoformat(),
10015            "1970-01-01T00:00:00.123456"
10016        );
10017        assert_eq!(
10018            Timestamp::from_nanos(123_000_000).isoformat(),
10019            "1970-01-01T00:00:00.123000"
10020        );
10021        assert_eq!(
10022            Timestamp::from_nanos_tz(1, "UTC").isoformat(),
10023            "1970-01-01T00:00:00.000000001+00:00"
10024        );
10025    }
10026
10027    #[test]
10028    fn timestamp_strftime_basic() {
10029        let ts = Timestamp::from_nanos(
10030            Timedelta::NANOS_PER_DAY * 365
10031                + Timedelta::NANOS_PER_HOUR * 9
10032                + Timedelta::NANOS_PER_MIN * 15,
10033        );
10034        assert_eq!(ts.strftime("%Y-%m-%d"), "1971-01-01");
10035        assert_eq!(ts.strftime("%H:%M:%S"), "09:15:00");
10036        assert_eq!(ts.strftime("%Y/%m/%d %H:%M"), "1971/01/01 09:15");
10037        assert_eq!(Timestamp::nat().strftime("%Y-%m-%d"), "NaT");
10038    }
10039
10040    #[test]
10041    fn timestamp_day_name_and_month_name() {
10042        let ts = Timestamp::from_nanos(0);
10043        assert_eq!(ts.day_name(), "Thursday");
10044        assert_eq!(ts.month_name(), "January");
10045
10046        let ts2 = Timestamp::from_nanos(Timedelta::NANOS_PER_DAY * 365);
10047        assert_eq!(ts2.day_name(), "Friday");
10048        assert_eq!(ts2.month_name(), "January");
10049
10050        assert_eq!(Timestamp::nat().day_name(), "NaT");
10051        assert_eq!(Timestamp::nat().month_name(), "NaT");
10052    }
10053
10054    #[test]
10055    fn timestamp_replace_validates_components_zw0y2() {
10056        let ts = Timestamp::parse("2024-01-15T10:30:45.123456789").unwrap();
10057        let replaced = ts.replace(
10058            Some(2024),
10059            Some(2),
10060            Some(29),
10061            Some(23),
10062            Some(59),
10063            Some(58),
10064            Some(987_654),
10065            Some(321),
10066        );
10067        assert_eq!(replaced.year(), Some(2024));
10068        assert_eq!(replaced.month(), Some(2));
10069        assert_eq!(replaced.day(), Some(29));
10070        assert_eq!(replaced.hour(), Some(23));
10071        assert_eq!(replaced.minute(), Some(59));
10072        assert_eq!(replaced.second(), Some(58));
10073        assert_eq!(replaced.microsecond(), Some(987_654));
10074        assert_eq!(replaced.nanosecond(), Some(321));
10075
10076        let tz = Timestamp::from_nanos_tz(ts.nanos, "UTC");
10077        assert_eq!(
10078            tz.replace(None, Some(2), Some(29), None, None, None, None, None)
10079                .tz
10080                .as_deref(),
10081            Some("UTC")
10082        );
10083
10084        assert!(
10085            ts.replace(None, Some(13), None, None, None, None, None, None)
10086                .is_nat()
10087        );
10088        assert!(
10089            ts.replace(Some(2023), Some(2), Some(29), None, None, None, None, None)
10090                .is_nat()
10091        );
10092        assert!(
10093            ts.replace(None, None, None, Some(24), None, None, None, None)
10094                .is_nat()
10095        );
10096        assert!(
10097            ts.replace(None, None, None, None, Some(60), None, None, None)
10098                .is_nat()
10099        );
10100        assert!(
10101            ts.replace(None, None, None, None, None, Some(60), None, None)
10102                .is_nat()
10103        );
10104        assert!(
10105            ts.replace(None, None, None, None, None, None, Some(1_000_000), None)
10106                .is_nat()
10107        );
10108        assert!(
10109            ts.replace(None, None, None, None, None, None, None, Some(1_000))
10110                .is_nat()
10111        );
10112        assert!(
10113            Timestamp::nat()
10114                .replace(Some(2024), Some(1), Some(1), None, None, None, None, None)
10115                .is_nat()
10116        );
10117    }
10118
10119    #[test]
10120    fn timestamp_component_accessors() {
10121        let ts = Timestamp::from_nanos(0);
10122        assert_eq!(ts.year(), Some(1970));
10123        assert_eq!(ts.month(), Some(1));
10124        assert_eq!(ts.day(), Some(1));
10125        assert_eq!(ts.hour(), Some(0));
10126        assert_eq!(ts.minute(), Some(0));
10127        assert_eq!(ts.second(), Some(0));
10128        assert_eq!(ts.microsecond(), Some(0));
10129        assert_eq!(ts.nanosecond(), Some(0));
10130
10131        let ts2 = Timestamp::from_nanos(
10132            Timedelta::NANOS_PER_DAY * 365
10133                + Timedelta::NANOS_PER_HOUR * 14
10134                + Timedelta::NANOS_PER_MIN * 30
10135                + Timedelta::NANOS_PER_SEC * 45
10136                + 123_456_789,
10137        );
10138        assert_eq!(ts2.year(), Some(1971));
10139        assert_eq!(ts2.month(), Some(1));
10140        assert_eq!(ts2.day(), Some(1));
10141        assert_eq!(ts2.hour(), Some(14));
10142        assert_eq!(ts2.minute(), Some(30));
10143        assert_eq!(ts2.second(), Some(45));
10144        assert_eq!(ts2.microsecond(), Some(123456));
10145        assert_eq!(ts2.nanosecond(), Some(789));
10146
10147        assert_eq!(Timestamp::nat().year(), None);
10148        assert_eq!(Timestamp::nat().month(), None);
10149        assert_eq!(Timestamp::nat().day(), None);
10150    }
10151
10152    #[test]
10153    fn timestamp_dayofweek_dayofyear_quarter() {
10154        let ts = Timestamp::from_nanos(0);
10155        assert_eq!(ts.dayofweek(), Some(3));
10156        assert_eq!(ts.weekday(), Some(3));
10157        assert_eq!(ts.dayofyear(), Some(1));
10158        assert_eq!(ts.quarter(), Some(1));
10159
10160        let ts2 = Timestamp::from_nanos(Timedelta::NANOS_PER_DAY * 90);
10161        assert_eq!(ts2.quarter(), Some(2));
10162
10163        let ts3 = Timestamp::from_nanos(Timedelta::NANOS_PER_DAY * 365);
10164        assert_eq!(ts3.dayofyear(), Some(1));
10165        assert_eq!(ts3.dayofweek(), Some(4));
10166
10167        assert_eq!(Timestamp::nat().dayofweek(), None);
10168        assert_eq!(Timestamp::nat().dayofyear(), None);
10169        assert_eq!(Timestamp::nat().quarter(), None);
10170    }
10171
10172    #[test]
10173    fn timestamp_is_boundary_methods() {
10174        let jan1 = Timestamp::from_nanos(0);
10175        assert_eq!(jan1.is_leap_year(), Some(false));
10176        assert_eq!(jan1.is_month_start(), Some(true));
10177        assert_eq!(jan1.is_month_end(), Some(false));
10178        assert_eq!(jan1.is_quarter_start(), Some(true));
10179        assert_eq!(jan1.is_quarter_end(), Some(false));
10180        assert_eq!(jan1.is_year_start(), Some(true));
10181        assert_eq!(jan1.is_year_end(), Some(false));
10182
10183        let dec31 = Timestamp::from_nanos(Timedelta::NANOS_PER_DAY * 364);
10184        assert_eq!(dec31.is_month_start(), Some(false));
10185        assert_eq!(dec31.is_month_end(), Some(true));
10186        assert_eq!(dec31.is_quarter_end(), Some(true));
10187        assert_eq!(dec31.is_year_end(), Some(true));
10188
10189        assert_eq!(Timestamp::nat().is_leap_year(), None);
10190        assert_eq!(Timestamp::nat().is_month_start(), None);
10191    }
10192
10193    #[test]
10194    fn timestamp_days_in_month() {
10195        let jan = Timestamp::from_nanos(0);
10196        assert_eq!(jan.days_in_month(), Some(31));
10197        assert_eq!(jan.daysinmonth(), Some(31));
10198
10199        let feb_non_leap = Timestamp::from_nanos(Timedelta::NANOS_PER_DAY * 31);
10200        assert_eq!(feb_non_leap.days_in_month(), Some(28));
10201
10202        assert_eq!(Timestamp::nat().days_in_month(), None);
10203    }
10204
10205    #[test]
10206    fn timestamp_weekofyear() {
10207        let jan1 = Timestamp::from_nanos(0);
10208        assert_eq!(jan1.weekofyear(), Some(1));
10209        assert_eq!(jan1.week(), Some(1));
10210
10211        let jan8 = Timestamp::from_nanos(Timedelta::NANOS_PER_DAY * 7);
10212        assert_eq!(jan8.weekofyear(), Some(2));
10213
10214        assert_eq!(Timestamp::nat().weekofyear(), None);
10215        assert_eq!(Timestamp::nat().week(), None);
10216    }
10217
10218    #[test]
10219    fn timestamp_weekofyear_iso_53_week_boundaries() {
10220        // ISO-8601 53-week-year boundaries vs pandas 2.2.3 isocalendar().week.
10221        // (br-frankenpandas-xmfmd) Date -> nanos via days since 1970-01-01.
10222        fn week_of(date_days: i64) -> Option<i64> {
10223            Timestamp::from_nanos(date_days * Timedelta::NANOS_PER_DAY).weekofyear()
10224        }
10225        // Days from 1970-01-01 for each date (UTC, no tz).
10226        // 2021-01-01 -> week 53 (2020 is a 53-week year); FP used to give 52.
10227        assert_eq!(week_of(18_628), Some(53)); // 2021-01-01
10228        // 2016-01-01 -> week 53 (2015 is a 53-week year).
10229        assert_eq!(week_of(16_801), Some(53)); // 2016-01-01
10230        // 2026-12-31 -> week 53; FP used to give 1.
10231        assert_eq!(week_of(20_818), Some(53)); // 2026-12-31
10232        // 2020-12-31 -> week 53.
10233        assert_eq!(week_of(18_627), Some(53)); // 2020-12-31
10234        // Cases that must stay correct (non-53 boundaries):
10235        assert_eq!(week_of(19_358), Some(52)); // 2023-01-01 -> week 52
10236        assert_eq!(week_of(20_087), Some(1)); // 2024-12-30 -> week 1
10237        assert_eq!(week_of(18_260), Some(1)); // 2019-12-30 -> week 1
10238    }
10239
10240    #[test]
10241    fn iso_weeks_in_year_53_week_years() {
10242        use super::iso_weeks_in_year;
10243        // Known 53-week years; everything else is 52.
10244        for y in [2004, 2009, 2015, 2020, 2026] {
10245            assert_eq!(iso_weeks_in_year(y), 53, "{y} should have 53 ISO weeks");
10246        }
10247        for y in [2018, 2019, 2021, 2022, 2023, 2024] {
10248            assert_eq!(iso_weeks_in_year(y), 52, "{y} should have 52 ISO weeks");
10249        }
10250    }
10251
10252    #[test]
10253    fn timestamp_to_unit() {
10254        let ts = Timestamp::from_nanos(1_000_000_000);
10255        assert_eq!(ts.to_unit("ns"), Some(1_000_000_000));
10256        assert_eq!(ts.to_unit("us"), Some(1_000_000));
10257        assert_eq!(ts.to_unit("ms"), Some(1_000));
10258        assert_eq!(ts.to_unit("s"), Some(1));
10259        assert_eq!(ts.to_unit("invalid"), None);
10260
10261        assert_eq!(Timestamp::nat().to_unit("ns"), None);
10262    }
10263
10264    #[test]
10265    fn timestamp_toordinal() {
10266        // 2026-01-01 is ordinal 738886 (days since Jan 1, year 1)
10267        // Days from Unix epoch: 738886 - 719163 = 19723
10268        let nanos_2026_01_01 = 19723_i64 * 24 * 60 * 60 * 1_000_000_000;
10269        let ts = Timestamp::from_nanos(nanos_2026_01_01);
10270        assert_eq!(ts.toordinal(), Some(738886));
10271
10272        // NaT returns None
10273        assert_eq!(Timestamp::nat().toordinal(), None);
10274    }
10275
10276    #[test]
10277    fn timestamp_fromordinal() {
10278        // Round-trip test: create a timestamp from ordinal derived from toordinal
10279        // First create a known timestamp
10280        let nanos_2026_01_01 = 19723_i64 * 24 * 60 * 60 * 1_000_000_000;
10281        let ts_orig = Timestamp::from_nanos(nanos_2026_01_01);
10282        let ordinal = ts_orig.toordinal().unwrap();
10283
10284        // Now convert back using fromordinal
10285        let ts = Timestamp::fromordinal(ordinal);
10286        assert_eq!(ts.year(), ts_orig.year());
10287        assert_eq!(ts.month(), ts_orig.month());
10288        assert_eq!(ts.day(), ts_orig.day());
10289
10290        // Invalid ordinal returns NaT
10291        let nat = Timestamp::fromordinal(0);
10292        assert!(nat.is_nat());
10293    }
10294
10295    #[test]
10296    fn timestamp_fromordinal_guards_nanosecond_overflow_ycvrd() {
10297        const EPOCH_ORDINAL: i64 = 719_163;
10298        let max_day_offset = i64::MAX / Timedelta::NANOS_PER_DAY;
10299        let max_valid_ordinal = EPOCH_ORDINAL + max_day_offset;
10300
10301        let max_valid = Timestamp::fromordinal(max_valid_ordinal);
10302        assert!(!max_valid.is_nat());
10303        assert_eq!(max_valid.nanos, max_day_offset * Timedelta::NANOS_PER_DAY);
10304
10305        assert!(Timestamp::fromordinal(max_valid_ordinal + 1).is_nat());
10306        assert!(Timestamp::fromordinal(i64::MAX).is_nat());
10307    }
10308
10309    #[test]
10310    fn timestamp_ordinal_matches_seeded_epoch_oracle_l2f0p() {
10311        const EPOCH_ORDINAL: i64 = 719_163;
10312        const DAY: i64 = Timedelta::NANOS_PER_DAY;
10313
10314        fn next(seed: &mut u64) -> u64 {
10315            *seed = seed
10316                .wrapping_mul(3202034522624059733)
10317                .wrapping_add(4354685564936845319);
10318            *seed
10319        }
10320
10321        fn assert_ordinal_case(case: usize, day_offset: i64, subday_nanos: i64) {
10322            let nanos = day_offset.saturating_mul(DAY).saturating_add(subday_nanos);
10323            let ts = Timestamp::from_nanos(nanos);
10324            let expected_day_offset = nanos.div_euclid(DAY);
10325            let expected_ordinal = EPOCH_ORDINAL + expected_day_offset;
10326
10327            assert_eq!(
10328                ts.toordinal(),
10329                Some(expected_ordinal),
10330                "case {case}: toordinal"
10331            );
10332
10333            let midnight = Timestamp::fromordinal(expected_ordinal);
10334            assert_eq!(
10335                midnight.nanos,
10336                expected_day_offset * DAY,
10337                "case {case}: fromordinal nanos"
10338            );
10339            assert_eq!(
10340                midnight.toordinal(),
10341                Some(expected_ordinal),
10342                "case {case}: fromordinal roundtrip"
10343            );
10344        }
10345
10346        assert_eq!(Timestamp::nat().toordinal(), None);
10347        assert!(Timestamp::fromordinal(0).is_nat());
10348        assert!(Timestamp::fromordinal(-1).is_nat());
10349
10350        assert_ordinal_case(usize::MAX, -1, DAY - 1);
10351        assert_ordinal_case(usize::MAX - 1, 0, -1);
10352        assert_ordinal_case(usize::MAX - 2, 19_723, 0);
10353
10354        let mut seed = 0x1f20_f0d1_0a11_0d1e_u64;
10355        for case in 0..260 {
10356            let day_offset = (next(&mut seed) % 40_001) as i64 - 10_000;
10357            let subday_nanos = match case % 7 {
10358                0 => 0,
10359                1 => DAY - 1,
10360                2 => -1,
10361                _ => (next(&mut seed) % (2 * DAY as u64 - 1)) as i64 - (DAY - 1),
10362            };
10363            assert_ordinal_case(case, day_offset, subday_nanos);
10364        }
10365    }
10366
10367    #[test]
10368    fn timestamp_parse_iso8601_date_only() {
10369        let ts = Timestamp::parse("2024-01-15").unwrap();
10370        assert_eq!(ts.year(), Some(2024));
10371        assert_eq!(ts.month(), Some(1));
10372        assert_eq!(ts.day(), Some(15));
10373        assert_eq!(ts.hour(), Some(0));
10374        assert_eq!(ts.minute(), Some(0));
10375        assert_eq!(ts.second(), Some(0));
10376    }
10377
10378    #[test]
10379    fn timestamp_parse_iso8601_datetime() {
10380        let ts = Timestamp::parse("2024-01-15T10:30:45").unwrap();
10381        assert_eq!(ts.year(), Some(2024));
10382        assert_eq!(ts.month(), Some(1));
10383        assert_eq!(ts.day(), Some(15));
10384        assert_eq!(ts.hour(), Some(10));
10385        assert_eq!(ts.minute(), Some(30));
10386        assert_eq!(ts.second(), Some(45));
10387    }
10388
10389    #[test]
10390    fn timestamp_parse_space_separator() {
10391        let ts = Timestamp::parse("2024-01-15 10:30:45").unwrap();
10392        assert_eq!(ts.year(), Some(2024));
10393        assert_eq!(ts.hour(), Some(10));
10394    }
10395
10396    #[test]
10397    fn timestamp_parse_with_fractional_seconds() {
10398        let ts = Timestamp::parse("2024-01-15T10:30:45.123456789").unwrap();
10399        assert_eq!(ts.second(), Some(45));
10400        assert_eq!(ts.microsecond(), Some(123456));
10401        assert_eq!(ts.nanosecond(), Some(789));
10402    }
10403
10404    #[test]
10405    fn timestamp_parse_utc_timezone() {
10406        let ts = Timestamp::parse("2024-01-15T10:30:45Z").unwrap();
10407        assert_eq!(ts.tz, Some("UTC".to_string()));
10408    }
10409
10410    #[test]
10411    fn timestamp_parse_offset_timezone() {
10412        let ts = Timestamp::parse("2024-01-15T10:30:45+05:30").unwrap();
10413        assert_eq!(ts.tz, Some("+05:30".to_string()));
10414
10415        let ts = Timestamp::parse("2024-01-15T10:30:45-05:30").unwrap();
10416        assert_eq!(ts.tz, Some("-05:30".to_string()));
10417    }
10418
10419    #[test]
10420    fn timestamp_parse_rejects_invalid_timezone_offsets_0v676() {
10421        assert!(Timestamp::parse("2024-01-15T10:30:45+bad").is_err());
10422        assert!(Timestamp::parse("2024-01-15T10:30:45+0500").is_err());
10423        assert!(Timestamp::parse("2024-01-15T10:30:45+24:00").is_err());
10424        assert!(Timestamp::parse("2024-01-15T10:30:45+05:60").is_err());
10425        assert!(Timestamp::parse("2024-01-15T10:30:45-25:00").is_err());
10426    }
10427
10428    #[test]
10429    fn timestamp_parse_nat() {
10430        let ts = Timestamp::parse("NaT").unwrap();
10431        assert!(ts.is_nat());
10432        let ts2 = Timestamp::parse("nat").unwrap();
10433        assert!(ts2.is_nat());
10434    }
10435
10436    #[test]
10437    fn timestamp_parse_invalid() {
10438        assert!(Timestamp::parse("not a date").is_err());
10439        assert!(Timestamp::parse("2024-13-01").is_err()); // invalid month
10440        assert!(Timestamp::parse("2024-01-32").is_err()); // invalid day
10441    }
10442
10443    #[test]
10444    fn timestamp_parse_rejects_invalid_fractional_seconds_87se2() {
10445        assert!(Timestamp::parse("2024-01-15T10:30:45.").is_err());
10446        assert!(Timestamp::parse("2024-01-15T10:30:45.abc").is_err());
10447        assert!(Timestamp::parse("2024-01-15T10:30:45.-1").is_err());
10448        assert!(Timestamp::parse("2024-01-15T10:30:45.１２３").is_err());
10449
10450        let ts = Timestamp::parse("2024-01-15T10:30:45.123456789987").unwrap();
10451        assert_eq!(ts.second(), Some(45));
10452        assert_eq!(ts.microsecond(), Some(123456));
10453        assert_eq!(ts.nanosecond(), Some(789));
10454    }
10455
10456    #[test]
10457    fn timestamp_parse_matches_seeded_iso_component_oracle_1u7a0() {
10458        fn next(seed: &mut u64) -> u64 {
10459            *seed = seed
10460                .wrapping_mul(3935559000370003845)
10461                .wrapping_add(2691343689449507681);
10462            *seed
10463        }
10464
10465        fn leap(year: i64) -> bool {
10466            (year % 4 == 0 && year % 100 != 0) || year % 400 == 0
10467        }
10468
10469        fn month_len(year: i64, month: u32) -> u32 {
10470            match month {
10471                1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
10472                4 | 6 | 9 | 11 => 30,
10473                2 if leap(year) => 29,
10474                2 => 28,
10475                _ => 0,
10476            }
10477        }
10478
10479        struct Components {
10480            year: i64,
10481            month: u32,
10482            day: u32,
10483            hour: u32,
10484            minute: u32,
10485            second: u32,
10486            nanos: u64,
10487        }
10488
10489        fn assert_components(case: usize, text: &str, expected: Components) {
10490            let ts = Timestamp::parse(text).expect("seeded valid timestamp");
10491            let Components {
10492                year,
10493                month,
10494                day,
10495                hour,
10496                minute,
10497                second,
10498                nanos,
10499            } = expected;
10500            assert_eq!(ts.year(), Some(year), "case {case}: year");
10501            assert_eq!(ts.month(), Some(month as i64), "case {case}: month");
10502            assert_eq!(ts.day(), Some(day as i64), "case {case}: day");
10503            assert_eq!(ts.hour(), Some(hour as i64), "case {case}: hour");
10504            assert_eq!(ts.minute(), Some(minute as i64), "case {case}: minute");
10505            assert_eq!(ts.second(), Some(second as i64), "case {case}: second");
10506            assert_eq!(
10507                ts.microsecond(),
10508                Some((nanos / 1000) as i64),
10509                "case {case}: microsecond"
10510            );
10511            assert_eq!(
10512                ts.nanosecond(),
10513                Some((nanos % 1000) as i64),
10514                "case {case}: nanosecond"
10515            );
10516        }
10517
10518        assert!(Timestamp::parse("NaT").expect("NaT parses").is_nat());
10519        assert!(
10520            Timestamp::parse("nAt")
10521                .expect("mixed-case NaT parses")
10522                .is_nat()
10523        );
10524        assert!(Timestamp::parse("1900-02-29").is_err());
10525        assert!(Timestamp::parse("2001-04-31").is_err());
10526        assert!(Timestamp::parse("2024-00-15").is_err());
10527
10528        assert_components(
10529            usize::MAX,
10530            "2000-02-29",
10531            Components {
10532                year: 2000,
10533                month: 2,
10534                day: 29,
10535                hour: 0,
10536                minute: 0,
10537                second: 0,
10538                nanos: 0,
10539            },
10540        );
10541        assert_components(
10542            usize::MAX - 1,
10543            "2024-02-29T23:59:59.000000001",
10544            Components {
10545                year: 2024,
10546                month: 2,
10547                day: 29,
10548                hour: 23,
10549                minute: 59,
10550                second: 59,
10551                nanos: 1,
10552            },
10553        );
10554
10555        let mut seed = 0x15e0_1d50_1f0a_cade_u64;
10556        for case in 0..260 {
10557            let year = 1900 + (next(&mut seed) % 201) as i64;
10558            let month = 1 + (next(&mut seed) % 12) as u32;
10559            let day = 1 + (next(&mut seed) % month_len(year, month) as u64) as u32;
10560            let hour = (next(&mut seed) % 24) as u32;
10561            let minute = (next(&mut seed) % 60) as u32;
10562            let second = (next(&mut seed) % 60) as u32;
10563            let nanos = next(&mut seed) % 1_000_000_000;
10564
10565            match case % 4 {
10566                0 => {
10567                    let text = format!("{year:04}-{month:02}-{day:02}");
10568                    assert_components(
10569                        case,
10570                        &text,
10571                        Components {
10572                            year,
10573                            month,
10574                            day,
10575                            hour: 0,
10576                            minute: 0,
10577                            second: 0,
10578                            nanos: 0,
10579                        },
10580                    );
10581                }
10582                1 => {
10583                    let text =
10584                        format!("{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}");
10585                    assert_components(
10586                        case,
10587                        &text,
10588                        Components {
10589                            year,
10590                            month,
10591                            day,
10592                            hour,
10593                            minute,
10594                            second,
10595                            nanos: 0,
10596                        },
10597                    );
10598                }
10599                2 => {
10600                    let text =
10601                        format!("{year:04}-{month:02}-{day:02} {hour:02}:{minute:02}:{second:02}");
10602                    assert_components(
10603                        case,
10604                        &text,
10605                        Components {
10606                            year,
10607                            month,
10608                            day,
10609                            hour,
10610                            minute,
10611                            second,
10612                            nanos: 0,
10613                        },
10614                    );
10615                }
10616                _ => {
10617                    let text = format!(
10618                        "{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}.{nanos:09}"
10619                    );
10620                    assert_components(
10621                        case,
10622                        &text,
10623                        Components {
10624                            year,
10625                            month,
10626                            day,
10627                            hour,
10628                            minute,
10629                            second,
10630                            nanos,
10631                        },
10632                    );
10633                }
10634            }
10635        }
10636    }
10637
10638    #[test]
10639    fn period_parse_annual() {
10640        let p = Period::parse("2024").unwrap();
10641        assert_eq!(p.freq(), PeriodFreq::Annual);
10642        assert_eq!(p.ordinal(), 2024 - 1970);
10643    }
10644
10645    #[test]
10646    fn period_parse_quarterly() {
10647        let p = Period::parse("2024Q1").unwrap();
10648        assert_eq!(p.freq(), PeriodFreq::Quarterly);
10649        assert_eq!(p.ordinal(), (2024 - 1970) * 4);
10650
10651        let p2 = Period::parse("2024q3").unwrap();
10652        assert_eq!(p2.freq(), PeriodFreq::Quarterly);
10653        assert_eq!(p2.ordinal(), (2024 - 1970) * 4 + 2);
10654    }
10655
10656    #[test]
10657    fn period_parse_monthly() {
10658        let p = Period::parse("2024-01").unwrap();
10659        assert_eq!(p.freq(), PeriodFreq::Monthly);
10660        assert_eq!(p.ordinal(), (2024 - 1970) * 12);
10661
10662        let p2 = Period::parse("2024-12").unwrap();
10663        assert_eq!(p2.freq(), PeriodFreq::Monthly);
10664        assert_eq!(p2.ordinal(), (2024 - 1970) * 12 + 11);
10665    }
10666
10667    #[test]
10668    fn period_parse_nat() {
10669        let p = Period::parse("NaT").unwrap();
10670        assert_eq!(p.ordinal(), i64::MIN);
10671    }
10672
10673    #[test]
10674    fn period_parse_invalid() {
10675        assert!(Period::parse("not a period").is_err());
10676        assert!(Period::parse("2024Q5").is_err()); // invalid quarter
10677        assert!(Period::parse("2024-13").is_err()); // invalid month
10678    }
10679
10680    #[test]
10681    fn interval_parse_basic() {
10682        let i = Interval::parse("[0, 1]").unwrap();
10683        assert_eq!(i.left, 0.0);
10684        assert_eq!(i.right, 1.0);
10685        assert_eq!(i.closed, IntervalClosed::Both);
10686
10687        let i2 = Interval::parse("(0, 1)").unwrap();
10688        assert_eq!(i2.left, 0.0);
10689        assert_eq!(i2.right, 1.0);
10690        assert_eq!(i2.closed, IntervalClosed::Neither);
10691
10692        let i3 = Interval::parse("[0, 1)").unwrap();
10693        assert_eq!(i3.closed, IntervalClosed::Left);
10694
10695        let i4 = Interval::parse("(0, 1]").unwrap();
10696        assert_eq!(i4.closed, IntervalClosed::Right);
10697    }
10698
10699    #[test]
10700    fn interval_parse_floats() {
10701        let i = Interval::parse("[-1.5, 2.5)").unwrap();
10702        assert_eq!(i.left, -1.5);
10703        assert_eq!(i.right, 2.5);
10704        assert_eq!(i.closed, IntervalClosed::Left);
10705    }
10706
10707    #[test]
10708    fn interval_parse_invalid() {
10709        assert!(Interval::parse("invalid").is_err());
10710        assert!(Interval::parse("[0]").is_err());
10711        assert!(Interval::parse("0, 1").is_err()); // missing brackets
10712    }
10713}
fp_types/lib.rs

fp_types/
lib.rs